From 1fa8c2c70b164f1cf13e2a3bc195259f19ad1bf1 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Thu, 2 May 2019 10:05:15 -0700 Subject: [PATCH] filter-repo: add --replace-refs option This adds the ability to automatically add new replacement refs for each rewritten commit (as well as delete or update replacement refs that existed before the run). This will allow users to use either new or old commit hashes to reference commits locally, though old commit hashes will need to be unabbreviated. The only requirement for this to work, is that the person who does the rewrite also needs to push the replace refs up where other users can grab them, and users who want to use them need to modify their fetch refspecs to grab the replace refs. However, other tools external to git may not understand replace refs... Tools like Gerrit and GitHub apparently do not yet natively understand replace refs. Trying to view "commits" by the replacement ref will yield various forms of "Not Found" in each tool. One has to instead try to view it as a branch with an odd name (including "refs/replace/"), and often branches are accessed via a different URL style than commits so it becomes very non-obvious to users how to access the info associated with an old commit hash. * In Gerrit, instead of being able to search on the sha1sum or use a pre-defined URL to search and auto-redirect to the appropriate code review with https://gerrit.SITE.COM/#/q/${OLD_SHA1SUM},n,z one instead has to have a special plugin and go to a URL like https://gerrit.SITE.COM/plugins/gitiles/ORG/REPO/+/refs/replace/${OLD_SHA1SUM} but then the user isn't shown the actual code review and will need to guess which link to click on to get to it (and it'll only be there if the user included a Change-Id in the commit message). 
* In GitHub, instead of being able to go to a URL like https://github.SITE.COM/ORG/REPO/commit/${OLD_SHA1SUM} one instead has to navigate based on branch using https://github.SITE.COM/ORG/REPO/tree/refs/replace/${OLD_SHA1SUM} but that will show a listing of commits instead of information about a specific commit; the user has to manually click on the first commit to get to the desired location. For now, providing replace refs at least allows users to access information locally using old IDs; perhaps in time, as other external tools gain a better understanding of how to use replace refs, the barrier to history rewrites will decrease enough that big projects that really need it (e.g. those that have committed many sins by committing stupidly large useless binary blobs) can at least seriously contemplate the undertaking. History rewrites will always have some drawbacks and pain associated with them, as they should, but when warranted it's nice to have transition plans that are more smooth than a massive flag day. Signed-off-by: Elijah Newren --- README.md | 6 ++- git-filter-repo | 101 ++++++++++++++++++++++++++++++++++------- t/t9390-filter-repo.sh | 68 +++++++++++++++++++++++++-- 3 files changed, 153 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index c5b2bdc..0b90b9b 100644 --- a/README.md +++ b/README.md @@ -120,7 +120,7 @@ new and old history before pushing somewhere. Other caveats: None of the existing repository filtering tools do what I want. They're all good in their own way, but come up short for my needs. No tool -provided any of the first seven traits below I wanted, and all failed to +provided any of the first eight traits below I wanted, and all failed to provide at least one of the last four traits as well: 1. 
[Starting report] Provide user an analysis of their repo to help @@ -186,6 +186,10 @@ provide at least one of the last four traits as well: provides reasonable string manipulation capabilities (which are sorely lacking in shell). + 1. [Old commit references] Provide a way for users to use old commit + IDs with the new repository (in particular via mapping from old to + new hashes with refs/replace/ references). + 1. [Commit message consistency] If commit messages refer to other commits by ID (e.g. "this reverts commit 01234567890abcdef", "In commit 0013deadbeef9a..."), those commit messages should be diff --git a/git-filter-repo b/git-filter-repo index d2b87f0..4fa9285 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -1759,6 +1759,10 @@ class FastExportFilter(object): def get_seen_refs(self): return self._seen_refs.keys() + def get_commit_renames(self): + self._flush_renames() + return self._commit_renames + def run(self, input, output, fast_import_pipes, quiet): """ This method filters fast export output. @@ -2016,6 +2020,20 @@ class FilteringOptions(object): "'***REMOVED***'. ")) parents = parser.add_argument_group(title=_("Parent rewriting")) + parents.add_argument('--replace-refs', default=None, + choices=['delete-no-add', 'delete-and-add', + 'update-no-add', 'update-or-add', + 'update-and-add'], + help=_("Replace refs (see git-replace(1)) are used to rewrite " + "parents (unless turned off by the usual git mechanism); this " + "flag specifies what do do with those refs afterward. " + "Replace refs can either be deleted or updated to point at new " + "commit hashes. Also, new replace refs can be added for each " + "commit rewrite. With 'update-or-add', new replace refs are " + "only added for commit rewrites that aren't used to update an " + "existing replace ref. 
default is 'update-and-add' if " + "$GIT_DIR/filter-repo/already_ran does not exist; " + "'update-or-add' otherwise.")) parents.add_argument('--empty-pruning', default='auto', choices=['always', 'auto', 'never'], help=_("Whether to prune empty commits. 'auto' (the default) means " @@ -2758,6 +2776,10 @@ class RepoFilter(object): def _run_sanity_checks(self): self._sanity_checks_handled = True if not self._managed_output: + if not self._args.replace_refs: + # If not _managed_output we don't want to make extra changes to the + # repo, so set default to no-op 'update-no-add' + self._args.replace_refs = 'update-no-add' return if self._args.debug: @@ -2768,10 +2790,17 @@ class RepoFilter(object): self._orig_refs = GitUtils.get_refs(target_working_dir) is_bare = GitUtils.is_repository_bare(target_working_dir) - # Do sanity checks from the correct directory + # Determine if this is second or later run of filter-repo tmp_dir = self.results_tmp_dir(create_if_missing=False) - if not self._args.force and \ - not os.path.isfile(os.path.join(tmp_dir, b'already_ran')): + already_ran = os.path.isfile(os.path.join(tmp_dir, b'already_ran')) + + # Default for --replace-refs + if not self._args.replace_refs: + self._args.replace_refs = ('update-or-add' if already_ran + else 'update-and-add') + + # Do sanity checks from the correct directory + if not self._args.force and not already_ran: cwd = os.getcwd() os.chdir(target_working_dir) RepoFilter.sanity_check(self._orig_refs, is_bare) @@ -3116,22 +3145,58 @@ class RepoFilter(object): print(" longer be related; consider re-pushing it elsewhere.") subprocess.call('git remote rm origin'.split(), cwd=target_working_dir) - def _ref_update(self, target_working_dir, seen_refs): + def _ref_update(self, target_working_dir, seen_refs, commit_renames): + # Start the update-ref process + p = subprocess.Popen('git update-ref --no-deref --stdin'.split(), + stdin=subprocess.PIPE, + cwd=target_working_dir) + + # Remove replace_refs from _orig_refs + 
replace_refs = {k:v for k, v in self._orig_refs.items() + if k.startswith(b'refs/replace/')} + reverse_replace_refs = collections.defaultdict(list) + for k,v in replace_refs.items(): + reverse_replace_refs[v].append(k) + all(map(self._orig_refs.pop, replace_refs)) + # Remove unused refs refs_to_nuke = set(self._orig_refs) - set(seen_refs) - if refs_to_nuke: - if self._args.debug: - print("[DEBUG] Deleting the following refs:\n "+ - decode(b"\n ".join(refs_to_nuke))) - p = subprocess.Popen('git update-ref --stdin'.split(), - stdin=subprocess.PIPE, - cwd=target_working_dir) - p.stdin.write(b''.join([b"option no-deref\ndelete %s\n" % x - for x in refs_to_nuke])) - p.stdin.close() - if p.wait(): - raise SystemExit(_("git update-ref failed; see above")) # pragma: no cover + if refs_to_nuke and self._args.debug: + print("[DEBUG] Deleting the following refs:\n "+ + decode(b"\n ".join(refs_to_nuke))) + p.stdin.write(b''.join([b"delete %s\n" % x + for x in refs_to_nuke])) + # Delete or update and add replace_refs; note that fast-export automatically + # handles 'update-no-add', we only need to take action for the other four + # choices for replace_refs. 
+ actual_renames = {k:v for k,v in commit_renames.items() if k != v} + if self._args.replace_refs in ['delete-no-add', 'delete-and-add']: + # Delete old replace refs, if unwanted + replace_refs_to_nuke = set(replace_refs) + if self._args.replace_refs == 'delete-and-add': + # git-update-ref won't allow us to update a ref twice, so be careful + # to avoid deleting refs we'll later update + replace_refs_to_nuke = replace_refs_to_nuke.difference( + [b'refs/replace/'+x for x in actual_renames]) + p.stdin.write(b''.join([b"delete %s\n" % x + for x in replace_refs_to_nuke])) + if self._args.replace_refs in ['delete-and-add', 'update-or-add', + 'update-and-add']: + # Add new replace refs + update_only = (self._args.replace_refs == 'update-or-add') + p.stdin.write(b''.join([b"update refs/replace/%s %s\n" % (old, new) + for old,new in actual_renames.items() + if new and not (update_only and + old in reverse_replace_refs)])) + + # Complete the update-ref process + p.stdin.close() + if p.wait(): + raise SystemExit(_("git update-ref failed; see above")) # pragma: no cover + + # Return what we removed due to being filtered out (not being one of the + # seen refs means it was filtered out). return refs_to_nuke def finish(self): @@ -3213,7 +3278,9 @@ class RepoFilter(object): target_working_dir = self._args.target or '.' 
if self._input: - refs_nuked = self._ref_update(target_working_dir, fef.get_seen_refs()) + refs_nuked = self._ref_update(target_working_dir, + fef.get_seen_refs(), + fef.get_commit_renames()) # Write out data about run fef.record_metadata(self.results_tmp_dir(), diff --git a/t/t9390-filter-repo.sh b/t/t9390-filter-repo.sh index fad0933..21de381 100755 --- a/t/t9390-filter-repo.sh +++ b/t/t9390-filter-repo.sh @@ -22,7 +22,7 @@ filter_testcase() { rm .git/packed-refs && # Run the example - cat $DATA/$INPUT | git filter-repo --stdin --quiet --force "${REST[@]}" && + cat $DATA/$INPUT | git filter-repo --stdin --quiet --force --replace-refs delete-no-add "${REST[@]}" && # Compare the resulting repo to expected value git fast-export --use-done-feature --all >compare && @@ -145,7 +145,7 @@ test_expect_success '--path-rename inability to squash' ' ) ' -test_expect_success 'more setup' ' +test_expect_success 'setup metasyntactic repo' ' test_create_repo metasyntactic && ( cd metasyntactic && @@ -240,7 +240,7 @@ test_expect_success 'refs/replace/ to skip a parent' ' git tag -d v2.0 && git replace HEAD~1 HEAD~2 && - git filter-repo --path "" --force && + git filter-repo --replace-refs delete-no-add --path "" --force && test $(git rev-list --count HEAD) = 2 && git cat-file --batch-check --batch-all-objects >all-objs && test_line_count = 16 all-objs && @@ -270,7 +270,7 @@ test_expect_success 'refs/replace/ to add more initial history' ' git --no-replace-objects cat-file -p master~2 >grandparent && ! 
grep parent grandparent && - git filter-repo --path "" --force && + git filter-repo --replace-refs delete-no-add --path "" --force && git --no-replace-objects cat-file -p master~2 >new-grandparent && grep parent new-grandparent && @@ -287,6 +287,64 @@ test_expect_success 'refs/replace/ to add more initial history' ' ) ' +test_expect_success 'creation/deletion/updating of replace refs' ' + ( + git clone file://"$(pwd)"/metasyntactic replace_handling && + + # Same setup as "refs/replace/ to skip a parent", so we + # do not have to check that replacement refs were used + # correctly in the rewrite, just that replacement refs were + # deleted, added, or updated correctly. + cd replace_handling && + git tag -d v2.0 && + master=$(git rev-parse master) && + master_1=$(git rev-parse master~1) && + master_2=$(git rev-parse master~2) && + git replace HEAD~1 HEAD~2 && + cd .. && + + mkdir -p test_replace_refs && + cd test_replace_refs && + + rsync -a --delete ../replace_handling/ ./ && + git filter-repo --replace-refs delete-no-add --path-rename numbers:counting && + git show-ref >output && + ! 
grep refs/replace/ output && + + rsync -a --delete ../replace_handling/ ./ && + git filter-repo --replace-refs delete-and-add --path-rename numbers:counting && + echo "$(git rev-parse master) refs/replace/$master" >out && + echo "$(git rev-parse master~1) refs/replace/$master_1" >>out && + echo "$(git rev-parse master~1) refs/replace/$master_2" >>out && + sort -k 2 out >expect && + git show-ref | grep refs/replace/ >output && + test_cmp output expect && + + rsync -a --delete ../replace_handling/ ./ && + git filter-repo --replace-refs update-no-add --path-rename numbers:counting && + echo "$(git rev-parse master~1) refs/replace/$master_1" >expect && + git show-ref | grep refs/replace/ >output && + test_cmp output expect && + + rsync -a --delete ../replace_handling/ ./ && + git filter-repo --replace-refs update-or-add --path-rename numbers:counting && + echo "$(git rev-parse master) refs/replace/$master" >>out && + echo "$(git rev-parse master~1) refs/replace/$master_1" >>out && + sort -k 2 out >expect && + git show-ref | grep refs/replace/ >output && + test_cmp output expect && + + rsync -a --delete ../replace_handling/ ./ && + git filter-repo --replace-refs update-and-add --path-rename numbers:counting && + echo "$(git rev-parse master) refs/replace/$master" >>out && + echo "$(git rev-parse master~1) refs/replace/$master_1" >>out && + echo "$(git rev-parse master~1) refs/replace/$master_2" >>out && + sort -k 2 out >expect && + git show-ref | grep refs/replace/ >output && + test_cmp output expect + ) +' + test_expect_success '--debug' ' ( git clone file://"$(pwd)"/metasyntactic debug && @@ -850,6 +908,7 @@ test_expect_success 'handle funny characters' ' cd funny_chars_checks && file_sha=$(git rev-parse :0:señor) && + former_head_sha=$(git rev-parse HEAD) && git filter-repo --to-subdirectory-filter títulos && cat <<-EOF >expect && @@ -865,6 +924,7 @@ test_expect_success 'handle funny characters' ' tag_sha=$(git rev-parse סְפָרַד) && cat <<-EOF >expect && $commit_sha 
refs/heads/españa + $commit_sha refs/replace/$former_head_sha $tag_sha refs/tags/סְפָרַד EOF