diff --git a/README.md b/README.md index c5b2bdc..0b90b9b 100644 --- a/README.md +++ b/README.md @@ -120,7 +120,7 @@ new and old history before pushing somewhere. Other caveats: None of the existing repository filtering tools do what I want. They're all good in their own way, but come up short for my needs. No tool -provided any of the first seven traits below I wanted, and all failed to +provided any of the first eight traits below I wanted, and all failed to provide at least one of the last four traits as well: 1. [Starting report] Provide user an analysis of their repo to help @@ -186,6 +186,10 @@ provide at least one of the last four traits as well: provides reasonable string manipulation capabilities (which are sorely lacking in shell). + 1. [Old commit references] Provide a way for users to use old commit + IDs with the new repository (in particular via mapping from old to + new hashes with refs/replace/ references). + 1. [Commit message consistency] If commit messages refer to other commits by ID (e.g. "this reverts commit 01234567890abcdef", "In commit 0013deadbeef9a..."), those commit messages should be diff --git a/git-filter-repo b/git-filter-repo index d2b87f0..4fa9285 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -1759,6 +1759,10 @@ class FastExportFilter(object): def get_seen_refs(self): return self._seen_refs.keys() + def get_commit_renames(self): + self._flush_renames() + return self._commit_renames + def run(self, input, output, fast_import_pipes, quiet): """ This method filters fast export output. @@ -2016,6 +2020,20 @@ class FilteringOptions(object): "'***REMOVED***'. ")) parents = parser.add_argument_group(title=_("Parent rewriting")) + parents.add_argument('--replace-refs', default=None, + choices=['delete-no-add', 'delete-and-add', + 'update-no-add', 'update-or-add', + 'update-and-add'], + help=_("Replace refs (see git-replace(1)) are used to rewrite " + "parents (unless turned off by the usual git mechanism); this " + "flag specifies what do do with those refs afterward. " + "Replace refs can either be deleted or updated to point at new " + "commit hashes. Also, new replace refs can be added for each " + "commit rewrite. With 'update-or-add', new replace refs are " + "only added for commit rewrites that aren't used to update an " + "existing replace ref. default is 'update-and-add' if " + "$GIT_DIR/filter-repo/already_ran does not exist; " + "'update-or-add' otherwise.")) parents.add_argument('--empty-pruning', default='auto', choices=['always', 'auto', 'never'], help=_("Whether to prune empty commits. 'auto' (the default) means " @@ -2758,6 +2776,10 @@ class RepoFilter(object): def _run_sanity_checks(self): self._sanity_checks_handled = True if not self._managed_output: + if not self._args.replace_refs: + # If not _managed_output we don't want to make extra changes to the + # repo, so set default to no-op 'update-no-add' + self._args.replace_refs = 'update-no-add' return if self._args.debug: @@ -2768,10 +2790,17 @@ class RepoFilter(object): self._orig_refs = GitUtils.get_refs(target_working_dir) is_bare = GitUtils.is_repository_bare(target_working_dir) - # Do sanity checks from the correct directory + # Determine if this is second or later run of filter-repo tmp_dir = self.results_tmp_dir(create_if_missing=False) - if not self._args.force and \ - not os.path.isfile(os.path.join(tmp_dir, b'already_ran')): + already_ran = os.path.isfile(os.path.join(tmp_dir, b'already_ran')) + + # Default for --replace-refs + if not self._args.replace_refs: + self._args.replace_refs = ('update-or-add' if already_ran + else 'update-and-add') + + # Do sanity checks from the correct directory + if not self._args.force and not already_ran: cwd = os.getcwd() os.chdir(target_working_dir) RepoFilter.sanity_check(self._orig_refs, is_bare) @@ -3116,22 +3145,58 @@ class RepoFilter(object): print(" longer be related; consider re-pushing it elsewhere.") subprocess.call('git remote rm origin'.split(), cwd=target_working_dir) - def _ref_update(self, target_working_dir, seen_refs): + def _ref_update(self, target_working_dir, seen_refs, commit_renames): + # Start the update-ref process + p = subprocess.Popen('git update-ref --no-deref --stdin'.split(), + stdin=subprocess.PIPE, + cwd=target_working_dir) + + # Remove replace_refs from _orig_refs + replace_refs = {k:v for k, v in self._orig_refs.items() + if k.startswith(b'refs/replace/')} + reverse_replace_refs = collections.defaultdict(list) + for k,v in replace_refs.items(): + reverse_replace_refs[v].append(k) + all(map(self._orig_refs.pop, replace_refs)) + # Remove unused refs refs_to_nuke = set(self._orig_refs) - set(seen_refs) - if refs_to_nuke: - if self._args.debug: - print("[DEBUG] Deleting the following refs:\n "+ - decode(b"\n ".join(refs_to_nuke))) - p = subprocess.Popen('git update-ref --stdin'.split(), - stdin=subprocess.PIPE, - cwd=target_working_dir) - p.stdin.write(b''.join([b"option no-deref\ndelete %s\n" % x - for x in refs_to_nuke])) - p.stdin.close() - if p.wait(): - raise SystemExit(_("git update-ref failed; see above")) # pragma: no cover + if refs_to_nuke and self._args.debug: + print("[DEBUG] Deleting the following refs:\n "+ + decode(b"\n ".join(refs_to_nuke))) + p.stdin.write(b''.join([b"delete %s\n" % x + for x in refs_to_nuke])) + # Delete or update and add replace_refs; note that fast-export automatically + # handles 'update-no-add', we only need to take action for the other four + # choices for replace_refs. + actual_renames = {k:v for k,v in commit_renames.items() if k != v} + if self._args.replace_refs in ['delete-no-add', 'delete-and-add']: + # Delete old replace refs, if unwanted + replace_refs_to_nuke = set(replace_refs) + if self._args.replace_refs == 'delete-and-add': + # git-update-ref won't allow us to update a ref twice, so be careful + # to avoid deleting refs we'll later update + replace_refs_to_nuke = replace_refs_to_nuke.difference( + [b'refs/replace/'+x for x in actual_renames]) + p.stdin.write(b''.join([b"delete %s\n" % x + for x in replace_refs_to_nuke])) + if self._args.replace_refs in ['delete-and-add', 'update-or-add', + 'update-and-add']: + # Add new replace refs + update_only = (self._args.replace_refs == 'update-or-add') + p.stdin.write(b''.join([b"update refs/replace/%s %s\n" % (old, new) + for old,new in actual_renames.items() + if new and not (update_only and + old in reverse_replace_refs)])) + + # Complete the update-ref process + p.stdin.close() + if p.wait(): + raise SystemExit(_("git update-ref failed; see above")) # pragma: no cover + + # Return what we removed due to being filtered out (not being one of the + # seen refs means it was filtered out). return refs_to_nuke def finish(self): @@ -3213,7 +3278,9 @@ class RepoFilter(object): target_working_dir = self._args.target or '.' if self._input: - refs_nuked = self._ref_update(target_working_dir, fef.get_seen_refs()) + refs_nuked = self._ref_update(target_working_dir, + fef.get_seen_refs(), + fef.get_commit_renames()) # Write out data about run fef.record_metadata(self.results_tmp_dir(), diff --git a/t/t9390-filter-repo.sh b/t/t9390-filter-repo.sh index fad0933..21de381 100755 --- a/t/t9390-filter-repo.sh +++ b/t/t9390-filter-repo.sh @@ -22,7 +22,7 @@ filter_testcase() { rm .git/packed-refs && # Run the example - cat $DATA/$INPUT | git filter-repo --stdin --quiet --force "${REST[@]}" && + cat $DATA/$INPUT | git filter-repo --stdin --quiet --force --replace-refs delete-no-add "${REST[@]}" && # Compare the resulting repo to expected value git fast-export --use-done-feature --all >compare && @@ -145,7 +145,7 @@ test_expect_success '--path-rename inability to squash' ' ) ' -test_expect_success 'more setup' ' +test_expect_success 'setup metasyntactic repo' ' test_create_repo metasyntactic && ( cd metasyntactic && @@ -240,7 +240,7 @@ test_expect_success 'refs/replace/ to skip a parent' ' git tag -d v2.0 && git replace HEAD~1 HEAD~2 && - git filter-repo --path "" --force && + git filter-repo --replace-refs delete-no-add --path "" --force && test $(git rev-list --count HEAD) = 2 && git cat-file --batch-check --batch-all-objects >all-objs && test_line_count = 16 all-objs && @@ -270,7 +270,7 @@ test_expect_success 'refs/replace/ to add more initial history' ' git --no-replace-objects cat-file -p master~2 >grandparent && ! grep parent grandparent && - git filter-repo --path "" --force && + git filter-repo --replace-refs delete-no-add --path "" --force && git --no-replace-objects cat-file -p master~2 >new-grandparent && grep parent new-grandparent && @@ -287,6 +287,64 @@ test_expect_success 'refs/replace/ to add more initial history' ' ) ' +test_expect_success 'creation/deletion/updating of replace refs' ' + ( + git clone file://"$(pwd)"/metasyntactic replace_handling && + + # Same setup as "refs/replace/ to skip a parent", so we + # do not have to check that replacement refs were used + # correctly in the rewrite, just that replacement refs were + # deleted, added, or updated correctly. + cd replace_handling && + git tag -d v2.0 && + master=$(git rev-parse master) && + master_1=$(git rev-parse master~1) && + master_2=$(git rev-parse master~2) && + git replace HEAD~1 HEAD~2 && + cd .. && + + mkdir -p test_replace_refs && + cd test_replace_refs && + + rsync -a --delete ../replace_handling/ ./ && + git filter-repo --replace-refs delete-no-add --path-rename numbers:counting && + git show-ref >output && + ! grep refs/replace/ output && + + rsync -a --delete ../replace_handling/ ./ && + git filter-repo --replace-refs delete-and-add --path-rename numbers:counting && + echo "$(git rev-parse master) refs/replace/$master" >out && + echo "$(git rev-parse master~1) refs/replace/$master_1" >>out && + echo "$(git rev-parse master~1) refs/replace/$master_2" >>out && + sort -k 2 out >expect && + git show-ref | grep refs/replace/ >output && + test_cmp output expect && + + rsync -a --delete ../replace_handling/ ./ && + git filter-repo --replace-refs update-no-add --path-rename numbers:counting && + echo "$(git rev-parse master~1) refs/replace/$master_1" >expect && + git show-ref | grep refs/replace/ >output && + test_cmp output expect && + + rsync -a --delete ../replace_handling/ ./ && + git filter-repo --replace-refs update-or-add --path-rename numbers:counting && + echo "$(git rev-parse master) refs/replace/$master" >>out && + echo "$(git rev-parse master~1) refs/replace/$master_1" >>out && + sort -k 2 out >expect && + git show-ref | grep refs/replace/ >output && + test_cmp output expect && + + rsync -a --delete ../replace_handling/ ./ && + git filter-repo --replace-refs update-and-add --path-rename numbers:counting && + echo "$(git rev-parse master) refs/replace/$master" >>out && + echo "$(git rev-parse master~1) refs/replace/$master_1" >>out && + echo "$(git rev-parse master~1) refs/replace/$master_2" >>out && + sort -k 2 out >expect && + git show-ref | grep refs/replace/ >output && + test_cmp output expect + ) +' + test_expect_success '--debug' ' ( git clone file://"$(pwd)"/metasyntactic debug && @@ -850,6 +908,7 @@ test_expect_success 'handle funny characters' ' cd funny_chars_checks && file_sha=$(git rev-parse :0:señor) && + former_head_sha=$(git rev-parse HEAD) && git filter-repo --to-subdirectory-filter títulos && cat <<-EOF >expect && @@ -865,6 +924,7 @@ test_expect_success 'handle funny characters' ' tag_sha=$(git rev-parse סְפָרַד) && cat <<-EOF >expect && $commit_sha refs/heads/españa + $commit_sha refs/replace/$former_head_sha $tag_sha refs/tags/סְפָרַד EOF