From 9e02ac95e4573b76482377ab3c75a8eefbe92113 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 16 Oct 2018 18:07:58 -0700 Subject: [PATCH] filter-repo: record metadata for remapping for refs and commits Our filtering process will rewrite (and drop) commits, causing refs to also get updated. A useful debugging aid for users is to write metadata showing the mapping from old commit IDs to new commit IDs, as well as the hash each ref pointed to before and after the rewrite. Signed-off-by: Elijah Newren --- git-filter-repo | 43 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/git-filter-repo b/git-filter-repo index 6643294..86eca19 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -1101,6 +1101,39 @@ class FastExportFilter(object): # Now print the resulting reset reset.dump(self._output) + def record_metadata(self, metadata_dir, orig_refs, refs_nuked): + deleted_hash = '0'*40 + with open(os.path.join(metadata_dir, 'commit-map'), 'w') as f: + f.write("old new\n") + for (old,new) in self._commit_renames.iteritems(): + f.write('{} {}\n'.format(old, new if new != None else deleted_hash)) + + batch_check_process = None + with open(os.path.join(metadata_dir, 'ref-map'), 'w') as f: + for refname, old_hash in orig_refs.iteritems(): + if refname in refs_nuked: + new_hash = deleted_hash + elif old_hash in self._commit_renames: + new_hash = self._commit_renames[old_hash] + new_hash = new_hash if new_hash != None else deleted_hash + else: # Must be an annotated tag + if not batch_check_process: + cmd = 'git cat-file --batch-check'.split() + batch_check_process = subprocess.Popen(cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE) + batch_check_process.stdin.write(refname+"\n") + line = batch_check_process.stdout.readline() + m = re.match('^([0-9a-f]{40}) ([a-z]+) ([0-9]+)$', line) + if not m or m.group(2) != 'tag': + raise SystemExit("Failed to find new id for {} (old id was {})" + .format(refname, 
old_hash)) + new_hash = m.group(1) + f.write('{} {} {}\n'.format(old_hash, new_hash, refname)) + if batch_check_process: + batch_check_process.stdin.close() + batch_check_process.wait() + def get_seen_refs(self): return self._seen_refs.keys() @@ -1503,10 +1536,9 @@ def run_fast_filter(): sanity_check(orig_refs, is_bare) # Create a temporary directory for storing some results - if args.dry_run or args.debug: - results_tmp_dir = os.path.join(git_dir, 'filter-repo') - if not os.path.isdir(results_tmp_dir): - os.mkdir(results_tmp_dir) + results_tmp_dir = os.path.join(git_dir, 'filter-repo') + if not os.path.isdir(results_tmp_dir): + os.mkdir(results_tmp_dir) # Determine where to get input (and whether to make a copy) if args.stdin: @@ -1577,6 +1609,9 @@ def run_fast_filter(): if p.wait(): raise SystemExit("git update-ref failed; see above") + # Write out data about run + filter.record_metadata(results_tmp_dir, orig_refs, refs_to_nuke) + # Nuke the reflogs and repack if not args.quiet and not args.debug: print("Repacking your repo and cleaning out old unneeded objects")