From 72b69b3dbe9aacc0138245fc61a0a5db1950ab8d Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 8 Jan 2019 11:02:53 -0800 Subject: [PATCH] filter-repo: support --source and --target options This will allow exporting from one repo into a different repo, and combined with chained RepoFilter instances from commit 81016821a1a5 (filter-repo: allow chaining of RepoFilter instances, 2019-01-07), will even allow things like splicing separate repositories together. Signed-off-by: Elijah Newren --- git-filter-repo | 72 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 22 deletions(-) diff --git a/git-filter-repo b/git-filter-repo index 63e17be..0b074f1 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -733,11 +733,14 @@ class FastExportFilter(object): repo). """ - def __init__(self, + def __init__(self, repo_working_dir, tag_callback = None, commit_callback = None, blob_callback = None, progress_callback = None, reset_callback = None, checkpoint_callback = None, everything_callback = None): + # Repo we are exporting + self._repo_working_dir = repo_working_dir + # Members below simply store callback functions for the various git # elements self._tag_callback = tag_callback @@ -1463,7 +1466,8 @@ class FastExportFilter(object): cmd = 'git cat-file --batch-check'.split() batch_check_process = subprocess.Popen(cmd, stdin=subprocess.PIPE, - stdout=subprocess.PIPE) + stdout=subprocess.PIPE, + cwd=self._repo_working_dir) batch_check_process.stdin.write(refname+"\n") line = batch_check_process.stdout.readline() m = re.match('^([0-9a-f]{40}) ([a-z]+) ([0-9]+)$', line) @@ -1694,19 +1698,24 @@ class GitUtils(object): return int(lines[0].split()[1]) + int(lines[2].split()[1]) @staticmethod - def is_repository_bare(): - out = subprocess.check_output('git rev-parse --is-bare-repository'.split()) + def is_repository_bare(repo_working_dir): + out = subprocess.check_output('git rev-parse --is-bare-repository'.split(), + cwd=repo_working_dir) return (out.strip() == 'true') @staticmethod - def determine_git_dir(): - out = subprocess.check_output('git rev-parse --git-dir'.split()) - return out.strip() + def determine_git_dir(repo_working_dir): + d = subprocess.check_output('git rev-parse --git-dir'.split(), + cwd=repo_working_dir).strip() + if repo_working_dir=='.' or d.startswith('/'): + return d + return os.path.join(repo_working_dir, d) @staticmethod - def get_refs(): + def get_refs(repo_working_dir): try: - output = subprocess.check_output('git show-ref'.split()) + output = subprocess.check_output('git show-ref'.split(), + cwd=repo_working_dir) except subprocess.CalledProcessError as e: # If error code is 1, there just aren't any refs; i.e. new repo. # If error code is other than 1, some other error (e.g. not a git repo) @@ -1846,6 +1855,13 @@ class FilteringOptions(object): DIRECTORY. Equivalent to using "--path-rename :DIRECTORY/"''') + location = parser.add_argument_group(title='Location to filter from/to') + location.add_argument('--source', + help='''Git repository to read from''') + location.add_argument('--target', + help='''Git repository to overwrite with filtered + history''') + misc = parser.add_argument_group(title='Miscellaneous options') misc.add_argument('--help', '-h', action='store_true', help='''Show this help message and exit.''') @@ -2353,7 +2369,7 @@ class RepoAnalyze(object): @staticmethod def run(args): - git_dir = GitUtils.determine_git_dir() + git_dir = GitUtils.determine_git_dir('.') # Create the report directory as necessary results_tmp_dir = os.path.join(git_dir, 'filter-repo') @@ -2443,12 +2459,16 @@ class RepoFilter(object): print("[DEBUG] Passed arguments:\n{}".format(self._args)) # Determine basic repository information - self._orig_refs = GitUtils.get_refs() - is_bare = GitUtils.is_repository_bare() + target_working_dir = self._args.target or '.' + self._orig_refs = GitUtils.get_refs(target_working_dir) + is_bare = GitUtils.is_repository_bare(target_working_dir) # Do sanity checks from the correct directory if not self._args.force: + cwd = os.getcwd() + os.chdir(target_working_dir) RepoFilter.sanity_check(self._orig_refs, is_bare) + os.chdir(cwd) @staticmethod def sanity_check(refs, is_bare): @@ -2474,7 +2494,7 @@ class RepoFilter(object): # Avoid letting people running with weird setups and overwriting GIT_DIR # elsewhere - git_dir = GitUtils.determine_git_dir() + git_dir = GitUtils.determine_git_dir('.') if is_bare and git_dir != '.': abort("GIT_DIR must be .") elif not is_bare and git_dir != '.git': @@ -2592,7 +2612,8 @@ class RepoFilter(object): reset_or_tag.ref = RepoFilter.new_tagname(args, reset_or_tag.ref, shortname) def results_tmp_dir(self): - git_dir = GitUtils.determine_git_dir() + working_dir = self._args.target or self._args.source or '.' + git_dir = GitUtils.determine_git_dir(working_dir) d = os.path.join(git_dir, 'filter-repo') if not os.path.isdir(d): os.mkdir(d) @@ -2622,11 +2643,14 @@ class RepoFilter(object): self._input = sys.stdin self._fe_orig = None else: - skip_blobs = (self._blob_callback is None) and ( - self._everything_callback is None) + skip_blobs = (self._blob_callback is None and + self._everything_callback is None and + self._args.source is None and + self._args.target is None) extra_flags = ['--no-data'] if skip_blobs else [] done_feature = ['--use-done-feature'] if use_done_feature else [] - fep_cmd = ['git', 'fast-export', '--show-original-ids', + location = ['-C', self._args.source] if self._args.source else [] + fep_cmd = ['git'] + location + ['fast-export', '--show-original-ids', '--signed-tags=strip', '--tag-of-filtered-object=rewrite' ] + done_feature + extra_flags + self._args.refs self._fep = subprocess.Popen(fep_cmd, bufsize=-1, stdout=subprocess.PIPE) @@ -2642,7 +2666,8 @@ class RepoFilter(object): def _setup_output(self): if not self._args.dry_run: - fip_cmd = 'git fast-import --force --quiet'.split() + location = ['-C', self._args.target] if self._args.target else [] + fip_cmd = ['git'] + location + 'fast-import --force --quiet'.split() self._fip = subprocess.Popen(fip_cmd, bufsize=-1, stdin=subprocess.PIPE, @@ -2679,7 +2704,8 @@ class RepoFilter(object): self._reset_callback and self._reset_callback(r) # Create and run the filter - filter = FastExportFilter(blob_callback = self._blob_callback, + filter = FastExportFilter(self._args.source or '.', + blob_callback = self._blob_callback, commit_callback = actual_commit_callback, tag_callback = actual_tag_callback, reset_callback = actual_reset_callback, @@ -2713,6 +2739,7 @@ class RepoFilter(object): print(" " + self._fe_filt) sys.exit(0) + target_working_dir = self._args.target or '.' if self._input: # Remove unused refs refs_to_nuke = set(self._orig_refs) - set(filter.get_seen_refs()) @@ -2721,7 +2748,8 @@ class RepoFilter(object): print("[DEBUG] Deleting the following refs:\n "+ "\n ".join(refs_to_nuke)) p = subprocess.Popen('git update-ref --stdin'.split(), - stdin=subprocess.PIPE) + stdin=subprocess.PIPE, + cwd=target_working_dir) p.stdin.write(''.join(["option no-deref\ndelete {}\n".format(x) for x in refs_to_nuke])) p.stdin.close() @@ -2739,12 +2767,12 @@ class RepoFilter(object): quiet_flags = '--quiet' if self._args.quiet else '' cleanup_cmds = ['git reflog expire --expire=now --all'.split(), 'git gc {} --prune=now'.format(quiet_flags).split()] - if not GitUtils.is_repository_bare(): + if not GitUtils.is_repository_bare(target_working_dir): cleanup_cmds.append('git reset {} --hard'.format(quiet_flags).split()) for cmd in cleanup_cmds: if self._args.debug: print("[DEBUG] Running: {}".format(' '.join(cmd))) - subprocess.call(cmd) + subprocess.call(cmd, cwd=target_working_dir) if __name__ == '__main__': args = FilteringOptions.parse_args(sys.argv[1:])