filter-repo: support --source and --target options

This will allow exporting from one repo into a different repo and,
combined with chained RepoFilter instances from commit 81016821a1
(filter-repo: allow chaining of RepoFilter instances, 2019-01-07), will
even allow things like splicing separate repositories together.

Signed-off-by: Elijah Newren <newren@gmail.com>
This commit is contained in:
Elijah Newren 2019-01-08 11:02:53 -08:00
parent d0640bad7a
commit 72b69b3dbe

View File

@ -733,11 +733,14 @@ class FastExportFilter(object):
repo).
"""
def __init__(self,
def __init__(self, repo_working_dir,
tag_callback = None, commit_callback = None,
blob_callback = None, progress_callback = None,
reset_callback = None, checkpoint_callback = None,
everything_callback = None):
# Repo we are exporting
self._repo_working_dir = repo_working_dir
# Members below simply store callback functions for the various git
# elements
self._tag_callback = tag_callback
@ -1463,7 +1466,8 @@ class FastExportFilter(object):
cmd = 'git cat-file --batch-check'.split()
batch_check_process = subprocess.Popen(cmd,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE)
stdout=subprocess.PIPE,
cwd=self._repo_working_dir)
batch_check_process.stdin.write(refname+"\n")
line = batch_check_process.stdout.readline()
m = re.match('^([0-9a-f]{40}) ([a-z]+) ([0-9]+)$', line)
@ -1694,19 +1698,24 @@ class GitUtils(object):
return int(lines[0].split()[1]) + int(lines[2].split()[1])
@staticmethod
def is_repository_bare():
out = subprocess.check_output('git rev-parse --is-bare-repository'.split())
def is_repository_bare(repo_working_dir):
out = subprocess.check_output('git rev-parse --is-bare-repository'.split(),
cwd=repo_working_dir)
return (out.strip() == 'true')
@staticmethod
def determine_git_dir():
out = subprocess.check_output('git rev-parse --git-dir'.split())
return out.strip()
def determine_git_dir(repo_working_dir):
d = subprocess.check_output('git rev-parse --git-dir'.split(),
cwd=repo_working_dir).strip()
if repo_working_dir=='.' or d.startswith('/'):
return d
return os.path.join(repo_working_dir, d)
@staticmethod
def get_refs():
def get_refs(repo_working_dir):
try:
output = subprocess.check_output('git show-ref'.split())
output = subprocess.check_output('git show-ref'.split(),
cwd=repo_working_dir)
except subprocess.CalledProcessError as e:
# If error code is 1, there just aren't any refs; i.e. new repo.
# If error code is other than 1, some other error (e.g. not a git repo)
@ -1846,6 +1855,13 @@ class FilteringOptions(object):
DIRECTORY. Equivalent to using
"--path-rename :DIRECTORY/"''')
location = parser.add_argument_group(title='Location to filter from/to')
location.add_argument('--source',
help='''Git repository to read from''')
location.add_argument('--target',
help='''Git repository to overwrite with filtered
history''')
misc = parser.add_argument_group(title='Miscellaneous options')
misc.add_argument('--help', '-h', action='store_true',
help='''Show this help message and exit.''')
@ -2353,7 +2369,7 @@ class RepoAnalyze(object):
@staticmethod
def run(args):
git_dir = GitUtils.determine_git_dir()
git_dir = GitUtils.determine_git_dir('.')
# Create the report directory as necessary
results_tmp_dir = os.path.join(git_dir, 'filter-repo')
@ -2443,12 +2459,16 @@ class RepoFilter(object):
print("[DEBUG] Passed arguments:\n{}".format(self._args))
# Determine basic repository information
self._orig_refs = GitUtils.get_refs()
is_bare = GitUtils.is_repository_bare()
target_working_dir = self._args.target or '.'
self._orig_refs = GitUtils.get_refs(target_working_dir)
is_bare = GitUtils.is_repository_bare(target_working_dir)
# Do sanity checks from the correct directory
if not self._args.force:
cwd = os.getcwd()
os.chdir(target_working_dir)
RepoFilter.sanity_check(self._orig_refs, is_bare)
os.chdir(cwd)
@staticmethod
def sanity_check(refs, is_bare):
@ -2474,7 +2494,7 @@ class RepoFilter(object):
# Avoid letting people running with weird setups and overwriting GIT_DIR
# elsewhere
git_dir = GitUtils.determine_git_dir()
git_dir = GitUtils.determine_git_dir('.')
if is_bare and git_dir != '.':
abort("GIT_DIR must be .")
elif not is_bare and git_dir != '.git':
@ -2592,7 +2612,8 @@ class RepoFilter(object):
reset_or_tag.ref = RepoFilter.new_tagname(args, reset_or_tag.ref, shortname)
def results_tmp_dir(self):
git_dir = GitUtils.determine_git_dir()
working_dir = self._args.target or self._args.source or '.'
git_dir = GitUtils.determine_git_dir(working_dir)
d = os.path.join(git_dir, 'filter-repo')
if not os.path.isdir(d):
os.mkdir(d)
@ -2622,11 +2643,14 @@ class RepoFilter(object):
self._input = sys.stdin
self._fe_orig = None
else:
skip_blobs = (self._blob_callback is None) and (
self._everything_callback is None)
skip_blobs = (self._blob_callback is None and
self._everything_callback is None and
self._args.source is None and
self._args.target is None)
extra_flags = ['--no-data'] if skip_blobs else []
done_feature = ['--use-done-feature'] if use_done_feature else []
fep_cmd = ['git', 'fast-export', '--show-original-ids',
location = ['-C', self._args.source] if self._args.source else []
fep_cmd = ['git'] + location + ['fast-export', '--show-original-ids',
'--signed-tags=strip', '--tag-of-filtered-object=rewrite'
] + done_feature + extra_flags + self._args.refs
self._fep = subprocess.Popen(fep_cmd, bufsize=-1, stdout=subprocess.PIPE)
@ -2642,7 +2666,8 @@ class RepoFilter(object):
def _setup_output(self):
if not self._args.dry_run:
fip_cmd = 'git fast-import --force --quiet'.split()
location = ['-C', self._args.target] if self._args.target else []
fip_cmd = ['git'] + location + 'fast-import --force --quiet'.split()
self._fip = subprocess.Popen(fip_cmd,
bufsize=-1,
stdin=subprocess.PIPE,
@ -2679,7 +2704,8 @@ class RepoFilter(object):
self._reset_callback and self._reset_callback(r)
# Create and run the filter
filter = FastExportFilter(blob_callback = self._blob_callback,
filter = FastExportFilter(self._args.source or '.',
blob_callback = self._blob_callback,
commit_callback = actual_commit_callback,
tag_callback = actual_tag_callback,
reset_callback = actual_reset_callback,
@ -2713,6 +2739,7 @@ class RepoFilter(object):
print(" " + self._fe_filt)
sys.exit(0)
target_working_dir = self._args.target or '.'
if self._input:
# Remove unused refs
refs_to_nuke = set(self._orig_refs) - set(filter.get_seen_refs())
@ -2721,7 +2748,8 @@ class RepoFilter(object):
print("[DEBUG] Deleting the following refs:\n "+
"\n ".join(refs_to_nuke))
p = subprocess.Popen('git update-ref --stdin'.split(),
stdin=subprocess.PIPE)
stdin=subprocess.PIPE,
cwd=target_working_dir)
p.stdin.write(''.join(["option no-deref\ndelete {}\n".format(x)
for x in refs_to_nuke]))
p.stdin.close()
@ -2739,12 +2767,12 @@ class RepoFilter(object):
quiet_flags = '--quiet' if self._args.quiet else ''
cleanup_cmds = ['git reflog expire --expire=now --all'.split(),
'git gc {} --prune=now'.format(quiet_flags).split()]
if not GitUtils.is_repository_bare():
if not GitUtils.is_repository_bare(target_working_dir):
cleanup_cmds.append('git reset {} --hard'.format(quiet_flags).split())
for cmd in cleanup_cmds:
if self._args.debug:
print("[DEBUG] Running: {}".format(' '.join(cmd)))
subprocess.call(cmd)
subprocess.call(cmd, cwd=target_working_dir)
if __name__ == '__main__':
args = FilteringOptions.parse_args(sys.argv[1:])