filter-repo: support --source and --target options

These options allow exporting history from one repository and importing
it into a different one. Combined with chained RepoFilter instances from
commit 81016821a1 (filter-repo: allow chaining of RepoFilter instances,
2019-01-07), they even allow things like splicing separate repositories
together.

Signed-off-by: Elijah Newren <newren@gmail.com>
This commit is contained in:
Elijah Newren 2019-01-08 11:02:53 -08:00
parent d0640bad7a
commit 72b69b3dbe

View File

@ -733,11 +733,14 @@ class FastExportFilter(object):
repo). repo).
""" """
def __init__(self, def __init__(self, repo_working_dir,
tag_callback = None, commit_callback = None, tag_callback = None, commit_callback = None,
blob_callback = None, progress_callback = None, blob_callback = None, progress_callback = None,
reset_callback = None, checkpoint_callback = None, reset_callback = None, checkpoint_callback = None,
everything_callback = None): everything_callback = None):
# Repo we are exporting
self._repo_working_dir = repo_working_dir
# Members below simply store callback functions for the various git # Members below simply store callback functions for the various git
# elements # elements
self._tag_callback = tag_callback self._tag_callback = tag_callback
@ -1463,7 +1466,8 @@ class FastExportFilter(object):
cmd = 'git cat-file --batch-check'.split() cmd = 'git cat-file --batch-check'.split()
batch_check_process = subprocess.Popen(cmd, batch_check_process = subprocess.Popen(cmd,
stdin=subprocess.PIPE, stdin=subprocess.PIPE,
stdout=subprocess.PIPE) stdout=subprocess.PIPE,
cwd=self._repo_working_dir)
batch_check_process.stdin.write(refname+"\n") batch_check_process.stdin.write(refname+"\n")
line = batch_check_process.stdout.readline() line = batch_check_process.stdout.readline()
m = re.match('^([0-9a-f]{40}) ([a-z]+) ([0-9]+)$', line) m = re.match('^([0-9a-f]{40}) ([a-z]+) ([0-9]+)$', line)
@ -1694,19 +1698,24 @@ class GitUtils(object):
return int(lines[0].split()[1]) + int(lines[2].split()[1]) return int(lines[0].split()[1]) + int(lines[2].split()[1])
@staticmethod @staticmethod
def is_repository_bare(): def is_repository_bare(repo_working_dir):
out = subprocess.check_output('git rev-parse --is-bare-repository'.split()) out = subprocess.check_output('git rev-parse --is-bare-repository'.split(),
cwd=repo_working_dir)
return (out.strip() == 'true') return (out.strip() == 'true')
@staticmethod @staticmethod
def determine_git_dir(): def determine_git_dir(repo_working_dir):
out = subprocess.check_output('git rev-parse --git-dir'.split()) d = subprocess.check_output('git rev-parse --git-dir'.split(),
return out.strip() cwd=repo_working_dir).strip()
if repo_working_dir=='.' or d.startswith('/'):
return d
return os.path.join(repo_working_dir, d)
@staticmethod @staticmethod
def get_refs(): def get_refs(repo_working_dir):
try: try:
output = subprocess.check_output('git show-ref'.split()) output = subprocess.check_output('git show-ref'.split(),
cwd=repo_working_dir)
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
# If error code is 1, there just aren't any refs; i.e. new repo. # If error code is 1, there just aren't any refs; i.e. new repo.
# If error code is other than 1, some other error (e.g. not a git repo) # If error code is other than 1, some other error (e.g. not a git repo)
@ -1846,6 +1855,13 @@ class FilteringOptions(object):
DIRECTORY. Equivalent to using DIRECTORY. Equivalent to using
"--path-rename :DIRECTORY/"''') "--path-rename :DIRECTORY/"''')
location = parser.add_argument_group(title='Location to filter from/to')
location.add_argument('--source',
help='''Git repository to read from''')
location.add_argument('--target',
help='''Git repository to overwrite with filtered
history''')
misc = parser.add_argument_group(title='Miscellaneous options') misc = parser.add_argument_group(title='Miscellaneous options')
misc.add_argument('--help', '-h', action='store_true', misc.add_argument('--help', '-h', action='store_true',
help='''Show this help message and exit.''') help='''Show this help message and exit.''')
@ -2353,7 +2369,7 @@ class RepoAnalyze(object):
@staticmethod @staticmethod
def run(args): def run(args):
git_dir = GitUtils.determine_git_dir() git_dir = GitUtils.determine_git_dir('.')
# Create the report directory as necessary # Create the report directory as necessary
results_tmp_dir = os.path.join(git_dir, 'filter-repo') results_tmp_dir = os.path.join(git_dir, 'filter-repo')
@ -2443,12 +2459,16 @@ class RepoFilter(object):
print("[DEBUG] Passed arguments:\n{}".format(self._args)) print("[DEBUG] Passed arguments:\n{}".format(self._args))
# Determine basic repository information # Determine basic repository information
self._orig_refs = GitUtils.get_refs() target_working_dir = self._args.target or '.'
is_bare = GitUtils.is_repository_bare() self._orig_refs = GitUtils.get_refs(target_working_dir)
is_bare = GitUtils.is_repository_bare(target_working_dir)
# Do sanity checks from the correct directory # Do sanity checks from the correct directory
if not self._args.force: if not self._args.force:
cwd = os.getcwd()
os.chdir(target_working_dir)
RepoFilter.sanity_check(self._orig_refs, is_bare) RepoFilter.sanity_check(self._orig_refs, is_bare)
os.chdir(cwd)
@staticmethod @staticmethod
def sanity_check(refs, is_bare): def sanity_check(refs, is_bare):
@ -2474,7 +2494,7 @@ class RepoFilter(object):
# Avoid letting people running with weird setups and overwriting GIT_DIR # Avoid letting people running with weird setups and overwriting GIT_DIR
# elsewhere # elsewhere
git_dir = GitUtils.determine_git_dir() git_dir = GitUtils.determine_git_dir('.')
if is_bare and git_dir != '.': if is_bare and git_dir != '.':
abort("GIT_DIR must be .") abort("GIT_DIR must be .")
elif not is_bare and git_dir != '.git': elif not is_bare and git_dir != '.git':
@ -2592,7 +2612,8 @@ class RepoFilter(object):
reset_or_tag.ref = RepoFilter.new_tagname(args, reset_or_tag.ref, shortname) reset_or_tag.ref = RepoFilter.new_tagname(args, reset_or_tag.ref, shortname)
def results_tmp_dir(self): def results_tmp_dir(self):
git_dir = GitUtils.determine_git_dir() working_dir = self._args.target or self._args.source or '.'
git_dir = GitUtils.determine_git_dir(working_dir)
d = os.path.join(git_dir, 'filter-repo') d = os.path.join(git_dir, 'filter-repo')
if not os.path.isdir(d): if not os.path.isdir(d):
os.mkdir(d) os.mkdir(d)
@ -2622,11 +2643,14 @@ class RepoFilter(object):
self._input = sys.stdin self._input = sys.stdin
self._fe_orig = None self._fe_orig = None
else: else:
skip_blobs = (self._blob_callback is None) and ( skip_blobs = (self._blob_callback is None and
self._everything_callback is None) self._everything_callback is None and
self._args.source is None and
self._args.target is None)
extra_flags = ['--no-data'] if skip_blobs else [] extra_flags = ['--no-data'] if skip_blobs else []
done_feature = ['--use-done-feature'] if use_done_feature else [] done_feature = ['--use-done-feature'] if use_done_feature else []
fep_cmd = ['git', 'fast-export', '--show-original-ids', location = ['-C', self._args.source] if self._args.source else []
fep_cmd = ['git'] + location + ['fast-export', '--show-original-ids',
'--signed-tags=strip', '--tag-of-filtered-object=rewrite' '--signed-tags=strip', '--tag-of-filtered-object=rewrite'
] + done_feature + extra_flags + self._args.refs ] + done_feature + extra_flags + self._args.refs
self._fep = subprocess.Popen(fep_cmd, bufsize=-1, stdout=subprocess.PIPE) self._fep = subprocess.Popen(fep_cmd, bufsize=-1, stdout=subprocess.PIPE)
@ -2642,7 +2666,8 @@ class RepoFilter(object):
def _setup_output(self): def _setup_output(self):
if not self._args.dry_run: if not self._args.dry_run:
fip_cmd = 'git fast-import --force --quiet'.split() location = ['-C', self._args.target] if self._args.target else []
fip_cmd = ['git'] + location + 'fast-import --force --quiet'.split()
self._fip = subprocess.Popen(fip_cmd, self._fip = subprocess.Popen(fip_cmd,
bufsize=-1, bufsize=-1,
stdin=subprocess.PIPE, stdin=subprocess.PIPE,
@ -2679,7 +2704,8 @@ class RepoFilter(object):
self._reset_callback and self._reset_callback(r) self._reset_callback and self._reset_callback(r)
# Create and run the filter # Create and run the filter
filter = FastExportFilter(blob_callback = self._blob_callback, filter = FastExportFilter(self._args.source or '.',
blob_callback = self._blob_callback,
commit_callback = actual_commit_callback, commit_callback = actual_commit_callback,
tag_callback = actual_tag_callback, tag_callback = actual_tag_callback,
reset_callback = actual_reset_callback, reset_callback = actual_reset_callback,
@ -2713,6 +2739,7 @@ class RepoFilter(object):
print(" " + self._fe_filt) print(" " + self._fe_filt)
sys.exit(0) sys.exit(0)
target_working_dir = self._args.target or '.'
if self._input: if self._input:
# Remove unused refs # Remove unused refs
refs_to_nuke = set(self._orig_refs) - set(filter.get_seen_refs()) refs_to_nuke = set(self._orig_refs) - set(filter.get_seen_refs())
@ -2721,7 +2748,8 @@ class RepoFilter(object):
print("[DEBUG] Deleting the following refs:\n "+ print("[DEBUG] Deleting the following refs:\n "+
"\n ".join(refs_to_nuke)) "\n ".join(refs_to_nuke))
p = subprocess.Popen('git update-ref --stdin'.split(), p = subprocess.Popen('git update-ref --stdin'.split(),
stdin=subprocess.PIPE) stdin=subprocess.PIPE,
cwd=target_working_dir)
p.stdin.write(''.join(["option no-deref\ndelete {}\n".format(x) p.stdin.write(''.join(["option no-deref\ndelete {}\n".format(x)
for x in refs_to_nuke])) for x in refs_to_nuke]))
p.stdin.close() p.stdin.close()
@ -2739,12 +2767,12 @@ class RepoFilter(object):
quiet_flags = '--quiet' if self._args.quiet else '' quiet_flags = '--quiet' if self._args.quiet else ''
cleanup_cmds = ['git reflog expire --expire=now --all'.split(), cleanup_cmds = ['git reflog expire --expire=now --all'.split(),
'git gc {} --prune=now'.format(quiet_flags).split()] 'git gc {} --prune=now'.format(quiet_flags).split()]
if not GitUtils.is_repository_bare(): if not GitUtils.is_repository_bare(target_working_dir):
cleanup_cmds.append('git reset {} --hard'.format(quiet_flags).split()) cleanup_cmds.append('git reset {} --hard'.format(quiet_flags).split())
for cmd in cleanup_cmds: for cmd in cleanup_cmds:
if self._args.debug: if self._args.debug:
print("[DEBUG] Running: {}".format(' '.join(cmd))) print("[DEBUG] Running: {}".format(' '.join(cmd)))
subprocess.call(cmd) subprocess.call(cmd, cwd=target_working_dir)
if __name__ == '__main__': if __name__ == '__main__':
args = FilteringOptions.parse_args(sys.argv[1:]) args = FilteringOptions.parse_args(sys.argv[1:])