filter-repo: allow chaining of RepoFilter instances

Allow each instance to be just input or just output so that we can splice
repos together or split one into multiple different repos.

Signed-off-by: Elijah Newren <newren@gmail.com>
Author: Elijah Newren <newren@gmail.com>
Date:   2019-01-07 13:46:48 -08:00
parent 59f3947857
commit 81016821a1


@@ -2403,6 +2403,33 @@ class DualFileWriter:
     self.file2.close()
 
 class RepoFilter(object):
+  def __init__(self,
+               args,
+               blob_callback = None,
+               commit_callback = None,
+               tag_callback = None,
+               reset_callback = None,
+               everything_callback = None):
+    # Store arguments for later use
+    self._args = args
+    self._blob_callback = blob_callback
+    self._commit_callback = commit_callback
+    self._tag_callback = tag_callback
+    self._reset_callback = reset_callback
+    self._everything_callback = everything_callback
+
+    # Defaults for input
+    self._input = None
+    self._fep = None # Fast Export Process
+    self._fe_orig = None # Path to where original fast-export output stored
+    self._fe_filt = None # Path to where filtered fast-export output stored
+
+    # Defaults for output
+    self._output = None
+    self._fip = None # Fast Import Process
+    self._import_pipes = None
+    self._managed_output = True
+
   @staticmethod
   def sanity_check(refs, is_bare):
     def abort(reason):
@@ -2544,132 +2571,163 @@ class RepoFilter(object):
   def handle_tag(args, reset_or_tag, shortname = False):
     reset_or_tag.ref = RepoFilter.new_tagname(args, reset_or_tag.ref, shortname)
 
-  @staticmethod
-  def run(args,
-          blob_callback = None,
-          commit_callback = None,
-          tag_callback = None,
-          reset_callback = None,
-          everything_callback = None):
-    if args.debug:
-      print("[DEBUG] Passed arguments:\n{}".format(args))
-
-    # Determine basic repository information
-    orig_refs = GitUtils.get_refs()
-    is_bare = GitUtils.is_repository_bare()
+  def results_tmp_dir(self):
     git_dir = GitUtils.determine_git_dir()
+    d = os.path.join(git_dir, 'filter-repo')
+    if not os.path.isdir(d):
+      os.mkdir(d)
+    return d
 
-    # Do sanity checks
-    if not args.force:
-      RepoFilter.sanity_check(orig_refs, is_bare)
-
-    # Create a temporary directory for storing some results
-    results_tmp_dir = os.path.join(git_dir, 'filter-repo')
-    if not os.path.isdir(results_tmp_dir):
-      os.mkdir(results_tmp_dir)
-
-    # Determine where to get input (and whether to make a copy)
-    if args.stdin:
-      input = sys.stdin
-      fe_orig = None
+  def importer_only(self):
+    self._setup_output()
+
+  def set_output(self, outputRepoFilter):
+    assert outputRepoFilter._output
+
+    # set_output implies this RepoFilter is doing exporting, though may not
+    # be the only one.
+    self._setup_input(use_done_feature = False)
+
+    # Set our output management up to pipe to outputRepoFilter's locations
+    self._managed_output = False
+    self._output = outputRepoFilter._output
+    self._import_pipes = outputRepoFilter._import_pipes
+
+  def _setup_input(self, use_done_feature):
+    if self._args.stdin:
+      self._input = sys.stdin
+      self._fe_orig = None
     else:
-      skip_blobs = blob_callback is None and everything_callback is None
+      skip_blobs = (self._blob_callback is None) and (
+                   self._everything_callback is None)
       extra_flags = ['--no-data'] if skip_blobs else []
-      fep_cmd = ['git', 'fast-export',
-                 '--show-original-ids',
-                 '--signed-tags=strip',
-                 '--tag-of-filtered-object=rewrite',
-                 '--use-done-feature'] + extra_flags + args.refs
-      fep = subprocess.Popen(fep_cmd, bufsize=-1, stdout=subprocess.PIPE)
-      input = fep.stdout
-      if args.dry_run or args.debug:
-        fe_orig = os.path.join(results_tmp_dir, 'fast-export.original')
-        output = open(fe_orig, 'w')
-        input = InputFileBackup(input, output)
-      if args.debug:
+      done_feature = ['--use-done-feature'] if use_done_feature else []
+      fep_cmd = ['git', 'fast-export', '--show-original-ids',
+                 '--signed-tags=strip', '--tag-of-filtered-object=rewrite'
+                 ] + done_feature + extra_flags + self._args.refs
+      self._fep = subprocess.Popen(fep_cmd, bufsize=-1, stdout=subprocess.PIPE)
+      self._input = self._fep.stdout
+      if self._args.dry_run or self._args.debug:
+        self._fe_orig = os.path.join(self.results_tmp_dir(),
+                                     'fast-export.original')
+        output = open(self._fe_orig, 'w')
+        self._input = InputFileBackup(self._input, output)
+      if self._args.debug:
         print("[DEBUG] Running: {}".format(' '.join(fep_cmd)))
-        print(" (saving a copy of the output at {})".format(fe_orig))
+        print(" (saving a copy of the output at {})".format(self._fe_orig))
 
-    # Determine where to send output
-    pipes = None
-    if not args.dry_run:
+  def _setup_output(self):
+    if not self._args.dry_run:
       fip_cmd = 'git fast-import --force --quiet'.split()
-      fip = subprocess.Popen(fip_cmd,
-                             bufsize=-1,
-                             stdin=subprocess.PIPE,
-                             stdout=subprocess.PIPE)
-      pipes = (fip.stdin, fip.stdout)
-    if args.dry_run or args.debug:
-      fe_filt = os.path.join(results_tmp_dir, 'fast-export.filtered')
-      output = open(fe_filt, 'w')
+      self._fip = subprocess.Popen(fip_cmd,
+                                   bufsize=-1,
+                                   stdin=subprocess.PIPE,
+                                   stdout=subprocess.PIPE)
+      self._import_pipes = (self._fip.stdin, self._fip.stdout)
+    if self._args.dry_run or self._args.debug:
+      self._fe_filt = os.path.join(self.results_tmp_dir(),
+                                   'fast-export.filtered')
+      self._output = open(self._fe_filt, 'w')
     else:
-      output = fip.stdin
-    if args.debug:
-      output = DualFileWriter(fip.stdin, output)
+      self._output = self._fip.stdin
+    if self._args.debug:
+      self._output = DualFileWriter(self._fip.stdin, self._output)
       print("[DEBUG] Running: {}".format(' '.join(fip_cmd)))
-      print(" (using the following file as input: {})".format(fe_filt))
+      print(" (using the following file as input: {})".format(self._fe_filt))
 
-    # Set up the callbacks
-    def actual_commit_callback(c):
-      RepoFilter.tweak_commit(args, c)
-      commit_callback and commit_callback(c)
-    def actual_tag_callback(t):
-      RepoFilter.handle_tag(args, t, shortname = True)
-      tag_callback and tag_callback(t)
-    def actual_reset_callback(r):
-      RepoFilter.handle_tag(args, r)
-      reset_callback and reset_callback(r)
-
-    # Create and run the filter
-    filter = FastExportFilter(blob_callback = blob_callback,
-                              commit_callback = actual_commit_callback,
-                              tag_callback = actual_tag_callback,
-                              reset_callback = actual_reset_callback,
-                              everything_callback = everything_callback)
-    filter.run(input, output, fast_import_pipes = pipes, quiet = args.quiet)
-
-    # Close the output, ensure fast-export and fast-import have completed
-    output.close()
-    if not args.stdin and fep.wait():
-      raise SystemExit("Error: fast-export failed; see above.")
-    if not args.dry_run and fip.wait():
+  def run(self):
+    if not self._input and not self._output:
+      self._setup_input(use_done_feature = True)
+      self._setup_output()
+
+    if self._managed_output:
+      if self._args.debug:
+        print("[DEBUG] Passed arguments:\n{}".format(self._args))
+
+      # Determine basic repository information
+      orig_refs = GitUtils.get_refs()
+      is_bare = GitUtils.is_repository_bare()
+
+      # Do sanity checks
+      if not self._args.force:
+        RepoFilter.sanity_check(orig_refs, is_bare)
+
+    if self._input:
+      # Set up the callbacks
+      def actual_commit_callback(c):
+        RepoFilter.tweak_commit(self._args, c)
+        self._commit_callback and self._commit_callback(c)
+      def actual_tag_callback(t):
+        RepoFilter.handle_tag(self._args, t, shortname = True)
+        self._tag_callback and self._tag_callback(t)
+      def actual_reset_callback(r):
+        RepoFilter.handle_tag(self._args, r)
+        self._reset_callback and self._reset_callback(r)
+
+      # Create and run the filter
+      filter = FastExportFilter(blob_callback = self._blob_callback,
+                                commit_callback = actual_commit_callback,
+                                tag_callback = actual_tag_callback,
+                                reset_callback = actual_reset_callback,
+                                everything_callback = self._everything_callback)
+      filter.run(self._input,
+                 self._output,
+                 fast_import_pipes = self._import_pipes,
+                 quiet = self._args.quiet)
+
+      # Make sure fast-export completed successfully
+      if not self._args.stdin and self._fep.wait():
+        raise SystemExit("Error: fast-export failed; see above.")
+
+    # If we're not the manager of self._output, we should avoid post-run cleanup
+    if not self._managed_output:
+      return
+
+    # Close the output and ensure fast-import successfully completes
+    self._output.close()
+    if not self._args.dry_run and self._fip.wait():
       raise SystemExit("Error: fast-import failed; see above.")
 
     # Exit early, if requested
-    if args.dry_run:
-      orig_str = "by comparing:\n "+fe_orig if fe_orig else "at:"
+    if self._args.dry_run:
+      if self._fe_orig:
+        orig_str = "by comparing:\n "+self._fe_orig
+      else:
+        orig_str = "at:"
       print("NOTE: Not running fast-import or cleaning up; --dry-run passed.")
       print(" Requested filtering can be seen {}".format(orig_str))
-      print(" " + fe_filt)
+      print(" " + self._fe_filt)
       sys.exit(0)
 
-    # Remove unused refs
-    refs_to_nuke = set(orig_refs) - set(filter.get_seen_refs())
-    if refs_to_nuke:
-      if args.debug:
-        print("[DEBUG] Deleting the following refs:\n "+
-              "\n ".join(refs_to_nuke))
-      p = subprocess.Popen('git update-ref --stdin'.split(),
-                           stdin=subprocess.PIPE)
-      p.stdin.write(''.join(["option no-deref\ndelete {}\n".format(x)
-                             for x in refs_to_nuke]))
-      p.stdin.close()
-      if p.wait():
-        raise SystemExit("git update-ref failed; see above")
+    if self._input:
+      # Remove unused refs
+      refs_to_nuke = set(orig_refs) - set(filter.get_seen_refs())
+      if refs_to_nuke:
+        if self._args.debug:
+          print("[DEBUG] Deleting the following refs:\n "+
+                "\n ".join(refs_to_nuke))
+        ### FIXME: Make sure to run within the target repo...
+        p = subprocess.Popen('git update-ref --stdin'.split(),
+                             stdin=subprocess.PIPE)
+        p.stdin.write(''.join(["option no-deref\ndelete {}\n".format(x)
+                               for x in refs_to_nuke]))
+        p.stdin.close()
+        if p.wait():
+          raise SystemExit("git update-ref failed; see above")
 
-    # Write out data about run
-    filter.record_metadata(results_tmp_dir, orig_refs, refs_to_nuke)
+      # Write out data about run
+      filter.record_metadata(self.results_tmp_dir(), orig_refs, refs_to_nuke)
 
     # Nuke the reflogs and repack
-    if not args.quiet and not args.debug:
+    if not self._args.quiet and not self._args.debug:
       print("Repacking your repo and cleaning out old unneeded objects")
-    quiet_flags = '--quiet' if args.quiet else ''
+    quiet_flags = '--quiet' if self._args.quiet else ''
     cleanup_cmds = ['git reflog expire --expire=now --all'.split(),
                     'git gc {} --prune=now'.format(quiet_flags).split()]
     if not is_bare:
       cleanup_cmds.append('git reset {} --hard'.format(quiet_flags).split())
     for cmd in cleanup_cmds:
-      if args.debug:
+      if self._args.debug:
         print("[DEBUG] Running: {}".format(' '.join(cmd)))
       subprocess.call(cmd)
@@ -2678,4 +2736,5 @@ if __name__ == '__main__':
   if args.analyze:
     RepoAnalyze.run(args)
   else:
-    RepoFilter.run(args)
+    filter = RepoFilter(args)
+    filter.run()
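
To make the chaining concrete, the splicing workflow the commit message alludes to would look roughly like the sketch below. This is not code from the commit: it assumes the script has been imported so that RepoFilter is in scope, and the parse_filtering_args() helper and the repository paths are hypothetical placeholders. Only RepoFilter(args), importer_only(), set_output(), and run() come from the diff above.

    import os

    # Hypothetical helper: however the surrounding script builds its argparse
    # namespace of filtering options.
    target_args = parse_filtering_args(['--force'])
    source_args = parse_filtering_args([])  # plus whatever filtering is wanted

    # The target instance is output-only: it just runs git fast-import.
    os.chdir('/path/to/target/repo')   # placeholder path
    target = RepoFilter(target_args)
    target.importer_only()

    # The source instance is input-only in effect: it exports and filters, but
    # pipes the filtered stream into the target's fast-import instead of
    # managing its own output (set_output() clears _managed_output).
    os.chdir('/path/to/source/repo')   # placeholder path
    source = RepoFilter(source_args)
    source.set_output(target)
    source.run()

    # Presumably the target's run() is then invoked to close the import stream
    # and perform the post-run cleanup that the source skipped because it does
    # not manage the output.
    os.chdir('/path/to/target/repo')   # placeholder path
    target.run()

Because run() returns early whenever _managed_output is False, closing the import stream and the reflog expire / gc cleanup happen only once, in the instance that owns the output; exporting instances just stream their filtered history and return, which is what lets several of them be chained into a single target.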