mirror of
https://github.com/newren/git-filter-repo.git
synced 2024-07-06 18:32:14 +02:00
filter-repo: allow chaining of RepoFilter instances
Allow each instance to be just input or just output so that we can splice repos together or split one into multiple different repos. Signed-off-by: Elijah Newren <newren@gmail.com>
This commit is contained in:
parent
59f3947857
commit
81016821a1
217
git-filter-repo
217
git-filter-repo
@ -2403,6 +2403,33 @@ class DualFileWriter:
|
||||
self.file2.close()
|
||||
|
||||
class RepoFilter(object):
|
||||
def __init__(self,
|
||||
args,
|
||||
blob_callback = None,
|
||||
commit_callback = None,
|
||||
tag_callback = None,
|
||||
reset_callback = None,
|
||||
everything_callback = None):
|
||||
# Store arguments for later use
|
||||
self._args = args
|
||||
self._blob_callback = blob_callback
|
||||
self._commit_callback = commit_callback
|
||||
self._tag_callback = tag_callback
|
||||
self._reset_callback = reset_callback
|
||||
self._everything_callback = everything_callback
|
||||
|
||||
# Defaults for input
|
||||
self._input = None
|
||||
self._fep = None # Fast Export Process
|
||||
self._fe_orig = None # Path to where original fast-export output stored
|
||||
self._fe_filt = None # Path to where filtered fast-export output stored
|
||||
|
||||
# Defaults for output
|
||||
self._output = None
|
||||
self._fip = None # Fast Import Process
|
||||
self._import_pipes = None
|
||||
self._managed_output = True
|
||||
|
||||
@staticmethod
|
||||
def sanity_check(refs, is_bare):
|
||||
def abort(reason):
|
||||
@ -2544,111 +2571,142 @@ class RepoFilter(object):
|
||||
def handle_tag(args, reset_or_tag, shortname = False):
    """Replace reset_or_tag.ref with the name RepoFilter.new_tagname returns.

    args and shortname are passed through to new_tagname unchanged.
    """
    current_ref = reset_or_tag.ref
    reset_or_tag.ref = RepoFilter.new_tagname(args, current_ref, shortname)
|
||||
|
||||
@staticmethod
|
||||
def run(args,
|
||||
blob_callback = None,
|
||||
commit_callback = None,
|
||||
tag_callback = None,
|
||||
reset_callback = None,
|
||||
everything_callback = None):
|
||||
if args.debug:
|
||||
print("[DEBUG] Passed arguments:\n{}".format(args))
|
||||
def results_tmp_dir(self):
    """Return the 'filter-repo' directory inside the git dir.

    The directory is created first if it does not already exist.
    """
    results_dir = os.path.join(GitUtils.determine_git_dir(), 'filter-repo')
    if os.path.isdir(results_dir):
        return results_dir
    os.mkdir(results_dir)
    return results_dir
|
||||
|
||||
def importer_only(self):
|
||||
self._setup_output()
|
||||
|
||||
def set_output(self, outputRepoFilter):
|
||||
assert outputRepoFilter._output
|
||||
|
||||
# set_output implies this RepoFilter is doing exporting, though may not
|
||||
# be the only one.
|
||||
self._setup_input(use_done_feature = False)
|
||||
|
||||
# Set our output management up to pipe to outputRepoFilter's locations
|
||||
self._managed_output = False
|
||||
self._output = outputRepoFilter._output
|
||||
self._import_pipes = outputRepoFilter._import_pipes
|
||||
|
||||
def _setup_input(self, use_done_feature):
|
||||
if self._args.stdin:
|
||||
self._input = sys.stdin
|
||||
self._fe_orig = None
|
||||
else:
|
||||
skip_blobs = (self._blob_callback is None) and (
|
||||
self._everything_callback is None)
|
||||
extra_flags = ['--no-data'] if skip_blobs else []
|
||||
done_feature = ['--use-done-feature'] if use_done_feature else []
|
||||
fep_cmd = ['git', 'fast-export', '--show-original-ids',
|
||||
'--signed-tags=strip', '--tag-of-filtered-object=rewrite'
|
||||
] + done_feature + extra_flags + self._args.refs
|
||||
self._fep = subprocess.Popen(fep_cmd, bufsize=-1, stdout=subprocess.PIPE)
|
||||
self._input = self._fep.stdout
|
||||
if self._args.dry_run or self._args.debug:
|
||||
self._fe_orig = os.path.join(self.results_tmp_dir(),
|
||||
'fast-export.original')
|
||||
output = open(self._fe_orig, 'w')
|
||||
self._input = InputFileBackup(self._input, output)
|
||||
if self._args.debug:
|
||||
print("[DEBUG] Running: {}".format(' '.join(fep_cmd)))
|
||||
print(" (saving a copy of the output at {})".format(self._fe_orig))
|
||||
|
||||
def _setup_output(self):
|
||||
if not self._args.dry_run:
|
||||
fip_cmd = 'git fast-import --force --quiet'.split()
|
||||
self._fip = subprocess.Popen(fip_cmd,
|
||||
bufsize=-1,
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=subprocess.PIPE)
|
||||
self._import_pipes = (self._fip.stdin, self._fip.stdout)
|
||||
if self._args.dry_run or self._args.debug:
|
||||
self._fe_filt = os.path.join(self.results_tmp_dir(),
|
||||
'fast-export.filtered')
|
||||
self._output = open(self._fe_filt, 'w')
|
||||
else:
|
||||
self._output = self._fip.stdin
|
||||
if self._args.debug:
|
||||
self._output = DualFileWriter(self._fip.stdin, self._output)
|
||||
print("[DEBUG] Running: {}".format(' '.join(fip_cmd)))
|
||||
print(" (using the following file as input: {})".format(self._fe_filt))
|
||||
|
||||
def run(self):
|
||||
if not self._input and not self._output:
|
||||
self._setup_input(use_done_feature = True)
|
||||
self._setup_output()
|
||||
|
||||
if self._managed_output:
|
||||
if self._args.debug:
|
||||
print("[DEBUG] Passed arguments:\n{}".format(self._args))
|
||||
|
||||
# Determine basic repository information
|
||||
orig_refs = GitUtils.get_refs()
|
||||
is_bare = GitUtils.is_repository_bare()
|
||||
git_dir = GitUtils.determine_git_dir()
|
||||
|
||||
# Do sanity checks
|
||||
if not args.force:
|
||||
if not self._args.force:
|
||||
RepoFilter.sanity_check(orig_refs, is_bare)
|
||||
|
||||
# Create a temporary directory for storing some results
|
||||
results_tmp_dir = os.path.join(git_dir, 'filter-repo')
|
||||
if not os.path.isdir(results_tmp_dir):
|
||||
os.mkdir(results_tmp_dir)
|
||||
|
||||
# Determine where to get input (and whether to make a copy)
|
||||
if args.stdin:
|
||||
input = sys.stdin
|
||||
fe_orig = None
|
||||
else:
|
||||
skip_blobs = blob_callback is None and everything_callback is None
|
||||
extra_flags = ['--no-data'] if skip_blobs else []
|
||||
fep_cmd = ['git', 'fast-export',
|
||||
'--show-original-ids',
|
||||
'--signed-tags=strip',
|
||||
'--tag-of-filtered-object=rewrite',
|
||||
'--use-done-feature'] + extra_flags + args.refs
|
||||
fep = subprocess.Popen(fep_cmd, bufsize=-1, stdout=subprocess.PIPE)
|
||||
input = fep.stdout
|
||||
if args.dry_run or args.debug:
|
||||
fe_orig = os.path.join(results_tmp_dir, 'fast-export.original')
|
||||
output = open(fe_orig, 'w')
|
||||
input = InputFileBackup(input, output)
|
||||
if args.debug:
|
||||
print("[DEBUG] Running: {}".format(' '.join(fep_cmd)))
|
||||
print(" (saving a copy of the output at {})".format(fe_orig))
|
||||
|
||||
# Determine where to send output
|
||||
pipes = None
|
||||
if not args.dry_run:
|
||||
fip_cmd = 'git fast-import --force --quiet'.split()
|
||||
fip = subprocess.Popen(fip_cmd,
|
||||
bufsize=-1,
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=subprocess.PIPE)
|
||||
pipes = (fip.stdin, fip.stdout)
|
||||
if args.dry_run or args.debug:
|
||||
fe_filt = os.path.join(results_tmp_dir, 'fast-export.filtered')
|
||||
output = open(fe_filt, 'w')
|
||||
else:
|
||||
output = fip.stdin
|
||||
if args.debug:
|
||||
output = DualFileWriter(fip.stdin, output)
|
||||
print("[DEBUG] Running: {}".format(' '.join(fip_cmd)))
|
||||
print(" (using the following file as input: {})".format(fe_filt))
|
||||
|
||||
if self._input:
|
||||
# Set up the callbacks
|
||||
def actual_commit_callback(c):
|
||||
RepoFilter.tweak_commit(args, c)
|
||||
commit_callback and commit_callback(c)
|
||||
RepoFilter.tweak_commit(self._args, c)
|
||||
self._commit_callback and self._commit_callback(c)
|
||||
def actual_tag_callback(t):
|
||||
RepoFilter.handle_tag(args, t, shortname = True)
|
||||
tag_callback and tag_callback(t)
|
||||
RepoFilter.handle_tag(self._args, t, shortname = True)
|
||||
self._tag_callback and self._tag_callback(t)
|
||||
def actual_reset_callback(r):
|
||||
RepoFilter.handle_tag(args, r)
|
||||
reset_callback and reset_callback(r)
|
||||
RepoFilter.handle_tag(self._args, r)
|
||||
self._reset_callback and self._reset_callback(r)
|
||||
|
||||
# Create and run the filter
|
||||
filter = FastExportFilter(blob_callback = blob_callback,
|
||||
filter = FastExportFilter(blob_callback = self._blob_callback,
|
||||
commit_callback = actual_commit_callback,
|
||||
tag_callback = actual_tag_callback,
|
||||
reset_callback = actual_reset_callback,
|
||||
everything_callback = everything_callback)
|
||||
filter.run(input, output, fast_import_pipes = pipes, quiet = args.quiet)
|
||||
everything_callback = self._everything_callback)
|
||||
filter.run(self._input,
|
||||
self._output,
|
||||
fast_import_pipes = self._import_pipes,
|
||||
quiet = self._args.quiet)
|
||||
|
||||
# Close the output, ensure fast-export and fast-import have completed
|
||||
output.close()
|
||||
if not args.stdin and fep.wait():
|
||||
# Make sure fast-export completed successfully
|
||||
if not self._args.stdin and self._fep.wait():
|
||||
raise SystemExit("Error: fast-export failed; see above.")
|
||||
if not args.dry_run and fip.wait():
|
||||
|
||||
# If we're not the manager of self._output, we should avoid post-run cleanup
|
||||
if not self._managed_output:
|
||||
return
|
||||
|
||||
# Close the output and ensure fast-import successfully completes
|
||||
self._output.close()
|
||||
if not self._args.dry_run and self._fip.wait():
|
||||
raise SystemExit("Error: fast-import failed; see above.")
|
||||
|
||||
# Exit early, if requested
|
||||
if args.dry_run:
|
||||
orig_str = "by comparing:\n "+fe_orig if fe_orig else "at:"
|
||||
if self._args.dry_run:
|
||||
if self._fe_orig:
|
||||
orig_str = "by comparing:\n "+self._fe_orig
|
||||
else:
|
||||
orig_str = "at:"
|
||||
print("NOTE: Not running fast-import or cleaning up; --dry-run passed.")
|
||||
print(" Requested filtering can be seen {}".format(orig_str))
|
||||
print(" " + fe_filt)
|
||||
print(" " + self._fe_filt)
|
||||
sys.exit(0)
|
||||
|
||||
if self._input:
|
||||
# Remove unused refs
|
||||
refs_to_nuke = set(orig_refs) - set(filter.get_seen_refs())
|
||||
if refs_to_nuke:
|
||||
if args.debug:
|
||||
if self._args.debug:
|
||||
print("[DEBUG] Deleting the following refs:\n "+
|
||||
"\n ".join(refs_to_nuke))
|
||||
### FIXME: Make sure to run within the target repo...
|
||||
p = subprocess.Popen('git update-ref --stdin'.split(),
|
||||
stdin=subprocess.PIPE)
|
||||
p.stdin.write(''.join(["option no-deref\ndelete {}\n".format(x)
|
||||
@ -2658,18 +2716,18 @@ class RepoFilter(object):
|
||||
raise SystemExit("git update-ref failed; see above")
|
||||
|
||||
# Write out data about run
|
||||
filter.record_metadata(results_tmp_dir, orig_refs, refs_to_nuke)
|
||||
filter.record_metadata(self.results_tmp_dir(), orig_refs, refs_to_nuke)
|
||||
|
||||
# Nuke the reflogs and repack
|
||||
if not args.quiet and not args.debug:
|
||||
if not self._args.quiet and not self._args.debug:
|
||||
print("Repacking your repo and cleaning out old unneeded objects")
|
||||
quiet_flags = '--quiet' if args.quiet else ''
|
||||
quiet_flags = '--quiet' if self._args.quiet else ''
|
||||
cleanup_cmds = ['git reflog expire --expire=now --all'.split(),
|
||||
'git gc {} --prune=now'.format(quiet_flags).split()]
|
||||
if not is_bare:
|
||||
cleanup_cmds.append('git reset {} --hard'.format(quiet_flags).split())
|
||||
for cmd in cleanup_cmds:
|
||||
if args.debug:
|
||||
if self._args.debug:
|
||||
print("[DEBUG] Running: {}".format(' '.join(cmd)))
|
||||
subprocess.call(cmd)
|
||||
|
||||
@ -2678,4 +2736,5 @@ if __name__ == '__main__':
|
||||
if args.analyze:
|
||||
RepoAnalyze.run(args)
|
||||
else:
|
||||
RepoFilter.run(args)
|
||||
filter = RepoFilter(args)
|
||||
filter.run()
|
||||
|
Loading…
Reference in New Issue
Block a user