mirror of
https://github.com/newren/git-filter-repo.git
synced 2024-07-06 18:32:14 +02:00
filter-repo: allow chaining of RepoFilter instances
Allow each instance to be just input or just output so that we can splice repos together or split one into multiple different repos. Signed-off-by: Elijah Newren <newren@gmail.com>
This commit is contained in:
parent
59f3947857
commit
81016821a1
253
git-filter-repo
253
git-filter-repo
@ -2403,6 +2403,33 @@ class DualFileWriter:
|
|||||||
self.file2.close()
|
self.file2.close()
|
||||||
|
|
||||||
class RepoFilter(object):
|
class RepoFilter(object):
|
||||||
|
def __init__(self,
|
||||||
|
args,
|
||||||
|
blob_callback = None,
|
||||||
|
commit_callback = None,
|
||||||
|
tag_callback = None,
|
||||||
|
reset_callback = None,
|
||||||
|
everything_callback = None):
|
||||||
|
# Store arguments for later use
|
||||||
|
self._args = args
|
||||||
|
self._blob_callback = blob_callback
|
||||||
|
self._commit_callback = commit_callback
|
||||||
|
self._tag_callback = tag_callback
|
||||||
|
self._reset_callback = reset_callback
|
||||||
|
self._everything_callback = everything_callback
|
||||||
|
|
||||||
|
# Defaults for input
|
||||||
|
self._input = None
|
||||||
|
self._fep = None # Fast Export Process
|
||||||
|
self._fe_orig = None # Path to where original fast-export output stored
|
||||||
|
self._fe_filt = None # Path to where filtered fast-export output stored
|
||||||
|
|
||||||
|
# Defaults for output
|
||||||
|
self._output = None
|
||||||
|
self._fip = None # Fast Import Process
|
||||||
|
self._import_pipes = None
|
||||||
|
self._managed_output = True
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def sanity_check(refs, is_bare):
|
def sanity_check(refs, is_bare):
|
||||||
def abort(reason):
|
def abort(reason):
|
||||||
@ -2544,132 +2571,163 @@ class RepoFilter(object):
|
|||||||
def handle_tag(args, reset_or_tag, shortname = False):
|
def handle_tag(args, reset_or_tag, shortname = False):
|
||||||
reset_or_tag.ref = RepoFilter.new_tagname(args, reset_or_tag.ref, shortname)
|
reset_or_tag.ref = RepoFilter.new_tagname(args, reset_or_tag.ref, shortname)
|
||||||
|
|
||||||
@staticmethod
|
def results_tmp_dir(self):
|
||||||
def run(args,
|
|
||||||
blob_callback = None,
|
|
||||||
commit_callback = None,
|
|
||||||
tag_callback = None,
|
|
||||||
reset_callback = None,
|
|
||||||
everything_callback = None):
|
|
||||||
if args.debug:
|
|
||||||
print("[DEBUG] Passed arguments:\n{}".format(args))
|
|
||||||
|
|
||||||
# Determine basic repository information
|
|
||||||
orig_refs = GitUtils.get_refs()
|
|
||||||
is_bare = GitUtils.is_repository_bare()
|
|
||||||
git_dir = GitUtils.determine_git_dir()
|
git_dir = GitUtils.determine_git_dir()
|
||||||
|
d = os.path.join(git_dir, 'filter-repo')
|
||||||
|
if not os.path.isdir(d):
|
||||||
|
os.mkdir(d)
|
||||||
|
return d
|
||||||
|
|
||||||
# Do sanity checks
|
def importer_only(self):
|
||||||
if not args.force:
|
self._setup_output()
|
||||||
RepoFilter.sanity_check(orig_refs, is_bare)
|
|
||||||
|
|
||||||
# Create a temporary directory for storing some results
|
def set_output(self, outputRepoFilter):
|
||||||
results_tmp_dir = os.path.join(git_dir, 'filter-repo')
|
assert outputRepoFilter._output
|
||||||
if not os.path.isdir(results_tmp_dir):
|
|
||||||
os.mkdir(results_tmp_dir)
|
|
||||||
|
|
||||||
# Determine where to get input (and whether to make a copy)
|
# set_output implies this RepoFilter is doing exporting, though may not
|
||||||
if args.stdin:
|
# be the only one.
|
||||||
input = sys.stdin
|
self._setup_input(use_done_feature = False)
|
||||||
fe_orig = None
|
|
||||||
|
# Set our output management up to pipe to outputRepoFilter's locations
|
||||||
|
self._managed_output = False
|
||||||
|
self._output = outputRepoFilter._output
|
||||||
|
self._import_pipes = outputRepoFilter._import_pipes
|
||||||
|
|
||||||
|
def _setup_input(self, use_done_feature):
|
||||||
|
if self._args.stdin:
|
||||||
|
self._input = sys.stdin
|
||||||
|
self._fe_orig = None
|
||||||
else:
|
else:
|
||||||
skip_blobs = blob_callback is None and everything_callback is None
|
skip_blobs = (self._blob_callback is None) and (
|
||||||
|
self._everything_callback is None)
|
||||||
extra_flags = ['--no-data'] if skip_blobs else []
|
extra_flags = ['--no-data'] if skip_blobs else []
|
||||||
fep_cmd = ['git', 'fast-export',
|
done_feature = ['--use-done-feature'] if use_done_feature else []
|
||||||
'--show-original-ids',
|
fep_cmd = ['git', 'fast-export', '--show-original-ids',
|
||||||
'--signed-tags=strip',
|
'--signed-tags=strip', '--tag-of-filtered-object=rewrite'
|
||||||
'--tag-of-filtered-object=rewrite',
|
] + done_feature + extra_flags + self._args.refs
|
||||||
'--use-done-feature'] + extra_flags + args.refs
|
self._fep = subprocess.Popen(fep_cmd, bufsize=-1, stdout=subprocess.PIPE)
|
||||||
fep = subprocess.Popen(fep_cmd, bufsize=-1, stdout=subprocess.PIPE)
|
self._input = self._fep.stdout
|
||||||
input = fep.stdout
|
if self._args.dry_run or self._args.debug:
|
||||||
if args.dry_run or args.debug:
|
self._fe_orig = os.path.join(self.results_tmp_dir(),
|
||||||
fe_orig = os.path.join(results_tmp_dir, 'fast-export.original')
|
'fast-export.original')
|
||||||
output = open(fe_orig, 'w')
|
output = open(self._fe_orig, 'w')
|
||||||
input = InputFileBackup(input, output)
|
self._input = InputFileBackup(self._input, output)
|
||||||
if args.debug:
|
if self._args.debug:
|
||||||
print("[DEBUG] Running: {}".format(' '.join(fep_cmd)))
|
print("[DEBUG] Running: {}".format(' '.join(fep_cmd)))
|
||||||
print(" (saving a copy of the output at {})".format(fe_orig))
|
print(" (saving a copy of the output at {})".format(self._fe_orig))
|
||||||
|
|
||||||
# Determine where to send output
|
def _setup_output(self):
|
||||||
pipes = None
|
if not self._args.dry_run:
|
||||||
if not args.dry_run:
|
|
||||||
fip_cmd = 'git fast-import --force --quiet'.split()
|
fip_cmd = 'git fast-import --force --quiet'.split()
|
||||||
fip = subprocess.Popen(fip_cmd,
|
self._fip = subprocess.Popen(fip_cmd,
|
||||||
bufsize=-1,
|
bufsize=-1,
|
||||||
stdin=subprocess.PIPE,
|
stdin=subprocess.PIPE,
|
||||||
stdout=subprocess.PIPE)
|
stdout=subprocess.PIPE)
|
||||||
pipes = (fip.stdin, fip.stdout)
|
self._import_pipes = (self._fip.stdin, self._fip.stdout)
|
||||||
if args.dry_run or args.debug:
|
if self._args.dry_run or self._args.debug:
|
||||||
fe_filt = os.path.join(results_tmp_dir, 'fast-export.filtered')
|
self._fe_filt = os.path.join(self.results_tmp_dir(),
|
||||||
output = open(fe_filt, 'w')
|
'fast-export.filtered')
|
||||||
|
self._output = open(self._fe_filt, 'w')
|
||||||
else:
|
else:
|
||||||
output = fip.stdin
|
self._output = self._fip.stdin
|
||||||
if args.debug:
|
if self._args.debug:
|
||||||
output = DualFileWriter(fip.stdin, output)
|
self._output = DualFileWriter(self._fip.stdin, self._output)
|
||||||
print("[DEBUG] Running: {}".format(' '.join(fip_cmd)))
|
print("[DEBUG] Running: {}".format(' '.join(fip_cmd)))
|
||||||
print(" (using the following file as input: {})".format(fe_filt))
|
print(" (using the following file as input: {})".format(self._fe_filt))
|
||||||
|
|
||||||
# Set up the callbacks
|
def run(self):
|
||||||
def actual_commit_callback(c):
|
if not self._input and not self._output:
|
||||||
RepoFilter.tweak_commit(args, c)
|
self._setup_input(use_done_feature = True)
|
||||||
commit_callback and commit_callback(c)
|
self._setup_output()
|
||||||
def actual_tag_callback(t):
|
|
||||||
RepoFilter.handle_tag(args, t, shortname = True)
|
|
||||||
tag_callback and tag_callback(t)
|
|
||||||
def actual_reset_callback(r):
|
|
||||||
RepoFilter.handle_tag(args, r)
|
|
||||||
reset_callback and reset_callback(r)
|
|
||||||
|
|
||||||
# Create and run the filter
|
if self._managed_output:
|
||||||
filter = FastExportFilter(blob_callback = blob_callback,
|
if self._args.debug:
|
||||||
commit_callback = actual_commit_callback,
|
print("[DEBUG] Passed arguments:\n{}".format(self._args))
|
||||||
tag_callback = actual_tag_callback,
|
|
||||||
reset_callback = actual_reset_callback,
|
|
||||||
everything_callback = everything_callback)
|
|
||||||
filter.run(input, output, fast_import_pipes = pipes, quiet = args.quiet)
|
|
||||||
|
|
||||||
# Close the output, ensure fast-export and fast-import have completed
|
# Determine basic repository information
|
||||||
output.close()
|
orig_refs = GitUtils.get_refs()
|
||||||
if not args.stdin and fep.wait():
|
is_bare = GitUtils.is_repository_bare()
|
||||||
raise SystemExit("Error: fast-export failed; see above.")
|
|
||||||
if not args.dry_run and fip.wait():
|
# Do sanity checks
|
||||||
|
if not self._args.force:
|
||||||
|
RepoFilter.sanity_check(orig_refs, is_bare)
|
||||||
|
|
||||||
|
if self._input:
|
||||||
|
# Set up the callbacks
|
||||||
|
def actual_commit_callback(c):
|
||||||
|
RepoFilter.tweak_commit(self._args, c)
|
||||||
|
self._commit_callback and self._commit_callback(c)
|
||||||
|
def actual_tag_callback(t):
|
||||||
|
RepoFilter.handle_tag(self._args, t, shortname = True)
|
||||||
|
self._tag_callback and self._tag_callback(t)
|
||||||
|
def actual_reset_callback(r):
|
||||||
|
RepoFilter.handle_tag(self._args, r)
|
||||||
|
self._reset_callback and self._reset_callback(r)
|
||||||
|
|
||||||
|
# Create and run the filter
|
||||||
|
filter = FastExportFilter(blob_callback = self._blob_callback,
|
||||||
|
commit_callback = actual_commit_callback,
|
||||||
|
tag_callback = actual_tag_callback,
|
||||||
|
reset_callback = actual_reset_callback,
|
||||||
|
everything_callback = self._everything_callback)
|
||||||
|
filter.run(self._input,
|
||||||
|
self._output,
|
||||||
|
fast_import_pipes = self._import_pipes,
|
||||||
|
quiet = self._args.quiet)
|
||||||
|
|
||||||
|
# Make sure fast-export completed successfully
|
||||||
|
if not self._args.stdin and self._fep.wait():
|
||||||
|
raise SystemExit("Error: fast-export failed; see above.")
|
||||||
|
|
||||||
|
# If we're not the manager of self._output, we should avoid post-run cleanup
|
||||||
|
if not self._managed_output:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Close the output and ensure fast-import successfully completes
|
||||||
|
self._output.close()
|
||||||
|
if not self._args.dry_run and self._fip.wait():
|
||||||
raise SystemExit("Error: fast-import failed; see above.")
|
raise SystemExit("Error: fast-import failed; see above.")
|
||||||
|
|
||||||
# Exit early, if requested
|
# Exit early, if requested
|
||||||
if args.dry_run:
|
if self._args.dry_run:
|
||||||
orig_str = "by comparing:\n "+fe_orig if fe_orig else "at:"
|
if self._fe_orig:
|
||||||
|
orig_str = "by comparing:\n "+self._fe_orig
|
||||||
|
else:
|
||||||
|
orig_str = "at:"
|
||||||
print("NOTE: Not running fast-import or cleaning up; --dry-run passed.")
|
print("NOTE: Not running fast-import or cleaning up; --dry-run passed.")
|
||||||
print(" Requested filtering can be seen {}".format(orig_str))
|
print(" Requested filtering can be seen {}".format(orig_str))
|
||||||
print(" " + fe_filt)
|
print(" " + self._fe_filt)
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
# Remove unused refs
|
if self._input:
|
||||||
refs_to_nuke = set(orig_refs) - set(filter.get_seen_refs())
|
# Remove unused refs
|
||||||
if refs_to_nuke:
|
refs_to_nuke = set(orig_refs) - set(filter.get_seen_refs())
|
||||||
if args.debug:
|
if refs_to_nuke:
|
||||||
print("[DEBUG] Deleting the following refs:\n "+
|
if self._args.debug:
|
||||||
"\n ".join(refs_to_nuke))
|
print("[DEBUG] Deleting the following refs:\n "+
|
||||||
p = subprocess.Popen('git update-ref --stdin'.split(),
|
"\n ".join(refs_to_nuke))
|
||||||
stdin=subprocess.PIPE)
|
### FIXME: Make sure to run within the target repo...
|
||||||
p.stdin.write(''.join(["option no-deref\ndelete {}\n".format(x)
|
p = subprocess.Popen('git update-ref --stdin'.split(),
|
||||||
for x in refs_to_nuke]))
|
stdin=subprocess.PIPE)
|
||||||
p.stdin.close()
|
p.stdin.write(''.join(["option no-deref\ndelete {}\n".format(x)
|
||||||
if p.wait():
|
for x in refs_to_nuke]))
|
||||||
raise SystemExit("git update-ref failed; see above")
|
p.stdin.close()
|
||||||
|
if p.wait():
|
||||||
|
raise SystemExit("git update-ref failed; see above")
|
||||||
|
|
||||||
# Write out data about run
|
# Write out data about run
|
||||||
filter.record_metadata(results_tmp_dir, orig_refs, refs_to_nuke)
|
filter.record_metadata(self.results_tmp_dir(), orig_refs, refs_to_nuke)
|
||||||
|
|
||||||
# Nuke the reflogs and repack
|
# Nuke the reflogs and repack
|
||||||
if not args.quiet and not args.debug:
|
if not self._args.quiet and not self._args.debug:
|
||||||
print("Repacking your repo and cleaning out old unneeded objects")
|
print("Repacking your repo and cleaning out old unneeded objects")
|
||||||
quiet_flags = '--quiet' if args.quiet else ''
|
quiet_flags = '--quiet' if self._args.quiet else ''
|
||||||
cleanup_cmds = ['git reflog expire --expire=now --all'.split(),
|
cleanup_cmds = ['git reflog expire --expire=now --all'.split(),
|
||||||
'git gc {} --prune=now'.format(quiet_flags).split()]
|
'git gc {} --prune=now'.format(quiet_flags).split()]
|
||||||
if not is_bare:
|
if not is_bare:
|
||||||
cleanup_cmds.append('git reset {} --hard'.format(quiet_flags).split())
|
cleanup_cmds.append('git reset {} --hard'.format(quiet_flags).split())
|
||||||
for cmd in cleanup_cmds:
|
for cmd in cleanup_cmds:
|
||||||
if args.debug:
|
if self._args.debug:
|
||||||
print("[DEBUG] Running: {}".format(' '.join(cmd)))
|
print("[DEBUG] Running: {}".format(' '.join(cmd)))
|
||||||
subprocess.call(cmd)
|
subprocess.call(cmd)
|
||||||
|
|
||||||
@ -2678,4 +2736,5 @@ if __name__ == '__main__':
|
|||||||
if args.analyze:
|
if args.analyze:
|
||||||
RepoAnalyze.run(args)
|
RepoAnalyze.run(args)
|
||||||
else:
|
else:
|
||||||
RepoFilter.run(args)
|
filter = RepoFilter(args)
|
||||||
|
filter.run()
|
||||||
|
Loading…
Reference in New Issue
Block a user