filter-repo: group high-level repo filtering functions into a class

Signed-off-by: Elijah Newren <newren@gmail.com>
This commit is contained in:
Elijah Newren 2018-12-24 23:02:03 -08:00
parent 4e2110136e
commit 55c2c32d7c

View File

@ -27,7 +27,7 @@ from datetime import tzinfo, timedelta, datetime
__all__ = ["Blob", "Reset", "FileChanges", "Commit", "Tag", "Progress",
"Checkpoint", "FastExportFilter", "FixedTimeZone", "ProgressWriter",
"fast_export_output", "fast_import_input", "record_id_rename",
"GitUtils", "FilteringOptions"]
"GitUtils", "FilteringOptions", "RepoFilter"]
def _timedelta_to_seconds(delta):
@ -2330,7 +2330,9 @@ class RepoAnalyze(object):
names_with_sha))
@staticmethod
def run(args, git_dir):
def run(args):
git_dir = GitUtils.determine_git_dir()
# Create the report directory as necessary
results_tmp_dir = os.path.join(git_dir, 'filter-repo')
if not os.path.isdir(results_tmp_dir):
@ -2350,6 +2352,36 @@ class RepoAnalyze(object):
RepoAnalyze.write_report(reportdir, stats)
sys.stdout.write("done.\n")
class InputFileBackup:
  """Reader wrapper that copies everything it reads into a second file.

  Whatever read() or readline() obtains from `input_file` is written,
  unchanged, to `output_file` before being returned to the caller.
  """

  def __init__(self, input_file, output_file):
    # Object that is read from, and object that receives a copy of the data
    self.input_file, self.output_file = input_file, output_file

  def read(self, size):
    """Return input_file.read(size), after also writing it to output_file."""
    chunk = self.input_file.read(size)
    self.output_file.write(chunk)
    return chunk

  def readline(self):
    """Return input_file.readline(), after also writing it to output_file."""
    text = self.input_file.readline()
    self.output_file.write(text)
    return text
class DualFileWriter:
  """Write-only wrapper that duplicates every write onto two file objects."""

  def __init__(self, file1, file2):
    # The two destinations; both receive every write() and are both closed
    self.file1 = file1
    self.file2 = file2

  def write(self, *args):
    """Forward the same arguments to write() on file1, then on file2."""
    self.file1.write(*args)
    self.file2.write(*args)

  def close(self):
    """Close both files.

    file2 is closed even when closing file1 raises, so a failure on the
    first file cannot leave the second one open; the exception raised by
    file1.close() still propagates to the caller.
    """
    try:
      self.file1.close()
    finally:
      self.file2.close()
class RepoFilter(object):
@staticmethod
def sanity_check(refs, is_bare):
def abort(reason):
raise SystemExit(
@ -2411,6 +2443,7 @@ def sanity_check(refs, is_bare):
if rev != refs[origin_ref]:
abort('{} does not match {}'.format(refname, origin_ref))
@staticmethod
def tweak_commit(args, commit):
def filename_matches(path_expression, pathname):
if path_expression == '':
@ -2425,17 +2458,17 @@ def tweak_commit(args, commit):
def newname(path_changes, pathname, filtering_is_inclusive):
wanted = False
for (mod_type, match_type, path_expression) in path_changes:
for (mod_type, match_type, path_exp) in path_changes:
if mod_type == 'filter' and not wanted:
assert match_type in ('match', 'glob', 'regex')
if match_type == 'match' and filename_matches(path_expression, pathname):
if match_type == 'match' and filename_matches(path_exp, pathname):
wanted = True
if match_type == 'glob' and fnmatch.fnmatch(pathname, path_expression):
if match_type == 'glob' and fnmatch.fnmatch(pathname, path_exp):
wanted = True
if match_type == 'regex' and re.search(path_expression, pathname):
if match_type == 'regex' and re.search(path_exp, pathname):
wanted = True
elif mod_type == 'rename':
old_exp, new_exp = path_expression.split(':')
old_exp, new_exp = path_exp.split(':')
assert match_type in ('prefix',)
if match_type == 'prefix' and pathname.startswith(old_exp):
pathname = pathname.replace(old_exp, new_exp, 1)
@ -2443,12 +2476,13 @@ def tweak_commit(args, commit):
# Sometimes the 'branch' given is a tag; if so, rename it as requested so
# we don't get any old tagnames
commit.branch = new_tagname(args, commit.branch)
commit.branch = RepoFilter.new_tagname(args, commit.branch)
# Filter the list of file changes
new_file_changes = {}
for change in commit.file_changes:
change.filename = newname(args.path_changes, change.filename, args.inclusive)
change.filename = newname(args.path_changes, change.filename,
args.inclusive)
if not change.filename:
continue # Filtering criteria excluded this file; move on to next one
if change.filename in new_file_changes:
@ -2470,6 +2504,7 @@ def tweak_commit(args, commit):
new_file_changes[change.filename] = change
commit.file_changes = new_file_changes.values()
@staticmethod
def new_tagname(args, tagname, shortname = False):
replace = args.tag_rename
if not replace:
@ -2481,55 +2516,23 @@ def new_tagname(args, tagname, shortname = False):
return tagname.replace(old, new, 1)
return tagname
@staticmethod
def handle_tag(args, reset_or_tag, shortname = False):
reset_or_tag.ref = new_tagname(args, reset_or_tag.ref, shortname)
reset_or_tag.ref = RepoFilter.new_tagname(args, reset_or_tag.ref, shortname)
class InputFileBackup:
  """Reader wrapper that copies everything it reads into a second file.

  Whatever read() or readline() obtains from `input_file` is written,
  unchanged, to `output_file` before being returned to the caller.
  """

  def __init__(self, input_file, output_file):
    # Object that is read from, and object that receives a copy of the data
    self.input_file, self.output_file = input_file, output_file

  def read(self, size):
    """Return input_file.read(size), after also writing it to output_file."""
    chunk = self.input_file.read(size)
    self.output_file.write(chunk)
    return chunk

  def readline(self):
    """Return input_file.readline(), after also writing it to output_file."""
    text = self.input_file.readline()
    self.output_file.write(text)
    return text
class DualFileWriter:
  """Write-only wrapper that duplicates every write onto two file objects."""

  def __init__(self, file1, file2):
    # The two destinations; both receive every write() and are both closed
    self.file1 = file1
    self.file2 = file2

  def write(self, *args):
    """Forward the same arguments to write() on file1, then on file2."""
    self.file1.write(*args)
    self.file2.write(*args)

  def close(self):
    """Close both files.

    file2 is closed even when closing file1 raises, so a failure on the
    first file cannot leave the second one open; the exception raised by
    file1.close() still propagates to the caller.
    """
    try:
      self.file1.close()
    finally:
      self.file2.close()
def run_fast_filter():
args = FilteringOptions.parse_args(sys.argv[1:])
@staticmethod
def run(args):
if args.debug:
print("[DEBUG] Parsed arguments:\n{}".format(args))
print("[DEBUG] Passed arguments:\n{}".format(args))
# Determine basic repository information
orig_refs = GitUtils.get_refs()
is_bare = GitUtils.is_repository_bare()
git_dir = GitUtils.determine_git_dir()
# Do analysis, if requested
if args.analyze:
RepoAnalyze.run(args, git_dir)
return
# Do sanity checks
if not args.force:
sanity_check(orig_refs, is_bare)
RepoFilter.sanity_check(orig_refs, is_bare)
# Create a temporary directory for storing some results
results_tmp_dir = os.path.join(git_dir, 'filter-repo')
@ -2578,9 +2581,9 @@ def run_fast_filter():
# Create and run the filter
filter = FastExportFilter(
commit_callback = lambda c : tweak_commit(args, c),
tag_callback = lambda t : handle_tag(args, t, shortname = True),
reset_callback = lambda r : handle_tag(args, r),
commit_callback = lambda c : RepoFilter.tweak_commit(args, c),
tag_callback = lambda t : RepoFilter.handle_tag(args, t, shortname = True),
reset_callback = lambda r : RepoFilter.handle_tag(args, r),
)
filter.run(input, output, fast_import_pipes = pipes, quiet = args.quiet)
@ -2591,7 +2594,7 @@ def run_fast_filter():
if not args.dry_run and fip.wait():
raise SystemExit("Error: fast-import failed; see above.")
# Exit early
# Exit early, if requested
if args.dry_run:
orig_str = "by comparing:\n "+fe_orig if fe_orig else "at:"
print("NOTE: Not running fast-import or cleaning up; --dry-run passed.")
@ -2630,4 +2633,8 @@ def run_fast_filter():
subprocess.call(cmd)
if __name__ == '__main__':
run_fast_filter()
args = FilteringOptions.parse_args(sys.argv[1:])
if args.analyze:
RepoAnalyze.run(args)
else:
RepoFilter.run(args)