mirror of
https://github.com/newren/git-filter-repo.git
synced 2024-07-06 18:32:14 +02:00
filter-repo: group repo analysis functions into a class
Signed-off-by: Elijah Newren <newren@gmail.com>
This commit is contained in:
parent
9887dd5cbe
commit
4e2110136e
@ -1891,10 +1891,15 @@ class FilteringOptions(object):
|
||||
FilteringOptions.sanity_check_args(args)
|
||||
return args
|
||||
|
||||
def analyze_commit(stats, graph, commit, parents, date, file_changes):
|
||||
def equiv_class(filename):
|
||||
class RepoAnalyze(object):
|
||||
|
||||
# First, several helper functions for analyze_commit()
|
||||
|
||||
@staticmethod
|
||||
def equiv_class(stats, filename):
|
||||
return stats['equivalence'].get(filename, (filename,))
|
||||
|
||||
@staticmethod
|
||||
def setup_equivalence_for_rename(stats, oldname, newname):
|
||||
# if A is renamed to B and B is renamed to C, then the user thinks of
|
||||
# A, B, and C as all being different names for the same 'file'. We record
|
||||
@ -1911,18 +1916,22 @@ def analyze_commit(stats, graph, commit, parents, date, file_changes):
|
||||
for f in new_tuple:
|
||||
stats['equivalence'][f] = new_tuple
|
||||
|
||||
@staticmethod
|
||||
def setup_or_update_rename_history(stats, commit, oldname, newname):
|
||||
rename_commits = stats['rename_history'].get(oldname, set())
|
||||
rename_commits.add(commit)
|
||||
stats['rename_history'][oldname] = rename_commits
|
||||
|
||||
@staticmethod
|
||||
def handle_renames(stats, commit, change_types, filenames):
|
||||
for index, change_type in enumerate(change_types):
|
||||
if change_type == 'R':
|
||||
oldname, newname = filenames[index], filenames[-1]
|
||||
setup_equivalence_for_rename(stats, oldname, newname)
|
||||
setup_or_update_rename_history(stats, commit, oldname, newname)
|
||||
RepoAnalyze.setup_equivalence_for_rename(stats, oldname, newname)
|
||||
RepoAnalyze.setup_or_update_rename_history(stats, commit,
|
||||
oldname, newname)
|
||||
|
||||
@staticmethod
|
||||
def handle_file(stats, graph, commit, modes, shas, filenames):
|
||||
mode, sha, filename = modes[-1], shas[-1], filenames[-1]
|
||||
|
||||
@ -1936,7 +1945,7 @@ def analyze_commit(stats, graph, commit, parents, date, file_changes):
|
||||
|
||||
# If the file (or equivalence class of files) was recorded as deleted,
|
||||
# clearly it isn't anymore
|
||||
equiv = equiv_class(filename)
|
||||
equiv = RepoAnalyze.equiv_class(stats, filename)
|
||||
for f in equiv:
|
||||
stats[delmode].pop(f, None)
|
||||
|
||||
@ -1954,6 +1963,8 @@ def analyze_commit(stats, graph, commit, parents, date, file_changes):
|
||||
if f in stats['equivalence']:
|
||||
del stats['equivalence'][f]
|
||||
|
||||
@staticmethod
|
||||
def analyze_commit(stats, graph, commit, parents, date, file_changes):
|
||||
graph.add_commit_and_parents(commit, parents)
|
||||
for change in file_changes:
|
||||
modes, shas, change_types, filenames = change
|
||||
@ -1962,30 +1973,32 @@ def analyze_commit(stats, graph, commit, parents, date, file_changes):
|
||||
if modes[-1] == '160000':
|
||||
continue
|
||||
elif modes[-1] == '000000':
|
||||
# Track when files/directories are deleted; see 'R' below about equiv_class
|
||||
for f in equiv_class(filenames[-1]):
|
||||
# Track when files/directories are deleted
|
||||
for f in RepoAnalyze.equiv_class(stats, filenames[-1]):
|
||||
if any(x == '040000' for x in modes[0:-1]):
|
||||
stats['tree_deletions'][f] = date
|
||||
else:
|
||||
stats['file_deletions'][f] = date
|
||||
elif change_types.strip('AMT') == '':
|
||||
handle_file(stats, graph, commit, modes, shas, filenames)
|
||||
RepoAnalyze.handle_file(stats, graph, commit, modes, shas, filenames)
|
||||
elif modes[-1] == '040000' and change_types.strip('RAM') == '':
|
||||
handle_file(stats, graph, commit, modes, shas, filenames)
|
||||
RepoAnalyze.handle_file(stats, graph, commit, modes, shas, filenames)
|
||||
elif change_types.strip('RAM') == '':
|
||||
handle_file(stats, graph, commit, modes, shas, filenames)
|
||||
handle_renames(stats, commit, change_types, filenames)
|
||||
RepoAnalyze.handle_file(stats, graph, commit, modes, shas, filenames)
|
||||
RepoAnalyze.handle_renames(stats, commit, change_types, filenames)
|
||||
else:
|
||||
raise SystemExit("Unhandled change type(s): {} (in commit {})"
|
||||
.format(change_types, commit))
|
||||
|
||||
@staticmethod
|
||||
def gather_data(args):
|
||||
blob_size_progress = ProgressWriter()
|
||||
num_blobs = 0
|
||||
|
||||
# Get sizes of blobs by sha1
|
||||
a='--batch-check=%(objectname) %(objecttype) %(objectsize) %(objectsize:disk)'
|
||||
cf = subprocess.Popen(['git', 'cat-file', '--batch-all-objects', a],
|
||||
cmd = '--batch-check=%(objectname) %(objecttype) ' + \
|
||||
'%(objectsize) %(objectsize:disk)'
|
||||
cf = subprocess.Popen(['git', 'cat-file', '--batch-all-objects', cmd],
|
||||
bufsize = -1,
|
||||
stdout = subprocess.PIPE)
|
||||
unpacked_size = {}
|
||||
@ -2013,7 +2026,9 @@ def gather_data(args):
|
||||
# Setup the rev-list/diff-tree process
|
||||
commit_parse_progress = ProgressWriter()
|
||||
num_commits = 0
|
||||
cmd = 'git rev-list --topo-order --reverse {} | git diff-tree --stdin --always --root --format="%H%n%P%n%cd" --date=short -M -t -c --raw --combined-all-paths'.format(' '.join(args.refs))
|
||||
cmd = ('git rev-list --topo-order --reverse {}'.format(' '.join(args.refs)) +
|
||||
' | git diff-tree --stdin --always --root --format=%H%n%P%n%cd' +
|
||||
' --date=short -M -t -c --raw --combined-all-paths')
|
||||
dtp = subprocess.Popen(cmd, shell=True, bufsize=-1, stdout=subprocess.PIPE)
|
||||
f = dtp.stdout
|
||||
line = f.next()
|
||||
@ -2056,7 +2071,8 @@ def gather_data(args):
|
||||
file_changes.append([modes, shas, change_types, filenames])
|
||||
|
||||
# Analyze this commit and update progress
|
||||
analyze_commit(stats, graph, commit, parents, date, file_changes)
|
||||
RepoAnalyze.analyze_commit(stats, graph, commit, parents, date,
|
||||
file_changes)
|
||||
num_commits += 1
|
||||
commit_parse_progress.show("Processed {} commits".format(num_commits))
|
||||
|
||||
@ -2071,6 +2087,7 @@ def gather_data(args):
|
||||
|
||||
return stats
|
||||
|
||||
@staticmethod
|
||||
def write_report(reportdir, stats):
|
||||
def datestr(datetimestr):
|
||||
return datetimestr if datetimestr else '<present>'
|
||||
@ -2312,7 +2329,8 @@ def write_report(reportdir, stats):
|
||||
size,
|
||||
names_with_sha))
|
||||
|
||||
def do_analysis(args, git_dir):
|
||||
@staticmethod
|
||||
def run(args, git_dir):
|
||||
# Create the report directory as necessary
|
||||
results_tmp_dir = os.path.join(git_dir, 'filter-repo')
|
||||
if not os.path.isdir(results_tmp_dir):
|
||||
@ -2324,12 +2342,12 @@ def do_analysis(args, git_dir):
|
||||
os.mkdir(reportdir)
|
||||
|
||||
# Gather the data we need
|
||||
stats = gather_data(args)
|
||||
stats = RepoAnalyze.gather_data(args)
|
||||
|
||||
# Write the reports
|
||||
sys.stdout.write("Writing reports to {}...".format(reportdir))
|
||||
sys.stdout.flush()
|
||||
write_report(reportdir, stats)
|
||||
RepoAnalyze.write_report(reportdir, stats)
|
||||
sys.stdout.write("done.\n")
|
||||
|
||||
def sanity_check(refs, is_bare):
|
||||
@ -2506,7 +2524,7 @@ def run_fast_filter():
|
||||
|
||||
# Do analysis, if requested
|
||||
if args.analyze:
|
||||
do_analysis(args, git_dir)
|
||||
RepoAnalyze.run(args, git_dir)
|
||||
return
|
||||
|
||||
# Do sanity checks
|
||||
|
Loading…
Reference in New Issue
Block a user