mirror of
https://github.com/newren/git-filter-repo.git
synced 2024-07-06 18:32:14 +02:00
filter-repo: buffer subprocess stdout to significantly improve performance
Apparently, subprocess stdout is unbuffered by default; switching it to buffered (bufsize=-1) yields a huge 40% speedup. Doing this also exposes the need to add fi_input.flush() calls, which highlights another performance issue. We may be able to get by with fewer such calls after some refactoring, but that is a bigger, separate change. For now, having the calls stand out as a reminder of that performance issue is good enough. Signed-off-by: Elijah Newren <newren@gmail.com>
This commit is contained in:
parent
9ebd3117ca
commit
03507e57f5
@ -1120,6 +1120,7 @@ class FastExportFilter(object):
|
||||
# buffers filling up so I instead read from it as I go.
|
||||
for change in file_changes:
|
||||
fi_input.write("ls :{} {}\n".format(from_commit, change.filename))
|
||||
fi_input.flush()
|
||||
parent_version = fi_output.readline().split()
|
||||
if change.type == 'D':
|
||||
if parent_version == ['missing', change.filename]:
|
||||
@ -1128,6 +1129,7 @@ class FastExportFilter(object):
|
||||
blob_sha = change.blob_id
|
||||
if isinstance(change.blob_id, int):
|
||||
fi_input.write("get-mark :{}\n".format(change.blob_id))
|
||||
fi_input.flush()
|
||||
blob_sha = fi_output.readline().rstrip()
|
||||
if parent_version == [change.mode, 'blob', blob_sha, change.filename]:
|
||||
unnecessary_filechanges.add(change)
|
||||
@ -1173,6 +1175,7 @@ class FastExportFilter(object):
|
||||
if commit.original_id and fast_import_pipes:
|
||||
fi_input, fi_output = fast_import_pipes
|
||||
fi_input.write("get-mark :{}\n".format(commit.id))
|
||||
fi_input.flush()
|
||||
orig_id = commit.original_id
|
||||
new_id = fi_output.readline().rstrip()
|
||||
self._commit_renames[orig_id] = new_id
|
||||
@ -1531,6 +1534,7 @@ def get_commit_count(repo, *args):
|
||||
if len(args) == 1 and isinstance(args[0], list):
|
||||
args = args[0]
|
||||
p1 = subprocess.Popen(["git", "rev-list"] + args,
|
||||
bufsize=-1,
|
||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
|
||||
cwd=repo)
|
||||
p2 = subprocess.Popen(["wc", "-l"], stdin=p1.stdout, stdout=subprocess.PIPE)
|
||||
@ -1919,6 +1923,7 @@ def gather_data(args):
|
||||
# Get sizes of blobs by sha1
|
||||
a='--batch-check=%(objectname) %(objecttype) %(objectsize) %(objectsize:disk)'
|
||||
cf = subprocess.Popen(['git', 'cat-file', '--batch-all-objects', a],
|
||||
bufsize = -1,
|
||||
stdout = subprocess.PIPE)
|
||||
unpacked_size = {}
|
||||
packed_size = {}
|
||||
@ -1946,7 +1951,7 @@ def gather_data(args):
|
||||
commit_parse_progress = ProgressWriter()
|
||||
num_commits = 0
|
||||
cmd = 'git rev-list --topo-order --reverse {} | git diff-tree --stdin --always --root --format="%H%n%P%n%cd" --date=short -M -t -c --raw --combined-all-paths'.format(' '.join(args.refs))
|
||||
dtp = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
|
||||
dtp = subprocess.Popen(cmd, shell=True, bufsize=-1, stdout=subprocess.PIPE)
|
||||
f = dtp.stdout
|
||||
line = f.next()
|
||||
cont = bool(line)
|
||||
@ -2400,7 +2405,7 @@ def run_fast_filter():
|
||||
'--tag-of-filtered-object=rewrite',
|
||||
'--no-data',
|
||||
'--use-done-feature'] + args.refs
|
||||
fep = subprocess.Popen(fep_cmd, stdout=subprocess.PIPE)
|
||||
fep = subprocess.Popen(fep_cmd, bufsize=-1, stdout=subprocess.PIPE)
|
||||
input = fep.stdout
|
||||
if args.dry_run or args.debug:
|
||||
fe_orig = os.path.join(results_tmp_dir, 'fast-export.original')
|
||||
@ -2414,7 +2419,9 @@ def run_fast_filter():
|
||||
pipes = None
|
||||
if not args.dry_run:
|
||||
fip_cmd = 'git fast-import --force --quiet'.split()
|
||||
fip = subprocess.Popen(fip_cmd, stdin=subprocess.PIPE,
|
||||
fip = subprocess.Popen(fip_cmd,
|
||||
bufsize=-1,
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=subprocess.PIPE)
|
||||
pipes = (fip.stdin, fip.stdout)
|
||||
if args.dry_run or args.debug:
|
||||
|
Loading…
Reference in New Issue
Block a user