mirror of
https://github.com/newren/git-filter-repo.git
synced 2024-07-06 18:32:14 +02:00
filter-repo: buffer subprocess stdout to significantly improve performance
Apparently, the default for subprocess stdout is unbuffered; switching it to buffered yields a huge 40% speedup. Doing this also exposes the need to add fi_input.flush() calls, which highlights another performance issue. We may be able to reduce the number of such calls with some refactoring, but that is a bigger, separate change. For now, it is enough that the explicit flush calls mark these spots as a known performance issue. Signed-off-by: Elijah Newren <newren@gmail.com>
This commit is contained in:
parent
9ebd3117ca
commit
03507e57f5
@ -1120,6 +1120,7 @@ class FastExportFilter(object):
|
|||||||
# buffers filling up so I instead read from it as I go.
|
# buffers filling up so I instead read from it as I go.
|
||||||
for change in file_changes:
|
for change in file_changes:
|
||||||
fi_input.write("ls :{} {}\n".format(from_commit, change.filename))
|
fi_input.write("ls :{} {}\n".format(from_commit, change.filename))
|
||||||
|
fi_input.flush()
|
||||||
parent_version = fi_output.readline().split()
|
parent_version = fi_output.readline().split()
|
||||||
if change.type == 'D':
|
if change.type == 'D':
|
||||||
if parent_version == ['missing', change.filename]:
|
if parent_version == ['missing', change.filename]:
|
||||||
@ -1128,6 +1129,7 @@ class FastExportFilter(object):
|
|||||||
blob_sha = change.blob_id
|
blob_sha = change.blob_id
|
||||||
if isinstance(change.blob_id, int):
|
if isinstance(change.blob_id, int):
|
||||||
fi_input.write("get-mark :{}\n".format(change.blob_id))
|
fi_input.write("get-mark :{}\n".format(change.blob_id))
|
||||||
|
fi_input.flush()
|
||||||
blob_sha = fi_output.readline().rstrip()
|
blob_sha = fi_output.readline().rstrip()
|
||||||
if parent_version == [change.mode, 'blob', blob_sha, change.filename]:
|
if parent_version == [change.mode, 'blob', blob_sha, change.filename]:
|
||||||
unnecessary_filechanges.add(change)
|
unnecessary_filechanges.add(change)
|
||||||
@ -1173,6 +1175,7 @@ class FastExportFilter(object):
|
|||||||
if commit.original_id and fast_import_pipes:
|
if commit.original_id and fast_import_pipes:
|
||||||
fi_input, fi_output = fast_import_pipes
|
fi_input, fi_output = fast_import_pipes
|
||||||
fi_input.write("get-mark :{}\n".format(commit.id))
|
fi_input.write("get-mark :{}\n".format(commit.id))
|
||||||
|
fi_input.flush()
|
||||||
orig_id = commit.original_id
|
orig_id = commit.original_id
|
||||||
new_id = fi_output.readline().rstrip()
|
new_id = fi_output.readline().rstrip()
|
||||||
self._commit_renames[orig_id] = new_id
|
self._commit_renames[orig_id] = new_id
|
||||||
@ -1531,6 +1534,7 @@ def get_commit_count(repo, *args):
|
|||||||
if len(args) == 1 and isinstance(args[0], list):
|
if len(args) == 1 and isinstance(args[0], list):
|
||||||
args = args[0]
|
args = args[0]
|
||||||
p1 = subprocess.Popen(["git", "rev-list"] + args,
|
p1 = subprocess.Popen(["git", "rev-list"] + args,
|
||||||
|
bufsize=-1,
|
||||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
|
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
|
||||||
cwd=repo)
|
cwd=repo)
|
||||||
p2 = subprocess.Popen(["wc", "-l"], stdin=p1.stdout, stdout=subprocess.PIPE)
|
p2 = subprocess.Popen(["wc", "-l"], stdin=p1.stdout, stdout=subprocess.PIPE)
|
||||||
@ -1919,6 +1923,7 @@ def gather_data(args):
|
|||||||
# Get sizes of blobs by sha1
|
# Get sizes of blobs by sha1
|
||||||
a='--batch-check=%(objectname) %(objecttype) %(objectsize) %(objectsize:disk)'
|
a='--batch-check=%(objectname) %(objecttype) %(objectsize) %(objectsize:disk)'
|
||||||
cf = subprocess.Popen(['git', 'cat-file', '--batch-all-objects', a],
|
cf = subprocess.Popen(['git', 'cat-file', '--batch-all-objects', a],
|
||||||
|
bufsize = -1,
|
||||||
stdout = subprocess.PIPE)
|
stdout = subprocess.PIPE)
|
||||||
unpacked_size = {}
|
unpacked_size = {}
|
||||||
packed_size = {}
|
packed_size = {}
|
||||||
@ -1946,7 +1951,7 @@ def gather_data(args):
|
|||||||
commit_parse_progress = ProgressWriter()
|
commit_parse_progress = ProgressWriter()
|
||||||
num_commits = 0
|
num_commits = 0
|
||||||
cmd = 'git rev-list --topo-order --reverse {} | git diff-tree --stdin --always --root --format="%H%n%P%n%cd" --date=short -M -t -c --raw --combined-all-paths'.format(' '.join(args.refs))
|
cmd = 'git rev-list --topo-order --reverse {} | git diff-tree --stdin --always --root --format="%H%n%P%n%cd" --date=short -M -t -c --raw --combined-all-paths'.format(' '.join(args.refs))
|
||||||
dtp = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
|
dtp = subprocess.Popen(cmd, shell=True, bufsize=-1, stdout=subprocess.PIPE)
|
||||||
f = dtp.stdout
|
f = dtp.stdout
|
||||||
line = f.next()
|
line = f.next()
|
||||||
cont = bool(line)
|
cont = bool(line)
|
||||||
@ -2400,7 +2405,7 @@ def run_fast_filter():
|
|||||||
'--tag-of-filtered-object=rewrite',
|
'--tag-of-filtered-object=rewrite',
|
||||||
'--no-data',
|
'--no-data',
|
||||||
'--use-done-feature'] + args.refs
|
'--use-done-feature'] + args.refs
|
||||||
fep = subprocess.Popen(fep_cmd, stdout=subprocess.PIPE)
|
fep = subprocess.Popen(fep_cmd, bufsize=-1, stdout=subprocess.PIPE)
|
||||||
input = fep.stdout
|
input = fep.stdout
|
||||||
if args.dry_run or args.debug:
|
if args.dry_run or args.debug:
|
||||||
fe_orig = os.path.join(results_tmp_dir, 'fast-export.original')
|
fe_orig = os.path.join(results_tmp_dir, 'fast-export.original')
|
||||||
@ -2414,7 +2419,9 @@ def run_fast_filter():
|
|||||||
pipes = None
|
pipes = None
|
||||||
if not args.dry_run:
|
if not args.dry_run:
|
||||||
fip_cmd = 'git fast-import --force --quiet'.split()
|
fip_cmd = 'git fast-import --force --quiet'.split()
|
||||||
fip = subprocess.Popen(fip_cmd, stdin=subprocess.PIPE,
|
fip = subprocess.Popen(fip_cmd,
|
||||||
|
bufsize=-1,
|
||||||
|
stdin=subprocess.PIPE,
|
||||||
stdout=subprocess.PIPE)
|
stdout=subprocess.PIPE)
|
||||||
pipes = (fip.stdin, fip.stdout)
|
pipes = (fip.stdin, fip.stdout)
|
||||||
if args.dry_run or args.debug:
|
if args.dry_run or args.debug:
|
||||||
|
Loading…
Reference in New Issue
Block a user