From 9b3134b68ce94488e8addc93a1830be48a47c82e Mon Sep 17 00:00:00 2001
From: Elijah Newren <newren@gmail.com>
Date: Sat, 27 Apr 2019 11:47:12 -0700
Subject: [PATCH] filter-repo (python3): ensure file reads and writes are done
 in bytes

Signed-off-by: Elijah Newren <newren@gmail.com>
---
 git-filter-repo    | 36 ++++++++++++++++++------------------
 t/t9391/unusual.py |  2 +-
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/git-filter-repo b/git-filter-repo
index 3645697..e006c5a 100755
--- a/git-filter-repo
+++ b/git-filter-repo
@@ -266,7 +266,7 @@ class MailmapInfo(object):
     comment_re = re.compile(r'\s*#.*')
     if not os.access(filename, os.R_OK):
       raise SystemExit(_("Cannot read %s") % filename)
-    with open(filename) as f:
+    with open(filename, 'br') as f:
       count = 0
       for line in f:
         count += 1
@@ -1633,14 +1633,14 @@ class FastExportFilter(object):
   def record_metadata(self, metadata_dir, orig_refs, refs_nuked):
     deleted_hash = '0'*40
     self._flush_renames()
-    with open(os.path.join(metadata_dir, 'commit-map'), 'w') as f:
+    with open(os.path.join(metadata_dir, 'commit-map'), 'bw') as f:
       f.write("%-40s %s\n" % (_("old"), _("new")))
       for (old,new) in self._commit_renames.items():
         f.write('{} {}\n'.format(old, new if new != None else deleted_hash))
 
     batch_check_process = None
     batch_check_output_re = re.compile('^([0-9a-f]{40}) ([a-z]+) ([0-9]+)$')
-    with open(os.path.join(metadata_dir, 'ref-map'), 'w') as f:
+    with open(os.path.join(metadata_dir, 'ref-map'), 'bw') as f:
       for refname, old_hash in orig_refs.items():
         if refname in refs_nuked:
           new_hash = deleted_hash
@@ -1669,7 +1669,7 @@ class FastExportFilter(object):
       batch_check_process.stdin.close()
       batch_check_process.wait()
 
-    with open(os.path.join(metadata_dir, 'suboptimal-issues'), 'w') as f:
+    with open(os.path.join(metadata_dir, 'suboptimal-issues'), 'bw') as f:
       issues_found = False
       if self._commits_no_longer_merges:
         issues_found = True
@@ -1698,7 +1698,7 @@ class FastExportFilter(object):
       if not issues_found:
         f.write(_("No filtering problems encountered."))
 
-    with open(os.path.join(metadata_dir, 'already_ran'), 'w') as f:
+    with open(os.path.join(metadata_dir, 'already_ran'), 'bw') as f:
       f.write(_("This file exists to allow you to filter again without --force."))
 
   def get_seen_refs(self):
@@ -2054,7 +2054,7 @@ class FilteringOptions(object):
   def get_replace_text(filename):
     replace_literals = []
     replace_regexes = []
-    with open(filename) as f:
+    with open(filename, 'br') as f:
       for line in f:
         line = line.rstrip('\r\n')
 
@@ -2353,7 +2353,7 @@ class RepoAnalyze(object):
       for name in dir_size['packed']:
         dir_deleted_data[name] = stats['tree_deletions'].get(name, None)
 
-    with open(os.path.join(reportdir, "README"), 'w') as f:
+    with open(os.path.join(reportdir, "README"), 'bw') as f:
       # Give a basic overview of this file
       f.write("== %s ==\n" % _("Overall Statistics"))
       f.write("  %s: %d\n" % (_("Number of commits"),
@@ -2443,7 +2443,7 @@ class RepoAnalyze(object):
     # Equivalence classes for names, so if folks only want to keep a
     # certain set of paths, they know the old names they want to include
     # too.
-    with open(os.path.join(reportdir, "renames.txt"), 'w') as f:
+    with open(os.path.join(reportdir, "renames.txt"), 'bw') as f:
       seen = set()
       for pathname,equiv_group in sorted(stats['equivalence'].items(),
                                          key=lambda x:(x[1], x[0])):
@@ -2455,7 +2455,7 @@ class RepoAnalyze(object):
                 "\n")
 
     # List directories in reverse sorted order of unpacked size
-    with open(os.path.join(reportdir, "directories-deleted-sizes.txt"), 'w') as f:
+    with open(os.path.join(reportdir, "directories-deleted-sizes.txt"), 'bw') as f:
       f.write("=== %s ===\n" % _("Deleted directories by reverse size"))
       f.write(_("Format: unpacked size, packed size, date deleted, directory name\n"))
       for dirname, size in sorted(dir_size['packed'].items(),
@@ -2467,7 +2467,7 @@ class RepoAnalyze(object):
                         datestr(dir_deleted_data[dirname]),
                         dirname or _('<toplevel>')))
 
-    with open(os.path.join(reportdir, "directories-all-sizes.txt"), 'w') as f:
+    with open(os.path.join(reportdir, "directories-all-sizes.txt"), 'bw') as f:
       f.write("=== %s ===\n" % _("All directories by reverse size"))
       f.write(_("Format: unpacked size, packed size, date deleted, directory name\n"))
       for dirname, size in sorted(dir_size['packed'].items(),
@@ -2479,7 +2479,7 @@ class RepoAnalyze(object):
                         dirname or _("<toplevel>")))
 
     # List extensions in reverse sorted order of unpacked size
-    with open(os.path.join(reportdir, "extensions-deleted-sizes.txt"), 'w') as f:
+    with open(os.path.join(reportdir, "extensions-deleted-sizes.txt"), 'bw') as f:
       f.write("=== %s ===\n" % _("Deleted extensions by reverse size"))
       f.write(_("Format: unpacked size, packed size, date deleted, extension name\n"))
       for extname, size in sorted(ext_size['packed'].items(),
@@ -2491,7 +2491,7 @@ class RepoAnalyze(object):
                         datestr(ext_deleted_data[extname]),
                         extname or _('<no extension>')))
 
-    with open(os.path.join(reportdir, "extensions-all-sizes.txt"), 'w') as f:
+    with open(os.path.join(reportdir, "extensions-all-sizes.txt"), 'bw') as f:
      f.write("=== %s ===\n" % _("All extensions by reverse size"))
       f.write(_("Format: unpacked size, packed size, date deleted, extension name\n"))
       for extname, size in sorted(ext_size['packed'].items(),
@@ -2503,7 +2503,7 @@ class RepoAnalyze(object):
                         extname or _('<no extension>')))
 
     # List files in reverse sorted order of unpacked size
-    with open(os.path.join(reportdir, "path-deleted-sizes.txt"), 'w') as f:
+    with open(os.path.join(reportdir, "path-deleted-sizes.txt"), 'bw') as f:
       f.write("=== %s ===\n" % _("Deleted paths by reverse accumulated size"))
       f.write(_("Format: unpacked size, packed size, date deleted, path name(s)\n"))
       for pathname, size in sorted(path_size['packed'].items(),
@@ -2516,7 +2516,7 @@ class RepoAnalyze(object):
                         datestr(when),
                         pathname))
 
-    with open(os.path.join(reportdir, "path-all-sizes.txt"), 'w') as f:
+    with open(os.path.join(reportdir, "path-all-sizes.txt"), 'bw') as f:
       f.write("=== %s ===\n" % _("All paths by reverse accumulated size"))
       f.write(_("Format: unpacked size, packed size, date deleted, pathectory name\n"))
       for pathname, size in sorted(path_size['packed'].items(),
@@ -2529,7 +2529,7 @@ class RepoAnalyze(object):
                         pathname))
 
     # List of filenames and sizes in descending order
-    with open(os.path.join(reportdir, "blob-shas-and-paths.txt"), 'w') as f:
+    with open(os.path.join(reportdir, "blob-shas-and-paths.txt"), 'bw') as f:
      f.write("=== %s ===\n" % _("Files by sha and associated pathnames in reverse size"))
       f.write(_("Format: sha, unpacked size, packed size, filename(s) object stored as\n"))
       for sha, size in sorted(stats['packed_size'].items(),
@@ -2735,7 +2735,7 @@ class RepoFilter(object):
     for root, dirs, files in os.walk(reflog_dir):
       for filename in files:
         pathname = os.path.join(root, filename)
-        with open(pathname) as f:
+        with open(pathname, 'br') as f:
           if len(f.read().splitlines()) > 1:
             shortpath = pathname[len(reflog_dir)+1:]
             abort(_("expected at most one entry in the reflog for %s") %
@@ -2970,7 +2970,7 @@ class RepoFilter(object):
     if self._args.dry_run or self._args.debug:
       self._fe_orig = os.path.join(self.results_tmp_dir(),
                                    'fast-export.original')
-      output = open(self._fe_orig, 'w')
+      output = open(self._fe_orig, 'bw')
       self._input = InputFileBackup(self._input, output)
       if self._args.debug:
         print("[DEBUG] Running: {}".format(' '.join(fep_cmd)))
@@ -2988,7 +2988,7 @@ class RepoFilter(object):
     if self._args.dry_run or self._args.debug:
       self._fe_filt = os.path.join(self.results_tmp_dir(),
                                    'fast-export.filtered')
-      self._output = open(self._fe_filt, 'w')
+      self._output = open(self._fe_filt, 'bw')
     else:
       self._output = self._fip.stdin
     if self._args.debug:
diff --git a/t/t9391/unusual.py b/t/t9391/unusual.py
index 684c105..6817c65 100755
--- a/t/t9391/unusual.py
+++ b/t/t9391/unusual.py
@@ -59,7 +59,7 @@ filter = fr.FastExportFilter('.',
                              everything_callback = track_everything)
 filter.run(input = sys.stdin.detach(),
-           output = open(os.devnull, 'wb'),
+           output = open(os.devnull, 'bw'),
            fast_import_pipes = None,
            quiet = True)
 
 # DO NOT depend upon or use _IDS directly you external script writers.  I'm
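
Note: Python accepts the characters of a mode string in any order, so the 'br' and
'bw' used above behave exactly like the more familiar 'rb' and 'wb'. The sketch below
is not part of the patch (the scratch path is made up purely for illustration); it
shows the Python 3 behavior the patch guards against: text-mode handles run all data
through an encoding and accept only str, while binary-mode handles pass raw bytes
through untouched.

  import os
  import tempfile

  # Hypothetical scratch file, used only to illustrate the two modes.
  path = os.path.join(tempfile.mkdtemp(), 'demo.txt')

  # Binary mode ('bw' is the same as 'wb'): write() takes bytes; no encoding
  # or newline translation is applied, so arbitrary repository data
  # round-trips byte for byte.
  with open(path, 'bw') as f:
      f.write('héllo\n'.encode('utf-8'))

  with open(path, 'br') as f:
      data = f.read()        # bytes: b'h\xc3\xa9llo\n'

  # Text mode decodes through an encoding (locale-dependent by default) and
  # returns str; that decode step is what can fail on arbitrary, possibly
  # non-UTF-8, repository data.
  with open(path, 'r') as f:
      text = f.read()        # str (assuming a UTF-8 locale): 'héllo\n'

  # Handing str to a binary-mode handle raises TypeError under Python 3,
  # so code writing through these handles must pass bytes.
  with open(path, 'bw') as f:
      try:
          f.write('plain str')
      except TypeError as err:
          print(err)         # "a bytes-like object is required, not 'str'"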