@staticmethod
def get_replace_text(filename):
  """Parse a --replace-text expressions file into replacement rules.

  Each line of the file holds one expression, optionally followed by
  '==>' and the replacement text (everything after the last '==>' on the
  line).  Without '==>' the replacement is '***REMOVED***'.  The
  expression is one of:
    regex:PATTERN      PATTERN is compiled with re.compile()
    glob:PATTERN       shell-style glob, converted to a regex
    literal:TEXT/TEXT  matched verbatim
  Lines whose expression is empty are ignored.

  Returns a dict with two keys, each list in file order:
    'literals': list of (text, replacement) tuples
    'regexes':  list of (compiled_regex, replacement) tuples

  Raises whatever open() raises when filename cannot be read, and
  re.error when a regex/glob pattern does not compile.
  """
  replace_literals = []
  replace_regexes = []
  with open(filename) as f:
    for line in f:
      line = line.rstrip('\r\n')

      # Determine the replacement
      replacement = '***REMOVED***'
      if '==>' in line:
        line, replacement = line.rsplit('==>', 1)

      # See if we need to match via regex
      regex = None
      if line.startswith('regex:'):
        regex = line[6:]
      elif line.startswith('glob:'):
        regex = fnmatch.translate(line[5:])
        # fnmatch.translate anchors the pattern to the end of the input and
        # turns on multi-line flags; neither is wanted when searching inside
        # blob contents.  The wrapper it emits differs between Python
        # versions, so recognize each known form and strip it.
        if regex.endswith(r'\Z(?ms)'):
          regex = regex[:-7]
        elif regex.startswith('(?s:') and regex.endswith((r')\Z', r')\z')):
          regex = regex[4:-3]

      if regex is not None:
        # An empty pattern would match at every position; skip it just like
        # an empty literal instead of mistaking the prefix for literal text.
        if regex:
          replace_regexes.append((re.compile(regex), replacement))
        continue

      # Otherwise, find the literal we need to replace
      if line.startswith('literal:'):
        line = line[8:]
      if line:
        replace_literals.append((line, replacement))
  return {'literals': replace_literals, 'regexes': replace_regexes}
@staticmethod
def tweak_blob(args, blob):
  """Apply the --replace-text rules in args.replace_text to blob.data.

  Does nothing when args.replace_text is unset or empty.  Otherwise every
  (text, replacement) pair under 'literals' is applied with a plain
  string replace, followed by every (compiled_regex, replacement) pair
  under 'regexes' via regex substitution.  blob.data is updated in place;
  nothing is returned.
  """
  rules = args.replace_text
  if not rules:
    return

  # Literal substitutions run first, in the order they were listed.
  for needle, substitute in rules['literals']:
    blob.data = blob.data.replace(needle, substitute)

  # Regex substitutions then operate on the already-edited contents.
  for pattern, substitute in rules['regexes']:
    blob.data = pattern.sub(substitute, blob.data)
index ddc6bc5..d8ae636 100755 --- a/t/t9390-filter-repo.sh +++ b/t/t9390-filter-repo.sh @@ -34,5 +34,6 @@ filter_testcase basic basic-filename --path filename filter_testcase basic basic-twenty --path twenty filter_testcase basic basic-ten --path ten filter_testcase basic basic-mailmap --mailmap ../t9390/sample-mailmap +filter_testcase basic basic-replace --replace-text ../t9390/sample-replace test_done diff --git a/t/t9390/basic-replace b/t/t9390/basic-replace new file mode 100644 index 0000000..3383a65 --- /dev/null +++ b/t/t9390/basic-replace @@ -0,0 +1,78 @@ +feature done +blob +mark :1 +data 8 +initial + +reset refs/heads/B +commit refs/heads/B +mark :2 +author Little O. Me 1535228562 -0700 +committer Little O. Me 1535228562 -0700 +data 8 +Initial +M 100644 :1 filename +M 100644 :1 ten +M 100644 :1 twenty + +blob +mark :3 +data 28 +twenty-modified-by-gremlins + +commit refs/heads/B +mark :4 +author Little 'ol Me 1535229544 -0700 +committer Little 'ol Me 1535229544 -0700 +data 11 +add twenty +from :2 +M 100644 :3 twenty + +blob +mark :5 +data 25 +ten-modified-by-gremlins + +commit refs/heads/A +mark :6 +author Little O. Me 1535229523 -0700 +committer Little O. Me 1535229523 -0700 +data 8 +add ten +from :2 +M 100644 :5 ten + +commit refs/heads/master +mark :7 +author Lit.e Me 1535229559 -0700 +committer Lit.e Me 1535229580 -0700 +data 24 +Merge branch 'A' into B +from :4 +merge :6 +M 100644 :5 ten + +blob +mark :8 +data 6 +final + +commit refs/heads/master +mark :9 +author Little Me 1535229601 -0700 +committer Little Me 1535229601 -0700 +data 9 +whatever +from :7 +M 100644 :8 filename +M 100644 :8 ten +M 100644 :8 twenty + +tag v1.0 +from :9 +tagger Little John 1535229618 -0700 +data 5 +v1.0 + +done diff --git a/t/t9390/sample-replace b/t/t9390/sample-replace new file mode 100644 index 0000000..c77cc88 --- /dev/null +++ b/t/t9390/sample-replace @@ -0,0 +1 @@ +mod==>modified-by-gremlins