mirror of
https://github.com/newren/git-filter-repo.git
synced 2024-07-05 09:52:15 +02:00
filter-repo: add text removal (or replacement) via file of expressions
Make it easy for users to search and replace text throughout the
repository history. Instead of inventing some new syntax, reuse the
same syntax used by BFG repo filter's --replace-text option, namely,
a file with one expression per line of the form

    [regex:|glob:|literal:]$MATCH_EXPR[==>$REPLACEMENT_EXPR]

Where "$MATCH_EXPR" is by default considered to be literal text, but
could be a regex or a glob if the appropriate prefix is used. Also,
$REPLACEMENT_EXPR defaults to '***REMOVED***' if not specified. If you
want a literal '==>' to be part of your $MATCH_EXPR, then you must also
manually specify a replacement expression instead of taking the default.

Some examples:

    sup3rs3kr3t
        (replaces 'sup3rs3kr3t' with '***REMOVED***')
    HeWhoShallNotBeNamed==>Voldemort
        (replaces 'HeWhoShallNotBeNamed' with 'Voldemort')
    very==>
        (replaces 'very' with the empty string)
    regex:(\d{2})/(\d{2})/(\d{4})==>\2/\1/\3
        (replaces '05/17/2012' with '17/05/2012', and vice-versa)
        The format for regex is as from
            re.sub(<pattern>, <repl>, <string>)
        from https://docs.python.org/2/library/re.html
        The <string> comes from file contents of the repo, and you
        specify the <pattern> and <repl>.
    glob:Copy*t==>Cartel
        (replaces 'Copyright' or 'Copyleft' or 'Copy my st' with 'Cartel')

Signed-off-by: Elijah Newren <newren@gmail.com>
This commit is contained in:
parent
4ee915e4dd
commit
73e91edecc
@ -1824,6 +1824,16 @@ class FilteringOptions(object):
|
||||
action='store_const', const='.mailmap',
|
||||
help='''Same as: '--mailmap .mailmap' ''')
|
||||
|
||||
contents = parser.add_argument_group(title='Content editing filters')
|
||||
contents.add_argument('--replace-text', metavar='EXPRESSIONS_FILE',
|
||||
help='''A file with expressions that, if found, will
|
||||
be replaced. By default, each expression is
|
||||
treated as literal text, but 'regex:' and 'glob:'
|
||||
prefixes are supported. You can end the line
|
||||
with "==>" and some replacement text to choose
|
||||
a replacement choice other than the default of
|
||||
"***REMOVED***". ''')
|
||||
|
||||
location = parser.add_argument_group(title='Location to filter from/to')
|
||||
location.add_argument('--source',
|
||||
help='''Git repository to read from''')
|
||||
@ -1881,6 +1891,38 @@ class FilteringOptions(object):
|
||||
if not has_filter:
|
||||
args.inclusive = False
|
||||
|
||||
@staticmethod
|
||||
def get_replace_text(filename):
    """Parse a --replace-text expressions file into replacement rules.

    Each line of the file has the form
        [regex:|glob:|literal:]MATCH_EXPR[==>REPLACEMENT_EXPR]
    MATCH_EXPR is literal text unless prefixed with 'regex:' or 'glob:'.
    When '==>' is absent the replacement defaults to '***REMOVED***'; when
    it occurs more than once, the last occurrence separates the match
    expression from the replacement.

    Lines whose match expression is empty are skipped.

    Args:
        filename: path of the expressions file to read.

    Returns:
        A dict with two keys:
          'literals': list of (text, replacement) tuples
          'regexes':  list of (compiled_pattern, replacement) tuples
        Both lists preserve the order of the lines in the file.

    Raises:
        IOError/OSError: if the file cannot be opened.
        re.error: if a 'regex:' line is not a valid regular expression.
    """
    replace_literals = []
    replace_regexes = []
    with open(filename) as f:
        for line in f:
            line = line.rstrip('\r\n')

            # Determine the replacement
            replacement = '***REMOVED***'
            if '==>' in line:
                line, replacement = line.rsplit('==>', 1)

            # See if we need to match via regex; None means "literal rule"
            regex = None
            if line.startswith('regex:'):
                regex = line[6:]
            elif line.startswith('glob:'):
                regex = fnmatch.translate(line[5:])
                # fnmatch.translate() anchors its result so it only matches
                # a whole string; we want to find the glob anywhere inside a
                # blob, so strip the anchoring.  Older Pythons emit
                # 'PATTERN\Z(?ms)'; newer ones emit '(?s:PATTERN)\Z' (or the
                # same with a lowercase '\z').
                if regex.endswith(r'\Z(?ms)'):
                    regex = regex[:-7]
                elif (regex.startswith('(?s:') and
                      regex.endswith((r')\Z', r')\z'))):
                    regex = regex[4:-3]
            elif line.startswith('literal:'):
                line = line[8:]

            if regex is not None:
                # An empty pattern would match at every position (or be
                # mistaken for a literal 'regex:'/'glob:'), so ignore it.
                if regex:
                    replace_regexes.append((re.compile(regex), replacement))
            elif line:
                replace_literals.append((line, replacement))
    return {'literals': replace_literals, 'regexes': replace_regexes}
|
||||
|
||||
@staticmethod
|
||||
def default_options():
|
||||
return FilteringOptions.parse_args([], error_on_empty = False)
|
||||
@ -1898,6 +1940,8 @@ class FilteringOptions(object):
|
||||
FilteringOptions.sanity_check_args(args)
|
||||
if args.mailmap:
|
||||
args.mailmap = MailmapInfo(args.mailmap)
|
||||
if args.replace_text:
|
||||
args.replace_text = FilteringOptions.get_replace_text(args.replace_text)
|
||||
return args
|
||||
|
||||
class RepoAnalyze(object):
|
||||
@ -2505,6 +2549,14 @@ class RepoFilter(object):
|
||||
if rev != refs[origin_ref]:
|
||||
abort('{} does not match {}'.format(refname, origin_ref))
|
||||
|
||||
@staticmethod
|
||||
def tweak_blob(args, blob):
    """Apply the --replace-text rules, if any, to a blob's contents.

    Does nothing when args.replace_text is unset or empty.  Otherwise
    blob.data is rewritten in place: first every literal rule is applied in
    order via a plain substring replace, then every regex rule is applied
    in order via the compiled pattern's sub().

    Args:
        args: options object; args.replace_text is either falsy or a dict
            with 'literals' and 'regexes' lists of (matcher, replacement).
        blob: object whose .data attribute holds the contents to edit.
    """
    rules = args.replace_text
    if not rules:
        return

    # Literal substitutions run before the regex-based ones.
    for needle, substitute in rules['literals']:
        blob.data = blob.data.replace(needle, substitute)
    for pattern, substitute in rules['regexes']:
        blob.data = pattern.sub(substitute, blob.data)
|
||||
|
||||
@staticmethod
|
||||
def tweak_commit(args, commit):
|
||||
def filename_matches(path_expression, pathname):
|
||||
@ -2630,6 +2682,7 @@ class RepoFilter(object):
|
||||
else:
|
||||
skip_blobs = (self._blob_callback is None and
|
||||
self._everything_callback is None and
|
||||
self._args.replace_text is None and
|
||||
self._args.source is None and
|
||||
self._args.target is None)
|
||||
extra_flags = ['--no-data'] if skip_blobs else []
|
||||
@ -2686,6 +2739,9 @@ class RepoFilter(object):
|
||||
|
||||
if self._input:
|
||||
# Set up the callbacks
|
||||
def combined_blob_callback(b):
|
||||
RepoFilter.tweak_blob(self._args, b)
|
||||
self._blob_callback and self._blob_callback(b)
|
||||
def actual_commit_callback(c):
|
||||
RepoFilter.tweak_commit(self._args, c)
|
||||
self._commit_callback and self._commit_callback(c)
|
||||
@ -2695,10 +2751,13 @@ class RepoFilter(object):
|
||||
def actual_reset_callback(r):
|
||||
RepoFilter.handle_reset(self._args, r)
|
||||
self._reset_callback and self._reset_callback(r)
|
||||
actual_blob_callback = self._blob_callback
|
||||
if self._args.replace_text:
|
||||
actual_blob_callback = combined_blob_callback
|
||||
|
||||
# Create and run the filter
|
||||
fef = FastExportFilter(self._args.source or '.',
|
||||
blob_callback = self._blob_callback,
|
||||
blob_callback = actual_blob_callback,
|
||||
commit_callback = actual_commit_callback,
|
||||
tag_callback = actual_tag_callback,
|
||||
reset_callback = actual_reset_callback,
|
||||
|
@ -34,5 +34,6 @@ filter_testcase basic basic-filename --path filename
|
||||
filter_testcase basic basic-twenty --path twenty
|
||||
filter_testcase basic basic-ten --path ten
|
||||
filter_testcase basic basic-mailmap --mailmap ../t9390/sample-mailmap
|
||||
filter_testcase basic basic-replace --replace-text ../t9390/sample-replace
|
||||
|
||||
test_done
|
||||
|
78
t/t9390/basic-replace
Normal file
78
t/t9390/basic-replace
Normal file
@ -0,0 +1,78 @@
|
||||
feature done
|
||||
blob
|
||||
mark :1
|
||||
data 8
|
||||
initial
|
||||
|
||||
reset refs/heads/B
|
||||
commit refs/heads/B
|
||||
mark :2
|
||||
author Little O. Me <me@little.net> 1535228562 -0700
|
||||
committer Little O. Me <me@little.net> 1535228562 -0700
|
||||
data 8
|
||||
Initial
|
||||
M 100644 :1 filename
|
||||
M 100644 :1 ten
|
||||
M 100644 :1 twenty
|
||||
|
||||
blob
|
||||
mark :3
|
||||
data 28
|
||||
twenty-modified-by-gremlins
|
||||
|
||||
commit refs/heads/B
|
||||
mark :4
|
||||
author Little 'ol Me <me@laptop.(none)> 1535229544 -0700
|
||||
committer Little 'ol Me <me@laptop.(none)> 1535229544 -0700
|
||||
data 11
|
||||
add twenty
|
||||
from :2
|
||||
M 100644 :3 twenty
|
||||
|
||||
blob
|
||||
mark :5
|
||||
data 25
|
||||
ten-modified-by-gremlins
|
||||
|
||||
commit refs/heads/A
|
||||
mark :6
|
||||
author Little O. Me <me@machine52.little.net> 1535229523 -0700
|
||||
committer Little O. Me <me@machine52.little.net> 1535229523 -0700
|
||||
data 8
|
||||
add ten
|
||||
from :2
|
||||
M 100644 :5 ten
|
||||
|
||||
commit refs/heads/master
|
||||
mark :7
|
||||
author Lit.e Me <me@fire.com> 1535229559 -0700
|
||||
committer Lit.e Me <me@fire.com> 1535229580 -0700
|
||||
data 24
|
||||
Merge branch 'A' into B
|
||||
from :4
|
||||
merge :6
|
||||
M 100644 :5 ten
|
||||
|
||||
blob
|
||||
mark :8
|
||||
data 6
|
||||
final
|
||||
|
||||
commit refs/heads/master
|
||||
mark :9
|
||||
author Little Me <me@bigcompany.com> 1535229601 -0700
|
||||
committer Little Me <me@bigcompany.com> 1535229601 -0700
|
||||
data 9
|
||||
whatever
|
||||
from :7
|
||||
M 100644 :8 filename
|
||||
M 100644 :8 ten
|
||||
M 100644 :8 twenty
|
||||
|
||||
tag v1.0
|
||||
from :9
|
||||
tagger Little John <second@merry.men> 1535229618 -0700
|
||||
data 5
|
||||
v1.0
|
||||
|
||||
done
|
1
t/t9390/sample-replace
Normal file
1
t/t9390/sample-replace
Normal file
@ -0,0 +1 @@
|
||||
mod==>modified-by-gremlins
|
Loading…
Reference in New Issue
Block a user