mirror of
https://github.com/newren/git-filter-repo.git
synced 2024-07-06 02:12:17 +02:00
filter-repo: add GitElement and Blob classes, and a FastExport Parser class
We still only parse a single blob, but this should put the infrastructure in place for parsing more output from git-fast-export. Signed-off-by: Elijah Newren <newren@gmail.com>
This commit is contained in:
parent
2b34e5c25d
commit
eb4afc4e78
153
git-filter-repo
153
git-filter-repo
@ -2,63 +2,126 @@
|
||||
|
||||
import commands
|
||||
import re
|
||||
import sha # bleh...when can I assume python >= 2.5?
|
||||
import sys
|
||||
from pyparsing import ParserElement, Literal, Optional, Combine, Word, nums
|
||||
|
||||
from pyparsing import Token, ParseResults
|
||||
class ExactData(Token):
    """Pyparsing token for a length-prefixed data section.

    Matches a header of the form ``data <count>`` terminated by a newline,
    followed by exactly ``<count>`` characters of payload, as used in the
    git-fast-import stream format.
    """

    def __init__(self):
        super(ExactData, self).__init__()

        # Only the header is matched with a regex; the payload is sliced out
        # by length in parseImpl.
        self.pattern = r"data (\d+)\n"
        self.re = re.compile(self.pattern)
        self.reString = self.pattern

        self.name = "ExactData"
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match a data header at ``loc`` and consume the payload after it.

        Returns a ``(new_loc, tokens)`` pair where tokens is a ParseResults
        holding ``['data', <count as string>, <payload>]``.  Raises the
        element's parse exception when no header is found at ``loc``.
        """
        result = self.re.match(instring, loc)
        if not result:
            # NOTE(review): relies on the pre-built self.myException object
            # provided by the pyparsing release this script targets - confirm
            # before upgrading pyparsing.
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        num = result.group(1)
        # The payload starts right after the header and runs for num chars.
        loc = result.end() + int(num)
        data = instring[result.end():loc]
        ret = ParseResults(['data', num, data])
        return loc, ret

    def __str__(self):
        """Return the parent's string form, falling back to ``strRepr``."""
        try:
            # Was super(ExactMath, ...): an undefined name, so this call
            # always failed and the fallback below was always taken.
            return super(ExactData, self).__str__()
        except Exception:
            # Deliberate best-effort: fall through to the cached repr.
            pass

        if self.strRepr is None:
            self.strRepr = "Data:"

        return self.strRepr
|
||||
|
||||
class GitElement(object):
    """Base class for the elements handled by this script.

    Subclasses set ``type`` to identify themselves and override ``dump()``.
    """

    def __init__(self):
        # Filled in by subclasses (e.g. 'blob').
        self.type = None

    def dump(self):
        """Write this element out; must be overridden by subclasses.

        Raises SystemExit naming the concrete class that lacks a dump().
        """
        # The original passed the class as a second SystemExit argument
        # instead of interpolating it, leaving a literal '%s' in the message.
        raise SystemExit("Unimplemented function: %s.dump()"
                         % type(self).__name__)
|
||||
|
||||
class Blob(GitElement):
    """A blob element: a numeric mark together with its data."""

    def __init__(self, mark, data):
        GitElement.__init__(self)
        self.type = 'blob'
        self.mark = mark
        self.data = data

    def dump(self):
        """Write the blob to stdout as 'blob', 'mark :N' and 'data N' + data."""
        emit = sys.stdout.write
        payload = self.data
        emit('blob\n')
        emit('mark :%d\n' % self.mark)
        emit('data %d\n%s' % (len(payload), payload))
|
||||
|
||||
class FastExportParser(object):
    """Parser for fast-export output that invokes user callbacks.

    Currently only a single leading blob is parsed.  All callbacks are
    stored on the instance, but only ``blob_callback`` is invoked by the
    code in this class.
    """

    def __init__(self,
                 tag_callback = None, commit_callback = None,
                 blob_callback = None, progress_callback = None,
                 reset_callback = None, checkpoint_callback = None,
                 everything_callback = None):
        self._setup_parser()
        self.tag_callback = tag_callback
        self.blob_callback = blob_callback
        self.reset_callback = reset_callback
        self.commit_callback = commit_callback
        self.progress_callback = progress_callback
        self.checkpoint_callback = checkpoint_callback
        self.everything_callback = everything_callback

    def _make_blob(self, t):
        """Parse action: build a Blob from tokens, filter it, and dump it.

        ``t`` holds ``['blob', ':<mark>', 'data', '<length>', <data>]``.
        Returns ``['blob', mark, length, sha1sum]`` as replacement tokens.
        Raises SystemExit if the declared length does not match the data.
        """
        # Create the Blob object from the parser tokens
        mark = int(t[1][1:])  # strip the leading ':' of the mark name
        datalen = int(t[3])
        data = t[4]
        if datalen != len(data):
            # Both values must be passed as one tuple; the original applied
            # '%' to datalen alone and so raised TypeError here instead.
            raise SystemExit('%d != len(%s)' % (datalen, data))
        blob = Blob(mark, data)

        # Call any user callback to allow them to modify the blob
        if self.blob_callback:
            self.blob_callback(blob)

        # Now print the resulting blob to stdout
        blob.dump()

        # Replace data with its sha1sum to cut down on memory usage
        # (python parser stores whole resulting parse tree in memory)
        sha1sum = sha.new(blob.data).hexdigest()
        return ['blob', blob.mark, len(blob.data), sha1sum]

    def _setup_parser(self):
        """Build the pyparsing grammar and store it in ``self.blob``."""
        # Note: this changes pyparsing's default whitespace handling globally,
        # so newlines and spaces must be matched explicitly below.
        ParserElement.setDefaultWhitespaceChars('')
        number = Word(nums)
        lf = Literal('\n').suppress()
        sp = Literal(' ').suppress()
        mark_name = Combine(Literal(':') + number)
        mark = Literal('mark').suppress() - sp + mark_name + lf
        # ExactData consumes the 'data <n>' header plus exactly n characters
        # of payload.
        exact_data = ExactData()
        file_content = exact_data
        self.blob = Literal('blob') + lf + mark + file_content
        self.blob.setParseAction(lambda t: self._make_blob(t))

    def parse(self, string):
        """Parse one blob from the start of ``string`` and return the tokens.

        Trailing input after the blob is ignored (``parseAll = False``).
        """
        results = self.blob.parseString(string, parseAll = False)
        return results
|
||||
|
||||
|
||||
# Driver: run git fast-export on the repository in foo/.git and feed its
# output to the parser.  Parsing writes the (possibly filtered) blob to stdout
# as a side effect; the remaining tokens are printed afterwards.
# The grammar formerly built inline here now lives in
# FastExportParser._setup_parser, so the input is parsed only once.
parser = FastExportParser()
string = commands.getoutput("GIT_DIR=foo/.git git fast-export --all")

results = parser.parse(string)
print(results)
|
||||
|
Loading…
Reference in New Issue
Block a user