filter-repo: add handling of 'original-oid' directive

This will be used later to help with commit message rewriting (so that
commits can continue to refer to other commits in their history, using
the new rewritten hashes for those commits), and perhaps also in
removing blobs by id.

Signed-off-by: Elijah Newren <newren@gmail.com>
This commit is contained in:
Elijah Newren 2018-10-30 17:55:54 -07:00
parent 057947f6ff
commit f95308c5eb

View File

@ -272,12 +272,15 @@ class Blob(_GitElementWithId):
way of representing file contents). way of representing file contents).
""" """
def __init__(self, data): def __init__(self, data, original_id = None):
_GitElementWithId.__init__(self) _GitElementWithId.__init__(self)
# Denote that this is a blob # Denote that this is a blob
self.type = 'blob' self.type = 'blob'
# Record original id
self.original_id = original_id
# Stores the blob's data # Stores the blob's data
self.data = data self.data = data
@ -382,6 +385,7 @@ class Commit(_GitElementWithId):
file_changes, file_changes,
from_commit = None, from_commit = None,
merge_commits = [], merge_commits = [],
original_id = None,
**kwargs): **kwargs):
_GitElementWithId.__init__(self) _GitElementWithId.__init__(self)
@ -391,6 +395,9 @@ class Commit(_GitElementWithId):
# Record the affected branch # Record the affected branch
self.branch = branch self.branch = branch
# Record original id
self.original_id = original_id
# Record author's name # Record author's name
self.author_name = author_name self.author_name = author_name
@ -500,7 +507,8 @@ class Tag(_GitElement):
""" """
def __init__(self, ref, from_ref, def __init__(self, ref, from_ref,
tagger_name, tagger_email, tagger_date, tag_msg): tagger_name, tagger_email, tagger_date, tag_msg,
original_id = None):
_GitElement.__init__(self) _GitElement.__init__(self)
# Denote that this is a tag element # Denote that this is a tag element
@ -512,6 +520,9 @@ class Tag(_GitElement):
# Store the entity being tagged (this should be a commit) # Store the entity being tagged (this should be a commit)
self.from_ref = from_ref self.from_ref = from_ref
# Record original id
self.original_id = original_id
# Store the name of the tagger # Store the name of the tagger
self.tagger_name = tagger_name self.tagger_name = tagger_name
@ -740,6 +751,11 @@ class FastExportFilter(object):
self._advance_currentline() self._advance_currentline()
return filechange return filechange
def _parse_original_id(self):
original_id = self._currentline[len('original-oid '):].rstrip()
self._advance_currentline()
return original_id
def _parse_ref_line(self, refname): def _parse_ref_line(self, refname):
""" """
Parses string data (often a branch name) from current-line. The name of Parses string data (often a branch name) from current-line. The name of
@ -795,12 +811,17 @@ class FastExportFilter(object):
# Parse the Blob # Parse the Blob
self._advance_currentline() self._advance_currentline()
id_ = self._parse_optional_mark() id_ = self._parse_optional_mark()
original_id = None
if self._currentline.startswith('original-oid'):
original_id = self._parse_original_id();
data = self._parse_data() data = self._parse_data()
if self._currentline == '\n': if self._currentline == '\n':
self._advance_currentline() self._advance_currentline()
# Create the blob # Create the blob
blob = Blob(data) blob = Blob(data, original_id)
# If fast-export text had a mark for this blob, need to make sure this # If fast-export text had a mark for this blob, need to make sure this
# mark translates to the blob's true id. # mark translates to the blob's true id.
@ -860,6 +881,10 @@ class FastExportFilter(object):
self._seen_refs[branch] = None self._seen_refs[branch] = None
id_ = self._parse_optional_mark() id_ = self._parse_optional_mark()
original_id = None
if self._currentline.startswith('original-oid'):
original_id = self._parse_original_id();
author_name = None author_name = None
if self._currentline.startswith('author'): if self._currentline.startswith('author'):
(author_name, author_email, author_date) = self._parse_user('author') (author_name, author_email, author_date) = self._parse_user('author')
@ -968,6 +993,7 @@ class FastExportFilter(object):
file_changes, file_changes,
from_commit, from_commit,
merge_commits, merge_commits,
original_id,
stream_number = _CURRENT_STREAM_NUMBER) stream_number = _CURRENT_STREAM_NUMBER)
# If fast-export text had a mark for this commit, need to make sure this # If fast-export text had a mark for this commit, need to make sure this
@ -1022,13 +1048,20 @@ class FastExportFilter(object):
from_ref = self._parse_optional_parent_ref('from') from_ref = self._parse_optional_parent_ref('from')
if from_ref is None: if from_ref is None:
raise SystemExit("Expected 'from' line while parsing tag %s" % tag) raise SystemExit("Expected 'from' line while parsing tag %s" % tag)
original_id = None
if self._currentline.startswith('original-oid'):
original_id = self._parse_original_id();
(tagger_name, tagger_email, tagger_date) = self._parse_user('tagger') (tagger_name, tagger_email, tagger_date) = self._parse_user('tagger')
tag_msg = self._parse_data() tag_msg = self._parse_data()
if self._currentline == '\n': if self._currentline == '\n':
self._advance_currentline() self._advance_currentline()
# Create the tag # Create the tag
tag = Tag(tag, from_ref, tagger_name, tagger_email, tagger_date, tag_msg) tag = Tag(tag, from_ref,
tagger_name, tagger_email, tagger_date, tag_msg,
original_id)
# Call any user callback to allow them to modify the tag # Call any user callback to allow them to modify the tag
if self._tag_callback: if self._tag_callback:
@ -1583,6 +1616,7 @@ def run_fast_filter():
fe_orig = None fe_orig = None
else: else:
fep_cmd = ['git', 'fast-export', fep_cmd = ['git', 'fast-export',
'--show-original-ids',
'--no-data', '--use-done-feature'] + args.revisions '--no-data', '--use-done-feature'] + args.revisions
fep = subprocess.Popen(fep_cmd, stdout=subprocess.PIPE) fep = subprocess.Popen(fep_cmd, stdout=subprocess.PIPE)
input = fep.stdout input = fep.stdout