Allow less strict parsing of fastimport streams.
author Jelmer Vernooij <jelmer@samba.org>
Wed, 4 Apr 2012 00:44:30 +0000 (02:44 +0200)
committer Jelmer Vernooij <jelmer@samba.org>
Wed, 4 Apr 2012 00:44:30 +0000 (02:44 +0200)
NEWS
fastimport/parser.py
fastimport/tests/test_parser.py

diff --git a/NEWS b/NEWS
index 969ef751ea3e343e39339e2c0e5b1fac54d8d6a4..8702378b48162d63e31363974ab128ca56333a73 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -6,6 +6,10 @@
  * Cope with invalid timezones like +61800 a little bit better.
    (Jelmer Vernooij, #959154)
 
+ * Allow non-strict parsing of fastimport streams, when
+   a tagger is missing an email address.
+   (Jelmer Vernooij, #730607)
+
 0.9.1  2012-02-28
 
  * Update FSF address in headers. (Dan Callaghan, #868800)
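diff --git a/fastimport/parser.py b/fastimport/parser.py
index 88eecfac4abdc70d61e4bae6d1b7da001c41801a..cff8614a3d3704045c61799aff3377529e549397 100644 (file)
--- a/fastimport/parser.py
+++ b/fastimport/parser.py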
@@ -259,7 +259,7 @@ _WHO_RE = re.compile(r'([^<]*)<(.*)>')
 class ImportParser(LineBasedParser):
 
     def __init__(self, input, verbose=False, output=sys.stdout,
-        user_mapper=None):
+        user_mapper=None, strict=True):
         """A Parser of import commands.
 
         :param input: the file-like object to read from
@@ -267,11 +267,13 @@ class ImportParser(LineBasedParser):
         :param output: the file-like object to write messages to (YAGNI?)
         :param user_mapper: if not None, the UserMapper used to adjust
           user-ids for authors, committers and taggers.
+        :param strict: Raise errors on strictly invalid data
         """
         LineBasedParser.__init__(self, input)
         self.verbose = verbose
         self.output = output
         self.user_mapper = user_mapper
+        self.strict = strict
         # We auto-detect the date format when a date is first encountered
         self.date_parser = None
         self.features = {}
@@ -421,7 +423,8 @@ class ImportParser(LineBasedParser):
     def _parse_tag(self, name):
         """Parse a tag command."""
         from_ = self._get_from('tag')
-        tagger = self._get_user_info('tag', 'tagger', accept_just_who=True)
+        tagger = self._get_user_info('tag', 'tagger',
+                accept_just_who=True)
         message = self._get_data('tag', 'message')
         return commands.TagCommand(name, from_, tagger, message)
 
@@ -524,19 +527,25 @@ class ImportParser(LineBasedParser):
             except ValueError:
                 print "failed to parse datestr '%s'" % (datestr,)
                 raise
+            name = match.group(1)
+            email = match.group(2)
         else:
             match = _WHO_RE.search(s)
             if accept_just_who and match:
                 # HACK around missing time
                 # TODO: output a warning here
                 when = dates.DATE_PARSERS_BY_NAME['now']('now')
-            else:
+                name = match.group(1)
+                email = match.group(2)
+            elif self.strict:
                 self.abort(errors.BadFormat, cmd, section, s)
-        name = match.group(1)
+            else:
+                name = s
+                email = None
+                when = dates.DATE_PARSERS_BY_NAME['now']('now')
         if len(name) > 0:
             if name[-1] == " ":
                 name = name[:-1]
-        email = match.group(2)
         # While it shouldn't happen, some datasets have email addresses
         # which contain unicode characters. See bug 338186. We sanitize
         # the data at this level just in case.
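diff --git a/fastimport/tests/test_parser.py b/fastimport/tests/test_parser.py
index 4bf11c7cd044a6ec58ba5c4a25b1fb5983f7609e..97d062b76b0cd25e214ac4501f8dffff91fcd000 100644 (file)
--- a/fastimport/tests/test_parser.py
+++ b/fastimport/tests/test_parser.py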
@@ -20,6 +20,7 @@ import StringIO
 import testtools
 
 from fastimport import (
+    commands,
     errors,
     parser,
     )
@@ -304,3 +305,40 @@ class TestPathPairParsing(testtools.TestCase):
         p = parser.ImportParser("")
         self.assertEqual(['foo bar', 'baz'],
             p._path_pair('"foo bar" baz'))
+
+
+class TestTagParsing(testtools.TestCase):
+
+    def test_tagger_with_email(self):
+        p = parser.ImportParser(StringIO.StringIO(
+            "tag refs/tags/v1.0\n"
+            "from :xxx\n"
+            "tagger Joe Wong <joe@example.com> 1234567890 -0600\n"
+            "data 11\n"
+            "create v1.0"))
+        cmds = list(p.iter_commands())
+        self.assertEquals(1, len(cmds))
+        self.assertIsInstance(cmds[0], commands.TagCommand)
+        self.assertEquals(cmds[0].tagger,
+            ('Joe Wong', 'joe@example.com', 1234567890.0, -21600))
+
+    def test_tagger_no_email_strict(self):
+        p = parser.ImportParser(StringIO.StringIO(
+            "tag refs/tags/v1.0\n"
+            "from :xxx\n"
+            "tagger Joe Wong\n"
+            "data 11\n"
+            "create v1.0"))
+        self.assertRaises(errors.BadFormat, list, p.iter_commands())
+
+    def test_tagger_no_email_not_strict(self):
+        p = parser.ImportParser(StringIO.StringIO(
+            "tag refs/tags/v1.0\n"
+            "from :xxx\n"
+            "tagger Joe Wong\n"
+            "data 11\n"
+            "create v1.0"), strict=False)
+        cmds = list(p.iter_commands())
+        self.assertEquals(1, len(cmds))
+        self.assertIsInstance(cmds[0], commands.TagCommand)
+        self.assertEquals(cmds[0].tagger[:2], ('Joe Wong', None))