* Python 3.1 and 3.2 have an inconsistent memoryview implementation which
author Robert Collins <robertc@robertcollins.net>
Sun, 25 Aug 2013 00:40:00 +0000 (12:40 +1200)
committer Robert Collins <robertc@robertcollins.net>
Sun, 25 Aug 2013 00:40:00 +0000 (12:40 +1200)
  required a workaround for NUL byte detection. (Robert Collins, #1216246)

NEWS
python/subunit/v2.py

diff --git a/NEWS b/NEWS
index 4fd9e17035cf8b3a7e9bb7212d7b6b14a7ec98f0..98b0320901c314ddb3c2a9eafe0e03c1cb255d02 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,9 @@ NEXT (In development)
 BUG FIXES
 ~~~~~~~~~
 
+* Python 3.1 and 3.2 have an inconsistent memoryview implementation which
+  required a workaround for NUL byte detection. (Robert Collins, #1216246)
+
 * V2 parser errors now set appropriate mime types for the encapsulated packet
   data and the error message. (Robert Collins)
 
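diff --git a/python/subunit/v2.py b/python/subunit/v2.py
index b4e1e3839aa3d01a06877943de6b62b5393f61ae..057f65c3bdd63203ed4b0b3c51d52579a30cc116 100644 (file)
--- a/python/subunit/v2.py
+++ b/python/subunit/v2.py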
@@ -50,6 +50,24 @@ FLAG_EOF = 0x0010
 FLAG_FILE_CONTENT = 0x0040
 EPOCH = datetime.datetime.utcfromtimestamp(0).replace(tzinfo=iso8601.Utc())
 NUL_ELEMENT = b'\0'[0]
+# Maps type -> True if 'NUL_ELEMENT in obj' falsely returns False for it.
+_nul_test_broken = {}
+
+
+def has_nul(buffer_or_bytes):
+    """Return True if a NUL (zero) byte is present in buffer_or_bytes."""
+    # A plain "NUL_ELEMENT in buffer_or_bytes" fails on Python 3.1 and 3.2
+    # with memoryviews. See https://bugs.launchpad.net/subunit/+bug/1216246
+    buffer_type = type(buffer_or_bytes)
+    broken = _nul_test_broken.get(buffer_type)  # None: type not probed yet
+    if broken is None:
+        reference = buffer_type(b'\0')  # an instance known to hold a NUL
+        broken = not NUL_ELEMENT in reference
+        _nul_test_broken[buffer_type] = broken
+    if broken:
+        return b'\0' in buffer_or_bytes  # fall back to a bytes needle
+    else:
+        return NUL_ELEMENT in buffer_or_bytes
 
 
 class ParseError(Exception):
@@ -462,7 +480,7 @@ class ByteStreamToStreamResult(object):
                 'UTF8 string at offset %d extends past end of packet: '
                 'claimed %d bytes, %d available' % (pos - 2, length,
                 len(utf8_bytes)))
-        if NUL_ELEMENT in utf8_bytes:
+        if has_nul(utf8_bytes):
             raise ParseError('UTF8 string at offset %d contains NUL byte' % (
                 pos-2,))
         try: