Merge pull request #99 from takluyver/issue-84
author Jeff Quast <contact@jeffquast.com>
Mon, 25 Aug 2014 07:21:27 +0000 (00:21 -0700)
committer Jeff Quast <contact@jeffquast.com>
Mon, 25 Aug 2014 07:21:27 +0000 (00:21 -0700)
Unicode support for screen and ANSI

1  2 
pexpect/ANSI.py
tests/test_ansi.py

diff --cc pexpect/ANSI.py
Simple merge
diff --cc tests/test_ansi.py
index 516509cc59e2bc9f8d18e586f48c54c9e6a76874,33d21bc818dba05b566205b98b81376ed89bb99c..a9d445ed7de55a5beee4821a56fcfdcf60b8bc6c
@@@ -142,26 -145,67 +145,82 @@@ class ansiTestCase (PexpectTestCase.Pex
  
      def test_number_x(self):
          """Test the FSM state used to handle more than 2 numeric parameters."""
 -        s = ANSI.ANSI(1, 20)
 +        class TestANSI(ANSI.ANSI):
 +            captured_memory = None
 +            def do_sgr(self, fsm):
 +                assert self.captured_memory is None
 +                self.captured_memory = fsm.memory
 +
 +        s = TestANSI(1, 20)
          s.write('\x1b[0;1;32;45mtest')
          assert str(s) == ('test                ')
 -        assert(s.state.memory == [s, '0', '1', '32', '45'])
 +        assert s.captured_memory is not None
 +        assert s.captured_memory == [s, '0', '1', '32', '45']
 +
 +    def test_fsm_memory(self):
 +        """Test the FSM stack/memory does not have numbers left on it
 +        after some sequences with numbers are passed in."""
 +        s = ANSI.ANSI(1, 20)
 +        s.write('\x1b[0;1;2;3m\x1b[4;5;6;7q\x1b[?8h\x1b[?9ltest')
 +        assert str(s) == ('test                ')
 +        assert s.state.memory == [s]
  
+     def test_utf8_bytes(self):
+         """Test that when bytes are passed in containing UTF-8 encoded
+         characters, where the encoding of each character consists of
+         multiple bytes, the characters are correctly decoded.
+         Incremental decoding is also tested."""
+         s = ANSI.ANSI(2, 10, encoding='utf-8')
+         # This is the UTF-8 encoding of the UCS character "HOURGLASS"
+         # followed by the UTF-8 encoding of the UCS character
+         # "KEYBOARD".  These characters can't be encoded in cp437 or
+         # latin-1.  The "KEYBOARD" character is split into two
+         # separate writes.
+         s.write(b'\xe2\x8c\x9b')
+         s.write(b'\xe2\x8c')
+         s.write(b'\xa8')
+         if PY3:
+             assert str(s) == u'\u231b\u2328        \n          '
+         else:
+             assert unicode(s) == u'\u231b\u2328        \n          '
+             assert str(s) == b'\xe2\x8c\x9b\xe2\x8c\xa8        \n          '
+         assert s.dump() == u'\u231b\u2328                  '
+         assert s.pretty() == u'+----------+\n|\u231b\u2328        |\n|          |\n+----------+\n'
+         assert s.get_abs(1, 1) == u'\u231b'
+         assert s.get_region(1, 1, 1, 5) == [u'\u231b\u2328   ']
+     def test_unicode(self):
+         """Test passing in of a unicode string."""
+         s = ANSI.ANSI(2, 10, encoding="utf-8")
+         s.write(u'\u231b\u2328')
+         if PY3:
+             assert str(s) == u'\u231b\u2328        \n          '
+         else:
+             assert unicode(s) == u'\u231b\u2328        \n          '
+             assert str(s) == b'\xe2\x8c\x9b\xe2\x8c\xa8        \n          '
+         assert s.dump() == u'\u231b\u2328                  '
+         assert s.pretty() == u'+----------+\n|\u231b\u2328        |\n|          |\n+----------+\n'
+         assert s.get_abs(1, 1) == u'\u231b'
+         assert s.get_region(1, 1, 1, 5) == [u'\u231b\u2328   ']
+     def test_decode_error(self):
+         """Test that default handling of decode errors replaces the
+         invalid characters."""
+         s = ANSI.ANSI(2, 10, encoding="ascii")
+         s.write(b'\xff') # a non-ASCII character
+         # In unicode, the non-ASCII character is replaced with
+         # REPLACEMENT CHARACTER.
+         if PY3:
+             assert str(s) == u'\ufffd         \n          '
+         else:
+             assert unicode(s) == u'\ufffd         \n          '
+             assert str(s) == b'?         \n          '
+         assert s.dump() == u'\ufffd                   '
+         assert s.pretty() == u'+----------+\n|\ufffd         |\n|          |\n+----------+\n'
+         assert s.get_abs(1, 1) == u'\ufffd'
+         assert s.get_region(1, 1, 1, 5) == [u'\ufffd    ']
  if __name__ == '__main__':
      unittest.main()