PageRenderTime 36ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 1ms

/Lib/test/test_unicode_jy.py

http://github.com/nriley/jython
Python | 182 lines | 158 code | 15 blank | 9 comment | 8 complexity | efa6f7187d77f8154e1033f4db086ae3 MD5 | raw file
  1. # -*- coding: utf-8 -*-
  2. """Misc unicode tests
  3. Made for Jython.
  4. """
  5. import re
  6. import sys
  7. import unittest
  8. from StringIO import StringIO
  9. from test import test_support
  10. class UnicodeTestCase(unittest.TestCase):
  11. def test_simplejson_plane_bug(self):
  12. # a bug exposed by simplejson: unicode __add__ was always
  13. # forcing the basic plane
  14. chunker = re.compile(r'(.*?)(["\\\x00-\x1f])', re.VERBOSE | re.MULTILINE | re.DOTALL)
  15. orig = u'z\U0001d120x'
  16. quoted1 = u'"z\U0001d120x"'
  17. quoted2 = '"' + orig + '"'
  18. # chunker re gives different results depending on the plane
  19. self.assertEqual(chunker.match(quoted1, 1).groups(), (orig, u'"'))
  20. self.assertEqual(chunker.match(quoted2, 1).groups(), (orig, u'"'))
  21. def test_parse_unicode(self):
  22. foo = u'ą\n'
  23. self.assertEqual(len(foo), 2, repr(foo))
  24. self.assertEqual(repr(foo), "u'\\u0105\\n'")
  25. self.assertEqual(ord(foo[0]), 261)
  26. self.assertEqual(ord(foo[1]), 10)
  27. bar = foo.encode('utf-8')
  28. self.assertEqual(len(bar), 3)
  29. self.assertEqual(repr(bar), "'\\xc4\\x85\\n'")
  30. self.assertEqual(ord(bar[0]), 196)
  31. self.assertEqual(ord(bar[1]), 133)
  32. self.assertEqual(ord(bar[2]), 10)
  33. def test_parse_raw_unicode(self):
  34. foo = ur'ą\n'
  35. self.assertEqual(len(foo), 3, repr(foo))
  36. self.assertEqual(repr(foo), "u'\\u0105\\\\n'")
  37. self.assertEqual(ord(foo[0]), 261)
  38. self.assertEqual(ord(foo[1]), 92)
  39. self.assertEqual(ord(foo[2]), 110)
  40. bar = foo.encode('utf-8')
  41. self.assertEqual(len(bar), 4)
  42. self.assertEqual(repr(bar), "'\\xc4\\x85\\\\n'")
  43. self.assertEqual(ord(bar[0]), 196)
  44. self.assertEqual(ord(bar[1]), 133)
  45. self.assertEqual(ord(bar[2]), 92)
  46. self.assertEqual(ord(bar[3]), 110)
  47. for baz in ur'Hello\u0020World !', ur'Hello\U00000020World !':
  48. self.assertEqual(len(baz), 13, repr(baz))
  49. self.assertEqual(repr(baz), "u'Hello World !'")
  50. self.assertEqual(ord(baz[5]), 32)
  51. quux = ur'\U00100000'
  52. self.assertEqual(repr(quux), "u'\\U00100000'")
  53. if sys.maxunicode == 0xffff:
  54. self.assertEqual(len(quux), 2)
  55. self.assertEqual(ord(quux[0]), 56256)
  56. self.assertEqual(ord(quux[1]), 56320)
  57. else:
  58. self.assertEqual(len(quux), 1)
  59. self.assertEqual(ord(quux), 1048576)
  60. def test_raw_unicode_escape(self):
  61. foo = u'\U00100000'
  62. self.assertEqual(foo.encode('raw_unicode_escape'), '\\U00100000')
  63. self.assertEqual(foo.encode('raw_unicode_escape').decode('raw_unicode_escape'),
  64. foo)
  65. for bar in '\\u', '\\u000', '\\U00000':
  66. self.assertRaises(UnicodeDecodeError, bar.decode, 'raw_unicode_escape')
  67. def test_encode_decimal(self):
  68. self.assertEqual(int(u'\u0039\u0032'), 92)
  69. self.assertEqual(int(u'\u0660'), 0)
  70. self.assertEqual(int(u' \u001F\u0966\u096F\u0039'), 99)
  71. self.assertEqual(long(u'\u0663'), 3)
  72. self.assertEqual(float(u'\u0663.\u0661'), 3.1)
  73. self.assertEqual(complex(u'\u0663.\u0661'), 3.1+0j)
  74. def test_unstateful_end_of_data(self):
  75. # http://bugs.jython.org/issue1368
  76. for encoding in 'utf-8', 'utf-16', 'utf-16-be', 'utf-16-le':
  77. self.assertRaises(UnicodeDecodeError, '\xe4'.decode, encoding)
  78. def test_formatchar(self):
  79. self.assertEqual('%c' % 255, '\xff')
  80. self.assertRaises(OverflowError, '%c'.__mod__, 256)
  81. result = u'%c' % 256
  82. self.assert_(isinstance(result, unicode))
  83. self.assertEqual(result, u'\u0100')
  84. if sys.maxunicode == 0xffff:
  85. self.assertEqual(u'%c' % sys.maxunicode, u'\uffff')
  86. else:
  87. self.assertEqual(u'%c' % sys.maxunicode, u'\U0010ffff')
  88. self.assertRaises(OverflowError, '%c'.__mod__, sys.maxunicode + 1)
  89. def test_repr(self):
  90. self.assert_(isinstance('%r' % u'foo', str))
  91. def test_concat(self):
  92. self.assertRaises(UnicodeDecodeError, lambda : u'' + '毛泽东')
  93. self.assertRaises(UnicodeDecodeError, lambda : '毛泽东' + u'')
  94. def test_join(self):
  95. self.assertRaises(UnicodeDecodeError, u''.join, ['foo', '毛泽东'])
  96. self.assertRaises(UnicodeDecodeError, '毛泽东'.join, [u'foo', u'bar'])
  97. def test_file_encoding(self):
  98. '''Ensure file writing doesn't attempt to encode things by default and reading doesn't
  99. decode things by default. This was jython's behavior prior to 2.2.1'''
  100. EURO_SIGN = u"\u20ac"
  101. try:
  102. EURO_SIGN.encode()
  103. except UnicodeEncodeError:
  104. # This default encoding can't handle the encoding the Euro sign. Skip the test
  105. return
  106. f = open(test_support.TESTFN, "w")
  107. self.assertRaises(UnicodeEncodeError, f, write, EURO_SIGN,
  108. "Shouldn't be able to write out a Euro sign without first encoding")
  109. f.close()
  110. f = open(test_support.TESTFN, "w")
  111. f.write(EURO_SIGN.encode('utf-8'))
  112. f.close()
  113. f = open(test_support.TESTFN, "r")
  114. encoded_euro = f.read()
  115. f.close()
  116. os.remove(test_support.TESTFN)
  117. self.assertEquals('\xe2\x82\xac', encoded_euro)
  118. self.assertEquals(EURO_SIGN, encoded_euro.decode('utf-8'))
  119. class UnicodeFormatTestCase(unittest.TestCase):
  120. def test_unicode_mapping(self):
  121. assertTrue = self.assertTrue
  122. class EnsureUnicode(dict):
  123. def __missing__(self, key):
  124. assertTrue(isinstance(key, unicode))
  125. return key
  126. u'%(foo)s' % EnsureUnicode()
  127. def test_non_ascii_unicode_mod_str(self):
  128. # Regression test for a problem on the formatting logic: when no unicode
  129. # args were found, Jython stored the resulting buffer on a PyString,
  130. # decoding it later to make a PyUnicode. That crashed when the left side
  131. # of % was a unicode containing non-ascii chars
  132. self.assertEquals(u"\u00e7%s" % "foo", u"\u00e7foo")
  133. class UnicodeStdIOTestCase(unittest.TestCase):
  134. def setUp(self):
  135. self.stdout = sys.stdout
  136. def tearDown(self):
  137. sys.stdout = self.stdout
  138. def test_intercepted_stdout(self):
  139. msg = u'Circle is 360\u00B0'
  140. sys.stdout = StringIO()
  141. print msg,
  142. self.assertEqual(sys.stdout.getvalue(), msg)
  143. def test_main():
  144. test_support.run_unittest(UnicodeTestCase,
  145. UnicodeFormatTestCase,
  146. UnicodeStdIOTestCase)
  147. if __name__ == "__main__":
  148. test_main()