/Lib/test/test_ucn.py

http://unladen-swallow.googlecode.com/ · Python · 144 lines · 112 code · 17 blank · 15 comment · 5 complexity · 3a2e4113426421bf96a7c7f0aa596067 MD5 · raw file

  1. """ Test script for the Unicode implementation.
  2. Written by Bill Tutt.
  3. Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
  4. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
  5. """#"
  6. import unittest
  7. from test import test_support
  8. class UnicodeNamesTest(unittest.TestCase):
  9. def checkletter(self, name, code):
  10. # Helper that put all \N escapes inside eval'd raw strings,
  11. # to make sure this script runs even if the compiler
  12. # chokes on \N escapes
  13. res = eval(ur'u"\N{%s}"' % name)
  14. self.assertEqual(res, code)
  15. return res
  16. def test_general(self):
  17. # General and case insensitivity test:
  18. chars = [
  19. "LATIN CAPITAL LETTER T",
  20. "LATIN SMALL LETTER H",
  21. "LATIN SMALL LETTER E",
  22. "SPACE",
  23. "LATIN SMALL LETTER R",
  24. "LATIN CAPITAL LETTER E",
  25. "LATIN SMALL LETTER D",
  26. "SPACE",
  27. "LATIN SMALL LETTER f",
  28. "LATIN CAPITAL LeTtEr o",
  29. "LATIN SMaLl LETTER x",
  30. "SPACE",
  31. "LATIN SMALL LETTER A",
  32. "LATIN SMALL LETTER T",
  33. "LATIN SMALL LETTER E",
  34. "SPACE",
  35. "LATIN SMALL LETTER T",
  36. "LATIN SMALL LETTER H",
  37. "LATIN SMALL LETTER E",
  38. "SpAcE",
  39. "LATIN SMALL LETTER S",
  40. "LATIN SMALL LETTER H",
  41. "LATIN small LETTER e",
  42. "LATIN small LETTER e",
  43. "LATIN SMALL LETTER P",
  44. "FULL STOP"
  45. ]
  46. string = u"The rEd fOx ate the sheep."
  47. self.assertEqual(
  48. u"".join([self.checkletter(*args) for args in zip(chars, string)]),
  49. string
  50. )
  51. def test_ascii_letters(self):
  52. import unicodedata
  53. for char in "".join(map(chr, xrange(ord("a"), ord("z")))):
  54. name = "LATIN SMALL LETTER %s" % char.upper()
  55. code = unicodedata.lookup(name)
  56. self.assertEqual(unicodedata.name(code), name)
  57. def test_hangul_syllables(self):
  58. self.checkletter("HANGUL SYLLABLE GA", u"\uac00")
  59. self.checkletter("HANGUL SYLLABLE GGWEOSS", u"\uafe8")
  60. self.checkletter("HANGUL SYLLABLE DOLS", u"\ub3d0")
  61. self.checkletter("HANGUL SYLLABLE RYAN", u"\ub7b8")
  62. self.checkletter("HANGUL SYLLABLE MWIK", u"\ubba0")
  63. self.checkletter("HANGUL SYLLABLE BBWAEM", u"\ubf88")
  64. self.checkletter("HANGUL SYLLABLE SSEOL", u"\uc370")
  65. self.checkletter("HANGUL SYLLABLE YI", u"\uc758")
  66. self.checkletter("HANGUL SYLLABLE JJYOSS", u"\ucb40")
  67. self.checkletter("HANGUL SYLLABLE KYEOLS", u"\ucf28")
  68. self.checkletter("HANGUL SYLLABLE PAN", u"\ud310")
  69. self.checkletter("HANGUL SYLLABLE HWEOK", u"\ud6f8")
  70. self.checkletter("HANGUL SYLLABLE HIH", u"\ud7a3")
  71. import unicodedata
  72. self.assertRaises(ValueError, unicodedata.name, u"\ud7a4")
  73. def test_cjk_unified_ideographs(self):
  74. self.checkletter("CJK UNIFIED IDEOGRAPH-3400", u"\u3400")
  75. self.checkletter("CJK UNIFIED IDEOGRAPH-4DB5", u"\u4db5")
  76. self.checkletter("CJK UNIFIED IDEOGRAPH-4E00", u"\u4e00")
  77. self.checkletter("CJK UNIFIED IDEOGRAPH-9FA5", u"\u9fa5")
  78. self.checkletter("CJK UNIFIED IDEOGRAPH-20000", u"\U00020000")
  79. self.checkletter("CJK UNIFIED IDEOGRAPH-2A6D6", u"\U0002a6d6")
  80. def test_bmp_characters(self):
  81. import unicodedata
  82. count = 0
  83. for code in xrange(0x10000):
  84. char = unichr(code)
  85. name = unicodedata.name(char, None)
  86. if name is not None:
  87. self.assertEqual(unicodedata.lookup(name), char)
  88. count += 1
  89. def test_misc_symbols(self):
  90. self.checkletter("PILCROW SIGN", u"\u00b6")
  91. self.checkletter("REPLACEMENT CHARACTER", u"\uFFFD")
  92. self.checkletter("HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK", u"\uFF9F")
  93. self.checkletter("FULLWIDTH LATIN SMALL LETTER A", u"\uFF41")
  94. def test_errors(self):
  95. import unicodedata
  96. self.assertRaises(TypeError, unicodedata.name)
  97. self.assertRaises(TypeError, unicodedata.name, u'xx')
  98. self.assertRaises(TypeError, unicodedata.lookup)
  99. self.assertRaises(KeyError, unicodedata.lookup, u'unknown')
  100. def test_strict_eror_handling(self):
  101. # bogus character name
  102. self.assertRaises(
  103. UnicodeError,
  104. unicode, "\\N{blah}", 'unicode-escape', 'strict'
  105. )
  106. # long bogus character name
  107. self.assertRaises(
  108. UnicodeError,
  109. unicode, "\\N{%s}" % ("x" * 100000), 'unicode-escape', 'strict'
  110. )
  111. # missing closing brace
  112. self.assertRaises(
  113. UnicodeError,
  114. unicode, "\\N{SPACE", 'unicode-escape', 'strict'
  115. )
  116. # missing opening brace
  117. self.assertRaises(
  118. UnicodeError,
  119. unicode, "\\NSPACE", 'unicode-escape', 'strict'
  120. )
  121. def test_main():
  122. test_support.run_unittest(UnicodeNamesTest)
  123. if __name__ == "__main__":
  124. test_main()