PageRenderTime 286ms CodeModel.GetById 141ms app.highlight 13ms RepoModel.GetById 128ms app.codeStats 0ms

/Lib/test/test_ucn.py

http://unladen-swallow.googlecode.com/
Python | 144 lines | 127 code | 6 blank | 11 comment | 0 complexity | 3a2e4113426421bf96a7c7f0aa596067 MD5 | raw file
  1""" Test script for the Unicode implementation.
  2
  3Written by Bill Tutt.
  4Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
  5
  6(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
  7
  8"""#"
  9
 10import unittest
 11
 12from test import test_support
 13
 14class UnicodeNamesTest(unittest.TestCase):
 15
 16    def checkletter(self, name, code):
 17        # Helper that put all \N escapes inside eval'd raw strings,
 18        # to make sure this script runs even if the compiler
 19        # chokes on \N escapes
 20        res = eval(ur'u"\N{%s}"' % name)
 21        self.assertEqual(res, code)
 22        return res
 23
 24    def test_general(self):
 25        # General and case insensitivity test:
 26        chars = [
 27            "LATIN CAPITAL LETTER T",
 28            "LATIN SMALL LETTER H",
 29            "LATIN SMALL LETTER E",
 30            "SPACE",
 31            "LATIN SMALL LETTER R",
 32            "LATIN CAPITAL LETTER E",
 33            "LATIN SMALL LETTER D",
 34            "SPACE",
 35            "LATIN SMALL LETTER f",
 36            "LATIN CAPITAL LeTtEr o",
 37            "LATIN SMaLl LETTER x",
 38            "SPACE",
 39            "LATIN SMALL LETTER A",
 40            "LATIN SMALL LETTER T",
 41            "LATIN SMALL LETTER E",
 42            "SPACE",
 43            "LATIN SMALL LETTER T",
 44            "LATIN SMALL LETTER H",
 45            "LATIN SMALL LETTER E",
 46            "SpAcE",
 47            "LATIN SMALL LETTER S",
 48            "LATIN SMALL LETTER H",
 49            "LATIN small LETTER e",
 50            "LATIN small LETTER e",
 51            "LATIN SMALL LETTER P",
 52            "FULL STOP"
 53        ]
 54        string = u"The rEd fOx ate the sheep."
 55
 56        self.assertEqual(
 57            u"".join([self.checkletter(*args) for args in zip(chars, string)]),
 58            string
 59        )
 60
 61    def test_ascii_letters(self):
 62        import unicodedata
 63
 64        for char in "".join(map(chr, xrange(ord("a"), ord("z")))):
 65            name = "LATIN SMALL LETTER %s" % char.upper()
 66            code = unicodedata.lookup(name)
 67            self.assertEqual(unicodedata.name(code), name)
 68
 69    def test_hangul_syllables(self):
 70        self.checkletter("HANGUL SYLLABLE GA", u"\uac00")
 71        self.checkletter("HANGUL SYLLABLE GGWEOSS", u"\uafe8")
 72        self.checkletter("HANGUL SYLLABLE DOLS", u"\ub3d0")
 73        self.checkletter("HANGUL SYLLABLE RYAN", u"\ub7b8")
 74        self.checkletter("HANGUL SYLLABLE MWIK", u"\ubba0")
 75        self.checkletter("HANGUL SYLLABLE BBWAEM", u"\ubf88")
 76        self.checkletter("HANGUL SYLLABLE SSEOL", u"\uc370")
 77        self.checkletter("HANGUL SYLLABLE YI", u"\uc758")
 78        self.checkletter("HANGUL SYLLABLE JJYOSS", u"\ucb40")
 79        self.checkletter("HANGUL SYLLABLE KYEOLS", u"\ucf28")
 80        self.checkletter("HANGUL SYLLABLE PAN", u"\ud310")
 81        self.checkletter("HANGUL SYLLABLE HWEOK", u"\ud6f8")
 82        self.checkletter("HANGUL SYLLABLE HIH", u"\ud7a3")
 83
 84        import unicodedata
 85        self.assertRaises(ValueError, unicodedata.name, u"\ud7a4")
 86
 87    def test_cjk_unified_ideographs(self):
 88        self.checkletter("CJK UNIFIED IDEOGRAPH-3400", u"\u3400")
 89        self.checkletter("CJK UNIFIED IDEOGRAPH-4DB5", u"\u4db5")
 90        self.checkletter("CJK UNIFIED IDEOGRAPH-4E00", u"\u4e00")
 91        self.checkletter("CJK UNIFIED IDEOGRAPH-9FA5", u"\u9fa5")
 92        self.checkletter("CJK UNIFIED IDEOGRAPH-20000", u"\U00020000")
 93        self.checkletter("CJK UNIFIED IDEOGRAPH-2A6D6", u"\U0002a6d6")
 94
 95    def test_bmp_characters(self):
 96        import unicodedata
 97        count = 0
 98        for code in xrange(0x10000):
 99            char = unichr(code)
100            name = unicodedata.name(char, None)
101            if name is not None:
102                self.assertEqual(unicodedata.lookup(name), char)
103                count += 1
104
105    def test_misc_symbols(self):
106        self.checkletter("PILCROW SIGN", u"\u00b6")
107        self.checkletter("REPLACEMENT CHARACTER", u"\uFFFD")
108        self.checkletter("HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK", u"\uFF9F")
109        self.checkletter("FULLWIDTH LATIN SMALL LETTER A", u"\uFF41")
110
111    def test_errors(self):
112        import unicodedata
113        self.assertRaises(TypeError, unicodedata.name)
114        self.assertRaises(TypeError, unicodedata.name, u'xx')
115        self.assertRaises(TypeError, unicodedata.lookup)
116        self.assertRaises(KeyError, unicodedata.lookup, u'unknown')
117
118    def test_strict_eror_handling(self):
119        # bogus character name
120        self.assertRaises(
121            UnicodeError,
122            unicode, "\\N{blah}", 'unicode-escape', 'strict'
123        )
124        # long bogus character name
125        self.assertRaises(
126            UnicodeError,
127            unicode, "\\N{%s}" % ("x" * 100000), 'unicode-escape', 'strict'
128        )
129        # missing closing brace
130        self.assertRaises(
131            UnicodeError,
132            unicode, "\\N{SPACE", 'unicode-escape', 'strict'
133        )
134        # missing opening brace
135        self.assertRaises(
136            UnicodeError,
137            unicode, "\\NSPACE", 'unicode-escape', 'strict'
138        )
139
def test_main():
    # Hand the test case class(es) of this module to the shared
    # test_support runner.
    test_cases = (UnicodeNamesTest,)
    test_support.run_unittest(*test_cases)

if __name__ == "__main__":
    # Executed as a script: run the suite directly.
    test_main()