PageRenderTime 104ms CodeModel.GetById 40ms app.highlight 29ms RepoModel.GetById 31ms app.codeStats 0ms

/Lib/test/test_unicodedata.py

http://unladen-swallow.googlecode.com/
Python | 287 lines | 260 code | 12 blank | 15 comment | 3 complexity | e0e00058a4ebef752051f5969416111e MD5 | raw file
  1""" Test script for the unicodedata module.
  2
  3    Written by Marc-Andre Lemburg (mal@lemburg.com).
  4
  5    (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
  6
  7"""
  8
  9import sys
 10import unittest
 11import hashlib
 12import subprocess
 13import test.test_support
 14
# Encoding used to turn the joined unicode results into bytes before they
# are fed to the hash in UnicodeMethodsTest.
encoding = 'utf-8'
 16
 17
 18### Run tests
 19
 20class UnicodeMethodsTest(unittest.TestCase):
 21
 22    # update this, if the database changes
 23    expectedchecksum = '6ec65b65835614ec00634c674bba0e50cd32c189'
 24
 25    def test_method_checksum(self):
 26        h = hashlib.sha1()
 27        for i in range(65536):
 28            char = unichr(i)
 29            data = [
 30                # Predicates (single char)
 31                u"01"[char.isalnum()],
 32                u"01"[char.isalpha()],
 33                u"01"[char.isdecimal()],
 34                u"01"[char.isdigit()],
 35                u"01"[char.islower()],
 36                u"01"[char.isnumeric()],
 37                u"01"[char.isspace()],
 38                u"01"[char.istitle()],
 39                u"01"[char.isupper()],
 40
 41                # Predicates (multiple chars)
 42                u"01"[(char + u'abc').isalnum()],
 43                u"01"[(char + u'abc').isalpha()],
 44                u"01"[(char + u'123').isdecimal()],
 45                u"01"[(char + u'123').isdigit()],
 46                u"01"[(char + u'abc').islower()],
 47                u"01"[(char + u'123').isnumeric()],
 48                u"01"[(char + u' \t').isspace()],
 49                u"01"[(char + u'abc').istitle()],
 50                u"01"[(char + u'ABC').isupper()],
 51
 52                # Mappings (single char)
 53                char.lower(),
 54                char.upper(),
 55                char.title(),
 56
 57                # Mappings (multiple chars)
 58                (char + u'abc').lower(),
 59                (char + u'ABC').upper(),
 60                (char + u'abc').title(),
 61                (char + u'ABC').title(),
 62
 63                ]
 64            h.update(u''.join(data).encode(encoding))
 65        result = h.hexdigest()
 66        self.assertEqual(result, self.expectedchecksum)
 67
 68class UnicodeDatabaseTest(unittest.TestCase):
 69
 70    def setUp(self):
 71        # In case unicodedata is not available, this will raise an ImportError,
 72        # but the other test cases will still be run
 73        import unicodedata
 74        self.db = unicodedata
 75
 76    def tearDown(self):
 77        del self.db
 78
 79class UnicodeFunctionsTest(UnicodeDatabaseTest):
 80
 81    # update this, if the database changes
 82    expectedchecksum = '3136d5afd787dc2bcb1bdcac95e385349fbebbca'
 83
 84    def test_function_checksum(self):
 85        data = []
 86        h = hashlib.sha1()
 87
 88        for i in range(0x10000):
 89            char = unichr(i)
 90            data = [
 91                # Properties
 92                str(self.db.digit(char, -1)),
 93                str(self.db.numeric(char, -1)),
 94                str(self.db.decimal(char, -1)),
 95                self.db.category(char),
 96                self.db.bidirectional(char),
 97                self.db.decomposition(char),
 98                str(self.db.mirrored(char)),
 99                str(self.db.combining(char)),
100            ]
101            h.update(''.join(data))
102        result = h.hexdigest()
103        self.assertEqual(result, self.expectedchecksum)
104
105    def test_digit(self):
106        self.assertEqual(self.db.digit(u'A', None), None)
107        self.assertEqual(self.db.digit(u'9'), 9)
108        self.assertEqual(self.db.digit(u'\u215b', None), None)
109        self.assertEqual(self.db.digit(u'\u2468'), 9)
110        self.assertEqual(self.db.digit(u'\U00020000', None), None)
111
112        self.assertRaises(TypeError, self.db.digit)
113        self.assertRaises(TypeError, self.db.digit, u'xx')
114        self.assertRaises(ValueError, self.db.digit, u'x')
115
116    def test_numeric(self):
117        self.assertEqual(self.db.numeric(u'A',None), None)
118        self.assertEqual(self.db.numeric(u'9'), 9)
119        self.assertEqual(self.db.numeric(u'\u215b'), 0.125)
120        self.assertEqual(self.db.numeric(u'\u2468'), 9.0)
121        self.assertEqual(self.db.numeric(u'\U00020000', None), None)
122
123        self.assertRaises(TypeError, self.db.numeric)
124        self.assertRaises(TypeError, self.db.numeric, u'xx')
125        self.assertRaises(ValueError, self.db.numeric, u'x')
126
127    def test_decimal(self):
128        self.assertEqual(self.db.decimal(u'A',None), None)
129        self.assertEqual(self.db.decimal(u'9'), 9)
130        self.assertEqual(self.db.decimal(u'\u215b', None), None)
131        self.assertEqual(self.db.decimal(u'\u2468', None), None)
132        self.assertEqual(self.db.decimal(u'\U00020000', None), None)
133
134        self.assertRaises(TypeError, self.db.decimal)
135        self.assertRaises(TypeError, self.db.decimal, u'xx')
136        self.assertRaises(ValueError, self.db.decimal, u'x')
137
138    def test_category(self):
139        self.assertEqual(self.db.category(u'\uFFFE'), 'Cn')
140        self.assertEqual(self.db.category(u'a'), 'Ll')
141        self.assertEqual(self.db.category(u'A'), 'Lu')
142        self.assertEqual(self.db.category(u'\U00020000'), 'Lo')
143
144        self.assertRaises(TypeError, self.db.category)
145        self.assertRaises(TypeError, self.db.category, u'xx')
146
147    def test_bidirectional(self):
148        self.assertEqual(self.db.bidirectional(u'\uFFFE'), '')
149        self.assertEqual(self.db.bidirectional(u' '), 'WS')
150        self.assertEqual(self.db.bidirectional(u'A'), 'L')
151        self.assertEqual(self.db.bidirectional(u'\U00020000'), 'L')
152
153        self.assertRaises(TypeError, self.db.bidirectional)
154        self.assertRaises(TypeError, self.db.bidirectional, u'xx')
155
156    def test_decomposition(self):
157        self.assertEqual(self.db.decomposition(u'\uFFFE'),'')
158        self.assertEqual(self.db.decomposition(u'\u00bc'), '<fraction> 0031 2044 0034')
159
160        self.assertRaises(TypeError, self.db.decomposition)
161        self.assertRaises(TypeError, self.db.decomposition, u'xx')
162
163    def test_mirrored(self):
164        self.assertEqual(self.db.mirrored(u'\uFFFE'), 0)
165        self.assertEqual(self.db.mirrored(u'a'), 0)
166        self.assertEqual(self.db.mirrored(u'\u2201'), 1)
167        self.assertEqual(self.db.mirrored(u'\U00020000'), 0)
168
169        self.assertRaises(TypeError, self.db.mirrored)
170        self.assertRaises(TypeError, self.db.mirrored, u'xx')
171
172    def test_combining(self):
173        self.assertEqual(self.db.combining(u'\uFFFE'), 0)
174        self.assertEqual(self.db.combining(u'a'), 0)
175        self.assertEqual(self.db.combining(u'\u20e1'), 230)
176        self.assertEqual(self.db.combining(u'\U00020000'), 0)
177
178        self.assertRaises(TypeError, self.db.combining)
179        self.assertRaises(TypeError, self.db.combining, u'xx')
180
181    def test_normalize(self):
182        self.assertRaises(TypeError, self.db.normalize)
183        self.assertRaises(ValueError, self.db.normalize, 'unknown', u'xx')
184        self.assertEqual(self.db.normalize('NFKC', u''), u'')
185        # The rest can be found in test_normalization.py
186        # which requires an external file.
187
188    def test_east_asian_width(self):
189        eaw = self.db.east_asian_width
190        self.assertRaises(TypeError, eaw, 'a')
191        self.assertRaises(TypeError, eaw, u'')
192        self.assertRaises(TypeError, eaw, u'ra')
193        self.assertEqual(eaw(u'\x1e'), 'N')
194        self.assertEqual(eaw(u'\x20'), 'Na')
195        self.assertEqual(eaw(u'\uC894'), 'W')
196        self.assertEqual(eaw(u'\uFF66'), 'H')
197        self.assertEqual(eaw(u'\uFF1F'), 'F')
198        self.assertEqual(eaw(u'\u2010'), 'A')
199        self.assertEqual(eaw(u'\U00020000'), 'W')
200
201class UnicodeMiscTest(UnicodeDatabaseTest):
202
203    def test_failed_import_during_compiling(self):
204        # Issue 4367
205        # Decoding \N escapes requires the unicodedata module. If it can't be
206        # imported, we shouldn't segfault.
207
208        # This program should raise a SyntaxError in the eval.
209        code = "import sys;" \
210            "sys.modules['unicodedata'] = None;" \
211            """eval("u'\N{SOFT HYPHEN}'")"""
212        args = [sys.executable, "-c", code]
213        # We use a subprocess because the unicodedata module may already have
214        # been loaded in this process.
215        popen = subprocess.Popen(args, stderr=subprocess.PIPE)
216        popen.wait()
217        self.assertEqual(popen.returncode, 1)
218        error = "SyntaxError: (unicode error) \N escapes not supported " \
219            "(can't load unicodedata module)"
220        self.assertTrue(error in popen.stderr.read())
221
222    def test_decimal_numeric_consistent(self):
223        # Test that decimal and numeric are consistent,
224        # i.e. if a character has a decimal value,
225        # its numeric value should be the same.
226        count = 0
227        for i in xrange(0x10000):
228            c = unichr(i)
229            dec = self.db.decimal(c, -1)
230            if dec != -1:
231                self.assertEqual(dec, self.db.numeric(c))
232                count += 1
233        self.assert_(count >= 10) # should have tested at least the ASCII digits
234
235    def test_digit_numeric_consistent(self):
236        # Test that digit and numeric are consistent,
237        # i.e. if a character has a digit value,
238        # its numeric value should be the same.
239        count = 0
240        for i in xrange(0x10000):
241            c = unichr(i)
242            dec = self.db.digit(c, -1)
243            if dec != -1:
244                self.assertEqual(dec, self.db.numeric(c))
245                count += 1
246        self.assert_(count >= 10) # should have tested at least the ASCII digits
247
248    def test_bug_1704793(self):
249        self.assertEquals(self.db.lookup("GOTHIC LETTER FAIHU"), u'\U00010346')
250
251    def test_ucd_510(self):
252        import unicodedata
253        # In UCD 5.1.0, a mirrored property changed wrt. UCD 3.2.0
254        self.assert_(unicodedata.mirrored(u"\u0f3a"))
255        self.assert_(not unicodedata.ucd_3_2_0.mirrored(u"\u0f3a"))
256        # Also, we now have two ways of representing
257        # the upper-case mapping: as delta, or as absolute value
258        self.assert_(u"a".upper()==u'A')
259        self.assert_(u"\u1d79".upper()==u'\ua77d')
260        self.assert_(u".".upper()==u".")
261
262    def test_bug_5828(self):
263        self.assertEqual(u"\u1d79".lower(), u"\u1d79")
264        # Only U+0000 should have U+0000 as its upper/lower/titlecase variant
265        self.assertEqual(
266            [
267                c for c in range(sys.maxunicode+1)
268                if u"\x00" in unichr(c).lower()+unichr(c).upper()+unichr(c).title()
269            ],
270            [0]
271        )
272
273    def test_bug_4971(self):
274        # LETTER DZ WITH CARON: DZ, Dz, dz
275        self.assertEqual(u"\u01c4".title(), u"\u01c5")
276        self.assertEqual(u"\u01c5".title(), u"\u01c5")
277        self.assertEqual(u"\u01c6".title(), u"\u01c5")
278
def test_main():
    """Run every test case class of this module via test_support."""
    case_classes = (
        UnicodeMiscTest,
        UnicodeMethodsTest,
        UnicodeFunctionsTest,
    )
    test.test_support.run_unittest(*case_classes)
285
# Run the whole suite when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()