PageRenderTime 1706ms CodeModel.GetById 181ms app.highlight 1135ms RepoModel.GetById 260ms app.codeStats 1ms

/Lib/test/test_unicode.py

http://unladen-swallow.googlecode.com/
Python | 1136 lines | 1099 code | 17 blank | 20 comment | 7 complexity | eb73381ad196cf6f4ca903146e6c5208 MD5 | raw file
   1# -*- coding: iso-8859-1 -*-
   2""" Test script for the Unicode implementation.
   3
   4Written by Marc-Andre Lemburg (mal@lemburg.com).
   5
   6(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
   7
   8"""#"
   9import sys, struct, codecs
  10from test import test_support, string_tests
  11
  12# Error handling (bad decoder return)
  13def search_function(encoding):
  14    def decode1(input, errors="strict"):
  15        return 42 # not a tuple
  16    def encode1(input, errors="strict"):
  17        return 42 # not a tuple
  18    def encode2(input, errors="strict"):
  19        return (42, 42) # no unicode
  20    def decode2(input, errors="strict"):
  21        return (42, 42) # no unicode
  22    if encoding=="test.unicode1":
  23        return (encode1, decode1, None, None)
  24    elif encoding=="test.unicode2":
  25        return (encode2, decode2, None, None)
  26    else:
  27        return None
  28codecs.register(search_function)
  29
  30class UnicodeTest(
  31    string_tests.CommonTest,
  32    string_tests.MixinStrUnicodeUserStringTest,
  33    string_tests.MixinStrUnicodeTest,
  34    ):
  35    type2test = unicode
  36
  37    def checkequalnofix(self, result, object, methodname, *args):
  38        method = getattr(object, methodname)
  39        realresult = method(*args)
  40        self.assertEqual(realresult, result)
  41        self.assert_(type(realresult) is type(result))
  42
  43        # if the original is returned make sure that
  44        # this doesn't happen with subclasses
  45        if realresult is object:
  46            class usub(unicode):
  47                def __repr__(self):
  48                    return 'usub(%r)' % unicode.__repr__(self)
  49            object = usub(object)
  50            method = getattr(object, methodname)
  51            realresult = method(*args)
  52            self.assertEqual(realresult, result)
  53            self.assert_(object is not realresult)
  54
  55    def test_literals(self):
  56        self.assertEqual(u'\xff', u'\u00ff')
  57        self.assertEqual(u'\uffff', u'\U0000ffff')
  58        self.assertRaises(SyntaxError, eval, 'u\'\\Ufffffffe\'')
  59        self.assertRaises(SyntaxError, eval, 'u\'\\Uffffffff\'')
  60        self.assertRaises(SyntaxError, eval, 'u\'\\U%08x\'' % 0x110000)
  61
  62    def test_repr(self):
  63        if not sys.platform.startswith('java'):
  64            # Test basic sanity of repr()
  65            self.assertEqual(repr(u'abc'), "u'abc'")
  66            self.assertEqual(repr(u'ab\\c'), "u'ab\\\\c'")
  67            self.assertEqual(repr(u'ab\\'), "u'ab\\\\'")
  68            self.assertEqual(repr(u'\\c'), "u'\\\\c'")
  69            self.assertEqual(repr(u'\\'), "u'\\\\'")
  70            self.assertEqual(repr(u'\n'), "u'\\n'")
  71            self.assertEqual(repr(u'\r'), "u'\\r'")
  72            self.assertEqual(repr(u'\t'), "u'\\t'")
  73            self.assertEqual(repr(u'\b'), "u'\\x08'")
  74            self.assertEqual(repr(u"'\""), """u'\\'"'""")
  75            self.assertEqual(repr(u"'\""), """u'\\'"'""")
  76            self.assertEqual(repr(u"'"), '''u"'"''')
  77            self.assertEqual(repr(u'"'), """u'"'""")
  78            latin1repr = (
  79                "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
  80                "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
  81                "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
  82                "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
  83                "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
  84                "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
  85                "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
  86                "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
  87                "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
  88                "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
  89                "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
  90                "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
  91                "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
  92                "\\xfe\\xff'")
  93            testrepr = repr(u''.join(map(unichr, xrange(256))))
  94            self.assertEqual(testrepr, latin1repr)
  95            # Test repr works on wide unicode escapes without overflow.
  96            self.assertEqual(repr(u"\U00010000" * 39 + u"\uffff" * 4096),
  97                             repr(u"\U00010000" * 39 + u"\uffff" * 4096))
  98
  99
 100    def test_count(self):
 101        string_tests.CommonTest.test_count(self)
 102        # check mixed argument types
 103        self.checkequalnofix(3,  'aaa', 'count', u'a')
 104        self.checkequalnofix(0,  'aaa', 'count', u'b')
 105        self.checkequalnofix(3, u'aaa', 'count',  'a')
 106        self.checkequalnofix(0, u'aaa', 'count',  'b')
 107        self.checkequalnofix(0, u'aaa', 'count',  'b')
 108        self.checkequalnofix(1, u'aaa', 'count',  'a', -1)
 109        self.checkequalnofix(3, u'aaa', 'count',  'a', -10)
 110        self.checkequalnofix(2, u'aaa', 'count',  'a', 0, -1)
 111        self.checkequalnofix(0, u'aaa', 'count',  'a', 0, -10)
 112
 113    def test_find(self):
 114        self.checkequalnofix(0,  u'abcdefghiabc', 'find', u'abc')
 115        self.checkequalnofix(9,  u'abcdefghiabc', 'find', u'abc', 1)
 116        self.checkequalnofix(-1, u'abcdefghiabc', 'find', u'def', 4)
 117
 118        self.assertRaises(TypeError, u'hello'.find)
 119        self.assertRaises(TypeError, u'hello'.find, 42)
 120
 121    def test_rfind(self):
 122        string_tests.CommonTest.test_rfind(self)
 123        # check mixed argument types
 124        self.checkequalnofix(9,   'abcdefghiabc', 'rfind', u'abc')
 125        self.checkequalnofix(12,  'abcdefghiabc', 'rfind', u'')
 126        self.checkequalnofix(12, u'abcdefghiabc', 'rfind',  '')
 127
 128    def test_index(self):
 129        string_tests.CommonTest.test_index(self)
 130        # check mixed argument types
 131        for (t1, t2) in ((str, unicode), (unicode, str)):
 132            self.checkequalnofix(0, t1('abcdefghiabc'), 'index',  t2(''))
 133            self.checkequalnofix(3, t1('abcdefghiabc'), 'index',  t2('def'))
 134            self.checkequalnofix(0, t1('abcdefghiabc'), 'index',  t2('abc'))
 135            self.checkequalnofix(9, t1('abcdefghiabc'), 'index',  t2('abc'), 1)
 136            self.assertRaises(ValueError, t1('abcdefghiabc').index, t2('hib'))
 137            self.assertRaises(ValueError, t1('abcdefghiab').index,  t2('abc'), 1)
 138            self.assertRaises(ValueError, t1('abcdefghi').index,  t2('ghi'), 8)
 139            self.assertRaises(ValueError, t1('abcdefghi').index,  t2('ghi'), -1)
 140
 141    def test_rindex(self):
 142        string_tests.CommonTest.test_rindex(self)
 143        # check mixed argument types
 144        for (t1, t2) in ((str, unicode), (unicode, str)):
 145            self.checkequalnofix(12, t1('abcdefghiabc'), 'rindex',  t2(''))
 146            self.checkequalnofix(3,  t1('abcdefghiabc'), 'rindex',  t2('def'))
 147            self.checkequalnofix(9,  t1('abcdefghiabc'), 'rindex',  t2('abc'))
 148            self.checkequalnofix(0,  t1('abcdefghiabc'), 'rindex',  t2('abc'), 0, -1)
 149
 150            self.assertRaises(ValueError, t1('abcdefghiabc').rindex,  t2('hib'))
 151            self.assertRaises(ValueError, t1('defghiabc').rindex,  t2('def'), 1)
 152            self.assertRaises(ValueError, t1('defghiabc').rindex,  t2('abc'), 0, -1)
 153            self.assertRaises(ValueError, t1('abcdefghi').rindex,  t2('ghi'), 0, 8)
 154            self.assertRaises(ValueError, t1('abcdefghi').rindex,  t2('ghi'), 0, -1)
 155
 156    def test_translate(self):
 157        self.checkequalnofix(u'bbbc', u'abababc', 'translate', {ord('a'):None})
 158        self.checkequalnofix(u'iiic', u'abababc', 'translate', {ord('a'):None, ord('b'):ord('i')})
 159        self.checkequalnofix(u'iiix', u'abababc', 'translate', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
 160        self.checkequalnofix(u'<i><i><i>c', u'abababc', 'translate', {ord('a'):None, ord('b'):u'<i>'})
 161        self.checkequalnofix(u'c', u'abababc', 'translate', {ord('a'):None, ord('b'):u''})
 162        self.checkequalnofix(u'xyyx', u'xzx', 'translate', {ord('z'):u'yy'})
 163
 164        self.assertRaises(TypeError, u'hello'.translate)
 165        self.assertRaises(TypeError, u'abababc'.translate, {ord('a'):''})
 166
 167    def test_split(self):
 168        string_tests.CommonTest.test_split(self)
 169
 170        # Mixed arguments
 171        self.checkequalnofix([u'a', u'b', u'c', u'd'], u'a//b//c//d', 'split', '//')
 172        self.checkequalnofix([u'a', u'b', u'c', u'd'], 'a//b//c//d', 'split', u'//')
 173        self.checkequalnofix([u'endcase ', u''], u'endcase test', 'split', 'test')
 174
 175    def test_join(self):
 176        string_tests.MixinStrUnicodeUserStringTest.test_join(self)
 177
 178        # mixed arguments
 179        self.checkequalnofix(u'a b c d', u' ', 'join', ['a', 'b', u'c', u'd'])
 180        self.checkequalnofix(u'abcd', u'', 'join', (u'a', u'b', u'c', u'd'))
 181        self.checkequalnofix(u'w x y z', u' ', 'join', string_tests.Sequence('wxyz'))
 182        self.checkequalnofix(u'a b c d', ' ', 'join', [u'a', u'b', u'c', u'd'])
 183        self.checkequalnofix(u'a b c d', ' ', 'join', ['a', 'b', u'c', u'd'])
 184        self.checkequalnofix(u'abcd', '', 'join', (u'a', u'b', u'c', u'd'))
 185        self.checkequalnofix(u'w x y z', ' ', 'join', string_tests.Sequence(u'wxyz'))
 186
 187    def test_strip(self):
 188        string_tests.CommonTest.test_strip(self)
 189        self.assertRaises(UnicodeError, u"hello".strip, "\xff")
 190
 191    def test_replace(self):
 192        string_tests.CommonTest.test_replace(self)
 193
 194        # method call forwarded from str implementation because of unicode argument
 195        self.checkequalnofix(u'one@two!three!', 'one!two!three!', 'replace', u'!', u'@', 1)
 196        self.assertRaises(TypeError, 'replace'.replace, u"r", 42)
 197
    def test_comparison(self):
        # Checks ==, > and < between every str/unicode operand pairing,
        # then one ordering check between two surrogate pairs.
        # Comparisons:
        self.assertEqual(u'abc', 'abc')
        self.assertEqual('abc', u'abc')
        self.assertEqual(u'abc', u'abc')
        self.assert_(u'abcd' > 'abc')
        self.assert_('abcd' > u'abc')
        self.assert_(u'abcd' > u'abc')
        self.assert_(u'abc' < 'abcd')
        self.assert_('abc' < u'abcd')
        self.assert_(u'abc' < u'abcd')

        # NOTE(review): the branch below is disabled and never executes.
        # As written, the two trailing test_fixup(...) calls sit inside
        # test_fixup's own body, so nothing outside ever calls it and a call
        # would recurse without terminating; presumably they belong one
        # indentation level out -- confirm before re-enabling.
        if 0:
            # Move these tests to a Unicode collation module test...
            # Testing UTF-16 code point order comparisons...

            # No surrogates, no fixup required.
            self.assert_(u'\u0061' < u'\u20ac')
            # Non surrogate below surrogate value, no fixup required
            self.assert_(u'\u0061' < u'\ud800\udc02')

            # Non surrogate above surrogate value, fixup required
            def test_lecmp(s, s2):
                self.assert_(s < s2)

            def test_fixup(s):
                s2 = u'\ud800\udc01'
                test_lecmp(s, s2)
                s2 = u'\ud900\udc01'
                test_lecmp(s, s2)
                s2 = u'\uda00\udc01'
                test_lecmp(s, s2)
                s2 = u'\udb00\udc01'
                test_lecmp(s, s2)
                s2 = u'\ud800\udd01'
                test_lecmp(s, s2)
                s2 = u'\ud900\udd01'
                test_lecmp(s, s2)
                s2 = u'\uda00\udd01'
                test_lecmp(s, s2)
                s2 = u'\udb00\udd01'
                test_lecmp(s, s2)
                s2 = u'\ud800\ude01'
                test_lecmp(s, s2)
                s2 = u'\ud900\ude01'
                test_lecmp(s, s2)
                s2 = u'\uda00\ude01'
                test_lecmp(s, s2)
                s2 = u'\udb00\ude01'
                test_lecmp(s, s2)
                s2 = u'\ud800\udfff'
                test_lecmp(s, s2)
                s2 = u'\ud900\udfff'
                test_lecmp(s, s2)
                s2 = u'\uda00\udfff'
                test_lecmp(s, s2)
                s2 = u'\udb00\udfff'
                test_lecmp(s, s2)

                test_fixup(u'\ue000')
                test_fixup(u'\uff61')

        # Surrogates on both sides, no fixup required
        self.assert_(u'\ud800\udc02' < u'\ud84d\udc56')
 262
 263    def test_islower(self):
 264        string_tests.MixinStrUnicodeUserStringTest.test_islower(self)
 265        self.checkequalnofix(False, u'\u1FFc', 'islower')
 266
 267    def test_isupper(self):
 268        string_tests.MixinStrUnicodeUserStringTest.test_isupper(self)
 269        if not sys.platform.startswith('java'):
 270            self.checkequalnofix(False, u'\u1FFc', 'isupper')
 271
 272    def test_istitle(self):
 273        string_tests.MixinStrUnicodeUserStringTest.test_title(self)
 274        self.checkequalnofix(True, u'\u1FFc', 'istitle')
 275        self.checkequalnofix(True, u'Greek \u1FFcitlecases ...', 'istitle')
 276
 277    def test_isspace(self):
 278        string_tests.MixinStrUnicodeUserStringTest.test_isspace(self)
 279        self.checkequalnofix(True, u'\u2000', 'isspace')
 280        self.checkequalnofix(True, u'\u200a', 'isspace')
 281        self.checkequalnofix(False, u'\u2014', 'isspace')
 282
 283    def test_isalpha(self):
 284        string_tests.MixinStrUnicodeUserStringTest.test_isalpha(self)
 285        self.checkequalnofix(True, u'\u1FFc', 'isalpha')
 286
 287    def test_isdecimal(self):
 288        self.checkequalnofix(False, u'', 'isdecimal')
 289        self.checkequalnofix(False, u'a', 'isdecimal')
 290        self.checkequalnofix(True, u'0', 'isdecimal')
 291        self.checkequalnofix(False, u'\u2460', 'isdecimal') # CIRCLED DIGIT ONE
 292        self.checkequalnofix(False, u'\xbc', 'isdecimal') # VULGAR FRACTION ONE QUARTER
 293        self.checkequalnofix(True, u'\u0660', 'isdecimal') # ARABIC-INDIC DIGIT ZERO
 294        self.checkequalnofix(True, u'0123456789', 'isdecimal')
 295        self.checkequalnofix(False, u'0123456789a', 'isdecimal')
 296
 297        self.checkraises(TypeError, 'abc', 'isdecimal', 42)
 298
 299    def test_isdigit(self):
 300        string_tests.MixinStrUnicodeUserStringTest.test_isdigit(self)
 301        self.checkequalnofix(True, u'\u2460', 'isdigit')
 302        self.checkequalnofix(False, u'\xbc', 'isdigit')
 303        self.checkequalnofix(True, u'\u0660', 'isdigit')
 304
 305    def test_isnumeric(self):
 306        self.checkequalnofix(False, u'', 'isnumeric')
 307        self.checkequalnofix(False, u'a', 'isnumeric')
 308        self.checkequalnofix(True, u'0', 'isnumeric')
 309        self.checkequalnofix(True, u'\u2460', 'isnumeric')
 310        self.checkequalnofix(True, u'\xbc', 'isnumeric')
 311        self.checkequalnofix(True, u'\u0660', 'isnumeric')
 312        self.checkequalnofix(True, u'0123456789', 'isnumeric')
 313        self.checkequalnofix(False, u'0123456789a', 'isnumeric')
 314
 315        self.assertRaises(TypeError, u"abc".isnumeric, 42)
 316
 317    def test_contains(self):
 318        # Testing Unicode contains method
 319        self.assert_('a' in u'abdb')
 320        self.assert_('a' in u'bdab')
 321        self.assert_('a' in u'bdaba')
 322        self.assert_('a' in u'bdba')
 323        self.assert_('a' in u'bdba')
 324        self.assert_(u'a' in u'bdba')
 325        self.assert_(u'a' not in u'bdb')
 326        self.assert_(u'a' not in 'bdb')
 327        self.assert_(u'a' in 'bdba')
 328        self.assert_(u'a' in ('a',1,None))
 329        self.assert_(u'a' in (1,None,'a'))
 330        self.assert_(u'a' in (1,None,u'a'))
 331        self.assert_('a' in ('a',1,None))
 332        self.assert_('a' in (1,None,'a'))
 333        self.assert_('a' in (1,None,u'a'))
 334        self.assert_('a' not in ('x',1,u'y'))
 335        self.assert_('a' not in ('x',1,None))
 336        self.assert_(u'abcd' not in u'abcxxxx')
 337        self.assert_(u'ab' in u'abcd')
 338        self.assert_('ab' in u'abc')
 339        self.assert_(u'ab' in 'abc')
 340        self.assert_(u'ab' in (1,None,u'ab'))
 341        self.assert_(u'' in u'abc')
 342        self.assert_('' in u'abc')
 343
 344        # If the following fails either
 345        # the contains operator does not propagate UnicodeErrors or
 346        # someone has changed the default encoding
 347        self.assertRaises(UnicodeError, 'g\xe2teau'.__contains__, u'\xe2')
 348
 349        self.assert_(u'' in '')
 350        self.assert_('' in u'')
 351        self.assert_(u'' in u'')
 352        self.assert_(u'' in 'abc')
 353        self.assert_('' in u'abc')
 354        self.assert_(u'' in u'abc')
 355        self.assert_(u'\0' not in 'abc')
 356        self.assert_('\0' not in u'abc')
 357        self.assert_(u'\0' not in u'abc')
 358        self.assert_(u'\0' in '\0abc')
 359        self.assert_('\0' in u'\0abc')
 360        self.assert_(u'\0' in u'\0abc')
 361        self.assert_(u'\0' in 'abc\0')
 362        self.assert_('\0' in u'abc\0')
 363        self.assert_(u'\0' in u'abc\0')
 364        self.assert_(u'a' in '\0abc')
 365        self.assert_('a' in u'\0abc')
 366        self.assert_(u'a' in u'\0abc')
 367        self.assert_(u'asdf' in 'asdf')
 368        self.assert_('asdf' in u'asdf')
 369        self.assert_(u'asdf' in u'asdf')
 370        self.assert_(u'asdf' not in 'asd')
 371        self.assert_('asdf' not in u'asd')
 372        self.assert_(u'asdf' not in u'asd')
 373        self.assert_(u'asdf' not in '')
 374        self.assert_('asdf' not in u'')
 375        self.assert_(u'asdf' not in u'')
 376
 377        self.assertRaises(TypeError, u"abc".__contains__)
 378
 379    def test_formatting(self):
 380        string_tests.MixinStrUnicodeUserStringTest.test_formatting(self)
 381        # Testing Unicode formatting strings...
 382        self.assertEqual(u"%s, %s" % (u"abc", "abc"), u'abc, abc')
 383        self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3), u'abc, abc, 1, 2.000000,  3.00')
 384        self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3), u'abc, abc, 1, -2.000000,  3.00')
 385        self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5), u'abc, abc, -1, -2.000000,  3.50')
 386        self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57), u'abc, abc, -1, -2.000000,  3.57')
 387        self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57), u'abc, abc, -1, -2.000000, 1003.57')
 388        if not sys.platform.startswith('java'):
 389            self.assertEqual(u"%r, %r" % (u"abc", "abc"), u"u'abc', 'abc'")
 390        self.assertEqual(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"}, u'abc, def')
 391        self.assertEqual(u"%(x)s, %(\xfc)s" % {'x':u"abc", u'\xfc':"def"}, u'abc, def')
 392
 393        self.assertEqual(u'%c' % 0x1234, u'\u1234')
 394        self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))
 395
 396        # formatting jobs delegated from the string implementation:
 397        self.assertEqual('...%(foo)s...' % {'foo':u"abc"}, u'...abc...')
 398        self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
 399        self.assertEqual('...%(foo)s...' % {u'foo':"abc"}, '...abc...')
 400        self.assertEqual('...%(foo)s...' % {u'foo':u"abc"}, u'...abc...')
 401        self.assertEqual('...%(foo)s...' % {u'foo':u"abc",'def':123},  u'...abc...')
 402        self.assertEqual('...%(foo)s...' % {u'foo':u"abc",u'def':123}, u'...abc...')
 403        self.assertEqual('...%s...%s...%s...%s...' % (1,2,3,u"abc"), u'...1...2...3...abc...')
 404        self.assertEqual('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc"), u'...%...%s...1...2...3...abc...')
 405        self.assertEqual('...%s...' % u"abc", u'...abc...')
 406        self.assertEqual('%*s' % (5,u'abc',), u'  abc')
 407        self.assertEqual('%*s' % (-5,u'abc',), u'abc  ')
 408        self.assertEqual('%*.*s' % (5,2,u'abc',), u'   ab')
 409        self.assertEqual('%*.*s' % (5,3,u'abc',), u'  abc')
 410        self.assertEqual('%i %*.*s' % (10, 5,3,u'abc',), u'10   abc')
 411        self.assertEqual('%i%s %*.*s' % (10, 3, 5, 3, u'abc',), u'103   abc')
 412        self.assertEqual('%c' % u'a', u'a')
 413        class Wrapper:
 414            def __str__(self):
 415                return u'\u1234'
 416        self.assertEqual('%s' % Wrapper(), u'\u1234')
 417
 418    @test_support.run_with_locale('LC_ALL', 'de_DE', 'fr_FR')
 419    def test_format_float(self):
 420        # should not format with a comma, but always with C locale
 421        self.assertEqual(u'1.0', u'%.1f' % 1.0)
 422
 423    def test_constructor(self):
 424        # unicode(obj) tests (this maps to PyObject_Unicode() at C level)
 425
 426        self.assertEqual(
 427            unicode(u'unicode remains unicode'),
 428            u'unicode remains unicode'
 429        )
 430
 431        class UnicodeSubclass(unicode):
 432            pass
 433
 434        self.assertEqual(
 435            unicode(UnicodeSubclass('unicode subclass becomes unicode')),
 436            u'unicode subclass becomes unicode'
 437        )
 438
 439        self.assertEqual(
 440            unicode('strings are converted to unicode'),
 441            u'strings are converted to unicode'
 442        )
 443
 444        class UnicodeCompat:
 445            def __init__(self, x):
 446                self.x = x
 447            def __unicode__(self):
 448                return self.x
 449
 450        self.assertEqual(
 451            unicode(UnicodeCompat('__unicode__ compatible objects are recognized')),
 452            u'__unicode__ compatible objects are recognized')
 453
 454        class StringCompat:
 455            def __init__(self, x):
 456                self.x = x
 457            def __str__(self):
 458                return self.x
 459
 460        self.assertEqual(
 461            unicode(StringCompat('__str__ compatible objects are recognized')),
 462            u'__str__ compatible objects are recognized'
 463        )
 464
 465        # unicode(obj) is compatible to str():
 466
 467        o = StringCompat('unicode(obj) is compatible to str()')
 468        self.assertEqual(unicode(o), u'unicode(obj) is compatible to str()')
 469        self.assertEqual(str(o), 'unicode(obj) is compatible to str()')
 470
 471        # %-formatting and .__unicode__()
 472        self.assertEqual(u'%s' %
 473                         UnicodeCompat(u"u'%s' % obj uses obj.__unicode__()"),
 474                         u"u'%s' % obj uses obj.__unicode__()")
 475        self.assertEqual(u'%s' %
 476                         UnicodeCompat(u"u'%s' % obj falls back to obj.__str__()"),
 477                         u"u'%s' % obj falls back to obj.__str__()")
 478
 479        for obj in (123, 123.45, 123L):
 480            self.assertEqual(unicode(obj), unicode(str(obj)))
 481
 482        # unicode(obj, encoding, error) tests (this maps to
 483        # PyUnicode_FromEncodedObject() at C level)
 484
 485        if not sys.platform.startswith('java'):
 486            self.assertRaises(
 487                TypeError,
 488                unicode,
 489                u'decoding unicode is not supported',
 490                'utf-8',
 491                'strict'
 492            )
 493
 494        self.assertEqual(
 495            unicode('strings are decoded to unicode', 'utf-8', 'strict'),
 496            u'strings are decoded to unicode'
 497        )
 498
 499        if not sys.platform.startswith('java'):
 500            self.assertEqual(
 501                unicode(
 502                    buffer('character buffers are decoded to unicode'),
 503                    'utf-8',
 504                    'strict'
 505                ),
 506                u'character buffers are decoded to unicode'
 507            )
 508
 509        self.assertRaises(TypeError, unicode, 42, 42, 42)
 510
 511    def test_codecs_utf7(self):
 512        utfTests = [
 513            (u'A\u2262\u0391.', 'A+ImIDkQ.'),             # RFC2152 example
 514            (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'),     # RFC2152 example
 515            (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'),        # RFC2152 example
 516            (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
 517            (u'+', '+-'),
 518            (u'+-', '+--'),
 519            (u'+?', '+-?'),
 520            (u'\?', '+AFw?'),
 521            (u'+?', '+-?'),
 522            (ur'\\?', '+AFwAXA?'),
 523            (ur'\\\?', '+AFwAXABc?'),
 524            (ur'++--', '+-+---')
 525        ]
 526
 527        for (x, y) in utfTests:
 528            self.assertEqual(x.encode('utf-7'), y)
 529
 530        # surrogates not supported
 531        self.assertRaises(UnicodeError, unicode, '+3ADYAA-', 'utf-7')
 532
 533        self.assertEqual(unicode('+3ADYAA-', 'utf-7', 'replace'), u'\ufffd')
 534
 535        # Issue #2242: crash on some Windows/MSVC versions
 536        self.assertRaises(UnicodeDecodeError, '+\xc1'.decode, 'utf-7')
 537
 538    def test_codecs_utf8(self):
 539        self.assertEqual(u''.encode('utf-8'), '')
 540        self.assertEqual(u'\u20ac'.encode('utf-8'), '\xe2\x82\xac')
 541        self.assertEqual(u'\ud800\udc02'.encode('utf-8'), '\xf0\x90\x80\x82')
 542        self.assertEqual(u'\ud84d\udc56'.encode('utf-8'), '\xf0\xa3\x91\x96')
 543        self.assertEqual(u'\ud800'.encode('utf-8'), '\xed\xa0\x80')
 544        self.assertEqual(u'\udc00'.encode('utf-8'), '\xed\xb0\x80')
 545        self.assertEqual(
 546            (u'\ud800\udc02'*1000).encode('utf-8'),
 547            '\xf0\x90\x80\x82'*1000
 548        )
 549        self.assertEqual(
 550            u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
 551            u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
 552            u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
 553            u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
 554            u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
 555            u' Nunstuck git und'.encode('utf-8'),
 556            '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
 557            '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
 558            '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
 559            '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
 560            '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
 561            '\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
 562            '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
 563            '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
 564            '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
 565            '\xe3\x80\x8cWenn ist das Nunstuck git und'
 566        )
 567
 568        # UTF-8 specific decoding tests
 569        self.assertEqual(unicode('\xf0\xa3\x91\x96', 'utf-8'), u'\U00023456' )
 570        self.assertEqual(unicode('\xf0\x90\x80\x82', 'utf-8'), u'\U00010002' )
 571        self.assertEqual(unicode('\xe2\x82\xac', 'utf-8'), u'\u20ac' )
 572
 573        # Other possible utf-8 test cases:
 574        # * strict decoding testing for all of the
 575        #   UTF8_ERROR cases in PyUnicode_DecodeUTF8
 576
 577    def test_codecs_idna(self):
 578        # Test whether trailing dot is preserved
 579        self.assertEqual(u"www.python.org.".encode("idna"), "www.python.org.")
 580
 581    def test_codecs_errors(self):
 582        # Error handling (encoding)
 583        self.assertRaises(UnicodeError, u'Andr\202 x'.encode, 'ascii')
 584        self.assertRaises(UnicodeError, u'Andr\202 x'.encode, 'ascii','strict')
 585        self.assertEqual(u'Andr\202 x'.encode('ascii','ignore'), "Andr x")
 586        self.assertEqual(u'Andr\202 x'.encode('ascii','replace'), "Andr? x")
 587
 588        # Error handling (decoding)
 589        self.assertRaises(UnicodeError, unicode, 'Andr\202 x', 'ascii')
 590        self.assertRaises(UnicodeError, unicode, 'Andr\202 x', 'ascii','strict')
 591        self.assertEqual(unicode('Andr\202 x','ascii','ignore'), u"Andr x")
 592        self.assertEqual(unicode('Andr\202 x','ascii','replace'), u'Andr\uFFFD x')
 593
 594        # Error handling (unknown character names)
 595        self.assertEqual("\\N{foo}xx".decode("unicode-escape", "ignore"), u"xx")
 596
 597        # Error handling (truncated escape sequence)
 598        self.assertRaises(UnicodeError, "\\".decode, "unicode-escape")
 599
 600        self.assertRaises(TypeError, "hello".decode, "test.unicode1")
 601        self.assertRaises(TypeError, unicode, "hello", "test.unicode2")
 602        self.assertRaises(TypeError, u"hello".encode, "test.unicode1")
 603        self.assertRaises(TypeError, u"hello".encode, "test.unicode2")
 604        # executes PyUnicode_Encode()
 605        import imp
 606        self.assertRaises(
 607            ImportError,
 608            imp.find_module,
 609            "non-existing module",
 610            [u"non-existing dir"]
 611        )
 612
 613        # Error handling (wrong arguments)
 614        self.assertRaises(TypeError, u"hello".encode, 42, 42, 42)
 615
 616        # Error handling (PyUnicode_EncodeDecimal())
 617        self.assertRaises(UnicodeError, int, u"\u0200")
 618
 619    def test_codecs(self):
 620        # Encoding
 621        self.assertEqual(u'hello'.encode('ascii'), 'hello')
 622        self.assertEqual(u'hello'.encode('utf-7'), 'hello')
 623        self.assertEqual(u'hello'.encode('utf-8'), 'hello')
 624        self.assertEqual(u'hello'.encode('utf8'), 'hello')
 625        self.assertEqual(u'hello'.encode('utf-16-le'), 'h\000e\000l\000l\000o\000')
 626        self.assertEqual(u'hello'.encode('utf-16-be'), '\000h\000e\000l\000l\000o')
 627        self.assertEqual(u'hello'.encode('latin-1'), 'hello')
 628
 629        # Roundtrip safety for BMP (just the first 1024 chars)
 630        for c in xrange(1024):
 631            u = unichr(c)
 632            for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le',
 633                             'utf-16-be', 'raw_unicode_escape',
 634                             'unicode_escape', 'unicode_internal'):
 635                self.assertEqual(unicode(u.encode(encoding),encoding), u)
 636
 637        # Roundtrip safety for BMP (just the first 256 chars)
 638        for c in xrange(256):
 639            u = unichr(c)
 640            for encoding in ('latin-1',):
 641                self.assertEqual(unicode(u.encode(encoding),encoding), u)
 642
 643        # Roundtrip safety for BMP (just the first 128 chars)
 644        for c in xrange(128):
 645            u = unichr(c)
 646            for encoding in ('ascii',):
 647                self.assertEqual(unicode(u.encode(encoding),encoding), u)
 648
 649        # Roundtrip safety for non-BMP (just a few chars)
 650        u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
 651        for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
 652                         #'raw_unicode_escape',
 653                         'unicode_escape', 'unicode_internal'):
 654            self.assertEqual(unicode(u.encode(encoding),encoding), u)
 655
 656        # UTF-8 must be roundtrip safe for all UCS-2 code points
 657        # This excludes surrogates: in the full range, there would be
 658        # a surrogate pair (\udbff\udc00), which gets converted back
 659        # to a non-BMP character (\U0010fc00)
 660        u = u''.join(map(unichr, range(0,0xd800)+range(0xe000,0x10000)))
 661        for encoding in ('utf-8',):
 662            self.assertEqual(unicode(u.encode(encoding),encoding), u)
 663
 664    def test_codecs_charmap(self):
 665        # 0-127
 666        s = ''.join(map(chr, xrange(128)))
 667        for encoding in (
 668            'cp037', 'cp1026',
 669            'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
 670            'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
 671            'cp863', 'cp865', 'cp866',
 672            'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
 673            'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
 674            'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
 675            'mac_cyrillic', 'mac_latin2',
 676
 677            'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
 678            'cp1256', 'cp1257', 'cp1258',
 679            'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
 680
 681            'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
 682            'cp1006', 'iso8859_8',
 683
 684            ### These have undefined mappings:
 685            #'cp424',
 686
 687            ### These fail the round-trip:
 688            #'cp875'
 689
 690            ):
 691            self.assertEqual(unicode(s, encoding).encode(encoding), s)
 692
 693        # 128-255
 694        s = ''.join(map(chr, xrange(128, 256)))
 695        for encoding in (
 696            'cp037', 'cp1026',
 697            'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
 698            'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
 699            'cp863', 'cp865', 'cp866',
 700            'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
 701            'iso8859_2', 'iso8859_4', 'iso8859_5',
 702            'iso8859_9', 'koi8_r', 'latin_1',
 703            'mac_cyrillic', 'mac_latin2',
 704
 705            ### These have undefined mappings:
 706            #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
 707            #'cp1256', 'cp1257', 'cp1258',
 708            #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
 709            #'iso8859_3', 'iso8859_6', 'iso8859_7',
 710            #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
 711
 712            ### These fail the round-trip:
 713            #'cp1006', 'cp875', 'iso8859_8',
 714
 715            ):
 716            self.assertEqual(unicode(s, encoding).encode(encoding), s)
 717
 718    def test_concatenation(self):
 719        self.assertEqual((u"abc" u"def"), u"abcdef")
 720        self.assertEqual(("abc" u"def"), u"abcdef")
 721        self.assertEqual((u"abc" "def"), u"abcdef")
 722        self.assertEqual((u"abc" u"def" "ghi"), u"abcdefghi")
 723        self.assertEqual(("abc" "def" u"ghi"), u"abcdefghi")
 724
    def test_printing(self):
        """Exercise the ``print >>file`` statement with unicode arguments.

        Nothing is asserted: the test passes as long as none of the print
        statements below raises.
        """
        # Minimal file-like object: accepts writes and discards them.
        class BitBucket:
            def write(self, text):
                pass

        out = BitBucket()
        # Single and mixed unicode/str arguments.
        print >>out, u'abc'
        print >>out, u'abc', u'def'
        print >>out, u'abc', 'def'
        print >>out, 'abc', u'def'
        # Text that already ends in a newline, with and without the trailing
        # comma (which stops print from appending its own newline).
        print >>out, u'abc\n'
        print >>out, u'abc\n',
        print >>out, u'abc\n',
        print >>out, u'def\n'
        print >>out, u'def\n'
 740
 741    def test_ucs4(self):
 742        x = u'\U00100000'
 743        y = x.encode("raw-unicode-escape").decode("raw-unicode-escape")
 744        self.assertEqual(x, y)
 745
 746        y = r'\U00100000'
 747        x = y.decode("raw-unicode-escape").encode("raw-unicode-escape")
 748        self.assertEqual(x, y)
 749        y = r'\U00010000'
 750        x = y.decode("raw-unicode-escape").encode("raw-unicode-escape")
 751        self.assertEqual(x, y)
 752
 753        try:
 754            '\U11111111'.decode("raw-unicode-escape")
 755        except UnicodeDecodeError as e:
 756            self.assertEqual(e.start, 0)
 757            self.assertEqual(e.end, 10)
 758        else:
 759            self.fail("Should have raised UnicodeDecodeError")
 760
 761    def test_conversion(self):
 762        # Make sure __unicode__() works properly
 763        class Foo0:
 764            def __str__(self):
 765                return "foo"
 766
 767        class Foo1:
 768            def __unicode__(self):
 769                return u"foo"
 770
 771        class Foo2(object):
 772            def __unicode__(self):
 773                return u"foo"
 774
 775        class Foo3(object):
 776            def __unicode__(self):
 777                return "foo"
 778
 779        class Foo4(str):
 780            def __unicode__(self):
 781                return "foo"
 782
 783        class Foo5(unicode):
 784            def __unicode__(self):
 785                return "foo"
 786
 787        class Foo6(str):
 788            def __str__(self):
 789                return "foos"
 790
 791            def __unicode__(self):
 792                return u"foou"
 793
 794        class Foo7(unicode):
 795            def __str__(self):
 796                return "foos"
 797            def __unicode__(self):
 798                return u"foou"
 799
 800        class Foo8(unicode):
 801            def __new__(cls, content=""):
 802                return unicode.__new__(cls, 2*content)
 803            def __unicode__(self):
 804                return self
 805
 806        class Foo9(unicode):
 807            def __str__(self):
 808                return "string"
 809            def __unicode__(self):
 810                return "not unicode"
 811
 812        self.assertEqual(unicode(Foo0()), u"foo")
 813        self.assertEqual(unicode(Foo1()), u"foo")
 814        self.assertEqual(unicode(Foo2()), u"foo")
 815        self.assertEqual(unicode(Foo3()), u"foo")
 816        self.assertEqual(unicode(Foo4("bar")), u"foo")
 817        self.assertEqual(unicode(Foo5("bar")), u"foo")
 818        self.assertEqual(unicode(Foo6("bar")), u"foou")
 819        self.assertEqual(unicode(Foo7("bar")), u"foou")
 820        self.assertEqual(unicode(Foo8("foo")), u"foofoo")
 821        self.assertEqual(str(Foo9("foo")), "string")
 822        self.assertEqual(unicode(Foo9("foo")), u"not unicode")
 823
 824    def test_unicode_repr(self):
 825        class s1:
 826            def __repr__(self):
 827                return '\\n'
 828
 829        class s2:
 830            def __repr__(self):
 831                return u'\\n'
 832
 833        self.assertEqual(repr(s1()), '\\n')
 834        self.assertEqual(repr(s2()), '\\n')
 835
 836    def test_expandtabs_overflows_gracefully(self):
 837        # This test only affects 32-bit platforms because expandtabs can only take
 838        # an int as the max value, not a 64-bit C long.  If expandtabs is changed
 839        # to take a 64-bit long, this test should apply to all platforms.
 840        if sys.maxint > (1 << 32) or struct.calcsize('P') != 4:
 841            return
 842        self.assertRaises(OverflowError, u't\tt\t'.expandtabs, sys.maxint)
 843
 844    def test__format__(self):
 845        def test(value, format, expected):
 846            # test both with and without the trailing 's'
 847            self.assertEqual(value.__format__(format), expected)
 848            self.assertEqual(value.__format__(format + u's'), expected)
 849
 850        test(u'', u'', u'')
 851        test(u'abc', u'', u'abc')
 852        test(u'abc', u'.3', u'abc')
 853        test(u'ab', u'.3', u'ab')
 854        test(u'abcdef', u'.3', u'abc')
 855        test(u'abcdef', u'.0', u'')
 856        test(u'abc', u'3.3', u'abc')
 857        test(u'abc', u'2.3', u'abc')
 858        test(u'abc', u'2.2', u'ab')
 859        test(u'abc', u'3.2', u'ab ')
 860        test(u'result', u'x<0', u'result')
 861        test(u'result', u'x<5', u'result')
 862        test(u'result', u'x<6', u'result')
 863        test(u'result', u'x<7', u'resultx')
 864        test(u'result', u'x<8', u'resultxx')
 865        test(u'result', u' <7', u'result ')
 866        test(u'result', u'<7', u'result ')
 867        test(u'result', u'>7', u' result')
 868        test(u'result', u'>8', u'  result')
 869        test(u'result', u'^8', u' result ')
 870        test(u'result', u'^9', u' result  ')
 871        test(u'result', u'^10', u'  result  ')
 872        test(u'a', u'10000', u'a' + u' ' * 9999)
 873        test(u'', u'10000', u' ' * 10000)
 874        test(u'', u'10000000', u' ' * 10000000)
 875
 876        # test mixing unicode and str
 877        self.assertEqual(u'abc'.__format__('s'), u'abc')
 878        self.assertEqual(u'abc'.__format__('->10s'), u'-------abc')
 879
    def test_format(self):
        """Exercise unicode.format() with a wide range of inputs.

        Covers brace escaping, positional/keyword/attribute/index field
        names, string format specs, !r and !s conversions, user-defined
        __format__ methods, nested (computed) format specs, malformed
        format strings, and mixing str with unicode.
        """
        # Literal text and doubled-brace escapes, no replacement fields.
        self.assertEqual(u''.format(), u'')
        self.assertEqual(u'a'.format(), u'a')
        self.assertEqual(u'ab'.format(), u'ab')
        self.assertEqual(u'a{{'.format(), u'a{')
        self.assertEqual(u'a}}'.format(), u'a}')
        self.assertEqual(u'{{b'.format(), u'{b')
        self.assertEqual(u'}}b'.format(), u'}b')
        self.assertEqual(u'a{{b'.format(), u'a{b')

        # examples from the PEP:
        import datetime
        self.assertEqual(u"My name is {0}".format(u'Fred'), u"My name is Fred")
        self.assertEqual(u"My name is {0[name]}".format(dict(name=u'Fred')),
                         u"My name is Fred")
        self.assertEqual(u"My name is {0} :-{{}}".format(u'Fred'),
                         u"My name is Fred :-{}")

        # datetime.__format__ doesn't work with unicode
        #d = datetime.date(2007, 8, 18)
        #self.assertEqual("The year is {0.year}".format(d),
        #                 "The year is 2007")

        # classes we'll use for testing
        # C: __format__ returns the format spec it was given, unchanged
        class C:
            def __init__(self, x=100):
                self._x = x
            def __format__(self, spec):
                return spec

        # D: __format__ ignores the spec and returns str(self.x)
        class D:
            def __init__(self, x):
                self.x = x
            def __format__(self, spec):
                return str(self.x)

        # class with __str__, but no __format__
        class E:
            def __init__(self, x):
                self.x = x
            def __str__(self):
                return u'E(' + self.x + u')'

        # class with __repr__, but no __format__ or __str__
        class F:
            def __init__(self, x):
                self.x = x
            def __repr__(self):
                return u'F(' + self.x + u')'

        # class with __format__ that forwards to string, for some format_spec's
        class G:
            def __init__(self, x):
                self.x = x
            def __str__(self):
                return u"string is " + self.x
            def __format__(self, format_spec):
                if format_spec == 'd':
                    return u'G(' + self.x + u')'
                return object.__format__(self, format_spec)

        # class that returns a bad type from __format__
        class H:
            def __format__(self, format_spec):
                return 1.0

        # I: date subclass that treats the format spec as a strftime pattern
        class I(datetime.date):
            def __format__(self, format_spec):
                return self.strftime(format_spec)

        # J: int subclass that formats twice its own value
        class J(int):
            def __format__(self, format_spec):
                return int.__format__(self * 2, format_spec)


        # Positional fields mixed with literal text and escaped braces.
        self.assertEqual(u''.format(), u'')
        self.assertEqual(u'abc'.format(), u'abc')
        self.assertEqual(u'{0}'.format(u'abc'), u'abc')
        self.assertEqual(u'{0:}'.format(u'abc'), u'abc')
        self.assertEqual(u'X{0}'.format(u'abc'), u'Xabc')
        self.assertEqual(u'{0}X'.format(u'abc'), u'abcX')
        self.assertEqual(u'X{0}Y'.format(u'abc'), u'XabcY')
        self.assertEqual(u'{1}'.format(1, u'abc'), u'abc')
        self.assertEqual(u'X{1}'.format(1, u'abc'), u'Xabc')
        self.assertEqual(u'{1}X'.format(1, u'abc'), u'abcX')
        self.assertEqual(u'X{1}Y'.format(1, u'abc'), u'XabcY')
        self.assertEqual(u'{0}'.format(-15), u'-15')
        self.assertEqual(u'{0}{1}'.format(-15, u'abc'), u'-15abc')
        self.assertEqual(u'{0}X{1}'.format(-15, u'abc'), u'-15Xabc')
        self.assertEqual(u'{{'.format(), u'{')
        self.assertEqual(u'}}'.format(), u'}')
        self.assertEqual(u'{{}}'.format(), u'{}')
        self.assertEqual(u'{{x}}'.format(), u'{x}')
        self.assertEqual(u'{{{0}}}'.format(123), u'{123}')
        self.assertEqual(u'{{{{0}}}}'.format(), u'{{0}}')
        self.assertEqual(u'}}{{'.format(), u'}{')
        self.assertEqual(u'}}x{{'.format(), u'}x{')

        # weird field names
        self.assertEqual(u"{0[foo-bar]}".format({u'foo-bar':u'baz'}), u'baz')
        self.assertEqual(u"{0[foo bar]}".format({u'foo bar':u'baz'}), u'baz')
        self.assertEqual(u"{0[ ]}".format({u' ':3}), u'3')

        # attribute and index lookups inside the field name
        self.assertEqual(u'{foo._x}'.format(foo=C(20)), u'20')
        self.assertEqual(u'{1}{0}'.format(D(10), D(20)), u'2010')
        self.assertEqual(u'{0._x.x}'.format(C(D(u'abc'))), u'abc')
        self.assertEqual(u'{0[0]}'.format([u'abc', u'def']), u'abc')
        self.assertEqual(u'{0[1]}'.format([u'abc', u'def']), u'def')
        self.assertEqual(u'{0[1][0]}'.format([u'abc', [u'def']]), u'def')
        self.assertEqual(u'{0[1][0].x}'.format(['abc', [D(u'def')]]), u'def')

        # strings
        self.assertEqual(u'{0:.3s}'.format(u'abc'), u'abc')
        self.assertEqual(u'{0:.3s}'.format(u'ab'), u'ab')
        self.assertEqual(u'{0:.3s}'.format(u'abcdef'), u'abc')
        self.assertEqual(u'{0:.0s}'.format(u'abcdef'), u'')
        self.assertEqual(u'{0:3.3s}'.format(u'abc'), u'abc')
        self.assertEqual(u'{0:2.3s}'.format(u'abc'), u'abc')
        self.assertEqual(u'{0:2.2s}'.format(u'abc'), u'ab')
        self.assertEqual(u'{0:3.2s}'.format(u'abc'), u'ab ')
        self.assertEqual(u'{0:x<0s}'.format(u'result'), u'result')
        self.assertEqual(u'{0:x<5s}'.format(u'result'), u'result')
        self.assertEqual(u'{0:x<6s}'.format(u'result'), u'result')
        self.assertEqual(u'{0:x<7s}'.format(u'result'), u'resultx')
        self.assertEqual(u'{0:x<8s}'.format(u'result'), u'resultxx')
        self.assertEqual(u'{0: <7s}'.format(u'result'), u'result ')
        self.assertEqual(u'{0:<7s}'.format(u'result'), u'result ')
        self.assertEqual(u'{0:>7s}'.format(u'result'), u' result')
        self.assertEqual(u'{0:>8s}'.format(u'result'), u'  result')
        self.assertEqual(u'{0:^8s}'.format(u'result'), u' result ')
        self.assertEqual(u'{0:^9s}'.format(u'result'), u' result  ')
        self.assertEqual(u'{0:^10s}'.format(u'result'), u'  result  ')
        self.assertEqual(u'{0:10000}'.format(u'a'), u'a' + u' ' * 9999)
        self.assertEqual(u'{0:10000}'.format(u''), u' ' * 10000)
        self.assertEqual(u'{0:10000000}'.format(u''), u' ' * 10000000)

        # format specifiers for user defined type
        self.assertEqual(u'{0:abc}'.format(C()), u'abc')

        # !r and !s coercions
        self.assertEqual(u'{0!s}'.format(u'Hello'), u'Hello')
        self.assertEqual(u'{0!s:}'.format(u'Hello'), u'Hello')
        self.assertEqual(u'{0!s:15}'.format(u'Hello'), u'Hello          ')
        self.assertEqual(u'{0!s:15s}'.format(u'Hello'), u'Hello          ')
        self.assertEqual(u'{0!r}'.format(u'Hello'), u"u'Hello'")
        self.assertEqual(u'{0!r:}'.format(u'Hello'), u"u'Hello'")
        self.assertEqual(u'{0!r}'.format(F(u'Hello')), u'F(Hello)')

        # test fallback to object.__format__
        self.assertEqual(u'{0}'.format({}), u'{}')
        self.assertEqual(u'{0}'.format([]), u'[]')
        self.assertEqual(u'{0}'.format([1]), u'[1]')
        self.assertEqual(u'{0}'.format(E(u'data')), u'E(data)')
        self.assertEqual(u'{0:^10}'.format(E(u'data')), u' E(data)  ')
        self.assertEqual(u'{0:^10s}'.format(E(u'data')), u' E(data)  ')
        self.assertEqual(u'{0:d}'.format(G(u'data')), u'G(data)')
        self.assertEqual(u'{0:>15s}'.format(G(u'data')), u' string is data')
        self.assertEqual(u'{0!s}'.format(G(u'data')), u'string is data')

        # the whole format spec is passed on to I.__format__ (strftime)
        self.assertEqual(u"{0:date: %Y-%m-%d}".format(I(year=2007,
                                                        month=8,
                                                        day=27)),
                         u"date: 2007-08-27")

        # test deriving from a builtin type and overriding __format__
        self.assertEqual(u"{0}".format(J(10)), u"20")


        # string format specifiers
        self.assertEqual(u'{0:}'.format('a'), u'a')

        # computed format specifiers
        self.assertEqual(u"{0:.{1}}".format(u'hello world', 5), u'hello')
        self.assertEqual(u"{0:.{1}s}".format(u'hello world', 5), u'hello')
        self.assertEqual(u"{0:.{precision}s}".format('hello world', precision=5), u'hello')
        self.assertEqual(u"{0:{width}.{precision}s}".format('hello world', width=10, precision=5), u'hello     ')
        self.assertEqual(u"{0:{width}.{precision}s}".format('hello world', width='10', precision='5'), u'hello     ')

        # test various errors
        self.assertRaises(ValueError, u'{'.format)
        self.assertRaises(ValueError, u'}'.format)
        self.assertRaises(ValueError, u'a{'.format)
        self.assertRaises(ValueError, u'a}'.format)
        self.assertRaises(ValueError, u'{a'.format)
        self.assertRaises(ValueError, u'}a'.format)
        self.assertRaises(IndexError, u'{0}'.format)
        self.assertRaises(IndexError, u'{1}'.format, u'abc')
        self.assertRaises(KeyError,   u'{x}'.format)
        self.assertRaises(ValueError, u"}{".format)
        self.assertRaises(ValueError, u"{".format)
        self.assertRaises(ValueError, u"}".format)
        self.assertRaises(ValueError, u"abc{0:{}".format)
        self.assertRaises(ValueError, u"{0".format)
        self.assertRaises(IndexError, u"{0.}".format)
        self.assertRaises(ValueError, u"{0.}".format, 0)
        self.assertRaises(IndexError, u"{0[}".format)
        self.assertRaises(ValueError, u"{0[}".format, [])
        self.assertRaises(KeyError,   u"{0]}".format)
        self.assertRaises(ValueError, u"{0.[]}".format, 0)
        self.assertRaises(ValueError, u"{0..foo}".format, 0)
        self.assertRaises(ValueError, u"{0[0}".format, 0)
        self.assertRaises(ValueError, u"{0[0:foo}".format, 0)
        self.assertRaises(KeyError,   u"{c]}".format)
        self.assertRaises(ValueError, u"{{ {{{0}}".format, 0)
        self.assertRaises(ValueError, u"{0}}".format, 0)
        self.assertRaises(KeyError,   u"{foo}".format, bar=3)
        self.assertRaises(ValueError, u"{0!x}".format, 3)
        self.assertRaises(ValueError, u"{0!}".format, 0)
        self.assertRaises(ValueError, u"{0!rs}".format, 0)
        self.assertRaises(ValueError, u"{!}".format)
        self.assertRaises(ValueError, u"{:}".format)
        self.assertRaises(ValueError, u"{:s}".format)
        self.assertRaises(ValueError, u"{}".format)

        # issue 6089
        self.assertRaises(ValueError, u"{0[0]x}".format, [None])
        self.assertRaises(ValueError, u"{0[0](10)}".format, [None])

        # can't have a replacement on the field name portion
        self.assertRaises(TypeError, u'{0[{1}]}'.format, u'abcdefg', 4)

        # exceed maximum recursion depth
        self.assertRaises(ValueError, u"{0:{1:{2}}}".format, u'abc', u's', u'')
        self.assertRaises(ValueError, u"{0:{1:{2:{3:{4:{5:{6}}}}}}}".format,
                          0, 1, 2, 3, 4, 5, 6, 7)

        # string format spec errors
        self.assertRaises(ValueError, u"{0:-s}".format, u'')
        self.assertRaises(ValueError, format, u"", u"-")
        self.assertRaises(ValueError, u"{0:=s}".format, u'')

        # test combining string and unicode
        self.assertEqual(u"foo{0}".format('bar'), u'foobar')
        # This will try to convert the argument from unicode to str, which
        #  will succeed
        self.assertEqual("foo{0}".format(u'bar'), 'foobar')
        # This will try to convert the argument from unicode to str, which
        #  will fail
        self.assertRaises(UnicodeEncodeError, "foo{0}".format, u'\u1000bar')
1119
1120    def test_raiseMemError(self):
1121        # Ensure that the freelist contains a consistent object, even
1122        # when a string allocation fails with a MemoryError.
1123        # This used to crash the interpreter,
1124        # or leak references when the number was smaller.
1125        charwidth = 4 if sys.maxunicode >= 0x10000 else 2
1126        # Note: sys.maxsize is half of the actual max allocation because of
1127        # the signedness of Py_ssize_t.
1128        alloc = lambda: u"a" * (sys.maxsize // charwidth * 2)
1129        self.assertRaises(MemoryError, alloc)
1130        self.assertRaises(MemoryError, alloc)
1131
def test_main():
    """Hand this module's name to test_support.run_unittest()."""
    this_module = __name__
    test_support.run_unittest(this_module)
1134
# Run the tests when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()