PageRenderTime 99ms CodeModel.GetById 15ms app.highlight 77ms RepoModel.GetById 1ms app.codeStats 0ms

/Lib/test/test_codeccallbacks.py

http://unladen-swallow.googlecode.com/
Python | 802 lines | 678 code | 65 blank | 59 comment | 51 complexity | 4196be5443c7f5d40e54265cef13ead1 MD5 | raw file
  1import test.test_support, unittest
  2import sys, codecs, htmlentitydefs, unicodedata
  3
  4class PosReturn:
  5    # this can be used for configurable callbacks
  6
  7    def __init__(self):
  8        self.pos = 0
  9
 10    def handle(self, exc):
 11        oldpos = self.pos
 12        realpos = oldpos
 13        if realpos<0:
 14            realpos = len(exc.object) + realpos
 15        # if we don't advance this time, terminate on the next call
 16        # otherwise we'd get an endless loop
 17        if realpos <= exc.start:
 18            self.pos = len(exc.object)
 19        return (u"<?>", oldpos)
 20
 21# A UnicodeEncodeError object with a bad start attribute
 22class BadStartUnicodeEncodeError(UnicodeEncodeError):
 23    def __init__(self):
 24        UnicodeEncodeError.__init__(self, "ascii", u"", 0, 1, "bad")
 25        self.start = []
 26
 27# A UnicodeEncodeError object with a bad object attribute
 28class BadObjectUnicodeEncodeError(UnicodeEncodeError):
 29    def __init__(self):
 30        UnicodeEncodeError.__init__(self, "ascii", u"", 0, 1, "bad")
 31        self.object = []
 32
 33# A UnicodeDecodeError object without an end attribute
 34class NoEndUnicodeDecodeError(UnicodeDecodeError):
 35    def __init__(self):
 36        UnicodeDecodeError.__init__(self, "ascii", "", 0, 1, "bad")
 37        del self.end
 38
 39# A UnicodeDecodeError object with a bad object attribute
 40class BadObjectUnicodeDecodeError(UnicodeDecodeError):
 41    def __init__(self):
 42        UnicodeDecodeError.__init__(self, "ascii", "", 0, 1, "bad")
 43        self.object = []
 44
 45# A UnicodeTranslateError object without a start attribute
 46class NoStartUnicodeTranslateError(UnicodeTranslateError):
 47    def __init__(self):
 48        UnicodeTranslateError.__init__(self, u"", 0, 1, "bad")
 49        del self.start
 50
 51# A UnicodeTranslateError object without an end attribute
 52class NoEndUnicodeTranslateError(UnicodeTranslateError):
 53    def __init__(self):
 54        UnicodeTranslateError.__init__(self,  u"", 0, 1, "bad")
 55        del self.end
 56
 57# A UnicodeTranslateError object without an object attribute
 58class NoObjectUnicodeTranslateError(UnicodeTranslateError):
 59    def __init__(self):
 60        UnicodeTranslateError.__init__(self, u"", 0, 1, "bad")
 61        del self.object
 62
 63class CodecCallbackTest(unittest.TestCase):
 64
 65    def test_xmlcharrefreplace(self):
 66        # replace unencodable characters which numeric character entities.
 67        # For ascii, latin-1 and charmaps this is completely implemented
 68        # in C and should be reasonably fast.
 69        s = u"\u30b9\u30d1\u30e2 \xe4nd eggs"
 70        self.assertEqual(
 71            s.encode("ascii", "xmlcharrefreplace"),
 72            "&#12473;&#12497;&#12514; &#228;nd eggs"
 73        )
 74        self.assertEqual(
 75            s.encode("latin-1", "xmlcharrefreplace"),
 76            "&#12473;&#12497;&#12514; \xe4nd eggs"
 77        )
 78
 79    def test_xmlcharnamereplace(self):
 80        # This time use a named character entity for unencodable
 81        # characters, if one is available.
 82
 83        def xmlcharnamereplace(exc):
 84            if not isinstance(exc, UnicodeEncodeError):
 85                raise TypeError("don't know how to handle %r" % exc)
 86            l = []
 87            for c in exc.object[exc.start:exc.end]:
 88                try:
 89                    l.append(u"&%s;" % htmlentitydefs.codepoint2name[ord(c)])
 90                except KeyError:
 91                    l.append(u"&#%d;" % ord(c))
 92            return (u"".join(l), exc.end)
 93
 94        codecs.register_error(
 95            "test.xmlcharnamereplace", xmlcharnamereplace)
 96
 97        sin = u"\xab\u211c\xbb = \u2329\u1234\u20ac\u232a"
 98        sout = "&laquo;&real;&raquo; = &lang;&#4660;&euro;&rang;"
 99        self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout)
100        sout = "\xab&real;\xbb = &lang;&#4660;&euro;&rang;"
101        self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout)
102        sout = "\xab&real;\xbb = &lang;&#4660;\xa4&rang;"
103        self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout)
104
105    def test_uninamereplace(self):
106        # We're using the names from the unicode database this time,
107        # and we're doing "syntax highlighting" here, i.e. we include
108        # the replaced text in ANSI escape sequences. For this it is
109        # useful that the error handler is not called for every single
110        # unencodable character, but for a complete sequence of
111        # unencodable characters, otherwise we would output many
112        # unnecessary escape sequences.
113
114        def uninamereplace(exc):
115            if not isinstance(exc, UnicodeEncodeError):
116                raise TypeError("don't know how to handle %r" % exc)
117            l = []
118            for c in exc.object[exc.start:exc.end]:
119                l.append(unicodedata.name(c, u"0x%x" % ord(c)))
120            return (u"\033[1m%s\033[0m" % u", ".join(l), exc.end)
121
122        codecs.register_error(
123            "test.uninamereplace", uninamereplace)
124
125        sin = u"\xac\u1234\u20ac\u8000"
126        sout = "\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
127        self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout)
128
129        sout = "\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
130        self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout)
131
132        sout = "\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m"
133        self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout)
134
135    def test_backslashescape(self):
136        # Does the same as the "unicode-escape" encoding, but with different
137        # base encodings.
138        sin = u"a\xac\u1234\u20ac\u8000"
139        if sys.maxunicode > 0xffff:
140            sin += unichr(sys.maxunicode)
141        sout = "a\\xac\\u1234\\u20ac\\u8000"
142        if sys.maxunicode > 0xffff:
143            sout += "\\U%08x" % sys.maxunicode
144        self.assertEqual(sin.encode("ascii", "backslashreplace"), sout)
145
146        sout = "a\xac\\u1234\\u20ac\\u8000"
147        if sys.maxunicode > 0xffff:
148            sout += "\\U%08x" % sys.maxunicode
149        self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout)
150
151        sout = "a\xac\\u1234\xa4\\u8000"
152        if sys.maxunicode > 0xffff:
153            sout += "\\U%08x" % sys.maxunicode
154        self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
155
156    def test_decoderelaxedutf8(self):
157        # This is the test for a decoding callback handler,
158        # that relaxes the UTF-8 minimal encoding restriction.
159        # A null byte that is encoded as "\xc0\x80" will be
160        # decoded as a null byte. All other illegal sequences
161        # will be handled strictly.
162        def relaxedutf8(exc):
163            if not isinstance(exc, UnicodeDecodeError):
164                raise TypeError("don't know how to handle %r" % exc)
165            if exc.object[exc.start:exc.end].startswith("\xc0\x80"):
166                return (u"\x00", exc.start+2) # retry after two bytes
167            else:
168                raise exc
169
170        codecs.register_error(
171            "test.relaxedutf8", relaxedutf8)
172
173        sin = "a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80"
174        sout = u"a\x00b\x00c\xfc\x00\x00"
175        self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout)
176        sin = "\xc0\x80\xc0\x81"
177        self.assertRaises(UnicodeError, sin.decode, "utf-8", "test.relaxedutf8")
178
179    def test_charmapencode(self):
180        # For charmap encodings the replacement string will be
181        # mapped through the encoding again. This means, that
182        # to be able to use e.g. the "replace" handler, the
183        # charmap has to have a mapping for "?".
184        charmap = dict([ (ord(c), 2*c.upper()) for c in "abcdefgh"])
185        sin = u"abc"
186        sout = "AABBCC"
187        self.assertEquals(codecs.charmap_encode(sin, "strict", charmap)[0], sout)
188
189        sin = u"abcA"
190        self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap)
191
192        charmap[ord("?")] = "XYZ"
193        sin = u"abcDEF"
194        sout = "AABBCCXYZXYZXYZ"
195        self.assertEquals(codecs.charmap_encode(sin, "replace", charmap)[0], sout)
196
197        charmap[ord("?")] = u"XYZ"
198        self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
199
200        charmap[ord("?")] = u"XYZ"
201        self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
202
203    def test_decodeunicodeinternal(self):
204        self.assertRaises(
205            UnicodeDecodeError,
206            "\x00\x00\x00\x00\x00".decode,
207            "unicode-internal",
208        )
209        if sys.maxunicode > 0xffff:
210            def handler_unicodeinternal(exc):
211                if not isinstance(exc, UnicodeDecodeError):
212                    raise TypeError("don't know how to handle %r" % exc)
213                return (u"\x01", 1)
214
215            self.assertEqual(
216                "\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
217                u"\u0000"
218            )
219
220            self.assertEqual(
221                "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
222                u"\u0000\ufffd"
223            )
224
225            codecs.register_error("test.hui", handler_unicodeinternal)
226
227            self.assertEqual(
228                "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
229                u"\u0000\u0001\u0000"
230            )
231
232    def test_callbacks(self):
233        def handler1(exc):
234            if not isinstance(exc, UnicodeEncodeError) \
235               and not isinstance(exc, UnicodeDecodeError):
236                raise TypeError("don't know how to handle %r" % exc)
237            l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
238            return (u"[%s]" % u"".join(l), exc.end)
239
240        codecs.register_error("test.handler1", handler1)
241
242        def handler2(exc):
243            if not isinstance(exc, UnicodeDecodeError):
244                raise TypeError("don't know how to handle %r" % exc)
245            l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
246            return (u"[%s]" % u"".join(l), exc.end+1) # skip one character
247
248        codecs.register_error("test.handler2", handler2)
249
250        s = "\x00\x81\x7f\x80\xff"
251
252        self.assertEqual(
253            s.decode("ascii", "test.handler1"),
254            u"\x00[<129>]\x7f[<128>][<255>]"
255        )
256        self.assertEqual(
257            s.decode("ascii", "test.handler2"),
258            u"\x00[<129>][<128>]"
259        )
260
261        self.assertEqual(
262            "\\u3042\u3xxx".decode("unicode-escape", "test.handler1"),
263            u"\u3042[<92><117><51><120>]xx"
264        )
265
266        self.assertEqual(
267            "\\u3042\u3xx".decode("unicode-escape", "test.handler1"),
268            u"\u3042[<92><117><51><120><120>]"
269        )
270
271        self.assertEqual(
272            codecs.charmap_decode("abc", "test.handler1", {ord("a"): u"z"})[0],
273            u"z[<98>][<99>]"
274        )
275
276        self.assertEqual(
277            u"g\xfc\xdfrk".encode("ascii", "test.handler1"),
278            u"g[<252><223>]rk"
279        )
280
281        self.assertEqual(
282            u"g\xfc\xdf".encode("ascii", "test.handler1"),
283            u"g[<252><223>]"
284        )
285
286    def test_longstrings(self):
287        # test long strings to check for memory overflow problems
288        errors = [ "strict", "ignore", "replace", "xmlcharrefreplace",
289                   "backslashreplace"]
290        # register the handlers under different names,
291        # to prevent the codec from recognizing the name
292        for err in errors:
293            codecs.register_error("test." + err, codecs.lookup_error(err))
294        l = 1000
295        errors += [ "test." + err for err in errors ]
296        for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]:
297            for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15",
298                        "utf-8", "utf-7", "utf-16", "utf-32"):
299                for err in errors:
300                    try:
301                        uni.encode(enc, err)
302                    except UnicodeError:
303                        pass
304
305    def check_exceptionobjectargs(self, exctype, args, msg):
306        # Test UnicodeError subclasses: construction, attribute assignment and __str__ conversion
307        # check with one missing argument
308        self.assertRaises(TypeError, exctype, *args[:-1])
309        # check with one argument too much
310        self.assertRaises(TypeError, exctype, *(args + ["too much"]))
311        # check with one argument of the wrong type
312        wrongargs = [ "spam", u"eggs", 42, 1.0, None ]
313        for i in xrange(len(args)):
314            for wrongarg in wrongargs:
315                if type(wrongarg) is type(args[i]):
316                    continue
317                # build argument array
318                callargs = []
319                for j in xrange(len(args)):
320                    if i==j:
321                        callargs.append(wrongarg)
322                    else:
323                        callargs.append(args[i])
324                self.assertRaises(TypeError, exctype, *callargs)
325
326        # check with the correct number and type of arguments
327        exc = exctype(*args)
328        self.assertEquals(str(exc), msg)
329
330    def test_unicodeencodeerror(self):
331        self.check_exceptionobjectargs(
332            UnicodeEncodeError,
333            ["ascii", u"g\xfcrk", 1, 2, "ouch"],
334            "'ascii' codec can't encode character u'\\xfc' in position 1: ouch"
335        )
336        self.check_exceptionobjectargs(
337            UnicodeEncodeError,
338            ["ascii", u"g\xfcrk", 1, 4, "ouch"],
339            "'ascii' codec can't encode characters in position 1-3: ouch"
340        )
341        self.check_exceptionobjectargs(
342            UnicodeEncodeError,
343            ["ascii", u"\xfcx", 0, 1, "ouch"],
344            "'ascii' codec can't encode character u'\\xfc' in position 0: ouch"
345        )
346        self.check_exceptionobjectargs(
347            UnicodeEncodeError,
348            ["ascii", u"\u0100x", 0, 1, "ouch"],
349            "'ascii' codec can't encode character u'\\u0100' in position 0: ouch"
350        )
351        self.check_exceptionobjectargs(
352            UnicodeEncodeError,
353            ["ascii", u"\uffffx", 0, 1, "ouch"],
354            "'ascii' codec can't encode character u'\\uffff' in position 0: ouch"
355        )
356        if sys.maxunicode > 0xffff:
357            self.check_exceptionobjectargs(
358                UnicodeEncodeError,
359                ["ascii", u"\U00010000x", 0, 1, "ouch"],
360                "'ascii' codec can't encode character u'\\U00010000' in position 0: ouch"
361            )
362
363    def test_unicodedecodeerror(self):
364        self.check_exceptionobjectargs(
365            UnicodeDecodeError,
366            ["ascii", "g\xfcrk", 1, 2, "ouch"],
367            "'ascii' codec can't decode byte 0xfc in position 1: ouch"
368        )
369        self.check_exceptionobjectargs(
370            UnicodeDecodeError,
371            ["ascii", "g\xfcrk", 1, 3, "ouch"],
372            "'ascii' codec can't decode bytes in position 1-2: ouch"
373        )
374
375    def test_unicodetranslateerror(self):
376        self.check_exceptionobjectargs(
377            UnicodeTranslateError,
378            [u"g\xfcrk", 1, 2, "ouch"],
379            "can't translate character u'\\xfc' in position 1: ouch"
380        )
381        self.check_exceptionobjectargs(
382            UnicodeTranslateError,
383            [u"g\u0100rk", 1, 2, "ouch"],
384            "can't translate character u'\\u0100' in position 1: ouch"
385        )
386        self.check_exceptionobjectargs(
387            UnicodeTranslateError,
388            [u"g\uffffrk", 1, 2, "ouch"],
389            "can't translate character u'\\uffff' in position 1: ouch"
390        )
391        if sys.maxunicode > 0xffff:
392            self.check_exceptionobjectargs(
393                UnicodeTranslateError,
394                [u"g\U00010000rk", 1, 2, "ouch"],
395                "can't translate character u'\\U00010000' in position 1: ouch"
396            )
397        self.check_exceptionobjectargs(
398            UnicodeTranslateError,
399            [u"g\xfcrk", 1, 3, "ouch"],
400            "can't translate characters in position 1-2: ouch"
401        )
402
403    def test_badandgoodstrictexceptions(self):
404        # "strict" complains about a non-exception passed in
405        self.assertRaises(
406            TypeError,
407            codecs.strict_errors,
408            42
409        )
410        # "strict" complains about the wrong exception type
411        self.assertRaises(
412            Exception,
413            codecs.strict_errors,
414            Exception("ouch")
415        )
416
417        # If the correct exception is passed in, "strict" raises it
418        self.assertRaises(
419            UnicodeEncodeError,
420            codecs.strict_errors,
421            UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")
422        )
423
424    def test_badandgoodignoreexceptions(self):
425        # "ignore" complains about a non-exception passed in
426        self.assertRaises(
427           TypeError,
428           codecs.ignore_errors,
429           42
430        )
431        # "ignore" complains about the wrong exception type
432        self.assertRaises(
433           TypeError,
434           codecs.ignore_errors,
435           UnicodeError("ouch")
436        )
437        # If the correct exception is passed in, "ignore" returns an empty replacement
438        self.assertEquals(
439            codecs.ignore_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
440            (u"", 1)
441        )
442        self.assertEquals(
443            codecs.ignore_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
444            (u"", 1)
445        )
446        self.assertEquals(
447            codecs.ignore_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
448            (u"", 1)
449        )
450
451    def test_badandgoodreplaceexceptions(self):
452        # "replace" complains about a non-exception passed in
453        self.assertRaises(
454           TypeError,
455           codecs.replace_errors,
456           42
457        )
458        # "replace" complains about the wrong exception type
459        self.assertRaises(
460           TypeError,
461           codecs.replace_errors,
462           UnicodeError("ouch")
463        )
464        self.assertRaises(
465            TypeError,
466            codecs.replace_errors,
467            BadObjectUnicodeEncodeError()
468        )
469        self.assertRaises(
470            TypeError,
471            codecs.replace_errors,
472            BadObjectUnicodeDecodeError()
473        )
474        # With the correct exception, "replace" returns an "?" or u"\ufffd" replacement
475        self.assertEquals(
476            codecs.replace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
477            (u"?", 1)
478        )
479        self.assertEquals(
480            codecs.replace_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
481            (u"\ufffd", 1)
482        )
483        self.assertEquals(
484            codecs.replace_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
485            (u"\ufffd", 1)
486        )
487
488    def test_badandgoodxmlcharrefreplaceexceptions(self):
489        # "xmlcharrefreplace" complains about a non-exception passed in
490        self.assertRaises(
491           TypeError,
492           codecs.xmlcharrefreplace_errors,
493           42
494        )
495        # "xmlcharrefreplace" complains about the wrong exception types
496        self.assertRaises(
497           TypeError,
498           codecs.xmlcharrefreplace_errors,
499           UnicodeError("ouch")
500        )
501        # "xmlcharrefreplace" can only be used for encoding
502        self.assertRaises(
503            TypeError,
504            codecs.xmlcharrefreplace_errors,
505            UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
506        )
507        self.assertRaises(
508            TypeError,
509            codecs.xmlcharrefreplace_errors,
510            UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
511        )
512        # Use the correct exception
513        cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042)
514        s = "".join(unichr(c) for c in cs)
515        self.assertEquals(
516            codecs.xmlcharrefreplace_errors(
517                UnicodeEncodeError("ascii", s, 0, len(s), "ouch")
518            ),
519            (u"".join(u"&#%d;" % ord(c) for c in s), len(s))
520        )
521
522    def test_badandgoodbackslashreplaceexceptions(self):
523        # "backslashreplace" complains about a non-exception passed in
524        self.assertRaises(
525           TypeError,
526           codecs.backslashreplace_errors,
527           42
528        )
529        # "backslashreplace" complains about the wrong exception types
530        self.assertRaises(
531           TypeError,
532           codecs.backslashreplace_errors,
533           UnicodeError("ouch")
534        )
535        # "backslashreplace" can only be used for encoding
536        self.assertRaises(
537            TypeError,
538            codecs.backslashreplace_errors,
539            UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
540        )
541        self.assertRaises(
542            TypeError,
543            codecs.backslashreplace_errors,
544            UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
545        )
546        # Use the correct exception
547        self.assertEquals(
548            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
549            (u"\\u3042", 1)
550        )
551        self.assertEquals(
552            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\x00", 0, 1, "ouch")),
553            (u"\\x00", 1)
554        )
555        self.assertEquals(
556            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\xff", 0, 1, "ouch")),
557            (u"\\xff", 1)
558        )
559        self.assertEquals(
560            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u0100", 0, 1, "ouch")),
561            (u"\\u0100", 1)
562        )
563        self.assertEquals(
564            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\uffff", 0, 1, "ouch")),
565            (u"\\uffff", 1)
566        )
567        if sys.maxunicode>0xffff:
568            self.assertEquals(
569                codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U00010000", 0, 1, "ouch")),
570                (u"\\U00010000", 1)
571            )
572            self.assertEquals(
573                codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U0010ffff", 0, 1, "ouch")),
574                (u"\\U0010ffff", 1)
575            )
576
577    def test_badhandlerresults(self):
578        results = ( 42, u"foo", (1,2,3), (u"foo", 1, 3), (u"foo", None), (u"foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
579        encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")
580
581        for res in results:
582            codecs.register_error("test.badhandler", lambda: res)
583            for enc in encs:
584                self.assertRaises(
585                    TypeError,
586                    u"\u3042".encode,
587                    enc,
588                    "test.badhandler"
589                )
590            for (enc, bytes) in (
591                ("ascii", "\xff"),
592                ("utf-8", "\xff"),
593                ("utf-7", "+x-"),
594                ("unicode-internal", "\x00"),
595            ):
596                self.assertRaises(
597                    TypeError,
598                    bytes.decode,
599                    enc,
600                    "test.badhandler"
601                )
602
603    def test_lookup(self):
604        self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict"))
605        self.assertEquals(codecs.ignore_errors, codecs.lookup_error("ignore"))
606        self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict"))
607        self.assertEquals(
608            codecs.xmlcharrefreplace_errors,
609            codecs.lookup_error("xmlcharrefreplace")
610        )
611        self.assertEquals(
612            codecs.backslashreplace_errors,
613            codecs.lookup_error("backslashreplace")
614        )
615
616    def test_unencodablereplacement(self):
617        def unencrepl(exc):
618            if isinstance(exc, UnicodeEncodeError):
619                return (u"\u4242", exc.end)
620            else:
621                raise TypeError("don't know how to handle %r" % exc)
622        codecs.register_error("test.unencreplhandler", unencrepl)
623        for enc in ("ascii", "iso-8859-1", "iso-8859-15"):
624            self.assertRaises(
625                UnicodeEncodeError,
626                u"\u4242".encode,
627                enc,
628                "test.unencreplhandler"
629            )
630
631    def test_badregistercall(self):
632        # enhance coverage of:
633        # Modules/_codecsmodule.c::register_error()
634        # Python/codecs.c::PyCodec_RegisterError()
635        self.assertRaises(TypeError, codecs.register_error, 42)
636        self.assertRaises(TypeError, codecs.register_error, "test.dummy", 42)
637
638    def test_badlookupcall(self):
639        # enhance coverage of:
640        # Modules/_codecsmodule.c::lookup_error()
641        self.assertRaises(TypeError, codecs.lookup_error)
642
643    def test_unknownhandler(self):
644        # enhance coverage of:
645        # Modules/_codecsmodule.c::lookup_error()
646        self.assertRaises(LookupError, codecs.lookup_error, "test.unknown")
647
648    def test_xmlcharrefvalues(self):
649        # enhance coverage of:
650        # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
651        # and inline implementations
652        v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000)
653        if sys.maxunicode>=100000:
654            v += (100000, 500000, 1000000)
655        s = u"".join([unichr(x) for x in v])
656        codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors)
657        for enc in ("ascii", "iso-8859-15"):
658            for err in ("xmlcharrefreplace", "test.xmlcharrefreplace"):
659                s.encode(enc, err)
660
661    def test_decodehelper(self):
662        # enhance coverage of:
663        # Objects/unicodeobject.c::unicode_decode_call_errorhandler()
664        # and callers
665        self.assertRaises(LookupError, "\xff".decode, "ascii", "test.unknown")
666
667        def baddecodereturn1(exc):
668            return 42
669        codecs.register_error("test.baddecodereturn1", baddecodereturn1)
670        self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn1")
671        self.assertRaises(TypeError, "\\".decode, "unicode-escape", "test.baddecodereturn1")
672        self.assertRaises(TypeError, "\\x0".decode, "unicode-escape", "test.baddecodereturn1")
673        self.assertRaises(TypeError, "\\x0y".decode, "unicode-escape", "test.baddecodereturn1")
674        self.assertRaises(TypeError, "\\Uffffeeee".decode, "unicode-escape", "test.baddecodereturn1")
675        self.assertRaises(TypeError, "\\uyyyy".decode, "raw-unicode-escape", "test.baddecodereturn1")
676
677        def baddecodereturn2(exc):
678            return (u"?", None)
679        codecs.register_error("test.baddecodereturn2", baddecodereturn2)
680        self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn2")
681
682        handler = PosReturn()
683        codecs.register_error("test.posreturn", handler.handle)
684
685        # Valid negative position
686        handler.pos = -1
687        self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>0")
688
689        # Valid negative position
690        handler.pos = -2
691        self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?><?>")
692
693        # Negative position out of bounds
694        handler.pos = -3
695        self.assertRaises(IndexError, "\xff0".decode, "ascii", "test.posreturn")
696
697        # Valid positive position
698        handler.pos = 1
699        self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>0")
700
701        # Largest valid positive position (one beyond end of input)
702        handler.pos = 2
703        self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>")
704
705        # Invalid positive position
706        handler.pos = 3
707        self.assertRaises(IndexError, "\xff0".decode, "ascii", "test.posreturn")
708
709        # Restart at the "0"
710        handler.pos = 6
711        self.assertEquals("\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), u"<?>0")
712
713        class D(dict):
714            def __getitem__(self, key):
715                raise ValueError
716        self.assertRaises(UnicodeError, codecs.charmap_decode, "\xff", "strict", {0xff: None})
717        self.assertRaises(ValueError, codecs.charmap_decode, "\xff", "strict", D())
718        self.assertRaises(TypeError, codecs.charmap_decode, "\xff", "strict", {0xff: sys.maxunicode+1})
719
720    def test_encodehelper(self):
721        # enhance coverage of:
722        # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
723        # and callers
724        self.assertRaises(LookupError, u"\xff".encode, "ascii", "test.unknown")
725
726        def badencodereturn1(exc):
727            return 42
728        codecs.register_error("test.badencodereturn1", badencodereturn1)
729        self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn1")
730
731        def badencodereturn2(exc):
732            return (u"?", None)
733        codecs.register_error("test.badencodereturn2", badencodereturn2)
734        self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn2")
735
736        handler = PosReturn()
737        codecs.register_error("test.posreturn", handler.handle)
738
739        # Valid negative position
740        handler.pos = -1
741        self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")
742
743        # Valid negative position
744        handler.pos = -2
745        self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?><?>")
746
747        # Negative position out of bounds
748        handler.pos = -3
749        self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn")
750
751        # Valid positive position
752        handler.pos = 1
753        self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")
754
755        # Largest valid positive position (one beyond end of input
756        handler.pos = 2
757        self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>")
758
759        # Invalid positive position
760        handler.pos = 3
761        self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn")
762
763        handler.pos = 0
764
765        class D(dict):
766            def __getitem__(self, key):
767                raise ValueError
768        for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"):
769            self.assertRaises(UnicodeError, codecs.charmap_encode, u"\xff", err, {0xff: None})
770            self.assertRaises(ValueError, codecs.charmap_encode, u"\xff", err, D())
771            self.assertRaises(TypeError, codecs.charmap_encode, u"\xff", err, {0xff: 300})
772
773    def test_translatehelper(self):
774        # enhance coverage of:
775        # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
776        # and callers
777        # (Unfortunately the errors argument is not directly accessible
778        # from Python, so we can't test that much)
779        class D(dict):
780            def __getitem__(self, key):
781                raise ValueError
782        self.assertRaises(ValueError, u"\xff".translate, D())
783        self.assertRaises(TypeError, u"\xff".translate, {0xff: sys.maxunicode+1})
784        self.assertRaises(TypeError, u"\xff".translate, {0xff: ()})
785
786    def test_bug828737(self):
787        charmap = {
788            ord("&"): u"&amp;",
789            ord("<"): u"&lt;",
790            ord(">"): u"&gt;",
791            ord('"'): u"&quot;",
792        }
793
794        for n in (1, 10, 100, 1000):
795            text = u'abc<def>ghi'*n
796            text.translate(charmap)
797
def test_main():
    """Run the CodecCallbackTest cases through test.test_support."""
    runner = test.test_support.run_unittest
    runner(CodecCallbackTest)
800
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()