PageRenderTime 38ms CodeModel.GetById 27ms app.highlight 8ms RepoModel.GetById 1ms app.codeStats 0ms

/pypy/interpreter/unicodehelper.py

https://bitbucket.org/pypy/pypy/
Python | 67 lines | 52 code | 9 blank | 6 comment | 0 complexity | 9be24b18c5c70bb4875571f3e18e765f MD5 | raw file
 1from pypy.interpreter.error import OperationError
 2from rpython.rlib.objectmodel import specialize
 3from rpython.rlib import runicode
 4from pypy.module._codecs import interp_codecs
 5
 6@specialize.memo()
 7def decode_error_handler(space):
 8    # Fast version of the "strict" errors handler.
 9    def raise_unicode_exception_decode(errors, encoding, msg, s,
10                                       startingpos, endingpos):
11        raise OperationError(space.w_UnicodeDecodeError,
12                             space.newtuple([space.wrap(encoding),
13                                             space.wrap(s),
14                                             space.wrap(startingpos),
15                                             space.wrap(endingpos),
16                                             space.wrap(msg)]))
17    return raise_unicode_exception_decode
18
class RUnicodeEncodeError(Exception):
    """Exception recording the details of a failed unicode encode.

    The five constructor arguments are stored unchanged as attributes of
    the same name: ``encoding``, ``object``, ``start``, ``end`` and
    ``reason``.
    """

    def __init__(self, encoding, object, start, end, reason):
        # ``object`` shadows the builtin, but it is part of the public
        # constructor signature and attribute name, so it is kept as-is.
        self.encoding = encoding
        self.object = object
        self.start = start
        self.end = end
        self.reason = reason
26
def raise_unicode_exception_encode(errors, encoding, msg, u,
                                   startingpos, endingpos):
    """Encode error handler that always raises ``RUnicodeEncodeError``.

    ``errors`` is accepted but not consulted.  The remaining arguments are
    forwarded to the exception as
    ``(encoding, object=u, start=startingpos, end=endingpos, reason=msg)``.
    """
    raise RUnicodeEncodeError(encoding, u, startingpos, endingpos, msg)
30
31# ____________________________________________________________
32
def encode(space, w_data, encoding=None, errors='strict'):
    """Encode *w_data* by delegating to ``encode_object``.

    All four arguments are passed through unchanged and the callee's
    result is returned as-is.
    """
    # Imported at call time rather than at module level, as in the
    # original -- presumably to avoid an import cycle; confirm before
    # hoisting it.
    from pypy.objspace.std.unicodeobject import encode_object
    return encode_object(space, w_data, encoding, errors)
36
# These functions take and return unwrapped RPython strings and unicodes.
def decode_unicode_escape(space, string):
    """Strictly decode *string* with ``str_decode_unicode_escape``.

    Takes an unwrapped string and returns the decoded unwrapped unicode.
    The full input is decoded in one go (``final=True``); errors are
    reported through ``decode_error_handler(space)``.
    """
    state = space.fromcache(interp_codecs.CodecState)
    # Looked up before the error handler, matching the original order.
    unicodedata_handler = state.get_unicodedata_handler(space)
    # The decoder also returns the number of consumed characters, which
    # is not needed here.
    result, _ = runicode.str_decode_unicode_escape(
        string, len(string), "strict",
        final=True, errorhandler=decode_error_handler(space),
        unicodedata_handler=unicodedata_handler)
    return result
46
def decode_raw_unicode_escape(space, string):
    """Strictly decode *string* with ``str_decode_raw_unicode_escape``.

    Takes an unwrapped string and returns the decoded unwrapped unicode.
    The full input is decoded in one go (``final=True``); errors are
    reported through ``decode_error_handler(space)``.
    """
    # The decoder also returns the number of consumed characters, which
    # is not needed here.
    result, _ = runicode.str_decode_raw_unicode_escape(
        string, len(string), "strict",
        final=True, errorhandler=decode_error_handler(space))
    return result
52
def decode_utf8(space, string):
    """Strictly decode the UTF-8 *string*, with surrogates allowed.

    Takes an unwrapped string and returns the decoded unwrapped unicode.
    The full input is decoded in one go (``final=True``) with
    ``allow_surrogates=True``; errors are reported through
    ``decode_error_handler(space)``.
    """
    # The decoder also returns the number of consumed characters, which
    # is not needed here.
    result, _ = runicode.str_decode_utf_8(
        string, len(string), "strict",
        final=True, errorhandler=decode_error_handler(space),
        allow_surrogates=True)
    return result
59
def encode_utf8(space, uni):
    """Encode the unwrapped unicode *uni* to UTF-8.

    Note that this function never raises UnicodeEncodeError, since
    surrogate pairs are allowed.  This is not the case with Python3.
    """
    # ``space`` is not used in the body.  The module-level strict handler
    # is passed as ``errorhandler``.
    return runicode.unicode_encode_utf_8(
        uni, len(uni), "strict",
        errorhandler=raise_unicode_exception_encode,
        allow_surrogates=True)