/pypy/interpreter/unicodehelper.py
Python | 67 lines | 52 code | 9 blank | 6 comment | 0 complexity | 9be24b18c5c70bb4875571f3e18e765f MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
- from pypy.interpreter.error import OperationError
- from rpython.rlib.objectmodel import specialize
- from rpython.rlib import runicode
- from pypy.module._codecs import interp_codecs
@specialize.memo()
def decode_error_handler(space):
    """Return a decode error handler that always raises for `space`.

    This is the fast path for the "strict" error policy: the returned
    callable ignores its `errors` argument and unconditionally raises an
    app-level UnicodeDecodeError.
    """
    def raise_unicode_exception_decode(errors, encoding, msg, s,
                                       startingpos, endingpos):
        # Argument order of the wrapped tuple:
        # (encoding, object, start, end, reason).
        w_exc_args = space.newtuple([
            space.wrap(encoding),
            space.wrap(s),
            space.wrap(startingpos),
            space.wrap(endingpos),
            space.wrap(msg),
        ])
        raise OperationError(space.w_UnicodeDecodeError, w_exc_args)
    return raise_unicode_exception_decode
class RUnicodeEncodeError(Exception):
    """Interpreter-level exception describing a failed unicode encode.

    It is a plain Exception subclass that only records the details of the
    failure as attributes: the codec name, the offending object, the
    start/end positions of the failing range and the reason message.
    """

    def __init__(self, encoding, object, start, end, reason):
        # What could not be encoded, and where in it the failure occurred.
        self.object = object
        self.start = start
        self.end = end
        # Which codec failed, and why.
        self.encoding = encoding
        self.reason = reason
def raise_unicode_exception_encode(errors, encoding, msg, u,
                                   startingpos, endingpos):
    """Encode error handler that always raises RUnicodeEncodeError.

    The `errors` argument is ignored.  Note that the handler receives
    `msg` before `u`, whereas the exception constructor expects
    (encoding, object, start, end, reason), so the arguments are
    reordered here.
    """
    failure = RUnicodeEncodeError(encoding, u, startingpos, endingpos, msg)
    raise failure
- # ____________________________________________________________
def encode(space, w_data, encoding=None, errors='strict'):
    """Encode `w_data` by delegating to unicodeobject.encode_object().

    `encoding` and `errors` are passed through unchanged.
    """
    # Imported at call time rather than at module level
    # (presumably to avoid an import cycle -- confirm before moving it).
    from pypy.objspace.std.unicodeobject import encode_object
    w_encoded = encode_object(space, w_data, encoding, errors)
    return w_encoded
- # These functions take and return unwrapped rpython strings and unicodes
def decode_unicode_escape(space, string):
    """Decode a whole 'unicode-escape' encoded string, strictly.

    Malformed input raises an app-level UnicodeDecodeError through the
    handler from decode_error_handler().  Only the decoded text is
    returned; the consumed-length part of the result is dropped.
    """
    codec_state = space.fromcache(interp_codecs.CodecState)
    # Handler obtained from the codec state (presumably used to resolve
    # \N{...} named escapes -- confirm against runicode).
    name_lookup = codec_state.get_unicodedata_handler(space)
    decoded = runicode.str_decode_unicode_escape(
        string, len(string), "strict",
        final=True,
        errorhandler=decode_error_handler(space),
        unicodedata_handler=name_lookup)
    return decoded[0]
def decode_raw_unicode_escape(space, string):
    """Decode a whole 'raw-unicode-escape' encoded string, strictly.

    Malformed input raises an app-level UnicodeDecodeError through the
    handler from decode_error_handler().  Only the decoded text is
    returned; the consumed-length part of the result is dropped.
    """
    decoded = runicode.str_decode_raw_unicode_escape(
        string, len(string), "strict",
        final=True,
        errorhandler=decode_error_handler(space))
    return decoded[0]
def decode_utf8(space, string):
    """Decode a whole UTF-8 byte string, strictly.

    The decoder is called with allow_surrogates=True.  Other malformed
    input raises an app-level UnicodeDecodeError through the handler from
    decode_error_handler().  Only the decoded text is returned; the
    consumed-length part of the result is dropped.
    """
    decoded = runicode.str_decode_utf_8(
        string, len(string), "strict",
        final=True,
        errorhandler=decode_error_handler(space),
        allow_surrogates=True)
    return decoded[0]
def encode_utf8(space, uni):
    """Encode the unwrapped unicode string `uni` as UTF-8 bytes.

    `space` is not used by this function.
    """
    # Surrogates are allowed here, so this function never raises
    # UnicodeEncodeError.  (This is not the case with Python 3.)
    # A handler is still supplied to the encoder as its errorhandler.
    encoded = runicode.unicode_encode_utf_8(
        uni, len(uni), "strict",
        errorhandler=raise_unicode_exception_encode,
        allow_surrogates=True)
    return encoded