unicodehelper.py - This code provides a set of functions for handling Unicode encoding and decoding operations in a PyPy interpreter.

/pypy/interpreter/unicodehelper.py

https://bitbucket.org/pypy/pypy/ · Python · 67 lines · 52 code · 9 blank · 6 comment · 0 complexity · 9be24b18c5c70bb4875571f3e18e765f MD5 · raw file


from pypy.interpreter.error import OperationError
from rpython.rlib.objectmodel import specialize
from rpython.rlib import runicode
from pypy.module._codecs import interp_codecs

@specialize.memo()
def decode_error_handler(space):
    """Return a decode error callback bound to `space`.

    The returned callable takes (errors, encoding, msg, s, startingpos,
    endingpos) and always raises an OperationError whose type is
    space.w_UnicodeDecodeError; it never returns normally.
    """
    # Fast version of the "strict" errors handler.
    def raise_unicode_exception_decode(errors, encoding, msg, s,
                                       startingpos, endingpos):
        w_exc_type = space.w_UnicodeDecodeError
        # The wrapped tuple is ordered (encoding, s, start, end, msg);
        # `errors` is not used.
        w_encoding = space.wrap(encoding)
        w_input = space.wrap(s)
        w_start = space.wrap(startingpos)
        w_end = space.wrap(endingpos)
        w_reason = space.wrap(msg)
        w_args = space.newtuple([w_encoding, w_input, w_start, w_end,
                                 w_reason])
        raise OperationError(w_exc_type, w_args)
    return raise_unicode_exception_decode

class RUnicodeEncodeError(Exception):
    """Exception recording the details of a failed unicode encode.

    Stores the five constructor arguments unchanged as the attributes
    encoding, object, start, end and reason.
    """

    def __init__(self, encoding, object, start, end, reason):
        # `object` shadows the builtin, but it is part of the constructor
        # signature and the attribute name, so it is kept as is.
        self.encoding, self.object = encoding, object
        self.start, self.end = start, end
        self.reason = reason

def raise_unicode_exception_encode(errors, encoding, msg, u,
                                   startingpos, endingpos):
    """Encode error callback that always raises RUnicodeEncodeError.

    `errors` is accepted but ignored.  The other arguments are stored on
    the exception in the order (encoding, u, startingpos, endingpos, msg).
    """
    failure = RUnicodeEncodeError(encoding, u, startingpos, endingpos, msg)
    raise failure

# ____________________________________________________________

def encode(space, w_data, encoding=None, errors='strict'):
    """Encode `w_data` by delegating to unicodeobject.encode_object.

    All four arguments are forwarded unchanged and the callee's result is
    returned as is.
    """
    # The import is done at call time, not at module import time.
    from pypy.objspace.std.unicodeobject import encode_object
    result = encode_object(space, w_data, encoding, errors)
    return result

# These functions take and return unwrapped rpython strings and unicodes
def decode_unicode_escape(space, string):
    """Decode `string` with runicode.str_decode_unicode_escape, strictly.

    The whole input is passed as final data.  Only the decoded result is
    returned; the consumed count is discarded.
    """
    codec_state = space.fromcache(interp_codecs.CodecState)
    ucd_handler = codec_state.get_unicodedata_handler(space)
    on_error = decode_error_handler(space)
    decoded, _consumed = runicode.str_decode_unicode_escape(
        string, len(string), "strict",
        final=True, errorhandler=on_error,
        unicodedata_handler=ucd_handler)
    return decoded

def decode_raw_unicode_escape(space, string):
    """Decode `string` with runicode.str_decode_raw_unicode_escape, strictly.

    The whole input is passed as final data.  Only the decoded result is
    returned; the consumed count is discarded.
    """
    on_error = decode_error_handler(space)
    decoded, _consumed = runicode.str_decode_raw_unicode_escape(
        string, len(string), "strict",
        final=True, errorhandler=on_error)
    return decoded

def decode_utf8(space, string):
    """Decode `string` with runicode.str_decode_utf_8, strictly.

    The decoder is called with allow_surrogates=True and the whole input
    as final data.  Only the decoded result is returned; the consumed
    count is discarded.
    """
    on_error = decode_error_handler(space)
    decoded, _consumed = runicode.str_decode_utf_8(
        string, len(string), "strict",
        final=True, errorhandler=on_error,
        allow_surrogates=True)
    return decoded

def encode_utf8(space, uni):
    """Encode `uni` with runicode.unicode_encode_utf_8, strictly.

    Note that this function never raises UnicodeEncodeError, since
    surrogate pairs are allowed.  This is not the case with Python3.
    `space` is not used by the body.
    """
    encoded = runicode.unicode_encode_utf_8(
        uni, len(uni), "strict",
        errorhandler=raise_unicode_exception_encode,
        allow_surrogates=True)
    return encoded

Summary ✨

This code provides a set of functions for handling Unicode encoding and decoding operations in a PyPy interpreter. It includes error handlers, encoders, and decoders for various Unicode schemes, including UTF-8, Unicode escape sequences, and raw Unicode escape sequences. The functions are designed to work with the PyPy interpreter’s object model and provide a way to handle Unicode-related errors and exceptions.

Tech Fingerprint

Standard Library: IO & Files

Alerts (8)

'def' Ensure functions have docstrings for documentation
7 9 27 33 38 47 53 60