PageRenderTime 36ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 0ms

/pypy/interpreter/unicodehelper.py

https://bitbucket.org/pypy/pypy/
Python | 67 lines | 52 code | 9 blank | 6 comment | 0 complexity | 9be24b18c5c70bb4875571f3e18e765f MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
  1. from pypy.interpreter.error import OperationError
  2. from rpython.rlib.objectmodel import specialize
  3. from rpython.rlib import runicode
  4. from pypy.module._codecs import interp_codecs
  5. @specialize.memo()
  6. def decode_error_handler(space):
  7. # Fast version of the "strict" errors handler.
  8. def raise_unicode_exception_decode(errors, encoding, msg, s,
  9. startingpos, endingpos):
  10. raise OperationError(space.w_UnicodeDecodeError,
  11. space.newtuple([space.wrap(encoding),
  12. space.wrap(s),
  13. space.wrap(startingpos),
  14. space.wrap(endingpos),
  15. space.wrap(msg)]))
  16. return raise_unicode_exception_decode
  17. class RUnicodeEncodeError(Exception):
  18. def __init__(self, encoding, object, start, end, reason):
  19. self.encoding = encoding
  20. self.object = object
  21. self.start = start
  22. self.end = end
  23. self.reason = reason
  24. def raise_unicode_exception_encode(errors, encoding, msg, u,
  25. startingpos, endingpos):
  26. raise RUnicodeEncodeError(encoding, u, startingpos, endingpos, msg)
  27. # ____________________________________________________________
  28. def encode(space, w_data, encoding=None, errors='strict'):
  29. from pypy.objspace.std.unicodeobject import encode_object
  30. return encode_object(space, w_data, encoding, errors)
  31. # These functions take and return unwrapped rpython strings and unicodes
  32. def decode_unicode_escape(space, string):
  33. state = space.fromcache(interp_codecs.CodecState)
  34. unicodedata_handler = state.get_unicodedata_handler(space)
  35. result, consumed = runicode.str_decode_unicode_escape(
  36. string, len(string), "strict",
  37. final=True, errorhandler=decode_error_handler(space),
  38. unicodedata_handler=unicodedata_handler)
  39. return result
  40. def decode_raw_unicode_escape(space, string):
  41. result, consumed = runicode.str_decode_raw_unicode_escape(
  42. string, len(string), "strict",
  43. final=True, errorhandler=decode_error_handler(space))
  44. return result
  45. def decode_utf8(space, string):
  46. result, consumed = runicode.str_decode_utf_8(
  47. string, len(string), "strict",
  48. final=True, errorhandler=decode_error_handler(space),
  49. allow_surrogates=True)
  50. return result
  51. def encode_utf8(space, uni):
  52. # Note that this function never raises UnicodeEncodeError,
  53. # since surrogate pairs are allowed.
  54. # This is not the case with Python3.
  55. return runicode.unicode_encode_utf_8(
  56. uni, len(uni), "strict",
  57. errorhandler=raise_unicode_exception_encode,
  58. allow_surrogates=True)