PageRenderTime 45ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/pypy/module/_codecs/interp_codecs.py

https://bitbucket.org/dac_io/pypy
Python | 705 lines | 661 code | 22 blank | 22 comment | 44 complexity | 40af8f586185960e87b71d5b6821c0da MD5 | raw file
  1. from pypy.interpreter.error import OperationError, operationerrfmt
  2. from pypy.interpreter.gateway import NoneNotWrapped, interp2app, unwrap_spec
  3. from pypy.rlib.rstring import UnicodeBuilder
  4. from pypy.rlib.objectmodel import we_are_translated
  5. class CodecState(object):
  6. def __init__(self, space):
  7. self.codec_search_path = []
  8. self.codec_search_cache = {}
  9. self.codec_error_registry = {}
  10. self.codec_need_encodings = True
  11. self.decode_error_handler = self.make_errorhandler(space, True)
  12. self.encode_error_handler = self.make_errorhandler(space, False)
  13. self.unicodedata_handler = None
  14. def make_errorhandler(self, space, decode):
  15. def unicode_call_errorhandler(errors, encoding, reason, input,
  16. startpos, endpos):
  17. w_errorhandler = lookup_error(space, errors)
  18. if decode:
  19. w_cls = space.w_UnicodeDecodeError
  20. else:
  21. w_cls = space.w_UnicodeEncodeError
  22. w_exc = space.call_function(
  23. w_cls,
  24. space.wrap(encoding),
  25. space.wrap(input),
  26. space.wrap(startpos),
  27. space.wrap(endpos),
  28. space.wrap(reason))
  29. w_res = space.call_function(w_errorhandler, w_exc)
  30. if (not space.is_true(space.isinstance(w_res, space.w_tuple))
  31. or space.len_w(w_res) != 2
  32. or not space.is_true(space.isinstance(
  33. space.getitem(w_res, space.wrap(0)),
  34. space.w_unicode))):
  35. if decode:
  36. msg = ("decoding error handler must return "
  37. "(unicode, int) tuple, not %s")
  38. else:
  39. msg = ("encoding error handler must return "
  40. "(unicode, int) tuple, not %s")
  41. raise operationerrfmt(
  42. space.w_TypeError, msg,
  43. space.str_w(space.repr(w_res)))
  44. w_replace, w_newpos = space.fixedview(w_res, 2)
  45. newpos = space.int_w(w_newpos)
  46. if newpos < 0:
  47. newpos = len(input) + newpos
  48. if newpos < 0 or newpos > len(input):
  49. raise operationerrfmt(
  50. space.w_IndexError,
  51. "position %d from error handler out of bounds", newpos)
  52. if decode:
  53. replace = space.unicode_w(w_replace)
  54. return replace, newpos
  55. else:
  56. from pypy.objspace.std.unicodetype import encode_object
  57. w_str = encode_object(space, w_replace, encoding, None)
  58. replace = space.str_w(w_str)
  59. return replace, newpos
  60. return unicode_call_errorhandler
  61. def get_unicodedata_handler(self, space):
  62. if self.unicodedata_handler:
  63. return self.unicodedata_handler
  64. try:
  65. w_unicodedata = space.getbuiltinmodule("unicodedata")
  66. w_getcode = space.getattr(w_unicodedata, space.wrap("_get_code"))
  67. except OperationError:
  68. return None
  69. else:
  70. self.unicodedata_handler = UnicodeData_Handler(space, w_getcode)
  71. return self.unicodedata_handler
  72. def _freeze_(self):
  73. assert not self.codec_search_path
  74. return False
  75. def register_codec(space, w_search_function):
  76. """register(search_function)
  77. Register a codec search function. Search functions are expected to take
  78. one argument, the encoding name in all lower case letters, and return
  79. a tuple of functions (encoder, decoder, stream_reader, stream_writer).
  80. """
  81. state = space.fromcache(CodecState)
  82. if space.is_true(space.callable(w_search_function)):
  83. state.codec_search_path.append(w_search_function)
  84. else:
  85. raise OperationError(
  86. space.w_TypeError,
  87. space.wrap("argument must be callable"))
  88. @unwrap_spec(encoding=str)
  89. def lookup_codec(space, encoding):
  90. """lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)
  91. Looks up a codec tuple in the Python codec registry and returns
  92. a tuple of functions.
  93. """
  94. assert not (space.config.translating and not we_are_translated()), \
  95. "lookup_codec() should not be called during translation"
  96. state = space.fromcache(CodecState)
  97. normalized_encoding = encoding.replace(" ", "-").lower()
  98. w_result = state.codec_search_cache.get(normalized_encoding, None)
  99. if w_result is not None:
  100. return w_result
  101. return _lookup_codec_loop(space, encoding, normalized_encoding)
  102. def _lookup_codec_loop(space, encoding, normalized_encoding):
  103. state = space.fromcache(CodecState)
  104. if state.codec_need_encodings:
  105. w_import = space.getattr(space.builtin, space.wrap("__import__"))
  106. # registers new codecs
  107. space.call_function(w_import, space.wrap("encodings"))
  108. state.codec_need_encodings = False
  109. if len(state.codec_search_path) == 0:
  110. raise OperationError(
  111. space.w_LookupError,
  112. space.wrap("no codec search functions registered: "
  113. "can't find encoding"))
  114. for w_search in state.codec_search_path:
  115. w_result = space.call_function(w_search,
  116. space.wrap(normalized_encoding))
  117. if not space.is_w(w_result, space.w_None):
  118. if not (space.is_true(space.isinstance(w_result,
  119. space.w_tuple)) and
  120. space.len_w(w_result) == 4):
  121. raise OperationError(
  122. space.w_TypeError,
  123. space.wrap("codec search functions must return 4-tuples"))
  124. else:
  125. state.codec_search_cache[normalized_encoding] = w_result
  126. return w_result
  127. raise operationerrfmt(
  128. space.w_LookupError,
  129. "unknown encoding: %s", encoding)
  130. # ____________________________________________________________
  131. # Register standard error handlers
  132. def check_exception(space, w_exc):
  133. try:
  134. w_start = space.getattr(w_exc, space.wrap('start'))
  135. w_end = space.getattr(w_exc, space.wrap('end'))
  136. w_obj = space.getattr(w_exc, space.wrap('object'))
  137. except OperationError, e:
  138. if not e.match(space, space.w_AttributeError):
  139. raise
  140. raise OperationError(space.w_TypeError, space.wrap(
  141. "wrong exception"))
  142. delta = space.int_w(w_end) - space.int_w(w_start)
  143. if delta < 0 or not (space.isinstance_w(w_obj, space.w_str) or
  144. space.isinstance_w(w_obj, space.w_unicode)):
  145. raise OperationError(space.w_TypeError, space.wrap(
  146. "wrong exception"))
  147. def strict_errors(space, w_exc):
  148. check_exception(space, w_exc)
  149. if space.isinstance_w(w_exc, space.w_BaseException):
  150. raise OperationError(space.type(w_exc), w_exc)
  151. else:
  152. raise OperationError(space.w_TypeError, space.wrap(
  153. "codec must pass exception instance"))
  154. def ignore_errors(space, w_exc):
  155. check_exception(space, w_exc)
  156. w_end = space.getattr(w_exc, space.wrap('end'))
  157. return space.newtuple([space.wrap(u''), w_end])
  158. def replace_errors(space, w_exc):
  159. check_exception(space, w_exc)
  160. w_start = space.getattr(w_exc, space.wrap('start'))
  161. w_end = space.getattr(w_exc, space.wrap('end'))
  162. size = space.int_w(w_end) - space.int_w(w_start)
  163. if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
  164. text = u'?' * size
  165. return space.newtuple([space.wrap(text), w_end])
  166. elif space.isinstance_w(w_exc, space.w_UnicodeDecodeError):
  167. text = u'\ufffd'
  168. return space.newtuple([space.wrap(text), w_end])
  169. elif space.isinstance_w(w_exc, space.w_UnicodeTranslateError):
  170. text = u'\ufffd' * size
  171. return space.newtuple([space.wrap(text), w_end])
  172. else:
  173. typename = space.type(w_exc).getname(space)
  174. raise operationerrfmt(space.w_TypeError,
  175. "don't know how to handle %s in error callback", typename)
  176. def xmlcharrefreplace_errors(space, w_exc):
  177. check_exception(space, w_exc)
  178. if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
  179. obj = space.realunicode_w(space.getattr(w_exc, space.wrap('object')))
  180. start = space.int_w(space.getattr(w_exc, space.wrap('start')))
  181. w_end = space.getattr(w_exc, space.wrap('end'))
  182. end = space.int_w(w_end)
  183. builder = UnicodeBuilder()
  184. pos = start
  185. while pos < end:
  186. ch = obj[pos]
  187. builder.append(u"&#")
  188. builder.append(unicode(str(ord(ch))))
  189. builder.append(u";")
  190. pos += 1
  191. return space.newtuple([space.wrap(builder.build()), w_end])
  192. else:
  193. typename = space.type(w_exc).getname(space)
  194. raise operationerrfmt(space.w_TypeError,
  195. "don't know how to handle %s in error callback", typename)
  196. def backslashreplace_errors(space, w_exc):
  197. check_exception(space, w_exc)
  198. if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
  199. obj = space.realunicode_w(space.getattr(w_exc, space.wrap('object')))
  200. start = space.int_w(space.getattr(w_exc, space.wrap('start')))
  201. w_end = space.getattr(w_exc, space.wrap('end'))
  202. end = space.int_w(w_end)
  203. builder = UnicodeBuilder()
  204. pos = start
  205. while pos < end:
  206. oc = ord(obj[pos])
  207. num = hex(oc)
  208. if (oc >= 0x10000):
  209. builder.append(u"\\U")
  210. zeros = 8
  211. elif (oc >= 0x100):
  212. builder.append(u"\\u")
  213. zeros = 4
  214. else:
  215. builder.append(u"\\x")
  216. zeros = 2
  217. lnum = len(num)
  218. nb = zeros + 2 - lnum # num starts with '0x'
  219. if nb > 0:
  220. builder.append_multiple_char(u'0', nb)
  221. builder.append_slice(unicode(num), 2, lnum)
  222. pos += 1
  223. return space.newtuple([space.wrap(builder.build()), w_end])
  224. else:
  225. typename = space.type(w_exc).getname(space)
  226. raise operationerrfmt(space.w_TypeError,
  227. "don't know how to handle %s in error callback", typename)
  228. def register_builtin_error_handlers(space):
  229. "NOT_RPYTHON"
  230. state = space.fromcache(CodecState)
  231. for error in ("strict", "ignore", "replace", "xmlcharrefreplace",
  232. "backslashreplace"):
  233. name = error + "_errors"
  234. state.codec_error_registry[error] = space.wrap(interp2app(globals()[name]))
  235. @unwrap_spec(errors=str)
  236. def lookup_error(space, errors):
  237. """lookup_error(errors) -> handler
  238. Return the error handler for the specified error handling name
  239. or raise a LookupError, if no handler exists under this name.
  240. """
  241. state = space.fromcache(CodecState)
  242. try:
  243. w_err_handler = state.codec_error_registry[errors]
  244. except KeyError:
  245. raise operationerrfmt(
  246. space.w_LookupError,
  247. "unknown error handler name %s", errors)
  248. return w_err_handler
  249. @unwrap_spec(errors=str)
  250. def encode(space, w_obj, w_encoding=NoneNotWrapped, errors='strict'):
  251. """encode(obj, [encoding[,errors]]) -> object
  252. Encodes obj using the codec registered for encoding. encoding defaults
  253. to the default encoding. errors may be given to set a different error
  254. handling scheme. Default is 'strict' meaning that encoding errors raise
  255. a ValueError. Other possible values are 'ignore', 'replace' and
  256. 'xmlcharrefreplace' as well as any other name registered with
  257. codecs.register_error that can handle ValueErrors.
  258. """
  259. if w_encoding is None:
  260. encoding = space.sys.defaultencoding
  261. else:
  262. encoding = space.str_w(w_encoding)
  263. w_encoder = space.getitem(lookup_codec(space, encoding), space.wrap(0))
  264. w_res = space.call_function(w_encoder, w_obj, space.wrap(errors))
  265. return space.getitem(w_res, space.wrap(0))
  266. @unwrap_spec(s='bufferstr', errors='str_or_None')
  267. def buffer_encode(space, s, errors='strict'):
  268. return space.newtuple([space.wrap(s), space.wrap(len(s))])
  269. @unwrap_spec(errors=str)
  270. def decode(space, w_obj, w_encoding=NoneNotWrapped, errors='strict'):
  271. """decode(obj, [encoding[,errors]]) -> object
  272. Decodes obj using the codec registered for encoding. encoding defaults
  273. to the default encoding. errors may be given to set a different error
  274. handling scheme. Default is 'strict' meaning that encoding errors raise
  275. a ValueError. Other possible values are 'ignore' and 'replace'
  276. as well as any other name registerd with codecs.register_error that is
  277. able to handle ValueErrors.
  278. """
  279. if w_encoding is None:
  280. encoding = space.sys.defaultencoding
  281. else:
  282. encoding = space.str_w(w_encoding)
  283. w_decoder = space.getitem(lookup_codec(space, encoding), space.wrap(1))
  284. if space.is_true(w_decoder):
  285. w_res = space.call_function(w_decoder, w_obj, space.wrap(errors))
  286. if (not space.is_true(space.isinstance(w_res, space.w_tuple))
  287. or space.len_w(w_res) != 2):
  288. raise OperationError(
  289. space.w_TypeError,
  290. space.wrap("encoder must return a tuple (object, integer)"))
  291. return space.getitem(w_res, space.wrap(0))
  292. else:
  293. assert 0, "XXX, what to do here?"
  294. @unwrap_spec(errors=str)
  295. def register_error(space, errors, w_handler):
  296. """register_error(errors, handler)
  297. Register the specified error handler under the name
  298. errors. handler must be a callable object, that
  299. will be called with an exception instance containing
  300. information about the location of the encoding/decoding
  301. error and must return a (replacement, new position) tuple.
  302. """
  303. state = space.fromcache(CodecState)
  304. if space.is_true(space.callable(w_handler)):
  305. state.codec_error_registry[errors] = w_handler
  306. else:
  307. raise OperationError(
  308. space.w_TypeError,
  309. space.wrap("handler must be callable"))
  310. # ____________________________________________________________
  311. # delegation to runicode
  312. from pypy.rlib import runicode
  313. def make_raw_encoder(name):
  314. rname = "unicode_encode_%s" % (name.replace("_encode", ""), )
  315. assert hasattr(runicode, rname)
  316. def raw_encoder(space, uni):
  317. state = space.fromcache(CodecState)
  318. func = getattr(runicode, rname)
  319. errors = "strict"
  320. return func(uni, len(uni), errors, state.encode_error_handler)
  321. raw_encoder.func_name = rname
  322. return raw_encoder
  323. def make_raw_decoder(name):
  324. rname = "str_decode_%s" % (name.replace("_decode", ""), )
  325. assert hasattr(runicode, rname)
  326. def raw_decoder(space, string):
  327. final = True
  328. errors = "strict"
  329. state = space.fromcache(CodecState)
  330. func = getattr(runicode, rname)
  331. kwargs = {}
  332. if name == 'unicode_escape':
  333. unicodedata_handler = state.get_unicodedata_handler(space)
  334. result, consumed = func(string, len(string), errors,
  335. final, state.decode_error_handler,
  336. unicodedata_handler=unicodedata_handler)
  337. else:
  338. result, consumed = func(string, len(string), errors,
  339. final, state.decode_error_handler)
  340. return result
  341. raw_decoder.func_name = rname
  342. return raw_decoder
  343. def make_encoder_wrapper(name):
  344. rname = "unicode_encode_%s" % (name.replace("_encode", ""), )
  345. assert hasattr(runicode, rname)
  346. @unwrap_spec(uni=unicode, errors='str_or_None')
  347. def wrap_encoder(space, uni, errors="strict"):
  348. if errors is None:
  349. errors = 'strict'
  350. state = space.fromcache(CodecState)
  351. func = getattr(runicode, rname)
  352. result = func(uni, len(uni), errors, state.encode_error_handler)
  353. return space.newtuple([space.wrap(result), space.wrap(len(uni))])
  354. wrap_encoder.func_name = rname
  355. globals()[name] = wrap_encoder
  356. def make_decoder_wrapper(name):
  357. rname = "str_decode_%s" % (name.replace("_decode", ""), )
  358. assert hasattr(runicode, rname)
  359. @unwrap_spec(string='bufferstr', errors='str_or_None')
  360. def wrap_decoder(space, string, errors="strict", w_final=False):
  361. if errors is None:
  362. errors = 'strict'
  363. final = space.is_true(w_final)
  364. state = space.fromcache(CodecState)
  365. func = getattr(runicode, rname)
  366. result, consumed = func(string, len(string), errors,
  367. final, state.decode_error_handler)
  368. return space.newtuple([space.wrap(result), space.wrap(consumed)])
  369. wrap_decoder.func_name = rname
  370. globals()[name] = wrap_decoder
# Generate the module-level encoder wrappers.  Each call stores a function
# in globals() under the listed name; runicode must provide the matching
# unicode_encode_* function (asserted inside make_encoder_wrapper).
for encoders in [
         "ascii_encode",
         "latin_1_encode",
         "utf_7_encode",
         "utf_8_encode",
         "utf_16_encode",
         "utf_16_be_encode",
         "utf_16_le_encode",
         "utf_32_encode",
         "utf_32_be_encode",
         "utf_32_le_encode",
         "unicode_escape_encode",
         "raw_unicode_escape_encode",
         "unicode_internal_encode",
        ]:
    make_encoder_wrapper(encoders)

# Same for the decoders.  unicode_escape_decode and unicode_internal_decode
# are not listed here; they are defined by hand further down in this file.
for decoders in [
         "ascii_decode",
         "latin_1_decode",
         "utf_7_decode",
         "utf_8_decode",
         "utf_16_decode",
         "utf_16_be_decode",
         "utf_16_le_decode",
         "utf_32_decode",
         "utf_32_be_decode",
         "utf_32_le_decode",
         "raw_unicode_escape_decode",
         ]:
    make_decoder_wrapper(decoders)

# The mbcs wrappers are generated only when runicode provides the codec.
if hasattr(runicode, 'str_decode_mbcs'):
    make_encoder_wrapper('mbcs_encode')
    make_decoder_wrapper('mbcs_decode')
  404. @unwrap_spec(data=str, errors='str_or_None', byteorder=int)
  405. def utf_16_ex_decode(space, data, errors='strict', byteorder=0, w_final=False):
  406. if errors is None:
  407. errors = 'strict'
  408. final = space.is_true(w_final)
  409. state = space.fromcache(CodecState)
  410. if byteorder == 0:
  411. byteorder = 'native'
  412. elif byteorder == -1:
  413. byteorder = 'little'
  414. else:
  415. byteorder = 'big'
  416. consumed = len(data)
  417. if final:
  418. consumed = 0
  419. res, consumed, byteorder = runicode.str_decode_utf_16_helper(
  420. data, len(data), errors, final, state.decode_error_handler, byteorder)
  421. return space.newtuple([space.wrap(res), space.wrap(consumed),
  422. space.wrap(byteorder)])
  423. @unwrap_spec(data=str, errors='str_or_None', byteorder=int)
  424. def utf_32_ex_decode(space, data, errors='strict', byteorder=0, w_final=False):
  425. final = space.is_true(w_final)
  426. state = space.fromcache(CodecState)
  427. if byteorder == 0:
  428. byteorder = 'native'
  429. elif byteorder == -1:
  430. byteorder = 'little'
  431. else:
  432. byteorder = 'big'
  433. consumed = len(data)
  434. if final:
  435. consumed = 0
  436. res, consumed, byteorder = runicode.str_decode_utf_32_helper(
  437. data, len(data), errors, final, state.decode_error_handler, byteorder)
  438. return space.newtuple([space.wrap(res), space.wrap(consumed),
  439. space.wrap(byteorder)])
  440. # ____________________________________________________________
  441. # Charmap
  442. class Charmap_Decode:
  443. def __init__(self, space, w_mapping):
  444. self.space = space
  445. self.w_mapping = w_mapping
  446. # fast path for all the stuff in the encodings module
  447. if space.is_true(space.isinstance(w_mapping, space.w_tuple)):
  448. self.mapping_w = space.fixedview(w_mapping)
  449. else:
  450. self.mapping_w = None
  451. def get(self, ch, errorchar):
  452. space = self.space
  453. # get the character from the mapping
  454. if self.mapping_w is not None:
  455. w_ch = self.mapping_w[ord(ch)]
  456. else:
  457. try:
  458. w_ch = space.getitem(self.w_mapping, space.newint(ord(ch)))
  459. except OperationError, e:
  460. if not e.match(space, space.w_LookupError):
  461. raise
  462. return errorchar
  463. # Charmap may return a unicode string
  464. try:
  465. x = space.unicode_w(w_ch)
  466. except OperationError, e:
  467. if not e.match(space, space.w_TypeError):
  468. raise
  469. else:
  470. return x
  471. # Charmap may return a number
  472. try:
  473. x = space.int_w(w_ch)
  474. except OperationError:
  475. if not e.match(space, space.w_TypeError):
  476. raise
  477. else:
  478. if 0 <= x < 65536: # Even on wide unicode builds...
  479. return unichr(x)
  480. else:
  481. raise OperationError(space.w_TypeError, space.wrap(
  482. "character mapping must be in range(65536)"))
  483. # Charmap may return None
  484. if space.is_w(w_ch, space.w_None):
  485. return errorchar
  486. raise OperationError(space.w_TypeError, space.wrap("invalid mapping"))
  487. class Charmap_Encode:
  488. def __init__(self, space, w_mapping):
  489. self.space = space
  490. self.w_mapping = w_mapping
  491. def get(self, ch, errorchar):
  492. space = self.space
  493. # get the character from the mapping
  494. try:
  495. w_ch = space.getitem(self.w_mapping, space.newint(ord(ch)))
  496. except OperationError, e:
  497. if not e.match(space, space.w_LookupError):
  498. raise
  499. return errorchar
  500. # Charmap may return a string
  501. try:
  502. x = space.realstr_w(w_ch)
  503. except OperationError, e:
  504. if not e.match(space, space.w_TypeError):
  505. raise
  506. else:
  507. return x
  508. # Charmap may return a number
  509. try:
  510. x = space.int_w(w_ch)
  511. except OperationError:
  512. if not e.match(space, space.w_TypeError):
  513. raise
  514. else:
  515. if 0 <= x < 256:
  516. return chr(x)
  517. else:
  518. raise OperationError(space.w_TypeError, space.wrap(
  519. "character mapping must be in range(256)"))
  520. # Charmap may return None
  521. if space.is_w(w_ch, space.w_None):
  522. return errorchar
  523. raise OperationError(space.w_TypeError, space.wrap("invalid mapping"))
  524. @unwrap_spec(string=str, errors='str_or_None')
  525. def charmap_decode(space, string, errors="strict", w_mapping=None):
  526. if errors is None:
  527. errors = 'strict'
  528. if len(string) == 0:
  529. return space.newtuple([space.wrap(u''), space.wrap(0)])
  530. if space.is_w(w_mapping, space.w_None):
  531. mapping = None
  532. else:
  533. mapping = Charmap_Decode(space, w_mapping)
  534. final = True
  535. state = space.fromcache(CodecState)
  536. result, consumed = runicode.str_decode_charmap(
  537. string, len(string), errors,
  538. final, state.decode_error_handler, mapping)
  539. return space.newtuple([space.wrap(result), space.wrap(consumed)])
  540. @unwrap_spec(uni=unicode, errors='str_or_None')
  541. def charmap_encode(space, uni, errors="strict", w_mapping=None):
  542. if errors is None:
  543. errors = 'strict'
  544. if space.is_w(w_mapping, space.w_None):
  545. mapping = None
  546. else:
  547. mapping = Charmap_Encode(space, w_mapping)
  548. state = space.fromcache(CodecState)
  549. result = runicode.unicode_encode_charmap(
  550. uni, len(uni), errors,
  551. state.encode_error_handler, mapping)
  552. return space.newtuple([space.wrap(result), space.wrap(len(uni))])
  553. @unwrap_spec(chars=unicode)
  554. def charmap_build(space, chars):
  555. # XXX CPython sometimes uses a three-level trie
  556. w_charmap = space.newdict()
  557. for num in range(len(chars)):
  558. elem = chars[num]
  559. space.setitem(w_charmap, space.newint(ord(elem)), space.newint(num))
  560. return w_charmap
  561. # ____________________________________________________________
  562. # Unicode escape
  563. class UnicodeData_Handler:
  564. def __init__(self, space, w_getcode):
  565. self.space = space
  566. self.w_getcode = w_getcode
  567. def call(self, name):
  568. space = self.space
  569. try:
  570. w_code = space.call_function(self.w_getcode, space.wrap(name))
  571. except OperationError, e:
  572. if not e.match(space, space.w_KeyError):
  573. raise
  574. return -1
  575. return space.int_w(w_code)
  576. @unwrap_spec(string='bufferstr', errors='str_or_None')
  577. def unicode_escape_decode(space, string, errors="strict", w_final=False):
  578. if errors is None:
  579. errors = 'strict'
  580. final = space.is_true(w_final)
  581. state = space.fromcache(CodecState)
  582. errorhandler=state.decode_error_handler
  583. unicode_name_handler = state.get_unicodedata_handler(space)
  584. result, consumed = runicode.str_decode_unicode_escape(
  585. string, len(string), errors,
  586. final, state.decode_error_handler,
  587. unicode_name_handler)
  588. return space.newtuple([space.wrap(result), space.wrap(consumed)])
  589. # ____________________________________________________________
  590. # Unicode-internal
  591. @unwrap_spec(errors='str_or_None')
  592. def unicode_internal_decode(space, w_string, errors="strict"):
  593. if errors is None:
  594. errors = 'strict'
  595. # special case for this codec: unicodes are returned as is
  596. if space.isinstance_w(w_string, space.w_unicode):
  597. return space.newtuple([w_string, space.len(w_string)])
  598. string = space.str_w(w_string)
  599. if len(string) == 0:
  600. return space.newtuple([space.wrap(u''), space.wrap(0)])
  601. final = True
  602. state = space.fromcache(CodecState)
  603. result, consumed = runicode.str_decode_unicode_internal(
  604. string, len(string), errors,
  605. final, state.decode_error_handler)
  606. return space.newtuple([space.wrap(result), space.wrap(consumed)])
  607. # ____________________________________________________________
  608. # support for the "string escape" codec
  609. # This is a bytes-to-bytes transformation
  610. @unwrap_spec(data=str, errors='str_or_None')
  611. def escape_encode(space, data, errors='strict'):
  612. from pypy.objspace.std.stringobject import string_escape_encode
  613. result = string_escape_encode(data, quote="'")
  614. start = 1
  615. end = len(result) - 1
  616. assert end >= 0
  617. w_result = space.wrap(result[start:end])
  618. return space.newtuple([w_result, space.wrap(len(data))])
  619. @unwrap_spec(data=str, errors='str_or_None')
  620. def escape_decode(space, data, errors='strict'):
  621. from pypy.interpreter.pyparser.parsestring import PyString_DecodeEscape
  622. result = PyString_DecodeEscape(space, data, None)
  623. return space.newtuple([space.wrap(result), space.wrap(len(data))])