/Modules/_codecsmodule.c

http://unladen-swallow.googlecode.com/ · C · 1115 lines · 897 code · 146 blank · 72 comment · 119 complexity · 6977f73a355fb562f231bf895ab9c98c MD5 · raw file

  1. /* ------------------------------------------------------------------------
  2. _codecs -- Provides access to the codec registry and the builtin
  3. codecs.
  4. This module should never be imported directly. The standard library
  5. module "codecs" wraps this builtin module for use within Python.
  6. The codec registry is accessible via:
  7. register(search_function) -> None
  8. lookup(encoding) -> CodecInfo object
  9. The builtin Unicode codecs use the following interface:
  10. <encoding>_encode(Unicode_object[,errors='strict']) ->
  11. (string object, bytes consumed)
  12. <encoding>_decode(char_buffer_obj[,errors='strict']) ->
  13. (Unicode object, bytes consumed)
  14. <encoding>_encode() interfaces also accept non-Unicode object as
  15. input. The objects are then converted to Unicode using
  16. PyUnicode_FromObject() prior to applying the conversion.
  17. These <encoding>s are available: utf_8, unicode_escape,
  18. raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
  19. mbcs (on win32).
  20. Written by Marc-Andre Lemburg (mal@lemburg.com).
  21. Copyright (c) Corporation for National Research Initiatives.
  22. ------------------------------------------------------------------------ */
  23. #define PY_SSIZE_T_CLEAN
  24. #include "Python.h"
  25. /* --- Registry ----------------------------------------------------------- */
  26. PyDoc_STRVAR(register__doc__,
  27. "register(search_function)\n\
  28. \n\
  29. Register a codec search function. Search functions are expected to take\n\
  30. one argument, the encoding name in all lower case letters, and return\n\
  31. a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
  32. (or a CodecInfo object).");
  33. static
  34. PyObject *codec_register(PyObject *self, PyObject *search_function)
  35. {
  36. if (PyCodec_Register(search_function))
  37. return NULL;
  38. Py_RETURN_NONE;
  39. }
  40. PyDoc_STRVAR(lookup__doc__,
  41. "lookup(encoding) -> CodecInfo\n\
  42. \n\
  43. Looks up a codec tuple in the Python codec registry and returns\n\
  44. a CodecInfo object.");
  45. static
  46. PyObject *codec_lookup(PyObject *self, PyObject *args)
  47. {
  48. char *encoding;
  49. if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
  50. return NULL;
  51. return _PyCodec_Lookup(encoding);
  52. }
  53. PyDoc_STRVAR(encode__doc__,
  54. "encode(obj, [encoding[,errors]]) -> object\n\
  55. \n\
  56. Encodes obj using the codec registered for encoding. encoding defaults\n\
  57. to the default encoding. errors may be given to set a different error\n\
  58. handling scheme. Default is 'strict' meaning that encoding errors raise\n\
  59. a ValueError. Other possible values are 'ignore', 'replace' and\n\
  60. 'xmlcharrefreplace' as well as any other name registered with\n\
  61. codecs.register_error that can handle ValueErrors.");
  62. static PyObject *
  63. codec_encode(PyObject *self, PyObject *args)
  64. {
  65. const char *encoding = NULL;
  66. const char *errors = NULL;
  67. PyObject *v;
  68. if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
  69. return NULL;
  70. #ifdef Py_USING_UNICODE
  71. if (encoding == NULL)
  72. encoding = PyUnicode_GetDefaultEncoding();
  73. #else
  74. if (encoding == NULL) {
  75. PyErr_SetString(PyExc_ValueError, "no encoding specified");
  76. return NULL;
  77. }
  78. #endif
  79. /* Encode via the codec registry */
  80. return PyCodec_Encode(v, encoding, errors);
  81. }
  82. PyDoc_STRVAR(decode__doc__,
  83. "decode(obj, [encoding[,errors]]) -> object\n\
  84. \n\
  85. Decodes obj using the codec registered for encoding. encoding defaults\n\
  86. to the default encoding. errors may be given to set a different error\n\
  87. handling scheme. Default is 'strict' meaning that encoding errors raise\n\
  88. a ValueError. Other possible values are 'ignore' and 'replace'\n\
  89. as well as any other name registered with codecs.register_error that is\n\
  90. able to handle ValueErrors.");
  91. static PyObject *
  92. codec_decode(PyObject *self, PyObject *args)
  93. {
  94. const char *encoding = NULL;
  95. const char *errors = NULL;
  96. PyObject *v;
  97. if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
  98. return NULL;
  99. #ifdef Py_USING_UNICODE
  100. if (encoding == NULL)
  101. encoding = PyUnicode_GetDefaultEncoding();
  102. #else
  103. if (encoding == NULL) {
  104. PyErr_SetString(PyExc_ValueError, "no encoding specified");
  105. return NULL;
  106. }
  107. #endif
  108. /* Decode via the codec registry */
  109. return PyCodec_Decode(v, encoding, errors);
  110. }
  111. /* --- Helpers ------------------------------------------------------------ */
  112. static
  113. PyObject *codec_tuple(PyObject *unicode,
  114. Py_ssize_t len)
  115. {
  116. PyObject *v;
  117. if (unicode == NULL)
  118. return NULL;
  119. v = Py_BuildValue("On", unicode, len);
  120. Py_DECREF(unicode);
  121. return v;
  122. }
  123. /* --- String codecs ------------------------------------------------------ */
  124. static PyObject *
  125. escape_decode(PyObject *self,
  126. PyObject *args)
  127. {
  128. const char *errors = NULL;
  129. const char *data;
  130. Py_ssize_t size;
  131. if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
  132. &data, &size, &errors))
  133. return NULL;
  134. return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
  135. size);
  136. }
  137. static PyObject *
  138. escape_encode(PyObject *self,
  139. PyObject *args)
  140. {
  141. PyObject *str;
  142. const char *errors = NULL;
  143. char *buf;
  144. Py_ssize_t len;
  145. if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
  146. &PyString_Type, &str, &errors))
  147. return NULL;
  148. str = PyString_Repr(str, 0);
  149. if (!str)
  150. return NULL;
  151. /* The string will be quoted. Unquote, similar to unicode-escape. */
  152. buf = PyString_AS_STRING (str);
  153. len = PyString_GET_SIZE (str);
  154. memmove(buf, buf+1, len-2);
  155. if (_PyString_Resize(&str, len-2) < 0)
  156. return NULL;
  157. return codec_tuple(str, PyString_Size(str));
  158. }
  159. #ifdef Py_USING_UNICODE
  160. /* --- Decoder ------------------------------------------------------------ */
  161. static PyObject *
  162. unicode_internal_decode(PyObject *self,
  163. PyObject *args)
  164. {
  165. PyObject *obj;
  166. const char *errors = NULL;
  167. const char *data;
  168. Py_ssize_t size;
  169. if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
  170. &obj, &errors))
  171. return NULL;
  172. if (PyUnicode_Check(obj)) {
  173. Py_INCREF(obj);
  174. return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
  175. }
  176. else {
  177. if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
  178. return NULL;
  179. return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
  180. size);
  181. }
  182. }
  183. static PyObject *
  184. utf_7_decode(PyObject *self,
  185. PyObject *args)
  186. {
  187. Py_buffer pbuf;
  188. const char *errors = NULL;
  189. int final = 0;
  190. Py_ssize_t consumed;
  191. PyObject *decoded = NULL;
  192. if (!PyArg_ParseTuple(args, "s*|zi:utf_7_decode",
  193. &pbuf, &errors, &final))
  194. return NULL;
  195. consumed = pbuf.len;
  196. decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors,
  197. final ? NULL : &consumed);
  198. PyBuffer_Release(&pbuf);
  199. if (decoded == NULL)
  200. return NULL;
  201. return codec_tuple(decoded, consumed);
  202. }
  203. static PyObject *
  204. utf_8_decode(PyObject *self,
  205. PyObject *args)
  206. {
  207. Py_buffer pbuf;
  208. const char *errors = NULL;
  209. int final = 0;
  210. Py_ssize_t consumed;
  211. PyObject *decoded = NULL;
  212. if (!PyArg_ParseTuple(args, "s*|zi:utf_8_decode",
  213. &pbuf, &errors, &final))
  214. return NULL;
  215. consumed = pbuf.len;
  216. decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors,
  217. final ? NULL : &consumed);
  218. PyBuffer_Release(&pbuf);
  219. if (decoded == NULL)
  220. return NULL;
  221. return codec_tuple(decoded, consumed);
  222. }
  223. static PyObject *
  224. utf_16_decode(PyObject *self,
  225. PyObject *args)
  226. {
  227. Py_buffer pbuf;
  228. const char *errors = NULL;
  229. int byteorder = 0;
  230. int final = 0;
  231. Py_ssize_t consumed;
  232. PyObject *decoded;
  233. if (!PyArg_ParseTuple(args, "s*|zi:utf_16_decode",
  234. &pbuf, &errors, &final))
  235. return NULL;
  236. consumed = pbuf.len; /* This is overwritten unless final is true. */
  237. decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
  238. &byteorder, final ? NULL : &consumed);
  239. PyBuffer_Release(&pbuf);
  240. if (decoded == NULL)
  241. return NULL;
  242. return codec_tuple(decoded, consumed);
  243. }
  244. static PyObject *
  245. utf_16_le_decode(PyObject *self,
  246. PyObject *args)
  247. {
  248. Py_buffer pbuf;
  249. const char *errors = NULL;
  250. int byteorder = -1;
  251. int final = 0;
  252. Py_ssize_t consumed;
  253. PyObject *decoded = NULL;
  254. if (!PyArg_ParseTuple(args, "s*|zi:utf_16_le_decode",
  255. &pbuf, &errors, &final))
  256. return NULL;
  257. consumed = pbuf.len; /* This is overwritten unless final is true. */
  258. decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
  259. &byteorder, final ? NULL : &consumed);
  260. PyBuffer_Release(&pbuf);
  261. if (decoded == NULL)
  262. return NULL;
  263. return codec_tuple(decoded, consumed);
  264. }
  265. static PyObject *
  266. utf_16_be_decode(PyObject *self,
  267. PyObject *args)
  268. {
  269. Py_buffer pbuf;
  270. const char *errors = NULL;
  271. int byteorder = 1;
  272. int final = 0;
  273. Py_ssize_t consumed;
  274. PyObject *decoded = NULL;
  275. if (!PyArg_ParseTuple(args, "s*|zi:utf_16_be_decode",
  276. &pbuf, &errors, &final))
  277. return NULL;
  278. consumed = pbuf.len; /* This is overwritten unless final is true. */
  279. decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
  280. &byteorder, final ? NULL : &consumed);
  281. PyBuffer_Release(&pbuf);
  282. if (decoded == NULL)
  283. return NULL;
  284. return codec_tuple(decoded, consumed);
  285. }
  286. /* This non-standard version also provides access to the byteorder
  287. parameter of the builtin UTF-16 codec.
  288. It returns a tuple (unicode, bytesread, byteorder) with byteorder
  289. being the value in effect at the end of data.
  290. */
  291. static PyObject *
  292. utf_16_ex_decode(PyObject *self,
  293. PyObject *args)
  294. {
  295. Py_buffer pbuf;
  296. const char *errors = NULL;
  297. int byteorder = 0;
  298. PyObject *unicode, *tuple;
  299. int final = 0;
  300. Py_ssize_t consumed;
  301. if (!PyArg_ParseTuple(args, "s*|zii:utf_16_ex_decode",
  302. &pbuf, &errors, &byteorder, &final))
  303. return NULL;
  304. consumed = pbuf.len; /* This is overwritten unless final is true. */
  305. unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
  306. &byteorder, final ? NULL : &consumed);
  307. PyBuffer_Release(&pbuf);
  308. if (unicode == NULL)
  309. return NULL;
  310. tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
  311. Py_DECREF(unicode);
  312. return tuple;
  313. }
  314. static PyObject *
  315. utf_32_decode(PyObject *self,
  316. PyObject *args)
  317. {
  318. Py_buffer pbuf;
  319. const char *errors = NULL;
  320. int byteorder = 0;
  321. int final = 0;
  322. Py_ssize_t consumed;
  323. PyObject *decoded;
  324. if (!PyArg_ParseTuple(args, "s*|zi:utf_32_decode",
  325. &pbuf, &errors, &final))
  326. return NULL;
  327. consumed = pbuf.len; /* This is overwritten unless final is true. */
  328. decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
  329. &byteorder, final ? NULL : &consumed);
  330. PyBuffer_Release(&pbuf);
  331. if (decoded == NULL)
  332. return NULL;
  333. return codec_tuple(decoded, consumed);
  334. }
  335. static PyObject *
  336. utf_32_le_decode(PyObject *self,
  337. PyObject *args)
  338. {
  339. Py_buffer pbuf;
  340. const char *errors = NULL;
  341. int byteorder = -1;
  342. int final = 0;
  343. Py_ssize_t consumed;
  344. PyObject *decoded;
  345. if (!PyArg_ParseTuple(args, "s*|zi:utf_32_le_decode",
  346. &pbuf, &errors, &final))
  347. return NULL;
  348. consumed = pbuf.len; /* This is overwritten unless final is true. */
  349. decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
  350. &byteorder, final ? NULL : &consumed);
  351. PyBuffer_Release(&pbuf);
  352. if (decoded == NULL)
  353. return NULL;
  354. return codec_tuple(decoded, consumed);
  355. }
  356. static PyObject *
  357. utf_32_be_decode(PyObject *self,
  358. PyObject *args)
  359. {
  360. Py_buffer pbuf;
  361. const char *errors = NULL;
  362. int byteorder = 1;
  363. int final = 0;
  364. Py_ssize_t consumed;
  365. PyObject *decoded;
  366. if (!PyArg_ParseTuple(args, "s*|zi:utf_32_be_decode",
  367. &pbuf, &errors, &final))
  368. return NULL;
  369. consumed = pbuf.len; /* This is overwritten unless final is true. */
  370. decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
  371. &byteorder, final ? NULL : &consumed);
  372. PyBuffer_Release(&pbuf);
  373. if (decoded == NULL)
  374. return NULL;
  375. return codec_tuple(decoded, consumed);
  376. }
  377. /* This non-standard version also provides access to the byteorder
  378. parameter of the builtin UTF-32 codec.
  379. It returns a tuple (unicode, bytesread, byteorder) with byteorder
  380. being the value in effect at the end of data.
  381. */
  382. static PyObject *
  383. utf_32_ex_decode(PyObject *self,
  384. PyObject *args)
  385. {
  386. Py_buffer pbuf;
  387. const char *errors = NULL;
  388. int byteorder = 0;
  389. PyObject *unicode, *tuple;
  390. int final = 0;
  391. Py_ssize_t consumed;
  392. if (!PyArg_ParseTuple(args, "s*|zii:utf_32_ex_decode",
  393. &pbuf, &errors, &byteorder, &final))
  394. return NULL;
  395. consumed = pbuf.len; /* This is overwritten unless final is true. */
  396. unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
  397. &byteorder, final ? NULL : &consumed);
  398. PyBuffer_Release(&pbuf);
  399. if (unicode == NULL)
  400. return NULL;
  401. tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
  402. Py_DECREF(unicode);
  403. return tuple;
  404. }
  405. static PyObject *
  406. unicode_escape_decode(PyObject *self,
  407. PyObject *args)
  408. {
  409. Py_buffer pbuf;
  410. const char *errors = NULL;
  411. PyObject *unicode;
  412. if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode",
  413. &pbuf, &errors))
  414. return NULL;
  415. unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
  416. PyBuffer_Release(&pbuf);
  417. return codec_tuple(unicode, pbuf.len);
  418. }
  419. static PyObject *
  420. raw_unicode_escape_decode(PyObject *self,
  421. PyObject *args)
  422. {
  423. Py_buffer pbuf;
  424. const char *errors = NULL;
  425. PyObject *unicode;
  426. if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode",
  427. &pbuf, &errors))
  428. return NULL;
  429. unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
  430. PyBuffer_Release(&pbuf);
  431. return codec_tuple(unicode, pbuf.len);
  432. }
  433. static PyObject *
  434. latin_1_decode(PyObject *self,
  435. PyObject *args)
  436. {
  437. Py_buffer pbuf;
  438. PyObject *unicode;
  439. const char *errors = NULL;
  440. if (!PyArg_ParseTuple(args, "s*|z:latin_1_decode",
  441. &pbuf, &errors))
  442. return NULL;
  443. unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
  444. PyBuffer_Release(&pbuf);
  445. return codec_tuple(unicode, pbuf.len);
  446. }
  447. static PyObject *
  448. ascii_decode(PyObject *self,
  449. PyObject *args)
  450. {
  451. Py_buffer pbuf;
  452. PyObject *unicode;
  453. const char *errors = NULL;
  454. if (!PyArg_ParseTuple(args, "s*|z:ascii_decode",
  455. &pbuf, &errors))
  456. return NULL;
  457. unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
  458. PyBuffer_Release(&pbuf);
  459. return codec_tuple(unicode, pbuf.len);
  460. }
  461. static PyObject *
  462. charmap_decode(PyObject *self,
  463. PyObject *args)
  464. {
  465. Py_buffer pbuf;
  466. PyObject *unicode;
  467. const char *errors = NULL;
  468. PyObject *mapping = NULL;
  469. if (!PyArg_ParseTuple(args, "s*|zO:charmap_decode",
  470. &pbuf, &errors, &mapping))
  471. return NULL;
  472. if (mapping == Py_None)
  473. mapping = NULL;
  474. unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
  475. PyBuffer_Release(&pbuf);
  476. return codec_tuple(unicode, pbuf.len);
  477. }
  478. #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
  479. static PyObject *
  480. mbcs_decode(PyObject *self,
  481. PyObject *args)
  482. {
  483. Py_buffer pbuf;
  484. const char *errors = NULL;
  485. int final = 0;
  486. Py_ssize_t consumed;
  487. PyObject *decoded = NULL;
  488. if (!PyArg_ParseTuple(args, "s*|zi:mbcs_decode",
  489. &pbuf, &errors, &final))
  490. return NULL;
  491. consumed = pbuf.len;
  492. decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors,
  493. final ? NULL : &consumed);
  494. PyBuffer_Release(&pbuf);
  495. if (decoded == NULL)
  496. return NULL;
  497. return codec_tuple(decoded, consumed);
  498. }
  499. #endif /* MS_WINDOWS */
  500. /* --- Encoder ------------------------------------------------------------ */
  501. static PyObject *
  502. readbuffer_encode(PyObject *self,
  503. PyObject *args)
  504. {
  505. const char *data;
  506. Py_ssize_t size;
  507. const char *errors = NULL;
  508. if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
  509. &data, &size, &errors))
  510. return NULL;
  511. return codec_tuple(PyString_FromStringAndSize(data, size),
  512. size);
  513. }
  514. static PyObject *
  515. charbuffer_encode(PyObject *self,
  516. PyObject *args)
  517. {
  518. const char *data;
  519. Py_ssize_t size;
  520. const char *errors = NULL;
  521. if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
  522. &data, &size, &errors))
  523. return NULL;
  524. return codec_tuple(PyString_FromStringAndSize(data, size),
  525. size);
  526. }
  527. static PyObject *
  528. unicode_internal_encode(PyObject *self,
  529. PyObject *args)
  530. {
  531. PyObject *obj;
  532. const char *errors = NULL;
  533. const char *data;
  534. Py_ssize_t size;
  535. if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
  536. &obj, &errors))
  537. return NULL;
  538. if (PyUnicode_Check(obj)) {
  539. data = PyUnicode_AS_DATA(obj);
  540. size = PyUnicode_GET_DATA_SIZE(obj);
  541. return codec_tuple(PyString_FromStringAndSize(data, size),
  542. size);
  543. }
  544. else {
  545. if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
  546. return NULL;
  547. return codec_tuple(PyString_FromStringAndSize(data, size),
  548. size);
  549. }
  550. }
  551. static PyObject *
  552. utf_7_encode(PyObject *self,
  553. PyObject *args)
  554. {
  555. PyObject *str, *v;
  556. const char *errors = NULL;
  557. if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
  558. &str, &errors))
  559. return NULL;
  560. str = PyUnicode_FromObject(str);
  561. if (str == NULL)
  562. return NULL;
  563. v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
  564. PyUnicode_GET_SIZE(str),
  565. 0,
  566. 0,
  567. errors),
  568. PyUnicode_GET_SIZE(str));
  569. Py_DECREF(str);
  570. return v;
  571. }
  572. static PyObject *
  573. utf_8_encode(PyObject *self,
  574. PyObject *args)
  575. {
  576. PyObject *str, *v;
  577. const char *errors = NULL;
  578. if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
  579. &str, &errors))
  580. return NULL;
  581. str = PyUnicode_FromObject(str);
  582. if (str == NULL)
  583. return NULL;
  584. v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
  585. PyUnicode_GET_SIZE(str),
  586. errors),
  587. PyUnicode_GET_SIZE(str));
  588. Py_DECREF(str);
  589. return v;
  590. }
  591. /* This version provides access to the byteorder parameter of the
  592. builtin UTF-16 codecs as optional third argument. It defaults to 0
  593. which means: use the native byte order and prepend the data with a
  594. BOM mark.
  595. */
  596. static PyObject *
  597. utf_16_encode(PyObject *self,
  598. PyObject *args)
  599. {
  600. PyObject *str, *v;
  601. const char *errors = NULL;
  602. int byteorder = 0;
  603. if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
  604. &str, &errors, &byteorder))
  605. return NULL;
  606. str = PyUnicode_FromObject(str);
  607. if (str == NULL)
  608. return NULL;
  609. v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
  610. PyUnicode_GET_SIZE(str),
  611. errors,
  612. byteorder),
  613. PyUnicode_GET_SIZE(str));
  614. Py_DECREF(str);
  615. return v;
  616. }
  617. static PyObject *
  618. utf_16_le_encode(PyObject *self,
  619. PyObject *args)
  620. {
  621. PyObject *str, *v;
  622. const char *errors = NULL;
  623. if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
  624. &str, &errors))
  625. return NULL;
  626. str = PyUnicode_FromObject(str);
  627. if (str == NULL)
  628. return NULL;
  629. v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
  630. PyUnicode_GET_SIZE(str),
  631. errors,
  632. -1),
  633. PyUnicode_GET_SIZE(str));
  634. Py_DECREF(str);
  635. return v;
  636. }
  637. static PyObject *
  638. utf_16_be_encode(PyObject *self,
  639. PyObject *args)
  640. {
  641. PyObject *str, *v;
  642. const char *errors = NULL;
  643. if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
  644. &str, &errors))
  645. return NULL;
  646. str = PyUnicode_FromObject(str);
  647. if (str == NULL)
  648. return NULL;
  649. v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
  650. PyUnicode_GET_SIZE(str),
  651. errors,
  652. +1),
  653. PyUnicode_GET_SIZE(str));
  654. Py_DECREF(str);
  655. return v;
  656. }
  657. /* This version provides access to the byteorder parameter of the
  658. builtin UTF-32 codecs as optional third argument. It defaults to 0
  659. which means: use the native byte order and prepend the data with a
  660. BOM mark.
  661. */
  662. static PyObject *
  663. utf_32_encode(PyObject *self,
  664. PyObject *args)
  665. {
  666. PyObject *str, *v;
  667. const char *errors = NULL;
  668. int byteorder = 0;
  669. if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
  670. &str, &errors, &byteorder))
  671. return NULL;
  672. str = PyUnicode_FromObject(str);
  673. if (str == NULL)
  674. return NULL;
  675. v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
  676. PyUnicode_GET_SIZE(str),
  677. errors,
  678. byteorder),
  679. PyUnicode_GET_SIZE(str));
  680. Py_DECREF(str);
  681. return v;
  682. }
  683. static PyObject *
  684. utf_32_le_encode(PyObject *self,
  685. PyObject *args)
  686. {
  687. PyObject *str, *v;
  688. const char *errors = NULL;
  689. if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
  690. &str, &errors))
  691. return NULL;
  692. str = PyUnicode_FromObject(str);
  693. if (str == NULL)
  694. return NULL;
  695. v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
  696. PyUnicode_GET_SIZE(str),
  697. errors,
  698. -1),
  699. PyUnicode_GET_SIZE(str));
  700. Py_DECREF(str);
  701. return v;
  702. }
  703. static PyObject *
  704. utf_32_be_encode(PyObject *self,
  705. PyObject *args)
  706. {
  707. PyObject *str, *v;
  708. const char *errors = NULL;
  709. if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
  710. &str, &errors))
  711. return NULL;
  712. str = PyUnicode_FromObject(str);
  713. if (str == NULL)
  714. return NULL;
  715. v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
  716. PyUnicode_GET_SIZE(str),
  717. errors,
  718. +1),
  719. PyUnicode_GET_SIZE(str));
  720. Py_DECREF(str);
  721. return v;
  722. }
  723. static PyObject *
  724. unicode_escape_encode(PyObject *self,
  725. PyObject *args)
  726. {
  727. PyObject *str, *v;
  728. const char *errors = NULL;
  729. if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
  730. &str, &errors))
  731. return NULL;
  732. str = PyUnicode_FromObject(str);
  733. if (str == NULL)
  734. return NULL;
  735. v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
  736. PyUnicode_GET_SIZE(str)),
  737. PyUnicode_GET_SIZE(str));
  738. Py_DECREF(str);
  739. return v;
  740. }
  741. static PyObject *
  742. raw_unicode_escape_encode(PyObject *self,
  743. PyObject *args)
  744. {
  745. PyObject *str, *v;
  746. const char *errors = NULL;
  747. if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
  748. &str, &errors))
  749. return NULL;
  750. str = PyUnicode_FromObject(str);
  751. if (str == NULL)
  752. return NULL;
  753. v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
  754. PyUnicode_AS_UNICODE(str),
  755. PyUnicode_GET_SIZE(str)),
  756. PyUnicode_GET_SIZE(str));
  757. Py_DECREF(str);
  758. return v;
  759. }
  760. static PyObject *
  761. latin_1_encode(PyObject *self,
  762. PyObject *args)
  763. {
  764. PyObject *str, *v;
  765. const char *errors = NULL;
  766. if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
  767. &str, &errors))
  768. return NULL;
  769. str = PyUnicode_FromObject(str);
  770. if (str == NULL)
  771. return NULL;
  772. v = codec_tuple(PyUnicode_EncodeLatin1(
  773. PyUnicode_AS_UNICODE(str),
  774. PyUnicode_GET_SIZE(str),
  775. errors),
  776. PyUnicode_GET_SIZE(str));
  777. Py_DECREF(str);
  778. return v;
  779. }
  780. static PyObject *
  781. ascii_encode(PyObject *self,
  782. PyObject *args)
  783. {
  784. PyObject *str, *v;
  785. const char *errors = NULL;
  786. if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
  787. &str, &errors))
  788. return NULL;
  789. str = PyUnicode_FromObject(str);
  790. if (str == NULL)
  791. return NULL;
  792. v = codec_tuple(PyUnicode_EncodeASCII(
  793. PyUnicode_AS_UNICODE(str),
  794. PyUnicode_GET_SIZE(str),
  795. errors),
  796. PyUnicode_GET_SIZE(str));
  797. Py_DECREF(str);
  798. return v;
  799. }
  800. static PyObject *
  801. charmap_encode(PyObject *self,
  802. PyObject *args)
  803. {
  804. PyObject *str, *v;
  805. const char *errors = NULL;
  806. PyObject *mapping = NULL;
  807. if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
  808. &str, &errors, &mapping))
  809. return NULL;
  810. if (mapping == Py_None)
  811. mapping = NULL;
  812. str = PyUnicode_FromObject(str);
  813. if (str == NULL)
  814. return NULL;
  815. v = codec_tuple(PyUnicode_EncodeCharmap(
  816. PyUnicode_AS_UNICODE(str),
  817. PyUnicode_GET_SIZE(str),
  818. mapping,
  819. errors),
  820. PyUnicode_GET_SIZE(str));
  821. Py_DECREF(str);
  822. return v;
  823. }
  824. static PyObject*
  825. charmap_build(PyObject *self, PyObject *args)
  826. {
  827. PyObject *map;
  828. if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
  829. return NULL;
  830. return PyUnicode_BuildEncodingMap(map);
  831. }
  832. #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
  833. static PyObject *
  834. mbcs_encode(PyObject *self,
  835. PyObject *args)
  836. {
  837. PyObject *str, *v;
  838. const char *errors = NULL;
  839. if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
  840. &str, &errors))
  841. return NULL;
  842. str = PyUnicode_FromObject(str);
  843. if (str == NULL)
  844. return NULL;
  845. v = codec_tuple(PyUnicode_EncodeMBCS(
  846. PyUnicode_AS_UNICODE(str),
  847. PyUnicode_GET_SIZE(str),
  848. errors),
  849. PyUnicode_GET_SIZE(str));
  850. Py_DECREF(str);
  851. return v;
  852. }
  853. #endif /* MS_WINDOWS */
  854. #endif /* Py_USING_UNICODE */
  855. /* --- Error handler registry --------------------------------------------- */
  856. PyDoc_STRVAR(register_error__doc__,
  857. "register_error(errors, handler)\n\
  858. \n\
  859. Register the specified error handler under the name\n\
  860. errors. handler must be a callable object, that\n\
  861. will be called with an exception instance containing\n\
  862. information about the location of the encoding/decoding\n\
  863. error and must return a (replacement, new position) tuple.");
  864. static PyObject *register_error(PyObject *self, PyObject *args)
  865. {
  866. const char *name;
  867. PyObject *handler;
  868. if (!PyArg_ParseTuple(args, "sO:register_error",
  869. &name, &handler))
  870. return NULL;
  871. if (PyCodec_RegisterError(name, handler))
  872. return NULL;
  873. Py_RETURN_NONE;
  874. }
  875. PyDoc_STRVAR(lookup_error__doc__,
  876. "lookup_error(errors) -> handler\n\
  877. \n\
  878. Return the error handler for the specified error handling name\n\
  879. or raise a LookupError, if no handler exists under this name.");
  880. static PyObject *lookup_error(PyObject *self, PyObject *args)
  881. {
  882. const char *name;
  883. if (!PyArg_ParseTuple(args, "s:lookup_error",
  884. &name))
  885. return NULL;
  886. return PyCodec_LookupError(name);
  887. }
  888. /* --- Module API --------------------------------------------------------- */
  889. static PyMethodDef _codecs_functions[] = {
  890. {"register", codec_register, METH_O,
  891. register__doc__},
  892. {"lookup", codec_lookup, METH_VARARGS,
  893. lookup__doc__},
  894. {"encode", codec_encode, METH_VARARGS,
  895. encode__doc__},
  896. {"decode", codec_decode, METH_VARARGS,
  897. decode__doc__},
  898. {"escape_encode", escape_encode, METH_VARARGS},
  899. {"escape_decode", escape_decode, METH_VARARGS},
  900. #ifdef Py_USING_UNICODE
  901. {"utf_8_encode", utf_8_encode, METH_VARARGS},
  902. {"utf_8_decode", utf_8_decode, METH_VARARGS},
  903. {"utf_7_encode", utf_7_encode, METH_VARARGS},
  904. {"utf_7_decode", utf_7_decode, METH_VARARGS},
  905. {"utf_16_encode", utf_16_encode, METH_VARARGS},
  906. {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
  907. {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
  908. {"utf_16_decode", utf_16_decode, METH_VARARGS},
  909. {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
  910. {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
  911. {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
  912. {"utf_32_encode", utf_32_encode, METH_VARARGS},
  913. {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
  914. {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
  915. {"utf_32_decode", utf_32_decode, METH_VARARGS},
  916. {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
  917. {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
  918. {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
  919. {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
  920. {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
  921. {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
  922. {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
  923. {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
  924. {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
  925. {"latin_1_encode", latin_1_encode, METH_VARARGS},
  926. {"latin_1_decode", latin_1_decode, METH_VARARGS},
  927. {"ascii_encode", ascii_encode, METH_VARARGS},
  928. {"ascii_decode", ascii_decode, METH_VARARGS},
  929. {"charmap_encode", charmap_encode, METH_VARARGS},
  930. {"charmap_decode", charmap_decode, METH_VARARGS},
  931. {"charmap_build", charmap_build, METH_VARARGS},
  932. {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
  933. {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
  934. #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
  935. {"mbcs_encode", mbcs_encode, METH_VARARGS},
  936. {"mbcs_decode", mbcs_decode, METH_VARARGS},
  937. #endif
  938. #endif /* Py_USING_UNICODE */
  939. {"register_error", register_error, METH_VARARGS,
  940. register_error__doc__},
  941. {"lookup_error", lookup_error, METH_VARARGS,
  942. lookup_error__doc__},
  943. {NULL, NULL} /* sentinel */
  944. };
  945. PyMODINIT_FUNC
  946. init_codecs(void)
  947. {
  948. Py_InitModule("_codecs", _codecs_functions);
  949. }