PageRenderTime 712ms CodeModel.GetById 161ms app.highlight 374ms RepoModel.GetById 169ms app.codeStats 0ms

/Modules/_codecsmodule.c

http://unladen-swallow.googlecode.com/
C | 1115 lines | 897 code | 146 blank | 72 comment | 119 complexity | 6977f73a355fb562f231bf895ab9c98c MD5 | raw file
   1/* ------------------------------------------------------------------------
   2
   3   _codecs -- Provides access to the codec registry and the builtin
   4              codecs.
   5
   6   This module should never be imported directly. The standard library
   7   module "codecs" wraps this builtin module for use within Python.
   8
   9   The codec registry is accessible via:
  10
  11     register(search_function) -> None
  12
  13     lookup(encoding) -> CodecInfo object
  14
  15   The builtin Unicode codecs use the following interface:
  16
  17     <encoding>_encode(Unicode_object[,errors='strict']) ->
  18     	(string object, bytes consumed)
  19
  20     <encoding>_decode(char_buffer_obj[,errors='strict']) ->
  21        (Unicode object, bytes consumed)
  22
  23   <encoding>_encode() interfaces also accept non-Unicode object as
  24   input. The objects are then converted to Unicode using
  25   PyUnicode_FromObject() prior to applying the conversion.
  26
  27   These <encoding>s are available: utf_8, unicode_escape,
  28   raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
  29   mbcs (on win32).
  30
  31
  32Written by Marc-Andre Lemburg (mal@lemburg.com).
  33
  34Copyright (c) Corporation for National Research Initiatives.
  35
  36   ------------------------------------------------------------------------ */
  37
  38#define PY_SSIZE_T_CLEAN
  39#include "Python.h"
  40
  41/* --- Registry ----------------------------------------------------------- */
  42
  43PyDoc_STRVAR(register__doc__,
  44"register(search_function)\n\
  45\n\
  46Register a codec search function. Search functions are expected to take\n\
  47one argument, the encoding name in all lower case letters, and return\n\
  48a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
  49(or a CodecInfo object).");
  50
  51static
  52PyObject *codec_register(PyObject *self, PyObject *search_function)
  53{
  54    if (PyCodec_Register(search_function))
  55        return NULL;
  56
  57    Py_RETURN_NONE;
  58}
  59
  60PyDoc_STRVAR(lookup__doc__,
  61"lookup(encoding) -> CodecInfo\n\
  62\n\
  63Looks up a codec tuple in the Python codec registry and returns\n\
  64a CodecInfo object.");
  65
  66static
  67PyObject *codec_lookup(PyObject *self, PyObject *args)
  68{
  69    char *encoding;
  70
  71    if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
  72        return NULL;
  73
  74    return _PyCodec_Lookup(encoding);
  75}
  76
  77PyDoc_STRVAR(encode__doc__,
  78"encode(obj, [encoding[,errors]]) -> object\n\
  79\n\
  80Encodes obj using the codec registered for encoding. encoding defaults\n\
  81to the default encoding. errors may be given to set a different error\n\
  82handling scheme. Default is 'strict' meaning that encoding errors raise\n\
  83a ValueError. Other possible values are 'ignore', 'replace' and\n\
  84'xmlcharrefreplace' as well as any other name registered with\n\
  85codecs.register_error that can handle ValueErrors.");
  86
  87static PyObject *
  88codec_encode(PyObject *self, PyObject *args)
  89{
  90    const char *encoding = NULL;
  91    const char *errors = NULL;
  92    PyObject *v;
  93
  94    if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
  95        return NULL;
  96
  97#ifdef Py_USING_UNICODE
  98    if (encoding == NULL)
  99	encoding = PyUnicode_GetDefaultEncoding();
 100#else
 101    if (encoding == NULL) {
 102	PyErr_SetString(PyExc_ValueError, "no encoding specified");
 103	return NULL;
 104    }
 105#endif
 106
 107    /* Encode via the codec registry */
 108    return PyCodec_Encode(v, encoding, errors);
 109}
 110
 111PyDoc_STRVAR(decode__doc__,
 112"decode(obj, [encoding[,errors]]) -> object\n\
 113\n\
 114Decodes obj using the codec registered for encoding. encoding defaults\n\
 115to the default encoding. errors may be given to set a different error\n\
 116handling scheme. Default is 'strict' meaning that encoding errors raise\n\
 117a ValueError. Other possible values are 'ignore' and 'replace'\n\
 118as well as any other name registered with codecs.register_error that is\n\
 119able to handle ValueErrors.");
 120
 121static PyObject *
 122codec_decode(PyObject *self, PyObject *args)
 123{
 124    const char *encoding = NULL;
 125    const char *errors = NULL;
 126    PyObject *v;
 127
 128    if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
 129        return NULL;
 130
 131#ifdef Py_USING_UNICODE
 132    if (encoding == NULL)
 133	encoding = PyUnicode_GetDefaultEncoding();
 134#else
 135    if (encoding == NULL) {
 136	PyErr_SetString(PyExc_ValueError, "no encoding specified");
 137	return NULL;
 138    }
 139#endif
 140
 141    /* Decode via the codec registry */
 142    return PyCodec_Decode(v, encoding, errors);
 143}
 144
 145/* --- Helpers ------------------------------------------------------------ */
 146
 147static
 148PyObject *codec_tuple(PyObject *unicode,
 149		      Py_ssize_t len)
 150{
 151    PyObject *v;
 152    if (unicode == NULL)
 153        return NULL;
 154    v = Py_BuildValue("On", unicode, len);
 155    Py_DECREF(unicode);
 156    return v;
 157}
 158
 159/* --- String codecs ------------------------------------------------------ */
 160static PyObject *
 161escape_decode(PyObject *self,
 162	      PyObject *args)
 163{
 164    const char *errors = NULL;
 165    const char *data;
 166    Py_ssize_t size;
 167
 168    if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
 169			  &data, &size, &errors))
 170	return NULL;
 171    return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
 172		       size);
 173}
 174
 175static PyObject *
 176escape_encode(PyObject *self,
 177	      PyObject *args)
 178{
 179	PyObject *str;
 180	const char *errors = NULL;
 181	char *buf;
 182	Py_ssize_t len;
 183
 184	if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
 185			      &PyString_Type, &str, &errors))
 186		return NULL;
 187
 188	str = PyString_Repr(str, 0);
 189	if (!str)
 190		return NULL;
 191
 192	/* The string will be quoted. Unquote, similar to unicode-escape. */
 193	buf = PyString_AS_STRING (str);
 194	len = PyString_GET_SIZE (str);
 195	memmove(buf, buf+1, len-2);
 196	if (_PyString_Resize(&str, len-2) < 0)
 197		return NULL;
 198	
 199	return codec_tuple(str, PyString_Size(str));
 200}
 201
 202#ifdef Py_USING_UNICODE
 203/* --- Decoder ------------------------------------------------------------ */
 204
 205static PyObject *
 206unicode_internal_decode(PyObject *self,
 207			PyObject *args)
 208{
 209    PyObject *obj;
 210    const char *errors = NULL;
 211    const char *data;
 212    Py_ssize_t size;
 213
 214    if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
 215			  &obj, &errors))
 216	return NULL;
 217
 218    if (PyUnicode_Check(obj)) {
 219	Py_INCREF(obj);
 220	return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
 221    }
 222    else {
 223	if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
 224	    return NULL;
 225
 226	return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
 227			   size);
 228    }
 229}
 230
 231static PyObject *
 232utf_7_decode(PyObject *self,
 233             PyObject *args)
 234{
 235	Py_buffer pbuf;
 236    const char *errors = NULL;
 237    int final = 0;
 238    Py_ssize_t consumed;
 239    PyObject *decoded = NULL;
 240
 241    if (!PyArg_ParseTuple(args, "s*|zi:utf_7_decode",
 242			  &pbuf, &errors, &final))
 243	return NULL;
 244    consumed = pbuf.len;
 245
 246    decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors,
 247					   final ? NULL : &consumed);
 248	PyBuffer_Release(&pbuf);
 249    if (decoded == NULL)
 250        return NULL;
 251    return codec_tuple(decoded, consumed);
 252}
 253
 254static PyObject *
 255utf_8_decode(PyObject *self,
 256	    PyObject *args)
 257{
 258	Py_buffer pbuf;
 259    const char *errors = NULL;
 260    int final = 0;
 261    Py_ssize_t consumed;
 262    PyObject *decoded = NULL;
 263
 264    if (!PyArg_ParseTuple(args, "s*|zi:utf_8_decode",
 265			  &pbuf, &errors, &final))
 266	return NULL;
 267    consumed = pbuf.len;
 268
 269    decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors,
 270					   final ? NULL : &consumed);
 271	PyBuffer_Release(&pbuf);
 272    if (decoded == NULL)
 273	return NULL;
 274    return codec_tuple(decoded, consumed);
 275}
 276
 277static PyObject *
 278utf_16_decode(PyObject *self,
 279	    PyObject *args)
 280{
 281	Py_buffer pbuf;
 282    const char *errors = NULL;
 283    int byteorder = 0;
 284    int final = 0;
 285    Py_ssize_t consumed;
 286    PyObject *decoded;
 287
 288    if (!PyArg_ParseTuple(args, "s*|zi:utf_16_decode",
 289			  &pbuf, &errors, &final))
 290	return NULL;
 291    consumed = pbuf.len; /* This is overwritten unless final is true. */
 292    decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
 293					&byteorder, final ? NULL : &consumed);
 294	PyBuffer_Release(&pbuf);
 295    if (decoded == NULL)
 296	return NULL;
 297    return codec_tuple(decoded, consumed);
 298}
 299
 300static PyObject *
 301utf_16_le_decode(PyObject *self,
 302		 PyObject *args)
 303{
 304	Py_buffer pbuf;
 305    const char *errors = NULL;
 306    int byteorder = -1;
 307    int final = 0;
 308    Py_ssize_t consumed;
 309    PyObject *decoded = NULL;
 310
 311    if (!PyArg_ParseTuple(args, "s*|zi:utf_16_le_decode",
 312			  &pbuf, &errors, &final))
 313	return NULL;
 314
 315    consumed = pbuf.len; /* This is overwritten unless final is true. */
 316    decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
 317	&byteorder, final ? NULL : &consumed);
 318	PyBuffer_Release(&pbuf);
 319    if (decoded == NULL)
 320	return NULL;
 321    return codec_tuple(decoded, consumed);
 322}
 323
 324static PyObject *
 325utf_16_be_decode(PyObject *self,
 326		 PyObject *args)
 327{
 328	Py_buffer pbuf;
 329    const char *errors = NULL;
 330    int byteorder = 1;
 331    int final = 0;
 332    Py_ssize_t consumed;
 333    PyObject *decoded = NULL;
 334
 335    if (!PyArg_ParseTuple(args, "s*|zi:utf_16_be_decode",
 336			  &pbuf, &errors, &final))
 337	return NULL;
 338
 339    consumed = pbuf.len; /* This is overwritten unless final is true. */
 340    decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
 341	&byteorder, final ? NULL : &consumed);
 342	PyBuffer_Release(&pbuf);
 343    if (decoded == NULL)
 344	return NULL;
 345    return codec_tuple(decoded, consumed);
 346}
 347
 348/* This non-standard version also provides access to the byteorder
 349   parameter of the builtin UTF-16 codec.
 350
 351   It returns a tuple (unicode, bytesread, byteorder) with byteorder
 352   being the value in effect at the end of data.
 353
 354*/
 355
 356static PyObject *
 357utf_16_ex_decode(PyObject *self,
 358		 PyObject *args)
 359{
 360	Py_buffer pbuf;
 361    const char *errors = NULL;
 362    int byteorder = 0;
 363    PyObject *unicode, *tuple;
 364    int final = 0;
 365    Py_ssize_t consumed;
 366
 367    if (!PyArg_ParseTuple(args, "s*|zii:utf_16_ex_decode",
 368			  &pbuf, &errors, &byteorder, &final))
 369	return NULL;
 370    consumed = pbuf.len; /* This is overwritten unless final is true. */
 371    unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
 372					&byteorder, final ? NULL : &consumed);
 373	PyBuffer_Release(&pbuf);
 374    if (unicode == NULL)
 375	return NULL;
 376    tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
 377    Py_DECREF(unicode);
 378    return tuple;
 379}
 380
 381static PyObject *
 382utf_32_decode(PyObject *self,
 383	    PyObject *args)
 384{
 385	Py_buffer pbuf;
 386    const char *errors = NULL;
 387    int byteorder = 0;
 388    int final = 0;
 389    Py_ssize_t consumed;
 390    PyObject *decoded;
 391
 392    if (!PyArg_ParseTuple(args, "s*|zi:utf_32_decode",
 393			  &pbuf, &errors, &final))
 394	return NULL;
 395    consumed = pbuf.len; /* This is overwritten unless final is true. */
 396    decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
 397					&byteorder, final ? NULL : &consumed);
 398	PyBuffer_Release(&pbuf);
 399    if (decoded == NULL)
 400	return NULL;
 401    return codec_tuple(decoded, consumed);
 402}
 403
 404static PyObject *
 405utf_32_le_decode(PyObject *self,
 406		 PyObject *args)
 407{
 408	Py_buffer pbuf;
 409    const char *errors = NULL;
 410    int byteorder = -1;
 411    int final = 0;
 412    Py_ssize_t consumed;
 413    PyObject *decoded;
 414
 415    if (!PyArg_ParseTuple(args, "s*|zi:utf_32_le_decode",
 416			  &pbuf, &errors, &final))
 417	return NULL;
 418    consumed = pbuf.len; /* This is overwritten unless final is true. */
 419    decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
 420					&byteorder, final ? NULL : &consumed);
 421	PyBuffer_Release(&pbuf);
 422    if (decoded == NULL)
 423	return NULL;
 424    return codec_tuple(decoded, consumed);
 425}
 426
 427static PyObject *
 428utf_32_be_decode(PyObject *self,
 429		 PyObject *args)
 430{
 431	Py_buffer pbuf;
 432    const char *errors = NULL;
 433    int byteorder = 1;
 434    int final = 0;
 435    Py_ssize_t consumed;
 436    PyObject *decoded;
 437
 438    if (!PyArg_ParseTuple(args, "s*|zi:utf_32_be_decode",
 439			  &pbuf, &errors, &final))
 440	return NULL;
 441    consumed = pbuf.len; /* This is overwritten unless final is true. */
 442    decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
 443					&byteorder, final ? NULL : &consumed);
 444	PyBuffer_Release(&pbuf);
 445    if (decoded == NULL)
 446	return NULL;
 447    return codec_tuple(decoded, consumed);
 448}
 449
 450/* This non-standard version also provides access to the byteorder
 451   parameter of the builtin UTF-32 codec.
 452
 453   It returns a tuple (unicode, bytesread, byteorder) with byteorder
 454   being the value in effect at the end of data.
 455
 456*/
 457
 458static PyObject *
 459utf_32_ex_decode(PyObject *self,
 460		 PyObject *args)
 461{
 462	Py_buffer pbuf;
 463    const char *errors = NULL;
 464    int byteorder = 0;
 465    PyObject *unicode, *tuple;
 466    int final = 0;
 467    Py_ssize_t consumed;
 468
 469    if (!PyArg_ParseTuple(args, "s*|zii:utf_32_ex_decode",
 470			  &pbuf, &errors, &byteorder, &final))
 471	return NULL;
 472    consumed = pbuf.len; /* This is overwritten unless final is true. */
 473    unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
 474					&byteorder, final ? NULL : &consumed);
 475	PyBuffer_Release(&pbuf);
 476    if (unicode == NULL)
 477	return NULL;
 478    tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
 479    Py_DECREF(unicode);
 480    return tuple;
 481}
 482
 483static PyObject *
 484unicode_escape_decode(PyObject *self,
 485		     PyObject *args)
 486{
 487	Py_buffer pbuf;
 488    const char *errors = NULL;
 489	PyObject *unicode;
 490
 491    if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode",
 492			  &pbuf, &errors))
 493	return NULL;
 494
 495	unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
 496	PyBuffer_Release(&pbuf);
 497	return codec_tuple(unicode, pbuf.len);
 498}
 499
 500static PyObject *
 501raw_unicode_escape_decode(PyObject *self,
 502			PyObject *args)
 503{
 504	Py_buffer pbuf;
 505    const char *errors = NULL;
 506	PyObject *unicode;
 507
 508    if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode",
 509			  &pbuf, &errors))
 510	return NULL;
 511
 512	unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
 513	PyBuffer_Release(&pbuf);
 514	return codec_tuple(unicode, pbuf.len);
 515}
 516
 517static PyObject *
 518latin_1_decode(PyObject *self,
 519	       PyObject *args)
 520{
 521	Py_buffer pbuf;
 522	PyObject *unicode;
 523    const char *errors = NULL;
 524
 525    if (!PyArg_ParseTuple(args, "s*|z:latin_1_decode",
 526			  &pbuf, &errors))
 527	return NULL;
 528
 529	unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
 530	PyBuffer_Release(&pbuf);
 531	return codec_tuple(unicode, pbuf.len);
 532}
 533
 534static PyObject *
 535ascii_decode(PyObject *self,
 536	     PyObject *args)
 537{
 538	Py_buffer pbuf;
 539	PyObject *unicode;
 540    const char *errors = NULL;
 541
 542    if (!PyArg_ParseTuple(args, "s*|z:ascii_decode",
 543			  &pbuf, &errors))
 544	return NULL;
 545
 546	unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
 547	PyBuffer_Release(&pbuf);
 548	return codec_tuple(unicode, pbuf.len);
 549}
 550
 551static PyObject *
 552charmap_decode(PyObject *self,
 553	       PyObject *args)
 554{
 555	Py_buffer pbuf;
 556	PyObject *unicode;
 557    const char *errors = NULL;
 558    PyObject *mapping = NULL;
 559
 560    if (!PyArg_ParseTuple(args, "s*|zO:charmap_decode",
 561			  &pbuf, &errors, &mapping))
 562	return NULL;
 563    if (mapping == Py_None)
 564	mapping = NULL;
 565
 566	unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
 567	PyBuffer_Release(&pbuf);
 568	return codec_tuple(unicode, pbuf.len);
 569}
 570
 571#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
 572
 573static PyObject *
 574mbcs_decode(PyObject *self,
 575	    PyObject *args)
 576{
 577	Py_buffer pbuf;
 578    const char *errors = NULL;
 579    int final = 0;
 580    Py_ssize_t consumed;
 581    PyObject *decoded = NULL;
 582
 583    if (!PyArg_ParseTuple(args, "s*|zi:mbcs_decode",
 584			  &pbuf, &errors, &final))
 585	return NULL;
 586    consumed = pbuf.len;
 587
 588    decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors,
 589					   final ? NULL : &consumed);
 590	PyBuffer_Release(&pbuf);
 591    if (decoded == NULL)
 592	return NULL;
 593    return codec_tuple(decoded, consumed);
 594}
 595
 596#endif /* MS_WINDOWS */
 597
 598/* --- Encoder ------------------------------------------------------------ */
 599
 600static PyObject *
 601readbuffer_encode(PyObject *self,
 602		  PyObject *args)
 603{
 604    const char *data;
 605    Py_ssize_t size;
 606    const char *errors = NULL;
 607
 608    if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
 609			  &data, &size, &errors))
 610	return NULL;
 611
 612    return codec_tuple(PyString_FromStringAndSize(data, size),
 613		       size);
 614}
 615
 616static PyObject *
 617charbuffer_encode(PyObject *self,
 618		  PyObject *args)
 619{
 620    const char *data;
 621    Py_ssize_t size;
 622    const char *errors = NULL;
 623
 624    if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
 625			  &data, &size, &errors))
 626	return NULL;
 627
 628    return codec_tuple(PyString_FromStringAndSize(data, size),
 629		       size);
 630}
 631
 632static PyObject *
 633unicode_internal_encode(PyObject *self,
 634			PyObject *args)
 635{
 636    PyObject *obj;
 637    const char *errors = NULL;
 638    const char *data;
 639    Py_ssize_t size;
 640
 641    if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
 642			  &obj, &errors))
 643	return NULL;
 644
 645    if (PyUnicode_Check(obj)) {
 646	data = PyUnicode_AS_DATA(obj);
 647	size = PyUnicode_GET_DATA_SIZE(obj);
 648	return codec_tuple(PyString_FromStringAndSize(data, size),
 649			   size);
 650    }
 651    else {
 652	if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
 653	    return NULL;
 654	return codec_tuple(PyString_FromStringAndSize(data, size),
 655			   size);
 656    }
 657}
 658
 659static PyObject *
 660utf_7_encode(PyObject *self,
 661	    PyObject *args)
 662{
 663    PyObject *str, *v;
 664    const char *errors = NULL;
 665
 666    if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
 667			  &str, &errors))
 668	return NULL;
 669
 670    str = PyUnicode_FromObject(str);
 671    if (str == NULL)
 672	return NULL;
 673    v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
 674					 PyUnicode_GET_SIZE(str),
 675					 0,
 676					 0,
 677					 errors),
 678		    PyUnicode_GET_SIZE(str));
 679    Py_DECREF(str);
 680    return v;
 681}
 682
 683static PyObject *
 684utf_8_encode(PyObject *self,
 685	    PyObject *args)
 686{
 687    PyObject *str, *v;
 688    const char *errors = NULL;
 689
 690    if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
 691			  &str, &errors))
 692	return NULL;
 693
 694    str = PyUnicode_FromObject(str);
 695    if (str == NULL)
 696	return NULL;
 697    v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
 698					 PyUnicode_GET_SIZE(str),
 699					 errors),
 700		    PyUnicode_GET_SIZE(str));
 701    Py_DECREF(str);
 702    return v;
 703}
 704
 705/* This version provides access to the byteorder parameter of the
 706   builtin UTF-16 codecs as optional third argument. It defaults to 0
 707   which means: use the native byte order and prepend the data with a
 708   BOM mark.
 709
 710*/
 711
 712static PyObject *
 713utf_16_encode(PyObject *self,
 714	    PyObject *args)
 715{
 716    PyObject *str, *v;
 717    const char *errors = NULL;
 718    int byteorder = 0;
 719
 720    if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
 721			  &str, &errors, &byteorder))
 722	return NULL;
 723
 724    str = PyUnicode_FromObject(str);
 725    if (str == NULL)
 726	return NULL;
 727    v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
 728					  PyUnicode_GET_SIZE(str),
 729					  errors,
 730					  byteorder),
 731		    PyUnicode_GET_SIZE(str));
 732    Py_DECREF(str);
 733    return v;
 734}
 735
 736static PyObject *
 737utf_16_le_encode(PyObject *self,
 738		 PyObject *args)
 739{
 740    PyObject *str, *v;
 741    const char *errors = NULL;
 742
 743    if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
 744			  &str, &errors))
 745	return NULL;
 746
 747    str = PyUnicode_FromObject(str);
 748    if (str == NULL)
 749	return NULL;
 750    v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
 751					     PyUnicode_GET_SIZE(str),
 752					     errors,
 753					     -1),
 754		       PyUnicode_GET_SIZE(str));
 755    Py_DECREF(str);
 756    return v;
 757}
 758
 759static PyObject *
 760utf_16_be_encode(PyObject *self,
 761		 PyObject *args)
 762{
 763    PyObject *str, *v;
 764    const char *errors = NULL;
 765
 766    if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
 767			  &str, &errors))
 768	return NULL;
 769
 770    str = PyUnicode_FromObject(str);
 771    if (str == NULL)
 772	return NULL;
 773    v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
 774					  PyUnicode_GET_SIZE(str),
 775					  errors,
 776					  +1),
 777		    PyUnicode_GET_SIZE(str));
 778    Py_DECREF(str);
 779    return v;
 780}
 781
 782/* This version provides access to the byteorder parameter of the
 783   builtin UTF-32 codecs as optional third argument. It defaults to 0
 784   which means: use the native byte order and prepend the data with a
 785   BOM mark.
 786
 787*/
 788
 789static PyObject *
 790utf_32_encode(PyObject *self,
 791	    PyObject *args)
 792{
 793    PyObject *str, *v;
 794    const char *errors = NULL;
 795    int byteorder = 0;
 796
 797    if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
 798			  &str, &errors, &byteorder))
 799	return NULL;
 800
 801    str = PyUnicode_FromObject(str);
 802    if (str == NULL)
 803	return NULL;
 804    v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
 805					  PyUnicode_GET_SIZE(str),
 806					  errors,
 807					  byteorder),
 808		    PyUnicode_GET_SIZE(str));
 809    Py_DECREF(str);
 810    return v;
 811}
 812
 813static PyObject *
 814utf_32_le_encode(PyObject *self,
 815		 PyObject *args)
 816{
 817    PyObject *str, *v;
 818    const char *errors = NULL;
 819
 820    if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
 821			  &str, &errors))
 822	return NULL;
 823
 824    str = PyUnicode_FromObject(str);
 825    if (str == NULL)
 826	return NULL;
 827    v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
 828					     PyUnicode_GET_SIZE(str),
 829					     errors,
 830					     -1),
 831		       PyUnicode_GET_SIZE(str));
 832    Py_DECREF(str);
 833    return v;
 834}
 835
 836static PyObject *
 837utf_32_be_encode(PyObject *self,
 838		 PyObject *args)
 839{
 840    PyObject *str, *v;
 841    const char *errors = NULL;
 842
 843    if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
 844			  &str, &errors))
 845	return NULL;
 846
 847    str = PyUnicode_FromObject(str);
 848    if (str == NULL)
 849	return NULL;
 850    v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
 851					  PyUnicode_GET_SIZE(str),
 852					  errors,
 853					  +1),
 854		    PyUnicode_GET_SIZE(str));
 855    Py_DECREF(str);
 856    return v;
 857}
 858
 859static PyObject *
 860unicode_escape_encode(PyObject *self,
 861		     PyObject *args)
 862{
 863    PyObject *str, *v;
 864    const char *errors = NULL;
 865
 866    if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
 867			  &str, &errors))
 868	return NULL;
 869
 870    str = PyUnicode_FromObject(str);
 871    if (str == NULL)
 872	return NULL;
 873    v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
 874						  PyUnicode_GET_SIZE(str)),
 875		    PyUnicode_GET_SIZE(str));
 876    Py_DECREF(str);
 877    return v;
 878}
 879
 880static PyObject *
 881raw_unicode_escape_encode(PyObject *self,
 882			PyObject *args)
 883{
 884    PyObject *str, *v;
 885    const char *errors = NULL;
 886
 887    if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
 888			  &str, &errors))
 889	return NULL;
 890
 891    str = PyUnicode_FromObject(str);
 892    if (str == NULL)
 893	return NULL;
 894    v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
 895			       PyUnicode_AS_UNICODE(str),
 896			       PyUnicode_GET_SIZE(str)),
 897		    PyUnicode_GET_SIZE(str));
 898    Py_DECREF(str);
 899    return v;
 900}
 901
 902static PyObject *
 903latin_1_encode(PyObject *self,
 904	       PyObject *args)
 905{
 906    PyObject *str, *v;
 907    const char *errors = NULL;
 908
 909    if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
 910			  &str, &errors))
 911	return NULL;
 912
 913    str = PyUnicode_FromObject(str);
 914    if (str == NULL)
 915	return NULL;
 916    v = codec_tuple(PyUnicode_EncodeLatin1(
 917			       PyUnicode_AS_UNICODE(str),
 918			       PyUnicode_GET_SIZE(str),
 919			       errors),
 920		    PyUnicode_GET_SIZE(str));
 921    Py_DECREF(str);
 922    return v;
 923}
 924
 925static PyObject *
 926ascii_encode(PyObject *self,
 927	     PyObject *args)
 928{
 929    PyObject *str, *v;
 930    const char *errors = NULL;
 931
 932    if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
 933			  &str, &errors))
 934	return NULL;
 935
 936    str = PyUnicode_FromObject(str);
 937    if (str == NULL)
 938	return NULL;
 939    v = codec_tuple(PyUnicode_EncodeASCII(
 940			       PyUnicode_AS_UNICODE(str),
 941			       PyUnicode_GET_SIZE(str),
 942			       errors),
 943		    PyUnicode_GET_SIZE(str));
 944    Py_DECREF(str);
 945    return v;
 946}
 947
 948static PyObject *
 949charmap_encode(PyObject *self,
 950	     PyObject *args)
 951{
 952    PyObject *str, *v;
 953    const char *errors = NULL;
 954    PyObject *mapping = NULL;
 955
 956    if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
 957			  &str, &errors, &mapping))
 958	return NULL;
 959    if (mapping == Py_None)
 960	mapping = NULL;
 961
 962    str = PyUnicode_FromObject(str);
 963    if (str == NULL)
 964	return NULL;
 965    v = codec_tuple(PyUnicode_EncodeCharmap(
 966			       PyUnicode_AS_UNICODE(str),
 967			       PyUnicode_GET_SIZE(str),
 968			       mapping,
 969			       errors),
 970		    PyUnicode_GET_SIZE(str));
 971    Py_DECREF(str);
 972    return v;
 973}
 974
 975static PyObject*
 976charmap_build(PyObject *self, PyObject *args)
 977{
 978    PyObject *map;
 979    if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
 980        return NULL;
 981    return PyUnicode_BuildEncodingMap(map);
 982}
 983
 984#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
 985
 986static PyObject *
 987mbcs_encode(PyObject *self,
 988	    PyObject *args)
 989{
 990    PyObject *str, *v;
 991    const char *errors = NULL;
 992
 993    if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
 994			  &str, &errors))
 995	return NULL;
 996
 997    str = PyUnicode_FromObject(str);
 998    if (str == NULL)
 999	return NULL;
1000    v = codec_tuple(PyUnicode_EncodeMBCS(
1001			       PyUnicode_AS_UNICODE(str),
1002			       PyUnicode_GET_SIZE(str),
1003			       errors),
1004		    PyUnicode_GET_SIZE(str));
1005    Py_DECREF(str);
1006    return v;
1007}
1008
1009#endif /* MS_WINDOWS */
1010#endif /* Py_USING_UNICODE */
1011
1012/* --- Error handler registry --------------------------------------------- */
1013
1014PyDoc_STRVAR(register_error__doc__,
1015"register_error(errors, handler)\n\
1016\n\
1017Register the specified error handler under the name\n\
1018errors. handler must be a callable object, that\n\
1019will be called with an exception instance containing\n\
1020information about the location of the encoding/decoding\n\
1021error and must return a (replacement, new position) tuple.");
1022
1023static PyObject *register_error(PyObject *self, PyObject *args)
1024{
1025    const char *name;
1026    PyObject *handler;
1027
1028    if (!PyArg_ParseTuple(args, "sO:register_error",
1029			  &name, &handler))
1030	return NULL;
1031    if (PyCodec_RegisterError(name, handler))
1032        return NULL;
1033    Py_RETURN_NONE;
1034}
1035
1036PyDoc_STRVAR(lookup_error__doc__,
1037"lookup_error(errors) -> handler\n\
1038\n\
1039Return the error handler for the specified error handling name\n\
1040or raise a LookupError, if no handler exists under this name.");
1041
1042static PyObject *lookup_error(PyObject *self, PyObject *args)
1043{
1044    const char *name;
1045
1046    if (!PyArg_ParseTuple(args, "s:lookup_error",
1047			  &name))
1048	return NULL;
1049    return PyCodec_LookupError(name);
1050}
1051
1052/* --- Module API --------------------------------------------------------- */
1053
1054static PyMethodDef _codecs_functions[] = {
1055    {"register",		codec_register,			METH_O,
1056        register__doc__},
1057    {"lookup",			codec_lookup, 			METH_VARARGS,
1058        lookup__doc__},
1059    {"encode",			codec_encode,			METH_VARARGS,
1060	encode__doc__},
1061    {"decode",			codec_decode,			METH_VARARGS,
1062	decode__doc__},
1063    {"escape_encode",		escape_encode,			METH_VARARGS},
1064    {"escape_decode",		escape_decode,			METH_VARARGS},
1065#ifdef Py_USING_UNICODE
1066    {"utf_8_encode",		utf_8_encode,			METH_VARARGS},
1067    {"utf_8_decode",		utf_8_decode,			METH_VARARGS},
1068    {"utf_7_encode",		utf_7_encode,			METH_VARARGS},
1069    {"utf_7_decode",		utf_7_decode,			METH_VARARGS},
1070    {"utf_16_encode",		utf_16_encode,			METH_VARARGS},
1071    {"utf_16_le_encode",	utf_16_le_encode,		METH_VARARGS},
1072    {"utf_16_be_encode",	utf_16_be_encode,		METH_VARARGS},
1073    {"utf_16_decode",		utf_16_decode,			METH_VARARGS},
1074    {"utf_16_le_decode",	utf_16_le_decode,		METH_VARARGS},
1075    {"utf_16_be_decode",	utf_16_be_decode,		METH_VARARGS},
1076    {"utf_16_ex_decode",	utf_16_ex_decode,		METH_VARARGS},
1077    {"utf_32_encode",		utf_32_encode,			METH_VARARGS},
1078    {"utf_32_le_encode",	utf_32_le_encode,		METH_VARARGS},
1079    {"utf_32_be_encode",	utf_32_be_encode,		METH_VARARGS},
1080    {"utf_32_decode",		utf_32_decode,			METH_VARARGS},
1081    {"utf_32_le_decode",	utf_32_le_decode,		METH_VARARGS},
1082    {"utf_32_be_decode",	utf_32_be_decode,		METH_VARARGS},
1083    {"utf_32_ex_decode",	utf_32_ex_decode,		METH_VARARGS},
1084    {"unicode_escape_encode",	unicode_escape_encode,		METH_VARARGS},
1085    {"unicode_escape_decode",	unicode_escape_decode,		METH_VARARGS},
1086    {"unicode_internal_encode",	unicode_internal_encode,	METH_VARARGS},
1087    {"unicode_internal_decode",	unicode_internal_decode,	METH_VARARGS},
1088    {"raw_unicode_escape_encode", raw_unicode_escape_encode,	METH_VARARGS},
1089    {"raw_unicode_escape_decode", raw_unicode_escape_decode,	METH_VARARGS},
1090    {"latin_1_encode", 		latin_1_encode,			METH_VARARGS},
1091    {"latin_1_decode", 		latin_1_decode,			METH_VARARGS},
1092    {"ascii_encode", 		ascii_encode,			METH_VARARGS},
1093    {"ascii_decode", 		ascii_decode,			METH_VARARGS},
1094    {"charmap_encode", 		charmap_encode,			METH_VARARGS},
1095    {"charmap_decode", 		charmap_decode,			METH_VARARGS},
1096    {"charmap_build", 		charmap_build,			METH_VARARGS},
1097    {"readbuffer_encode",	readbuffer_encode,		METH_VARARGS},
1098    {"charbuffer_encode",	charbuffer_encode,		METH_VARARGS},
1099#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
1100    {"mbcs_encode", 		mbcs_encode,			METH_VARARGS},
1101    {"mbcs_decode", 		mbcs_decode,			METH_VARARGS},
1102#endif
1103#endif /* Py_USING_UNICODE */
1104    {"register_error", 		register_error,			METH_VARARGS,
1105        register_error__doc__},
1106    {"lookup_error", 		lookup_error,			METH_VARARGS,
1107        lookup_error__doc__},
1108    {NULL, NULL}		/* sentinel */
1109};
1110
1111PyMODINIT_FUNC
1112init_codecs(void)
1113{
1114    Py_InitModule("_codecs", _codecs_functions);
1115}