PageRenderTime 904ms CodeModel.GetById 192ms app.highlight 424ms RepoModel.GetById 200ms app.codeStats 0ms

/Python/codecs.c

http://unladen-swallow.googlecode.com/
C | 860 lines | 694 code | 88 blank | 78 comment | 174 complexity | 0ce8a8d3b9b691dcc4659bcb93fd24be MD5 | raw file
  1/* ------------------------------------------------------------------------
  2
  3   Python Codec Registry and support functions
  4
  5Written by Marc-Andre Lemburg (mal@lemburg.com).
  6
  7Copyright (c) Corporation for National Research Initiatives.
  8
  9   ------------------------------------------------------------------------ */
 10
 11#include "Python.h"
 12#include <ctype.h>
 13
 14/* --- Codec Registry ----------------------------------------------------- */
 15
 16/* Import the standard encodings package which will register the first
 17   codec search function. 
 18
 19   This is done in a lazy way so that the Unicode implementation does
 20   not downgrade startup time of scripts not needing it.
 21
 22   ImportErrors are silently ignored by this function. Only one try is
 23   made.
 24
 25*/
 26
 27static int _PyCodecRegistry_Init(void); /* Forward */
 28
 29int PyCodec_Register(PyObject *search_function)
 30{
 31    PyInterpreterState *interp = PyThreadState_GET()->interp;
 32    if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
 33	goto onError;
 34    if (search_function == NULL) {
 35	PyErr_BadArgument();
 36	goto onError;
 37    }
 38    if (!PyCallable_Check(search_function)) {
 39	PyErr_SetString(PyExc_TypeError, "argument must be callable");
 40	goto onError;
 41    }
 42    return PyList_Append(interp->codec_search_path, search_function);
 43
 44 onError:
 45    return -1;
 46}
 47
 48/* Convert a string to a normalized Python string: all characters are
 49   converted to lower case, spaces are replaced with underscores. */
 50
 51static
 52PyObject *normalizestring(const char *string)
 53{
 54    register size_t i;
 55    size_t len = strlen(string);
 56    char *p;
 57    PyObject *v;
 58    
 59    if (len > PY_SSIZE_T_MAX) {
 60	PyErr_SetString(PyExc_OverflowError, "string is too large");
 61	return NULL;
 62    }
 63	
 64    v = PyString_FromStringAndSize(NULL, len);
 65    if (v == NULL)
 66	return NULL;
 67    p = PyString_AS_STRING(v);
 68    for (i = 0; i < len; i++) {
 69        register char ch = string[i];
 70        if (ch == ' ')
 71            ch = '-';
 72        else
 73            ch = tolower(Py_CHARMASK(ch));
 74	p[i] = ch;
 75    }
 76    return v;
 77}
 78
 79/* Lookup the given encoding and return a tuple providing the codec
 80   facilities.
 81
 82   The encoding string is looked up converted to all lower-case
 83   characters. This makes encodings looked up through this mechanism
 84   effectively case-insensitive.
 85
 86   If no codec is found, a LookupError is set and NULL returned. 
 87
 88   As side effect, this tries to load the encodings package, if not
 89   yet done. This is part of the lazy load strategy for the encodings
 90   package.
 91
 92*/
 93
 94PyObject *_PyCodec_Lookup(const char *encoding)
 95{
 96    PyInterpreterState *interp;
 97    PyObject *result, *args = NULL, *v;
 98    Py_ssize_t i, len;
 99
100    if (encoding == NULL) {
101	PyErr_BadArgument();
102	goto onError;
103    }
104
105    interp = PyThreadState_GET()->interp;
106    if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
107	goto onError;
108
109    /* Convert the encoding to a normalized Python string: all
110       characters are converted to lower case, spaces and hyphens are
111       replaced with underscores. */
112    v = normalizestring(encoding);
113    if (v == NULL)
114	goto onError;
115    PyString_InternInPlace(&v);
116
117    /* First, try to lookup the name in the registry dictionary */
118    result = PyDict_GetItem(interp->codec_search_cache, v);
119    if (result != NULL) {
120	Py_INCREF(result);
121	Py_DECREF(v);
122	return result;
123    }
124    
125    /* Next, scan the search functions in order of registration */
126    args = PyTuple_New(1);
127    if (args == NULL)
128	goto onError;
129    PyTuple_SET_ITEM(args,0,v);
130
131    len = PyList_Size(interp->codec_search_path);
132    if (len < 0)
133	goto onError;
134    if (len == 0) {
135	PyErr_SetString(PyExc_LookupError,
136			"no codec search functions registered: "
137			"can't find encoding");
138	goto onError;
139    }
140
141    for (i = 0; i < len; i++) {
142	PyObject *func;
143	
144	func = PyList_GetItem(interp->codec_search_path, i);
145	if (func == NULL)
146	    goto onError;
147	result = PyEval_CallObject(func, args);
148	if (result == NULL)
149	    goto onError;
150	if (result == Py_None) {
151	    Py_DECREF(result);
152	    continue;
153	}
154	if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
155	    PyErr_SetString(PyExc_TypeError,
156			    "codec search functions must return 4-tuples");
157	    Py_DECREF(result);
158	    goto onError;
159	}
160	break;
161    }
162    if (i == len) {
163	/* XXX Perhaps we should cache misses too ? */
164	PyErr_Format(PyExc_LookupError,
165                     "unknown encoding: %s", encoding);
166	goto onError;
167    }
168
169    /* Cache and return the result */
170    PyDict_SetItem(interp->codec_search_cache, v, result);
171    Py_DECREF(args);
172    return result;
173
174 onError:
175    Py_XDECREF(args);
176    return NULL;
177}
178
179static
180PyObject *args_tuple(PyObject *object,
181		     const char *errors)
182{
183    PyObject *args;
184    
185    args = PyTuple_New(1 + (errors != NULL));
186    if (args == NULL)
187	return NULL;
188    Py_INCREF(object);
189    PyTuple_SET_ITEM(args,0,object);
190    if (errors) {
191	PyObject *v;
192	
193	v = PyString_FromString(errors);
194	if (v == NULL) {
195	    Py_DECREF(args);
196	    return NULL;
197	}
198	PyTuple_SET_ITEM(args, 1, v);
199    }
200    return args;
201}
202
203/* Helper function to get a codec item */
204
205static
206PyObject *codec_getitem(const char *encoding, int index)
207{
208    PyObject *codecs;
209    PyObject *v;
210
211    codecs = _PyCodec_Lookup(encoding);
212    if (codecs == NULL)
213	return NULL;
214    v = PyTuple_GET_ITEM(codecs, index);
215    Py_DECREF(codecs);
216    Py_INCREF(v);
217    return v;
218}
219
220/* Helper function to create an incremental codec. */
221
222static
223PyObject *codec_getincrementalcodec(const char *encoding,
224				    const char *errors,
225				    const char *attrname)
226{
227    PyObject *codecs, *ret, *inccodec;
228
229    codecs = _PyCodec_Lookup(encoding);
230    if (codecs == NULL)
231	return NULL;
232    inccodec = PyObject_GetAttrString(codecs, attrname);
233    Py_DECREF(codecs);
234    if (inccodec == NULL)
235	return NULL;
236    if (errors)
237	ret = PyObject_CallFunction(inccodec, "s", errors);
238    else
239	ret = PyObject_CallFunction(inccodec, NULL);
240    Py_DECREF(inccodec);
241    return ret;
242}
243
244/* Helper function to create a stream codec. */
245
246static
247PyObject *codec_getstreamcodec(const char *encoding,
248			       PyObject *stream,
249			       const char *errors,
250			       const int index)
251{
252    PyObject *codecs, *streamcodec, *codeccls;
253
254    codecs = _PyCodec_Lookup(encoding);
255    if (codecs == NULL)
256	return NULL;
257
258    codeccls = PyTuple_GET_ITEM(codecs, index);
259    if (errors != NULL)
260	streamcodec = PyObject_CallFunction(codeccls, "Os", stream, errors);
261    else
262	streamcodec = PyObject_CallFunction(codeccls, "O", stream);
263    Py_DECREF(codecs);
264    return streamcodec;
265}
266
267/* Convenience APIs to query the Codec registry. 
268   
269   All APIs return a codec object with incremented refcount.
270   
271 */
272
273PyObject *PyCodec_Encoder(const char *encoding)
274{
275    return codec_getitem(encoding, 0);
276}
277
278PyObject *PyCodec_Decoder(const char *encoding)
279{
280    return codec_getitem(encoding, 1);
281}
282
283PyObject *PyCodec_IncrementalEncoder(const char *encoding,
284				     const char *errors)
285{
286    return codec_getincrementalcodec(encoding, errors, "incrementalencoder");
287}
288
289PyObject *PyCodec_IncrementalDecoder(const char *encoding,
290				     const char *errors)
291{
292    return codec_getincrementalcodec(encoding, errors, "incrementaldecoder");
293}
294
295PyObject *PyCodec_StreamReader(const char *encoding,
296			       PyObject *stream,
297			       const char *errors)
298{
299    return codec_getstreamcodec(encoding, stream, errors, 2);
300}
301
302PyObject *PyCodec_StreamWriter(const char *encoding,
303			       PyObject *stream,
304			       const char *errors)
305{
306    return codec_getstreamcodec(encoding, stream, errors, 3);
307}
308
309/* Encode an object (e.g. an Unicode object) using the given encoding
310   and return the resulting encoded object (usually a Python string).
311
312   errors is passed to the encoder factory as argument if non-NULL. */
313
314PyObject *PyCodec_Encode(PyObject *object,
315			 const char *encoding,
316			 const char *errors)
317{
318    PyObject *encoder = NULL;
319    PyObject *args = NULL, *result = NULL;
320    PyObject *v;
321
322    encoder = PyCodec_Encoder(encoding);
323    if (encoder == NULL)
324	goto onError;
325
326    args = args_tuple(object, errors);
327    if (args == NULL)
328	goto onError;
329    
330    result = PyEval_CallObject(encoder,args);
331    if (result == NULL)
332	goto onError;
333
334    if (!PyTuple_Check(result) || 
335	PyTuple_GET_SIZE(result) != 2) {
336	PyErr_SetString(PyExc_TypeError,
337			"encoder must return a tuple (object,integer)");
338	goto onError;
339    }
340    v = PyTuple_GET_ITEM(result,0);
341    Py_INCREF(v);
342    /* We don't check or use the second (integer) entry. */
343
344    Py_DECREF(args);
345    Py_DECREF(encoder);
346    Py_DECREF(result);
347    return v;
348	
349 onError:
350    Py_XDECREF(result);
351    Py_XDECREF(args);
352    Py_XDECREF(encoder);
353    return NULL;
354}
355
356/* Decode an object (usually a Python string) using the given encoding
357   and return an equivalent object (e.g. an Unicode object).
358
359   errors is passed to the decoder factory as argument if non-NULL. */
360
361PyObject *PyCodec_Decode(PyObject *object,
362			 const char *encoding,
363			 const char *errors)
364{
365    PyObject *decoder = NULL;
366    PyObject *args = NULL, *result = NULL;
367    PyObject *v;
368
369    decoder = PyCodec_Decoder(encoding);
370    if (decoder == NULL)
371	goto onError;
372
373    args = args_tuple(object, errors);
374    if (args == NULL)
375	goto onError;
376    
377    result = PyEval_CallObject(decoder,args);
378    if (result == NULL)
379	goto onError;
380    if (!PyTuple_Check(result) || 
381	PyTuple_GET_SIZE(result) != 2) {
382	PyErr_SetString(PyExc_TypeError,
383			"decoder must return a tuple (object,integer)");
384	goto onError;
385    }
386    v = PyTuple_GET_ITEM(result,0);
387    Py_INCREF(v);
388    /* We don't check or use the second (integer) entry. */
389
390    Py_DECREF(args);
391    Py_DECREF(decoder);
392    Py_DECREF(result);
393    return v;
394	
395 onError:
396    Py_XDECREF(args);
397    Py_XDECREF(decoder);
398    Py_XDECREF(result);
399    return NULL;
400}
401
402/* Register the error handling callback function error under the name
403   name. This function will be called by the codec when it encounters
404   an unencodable characters/undecodable bytes and doesn't know the
405   callback name, when name is specified as the error parameter
406   in the call to the encode/decode function.
407   Return 0 on success, -1 on error */
408int PyCodec_RegisterError(const char *name, PyObject *error)
409{
410    PyInterpreterState *interp = PyThreadState_GET()->interp;
411    if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
412	return -1;
413    if (!PyCallable_Check(error)) {
414	PyErr_SetString(PyExc_TypeError, "handler must be callable");
415	return -1;
416    }
417    return PyDict_SetItemString(interp->codec_error_registry,
418	    			(char *)name, error);
419}
420
421/* Lookup the error handling callback function registered under the
422   name error. As a special case NULL can be passed, in which case
423   the error handling callback for strict encoding will be returned. */
424PyObject *PyCodec_LookupError(const char *name)
425{
426    PyObject *handler = NULL;
427
428    PyInterpreterState *interp = PyThreadState_GET()->interp;
429    if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
430	return NULL;
431
432    if (name==NULL)
433	name = "strict";
434    handler = PyDict_GetItemString(interp->codec_error_registry, (char *)name);
435    if (!handler)
436	PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
437    else
438	Py_INCREF(handler);
439    return handler;
440}
441
442static void wrong_exception_type(PyObject *exc)
443{
444    PyObject *type = PyObject_GetAttrString(exc, "__class__");
445    if (type != NULL) {
446	PyObject *name = PyObject_GetAttrString(type, "__name__");
447	Py_DECREF(type);
448	if (name != NULL) {
449	    PyObject *string = PyObject_Str(name);
450	    Py_DECREF(name);
451	    if (string != NULL) {
452	        PyErr_Format(PyExc_TypeError,
453		    "don't know how to handle %.400s in error callback",
454		    PyString_AS_STRING(string));
455	        Py_DECREF(string);
456	    }
457	}
458    }
459}
460
461PyObject *PyCodec_StrictErrors(PyObject *exc)
462{
463    if (PyExceptionInstance_Check(exc))
464        PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
465    else
466	PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
467    return NULL;
468}
469
470
471#ifdef Py_USING_UNICODE
472PyObject *PyCodec_IgnoreErrors(PyObject *exc)
473{
474    Py_ssize_t end;
475    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
476	if (PyUnicodeEncodeError_GetEnd(exc, &end))
477	    return NULL;
478    }
479    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
480	if (PyUnicodeDecodeError_GetEnd(exc, &end))
481	    return NULL;
482    }
483    else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
484	if (PyUnicodeTranslateError_GetEnd(exc, &end))
485	    return NULL;
486    }
487    else {
488	wrong_exception_type(exc);
489	return NULL;
490    }
491    /* ouch: passing NULL, 0, pos gives None instead of u'' */
492    return Py_BuildValue("(u#n)", &end, 0, end);
493}
494
495
496PyObject *PyCodec_ReplaceErrors(PyObject *exc)
497{
498    PyObject *restuple;
499    Py_ssize_t start;
500    Py_ssize_t end;
501    Py_ssize_t i;
502
503    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
504	PyObject *res;
505	Py_UNICODE *p;
506	if (PyUnicodeEncodeError_GetStart(exc, &start))
507	    return NULL;
508	if (PyUnicodeEncodeError_GetEnd(exc, &end))
509	    return NULL;
510	res = PyUnicode_FromUnicode(NULL, end-start);
511	if (res == NULL)
512	    return NULL;
513	for (p = PyUnicode_AS_UNICODE(res), i = start;
514	    i<end; ++p, ++i)
515	    *p = '?';
516	restuple = Py_BuildValue("(On)", res, end);
517	Py_DECREF(res);
518	return restuple;
519    }
520    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
521	Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
522	if (PyUnicodeDecodeError_GetEnd(exc, &end))
523	    return NULL;
524	return Py_BuildValue("(u#n)", &res, 1, end);
525    }
526    else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
527	PyObject *res;
528	Py_UNICODE *p;
529	if (PyUnicodeTranslateError_GetStart(exc, &start))
530	    return NULL;
531	if (PyUnicodeTranslateError_GetEnd(exc, &end))
532	    return NULL;
533	res = PyUnicode_FromUnicode(NULL, end-start);
534	if (res == NULL)
535	    return NULL;
536	for (p = PyUnicode_AS_UNICODE(res), i = start;
537	    i<end; ++p, ++i)
538	    *p = Py_UNICODE_REPLACEMENT_CHARACTER;
539	restuple = Py_BuildValue("(On)", res, end);
540	Py_DECREF(res);
541	return restuple;
542    }
543    else {
544	wrong_exception_type(exc);
545	return NULL;
546    }
547}
548
549PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
550{
551    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
552	PyObject *restuple;
553	PyObject *object;
554	Py_ssize_t start;
555	Py_ssize_t end;
556	PyObject *res;
557	Py_UNICODE *p;
558	Py_UNICODE *startp;
559	Py_UNICODE *outp;
560	int ressize;
561	if (PyUnicodeEncodeError_GetStart(exc, &start))
562	    return NULL;
563	if (PyUnicodeEncodeError_GetEnd(exc, &end))
564	    return NULL;
565	if (!(object = PyUnicodeEncodeError_GetObject(exc)))
566	    return NULL;
567	startp = PyUnicode_AS_UNICODE(object);
568	for (p = startp+start, ressize = 0; p < startp+end; ++p) {
569	    if (*p<10)
570		ressize += 2+1+1;
571	    else if (*p<100)
572		ressize += 2+2+1;
573	    else if (*p<1000)
574		ressize += 2+3+1;
575	    else if (*p<10000)
576		ressize += 2+4+1;
577#ifndef Py_UNICODE_WIDE
578	    else
579		ressize += 2+5+1;
580#else
581	    else if (*p<100000)
582		ressize += 2+5+1;
583	    else if (*p<1000000)
584		ressize += 2+6+1;
585	    else
586		ressize += 2+7+1;
587#endif
588	}
589	/* allocate replacement */
590	res = PyUnicode_FromUnicode(NULL, ressize);
591	if (res == NULL) {
592	    Py_DECREF(object);
593	    return NULL;
594	}
595	/* generate replacement */
596	for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
597	    p < startp+end; ++p) {
598	    Py_UNICODE c = *p;
599	    int digits;
600	    int base;
601	    *outp++ = '&';
602	    *outp++ = '#';
603	    if (*p<10) {
604		digits = 1;
605		base = 1;
606	    }
607	    else if (*p<100) {
608		digits = 2;
609		base = 10;
610	    }
611	    else if (*p<1000) {
612		digits = 3;
613		base = 100;
614	    }
615	    else if (*p<10000) {
616		digits = 4;
617		base = 1000;
618	    }
619#ifndef Py_UNICODE_WIDE
620	    else {
621		digits = 5;
622		base = 10000;
623	    }
624#else
625	    else if (*p<100000) {
626		digits = 5;
627		base = 10000;
628	    }
629	    else if (*p<1000000) {
630		digits = 6;
631		base = 100000;
632	    }
633	    else {
634		digits = 7;
635		base = 1000000;
636	    }
637#endif
638	    while (digits-->0) {
639		*outp++ = '0' + c/base;
640		c %= base;
641		base /= 10;
642	    }
643	    *outp++ = ';';
644	}
645	restuple = Py_BuildValue("(On)", res, end);
646	Py_DECREF(res);
647	Py_DECREF(object);
648	return restuple;
649    }
650    else {
651	wrong_exception_type(exc);
652	return NULL;
653    }
654}
655
656static Py_UNICODE hexdigits[] = {
657    '0', '1', '2', '3', '4', '5', '6', '7',
658    '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
659};
660
661PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
662{
663    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
664	PyObject *restuple;
665	PyObject *object;
666	Py_ssize_t start;
667	Py_ssize_t end;
668	PyObject *res;
669	Py_UNICODE *p;
670	Py_UNICODE *startp;
671	Py_UNICODE *outp;
672	int ressize;
673	if (PyUnicodeEncodeError_GetStart(exc, &start))
674	    return NULL;
675	if (PyUnicodeEncodeError_GetEnd(exc, &end))
676	    return NULL;
677	if (!(object = PyUnicodeEncodeError_GetObject(exc)))
678	    return NULL;
679	startp = PyUnicode_AS_UNICODE(object);
680	for (p = startp+start, ressize = 0; p < startp+end; ++p) {
681#ifdef Py_UNICODE_WIDE
682	    if (*p >= 0x00010000)
683		ressize += 1+1+8;
684	    else
685#endif
686	    if (*p >= 0x100) {
687		ressize += 1+1+4;
688	    }
689	    else
690		ressize += 1+1+2;
691	}
692	res = PyUnicode_FromUnicode(NULL, ressize);
693	if (res==NULL)
694	    return NULL;
695	for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
696	    p < startp+end; ++p) {
697	    Py_UNICODE c = *p;
698	    *outp++ = '\\';
699#ifdef Py_UNICODE_WIDE
700	    if (c >= 0x00010000) {
701		*outp++ = 'U';
702		*outp++ = hexdigits[(c>>28)&0xf];
703		*outp++ = hexdigits[(c>>24)&0xf];
704		*outp++ = hexdigits[(c>>20)&0xf];
705		*outp++ = hexdigits[(c>>16)&0xf];
706		*outp++ = hexdigits[(c>>12)&0xf];
707		*outp++ = hexdigits[(c>>8)&0xf];
708	    }
709	    else
710#endif
711	    if (c >= 0x100) {
712		*outp++ = 'u';
713		*outp++ = hexdigits[(c>>12)&0xf];
714		*outp++ = hexdigits[(c>>8)&0xf];
715	    }
716	    else
717		*outp++ = 'x';
718	    *outp++ = hexdigits[(c>>4)&0xf];
719	    *outp++ = hexdigits[c&0xf];
720	}
721
722	restuple = Py_BuildValue("(On)", res, end);
723	Py_DECREF(res);
724	Py_DECREF(object);
725	return restuple;
726    }
727    else {
728	wrong_exception_type(exc);
729	return NULL;
730    }
731}
732#endif
733
734static PyObject *strict_errors(PyObject *self, PyObject *exc)
735{
736    return PyCodec_StrictErrors(exc);
737}
738
739
740#ifdef Py_USING_UNICODE
741static PyObject *ignore_errors(PyObject *self, PyObject *exc)
742{
743    return PyCodec_IgnoreErrors(exc);
744}
745
746
747static PyObject *replace_errors(PyObject *self, PyObject *exc)
748{
749    return PyCodec_ReplaceErrors(exc);
750}
751
752
753static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
754{
755    return PyCodec_XMLCharRefReplaceErrors(exc);
756}
757
758
759static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
760{
761    return PyCodec_BackslashReplaceErrors(exc);
762}
763#endif
764
765static int _PyCodecRegistry_Init(void)
766{
767    static struct {
768	char *name;
769	PyMethodDef def;
770    } methods[] =
771    {
772	{
773	    "strict",
774	    {
775		"strict_errors",
776		strict_errors,
777		METH_O
778	    }
779	},
780#ifdef Py_USING_UNICODE
781	{
782	    "ignore",
783	    {
784		"ignore_errors",
785		ignore_errors,
786		METH_O
787	    }
788	},
789	{
790	    "replace",
791	    {
792		"replace_errors",
793		replace_errors,
794		METH_O
795	    }
796	},
797	{
798	    "xmlcharrefreplace",
799	    {
800		"xmlcharrefreplace_errors",
801		xmlcharrefreplace_errors,
802		METH_O
803	    }
804	},
805	{
806	    "backslashreplace",
807	    {
808		"backslashreplace_errors",
809		backslashreplace_errors,
810		METH_O
811	    }
812	}
813#endif
814    };
815
816    PyInterpreterState *interp = PyThreadState_GET()->interp;
817    PyObject *mod;
818    unsigned i;
819
820    if (interp->codec_search_path != NULL)
821	return 0;
822
823    interp->codec_search_path = PyList_New(0);
824    interp->codec_search_cache = PyDict_New();
825    interp->codec_error_registry = PyDict_New();
826
827    if (interp->codec_error_registry) {
828	for (i = 0; i < sizeof(methods)/sizeof(methods[0]); ++i) {
829	    PyObject *func = PyCFunction_New(&methods[i].def, NULL);
830	    int res;
831	    if (!func)
832		Py_FatalError("can't initialize codec error registry");
833	    res = PyCodec_RegisterError(methods[i].name, func);
834	    Py_DECREF(func);
835	    if (res)
836		Py_FatalError("can't initialize codec error registry");
837	}
838    }
839
840    if (interp->codec_search_path == NULL ||
841	interp->codec_search_cache == NULL ||
842	interp->codec_error_registry == NULL)
843	Py_FatalError("can't initialize codec registry");
844
845    mod = PyImport_ImportModuleLevel("encodings", NULL, NULL, NULL, 0);
846    if (mod == NULL) {
847	if (PyErr_ExceptionMatches(PyExc_ImportError)) {
848	    /* Ignore ImportErrors... this is done so that
849	       distributions can disable the encodings package. Note
850	       that other errors are not masked, e.g. SystemErrors
851	       raised to inform the user of an error in the Python
852	       configuration are still reported back to the user. */
853	    PyErr_Clear();
854	    return 0;
855	}
856	return -1;
857    }
858    Py_DECREF(mod);
859    return 0;
860}