blob: 184d1471036aa26203e0347c5fbdd5e875c1e1d3 [file] [log] [blame]
Guido van Rossumfeee4b92000-03-10 22:57:27 +00001/* ------------------------------------------------------------------------
2
3 Python Codec Registry and support functions
4
5Written by Marc-Andre Lemburg (mal@lemburg.com).
6
Guido van Rossum16b1ad92000-08-03 16:24:25 +00007Copyright (c) Corporation for National Research Initiatives.
Guido van Rossumfeee4b92000-03-10 22:57:27 +00008
9 ------------------------------------------------------------------------ */
10
11#include "Python.h"
12#include <ctype.h>
13
Guido van Rossumfeee4b92000-03-10 22:57:27 +000014/* --- Codec Registry ----------------------------------------------------- */
15
16/* Import the standard encodings package which will register the first
Antoine Pitrouc83ea132010-05-09 14:46:46 +000017 codec search function.
Guido van Rossumfeee4b92000-03-10 22:57:27 +000018
19 This is done in a lazy way so that the Unicode implementation does
20 not downgrade startup time of scripts not needing it.
21
Guido van Rossumb95de4f2000-03-31 17:25:23 +000022 ImportErrors are silently ignored by this function. Only one try is
23 made.
Guido van Rossumfeee4b92000-03-10 22:57:27 +000024
25*/
26
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +000027static int _PyCodecRegistry_Init(void); /* Forward */
Guido van Rossumfeee4b92000-03-10 22:57:27 +000028
Guido van Rossumfeee4b92000-03-10 22:57:27 +000029int PyCodec_Register(PyObject *search_function)
30{
Nicholas Bastine5662ae2004-03-24 22:22:12 +000031 PyInterpreterState *interp = PyThreadState_GET()->interp;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +000032 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
Antoine Pitrouc83ea132010-05-09 14:46:46 +000033 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000034 if (search_function == NULL) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +000035 PyErr_BadArgument();
36 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000037 }
38 if (!PyCallable_Check(search_function)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +000039 PyErr_SetString(PyExc_TypeError, "argument must be callable");
40 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000041 }
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +000042 return PyList_Append(interp->codec_search_path, search_function);
Guido van Rossumb95de4f2000-03-31 17:25:23 +000043
44 onError:
45 return -1;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000046}
47
/* Convert a string to a normalized Python string: all characters are
   converted to lower case, spaces are replaced with hyphens. */
50
Guido van Rossumfeee4b92000-03-10 22:57:27 +000051static
Guido van Rossum9e896b32000-04-05 20:11:21 +000052PyObject *normalizestring(const char *string)
Guido van Rossumfeee4b92000-03-10 22:57:27 +000053{
Guido van Rossum33831132000-06-29 14:50:15 +000054 register size_t i;
Guido van Rossum582acec2000-06-28 22:07:35 +000055 size_t len = strlen(string);
Guido van Rossumfeee4b92000-03-10 22:57:27 +000056 char *p;
57 PyObject *v;
Antoine Pitrouc83ea132010-05-09 14:46:46 +000058
Martin v. Löwisb1ed7fa2006-04-13 07:52:27 +000059 if (len > PY_SSIZE_T_MAX) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +000060 PyErr_SetString(PyExc_OverflowError, "string is too large");
61 return NULL;
Martin v. Löwisb1ed7fa2006-04-13 07:52:27 +000062 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +000063
Gregory P. Smithdd96db62008-06-09 04:58:54 +000064 v = PyString_FromStringAndSize(NULL, len);
Guido van Rossumfeee4b92000-03-10 22:57:27 +000065 if (v == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +000066 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +000067 p = PyString_AS_STRING(v);
Guido van Rossum9e896b32000-04-05 20:11:21 +000068 for (i = 0; i < len; i++) {
69 register char ch = string[i];
70 if (ch == ' ')
71 ch = '-';
72 else
Antoine Pitrou4cfae022011-07-24 02:51:01 +020073 ch = Py_TOLOWER(Py_CHARMASK(ch));
Antoine Pitrouc83ea132010-05-09 14:46:46 +000074 p[i] = ch;
Guido van Rossum9e896b32000-04-05 20:11:21 +000075 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +000076 return v;
77}
78
79/* Lookup the given encoding and return a tuple providing the codec
80 facilities.
81
82 The encoding string is looked up converted to all lower-case
83 characters. This makes encodings looked up through this mechanism
84 effectively case-insensitive.
85
Antoine Pitrouc83ea132010-05-09 14:46:46 +000086 If no codec is found, a LookupError is set and NULL returned.
Guido van Rossumb95de4f2000-03-31 17:25:23 +000087
88 As side effect, this tries to load the encodings package, if not
89 yet done. This is part of the lazy load strategy for the encodings
90 package.
91
92*/
Guido van Rossumfeee4b92000-03-10 22:57:27 +000093
94PyObject *_PyCodec_Lookup(const char *encoding)
95{
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +000096 PyInterpreterState *interp;
Guido van Rossum5ba3c842000-03-24 20:52:23 +000097 PyObject *result, *args = NULL, *v;
Martin v. Löwis66851282006-04-22 11:40:03 +000098 Py_ssize_t i, len;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000099
Fred Drake766de832000-05-09 19:55:59 +0000100 if (encoding == NULL) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000101 PyErr_BadArgument();
102 goto onError;
Fred Drake766de832000-05-09 19:55:59 +0000103 }
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000104
Nicholas Bastine5662ae2004-03-24 22:22:12 +0000105 interp = PyThreadState_GET()->interp;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000106 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000107 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000108
Guido van Rossum9e896b32000-04-05 20:11:21 +0000109 /* Convert the encoding to a normalized Python string: all
Thomas Wouters7e474022000-07-16 12:04:32 +0000110 characters are converted to lower case, spaces and hyphens are
Guido van Rossum9e896b32000-04-05 20:11:21 +0000111 replaced with underscores. */
112 v = normalizestring(encoding);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000113 if (v == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000114 goto onError;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000115 PyString_InternInPlace(&v);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000116
117 /* First, try to lookup the name in the registry dictionary */
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000118 result = PyDict_GetItem(interp->codec_search_cache, v);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000119 if (result != NULL) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000120 Py_INCREF(result);
121 Py_DECREF(v);
122 return result;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000123 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000124
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000125 /* Next, scan the search functions in order of registration */
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000126 args = PyTuple_New(1);
127 if (args == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000128 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000129 PyTuple_SET_ITEM(args,0,v);
Guido van Rossum5ba3c842000-03-24 20:52:23 +0000130
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000131 len = PyList_Size(interp->codec_search_path);
Guido van Rossum5ba3c842000-03-24 20:52:23 +0000132 if (len < 0)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000133 goto onError;
Guido van Rossumb95de4f2000-03-31 17:25:23 +0000134 if (len == 0) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000135 PyErr_SetString(PyExc_LookupError,
136 "no codec search functions registered: "
137 "can't find encoding");
138 goto onError;
Guido van Rossumb95de4f2000-03-31 17:25:23 +0000139 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000140
141 for (i = 0; i < len; i++) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000142 PyObject *func;
143
144 func = PyList_GetItem(interp->codec_search_path, i);
145 if (func == NULL)
146 goto onError;
147 result = PyEval_CallObject(func, args);
148 if (result == NULL)
149 goto onError;
150 if (result == Py_None) {
151 Py_DECREF(result);
152 continue;
153 }
154 if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
155 PyErr_SetString(PyExc_TypeError,
156 "codec search functions must return 4-tuples");
157 Py_DECREF(result);
158 goto onError;
159 }
160 break;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000161 }
162 if (i == len) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000163 /* XXX Perhaps we should cache misses too ? */
164 PyErr_Format(PyExc_LookupError,
Martin v. Löwiseb42b022002-09-26 16:01:24 +0000165 "unknown encoding: %s", encoding);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000166 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000167 }
168
169 /* Cache and return the result */
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000170 PyDict_SetItem(interp->codec_search_cache, v, result);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000171 Py_DECREF(args);
172 return result;
173
174 onError:
175 Py_XDECREF(args);
176 return NULL;
177}
178
179static
180PyObject *args_tuple(PyObject *object,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000181 const char *errors)
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000182{
183 PyObject *args;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000184
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000185 args = PyTuple_New(1 + (errors != NULL));
186 if (args == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000187 return NULL;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000188 Py_INCREF(object);
189 PyTuple_SET_ITEM(args,0,object);
190 if (errors) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000191 PyObject *v;
192
193 v = PyString_FromString(errors);
194 if (v == NULL) {
195 Py_DECREF(args);
196 return NULL;
197 }
198 PyTuple_SET_ITEM(args, 1, v);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000199 }
200 return args;
201}
202
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000203/* Helper function to get a codec item */
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000204
205static
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000206PyObject *codec_getitem(const char *encoding, int index)
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000207{
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000208 PyObject *codecs;
209 PyObject *v;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000210
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000211 codecs = _PyCodec_Lookup(encoding);
212 if (codecs == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000213 return NULL;
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000214 v = PyTuple_GET_ITEM(codecs, index);
215 Py_DECREF(codecs);
216 Py_INCREF(v);
217 return v;
218}
219
220/* Helper function to create an incremental codec. */
221
222static
223PyObject *codec_getincrementalcodec(const char *encoding,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000224 const char *errors,
225 const char *attrname)
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000226{
227 PyObject *codecs, *ret, *inccodec;
228
229 codecs = _PyCodec_Lookup(encoding);
230 if (codecs == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000231 return NULL;
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000232 inccodec = PyObject_GetAttrString(codecs, attrname);
Walter Dörwaldba8e1802006-03-18 14:05:43 +0000233 Py_DECREF(codecs);
234 if (inccodec == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000235 return NULL;
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000236 if (errors)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000237 ret = PyObject_CallFunction(inccodec, "s", errors);
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000238 else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000239 ret = PyObject_CallFunction(inccodec, NULL);
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000240 Py_DECREF(inccodec);
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000241 return ret;
242}
243
244/* Helper function to create a stream codec. */
245
246static
247PyObject *codec_getstreamcodec(const char *encoding,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000248 PyObject *stream,
249 const char *errors,
250 const int index)
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000251{
Hye-Shik Change6a1cb92006-06-23 21:16:18 +0000252 PyObject *codecs, *streamcodec, *codeccls;
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000253
254 codecs = _PyCodec_Lookup(encoding);
255 if (codecs == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000256 return NULL;
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000257
Hye-Shik Change6a1cb92006-06-23 21:16:18 +0000258 codeccls = PyTuple_GET_ITEM(codecs, index);
259 if (errors != NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000260 streamcodec = PyObject_CallFunction(codeccls, "Os", stream, errors);
Hye-Shik Change6a1cb92006-06-23 21:16:18 +0000261 else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000262 streamcodec = PyObject_CallFunction(codeccls, "O", stream);
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000263 Py_DECREF(codecs);
264 return streamcodec;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000265}
266
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000267/* Convenience APIs to query the Codec registry.
268
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000269 All APIs return a codec object with incremented refcount.
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000270
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000271 */
272
273PyObject *PyCodec_Encoder(const char *encoding)
274{
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000275 return codec_getitem(encoding, 0);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000276}
277
278PyObject *PyCodec_Decoder(const char *encoding)
279{
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000280 return codec_getitem(encoding, 1);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000281}
282
Walter Dörwaldabb02e52006-03-15 11:35:15 +0000283PyObject *PyCodec_IncrementalEncoder(const char *encoding,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000284 const char *errors)
Walter Dörwaldabb02e52006-03-15 11:35:15 +0000285{
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000286 return codec_getincrementalcodec(encoding, errors, "incrementalencoder");
Walter Dörwaldabb02e52006-03-15 11:35:15 +0000287}
288
289PyObject *PyCodec_IncrementalDecoder(const char *encoding,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000290 const char *errors)
Walter Dörwaldabb02e52006-03-15 11:35:15 +0000291{
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000292 return codec_getincrementalcodec(encoding, errors, "incrementaldecoder");
Walter Dörwaldabb02e52006-03-15 11:35:15 +0000293}
294
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000295PyObject *PyCodec_StreamReader(const char *encoding,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000296 PyObject *stream,
297 const char *errors)
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000298{
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000299 return codec_getstreamcodec(encoding, stream, errors, 2);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000300}
301
302PyObject *PyCodec_StreamWriter(const char *encoding,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000303 PyObject *stream,
304 const char *errors)
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000305{
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000306 return codec_getstreamcodec(encoding, stream, errors, 3);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000307}
308
309/* Encode an object (e.g. an Unicode object) using the given encoding
310 and return the resulting encoded object (usually a Python string).
311
312 errors is passed to the encoder factory as argument if non-NULL. */
313
314PyObject *PyCodec_Encode(PyObject *object,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000315 const char *encoding,
316 const char *errors)
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000317{
318 PyObject *encoder = NULL;
Neal Norwitz3715c3e2005-11-24 22:09:18 +0000319 PyObject *args = NULL, *result = NULL;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000320 PyObject *v;
321
322 encoder = PyCodec_Encoder(encoding);
323 if (encoder == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000324 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000325
326 args = args_tuple(object, errors);
327 if (args == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000328 goto onError;
329
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000330 result = PyEval_CallObject(encoder,args);
331 if (result == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000332 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000333
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000334 if (!PyTuple_Check(result) ||
335 PyTuple_GET_SIZE(result) != 2) {
336 PyErr_SetString(PyExc_TypeError,
337 "encoder must return a tuple (object,integer)");
338 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000339 }
340 v = PyTuple_GET_ITEM(result,0);
341 Py_INCREF(v);
342 /* We don't check or use the second (integer) entry. */
343
344 Py_DECREF(args);
345 Py_DECREF(encoder);
346 Py_DECREF(result);
347 return v;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000348
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000349 onError:
Neal Norwitz3715c3e2005-11-24 22:09:18 +0000350 Py_XDECREF(result);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000351 Py_XDECREF(args);
352 Py_XDECREF(encoder);
353 return NULL;
354}
355
356/* Decode an object (usually a Python string) using the given encoding
357 and return an equivalent object (e.g. an Unicode object).
358
359 errors is passed to the decoder factory as argument if non-NULL. */
360
361PyObject *PyCodec_Decode(PyObject *object,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000362 const char *encoding,
363 const char *errors)
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000364{
365 PyObject *decoder = NULL;
366 PyObject *args = NULL, *result = NULL;
367 PyObject *v;
368
369 decoder = PyCodec_Decoder(encoding);
370 if (decoder == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000371 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000372
373 args = args_tuple(object, errors);
374 if (args == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000375 goto onError;
376
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000377 result = PyEval_CallObject(decoder,args);
378 if (result == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000379 goto onError;
380 if (!PyTuple_Check(result) ||
381 PyTuple_GET_SIZE(result) != 2) {
382 PyErr_SetString(PyExc_TypeError,
383 "decoder must return a tuple (object,integer)");
384 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000385 }
386 v = PyTuple_GET_ITEM(result,0);
387 Py_INCREF(v);
388 /* We don't check or use the second (integer) entry. */
389
390 Py_DECREF(args);
391 Py_DECREF(decoder);
392 Py_DECREF(result);
393 return v;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000394
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000395 onError:
396 Py_XDECREF(args);
397 Py_XDECREF(decoder);
398 Py_XDECREF(result);
399 return NULL;
400}
401
/* Register the error handling callback function error under the name
   name. This function will be called by the codec when it encounters
   unencodable characters/undecodable bytes and doesn't know the
   callback name, when name is specified as the error parameter
   in the call to the encode/decode function.
   Return 0 on success, -1 on error */
408int PyCodec_RegisterError(const char *name, PyObject *error)
409{
Nicholas Bastine5662ae2004-03-24 22:22:12 +0000410 PyInterpreterState *interp = PyThreadState_GET()->interp;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000411 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000412 return -1;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000413 if (!PyCallable_Check(error)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000414 PyErr_SetString(PyExc_TypeError, "handler must be callable");
415 return -1;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000416 }
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000417 return PyDict_SetItemString(interp->codec_error_registry,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000418 (char *)name, error);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000419}
420
421/* Lookup the error handling callback function registered under the
422 name error. As a special case NULL can be passed, in which case
423 the error handling callback for strict encoding will be returned. */
424PyObject *PyCodec_LookupError(const char *name)
425{
426 PyObject *handler = NULL;
427
Nicholas Bastine5662ae2004-03-24 22:22:12 +0000428 PyInterpreterState *interp = PyThreadState_GET()->interp;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000429 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000430 return NULL;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000431
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000432 if (name==NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000433 name = "strict";
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000434 handler = PyDict_GetItemString(interp->codec_error_registry, (char *)name);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000435 if (!handler)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000436 PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000437 else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000438 Py_INCREF(handler);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000439 return handler;
440}
441
442static void wrong_exception_type(PyObject *exc)
443{
444 PyObject *type = PyObject_GetAttrString(exc, "__class__");
445 if (type != NULL) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000446 PyObject *name = PyObject_GetAttrString(type, "__name__");
447 Py_DECREF(type);
448 if (name != NULL) {
449 PyObject *string = PyObject_Str(name);
450 Py_DECREF(name);
451 if (string != NULL) {
452 PyErr_Format(PyExc_TypeError,
453 "don't know how to handle %.400s in error callback",
454 PyString_AS_STRING(string));
455 Py_DECREF(string);
456 }
457 }
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000458 }
459}
460
461PyObject *PyCodec_StrictErrors(PyObject *exc)
462{
Brett Cannonbf364092006-03-01 04:25:17 +0000463 if (PyExceptionInstance_Check(exc))
464 PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000465 else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000466 PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000467 return NULL;
468}
469
470
Walter Dörwaldbf73db82002-11-21 20:08:33 +0000471#ifdef Py_USING_UNICODE
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000472PyObject *PyCodec_IgnoreErrors(PyObject *exc)
473{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000474 Py_ssize_t end;
Serhiy Storchaka14e10a12015-05-18 16:08:38 +0300475
476 if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000477 if (PyUnicodeEncodeError_GetEnd(exc, &end))
478 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000479 }
Serhiy Storchaka14e10a12015-05-18 16:08:38 +0300480 else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000481 if (PyUnicodeDecodeError_GetEnd(exc, &end))
482 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000483 }
Serhiy Storchaka14e10a12015-05-18 16:08:38 +0300484 else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000485 if (PyUnicodeTranslateError_GetEnd(exc, &end))
486 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000487 }
488 else {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000489 wrong_exception_type(exc);
490 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000491 }
492 /* ouch: passing NULL, 0, pos gives None instead of u'' */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000493 return Py_BuildValue("(u#n)", &end, 0, end);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000494}
495
496
497PyObject *PyCodec_ReplaceErrors(PyObject *exc)
498{
499 PyObject *restuple;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000500 Py_ssize_t start;
501 Py_ssize_t end;
502 Py_ssize_t i;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000503
Serhiy Storchaka14e10a12015-05-18 16:08:38 +0300504 if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000505 PyObject *res;
506 Py_UNICODE *p;
507 if (PyUnicodeEncodeError_GetStart(exc, &start))
508 return NULL;
509 if (PyUnicodeEncodeError_GetEnd(exc, &end))
510 return NULL;
511 res = PyUnicode_FromUnicode(NULL, end-start);
512 if (res == NULL)
513 return NULL;
514 for (p = PyUnicode_AS_UNICODE(res), i = start;
515 i<end; ++p, ++i)
516 *p = '?';
517 restuple = Py_BuildValue("(On)", res, end);
518 Py_DECREF(res);
519 return restuple;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000520 }
Serhiy Storchaka14e10a12015-05-18 16:08:38 +0300521 else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000522 Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
523 if (PyUnicodeDecodeError_GetEnd(exc, &end))
524 return NULL;
Serhiy Storchakaa9885e92013-08-20 20:08:53 +0300525 return Py_BuildValue("(u#n)", &res, (Py_ssize_t)1, end);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000526 }
Serhiy Storchaka14e10a12015-05-18 16:08:38 +0300527 else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000528 PyObject *res;
529 Py_UNICODE *p;
530 if (PyUnicodeTranslateError_GetStart(exc, &start))
531 return NULL;
532 if (PyUnicodeTranslateError_GetEnd(exc, &end))
533 return NULL;
534 res = PyUnicode_FromUnicode(NULL, end-start);
535 if (res == NULL)
536 return NULL;
537 for (p = PyUnicode_AS_UNICODE(res), i = start;
538 i<end; ++p, ++i)
539 *p = Py_UNICODE_REPLACEMENT_CHARACTER;
540 restuple = Py_BuildValue("(On)", res, end);
541 Py_DECREF(res);
542 return restuple;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000543 }
544 else {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000545 wrong_exception_type(exc);
546 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000547 }
548}
549
550PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
551{
Serhiy Storchaka14e10a12015-05-18 16:08:38 +0300552 if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000553 PyObject *restuple;
554 PyObject *object;
555 Py_ssize_t start;
556 Py_ssize_t end;
557 PyObject *res;
558 Py_UNICODE *p;
559 Py_UNICODE *startp;
Serhiy Storchakae822b032013-08-06 16:56:26 +0300560 Py_UNICODE *e;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000561 Py_UNICODE *outp;
Serhiy Storchakad5249222014-10-04 14:14:41 +0300562 Py_ssize_t ressize;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000563 if (PyUnicodeEncodeError_GetStart(exc, &start))
564 return NULL;
565 if (PyUnicodeEncodeError_GetEnd(exc, &end))
566 return NULL;
567 if (!(object = PyUnicodeEncodeError_GetObject(exc)))
568 return NULL;
569 startp = PyUnicode_AS_UNICODE(object);
Serhiy Storchakad5249222014-10-04 14:14:41 +0300570 if (end - start > PY_SSIZE_T_MAX / (2+7+1)) {
571 end = start + PY_SSIZE_T_MAX / (2+7+1);
572#ifndef Py_UNICODE_WIDE
Serhiy Storchakafb7c3802014-10-04 14:51:44 +0300573 if (0xD800 <= startp[end - 1] && startp[end - 1] <= 0xDBFF)
Serhiy Storchakad5249222014-10-04 14:14:41 +0300574 end--;
575#endif
576 }
Serhiy Storchakae822b032013-08-06 16:56:26 +0300577 e = startp + end;
578 for (p = startp+start, ressize = 0; p < e;) {
579 Py_UCS4 ch = *p++;
Hye-Shik Chang7db07e62003-12-29 01:36:01 +0000580#ifndef Py_UNICODE_WIDE
Serhiy Storchakae822b032013-08-06 16:56:26 +0300581 if ((0xD800 <= ch && ch <= 0xDBFF) &&
582 (p < e) &&
583 (0xDC00 <= *p && *p <= 0xDFFF)) {
584 ch = ((((ch & 0x03FF) << 10) |
585 ((Py_UCS4)*p++ & 0x03FF)) + 0x10000);
586 }
587#endif
588 if (ch < 10)
589 ressize += 2+1+1;
590 else if (ch < 100)
591 ressize += 2+2+1;
592 else if (ch < 1000)
593 ressize += 2+3+1;
594 else if (ch < 10000)
595 ressize += 2+4+1;
596 else if (ch < 100000)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000597 ressize += 2+5+1;
Serhiy Storchakae822b032013-08-06 16:56:26 +0300598 else if (ch < 1000000)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000599 ressize += 2+6+1;
600 else
601 ressize += 2+7+1;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000602 }
603 /* allocate replacement */
604 res = PyUnicode_FromUnicode(NULL, ressize);
605 if (res == NULL) {
606 Py_DECREF(object);
607 return NULL;
608 }
609 /* generate replacement */
Serhiy Storchakae822b032013-08-06 16:56:26 +0300610 for (p = startp+start, outp = PyUnicode_AS_UNICODE(res); p < e;) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000611 int digits;
612 int base;
Serhiy Storchakae822b032013-08-06 16:56:26 +0300613 Py_UCS4 ch = *p++;
614#ifndef Py_UNICODE_WIDE
615 if ((0xD800 <= ch && ch <= 0xDBFF) &&
616 (p < startp+end) &&
617 (0xDC00 <= *p && *p <= 0xDFFF)) {
618 ch = ((((ch & 0x03FF) << 10) |
619 ((Py_UCS4)*p++ & 0x03FF)) + 0x10000);
620 }
621#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000622 *outp++ = '&';
623 *outp++ = '#';
Serhiy Storchakae822b032013-08-06 16:56:26 +0300624 if (ch < 10) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000625 digits = 1;
626 base = 1;
627 }
Serhiy Storchakae822b032013-08-06 16:56:26 +0300628 else if (ch < 100) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000629 digits = 2;
630 base = 10;
631 }
Serhiy Storchakae822b032013-08-06 16:56:26 +0300632 else if (ch < 1000) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000633 digits = 3;
634 base = 100;
635 }
Serhiy Storchakae822b032013-08-06 16:56:26 +0300636 else if (ch < 10000) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000637 digits = 4;
638 base = 1000;
639 }
Serhiy Storchakae822b032013-08-06 16:56:26 +0300640 else if (ch < 100000) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000641 digits = 5;
642 base = 10000;
643 }
Serhiy Storchakae822b032013-08-06 16:56:26 +0300644 else if (ch < 1000000) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000645 digits = 6;
646 base = 100000;
647 }
648 else {
649 digits = 7;
650 base = 1000000;
651 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000652 while (digits-->0) {
Serhiy Storchakae822b032013-08-06 16:56:26 +0300653 *outp++ = '0' + ch/base;
654 ch %= base;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000655 base /= 10;
656 }
657 *outp++ = ';';
658 }
659 restuple = Py_BuildValue("(On)", res, end);
660 Py_DECREF(res);
661 Py_DECREF(object);
662 return restuple;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000663 }
664 else {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000665 wrong_exception_type(exc);
666 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000667 }
668}
669
670static Py_UNICODE hexdigits[] = {
671 '0', '1', '2', '3', '4', '5', '6', '7',
672 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
673};
674
675PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
676{
Serhiy Storchaka14e10a12015-05-18 16:08:38 +0300677 if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000678 PyObject *restuple;
679 PyObject *object;
680 Py_ssize_t start;
681 Py_ssize_t end;
682 PyObject *res;
683 Py_UNICODE *p;
684 Py_UNICODE *startp;
685 Py_UNICODE *outp;
Serhiy Storchakad5249222014-10-04 14:14:41 +0300686 Py_ssize_t ressize;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000687 if (PyUnicodeEncodeError_GetStart(exc, &start))
688 return NULL;
689 if (PyUnicodeEncodeError_GetEnd(exc, &end))
690 return NULL;
691 if (!(object = PyUnicodeEncodeError_GetObject(exc)))
692 return NULL;
Serhiy Storchakad5249222014-10-04 14:14:41 +0300693 if (end - start > PY_SSIZE_T_MAX / (1+1+8))
694 end = start + PY_SSIZE_T_MAX / (1+1+8);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000695 startp = PyUnicode_AS_UNICODE(object);
696 for (p = startp+start, ressize = 0; p < startp+end; ++p) {
Hye-Shik Chang7db07e62003-12-29 01:36:01 +0000697#ifdef Py_UNICODE_WIDE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000698 if (*p >= 0x00010000)
699 ressize += 1+1+8;
700 else
Hye-Shik Chang7db07e62003-12-29 01:36:01 +0000701#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000702 if (*p >= 0x100) {
703 ressize += 1+1+4;
704 }
705 else
706 ressize += 1+1+2;
707 }
708 res = PyUnicode_FromUnicode(NULL, ressize);
Serhiy Storchaka7d96a092014-09-23 19:58:57 +0300709 if (res == NULL) {
710 Py_DECREF(object);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000711 return NULL;
Serhiy Storchaka7d96a092014-09-23 19:58:57 +0300712 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000713 for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
714 p < startp+end; ++p) {
715 Py_UNICODE c = *p;
716 *outp++ = '\\';
Hye-Shik Chang7db07e62003-12-29 01:36:01 +0000717#ifdef Py_UNICODE_WIDE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000718 if (c >= 0x00010000) {
719 *outp++ = 'U';
720 *outp++ = hexdigits[(c>>28)&0xf];
721 *outp++ = hexdigits[(c>>24)&0xf];
722 *outp++ = hexdigits[(c>>20)&0xf];
723 *outp++ = hexdigits[(c>>16)&0xf];
724 *outp++ = hexdigits[(c>>12)&0xf];
725 *outp++ = hexdigits[(c>>8)&0xf];
726 }
727 else
Hye-Shik Chang7db07e62003-12-29 01:36:01 +0000728#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000729 if (c >= 0x100) {
730 *outp++ = 'u';
731 *outp++ = hexdigits[(c>>12)&0xf];
732 *outp++ = hexdigits[(c>>8)&0xf];
733 }
734 else
735 *outp++ = 'x';
736 *outp++ = hexdigits[(c>>4)&0xf];
737 *outp++ = hexdigits[c&0xf];
738 }
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000739
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000740 restuple = Py_BuildValue("(On)", res, end);
741 Py_DECREF(res);
742 Py_DECREF(object);
743 return restuple;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000744 }
745 else {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000746 wrong_exception_type(exc);
747 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000748 }
749}
Walter Dörwaldbf73db82002-11-21 20:08:33 +0000750#endif
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000751
752static PyObject *strict_errors(PyObject *self, PyObject *exc)
753{
754 return PyCodec_StrictErrors(exc);
755}
756
757
Walter Dörwaldbf73db82002-11-21 20:08:33 +0000758#ifdef Py_USING_UNICODE
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000759static PyObject *ignore_errors(PyObject *self, PyObject *exc)
760{
761 return PyCodec_IgnoreErrors(exc);
762}
763
764
765static PyObject *replace_errors(PyObject *self, PyObject *exc)
766{
767 return PyCodec_ReplaceErrors(exc);
768}
769
770
771static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
772{
773 return PyCodec_XMLCharRefReplaceErrors(exc);
774}
775
776
777static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
778{
779 return PyCodec_BackslashReplaceErrors(exc);
780}
Walter Dörwaldbf73db82002-11-21 20:08:33 +0000781#endif
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000782
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000783static int _PyCodecRegistry_Init(void)
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000784{
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000785 static struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000786 char *name;
787 PyMethodDef def;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000788 } methods[] =
789 {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000790 {
791 "strict",
792 {
793 "strict_errors",
794 strict_errors,
795 METH_O,
796 PyDoc_STR("Implements the 'strict' error handling, which "
797 "raises a UnicodeError on coding errors.")
798 }
799 },
Walter Dörwaldbf73db82002-11-21 20:08:33 +0000800#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000801 {
802 "ignore",
803 {
804 "ignore_errors",
805 ignore_errors,
806 METH_O,
807 PyDoc_STR("Implements the 'ignore' error handling, which "
808 "ignores malformed data and continues.")
809 }
810 },
811 {
812 "replace",
813 {
814 "replace_errors",
815 replace_errors,
816 METH_O,
817 PyDoc_STR("Implements the 'replace' error handling, which "
818 "replaces malformed data with a replacement marker.")
819 }
820 },
821 {
822 "xmlcharrefreplace",
823 {
824 "xmlcharrefreplace_errors",
825 xmlcharrefreplace_errors,
826 METH_O,
827 PyDoc_STR("Implements the 'xmlcharrefreplace' error handling, "
828 "which replaces an unencodable character with the "
829 "appropriate XML character reference.")
830 }
831 },
832 {
833 "backslashreplace",
834 {
835 "backslashreplace_errors",
836 backslashreplace_errors,
837 METH_O,
838 PyDoc_STR("Implements the 'backslashreplace' error handling, "
839 "which replaces an unencodable character with a "
840 "backslashed escape sequence.")
841 }
842 }
Walter Dörwaldbf73db82002-11-21 20:08:33 +0000843#endif
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000844 };
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000845
Nicholas Bastine5662ae2004-03-24 22:22:12 +0000846 PyInterpreterState *interp = PyThreadState_GET()->interp;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000847 PyObject *mod;
Neal Norwitz739a8f82004-07-08 01:55:58 +0000848 unsigned i;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000849
850 if (interp->codec_search_path != NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000851 return 0;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000852
853 interp->codec_search_path = PyList_New(0);
854 interp->codec_search_cache = PyDict_New();
855 interp->codec_error_registry = PyDict_New();
856
857 if (interp->codec_error_registry) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000858 for (i = 0; i < sizeof(methods)/sizeof(methods[0]); ++i) {
859 PyObject *func = PyCFunction_New(&methods[i].def, NULL);
860 int res;
861 if (!func)
862 Py_FatalError("can't initialize codec error registry");
863 res = PyCodec_RegisterError(methods[i].name, func);
864 Py_DECREF(func);
865 if (res)
866 Py_FatalError("can't initialize codec error registry");
867 }
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000868 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000869
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000870 if (interp->codec_search_path == NULL ||
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000871 interp->codec_search_cache == NULL ||
872 interp->codec_error_registry == NULL)
873 Py_FatalError("can't initialize codec registry");
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000874
Thomas Woutersf7f438b2006-02-28 16:09:29 +0000875 mod = PyImport_ImportModuleLevel("encodings", NULL, NULL, NULL, 0);
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000876 if (mod == NULL) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000877 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
878 /* Ignore ImportErrors... this is done so that
879 distributions can disable the encodings package. Note
880 that other errors are not masked, e.g. SystemErrors
881 raised to inform the user of an error in the Python
882 configuration are still reported back to the user. */
883 PyErr_Clear();
884 return 0;
885 }
886 return -1;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000887 }
888 Py_DECREF(mod);
889 return 0;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000890}