blob: e06d6e0922a5c5574502e29b709e3463502f8636 [file] [log] [blame]
Guido van Rossumfeee4b92000-03-10 22:57:27 +00001/* ------------------------------------------------------------------------
2
3 Python Codec Registry and support functions
4
5Written by Marc-Andre Lemburg (mal@lemburg.com).
6
Guido van Rossum16b1ad92000-08-03 16:24:25 +00007Copyright (c) Corporation for National Research Initiatives.
Guido van Rossumfeee4b92000-03-10 22:57:27 +00008
9 ------------------------------------------------------------------------ */
10
11#include "Python.h"
12#include <ctype.h>
13
/* Lowercase hexadecimal digit characters, indexed by digit value (0..15). */
const char *Py_hexdigits = "0123456789abcdef";
15
Guido van Rossumfeee4b92000-03-10 22:57:27 +000016/* --- Codec Registry ----------------------------------------------------- */
17
18/* Import the standard encodings package which will register the first
Guido van Rossum98297ee2007-11-06 21:34:58 +000019 codec search function.
Guido van Rossumfeee4b92000-03-10 22:57:27 +000020
21 This is done in a lazy way so that the Unicode implementation does
22 not downgrade startup time of scripts not needing it.
23
Guido van Rossumb95de4f2000-03-31 17:25:23 +000024 ImportErrors are silently ignored by this function. Only one try is
25 made.
Guido van Rossumfeee4b92000-03-10 22:57:27 +000026
27*/
28
/* Forward declaration: the registry initializer is defined elsewhere in this
   translation unit.  Callers below treat a non-zero return as failure. */
static int _PyCodecRegistry_Init(void);
Guido van Rossumfeee4b92000-03-10 22:57:27 +000030
Guido van Rossumfeee4b92000-03-10 22:57:27 +000031int PyCodec_Register(PyObject *search_function)
32{
Nicholas Bastine5662ae2004-03-24 22:22:12 +000033 PyInterpreterState *interp = PyThreadState_GET()->interp;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +000034 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000035 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000036 if (search_function == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000037 PyErr_BadArgument();
38 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000039 }
40 if (!PyCallable_Check(search_function)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000041 PyErr_SetString(PyExc_TypeError, "argument must be callable");
42 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000043 }
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +000044 return PyList_Append(interp->codec_search_path, search_function);
Guido van Rossumb95de4f2000-03-31 17:25:23 +000045
46 onError:
47 return -1;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000048}
49
Guido van Rossum9e896b32000-04-05 20:11:21 +000050/* Convert a string to a normalized Python string: all characters are
51 converted to lower case, spaces are replaced with underscores. */
52
Guido van Rossumfeee4b92000-03-10 22:57:27 +000053static
Guido van Rossum9e896b32000-04-05 20:11:21 +000054PyObject *normalizestring(const char *string)
Guido van Rossumfeee4b92000-03-10 22:57:27 +000055{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020056 size_t i;
Guido van Rossum582acec2000-06-28 22:07:35 +000057 size_t len = strlen(string);
Guido van Rossumfeee4b92000-03-10 22:57:27 +000058 char *p;
59 PyObject *v;
Guido van Rossum21431e82007-10-19 21:48:41 +000060
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000061 if (len > PY_SSIZE_T_MAX) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000062 PyErr_SetString(PyExc_OverflowError, "string is too large");
63 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000064 }
Guido van Rossum21431e82007-10-19 21:48:41 +000065
66 p = PyMem_Malloc(len + 1);
67 if (p == NULL)
Victor Stinnercc351592013-07-12 00:02:55 +020068 return PyErr_NoMemory();
Guido van Rossum9e896b32000-04-05 20:11:21 +000069 for (i = 0; i < len; i++) {
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020070 char ch = string[i];
Guido van Rossum9e896b32000-04-05 20:11:21 +000071 if (ch == ' ')
72 ch = '-';
73 else
Antoine Pitroucf9d3c02011-07-24 02:27:04 +020074 ch = Py_TOLOWER(Py_CHARMASK(ch));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000075 p[i] = ch;
Guido van Rossum9e896b32000-04-05 20:11:21 +000076 }
Guido van Rossum21431e82007-10-19 21:48:41 +000077 p[i] = '\0';
78 v = PyUnicode_FromString(p);
79 if (v == NULL)
80 return NULL;
81 PyMem_Free(p);
Guido van Rossumfeee4b92000-03-10 22:57:27 +000082 return v;
83}
84
85/* Lookup the given encoding and return a tuple providing the codec
86 facilities.
87
88 The encoding string is looked up converted to all lower-case
89 characters. This makes encodings looked up through this mechanism
90 effectively case-insensitive.
91
Guido van Rossum98297ee2007-11-06 21:34:58 +000092 If no codec is found, a LookupError is set and NULL returned.
Guido van Rossumb95de4f2000-03-31 17:25:23 +000093
94 As side effect, this tries to load the encodings package, if not
95 yet done. This is part of the lazy load strategy for the encodings
96 package.
97
98*/
Guido van Rossumfeee4b92000-03-10 22:57:27 +000099
100PyObject *_PyCodec_Lookup(const char *encoding)
101{
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000102 PyInterpreterState *interp;
Guido van Rossum5ba3c842000-03-24 20:52:23 +0000103 PyObject *result, *args = NULL, *v;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000104 Py_ssize_t i, len;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000105
Fred Drake766de832000-05-09 19:55:59 +0000106 if (encoding == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000107 PyErr_BadArgument();
108 goto onError;
Fred Drake766de832000-05-09 19:55:59 +0000109 }
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000110
Nicholas Bastine5662ae2004-03-24 22:22:12 +0000111 interp = PyThreadState_GET()->interp;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000112 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000113 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000114
Guido van Rossum9e896b32000-04-05 20:11:21 +0000115 /* Convert the encoding to a normalized Python string: all
Thomas Wouters7e474022000-07-16 12:04:32 +0000116 characters are converted to lower case, spaces and hyphens are
Guido van Rossum9e896b32000-04-05 20:11:21 +0000117 replaced with underscores. */
118 v = normalizestring(encoding);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000119 if (v == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 goto onError;
Guido van Rossum21431e82007-10-19 21:48:41 +0000121 PyUnicode_InternInPlace(&v);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000122
123 /* First, try to lookup the name in the registry dictionary */
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000124 result = PyDict_GetItem(interp->codec_search_cache, v);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000125 if (result != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000126 Py_INCREF(result);
127 Py_DECREF(v);
128 return result;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000129 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000130
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000131 /* Next, scan the search functions in order of registration */
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000132 args = PyTuple_New(1);
133 if (args == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000135 PyTuple_SET_ITEM(args,0,v);
Guido van Rossum5ba3c842000-03-24 20:52:23 +0000136
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000137 len = PyList_Size(interp->codec_search_path);
Guido van Rossum5ba3c842000-03-24 20:52:23 +0000138 if (len < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 goto onError;
Guido van Rossumb95de4f2000-03-31 17:25:23 +0000140 if (len == 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 PyErr_SetString(PyExc_LookupError,
142 "no codec search functions registered: "
143 "can't find encoding");
144 goto onError;
Guido van Rossumb95de4f2000-03-31 17:25:23 +0000145 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000146
147 for (i = 0; i < len; i++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000148 PyObject *func;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000149
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 func = PyList_GetItem(interp->codec_search_path, i);
151 if (func == NULL)
152 goto onError;
153 result = PyEval_CallObject(func, args);
154 if (result == NULL)
155 goto onError;
156 if (result == Py_None) {
157 Py_DECREF(result);
158 continue;
159 }
160 if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
161 PyErr_SetString(PyExc_TypeError,
162 "codec search functions must return 4-tuples");
163 Py_DECREF(result);
164 goto onError;
165 }
166 break;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000167 }
168 if (i == len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000169 /* XXX Perhaps we should cache misses too ? */
170 PyErr_Format(PyExc_LookupError,
Martin v. Löwiseb42b022002-09-26 16:01:24 +0000171 "unknown encoding: %s", encoding);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000172 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000173 }
174
175 /* Cache and return the result */
Neal Norwitz9edcc2e2007-08-11 04:58:26 +0000176 if (PyDict_SetItem(interp->codec_search_cache, v, result) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 Py_DECREF(result);
178 goto onError;
Neal Norwitz9edcc2e2007-08-11 04:58:26 +0000179 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000180 Py_DECREF(args);
181 return result;
182
183 onError:
184 Py_XDECREF(args);
185 return NULL;
186}
187
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000188/* Codec registry encoding check API. */
189
190int PyCodec_KnownEncoding(const char *encoding)
191{
192 PyObject *codecs;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000193
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000194 codecs = _PyCodec_Lookup(encoding);
195 if (!codecs) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000196 PyErr_Clear();
197 return 0;
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000198 }
199 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000200 Py_DECREF(codecs);
201 return 1;
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000202 }
203}
204
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000205static
206PyObject *args_tuple(PyObject *object,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000207 const char *errors)
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000208{
209 PyObject *args;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000210
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000211 args = PyTuple_New(1 + (errors != NULL));
212 if (args == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000213 return NULL;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000214 Py_INCREF(object);
215 PyTuple_SET_ITEM(args,0,object);
216 if (errors) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000217 PyObject *v;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000218
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000219 v = PyUnicode_FromString(errors);
220 if (v == NULL) {
221 Py_DECREF(args);
222 return NULL;
223 }
224 PyTuple_SET_ITEM(args, 1, v);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000225 }
226 return args;
227}
228
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000229/* Helper function to get a codec item */
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000230
231static
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000232PyObject *codec_getitem(const char *encoding, int index)
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000233{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000234 PyObject *codecs;
235 PyObject *v;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000236
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000237 codecs = _PyCodec_Lookup(encoding);
238 if (codecs == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000239 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000240 v = PyTuple_GET_ITEM(codecs, index);
241 Py_DECREF(codecs);
242 Py_INCREF(v);
243 return v;
244}
245
Nick Coghlana9b15242014-02-04 22:11:18 +1000246/* Helper functions to create an incremental codec. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000247static
Nick Coghlana9b15242014-02-04 22:11:18 +1000248PyObject *codec_makeincrementalcodec(PyObject *codec_info,
249 const char *errors,
250 const char *attrname)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000251{
Nick Coghlana9b15242014-02-04 22:11:18 +1000252 PyObject *ret, *inccodec;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000253
Nick Coghlana9b15242014-02-04 22:11:18 +1000254 inccodec = PyObject_GetAttrString(codec_info, attrname);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000255 if (inccodec == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000257 if (errors)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000258 ret = PyObject_CallFunction(inccodec, "s", errors);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000259 else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000260 ret = PyObject_CallFunction(inccodec, NULL);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000261 Py_DECREF(inccodec);
262 return ret;
263}
264
Nick Coghlana9b15242014-02-04 22:11:18 +1000265static
266PyObject *codec_getincrementalcodec(const char *encoding,
267 const char *errors,
268 const char *attrname)
269{
270 PyObject *codec_info, *ret;
271
272 codec_info = _PyCodec_Lookup(encoding);
273 if (codec_info == NULL)
274 return NULL;
275 ret = codec_makeincrementalcodec(codec_info, errors, attrname);
276 Py_DECREF(codec_info);
277 return ret;
278}
279
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000280/* Helper function to create a stream codec. */
281
282static
283PyObject *codec_getstreamcodec(const char *encoding,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000284 PyObject *stream,
285 const char *errors,
286 const int index)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000287{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000288 PyObject *codecs, *streamcodec, *codeccls;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000289
290 codecs = _PyCodec_Lookup(encoding);
291 if (codecs == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000292 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000293
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000294 codeccls = PyTuple_GET_ITEM(codecs, index);
295 if (errors != NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 streamcodec = PyObject_CallFunction(codeccls, "Os", stream, errors);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000297 else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000298 streamcodec = PyObject_CallFunction(codeccls, "O", stream);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000299 Py_DECREF(codecs);
300 return streamcodec;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000301}
302
Nick Coghlana9b15242014-02-04 22:11:18 +1000303/* Helpers to work with the result of _PyCodec_Lookup
304
305 */
306PyObject *_PyCodecInfo_GetIncrementalDecoder(PyObject *codec_info,
307 const char *errors)
308{
309 return codec_makeincrementalcodec(codec_info, errors,
310 "incrementaldecoder");
311}
312
313PyObject *_PyCodecInfo_GetIncrementalEncoder(PyObject *codec_info,
314 const char *errors)
315{
316 return codec_makeincrementalcodec(codec_info, errors,
317 "incrementalencoder");
318}
319
320
Guido van Rossum98297ee2007-11-06 21:34:58 +0000321/* Convenience APIs to query the Codec registry.
322
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000323 All APIs return a codec object with incremented refcount.
Guido van Rossum98297ee2007-11-06 21:34:58 +0000324
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000325 */
326
327PyObject *PyCodec_Encoder(const char *encoding)
328{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000329 return codec_getitem(encoding, 0);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000330}
331
332PyObject *PyCodec_Decoder(const char *encoding)
333{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000334 return codec_getitem(encoding, 1);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000335}
336
Thomas Woutersa9773292006-04-21 09:43:23 +0000337PyObject *PyCodec_IncrementalEncoder(const char *encoding,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000338 const char *errors)
Thomas Woutersa9773292006-04-21 09:43:23 +0000339{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000340 return codec_getincrementalcodec(encoding, errors, "incrementalencoder");
Thomas Woutersa9773292006-04-21 09:43:23 +0000341}
342
343PyObject *PyCodec_IncrementalDecoder(const char *encoding,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 const char *errors)
Thomas Woutersa9773292006-04-21 09:43:23 +0000345{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000346 return codec_getincrementalcodec(encoding, errors, "incrementaldecoder");
Thomas Woutersa9773292006-04-21 09:43:23 +0000347}
348
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000349PyObject *PyCodec_StreamReader(const char *encoding,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 PyObject *stream,
351 const char *errors)
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000352{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000353 return codec_getstreamcodec(encoding, stream, errors, 2);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000354}
355
356PyObject *PyCodec_StreamWriter(const char *encoding,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000357 PyObject *stream,
358 const char *errors)
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000359{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000360 return codec_getstreamcodec(encoding, stream, errors, 3);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000361}
362
Nick Coghlan8b097b42013-11-13 23:49:21 +1000363/* Helper that tries to ensure the reported exception chain indicates the
364 * codec that was invoked to trigger the failure without changing the type
365 * of the exception raised.
366 */
static void
wrap_codec_error(const char *operation, const char *encoding)
{
    /* Hand the currently raised exception to _PyErr_TrySetFromCause() along
     * with a message naming the operation and the codec.  Per the helper's
     * contract it swaps in an updated clone of the exception when it can and
     * otherwise leaves the original untouched.
     */
    _PyErr_TrySetFromCause("%s with '%s' codec failed",
                           operation,
                           encoding);
}
378
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000379/* Encode an object (e.g. an Unicode object) using the given encoding
380 and return the resulting encoded object (usually a Python string).
381
382 errors is passed to the encoder factory as argument if non-NULL. */
383
Nick Coghlanc72e4e62013-11-22 22:39:36 +1000384static PyObject *
385_PyCodec_EncodeInternal(PyObject *object,
386 PyObject *encoder,
387 const char *encoding,
388 const char *errors)
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000389{
Neal Norwitz3715c3e2005-11-24 22:09:18 +0000390 PyObject *args = NULL, *result = NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000391 PyObject *v = NULL;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000392
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000393 args = args_tuple(object, errors);
394 if (args == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000395 goto onError;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000396
397 result = PyEval_CallObject(encoder, args);
Nick Coghlanc4c25802013-11-15 21:47:37 +1000398 if (result == NULL) {
399 wrap_codec_error("encoding", encoding);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000400 goto onError;
Nick Coghlanc4c25802013-11-15 21:47:37 +1000401 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000402
Guido van Rossum98297ee2007-11-06 21:34:58 +0000403 if (!PyTuple_Check(result) ||
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000404 PyTuple_GET_SIZE(result) != 2) {
405 PyErr_SetString(PyExc_TypeError,
406 "encoder must return a tuple (object, integer)");
407 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000408 }
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000409 v = PyTuple_GET_ITEM(result,0);
410 Py_INCREF(v);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000411 /* We don't check or use the second (integer) entry. */
412
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000413 Py_DECREF(args);
414 Py_DECREF(encoder);
415 Py_DECREF(result);
416 return v;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000417
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000418 onError:
Neal Norwitz3715c3e2005-11-24 22:09:18 +0000419 Py_XDECREF(result);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000420 Py_XDECREF(args);
421 Py_XDECREF(encoder);
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000422 return NULL;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000423}
424
425/* Decode an object (usually a Python string) using the given encoding
426 and return an equivalent object (e.g. an Unicode object).
427
428 errors is passed to the decoder factory as argument if non-NULL. */
429
Nick Coghlanc72e4e62013-11-22 22:39:36 +1000430static PyObject *
431_PyCodec_DecodeInternal(PyObject *object,
432 PyObject *decoder,
433 const char *encoding,
434 const char *errors)
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000435{
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000436 PyObject *args = NULL, *result = NULL;
437 PyObject *v;
438
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000439 args = args_tuple(object, errors);
440 if (args == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000441 goto onError;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000442
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000443 result = PyEval_CallObject(decoder,args);
Nick Coghlanc4c25802013-11-15 21:47:37 +1000444 if (result == NULL) {
445 wrap_codec_error("decoding", encoding);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000446 goto onError;
Nick Coghlanc4c25802013-11-15 21:47:37 +1000447 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000448 if (!PyTuple_Check(result) ||
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000449 PyTuple_GET_SIZE(result) != 2) {
450 PyErr_SetString(PyExc_TypeError,
451 "decoder must return a tuple (object,integer)");
452 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000453 }
454 v = PyTuple_GET_ITEM(result,0);
455 Py_INCREF(v);
456 /* We don't check or use the second (integer) entry. */
457
458 Py_DECREF(args);
459 Py_DECREF(decoder);
460 Py_DECREF(result);
461 return v;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000462
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000463 onError:
464 Py_XDECREF(args);
465 Py_XDECREF(decoder);
466 Py_XDECREF(result);
467 return NULL;
468}
469
Nick Coghlanc72e4e62013-11-22 22:39:36 +1000470/* Generic encoding/decoding API */
471PyObject *PyCodec_Encode(PyObject *object,
472 const char *encoding,
473 const char *errors)
474{
475 PyObject *encoder;
476
477 encoder = PyCodec_Encoder(encoding);
478 if (encoder == NULL)
479 return NULL;
480
481 return _PyCodec_EncodeInternal(object, encoder, encoding, errors);
482}
483
484PyObject *PyCodec_Decode(PyObject *object,
485 const char *encoding,
486 const char *errors)
487{
488 PyObject *decoder;
489
490 decoder = PyCodec_Decoder(encoding);
491 if (decoder == NULL)
492 return NULL;
493
494 return _PyCodec_DecodeInternal(object, decoder, encoding, errors);
495}
496
497/* Text encoding/decoding API */
Nick Coghlana9b15242014-02-04 22:11:18 +1000498PyObject * _PyCodec_LookupTextEncoding(const char *encoding,
499 const char *alternate_command)
Nick Coghlanc72e4e62013-11-22 22:39:36 +1000500{
501 _Py_IDENTIFIER(_is_text_encoding);
502 PyObject *codec;
503 PyObject *attr;
Nick Coghlanc72e4e62013-11-22 22:39:36 +1000504 int is_text_codec;
505
506 codec = _PyCodec_Lookup(encoding);
507 if (codec == NULL)
508 return NULL;
509
510 /* Backwards compatibility: assume any raw tuple describes a text
511 * encoding, and the same for anything lacking the private
512 * attribute.
513 */
514 if (!PyTuple_CheckExact(codec)) {
515 attr = _PyObject_GetAttrId(codec, &PyId__is_text_encoding);
516 if (attr == NULL) {
517 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
518 PyErr_Clear();
519 } else {
520 Py_DECREF(codec);
521 return NULL;
522 }
523 } else {
524 is_text_codec = PyObject_IsTrue(attr);
525 Py_DECREF(attr);
526 if (!is_text_codec) {
527 Py_DECREF(codec);
528 PyErr_Format(PyExc_LookupError,
529 "'%.400s' is not a text encoding; "
Nick Coghlana9b15242014-02-04 22:11:18 +1000530 "use %s to handle arbitrary codecs",
531 encoding, alternate_command);
Nick Coghlanc72e4e62013-11-22 22:39:36 +1000532 return NULL;
533 }
534 }
535 }
536
Nick Coghlana9b15242014-02-04 22:11:18 +1000537 /* This appears to be a valid text encoding */
538 return codec;
539}
540
541
542static
543PyObject *codec_getitem_checked(const char *encoding,
544 const char *alternate_command,
545 int index)
546{
547 PyObject *codec;
548 PyObject *v;
549
550 codec = _PyCodec_LookupTextEncoding(encoding, alternate_command);
551 if (codec == NULL)
552 return NULL;
553
Nick Coghlanc72e4e62013-11-22 22:39:36 +1000554 v = PyTuple_GET_ITEM(codec, index);
Nick Coghlanc72e4e62013-11-22 22:39:36 +1000555 Py_INCREF(v);
Nick Coghlana9b15242014-02-04 22:11:18 +1000556 Py_DECREF(codec);
Nick Coghlanc72e4e62013-11-22 22:39:36 +1000557 return v;
558}
559
560static PyObject * _PyCodec_TextEncoder(const char *encoding)
561{
Nick Coghlana9b15242014-02-04 22:11:18 +1000562 return codec_getitem_checked(encoding, "codecs.encode()", 0);
Nick Coghlanc72e4e62013-11-22 22:39:36 +1000563}
564
565static PyObject * _PyCodec_TextDecoder(const char *encoding)
566{
Nick Coghlana9b15242014-02-04 22:11:18 +1000567 return codec_getitem_checked(encoding, "codecs.decode()", 1);
Nick Coghlanc72e4e62013-11-22 22:39:36 +1000568}
569
570PyObject *_PyCodec_EncodeText(PyObject *object,
571 const char *encoding,
572 const char *errors)
573{
574 PyObject *encoder;
575
576 encoder = _PyCodec_TextEncoder(encoding);
577 if (encoder == NULL)
578 return NULL;
579
580 return _PyCodec_EncodeInternal(object, encoder, encoding, errors);
581}
582
583PyObject *_PyCodec_DecodeText(PyObject *object,
584 const char *encoding,
585 const char *errors)
586{
587 PyObject *decoder;
588
589 decoder = _PyCodec_TextDecoder(encoding);
590 if (decoder == NULL)
591 return NULL;
592
593 return _PyCodec_DecodeInternal(object, decoder, encoding, errors);
594}
595
/* Register the error handling callback function `error` under the name
   `name`. The codec will call this function when it encounters
   unencodable characters/undecodable bytes and `name` was specified as
   the errors parameter in the call to the encode/decode function.
   Return 0 on success, -1 on error */
602int PyCodec_RegisterError(const char *name, PyObject *error)
603{
Nicholas Bastine5662ae2004-03-24 22:22:12 +0000604 PyInterpreterState *interp = PyThreadState_GET()->interp;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000605 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000606 return -1;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000607 if (!PyCallable_Check(error)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000608 PyErr_SetString(PyExc_TypeError, "handler must be callable");
609 return -1;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000610 }
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000611 return PyDict_SetItemString(interp->codec_error_registry,
Serhiy Storchakac6792272013-10-19 21:03:34 +0300612 name, error);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000613}
614
/* Look up the error handling callback function registered under
   `name`. As a special case NULL can be passed, in which case
   the error handling callback for "strict" will be returned. */
618PyObject *PyCodec_LookupError(const char *name)
619{
620 PyObject *handler = NULL;
621
Nicholas Bastine5662ae2004-03-24 22:22:12 +0000622 PyInterpreterState *interp = PyThreadState_GET()->interp;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000623 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000624 return NULL;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000625
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000626 if (name==NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000627 name = "strict";
Serhiy Storchakac6792272013-10-19 21:03:34 +0300628 handler = PyDict_GetItemString(interp->codec_error_registry, name);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000629 if (!handler)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000630 PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000631 else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000632 Py_INCREF(handler);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000633 return handler;
634}
635
636static void wrong_exception_type(PyObject *exc)
637{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200638 _Py_IDENTIFIER(__class__);
639 _Py_IDENTIFIER(__name__);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +0200640 PyObject *type = _PyObject_GetAttrId(exc, &PyId___class__);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000641 if (type != NULL) {
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +0200642 PyObject *name = _PyObject_GetAttrId(type, &PyId___name__);
Walter Dörwald573c08c2007-05-25 15:46:59 +0000643 Py_DECREF(type);
644 if (name != NULL) {
645 PyErr_Format(PyExc_TypeError,
646 "don't know how to handle %S in error callback", name);
647 Py_DECREF(name);
648 }
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000649 }
650}
651
652PyObject *PyCodec_StrictErrors(PyObject *exc)
653{
Brett Cannonbf364092006-03-01 04:25:17 +0000654 if (PyExceptionInstance_Check(exc))
655 PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000656 else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000657 PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000658 return NULL;
659}
660
661
662PyObject *PyCodec_IgnoreErrors(PyObject *exc)
663{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000664 Py_ssize_t end;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000665 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000666 if (PyUnicodeEncodeError_GetEnd(exc, &end))
667 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000668 }
669 else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000670 if (PyUnicodeDecodeError_GetEnd(exc, &end))
671 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000672 }
673 else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000674 if (PyUnicodeTranslateError_GetEnd(exc, &end))
675 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000676 }
677 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000678 wrong_exception_type(exc);
679 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000680 }
Victor Stinneree450092011-12-01 02:52:11 +0100681 return Py_BuildValue("(Nn)", PyUnicode_New(0, 0), end);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000682}
683
684
685PyObject *PyCodec_ReplaceErrors(PyObject *exc)
686{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200687 Py_ssize_t start, end, i, len;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000688
689 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000690 PyObject *res;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200691 int kind;
692 void *data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000693 if (PyUnicodeEncodeError_GetStart(exc, &start))
694 return NULL;
695 if (PyUnicodeEncodeError_GetEnd(exc, &end))
696 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200697 len = end - start;
698 res = PyUnicode_New(len, '?');
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000699 if (res == NULL)
700 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200701 kind = PyUnicode_KIND(res);
702 data = PyUnicode_DATA(res);
703 for (i = 0; i < len; ++i)
704 PyUnicode_WRITE(kind, data, i, '?');
Victor Stinner8f825062012-04-27 13:55:39 +0200705 assert(_PyUnicode_CheckConsistency(res, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200706 return Py_BuildValue("(Nn)", res, end);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000707 }
708 else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000709 if (PyUnicodeDecodeError_GetEnd(exc, &end))
710 return NULL;
Victor Stinner1a15aba2011-10-02 19:00:15 +0200711 return Py_BuildValue("(Cn)",
712 (int)Py_UNICODE_REPLACEMENT_CHARACTER,
713 end);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000714 }
715 else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000716 PyObject *res;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200717 int kind;
718 void *data;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000719 if (PyUnicodeTranslateError_GetStart(exc, &start))
720 return NULL;
721 if (PyUnicodeTranslateError_GetEnd(exc, &end))
722 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200723 len = end - start;
724 res = PyUnicode_New(len, Py_UNICODE_REPLACEMENT_CHARACTER);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000725 if (res == NULL)
726 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200727 kind = PyUnicode_KIND(res);
728 data = PyUnicode_DATA(res);
729 for (i=0; i < len; i++)
730 PyUnicode_WRITE(kind, data, i, Py_UNICODE_REPLACEMENT_CHARACTER);
Victor Stinner8f825062012-04-27 13:55:39 +0200731 assert(_PyUnicode_CheckConsistency(res, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200732 return Py_BuildValue("(Nn)", res, end);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000733 }
734 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000735 wrong_exception_type(exc);
736 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000737 }
738}
739
740PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
741{
742 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000743 PyObject *restuple;
744 PyObject *object;
Victor Stinnerb31f1bc2011-11-04 21:29:10 +0100745 Py_ssize_t i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000746 Py_ssize_t start;
747 Py_ssize_t end;
748 PyObject *res;
Martin v. Löwisb09af032011-11-04 11:16:41 +0100749 unsigned char *outp;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000750 int ressize;
Martin v. Löwisb09af032011-11-04 11:16:41 +0100751 Py_UCS4 ch;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000752 if (PyUnicodeEncodeError_GetStart(exc, &start))
753 return NULL;
754 if (PyUnicodeEncodeError_GetEnd(exc, &end))
755 return NULL;
756 if (!(object = PyUnicodeEncodeError_GetObject(exc)))
757 return NULL;
Martin v. Löwisb09af032011-11-04 11:16:41 +0100758 for (i = start, ressize = 0; i < end; ++i) {
759 /* object is guaranteed to be "ready" */
760 ch = PyUnicode_READ_CHAR(object, i);
761 if (ch<10)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000762 ressize += 2+1+1;
Martin v. Löwisb09af032011-11-04 11:16:41 +0100763 else if (ch<100)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000764 ressize += 2+2+1;
Martin v. Löwisb09af032011-11-04 11:16:41 +0100765 else if (ch<1000)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000766 ressize += 2+3+1;
Martin v. Löwisb09af032011-11-04 11:16:41 +0100767 else if (ch<10000)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000768 ressize += 2+4+1;
Martin v. Löwisb09af032011-11-04 11:16:41 +0100769 else if (ch<100000)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000770 ressize += 2+5+1;
Martin v. Löwisb09af032011-11-04 11:16:41 +0100771 else if (ch<1000000)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000772 ressize += 2+6+1;
773 else
774 ressize += 2+7+1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000775 }
776 /* allocate replacement */
Martin v. Löwisb09af032011-11-04 11:16:41 +0100777 res = PyUnicode_New(ressize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000778 if (res == NULL) {
779 Py_DECREF(object);
780 return NULL;
781 }
Martin v. Löwisb09af032011-11-04 11:16:41 +0100782 outp = PyUnicode_1BYTE_DATA(res);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000783 /* generate replacement */
Victor Stinnerb31f1bc2011-11-04 21:29:10 +0100784 for (i = start; i < end; ++i) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000785 int digits;
786 int base;
Martin v. Löwis8ba79302011-11-04 12:26:49 +0100787 ch = PyUnicode_READ_CHAR(object, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000788 *outp++ = '&';
789 *outp++ = '#';
Martin v. Löwisb09af032011-11-04 11:16:41 +0100790 if (ch<10) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000791 digits = 1;
792 base = 1;
793 }
Martin v. Löwisb09af032011-11-04 11:16:41 +0100794 else if (ch<100) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000795 digits = 2;
796 base = 10;
797 }
Martin v. Löwisb09af032011-11-04 11:16:41 +0100798 else if (ch<1000) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000799 digits = 3;
800 base = 100;
801 }
Martin v. Löwisb09af032011-11-04 11:16:41 +0100802 else if (ch<10000) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000803 digits = 4;
804 base = 1000;
805 }
Martin v. Löwisb09af032011-11-04 11:16:41 +0100806 else if (ch<100000) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000807 digits = 5;
808 base = 10000;
809 }
Martin v. Löwisb09af032011-11-04 11:16:41 +0100810 else if (ch<1000000) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000811 digits = 6;
812 base = 100000;
813 }
814 else {
815 digits = 7;
816 base = 1000000;
817 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000818 while (digits-->0) {
Martin v. Löwisb09af032011-11-04 11:16:41 +0100819 *outp++ = '0' + ch/base;
820 ch %= base;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000821 base /= 10;
822 }
823 *outp++ = ';';
824 }
Victor Stinner8f825062012-04-27 13:55:39 +0200825 assert(_PyUnicode_CheckConsistency(res, 1));
826 restuple = Py_BuildValue("(Nn)", res, end);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000827 Py_DECREF(object);
828 return restuple;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000829 }
830 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000831 wrong_exception_type(exc);
832 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000833 }
834}
835
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000836PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
837{
838 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000839 PyObject *restuple;
840 PyObject *object;
Martin v. Löwisb09af032011-11-04 11:16:41 +0100841 Py_ssize_t i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000842 Py_ssize_t start;
843 Py_ssize_t end;
844 PyObject *res;
Martin v. Löwisb09af032011-11-04 11:16:41 +0100845 unsigned char *outp;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000846 int ressize;
Martin v. Löwisb09af032011-11-04 11:16:41 +0100847 Py_UCS4 c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000848 if (PyUnicodeEncodeError_GetStart(exc, &start))
849 return NULL;
850 if (PyUnicodeEncodeError_GetEnd(exc, &end))
851 return NULL;
852 if (!(object = PyUnicodeEncodeError_GetObject(exc)))
853 return NULL;
Martin v. Löwisb09af032011-11-04 11:16:41 +0100854 for (i = start, ressize = 0; i < end; ++i) {
855 /* object is guaranteed to be "ready" */
856 c = PyUnicode_READ_CHAR(object, i);
857 if (c >= 0x10000) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000858 ressize += 1+1+8;
Martin v. Löwisb09af032011-11-04 11:16:41 +0100859 }
860 else if (c >= 0x100) {
861 ressize += 1+1+4;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000862 }
863 else
864 ressize += 1+1+2;
865 }
Martin v. Löwisb09af032011-11-04 11:16:41 +0100866 res = PyUnicode_New(ressize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000867 if (res==NULL)
868 return NULL;
Martin v. Löwisb09af032011-11-04 11:16:41 +0100869 for (i = start, outp = PyUnicode_1BYTE_DATA(res);
870 i < end; ++i) {
871 c = PyUnicode_READ_CHAR(object, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000872 *outp++ = '\\';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000873 if (c >= 0x00010000) {
874 *outp++ = 'U';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200875 *outp++ = Py_hexdigits[(c>>28)&0xf];
876 *outp++ = Py_hexdigits[(c>>24)&0xf];
877 *outp++ = Py_hexdigits[(c>>20)&0xf];
878 *outp++ = Py_hexdigits[(c>>16)&0xf];
879 *outp++ = Py_hexdigits[(c>>12)&0xf];
880 *outp++ = Py_hexdigits[(c>>8)&0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000881 }
Antoine Pitroue4a18922010-09-09 20:30:23 +0000882 else if (c >= 0x100) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000883 *outp++ = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200884 *outp++ = Py_hexdigits[(c>>12)&0xf];
885 *outp++ = Py_hexdigits[(c>>8)&0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000886 }
887 else
888 *outp++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200889 *outp++ = Py_hexdigits[(c>>4)&0xf];
890 *outp++ = Py_hexdigits[c&0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000891 }
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000892
Victor Stinner8f825062012-04-27 13:55:39 +0200893 assert(_PyUnicode_CheckConsistency(res, 1));
894 restuple = Py_BuildValue("(Nn)", res, end);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000895 Py_DECREF(object);
896 return restuple;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000897 }
898 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000899 wrong_exception_type(exc);
900 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000901 }
902}
903
/* Encoding identifiers returned by get_standard_encoding(). */
#define ENC_UTF8        0
#define ENC_UTF16BE     1
#define ENC_UTF16LE     2
#define ENC_UTF32BE     3
#define ENC_UTF32LE     4

/* Classify an encoding name for the surrogatepass handler.

   Recognised spellings (prefix "utf" and the byte-order suffix are matched
   case-insensitively; each separator may be '-', '_' or absent):
       utf16 / utf32             -> native byte order (WORDS_BIGENDIAN)
       utf16be, utf16le,
       utf32be, utf32le          -> the named byte order
   Every other name, including malformed "utf-16..." / "utf-32..." names,
   is treated as UTF-8.

   Stores the number of bytes one surrogate occupies in that encoding
   (2, 4, or 3 for UTF-8) in *bytelength and returns one of the ENC_*
   constants. */
static int
get_standard_encoding(const char *encoding, int *bytelength)
{
    if (Py_TOLOWER(encoding[0]) == 'u' &&
        Py_TOLOWER(encoding[1]) == 't' &&
        Py_TOLOWER(encoding[2]) == 'f') {
        encoding += 3;
        if (*encoding == '-' || *encoding == '_' )
            encoding++;
        if (encoding[0] == '1' && encoding[1] == '6') {
            encoding += 2;
            *bytelength = 2;
            if (*encoding == '\0') {
#ifdef WORDS_BIGENDIAN
                return ENC_UTF16BE;
#else
                return ENC_UTF16LE;
#endif
            }
            if (*encoding == '-' || *encoding == '_' )
                encoding++;
            /* Check encoding[0] first: if the name ended right after the
               separator, encoding[1] would lie past the terminator. */
            if (encoding[0] != '\0' &&
                Py_TOLOWER(encoding[1]) == 'e' && encoding[2] == '\0') {
                if (Py_TOLOWER(encoding[0]) == 'b')
                    return ENC_UTF16BE;
                if (Py_TOLOWER(encoding[0]) == 'l')
                    return ENC_UTF16LE;
            }
        }
        else if (encoding[0] == '3' && encoding[1] == '2') {
            encoding += 2;
            *bytelength = 4;
            if (*encoding == '\0') {
#ifdef WORDS_BIGENDIAN
                return ENC_UTF32BE;
#else
                return ENC_UTF32LE;
#endif
            }
            if (*encoding == '-' || *encoding == '_' )
                encoding++;
            /* Same guard as for UTF-16 above. */
            if (encoding[0] != '\0' &&
                Py_TOLOWER(encoding[1]) == 'e' && encoding[2] == '\0') {
                if (Py_TOLOWER(encoding[0]) == 'b')
                    return ENC_UTF32BE;
                if (Py_TOLOWER(encoding[0]) == 'l')
                    return ENC_UTF32LE;
            }
        }
    }
    /* utf-8 (also the fallback; overrides any bytelength set above) */
    *bytelength = 3;
    return ENC_UTF8;
}
962
Martin v. Löwisaef3fb02009-05-02 19:27:30 +0000963/* This handler is declared static until someone demonstrates
964 a need to call it directly. */
965static PyObject *
Martin v. Löwise0a2b722009-05-10 08:08:56 +0000966PyCodec_SurrogatePassErrors(PyObject *exc)
Martin v. Löwisdb12d452009-05-02 18:52:14 +0000967{
968 PyObject *restuple;
969 PyObject *object;
Serhiy Storchaka58cf6072013-11-19 11:32:41 +0200970 PyObject *encode;
971 char *encoding;
972 int code;
973 int bytelength;
Martin v. Löwisb09af032011-11-04 11:16:41 +0100974 Py_ssize_t i;
Martin v. Löwisdb12d452009-05-02 18:52:14 +0000975 Py_ssize_t start;
976 Py_ssize_t end;
977 PyObject *res;
978 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
Serhiy Storchaka58cf6072013-11-19 11:32:41 +0200979 unsigned char *outp;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000980 if (PyUnicodeEncodeError_GetStart(exc, &start))
981 return NULL;
982 if (PyUnicodeEncodeError_GetEnd(exc, &end))
983 return NULL;
984 if (!(object = PyUnicodeEncodeError_GetObject(exc)))
985 return NULL;
Serhiy Storchaka58cf6072013-11-19 11:32:41 +0200986 if (!(encode = PyUnicodeEncodeError_GetEncoding(exc))) {
987 Py_DECREF(object);
988 return NULL;
989 }
990 if (!(encoding = PyUnicode_AsUTF8(encode))) {
991 Py_DECREF(object);
992 Py_DECREF(encode);
993 return NULL;
994 }
995 code = get_standard_encoding(encoding, &bytelength);
996 Py_DECREF(encode);
997
998 res = PyBytes_FromStringAndSize(NULL, bytelength*(end-start));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000999 if (!res) {
1000 Py_DECREF(object);
1001 return NULL;
1002 }
Serhiy Storchaka58cf6072013-11-19 11:32:41 +02001003 outp = (unsigned char*)PyBytes_AsString(res);
Martin v. Löwisb09af032011-11-04 11:16:41 +01001004 for (i = start; i < end; i++) {
1005 /* object is guaranteed to be "ready" */
1006 Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
Victor Stinner76df43d2012-10-30 01:42:39 +01001007 if (!Py_UNICODE_IS_SURROGATE(ch)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001008 /* Not a surrogate, fail with original exception */
1009 PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
1010 Py_DECREF(res);
1011 Py_DECREF(object);
1012 return NULL;
1013 }
Serhiy Storchaka58cf6072013-11-19 11:32:41 +02001014 switch (code) {
1015 case ENC_UTF8:
1016 *outp++ = (unsigned char)(0xe0 | (ch >> 12));
1017 *outp++ = (unsigned char)(0x80 | ((ch >> 6) & 0x3f));
1018 *outp++ = (unsigned char)(0x80 | (ch & 0x3f));
1019 break;
1020 case ENC_UTF16LE:
1021 *outp++ = (unsigned char) ch;
1022 *outp++ = (unsigned char)(ch >> 8);
1023 break;
1024 case ENC_UTF16BE:
1025 *outp++ = (unsigned char)(ch >> 8);
1026 *outp++ = (unsigned char) ch;
1027 break;
1028 case ENC_UTF32LE:
1029 *outp++ = (unsigned char) ch;
1030 *outp++ = (unsigned char)(ch >> 8);
1031 *outp++ = (unsigned char)(ch >> 16);
1032 *outp++ = (unsigned char)(ch >> 24);
1033 break;
1034 case ENC_UTF32BE:
1035 *outp++ = (unsigned char)(ch >> 24);
1036 *outp++ = (unsigned char)(ch >> 16);
1037 *outp++ = (unsigned char)(ch >> 8);
1038 *outp++ = (unsigned char) ch;
1039 break;
1040 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001041 }
1042 restuple = Py_BuildValue("(On)", res, end);
1043 Py_DECREF(res);
1044 Py_DECREF(object);
1045 return restuple;
Martin v. Löwisdb12d452009-05-02 18:52:14 +00001046 }
1047 else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001048 unsigned char *p;
Victor Stinnerc06bb7a2011-11-04 21:36:35 +01001049 Py_UCS4 ch = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001050 if (PyUnicodeDecodeError_GetStart(exc, &start))
1051 return NULL;
Serhiy Storchaka58cf6072013-11-19 11:32:41 +02001052 if (PyUnicodeDecodeError_GetEnd(exc, &end))
1053 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001054 if (!(object = PyUnicodeDecodeError_GetObject(exc)))
1055 return NULL;
1056 if (!(p = (unsigned char*)PyBytes_AsString(object))) {
1057 Py_DECREF(object);
1058 return NULL;
1059 }
Serhiy Storchaka58cf6072013-11-19 11:32:41 +02001060 if (!(encode = PyUnicodeDecodeError_GetEncoding(exc))) {
1061 Py_DECREF(object);
1062 return NULL;
1063 }
1064 if (!(encoding = PyUnicode_AsUTF8(encode))) {
1065 Py_DECREF(object);
1066 Py_DECREF(encode);
1067 return NULL;
1068 }
1069 code = get_standard_encoding(encoding, &bytelength);
1070 Py_DECREF(encode);
1071
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001072 /* Try decoding a single surrogate character. If
1073 there are more, let the codec call us again. */
1074 p += start;
Serhiy Storchaka58cf6072013-11-19 11:32:41 +02001075 if (PyBytes_GET_SIZE(object) - start >= bytelength) {
1076 switch (code) {
1077 case ENC_UTF8:
1078 if ((p[0] & 0xf0) == 0xe0 &&
1079 (p[1] & 0xc0) == 0x80 &&
1080 (p[2] & 0xc0) == 0x80) {
1081 /* it's a three-byte code */
1082 ch = ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f);
1083 }
1084 break;
1085 case ENC_UTF16LE:
1086 ch = p[1] << 8 | p[0];
1087 break;
1088 case ENC_UTF16BE:
1089 ch = p[0] << 8 | p[1];
1090 break;
1091 case ENC_UTF32LE:
1092 ch = (p[3] << 24) | (p[2] << 16) | (p[1] << 8) | p[0];
1093 break;
1094 case ENC_UTF32BE:
1095 ch = (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
1096 break;
1097 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001098 }
Serhiy Storchaka58cf6072013-11-19 11:32:41 +02001099
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001100 Py_DECREF(object);
Serhiy Storchaka58cf6072013-11-19 11:32:41 +02001101 if (!Py_UNICODE_IS_SURROGATE(ch)) {
1102 /* it's not a surrogate - fail */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001103 PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
1104 return NULL;
1105 }
Victor Stinnerc06bb7a2011-11-04 21:36:35 +01001106 res = PyUnicode_FromOrdinal(ch);
1107 if (res == NULL)
1108 return NULL;
Serhiy Storchaka58cf6072013-11-19 11:32:41 +02001109 return Py_BuildValue("(Nn)", res, start + bytelength);
Martin v. Löwisdb12d452009-05-02 18:52:14 +00001110 }
1111 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001112 wrong_exception_type(exc);
1113 return NULL;
Martin v. Löwisdb12d452009-05-02 18:52:14 +00001114 }
1115}
1116
Martin v. Löwis011e8422009-05-05 04:43:17 +00001117static PyObject *
Martin v. Löwis43c57782009-05-10 08:15:24 +00001118PyCodec_SurrogateEscapeErrors(PyObject *exc)
Martin v. Löwis011e8422009-05-05 04:43:17 +00001119{
1120 PyObject *restuple;
1121 PyObject *object;
Martin v. Löwisb09af032011-11-04 11:16:41 +01001122 Py_ssize_t i;
Martin v. Löwis011e8422009-05-05 04:43:17 +00001123 Py_ssize_t start;
1124 Py_ssize_t end;
1125 PyObject *res;
1126 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001127 char *outp;
1128 if (PyUnicodeEncodeError_GetStart(exc, &start))
1129 return NULL;
1130 if (PyUnicodeEncodeError_GetEnd(exc, &end))
1131 return NULL;
1132 if (!(object = PyUnicodeEncodeError_GetObject(exc)))
1133 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001134 res = PyBytes_FromStringAndSize(NULL, end-start);
1135 if (!res) {
1136 Py_DECREF(object);
1137 return NULL;
1138 }
1139 outp = PyBytes_AsString(res);
Martin v. Löwisb09af032011-11-04 11:16:41 +01001140 for (i = start; i < end; i++) {
1141 /* object is guaranteed to be "ready" */
1142 Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001143 if (ch < 0xdc80 || ch > 0xdcff) {
1144 /* Not a UTF-8b surrogate, fail with original exception */
1145 PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
1146 Py_DECREF(res);
1147 Py_DECREF(object);
1148 return NULL;
1149 }
1150 *outp++ = ch - 0xdc00;
1151 }
1152 restuple = Py_BuildValue("(On)", res, end);
1153 Py_DECREF(res);
1154 Py_DECREF(object);
1155 return restuple;
Martin v. Löwis011e8422009-05-05 04:43:17 +00001156 }
1157 else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
Victor Stinnerc06bb7a2011-11-04 21:36:35 +01001158 PyObject *str;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001159 unsigned char *p;
Victor Stinnerc06bb7a2011-11-04 21:36:35 +01001160 Py_UCS2 ch[4]; /* decode up to 4 bad bytes. */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001161 int consumed = 0;
1162 if (PyUnicodeDecodeError_GetStart(exc, &start))
1163 return NULL;
1164 if (PyUnicodeDecodeError_GetEnd(exc, &end))
1165 return NULL;
1166 if (!(object = PyUnicodeDecodeError_GetObject(exc)))
1167 return NULL;
1168 if (!(p = (unsigned char*)PyBytes_AsString(object))) {
1169 Py_DECREF(object);
1170 return NULL;
1171 }
1172 while (consumed < 4 && consumed < end-start) {
1173 /* Refuse to escape ASCII bytes. */
1174 if (p[start+consumed] < 128)
1175 break;
1176 ch[consumed] = 0xdc00 + p[start+consumed];
1177 consumed++;
1178 }
1179 Py_DECREF(object);
1180 if (!consumed) {
1181 /* codec complained about ASCII byte. */
1182 PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
1183 return NULL;
1184 }
Victor Stinnerc06bb7a2011-11-04 21:36:35 +01001185 str = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, ch, consumed);
1186 if (str == NULL)
1187 return NULL;
1188 return Py_BuildValue("(Nn)", str, start+consumed);
Martin v. Löwis011e8422009-05-05 04:43:17 +00001189 }
1190 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001191 wrong_exception_type(exc);
1192 return NULL;
Martin v. Löwis011e8422009-05-05 04:43:17 +00001193 }
1194}
1195
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001196
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001197static PyObject *strict_errors(PyObject *self, PyObject *exc)
1198{
1199 return PyCodec_StrictErrors(exc);
1200}
1201
1202
1203static PyObject *ignore_errors(PyObject *self, PyObject *exc)
1204{
1205 return PyCodec_IgnoreErrors(exc);
1206}
1207
1208
1209static PyObject *replace_errors(PyObject *self, PyObject *exc)
1210{
1211 return PyCodec_ReplaceErrors(exc);
1212}
1213
1214
1215static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
1216{
1217 return PyCodec_XMLCharRefReplaceErrors(exc);
1218}
1219
1220
1221static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
1222{
1223 return PyCodec_BackslashReplaceErrors(exc);
1224}
1225
Martin v. Löwise0a2b722009-05-10 08:08:56 +00001226static PyObject *surrogatepass_errors(PyObject *self, PyObject *exc)
Martin v. Löwisdb12d452009-05-02 18:52:14 +00001227{
Martin v. Löwise0a2b722009-05-10 08:08:56 +00001228 return PyCodec_SurrogatePassErrors(exc);
Martin v. Löwisdb12d452009-05-02 18:52:14 +00001229}
1230
Martin v. Löwis43c57782009-05-10 08:15:24 +00001231static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc)
Martin v. Löwis011e8422009-05-05 04:43:17 +00001232{
Martin v. Löwis43c57782009-05-10 08:15:24 +00001233 return PyCodec_SurrogateEscapeErrors(exc);
Martin v. Löwis011e8422009-05-05 04:43:17 +00001234}
1235
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +00001236static int _PyCodecRegistry_Init(void)
Guido van Rossumfeee4b92000-03-10 22:57:27 +00001237{
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001238 static struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001239 char *name;
1240 PyMethodDef def;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001241 } methods[] =
1242 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001243 {
1244 "strict",
1245 {
1246 "strict_errors",
1247 strict_errors,
1248 METH_O,
1249 PyDoc_STR("Implements the 'strict' error handling, which "
1250 "raises a UnicodeError on coding errors.")
1251 }
1252 },
1253 {
1254 "ignore",
1255 {
1256 "ignore_errors",
1257 ignore_errors,
1258 METH_O,
1259 PyDoc_STR("Implements the 'ignore' error handling, which "
1260 "ignores malformed data and continues.")
1261 }
1262 },
1263 {
1264 "replace",
1265 {
1266 "replace_errors",
1267 replace_errors,
1268 METH_O,
1269 PyDoc_STR("Implements the 'replace' error handling, which "
1270 "replaces malformed data with a replacement marker.")
1271 }
1272 },
1273 {
1274 "xmlcharrefreplace",
1275 {
1276 "xmlcharrefreplace_errors",
1277 xmlcharrefreplace_errors,
1278 METH_O,
1279 PyDoc_STR("Implements the 'xmlcharrefreplace' error handling, "
1280 "which replaces an unencodable character with the "
1281 "appropriate XML character reference.")
1282 }
1283 },
1284 {
1285 "backslashreplace",
1286 {
1287 "backslashreplace_errors",
1288 backslashreplace_errors,
1289 METH_O,
1290 PyDoc_STR("Implements the 'backslashreplace' error handling, "
1291 "which replaces an unencodable character with a "
1292 "backslashed escape sequence.")
1293 }
1294 },
1295 {
1296 "surrogatepass",
1297 {
1298 "surrogatepass",
1299 surrogatepass_errors,
1300 METH_O
1301 }
1302 },
1303 {
1304 "surrogateescape",
1305 {
1306 "surrogateescape",
1307 surrogateescape_errors,
1308 METH_O
1309 }
1310 }
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001311 };
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001312
Nicholas Bastine5662ae2004-03-24 22:22:12 +00001313 PyInterpreterState *interp = PyThreadState_GET()->interp;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +00001314 PyObject *mod;
Neal Norwitz739a8f82004-07-08 01:55:58 +00001315 unsigned i;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +00001316
1317 if (interp->codec_search_path != NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001318 return 0;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +00001319
1320 interp->codec_search_path = PyList_New(0);
1321 interp->codec_search_cache = PyDict_New();
1322 interp->codec_error_registry = PyDict_New();
1323
1324 if (interp->codec_error_registry) {
Victor Stinner63941882011-09-29 00:42:28 +02001325 for (i = 0; i < Py_ARRAY_LENGTH(methods); ++i) {
Andrew Svetlov3ba3a3e2012-12-25 13:32:35 +02001326 PyObject *func = PyCFunction_NewEx(&methods[i].def, NULL, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001327 int res;
1328 if (!func)
1329 Py_FatalError("can't initialize codec error registry");
1330 res = PyCodec_RegisterError(methods[i].name, func);
1331 Py_DECREF(func);
1332 if (res)
1333 Py_FatalError("can't initialize codec error registry");
1334 }
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001335 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +00001336
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +00001337 if (interp->codec_search_path == NULL ||
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001338 interp->codec_search_cache == NULL ||
1339 interp->codec_error_registry == NULL)
1340 Py_FatalError("can't initialize codec registry");
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +00001341
Christian Heimes819b8bf2008-01-03 23:05:47 +00001342 mod = PyImport_ImportModuleNoBlock("encodings");
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +00001343 if (mod == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001344 return -1;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +00001345 }
1346 Py_DECREF(mod);
Christian Heimes6a27efa2008-10-30 21:48:26 +00001347 interp->codecs_initialized = 1;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +00001348 return 0;
Guido van Rossumfeee4b92000-03-10 22:57:27 +00001349}