blob: dbecd1dcccfca5a2d4b901249a9e99a513648b03 [file] [log] [blame]
Guido van Rossumfeee4b92000-03-10 22:57:27 +00001/* ------------------------------------------------------------------------
2
3 Python Codec Registry and support functions
4
5Written by Marc-Andre Lemburg (mal@lemburg.com).
6
Guido van Rossum16b1ad92000-08-03 16:24:25 +00007Copyright (c) Corporation for National Research Initiatives.
Guido van Rossumfeee4b92000-03-10 22:57:27 +00008
9 ------------------------------------------------------------------------ */
10
11#include "Python.h"
12#include <ctype.h>
13
Guido van Rossumfeee4b92000-03-10 22:57:27 +000014/* --- Codec Registry ----------------------------------------------------- */
15
16/* Import the standard encodings package which will register the first
Antoine Pitrouc83ea132010-05-09 14:46:46 +000017 codec search function.
Guido van Rossumfeee4b92000-03-10 22:57:27 +000018
19 This is done in a lazy way so that the Unicode implementation does
20 not downgrade startup time of scripts not needing it.
21
Guido van Rossumb95de4f2000-03-31 17:25:23 +000022 ImportErrors are silently ignored by this function. Only one try is
23 made.
Guido van Rossumfeee4b92000-03-10 22:57:27 +000024
25*/
26
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +000027static int _PyCodecRegistry_Init(void); /* Forward */
Guido van Rossumfeee4b92000-03-10 22:57:27 +000028
Guido van Rossumfeee4b92000-03-10 22:57:27 +000029int PyCodec_Register(PyObject *search_function)
30{
Nicholas Bastine5662ae2004-03-24 22:22:12 +000031 PyInterpreterState *interp = PyThreadState_GET()->interp;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +000032 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
Antoine Pitrouc83ea132010-05-09 14:46:46 +000033 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000034 if (search_function == NULL) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +000035 PyErr_BadArgument();
36 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000037 }
38 if (!PyCallable_Check(search_function)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +000039 PyErr_SetString(PyExc_TypeError, "argument must be callable");
40 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000041 }
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +000042 return PyList_Append(interp->codec_search_path, search_function);
Guido van Rossumb95de4f2000-03-31 17:25:23 +000043
44 onError:
45 return -1;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000046}
47
Guido van Rossum9e896b32000-04-05 20:11:21 +000048/* Convert a string to a normalized Python string: all characters are
49 converted to lower case, spaces are replaced with underscores. */
50
Guido van Rossumfeee4b92000-03-10 22:57:27 +000051static
Guido van Rossum9e896b32000-04-05 20:11:21 +000052PyObject *normalizestring(const char *string)
Guido van Rossumfeee4b92000-03-10 22:57:27 +000053{
Guido van Rossum33831132000-06-29 14:50:15 +000054 register size_t i;
Guido van Rossum582acec2000-06-28 22:07:35 +000055 size_t len = strlen(string);
Guido van Rossumfeee4b92000-03-10 22:57:27 +000056 char *p;
57 PyObject *v;
Antoine Pitrouc83ea132010-05-09 14:46:46 +000058
Martin v. Löwisb1ed7fa2006-04-13 07:52:27 +000059 if (len > PY_SSIZE_T_MAX) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +000060 PyErr_SetString(PyExc_OverflowError, "string is too large");
61 return NULL;
Martin v. Löwisb1ed7fa2006-04-13 07:52:27 +000062 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +000063
Gregory P. Smithdd96db62008-06-09 04:58:54 +000064 v = PyString_FromStringAndSize(NULL, len);
Guido van Rossumfeee4b92000-03-10 22:57:27 +000065 if (v == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +000066 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +000067 p = PyString_AS_STRING(v);
Guido van Rossum9e896b32000-04-05 20:11:21 +000068 for (i = 0; i < len; i++) {
69 register char ch = string[i];
70 if (ch == ' ')
71 ch = '-';
72 else
Antoine Pitrou4cfae022011-07-24 02:51:01 +020073 ch = Py_TOLOWER(Py_CHARMASK(ch));
Antoine Pitrouc83ea132010-05-09 14:46:46 +000074 p[i] = ch;
Guido van Rossum9e896b32000-04-05 20:11:21 +000075 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +000076 return v;
77}
78
79/* Lookup the given encoding and return a tuple providing the codec
80 facilities.
81
82 The encoding string is looked up converted to all lower-case
83 characters. This makes encodings looked up through this mechanism
84 effectively case-insensitive.
85
Antoine Pitrouc83ea132010-05-09 14:46:46 +000086 If no codec is found, a LookupError is set and NULL returned.
Guido van Rossumb95de4f2000-03-31 17:25:23 +000087
88 As side effect, this tries to load the encodings package, if not
89 yet done. This is part of the lazy load strategy for the encodings
90 package.
91
92*/
Guido van Rossumfeee4b92000-03-10 22:57:27 +000093
94PyObject *_PyCodec_Lookup(const char *encoding)
95{
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +000096 PyInterpreterState *interp;
Guido van Rossum5ba3c842000-03-24 20:52:23 +000097 PyObject *result, *args = NULL, *v;
Martin v. Löwis66851282006-04-22 11:40:03 +000098 Py_ssize_t i, len;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000099
Fred Drake766de832000-05-09 19:55:59 +0000100 if (encoding == NULL) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000101 PyErr_BadArgument();
102 goto onError;
Fred Drake766de832000-05-09 19:55:59 +0000103 }
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000104
Nicholas Bastine5662ae2004-03-24 22:22:12 +0000105 interp = PyThreadState_GET()->interp;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000106 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000107 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000108
Guido van Rossum9e896b32000-04-05 20:11:21 +0000109 /* Convert the encoding to a normalized Python string: all
Thomas Wouters7e474022000-07-16 12:04:32 +0000110 characters are converted to lower case, spaces and hyphens are
Guido van Rossum9e896b32000-04-05 20:11:21 +0000111 replaced with underscores. */
112 v = normalizestring(encoding);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000113 if (v == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000114 goto onError;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000115 PyString_InternInPlace(&v);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000116
117 /* First, try to lookup the name in the registry dictionary */
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000118 result = PyDict_GetItem(interp->codec_search_cache, v);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000119 if (result != NULL) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000120 Py_INCREF(result);
121 Py_DECREF(v);
122 return result;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000123 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000124
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000125 /* Next, scan the search functions in order of registration */
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000126 args = PyTuple_New(1);
127 if (args == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000128 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000129 PyTuple_SET_ITEM(args,0,v);
Guido van Rossum5ba3c842000-03-24 20:52:23 +0000130
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000131 len = PyList_Size(interp->codec_search_path);
Guido van Rossum5ba3c842000-03-24 20:52:23 +0000132 if (len < 0)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000133 goto onError;
Guido van Rossumb95de4f2000-03-31 17:25:23 +0000134 if (len == 0) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000135 PyErr_SetString(PyExc_LookupError,
136 "no codec search functions registered: "
137 "can't find encoding");
138 goto onError;
Guido van Rossumb95de4f2000-03-31 17:25:23 +0000139 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000140
141 for (i = 0; i < len; i++) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000142 PyObject *func;
143
144 func = PyList_GetItem(interp->codec_search_path, i);
145 if (func == NULL)
146 goto onError;
147 result = PyEval_CallObject(func, args);
148 if (result == NULL)
149 goto onError;
150 if (result == Py_None) {
151 Py_DECREF(result);
152 continue;
153 }
154 if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
155 PyErr_SetString(PyExc_TypeError,
156 "codec search functions must return 4-tuples");
157 Py_DECREF(result);
158 goto onError;
159 }
160 break;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000161 }
162 if (i == len) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000163 /* XXX Perhaps we should cache misses too ? */
164 PyErr_Format(PyExc_LookupError,
Martin v. Löwiseb42b022002-09-26 16:01:24 +0000165 "unknown encoding: %s", encoding);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000166 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000167 }
168
169 /* Cache and return the result */
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000170 PyDict_SetItem(interp->codec_search_cache, v, result);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000171 Py_DECREF(args);
172 return result;
173
174 onError:
175 Py_XDECREF(args);
176 return NULL;
177}
178
179static
180PyObject *args_tuple(PyObject *object,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000181 const char *errors)
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000182{
183 PyObject *args;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000184
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000185 args = PyTuple_New(1 + (errors != NULL));
186 if (args == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000187 return NULL;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000188 Py_INCREF(object);
189 PyTuple_SET_ITEM(args,0,object);
190 if (errors) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000191 PyObject *v;
192
193 v = PyString_FromString(errors);
194 if (v == NULL) {
195 Py_DECREF(args);
196 return NULL;
197 }
198 PyTuple_SET_ITEM(args, 1, v);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000199 }
200 return args;
201}
202
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000203/* Helper function to get a codec item */
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000204
205static
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000206PyObject *codec_getitem(const char *encoding, int index)
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000207{
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000208 PyObject *codecs;
209 PyObject *v;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000210
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000211 codecs = _PyCodec_Lookup(encoding);
212 if (codecs == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000213 return NULL;
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000214 v = PyTuple_GET_ITEM(codecs, index);
215 Py_DECREF(codecs);
216 Py_INCREF(v);
217 return v;
218}
219
Serhiy Storchakac7797dc2015-05-31 20:21:00 +0300220/* Helper functions to create an incremental codec. */
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000221static
Serhiy Storchakac7797dc2015-05-31 20:21:00 +0300222PyObject *codec_makeincrementalcodec(PyObject *codec_info,
223 const char *errors,
224 const char *attrname)
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000225{
Serhiy Storchakac7797dc2015-05-31 20:21:00 +0300226 PyObject *ret, *inccodec;
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000227
Serhiy Storchakac7797dc2015-05-31 20:21:00 +0300228 inccodec = PyObject_GetAttrString(codec_info, attrname);
Walter Dörwaldba8e1802006-03-18 14:05:43 +0000229 if (inccodec == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000230 return NULL;
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000231 if (errors)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000232 ret = PyObject_CallFunction(inccodec, "s", errors);
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000233 else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000234 ret = PyObject_CallFunction(inccodec, NULL);
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000235 Py_DECREF(inccodec);
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000236 return ret;
237}
238
Serhiy Storchakac7797dc2015-05-31 20:21:00 +0300239static
240PyObject *codec_getincrementalcodec(const char *encoding,
241 const char *errors,
242 const char *attrname)
243{
244 PyObject *codec_info, *ret;
245
246 codec_info = _PyCodec_Lookup(encoding);
247 if (codec_info == NULL)
248 return NULL;
249 ret = codec_makeincrementalcodec(codec_info, errors, attrname);
250 Py_DECREF(codec_info);
251 return ret;
252}
253
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000254/* Helper function to create a stream codec. */
255
256static
257PyObject *codec_getstreamcodec(const char *encoding,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000258 PyObject *stream,
259 const char *errors,
260 const int index)
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000261{
Hye-Shik Change6a1cb92006-06-23 21:16:18 +0000262 PyObject *codecs, *streamcodec, *codeccls;
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000263
264 codecs = _PyCodec_Lookup(encoding);
265 if (codecs == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000266 return NULL;
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000267
Hye-Shik Change6a1cb92006-06-23 21:16:18 +0000268 codeccls = PyTuple_GET_ITEM(codecs, index);
269 if (errors != NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000270 streamcodec = PyObject_CallFunction(codeccls, "Os", stream, errors);
Hye-Shik Change6a1cb92006-06-23 21:16:18 +0000271 else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000272 streamcodec = PyObject_CallFunction(codeccls, "O", stream);
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000273 Py_DECREF(codecs);
274 return streamcodec;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000275}
276
Serhiy Storchakac7797dc2015-05-31 20:21:00 +0300277/* Helpers to work with the result of _PyCodec_Lookup
278
279 */
280PyObject *_PyCodecInfo_GetIncrementalDecoder(PyObject *codec_info,
281 const char *errors)
282{
283 return codec_makeincrementalcodec(codec_info, errors,
284 "incrementaldecoder");
285}
286
287PyObject *_PyCodecInfo_GetIncrementalEncoder(PyObject *codec_info,
288 const char *errors)
289{
290 return codec_makeincrementalcodec(codec_info, errors,
291 "incrementalencoder");
292}
293
294
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000295/* Convenience APIs to query the Codec registry.
296
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000297 All APIs return a codec object with incremented refcount.
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000298
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000299 */
300
301PyObject *PyCodec_Encoder(const char *encoding)
302{
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000303 return codec_getitem(encoding, 0);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000304}
305
306PyObject *PyCodec_Decoder(const char *encoding)
307{
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000308 return codec_getitem(encoding, 1);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000309}
310
Walter Dörwaldabb02e52006-03-15 11:35:15 +0000311PyObject *PyCodec_IncrementalEncoder(const char *encoding,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000312 const char *errors)
Walter Dörwaldabb02e52006-03-15 11:35:15 +0000313{
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000314 return codec_getincrementalcodec(encoding, errors, "incrementalencoder");
Walter Dörwaldabb02e52006-03-15 11:35:15 +0000315}
316
317PyObject *PyCodec_IncrementalDecoder(const char *encoding,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000318 const char *errors)
Walter Dörwaldabb02e52006-03-15 11:35:15 +0000319{
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000320 return codec_getincrementalcodec(encoding, errors, "incrementaldecoder");
Walter Dörwaldabb02e52006-03-15 11:35:15 +0000321}
322
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000323PyObject *PyCodec_StreamReader(const char *encoding,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000324 PyObject *stream,
325 const char *errors)
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000326{
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000327 return codec_getstreamcodec(encoding, stream, errors, 2);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000328}
329
330PyObject *PyCodec_StreamWriter(const char *encoding,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000331 PyObject *stream,
332 const char *errors)
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000333{
Walter Dörwaldd53850a2006-03-16 21:46:40 +0000334 return codec_getstreamcodec(encoding, stream, errors, 3);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000335}
336
Martin Panter6a8163a2016-04-15 02:14:19 +0000337/* Encode an object (e.g. a Unicode object) using the given encoding
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000338 and return the resulting encoded object (usually a Python string).
339
340 errors is passed to the encoder factory as argument if non-NULL. */
341
Serhiy Storchakac7797dc2015-05-31 20:21:00 +0300342static PyObject *
343_PyCodec_EncodeInternal(PyObject *object,
344 PyObject *encoder,
345 const char *encoding,
346 const char *errors)
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000347{
Neal Norwitz3715c3e2005-11-24 22:09:18 +0000348 PyObject *args = NULL, *result = NULL;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000349 PyObject *v;
350
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000351 args = args_tuple(object, errors);
352 if (args == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000353 goto onError;
354
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000355 result = PyEval_CallObject(encoder,args);
356 if (result == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000357 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000358
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000359 if (!PyTuple_Check(result) ||
360 PyTuple_GET_SIZE(result) != 2) {
361 PyErr_SetString(PyExc_TypeError,
362 "encoder must return a tuple (object,integer)");
363 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000364 }
365 v = PyTuple_GET_ITEM(result,0);
366 Py_INCREF(v);
367 /* We don't check or use the second (integer) entry. */
368
369 Py_DECREF(args);
370 Py_DECREF(encoder);
371 Py_DECREF(result);
372 return v;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000373
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000374 onError:
Neal Norwitz3715c3e2005-11-24 22:09:18 +0000375 Py_XDECREF(result);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000376 Py_XDECREF(args);
377 Py_XDECREF(encoder);
378 return NULL;
379}
380
381/* Decode an object (usually a Python string) using the given encoding
Martin Panter6a8163a2016-04-15 02:14:19 +0000382 and return an equivalent object (e.g. a Unicode object).
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000383
384 errors is passed to the decoder factory as argument if non-NULL. */
385
Serhiy Storchakac7797dc2015-05-31 20:21:00 +0300386static PyObject *
387_PyCodec_DecodeInternal(PyObject *object,
388 PyObject *decoder,
389 const char *encoding,
390 const char *errors)
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000391{
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000392 PyObject *args = NULL, *result = NULL;
393 PyObject *v;
394
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000395 args = args_tuple(object, errors);
396 if (args == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000397 goto onError;
398
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000399 result = PyEval_CallObject(decoder,args);
400 if (result == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000401 goto onError;
402 if (!PyTuple_Check(result) ||
403 PyTuple_GET_SIZE(result) != 2) {
404 PyErr_SetString(PyExc_TypeError,
405 "decoder must return a tuple (object,integer)");
406 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000407 }
408 v = PyTuple_GET_ITEM(result,0);
409 Py_INCREF(v);
410 /* We don't check or use the second (integer) entry. */
411
412 Py_DECREF(args);
413 Py_DECREF(decoder);
414 Py_DECREF(result);
415 return v;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000416
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000417 onError:
418 Py_XDECREF(args);
419 Py_XDECREF(decoder);
420 Py_XDECREF(result);
421 return NULL;
422}
423
Serhiy Storchakac7797dc2015-05-31 20:21:00 +0300424/* Generic encoding/decoding API */
425PyObject *PyCodec_Encode(PyObject *object,
426 const char *encoding,
427 const char *errors)
428{
429 PyObject *encoder;
430
431 encoder = PyCodec_Encoder(encoding);
432 if (encoder == NULL)
433 return NULL;
434
435 return _PyCodec_EncodeInternal(object, encoder, encoding, errors);
436}
437
438PyObject *PyCodec_Decode(PyObject *object,
439 const char *encoding,
440 const char *errors)
441{
442 PyObject *decoder;
443
444 decoder = PyCodec_Decoder(encoding);
445 if (decoder == NULL)
446 return NULL;
447
448 return _PyCodec_DecodeInternal(object, decoder, encoding, errors);
449}
450
451/* Text encoding/decoding API */
452PyObject * _PyCodec_LookupTextEncoding(const char *encoding,
453 const char *alternate_command)
454{
455 PyObject *codec;
456 PyObject *attr;
457 int is_text_codec;
458
459 codec = _PyCodec_Lookup(encoding);
460 if (codec == NULL)
461 return NULL;
462
463 /* Backwards compatibility: assume any raw tuple describes a text
464 * encoding, and the same for anything lacking the private
465 * attribute.
466 */
467 if (Py_Py3kWarningFlag && !PyTuple_CheckExact(codec)) {
468 attr = PyObject_GetAttrString(codec, "_is_text_encoding");
469 if (attr == NULL) {
470 if (!PyErr_ExceptionMatches(PyExc_AttributeError))
471 goto onError;
472 PyErr_Clear();
473 } else {
474 is_text_codec = PyObject_IsTrue(attr);
475 Py_DECREF(attr);
476 if (is_text_codec < 0)
477 goto onError;
478 if (!is_text_codec) {
479 PyObject *msg = PyString_FromFormat(
480 "'%.400s' is not a text encoding; "
481 "use %s to handle arbitrary codecs",
482 encoding, alternate_command);
483 if (msg == NULL)
484 goto onError;
485 if (PyErr_WarnPy3k(PyString_AS_STRING(msg), 1) < 0) {
486 Py_DECREF(msg);
487 goto onError;
488 }
489 Py_DECREF(msg);
490 }
491 }
492 }
493
494 /* This appears to be a valid text encoding */
495 return codec;
496
497 onError:
498 Py_DECREF(codec);
499 return NULL;
500}
501
502
503static
504PyObject *codec_getitem_checked(const char *encoding,
505 const char *alternate_command,
506 int index)
507{
508 PyObject *codec;
509 PyObject *v;
510
511 codec = _PyCodec_LookupTextEncoding(encoding, alternate_command);
512 if (codec == NULL)
513 return NULL;
514
515 v = PyTuple_GET_ITEM(codec, index);
516 Py_INCREF(v);
517 Py_DECREF(codec);
518 return v;
519}
520
521static PyObject * _PyCodec_TextEncoder(const char *encoding)
522{
523 return codec_getitem_checked(encoding, "codecs.encode()", 0);
524}
525
526static PyObject * _PyCodec_TextDecoder(const char *encoding)
527{
528 return codec_getitem_checked(encoding, "codecs.decode()", 1);
529}
530
531PyObject *_PyCodec_EncodeText(PyObject *object,
532 const char *encoding,
533 const char *errors)
534{
535 PyObject *encoder;
536
537 encoder = _PyCodec_TextEncoder(encoding);
538 if (encoder == NULL)
539 return NULL;
540
541 return _PyCodec_EncodeInternal(object, encoder, encoding, errors);
542}
543
544PyObject *_PyCodec_DecodeText(PyObject *object,
545 const char *encoding,
546 const char *errors)
547{
548 PyObject *decoder;
549
550 decoder = _PyCodec_TextDecoder(encoding);
551 if (decoder == NULL)
552 return NULL;
553
554 return _PyCodec_DecodeInternal(object, decoder, encoding, errors);
555}
556
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000557/* Register the error handling callback function error under the name
558 name. This function will be called by the codec when it encounters
559 an unencodable characters/undecodable bytes and doesn't know the
560 callback name, when name is specified as the error parameter
561 in the call to the encode/decode function.
562 Return 0 on success, -1 on error */
563int PyCodec_RegisterError(const char *name, PyObject *error)
564{
Nicholas Bastine5662ae2004-03-24 22:22:12 +0000565 PyInterpreterState *interp = PyThreadState_GET()->interp;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000566 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000567 return -1;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000568 if (!PyCallable_Check(error)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000569 PyErr_SetString(PyExc_TypeError, "handler must be callable");
570 return -1;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000571 }
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000572 return PyDict_SetItemString(interp->codec_error_registry,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000573 (char *)name, error);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000574}
575
576/* Lookup the error handling callback function registered under the
577 name error. As a special case NULL can be passed, in which case
578 the error handling callback for strict encoding will be returned. */
579PyObject *PyCodec_LookupError(const char *name)
580{
581 PyObject *handler = NULL;
582
Nicholas Bastine5662ae2004-03-24 22:22:12 +0000583 PyInterpreterState *interp = PyThreadState_GET()->interp;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000584 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000585 return NULL;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000586
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000587 if (name==NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000588 name = "strict";
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000589 handler = PyDict_GetItemString(interp->codec_error_registry, (char *)name);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000590 if (!handler)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000591 PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000592 else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000593 Py_INCREF(handler);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000594 return handler;
595}
596
597static void wrong_exception_type(PyObject *exc)
598{
599 PyObject *type = PyObject_GetAttrString(exc, "__class__");
600 if (type != NULL) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000601 PyObject *name = PyObject_GetAttrString(type, "__name__");
602 Py_DECREF(type);
603 if (name != NULL) {
604 PyObject *string = PyObject_Str(name);
605 Py_DECREF(name);
606 if (string != NULL) {
607 PyErr_Format(PyExc_TypeError,
608 "don't know how to handle %.400s in error callback",
609 PyString_AS_STRING(string));
610 Py_DECREF(string);
611 }
612 }
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000613 }
614}
615
616PyObject *PyCodec_StrictErrors(PyObject *exc)
617{
Brett Cannonbf364092006-03-01 04:25:17 +0000618 if (PyExceptionInstance_Check(exc))
619 PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000620 else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000621 PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000622 return NULL;
623}
624
625
Walter Dörwaldbf73db82002-11-21 20:08:33 +0000626#ifdef Py_USING_UNICODE
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000627PyObject *PyCodec_IgnoreErrors(PyObject *exc)
628{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000629 Py_ssize_t end;
Serhiy Storchaka14e10a12015-05-18 16:08:38 +0300630
631 if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000632 if (PyUnicodeEncodeError_GetEnd(exc, &end))
633 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000634 }
Serhiy Storchaka14e10a12015-05-18 16:08:38 +0300635 else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000636 if (PyUnicodeDecodeError_GetEnd(exc, &end))
637 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000638 }
Serhiy Storchaka14e10a12015-05-18 16:08:38 +0300639 else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000640 if (PyUnicodeTranslateError_GetEnd(exc, &end))
641 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000642 }
643 else {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000644 wrong_exception_type(exc);
645 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000646 }
647 /* ouch: passing NULL, 0, pos gives None instead of u'' */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000648 return Py_BuildValue("(u#n)", &end, 0, end);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000649}
650
651
652PyObject *PyCodec_ReplaceErrors(PyObject *exc)
653{
654 PyObject *restuple;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000655 Py_ssize_t start;
656 Py_ssize_t end;
657 Py_ssize_t i;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000658
Serhiy Storchaka14e10a12015-05-18 16:08:38 +0300659 if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000660 PyObject *res;
661 Py_UNICODE *p;
662 if (PyUnicodeEncodeError_GetStart(exc, &start))
663 return NULL;
664 if (PyUnicodeEncodeError_GetEnd(exc, &end))
665 return NULL;
666 res = PyUnicode_FromUnicode(NULL, end-start);
667 if (res == NULL)
668 return NULL;
669 for (p = PyUnicode_AS_UNICODE(res), i = start;
670 i<end; ++p, ++i)
671 *p = '?';
672 restuple = Py_BuildValue("(On)", res, end);
673 Py_DECREF(res);
674 return restuple;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000675 }
Serhiy Storchaka14e10a12015-05-18 16:08:38 +0300676 else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000677 Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
678 if (PyUnicodeDecodeError_GetEnd(exc, &end))
679 return NULL;
Serhiy Storchakaa9885e92013-08-20 20:08:53 +0300680 return Py_BuildValue("(u#n)", &res, (Py_ssize_t)1, end);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000681 }
Serhiy Storchaka14e10a12015-05-18 16:08:38 +0300682 else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000683 PyObject *res;
684 Py_UNICODE *p;
685 if (PyUnicodeTranslateError_GetStart(exc, &start))
686 return NULL;
687 if (PyUnicodeTranslateError_GetEnd(exc, &end))
688 return NULL;
689 res = PyUnicode_FromUnicode(NULL, end-start);
690 if (res == NULL)
691 return NULL;
692 for (p = PyUnicode_AS_UNICODE(res), i = start;
693 i<end; ++p, ++i)
694 *p = Py_UNICODE_REPLACEMENT_CHARACTER;
695 restuple = Py_BuildValue("(On)", res, end);
696 Py_DECREF(res);
697 return restuple;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000698 }
699 else {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000700 wrong_exception_type(exc);
701 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000702 }
703}
704
705PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
706{
Serhiy Storchaka14e10a12015-05-18 16:08:38 +0300707 if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000708 PyObject *restuple;
709 PyObject *object;
710 Py_ssize_t start;
711 Py_ssize_t end;
712 PyObject *res;
713 Py_UNICODE *p;
714 Py_UNICODE *startp;
Serhiy Storchakae822b032013-08-06 16:56:26 +0300715 Py_UNICODE *e;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000716 Py_UNICODE *outp;
Serhiy Storchakad5249222014-10-04 14:14:41 +0300717 Py_ssize_t ressize;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000718 if (PyUnicodeEncodeError_GetStart(exc, &start))
719 return NULL;
720 if (PyUnicodeEncodeError_GetEnd(exc, &end))
721 return NULL;
722 if (!(object = PyUnicodeEncodeError_GetObject(exc)))
723 return NULL;
724 startp = PyUnicode_AS_UNICODE(object);
Serhiy Storchakad5249222014-10-04 14:14:41 +0300725 if (end - start > PY_SSIZE_T_MAX / (2+7+1)) {
726 end = start + PY_SSIZE_T_MAX / (2+7+1);
727#ifndef Py_UNICODE_WIDE
Serhiy Storchakafb7c3802014-10-04 14:51:44 +0300728 if (0xD800 <= startp[end - 1] && startp[end - 1] <= 0xDBFF)
Serhiy Storchakad5249222014-10-04 14:14:41 +0300729 end--;
730#endif
731 }
Serhiy Storchakae822b032013-08-06 16:56:26 +0300732 e = startp + end;
733 for (p = startp+start, ressize = 0; p < e;) {
734 Py_UCS4 ch = *p++;
Hye-Shik Chang7db07e62003-12-29 01:36:01 +0000735#ifndef Py_UNICODE_WIDE
Serhiy Storchakae822b032013-08-06 16:56:26 +0300736 if ((0xD800 <= ch && ch <= 0xDBFF) &&
737 (p < e) &&
738 (0xDC00 <= *p && *p <= 0xDFFF)) {
739 ch = ((((ch & 0x03FF) << 10) |
740 ((Py_UCS4)*p++ & 0x03FF)) + 0x10000);
741 }
742#endif
743 if (ch < 10)
744 ressize += 2+1+1;
745 else if (ch < 100)
746 ressize += 2+2+1;
747 else if (ch < 1000)
748 ressize += 2+3+1;
749 else if (ch < 10000)
750 ressize += 2+4+1;
751 else if (ch < 100000)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000752 ressize += 2+5+1;
Serhiy Storchakae822b032013-08-06 16:56:26 +0300753 else if (ch < 1000000)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000754 ressize += 2+6+1;
755 else
756 ressize += 2+7+1;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000757 }
758 /* allocate replacement */
759 res = PyUnicode_FromUnicode(NULL, ressize);
760 if (res == NULL) {
761 Py_DECREF(object);
762 return NULL;
763 }
764 /* generate replacement */
Serhiy Storchakae822b032013-08-06 16:56:26 +0300765 for (p = startp+start, outp = PyUnicode_AS_UNICODE(res); p < e;) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000766 int digits;
767 int base;
Serhiy Storchakae822b032013-08-06 16:56:26 +0300768 Py_UCS4 ch = *p++;
769#ifndef Py_UNICODE_WIDE
770 if ((0xD800 <= ch && ch <= 0xDBFF) &&
771 (p < startp+end) &&
772 (0xDC00 <= *p && *p <= 0xDFFF)) {
773 ch = ((((ch & 0x03FF) << 10) |
774 ((Py_UCS4)*p++ & 0x03FF)) + 0x10000);
775 }
776#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000777 *outp++ = '&';
778 *outp++ = '#';
Serhiy Storchakae822b032013-08-06 16:56:26 +0300779 if (ch < 10) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000780 digits = 1;
781 base = 1;
782 }
Serhiy Storchakae822b032013-08-06 16:56:26 +0300783 else if (ch < 100) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000784 digits = 2;
785 base = 10;
786 }
Serhiy Storchakae822b032013-08-06 16:56:26 +0300787 else if (ch < 1000) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000788 digits = 3;
789 base = 100;
790 }
Serhiy Storchakae822b032013-08-06 16:56:26 +0300791 else if (ch < 10000) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000792 digits = 4;
793 base = 1000;
794 }
Serhiy Storchakae822b032013-08-06 16:56:26 +0300795 else if (ch < 100000) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000796 digits = 5;
797 base = 10000;
798 }
Serhiy Storchakae822b032013-08-06 16:56:26 +0300799 else if (ch < 1000000) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000800 digits = 6;
801 base = 100000;
802 }
803 else {
804 digits = 7;
805 base = 1000000;
806 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000807 while (digits-->0) {
Serhiy Storchakae822b032013-08-06 16:56:26 +0300808 *outp++ = '0' + ch/base;
809 ch %= base;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000810 base /= 10;
811 }
812 *outp++ = ';';
813 }
814 restuple = Py_BuildValue("(On)", res, end);
815 Py_DECREF(res);
816 Py_DECREF(object);
817 return restuple;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000818 }
819 else {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000820 wrong_exception_type(exc);
821 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000822 }
823}
824
825static Py_UNICODE hexdigits[] = {
826 '0', '1', '2', '3', '4', '5', '6', '7',
827 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
828};
829
830PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
831{
Serhiy Storchaka14e10a12015-05-18 16:08:38 +0300832 if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000833 PyObject *restuple;
834 PyObject *object;
835 Py_ssize_t start;
836 Py_ssize_t end;
837 PyObject *res;
838 Py_UNICODE *p;
839 Py_UNICODE *startp;
840 Py_UNICODE *outp;
Serhiy Storchakad5249222014-10-04 14:14:41 +0300841 Py_ssize_t ressize;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000842 if (PyUnicodeEncodeError_GetStart(exc, &start))
843 return NULL;
844 if (PyUnicodeEncodeError_GetEnd(exc, &end))
845 return NULL;
846 if (!(object = PyUnicodeEncodeError_GetObject(exc)))
847 return NULL;
Serhiy Storchakad5249222014-10-04 14:14:41 +0300848 if (end - start > PY_SSIZE_T_MAX / (1+1+8))
849 end = start + PY_SSIZE_T_MAX / (1+1+8);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000850 startp = PyUnicode_AS_UNICODE(object);
851 for (p = startp+start, ressize = 0; p < startp+end; ++p) {
Hye-Shik Chang7db07e62003-12-29 01:36:01 +0000852#ifdef Py_UNICODE_WIDE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000853 if (*p >= 0x00010000)
854 ressize += 1+1+8;
855 else
Hye-Shik Chang7db07e62003-12-29 01:36:01 +0000856#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000857 if (*p >= 0x100) {
858 ressize += 1+1+4;
859 }
860 else
861 ressize += 1+1+2;
862 }
863 res = PyUnicode_FromUnicode(NULL, ressize);
Serhiy Storchaka7d96a092014-09-23 19:58:57 +0300864 if (res == NULL) {
865 Py_DECREF(object);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000866 return NULL;
Serhiy Storchaka7d96a092014-09-23 19:58:57 +0300867 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000868 for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
869 p < startp+end; ++p) {
870 Py_UNICODE c = *p;
871 *outp++ = '\\';
Hye-Shik Chang7db07e62003-12-29 01:36:01 +0000872#ifdef Py_UNICODE_WIDE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000873 if (c >= 0x00010000) {
874 *outp++ = 'U';
875 *outp++ = hexdigits[(c>>28)&0xf];
876 *outp++ = hexdigits[(c>>24)&0xf];
877 *outp++ = hexdigits[(c>>20)&0xf];
878 *outp++ = hexdigits[(c>>16)&0xf];
879 *outp++ = hexdigits[(c>>12)&0xf];
880 *outp++ = hexdigits[(c>>8)&0xf];
881 }
882 else
Hye-Shik Chang7db07e62003-12-29 01:36:01 +0000883#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000884 if (c >= 0x100) {
885 *outp++ = 'u';
886 *outp++ = hexdigits[(c>>12)&0xf];
887 *outp++ = hexdigits[(c>>8)&0xf];
888 }
889 else
890 *outp++ = 'x';
891 *outp++ = hexdigits[(c>>4)&0xf];
892 *outp++ = hexdigits[c&0xf];
893 }
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000894
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000895 restuple = Py_BuildValue("(On)", res, end);
896 Py_DECREF(res);
897 Py_DECREF(object);
898 return restuple;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000899 }
900 else {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000901 wrong_exception_type(exc);
902 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000903 }
904}
Walter Dörwaldbf73db82002-11-21 20:08:33 +0000905#endif
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000906
907static PyObject *strict_errors(PyObject *self, PyObject *exc)
908{
909 return PyCodec_StrictErrors(exc);
910}
911
912
Walter Dörwaldbf73db82002-11-21 20:08:33 +0000913#ifdef Py_USING_UNICODE
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000914static PyObject *ignore_errors(PyObject *self, PyObject *exc)
915{
916 return PyCodec_IgnoreErrors(exc);
917}
918
919
920static PyObject *replace_errors(PyObject *self, PyObject *exc)
921{
922 return PyCodec_ReplaceErrors(exc);
923}
924
925
926static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
927{
928 return PyCodec_XMLCharRefReplaceErrors(exc);
929}
930
931
932static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
933{
934 return PyCodec_BackslashReplaceErrors(exc);
935}
Walter Dörwaldbf73db82002-11-21 20:08:33 +0000936#endif
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000937
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000938static int _PyCodecRegistry_Init(void)
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000939{
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000940 static struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000941 char *name;
942 PyMethodDef def;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000943 } methods[] =
944 {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000945 {
946 "strict",
947 {
948 "strict_errors",
949 strict_errors,
950 METH_O,
951 PyDoc_STR("Implements the 'strict' error handling, which "
952 "raises a UnicodeError on coding errors.")
953 }
954 },
Walter Dörwaldbf73db82002-11-21 20:08:33 +0000955#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000956 {
957 "ignore",
958 {
959 "ignore_errors",
960 ignore_errors,
961 METH_O,
962 PyDoc_STR("Implements the 'ignore' error handling, which "
963 "ignores malformed data and continues.")
964 }
965 },
966 {
967 "replace",
968 {
969 "replace_errors",
970 replace_errors,
971 METH_O,
972 PyDoc_STR("Implements the 'replace' error handling, which "
973 "replaces malformed data with a replacement marker.")
974 }
975 },
976 {
977 "xmlcharrefreplace",
978 {
979 "xmlcharrefreplace_errors",
980 xmlcharrefreplace_errors,
981 METH_O,
982 PyDoc_STR("Implements the 'xmlcharrefreplace' error handling, "
983 "which replaces an unencodable character with the "
984 "appropriate XML character reference.")
985 }
986 },
987 {
988 "backslashreplace",
989 {
990 "backslashreplace_errors",
991 backslashreplace_errors,
992 METH_O,
993 PyDoc_STR("Implements the 'backslashreplace' error handling, "
994 "which replaces an unencodable character with a "
995 "backslashed escape sequence.")
996 }
997 }
Walter Dörwaldbf73db82002-11-21 20:08:33 +0000998#endif
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000999 };
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001000
Nicholas Bastine5662ae2004-03-24 22:22:12 +00001001 PyInterpreterState *interp = PyThreadState_GET()->interp;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +00001002 PyObject *mod;
Neal Norwitz739a8f82004-07-08 01:55:58 +00001003 unsigned i;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +00001004
1005 if (interp->codec_search_path != NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001006 return 0;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +00001007
1008 interp->codec_search_path = PyList_New(0);
1009 interp->codec_search_cache = PyDict_New();
1010 interp->codec_error_registry = PyDict_New();
1011
1012 if (interp->codec_error_registry) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001013 for (i = 0; i < sizeof(methods)/sizeof(methods[0]); ++i) {
1014 PyObject *func = PyCFunction_New(&methods[i].def, NULL);
1015 int res;
1016 if (!func)
1017 Py_FatalError("can't initialize codec error registry");
1018 res = PyCodec_RegisterError(methods[i].name, func);
1019 Py_DECREF(func);
1020 if (res)
1021 Py_FatalError("can't initialize codec error registry");
1022 }
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001023 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +00001024
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +00001025 if (interp->codec_search_path == NULL ||
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001026 interp->codec_search_cache == NULL ||
1027 interp->codec_error_registry == NULL)
1028 Py_FatalError("can't initialize codec registry");
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +00001029
Thomas Woutersf7f438b2006-02-28 16:09:29 +00001030 mod = PyImport_ImportModuleLevel("encodings", NULL, NULL, NULL, 0);
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +00001031 if (mod == NULL) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001032 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
1033 /* Ignore ImportErrors... this is done so that
1034 distributions can disable the encodings package. Note
1035 that other errors are not masked, e.g. SystemErrors
1036 raised to inform the user of an error in the Python
1037 configuration are still reported back to the user. */
1038 PyErr_Clear();
1039 return 0;
1040 }
1041 return -1;
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +00001042 }
1043 Py_DECREF(mod);
1044 return 0;
Guido van Rossumfeee4b92000-03-10 22:57:27 +00001045}