blob: b9a6461aa218fea261d7c90616421429b2d00114 [file] [log] [blame]
Guido van Rossumfeee4b92000-03-10 22:57:27 +00001/* ------------------------------------------------------------------------
2
3 Python Codec Registry and support functions
4
5Written by Marc-Andre Lemburg (mal@lemburg.com).
6
7(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
8
9 ------------------------------------------------------------------------ */
10
11#include "Python.h"
12#include <ctype.h>
13
14/* --- Globals ------------------------------------------------------------ */
15
16static PyObject *_PyCodec_SearchPath;
17static PyObject *_PyCodec_SearchCache;
18
19/* Flag used for lazy import of the standard encodings package */
20static int import_encodings_called = 0;
21
22/* --- Codec Registry ----------------------------------------------------- */
23
24/* Import the standard encodings package which will register the first
25 codec search function.
26
27 This is done in a lazy way so that the Unicode implementation does
28 not downgrade startup time of scripts not needing it.
29
Guido van Rossumb95de4f2000-03-31 17:25:23 +000030 ImportErrors are silently ignored by this function. Only one try is
31 made.
Guido van Rossumfeee4b92000-03-10 22:57:27 +000032
33*/
34
35static
Guido van Rossumb95de4f2000-03-31 17:25:23 +000036int import_encodings()
Guido van Rossumfeee4b92000-03-10 22:57:27 +000037{
38 PyObject *mod;
39
40 import_encodings_called = 1;
41 mod = PyImport_ImportModule("encodings");
42 if (mod == NULL) {
Guido van Rossumb95de4f2000-03-31 17:25:23 +000043 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
44 /* Ignore ImportErrors... this is done so that
45 distributions can disable the encodings package. Note
46 that other errors are not masked, e.g. SystemErrors
47 raised to inform the user of an error in the Python
48 configuration are still reported back to the user. */
49 PyErr_Clear();
50 return 0;
51 }
52 return -1;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000053 }
54 Py_DECREF(mod);
Guido van Rossumb95de4f2000-03-31 17:25:23 +000055 return 0;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000056}
57
58/* Register a new codec search function.
59
Guido van Rossumb95de4f2000-03-31 17:25:23 +000060 As side effect, this tries to load the encodings package, if not
61 yet done, to make sure that it is always first in the list of
62 search functions.
63
Guido van Rossumfeee4b92000-03-10 22:57:27 +000064 The search_function's refcount is incremented by this function. */
65
66int PyCodec_Register(PyObject *search_function)
67{
Guido van Rossumb95de4f2000-03-31 17:25:23 +000068 if (!import_encodings_called) {
69 if (import_encodings())
70 goto onError;
71 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +000072 if (search_function == NULL) {
73 PyErr_BadArgument();
Guido van Rossumb95de4f2000-03-31 17:25:23 +000074 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000075 }
76 if (!PyCallable_Check(search_function)) {
77 PyErr_SetString(PyExc_TypeError,
78 "argument must be callable");
Guido van Rossumb95de4f2000-03-31 17:25:23 +000079 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000080 }
81 return PyList_Append(_PyCodec_SearchPath, search_function);
Guido van Rossumb95de4f2000-03-31 17:25:23 +000082
83 onError:
84 return -1;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000085}
86
87static
88PyObject *lowercasestring(const char *string)
89{
90 register int i;
91 int len = strlen(string);
92 char *p;
93 PyObject *v;
94
95 v = PyString_FromStringAndSize(NULL, len);
96 if (v == NULL)
97 return NULL;
98 p = PyString_AS_STRING(v);
99 for (i = 0; i < len; i++)
100 p[i] = tolower(string[i]);
101 return v;
102}
103
104/* Lookup the given encoding and return a tuple providing the codec
105 facilities.
106
107 The encoding string is looked up converted to all lower-case
108 characters. This makes encodings looked up through this mechanism
109 effectively case-insensitive.
110
Guido van Rossumb95de4f2000-03-31 17:25:23 +0000111 If no codec is found, a KeyError is set and NULL returned.
112
113 As side effect, this tries to load the encodings package, if not
114 yet done. This is part of the lazy load strategy for the encodings
115 package.
116
117*/
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000118
119PyObject *_PyCodec_Lookup(const char *encoding)
120{
Guido van Rossum5ba3c842000-03-24 20:52:23 +0000121 PyObject *result, *args = NULL, *v;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000122 int i, len;
123
Guido van Rossumb95de4f2000-03-31 17:25:23 +0000124 if (_PyCodec_SearchCache == NULL ||
125 _PyCodec_SearchPath == NULL) {
Barry Warsaw51ac5802000-03-20 16:36:48 +0000126 PyErr_SetString(PyExc_SystemError,
127 "codec module not properly initialized");
128 goto onError;
129 }
Guido van Rossumb95de4f2000-03-31 17:25:23 +0000130 if (!import_encodings_called) {
131 if (import_encodings())
132 goto onError;
133 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000134
135 /* Convert the encoding to a lower-cased Python string */
136 v = lowercasestring(encoding);
137 if (v == NULL)
138 goto onError;
139 PyString_InternInPlace(&v);
140
141 /* First, try to lookup the name in the registry dictionary */
142 result = PyDict_GetItem(_PyCodec_SearchCache, v);
143 if (result != NULL) {
144 Py_INCREF(result);
Barry Warsaw51ac5802000-03-20 16:36:48 +0000145 Py_DECREF(v);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000146 return result;
147 }
148
149 /* Next, scan the search functions in order of registration */
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000150 args = PyTuple_New(1);
151 if (args == NULL)
152 goto onError;
153 PyTuple_SET_ITEM(args,0,v);
Guido van Rossum5ba3c842000-03-24 20:52:23 +0000154
155 len = PyList_Size(_PyCodec_SearchPath);
156 if (len < 0)
157 goto onError;
Guido van Rossumb95de4f2000-03-31 17:25:23 +0000158 if (len == 0) {
159 PyErr_SetString(PyExc_LookupError,
160 "no codec search functions registered: "
161 "can't find encoding");
162 goto onError;
163 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000164
165 for (i = 0; i < len; i++) {
166 PyObject *func;
167
168 func = PyList_GetItem(_PyCodec_SearchPath, i);
169 if (func == NULL)
170 goto onError;
Guido van Rossum5ba3c842000-03-24 20:52:23 +0000171 result = PyEval_CallObject(func, args);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000172 if (result == NULL)
173 goto onError;
174 if (result == Py_None) {
175 Py_DECREF(result);
176 continue;
177 }
178 if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
179 PyErr_SetString(PyExc_TypeError,
180 "codec search functions must return 4-tuples");
181 Py_DECREF(result);
182 goto onError;
183 }
184 break;
185 }
186 if (i == len) {
187 /* XXX Perhaps we should cache misses too ? */
188 PyErr_SetString(PyExc_LookupError,
Barry Warsaw51ac5802000-03-20 16:36:48 +0000189 "unknown encoding");
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000190 goto onError;
191 }
192
193 /* Cache and return the result */
194 PyDict_SetItem(_PyCodec_SearchCache, v, result);
195 Py_DECREF(args);
196 return result;
197
198 onError:
199 Py_XDECREF(args);
200 return NULL;
201}
202
203static
204PyObject *args_tuple(PyObject *object,
205 const char *errors)
206{
207 PyObject *args;
208
209 args = PyTuple_New(1 + (errors != NULL));
210 if (args == NULL)
211 return NULL;
212 Py_INCREF(object);
213 PyTuple_SET_ITEM(args,0,object);
214 if (errors) {
215 PyObject *v;
216
217 v = PyString_FromString(errors);
218 if (v == NULL) {
219 Py_DECREF(args);
220 return NULL;
221 }
222 PyTuple_SET_ITEM(args, 1, v);
223 }
224 return args;
225}
226
227/* Build a codec by calling factory(stream[,errors]) or just
228 factory(errors) depending on whether the given parameters are
229 non-NULL. */
230
231static
232PyObject *build_stream_codec(PyObject *factory,
233 PyObject *stream,
234 const char *errors)
235{
236 PyObject *args, *codec;
237
238 args = args_tuple(stream, errors);
239 if (args == NULL)
240 return NULL;
241
242 codec = PyEval_CallObject(factory, args);
243 Py_DECREF(args);
244 return codec;
245}
246
247/* Convenience APIs to query the Codec registry.
248
249 All APIs return a codec object with incremented refcount.
250
251 */
252
253PyObject *PyCodec_Encoder(const char *encoding)
254{
255 PyObject *codecs;
256 PyObject *v;
257
258 codecs = _PyCodec_Lookup(encoding);
259 if (codecs == NULL)
260 goto onError;
261 v = PyTuple_GET_ITEM(codecs,0);
262 Py_INCREF(v);
263 return v;
264
265 onError:
266 return NULL;
267}
268
269PyObject *PyCodec_Decoder(const char *encoding)
270{
271 PyObject *codecs;
272 PyObject *v;
273
274 codecs = _PyCodec_Lookup(encoding);
275 if (codecs == NULL)
276 goto onError;
277 v = PyTuple_GET_ITEM(codecs,1);
278 Py_INCREF(v);
279 return v;
280
281 onError:
282 return NULL;
283}
284
285PyObject *PyCodec_StreamReader(const char *encoding,
286 PyObject *stream,
287 const char *errors)
288{
289 PyObject *codecs;
290
291 codecs = _PyCodec_Lookup(encoding);
292 if (codecs == NULL)
293 goto onError;
294 return build_stream_codec(PyTuple_GET_ITEM(codecs,2),stream,errors);
295
296 onError:
297 return NULL;
298}
299
300PyObject *PyCodec_StreamWriter(const char *encoding,
301 PyObject *stream,
302 const char *errors)
303{
304 PyObject *codecs;
305
306 codecs = _PyCodec_Lookup(encoding);
307 if (codecs == NULL)
308 goto onError;
309 return build_stream_codec(PyTuple_GET_ITEM(codecs,3),stream,errors);
310
311 onError:
312 return NULL;
313}
314
315/* Encode an object (e.g. an Unicode object) using the given encoding
316 and return the resulting encoded object (usually a Python string).
317
318 errors is passed to the encoder factory as argument if non-NULL. */
319
320PyObject *PyCodec_Encode(PyObject *object,
321 const char *encoding,
322 const char *errors)
323{
324 PyObject *encoder = NULL;
325 PyObject *args = NULL, *result;
326 PyObject *v;
327
328 encoder = PyCodec_Encoder(encoding);
329 if (encoder == NULL)
330 goto onError;
331
332 args = args_tuple(object, errors);
333 if (args == NULL)
334 goto onError;
335
336 result = PyEval_CallObject(encoder,args);
337 if (result == NULL)
338 goto onError;
339
340 if (!PyTuple_Check(result) ||
341 PyTuple_GET_SIZE(result) != 2) {
342 PyErr_SetString(PyExc_TypeError,
343 "encoder must return a tuple (object,integer)");
344 goto onError;
345 }
346 v = PyTuple_GET_ITEM(result,0);
347 Py_INCREF(v);
348 /* We don't check or use the second (integer) entry. */
349
350 Py_DECREF(args);
351 Py_DECREF(encoder);
352 Py_DECREF(result);
353 return v;
354
355 onError:
356 Py_XDECREF(args);
357 Py_XDECREF(encoder);
358 return NULL;
359}
360
361/* Decode an object (usually a Python string) using the given encoding
362 and return an equivalent object (e.g. an Unicode object).
363
364 errors is passed to the decoder factory as argument if non-NULL. */
365
366PyObject *PyCodec_Decode(PyObject *object,
367 const char *encoding,
368 const char *errors)
369{
370 PyObject *decoder = NULL;
371 PyObject *args = NULL, *result = NULL;
372 PyObject *v;
373
374 decoder = PyCodec_Decoder(encoding);
375 if (decoder == NULL)
376 goto onError;
377
378 args = args_tuple(object, errors);
379 if (args == NULL)
380 goto onError;
381
382 result = PyEval_CallObject(decoder,args);
383 if (result == NULL)
384 goto onError;
385 if (!PyTuple_Check(result) ||
386 PyTuple_GET_SIZE(result) != 2) {
387 PyErr_SetString(PyExc_TypeError,
388 "decoder must return a tuple (object,integer)");
389 goto onError;
390 }
391 v = PyTuple_GET_ITEM(result,0);
392 Py_INCREF(v);
393 /* We don't check or use the second (integer) entry. */
394
395 Py_DECREF(args);
396 Py_DECREF(decoder);
397 Py_DECREF(result);
398 return v;
399
400 onError:
401 Py_XDECREF(args);
402 Py_XDECREF(decoder);
403 Py_XDECREF(result);
404 return NULL;
405}
406
407void _PyCodecRegistry_Init()
408{
409 if (_PyCodec_SearchPath == NULL)
410 _PyCodec_SearchPath = PyList_New(0);
411 if (_PyCodec_SearchCache == NULL)
412 _PyCodec_SearchCache = PyDict_New();
413 if (_PyCodec_SearchPath == NULL ||
414 _PyCodec_SearchCache == NULL)
415 Py_FatalError("can't intialize codec registry");
416}
417
418void _PyCodecRegistry_Fini()
419{
420 Py_XDECREF(_PyCodec_SearchPath);
Barry Warsaw51ac5802000-03-20 16:36:48 +0000421 _PyCodec_SearchPath = NULL;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000422 Py_XDECREF(_PyCodec_SearchCache);
Barry Warsaw51ac5802000-03-20 16:36:48 +0000423 _PyCodec_SearchCache = NULL;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000424}