blob: 5e01ccae133544beff49215f85273f21f9841a14 [file] [log] [blame]
/* ------------------------------------------------------------------------

   Python Codec Registry and support functions

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

   ------------------------------------------------------------------------ */
10
11#include "Python.h"
12#include <ctype.h>
13
/* --- Globals ------------------------------------------------------------ */
15
16static PyObject *_PyCodec_SearchPath;
17static PyObject *_PyCodec_SearchCache;
18
19/* Flag used for lazy import of the standard encodings package */
20static int import_encodings_called = 0;
21
/* --- Codec Registry ----------------------------------------------------- */
23
24/* Import the standard encodings package which will register the first
25 codec search function.
26
27 This is done in a lazy way so that the Unicode implementation does
28 not downgrade startup time of scripts not needing it.
29
Guido van Rossumb95de4f2000-03-31 17:25:23 +000030 ImportErrors are silently ignored by this function. Only one try is
31 made.
Guido van Rossumfeee4b92000-03-10 22:57:27 +000032
33*/
34
35static
Guido van Rossumb95de4f2000-03-31 17:25:23 +000036int import_encodings()
Guido van Rossumfeee4b92000-03-10 22:57:27 +000037{
38 PyObject *mod;
39
40 import_encodings_called = 1;
41 mod = PyImport_ImportModule("encodings");
42 if (mod == NULL) {
Guido van Rossumb95de4f2000-03-31 17:25:23 +000043 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
44 /* Ignore ImportErrors... this is done so that
45 distributions can disable the encodings package. Note
46 that other errors are not masked, e.g. SystemErrors
47 raised to inform the user of an error in the Python
48 configuration are still reported back to the user. */
49 PyErr_Clear();
50 return 0;
51 }
52 return -1;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000053 }
54 Py_DECREF(mod);
Guido van Rossumb95de4f2000-03-31 17:25:23 +000055 return 0;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000056}
57
Guido van Rossumfeee4b92000-03-10 22:57:27 +000058int PyCodec_Register(PyObject *search_function)
59{
Guido van Rossumb95de4f2000-03-31 17:25:23 +000060 if (!import_encodings_called) {
61 if (import_encodings())
62 goto onError;
63 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +000064 if (search_function == NULL) {
65 PyErr_BadArgument();
Guido van Rossumb95de4f2000-03-31 17:25:23 +000066 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000067 }
68 if (!PyCallable_Check(search_function)) {
69 PyErr_SetString(PyExc_TypeError,
70 "argument must be callable");
Guido van Rossumb95de4f2000-03-31 17:25:23 +000071 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000072 }
73 return PyList_Append(_PyCodec_SearchPath, search_function);
Guido van Rossumb95de4f2000-03-31 17:25:23 +000074
75 onError:
76 return -1;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000077}
78
Guido van Rossum9e896b32000-04-05 20:11:21 +000079/* Convert a string to a normalized Python string: all characters are
80 converted to lower case, spaces are replaced with underscores. */
81
Guido van Rossumfeee4b92000-03-10 22:57:27 +000082static
Guido van Rossum9e896b32000-04-05 20:11:21 +000083PyObject *normalizestring(const char *string)
Guido van Rossumfeee4b92000-03-10 22:57:27 +000084{
85 register int i;
86 int len = strlen(string);
87 char *p;
88 PyObject *v;
89
90 v = PyString_FromStringAndSize(NULL, len);
91 if (v == NULL)
92 return NULL;
93 p = PyString_AS_STRING(v);
Guido van Rossum9e896b32000-04-05 20:11:21 +000094 for (i = 0; i < len; i++) {
95 register char ch = string[i];
96 if (ch == ' ')
97 ch = '-';
98 else
99 ch = tolower(ch);
100 p[i] = ch;
101 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000102 return v;
103}
104
105/* Lookup the given encoding and return a tuple providing the codec
106 facilities.
107
108 The encoding string is looked up converted to all lower-case
109 characters. This makes encodings looked up through this mechanism
110 effectively case-insensitive.
111
Fred Drake766de832000-05-09 19:55:59 +0000112 If no codec is found, a LookupError is set and NULL returned.
Guido van Rossumb95de4f2000-03-31 17:25:23 +0000113
114 As side effect, this tries to load the encodings package, if not
115 yet done. This is part of the lazy load strategy for the encodings
116 package.
117
118*/
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000119
120PyObject *_PyCodec_Lookup(const char *encoding)
121{
Guido van Rossum5ba3c842000-03-24 20:52:23 +0000122 PyObject *result, *args = NULL, *v;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000123 int i, len;
124
Fred Drake766de832000-05-09 19:55:59 +0000125 if (encoding == NULL) {
126 PyErr_BadArgument();
127 goto onError;
128 }
Guido van Rossumb95de4f2000-03-31 17:25:23 +0000129 if (_PyCodec_SearchCache == NULL ||
130 _PyCodec_SearchPath == NULL) {
Barry Warsaw51ac5802000-03-20 16:36:48 +0000131 PyErr_SetString(PyExc_SystemError,
132 "codec module not properly initialized");
133 goto onError;
134 }
Guido van Rossumb95de4f2000-03-31 17:25:23 +0000135 if (!import_encodings_called) {
136 if (import_encodings())
137 goto onError;
138 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000139
Guido van Rossum9e896b32000-04-05 20:11:21 +0000140 /* Convert the encoding to a normalized Python string: all
141 characters are converted to lower case, spaces and hypens are
142 replaced with underscores. */
143 v = normalizestring(encoding);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000144 if (v == NULL)
145 goto onError;
146 PyString_InternInPlace(&v);
147
148 /* First, try to lookup the name in the registry dictionary */
149 result = PyDict_GetItem(_PyCodec_SearchCache, v);
150 if (result != NULL) {
151 Py_INCREF(result);
Barry Warsaw51ac5802000-03-20 16:36:48 +0000152 Py_DECREF(v);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000153 return result;
154 }
155
156 /* Next, scan the search functions in order of registration */
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000157 args = PyTuple_New(1);
158 if (args == NULL)
159 goto onError;
160 PyTuple_SET_ITEM(args,0,v);
Guido van Rossum5ba3c842000-03-24 20:52:23 +0000161
162 len = PyList_Size(_PyCodec_SearchPath);
163 if (len < 0)
164 goto onError;
Guido van Rossumb95de4f2000-03-31 17:25:23 +0000165 if (len == 0) {
166 PyErr_SetString(PyExc_LookupError,
167 "no codec search functions registered: "
168 "can't find encoding");
169 goto onError;
170 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000171
172 for (i = 0; i < len; i++) {
173 PyObject *func;
174
175 func = PyList_GetItem(_PyCodec_SearchPath, i);
176 if (func == NULL)
177 goto onError;
Guido van Rossum5ba3c842000-03-24 20:52:23 +0000178 result = PyEval_CallObject(func, args);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000179 if (result == NULL)
180 goto onError;
181 if (result == Py_None) {
182 Py_DECREF(result);
183 continue;
184 }
185 if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
186 PyErr_SetString(PyExc_TypeError,
187 "codec search functions must return 4-tuples");
188 Py_DECREF(result);
189 goto onError;
190 }
191 break;
192 }
193 if (i == len) {
194 /* XXX Perhaps we should cache misses too ? */
195 PyErr_SetString(PyExc_LookupError,
Barry Warsaw51ac5802000-03-20 16:36:48 +0000196 "unknown encoding");
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000197 goto onError;
198 }
199
200 /* Cache and return the result */
201 PyDict_SetItem(_PyCodec_SearchCache, v, result);
202 Py_DECREF(args);
203 return result;
204
205 onError:
206 Py_XDECREF(args);
207 return NULL;
208}
209
210static
211PyObject *args_tuple(PyObject *object,
212 const char *errors)
213{
214 PyObject *args;
215
216 args = PyTuple_New(1 + (errors != NULL));
217 if (args == NULL)
218 return NULL;
219 Py_INCREF(object);
220 PyTuple_SET_ITEM(args,0,object);
221 if (errors) {
222 PyObject *v;
223
224 v = PyString_FromString(errors);
225 if (v == NULL) {
226 Py_DECREF(args);
227 return NULL;
228 }
229 PyTuple_SET_ITEM(args, 1, v);
230 }
231 return args;
232}
233
234/* Build a codec by calling factory(stream[,errors]) or just
235 factory(errors) depending on whether the given parameters are
236 non-NULL. */
237
238static
239PyObject *build_stream_codec(PyObject *factory,
240 PyObject *stream,
241 const char *errors)
242{
243 PyObject *args, *codec;
244
245 args = args_tuple(stream, errors);
246 if (args == NULL)
247 return NULL;
248
249 codec = PyEval_CallObject(factory, args);
250 Py_DECREF(args);
251 return codec;
252}
253
/* Convenience APIs to query the Codec registry.

   All APIs return a codec object with incremented refcount.

 */
259
260PyObject *PyCodec_Encoder(const char *encoding)
261{
262 PyObject *codecs;
263 PyObject *v;
264
265 codecs = _PyCodec_Lookup(encoding);
266 if (codecs == NULL)
267 goto onError;
268 v = PyTuple_GET_ITEM(codecs,0);
269 Py_INCREF(v);
270 return v;
271
272 onError:
273 return NULL;
274}
275
276PyObject *PyCodec_Decoder(const char *encoding)
277{
278 PyObject *codecs;
279 PyObject *v;
280
281 codecs = _PyCodec_Lookup(encoding);
282 if (codecs == NULL)
283 goto onError;
284 v = PyTuple_GET_ITEM(codecs,1);
285 Py_INCREF(v);
286 return v;
287
288 onError:
289 return NULL;
290}
291
292PyObject *PyCodec_StreamReader(const char *encoding,
293 PyObject *stream,
294 const char *errors)
295{
296 PyObject *codecs;
297
298 codecs = _PyCodec_Lookup(encoding);
299 if (codecs == NULL)
300 goto onError;
301 return build_stream_codec(PyTuple_GET_ITEM(codecs,2),stream,errors);
302
303 onError:
304 return NULL;
305}
306
307PyObject *PyCodec_StreamWriter(const char *encoding,
308 PyObject *stream,
309 const char *errors)
310{
311 PyObject *codecs;
312
313 codecs = _PyCodec_Lookup(encoding);
314 if (codecs == NULL)
315 goto onError;
316 return build_stream_codec(PyTuple_GET_ITEM(codecs,3),stream,errors);
317
318 onError:
319 return NULL;
320}
321
322/* Encode an object (e.g. an Unicode object) using the given encoding
323 and return the resulting encoded object (usually a Python string).
324
325 errors is passed to the encoder factory as argument if non-NULL. */
326
327PyObject *PyCodec_Encode(PyObject *object,
328 const char *encoding,
329 const char *errors)
330{
331 PyObject *encoder = NULL;
332 PyObject *args = NULL, *result;
333 PyObject *v;
334
335 encoder = PyCodec_Encoder(encoding);
336 if (encoder == NULL)
337 goto onError;
338
339 args = args_tuple(object, errors);
340 if (args == NULL)
341 goto onError;
342
343 result = PyEval_CallObject(encoder,args);
344 if (result == NULL)
345 goto onError;
346
347 if (!PyTuple_Check(result) ||
348 PyTuple_GET_SIZE(result) != 2) {
349 PyErr_SetString(PyExc_TypeError,
350 "encoder must return a tuple (object,integer)");
351 goto onError;
352 }
353 v = PyTuple_GET_ITEM(result,0);
354 Py_INCREF(v);
355 /* We don't check or use the second (integer) entry. */
356
357 Py_DECREF(args);
358 Py_DECREF(encoder);
359 Py_DECREF(result);
360 return v;
361
362 onError:
363 Py_XDECREF(args);
364 Py_XDECREF(encoder);
365 return NULL;
366}
367
368/* Decode an object (usually a Python string) using the given encoding
369 and return an equivalent object (e.g. an Unicode object).
370
371 errors is passed to the decoder factory as argument if non-NULL. */
372
373PyObject *PyCodec_Decode(PyObject *object,
374 const char *encoding,
375 const char *errors)
376{
377 PyObject *decoder = NULL;
378 PyObject *args = NULL, *result = NULL;
379 PyObject *v;
380
381 decoder = PyCodec_Decoder(encoding);
382 if (decoder == NULL)
383 goto onError;
384
385 args = args_tuple(object, errors);
386 if (args == NULL)
387 goto onError;
388
389 result = PyEval_CallObject(decoder,args);
390 if (result == NULL)
391 goto onError;
392 if (!PyTuple_Check(result) ||
393 PyTuple_GET_SIZE(result) != 2) {
394 PyErr_SetString(PyExc_TypeError,
395 "decoder must return a tuple (object,integer)");
396 goto onError;
397 }
398 v = PyTuple_GET_ITEM(result,0);
399 Py_INCREF(v);
400 /* We don't check or use the second (integer) entry. */
401
402 Py_DECREF(args);
403 Py_DECREF(decoder);
404 Py_DECREF(result);
405 return v;
406
407 onError:
408 Py_XDECREF(args);
409 Py_XDECREF(decoder);
410 Py_XDECREF(result);
411 return NULL;
412}
413
414void _PyCodecRegistry_Init()
415{
416 if (_PyCodec_SearchPath == NULL)
417 _PyCodec_SearchPath = PyList_New(0);
418 if (_PyCodec_SearchCache == NULL)
419 _PyCodec_SearchCache = PyDict_New();
420 if (_PyCodec_SearchPath == NULL ||
421 _PyCodec_SearchCache == NULL)
422 Py_FatalError("can't intialize codec registry");
423}
424
425void _PyCodecRegistry_Fini()
426{
427 Py_XDECREF(_PyCodec_SearchPath);
Barry Warsaw51ac5802000-03-20 16:36:48 +0000428 _PyCodec_SearchPath = NULL;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000429 Py_XDECREF(_PyCodec_SearchCache);
Barry Warsaw51ac5802000-03-20 16:36:48 +0000430 _PyCodec_SearchCache = NULL;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000431}