blob: b2a19b839b31e3836325226bfa4343a57e40bfb6 [file] [log] [blame]
/* ------------------------------------------------------------------------

   Python Codec Registry and support functions

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

   ------------------------------------------------------------------------ */
10
11#include "Python.h"
12#include <ctype.h>
13
14/* --- Globals ------------------------------------------------------------ */
15
/* List of registered codec search functions, in order of registration.
   Created by _PyCodecRegistry_Init(), appended to by PyCodec_Register()
   and released by _PyCodecRegistry_Fini(). */
static PyObject *_PyCodec_SearchPath;
/* Dictionary caching successful lookups: it maps the lower-cased
   encoding name to the 4-tuple returned by a search function. */
static PyObject *_PyCodec_SearchCache;

/* Flag used for lazy import of the standard encodings package */
static int import_encodings_called = 0;
21
22/* --- Codec Registry ----------------------------------------------------- */
23
24/* Import the standard encodings package which will register the first
25 codec search function.
26
27 This is done in a lazy way so that the Unicode implementation does
28 not downgrade startup time of scripts not needing it.
29
30 Errors are silently ignored by this function. Only one try is made.
31
32*/
33
34static
35void import_encodings()
36{
37 PyObject *mod;
38
39 import_encodings_called = 1;
40 mod = PyImport_ImportModule("encodings");
41 if (mod == NULL) {
42 PyErr_Clear();
43 return;
44 }
45 Py_DECREF(mod);
46}
47
48/* Register a new codec search function.
49
50 The search_function's refcount is incremented by this function. */
51
52int PyCodec_Register(PyObject *search_function)
53{
54 if (!import_encodings_called)
55 import_encodings();
56 if (search_function == NULL) {
57 PyErr_BadArgument();
58 return -1;
59 }
60 if (!PyCallable_Check(search_function)) {
61 PyErr_SetString(PyExc_TypeError,
62 "argument must be callable");
63 return -1;
64 }
65 return PyList_Append(_PyCodec_SearchPath, search_function);
66}
67
68static
69PyObject *lowercasestring(const char *string)
70{
71 register int i;
72 int len = strlen(string);
73 char *p;
74 PyObject *v;
75
76 v = PyString_FromStringAndSize(NULL, len);
77 if (v == NULL)
78 return NULL;
79 p = PyString_AS_STRING(v);
80 for (i = 0; i < len; i++)
81 p[i] = tolower(string[i]);
82 return v;
83}
84
85/* Lookup the given encoding and return a tuple providing the codec
86 facilities.
87
88 The encoding string is looked up converted to all lower-case
89 characters. This makes encodings looked up through this mechanism
90 effectively case-insensitive.
91
92 If no codec is found, a KeyError is set and NULL returned. */
93
94PyObject *_PyCodec_Lookup(const char *encoding)
95{
Guido van Rossum5ba3c842000-03-24 20:52:23 +000096 PyObject *result, *args = NULL, *v;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000097 int i, len;
98
Barry Warsaw51ac5802000-03-20 16:36:48 +000099 if (_PyCodec_SearchCache == NULL || _PyCodec_SearchPath == NULL) {
100 PyErr_SetString(PyExc_SystemError,
101 "codec module not properly initialized");
102 goto onError;
103 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000104 if (!import_encodings_called)
105 import_encodings();
106
107 /* Convert the encoding to a lower-cased Python string */
108 v = lowercasestring(encoding);
109 if (v == NULL)
110 goto onError;
111 PyString_InternInPlace(&v);
112
113 /* First, try to lookup the name in the registry dictionary */
114 result = PyDict_GetItem(_PyCodec_SearchCache, v);
115 if (result != NULL) {
116 Py_INCREF(result);
Barry Warsaw51ac5802000-03-20 16:36:48 +0000117 Py_DECREF(v);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000118 return result;
119 }
120
121 /* Next, scan the search functions in order of registration */
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000122 args = PyTuple_New(1);
123 if (args == NULL)
124 goto onError;
125 PyTuple_SET_ITEM(args,0,v);
Guido van Rossum5ba3c842000-03-24 20:52:23 +0000126
127 len = PyList_Size(_PyCodec_SearchPath);
128 if (len < 0)
129 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000130
131 for (i = 0; i < len; i++) {
132 PyObject *func;
133
134 func = PyList_GetItem(_PyCodec_SearchPath, i);
135 if (func == NULL)
136 goto onError;
Guido van Rossum5ba3c842000-03-24 20:52:23 +0000137 result = PyEval_CallObject(func, args);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000138 if (result == NULL)
139 goto onError;
140 if (result == Py_None) {
141 Py_DECREF(result);
142 continue;
143 }
144 if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
145 PyErr_SetString(PyExc_TypeError,
146 "codec search functions must return 4-tuples");
147 Py_DECREF(result);
148 goto onError;
149 }
150 break;
151 }
152 if (i == len) {
153 /* XXX Perhaps we should cache misses too ? */
154 PyErr_SetString(PyExc_LookupError,
Barry Warsaw51ac5802000-03-20 16:36:48 +0000155 "unknown encoding");
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000156 goto onError;
157 }
158
159 /* Cache and return the result */
160 PyDict_SetItem(_PyCodec_SearchCache, v, result);
161 Py_DECREF(args);
162 return result;
163
164 onError:
165 Py_XDECREF(args);
166 return NULL;
167}
168
169static
170PyObject *args_tuple(PyObject *object,
171 const char *errors)
172{
173 PyObject *args;
174
175 args = PyTuple_New(1 + (errors != NULL));
176 if (args == NULL)
177 return NULL;
178 Py_INCREF(object);
179 PyTuple_SET_ITEM(args,0,object);
180 if (errors) {
181 PyObject *v;
182
183 v = PyString_FromString(errors);
184 if (v == NULL) {
185 Py_DECREF(args);
186 return NULL;
187 }
188 PyTuple_SET_ITEM(args, 1, v);
189 }
190 return args;
191}
192
193/* Build a codec by calling factory(stream[,errors]) or just
194 factory(errors) depending on whether the given parameters are
195 non-NULL. */
196
197static
198PyObject *build_stream_codec(PyObject *factory,
199 PyObject *stream,
200 const char *errors)
201{
202 PyObject *args, *codec;
203
204 args = args_tuple(stream, errors);
205 if (args == NULL)
206 return NULL;
207
208 codec = PyEval_CallObject(factory, args);
209 Py_DECREF(args);
210 return codec;
211}
212
213/* Convenience APIs to query the Codec registry.
214
215 All APIs return a codec object with incremented refcount.
216
217 */
218
219PyObject *PyCodec_Encoder(const char *encoding)
220{
221 PyObject *codecs;
222 PyObject *v;
223
224 codecs = _PyCodec_Lookup(encoding);
225 if (codecs == NULL)
226 goto onError;
227 v = PyTuple_GET_ITEM(codecs,0);
228 Py_INCREF(v);
229 return v;
230
231 onError:
232 return NULL;
233}
234
235PyObject *PyCodec_Decoder(const char *encoding)
236{
237 PyObject *codecs;
238 PyObject *v;
239
240 codecs = _PyCodec_Lookup(encoding);
241 if (codecs == NULL)
242 goto onError;
243 v = PyTuple_GET_ITEM(codecs,1);
244 Py_INCREF(v);
245 return v;
246
247 onError:
248 return NULL;
249}
250
251PyObject *PyCodec_StreamReader(const char *encoding,
252 PyObject *stream,
253 const char *errors)
254{
255 PyObject *codecs;
256
257 codecs = _PyCodec_Lookup(encoding);
258 if (codecs == NULL)
259 goto onError;
260 return build_stream_codec(PyTuple_GET_ITEM(codecs,2),stream,errors);
261
262 onError:
263 return NULL;
264}
265
266PyObject *PyCodec_StreamWriter(const char *encoding,
267 PyObject *stream,
268 const char *errors)
269{
270 PyObject *codecs;
271
272 codecs = _PyCodec_Lookup(encoding);
273 if (codecs == NULL)
274 goto onError;
275 return build_stream_codec(PyTuple_GET_ITEM(codecs,3),stream,errors);
276
277 onError:
278 return NULL;
279}
280
281/* Encode an object (e.g. an Unicode object) using the given encoding
282 and return the resulting encoded object (usually a Python string).
283
284 errors is passed to the encoder factory as argument if non-NULL. */
285
286PyObject *PyCodec_Encode(PyObject *object,
287 const char *encoding,
288 const char *errors)
289{
290 PyObject *encoder = NULL;
291 PyObject *args = NULL, *result;
292 PyObject *v;
293
294 encoder = PyCodec_Encoder(encoding);
295 if (encoder == NULL)
296 goto onError;
297
298 args = args_tuple(object, errors);
299 if (args == NULL)
300 goto onError;
301
302 result = PyEval_CallObject(encoder,args);
303 if (result == NULL)
304 goto onError;
305
306 if (!PyTuple_Check(result) ||
307 PyTuple_GET_SIZE(result) != 2) {
308 PyErr_SetString(PyExc_TypeError,
309 "encoder must return a tuple (object,integer)");
310 goto onError;
311 }
312 v = PyTuple_GET_ITEM(result,0);
313 Py_INCREF(v);
314 /* We don't check or use the second (integer) entry. */
315
316 Py_DECREF(args);
317 Py_DECREF(encoder);
318 Py_DECREF(result);
319 return v;
320
321 onError:
322 Py_XDECREF(args);
323 Py_XDECREF(encoder);
324 return NULL;
325}
326
327/* Decode an object (usually a Python string) using the given encoding
328 and return an equivalent object (e.g. an Unicode object).
329
330 errors is passed to the decoder factory as argument if non-NULL. */
331
332PyObject *PyCodec_Decode(PyObject *object,
333 const char *encoding,
334 const char *errors)
335{
336 PyObject *decoder = NULL;
337 PyObject *args = NULL, *result = NULL;
338 PyObject *v;
339
340 decoder = PyCodec_Decoder(encoding);
341 if (decoder == NULL)
342 goto onError;
343
344 args = args_tuple(object, errors);
345 if (args == NULL)
346 goto onError;
347
348 result = PyEval_CallObject(decoder,args);
349 if (result == NULL)
350 goto onError;
351 if (!PyTuple_Check(result) ||
352 PyTuple_GET_SIZE(result) != 2) {
353 PyErr_SetString(PyExc_TypeError,
354 "decoder must return a tuple (object,integer)");
355 goto onError;
356 }
357 v = PyTuple_GET_ITEM(result,0);
358 Py_INCREF(v);
359 /* We don't check or use the second (integer) entry. */
360
361 Py_DECREF(args);
362 Py_DECREF(decoder);
363 Py_DECREF(result);
364 return v;
365
366 onError:
367 Py_XDECREF(args);
368 Py_XDECREF(decoder);
369 Py_XDECREF(result);
370 return NULL;
371}
372
373void _PyCodecRegistry_Init()
374{
375 if (_PyCodec_SearchPath == NULL)
376 _PyCodec_SearchPath = PyList_New(0);
377 if (_PyCodec_SearchCache == NULL)
378 _PyCodec_SearchCache = PyDict_New();
379 if (_PyCodec_SearchPath == NULL ||
380 _PyCodec_SearchCache == NULL)
381 Py_FatalError("can't intialize codec registry");
382}
383
384void _PyCodecRegistry_Fini()
385{
386 Py_XDECREF(_PyCodec_SearchPath);
Barry Warsaw51ac5802000-03-20 16:36:48 +0000387 _PyCodec_SearchPath = NULL;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000388 Py_XDECREF(_PyCodec_SearchCache);
Barry Warsaw51ac5802000-03-20 16:36:48 +0000389 _PyCodec_SearchCache = NULL;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000390}