blob: 2d493776008706912a54176bb98e4718c9d86c19 [file] [log] [blame]
Guido van Rossumfeee4b92000-03-10 22:57:27 +00001/* ------------------------------------------------------------------------
2
3 Python Codec Registry and support functions
4
5Written by Marc-Andre Lemburg (mal@lemburg.com).
6
7(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
8
9 ------------------------------------------------------------------------ */
10
11#include "Python.h"
12#include <ctype.h>
13
14/* --- Globals ------------------------------------------------------------ */
15
16static PyObject *_PyCodec_SearchPath;
17static PyObject *_PyCodec_SearchCache;
18
19/* Flag used for lazy import of the standard encodings package */
20static int import_encodings_called = 0;
21
22/* --- Codec Registry ----------------------------------------------------- */
23
24/* Import the standard encodings package which will register the first
25 codec search function.
26
27 This is done in a lazy way so that the Unicode implementation does
28 not downgrade startup time of scripts not needing it.
29
30 Errors are silently ignored by this function. Only one try is made.
31
32*/
33
34static
35void import_encodings()
36{
37 PyObject *mod;
38
39 import_encodings_called = 1;
40 mod = PyImport_ImportModule("encodings");
41 if (mod == NULL) {
42 PyErr_Clear();
43 return;
44 }
45 Py_DECREF(mod);
46}
47
48/* Register a new codec search function.
49
50 The search_function's refcount is incremented by this function. */
51
52int PyCodec_Register(PyObject *search_function)
53{
54 if (!import_encodings_called)
55 import_encodings();
56 if (search_function == NULL) {
57 PyErr_BadArgument();
58 return -1;
59 }
60 if (!PyCallable_Check(search_function)) {
61 PyErr_SetString(PyExc_TypeError,
62 "argument must be callable");
63 return -1;
64 }
65 return PyList_Append(_PyCodec_SearchPath, search_function);
66}
67
68static
69PyObject *lowercasestring(const char *string)
70{
71 register int i;
72 int len = strlen(string);
73 char *p;
74 PyObject *v;
75
76 v = PyString_FromStringAndSize(NULL, len);
77 if (v == NULL)
78 return NULL;
79 p = PyString_AS_STRING(v);
80 for (i = 0; i < len; i++)
81 p[i] = tolower(string[i]);
82 return v;
83}
84
85/* Lookup the given encoding and return a tuple providing the codec
86 facilities.
87
88 The encoding string is looked up converted to all lower-case
89 characters. This makes encodings looked up through this mechanism
90 effectively case-insensitive.
91
92 If no codec is found, a KeyError is set and NULL returned. */
93
94PyObject *_PyCodec_Lookup(const char *encoding)
95{
Barry Warsaw51ac5802000-03-20 16:36:48 +000096 PyObject *result, *args = NULL, *v = NULL;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000097 int i, len;
98
Barry Warsaw51ac5802000-03-20 16:36:48 +000099 if (_PyCodec_SearchCache == NULL || _PyCodec_SearchPath == NULL) {
100 PyErr_SetString(PyExc_SystemError,
101 "codec module not properly initialized");
102 goto onError;
103 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000104 if (!import_encodings_called)
105 import_encodings();
106
107 /* Convert the encoding to a lower-cased Python string */
108 v = lowercasestring(encoding);
109 if (v == NULL)
110 goto onError;
111 PyString_InternInPlace(&v);
112
113 /* First, try to lookup the name in the registry dictionary */
114 result = PyDict_GetItem(_PyCodec_SearchCache, v);
115 if (result != NULL) {
116 Py_INCREF(result);
Barry Warsaw51ac5802000-03-20 16:36:48 +0000117 Py_DECREF(v);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000118 return result;
119 }
120
121 /* Next, scan the search functions in order of registration */
122 len = PyList_Size(_PyCodec_SearchPath);
123 if (len < 0)
124 goto onError;
125
126 args = PyTuple_New(1);
127 if (args == NULL)
128 goto onError;
129 PyTuple_SET_ITEM(args,0,v);
Barry Warsaw51ac5802000-03-20 16:36:48 +0000130 v = NULL;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000131
132 for (i = 0; i < len; i++) {
133 PyObject *func;
134
135 func = PyList_GetItem(_PyCodec_SearchPath, i);
136 if (func == NULL)
137 goto onError;
138 result = PyEval_CallObject(func,args);
139 if (result == NULL)
140 goto onError;
141 if (result == Py_None) {
142 Py_DECREF(result);
143 continue;
144 }
145 if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
146 PyErr_SetString(PyExc_TypeError,
147 "codec search functions must return 4-tuples");
148 Py_DECREF(result);
149 goto onError;
150 }
151 break;
152 }
153 if (i == len) {
154 /* XXX Perhaps we should cache misses too ? */
155 PyErr_SetString(PyExc_LookupError,
Barry Warsaw51ac5802000-03-20 16:36:48 +0000156 "unknown encoding");
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000157 goto onError;
158 }
159
160 /* Cache and return the result */
161 PyDict_SetItem(_PyCodec_SearchCache, v, result);
162 Py_DECREF(args);
163 return result;
164
165 onError:
Barry Warsaw51ac5802000-03-20 16:36:48 +0000166 Py_XDECREF(v);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000167 Py_XDECREF(args);
168 return NULL;
169}
170
171static
172PyObject *args_tuple(PyObject *object,
173 const char *errors)
174{
175 PyObject *args;
176
177 args = PyTuple_New(1 + (errors != NULL));
178 if (args == NULL)
179 return NULL;
180 Py_INCREF(object);
181 PyTuple_SET_ITEM(args,0,object);
182 if (errors) {
183 PyObject *v;
184
185 v = PyString_FromString(errors);
186 if (v == NULL) {
187 Py_DECREF(args);
188 return NULL;
189 }
190 PyTuple_SET_ITEM(args, 1, v);
191 }
192 return args;
193}
194
195/* Build a codec by calling factory(stream[,errors]) or just
196 factory(errors) depending on whether the given parameters are
197 non-NULL. */
198
199static
200PyObject *build_stream_codec(PyObject *factory,
201 PyObject *stream,
202 const char *errors)
203{
204 PyObject *args, *codec;
205
206 args = args_tuple(stream, errors);
207 if (args == NULL)
208 return NULL;
209
210 codec = PyEval_CallObject(factory, args);
211 Py_DECREF(args);
212 return codec;
213}
214
215/* Convenience APIs to query the Codec registry.
216
217 All APIs return a codec object with incremented refcount.
218
219 */
220
221PyObject *PyCodec_Encoder(const char *encoding)
222{
223 PyObject *codecs;
224 PyObject *v;
225
226 codecs = _PyCodec_Lookup(encoding);
227 if (codecs == NULL)
228 goto onError;
229 v = PyTuple_GET_ITEM(codecs,0);
230 Py_INCREF(v);
231 return v;
232
233 onError:
234 return NULL;
235}
236
237PyObject *PyCodec_Decoder(const char *encoding)
238{
239 PyObject *codecs;
240 PyObject *v;
241
242 codecs = _PyCodec_Lookup(encoding);
243 if (codecs == NULL)
244 goto onError;
245 v = PyTuple_GET_ITEM(codecs,1);
246 Py_INCREF(v);
247 return v;
248
249 onError:
250 return NULL;
251}
252
253PyObject *PyCodec_StreamReader(const char *encoding,
254 PyObject *stream,
255 const char *errors)
256{
257 PyObject *codecs;
258
259 codecs = _PyCodec_Lookup(encoding);
260 if (codecs == NULL)
261 goto onError;
262 return build_stream_codec(PyTuple_GET_ITEM(codecs,2),stream,errors);
263
264 onError:
265 return NULL;
266}
267
268PyObject *PyCodec_StreamWriter(const char *encoding,
269 PyObject *stream,
270 const char *errors)
271{
272 PyObject *codecs;
273
274 codecs = _PyCodec_Lookup(encoding);
275 if (codecs == NULL)
276 goto onError;
277 return build_stream_codec(PyTuple_GET_ITEM(codecs,3),stream,errors);
278
279 onError:
280 return NULL;
281}
282
283/* Encode an object (e.g. an Unicode object) using the given encoding
284 and return the resulting encoded object (usually a Python string).
285
286 errors is passed to the encoder factory as argument if non-NULL. */
287
288PyObject *PyCodec_Encode(PyObject *object,
289 const char *encoding,
290 const char *errors)
291{
292 PyObject *encoder = NULL;
293 PyObject *args = NULL, *result;
294 PyObject *v;
295
296 encoder = PyCodec_Encoder(encoding);
297 if (encoder == NULL)
298 goto onError;
299
300 args = args_tuple(object, errors);
301 if (args == NULL)
302 goto onError;
303
304 result = PyEval_CallObject(encoder,args);
305 if (result == NULL)
306 goto onError;
307
308 if (!PyTuple_Check(result) ||
309 PyTuple_GET_SIZE(result) != 2) {
310 PyErr_SetString(PyExc_TypeError,
311 "encoder must return a tuple (object,integer)");
312 goto onError;
313 }
314 v = PyTuple_GET_ITEM(result,0);
315 Py_INCREF(v);
316 /* We don't check or use the second (integer) entry. */
317
318 Py_DECREF(args);
319 Py_DECREF(encoder);
320 Py_DECREF(result);
321 return v;
322
323 onError:
324 Py_XDECREF(args);
325 Py_XDECREF(encoder);
326 return NULL;
327}
328
329/* Decode an object (usually a Python string) using the given encoding
330 and return an equivalent object (e.g. an Unicode object).
331
332 errors is passed to the decoder factory as argument if non-NULL. */
333
334PyObject *PyCodec_Decode(PyObject *object,
335 const char *encoding,
336 const char *errors)
337{
338 PyObject *decoder = NULL;
339 PyObject *args = NULL, *result = NULL;
340 PyObject *v;
341
342 decoder = PyCodec_Decoder(encoding);
343 if (decoder == NULL)
344 goto onError;
345
346 args = args_tuple(object, errors);
347 if (args == NULL)
348 goto onError;
349
350 result = PyEval_CallObject(decoder,args);
351 if (result == NULL)
352 goto onError;
353 if (!PyTuple_Check(result) ||
354 PyTuple_GET_SIZE(result) != 2) {
355 PyErr_SetString(PyExc_TypeError,
356 "decoder must return a tuple (object,integer)");
357 goto onError;
358 }
359 v = PyTuple_GET_ITEM(result,0);
360 Py_INCREF(v);
361 /* We don't check or use the second (integer) entry. */
362
363 Py_DECREF(args);
364 Py_DECREF(decoder);
365 Py_DECREF(result);
366 return v;
367
368 onError:
369 Py_XDECREF(args);
370 Py_XDECREF(decoder);
371 Py_XDECREF(result);
372 return NULL;
373}
374
375void _PyCodecRegistry_Init()
376{
377 if (_PyCodec_SearchPath == NULL)
378 _PyCodec_SearchPath = PyList_New(0);
379 if (_PyCodec_SearchCache == NULL)
380 _PyCodec_SearchCache = PyDict_New();
381 if (_PyCodec_SearchPath == NULL ||
382 _PyCodec_SearchCache == NULL)
383 Py_FatalError("can't intialize codec registry");
384}
385
386void _PyCodecRegistry_Fini()
387{
388 Py_XDECREF(_PyCodec_SearchPath);
Barry Warsaw51ac5802000-03-20 16:36:48 +0000389 _PyCodec_SearchPath = NULL;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000390 Py_XDECREF(_PyCodec_SearchCache);
Barry Warsaw51ac5802000-03-20 16:36:48 +0000391 _PyCodec_SearchCache = NULL;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000392}