/* ------------------------------------------------------------------------

   Python Codec Registry and support functions

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

   ------------------------------------------------------------------------ */
10
11#include "Python.h"
12#include <ctype.h>
13
14/* --- Globals ------------------------------------------------------------ */
15
16static PyObject *_PyCodec_SearchPath;
17static PyObject *_PyCodec_SearchCache;
18
19/* Flag used for lazy import of the standard encodings package */
20static int import_encodings_called = 0;
21
22/* --- Codec Registry ----------------------------------------------------- */
23
24/* Import the standard encodings package which will register the first
25 codec search function.
26
27 This is done in a lazy way so that the Unicode implementation does
28 not downgrade startup time of scripts not needing it.
29
30 Errors are silently ignored by this function. Only one try is made.
31
32*/
33
34static
35void import_encodings()
36{
37 PyObject *mod;
38
39 import_encodings_called = 1;
40 mod = PyImport_ImportModule("encodings");
41 if (mod == NULL) {
42 PyErr_Clear();
43 return;
44 }
45 Py_DECREF(mod);
46}
47
48/* Register a new codec search function.
49
50 The search_function's refcount is incremented by this function. */
51
52int PyCodec_Register(PyObject *search_function)
53{
54 if (!import_encodings_called)
55 import_encodings();
56 if (search_function == NULL) {
57 PyErr_BadArgument();
58 return -1;
59 }
60 if (!PyCallable_Check(search_function)) {
61 PyErr_SetString(PyExc_TypeError,
62 "argument must be callable");
63 return -1;
64 }
65 return PyList_Append(_PyCodec_SearchPath, search_function);
66}
67
68static
69PyObject *lowercasestring(const char *string)
70{
71 register int i;
72 int len = strlen(string);
73 char *p;
74 PyObject *v;
75
76 v = PyString_FromStringAndSize(NULL, len);
77 if (v == NULL)
78 return NULL;
79 p = PyString_AS_STRING(v);
80 for (i = 0; i < len; i++)
81 p[i] = tolower(string[i]);
82 return v;
83}
84
85/* Lookup the given encoding and return a tuple providing the codec
86 facilities.
87
88 The encoding string is looked up converted to all lower-case
89 characters. This makes encodings looked up through this mechanism
90 effectively case-insensitive.
91
92 If no codec is found, a KeyError is set and NULL returned. */
93
94PyObject *_PyCodec_Lookup(const char *encoding)
95{
96 PyObject *result, *args = NULL, *v;
97 int i, len;
98
99 if (!import_encodings_called)
100 import_encodings();
101
102 /* Convert the encoding to a lower-cased Python string */
103 v = lowercasestring(encoding);
104 if (v == NULL)
105 goto onError;
106 PyString_InternInPlace(&v);
107
108 /* First, try to lookup the name in the registry dictionary */
109 result = PyDict_GetItem(_PyCodec_SearchCache, v);
110 if (result != NULL) {
111 Py_INCREF(result);
112 return result;
113 }
114
115 /* Next, scan the search functions in order of registration */
116 len = PyList_Size(_PyCodec_SearchPath);
117 if (len < 0)
118 goto onError;
119
120 args = PyTuple_New(1);
121 if (args == NULL)
122 goto onError;
123 PyTuple_SET_ITEM(args,0,v);
124
125 for (i = 0; i < len; i++) {
126 PyObject *func;
127
128 func = PyList_GetItem(_PyCodec_SearchPath, i);
129 if (func == NULL)
130 goto onError;
131 result = PyEval_CallObject(func,args);
132 if (result == NULL)
133 goto onError;
134 if (result == Py_None) {
135 Py_DECREF(result);
136 continue;
137 }
138 if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
139 PyErr_SetString(PyExc_TypeError,
140 "codec search functions must return 4-tuples");
141 Py_DECREF(result);
142 goto onError;
143 }
144 break;
145 }
146 if (i == len) {
147 /* XXX Perhaps we should cache misses too ? */
148 PyErr_SetString(PyExc_LookupError,
149 "unkown encoding");
150 goto onError;
151 }
152
153 /* Cache and return the result */
154 PyDict_SetItem(_PyCodec_SearchCache, v, result);
155 Py_DECREF(args);
156 return result;
157
158 onError:
159 Py_XDECREF(args);
160 return NULL;
161}
162
163static
164PyObject *args_tuple(PyObject *object,
165 const char *errors)
166{
167 PyObject *args;
168
169 args = PyTuple_New(1 + (errors != NULL));
170 if (args == NULL)
171 return NULL;
172 Py_INCREF(object);
173 PyTuple_SET_ITEM(args,0,object);
174 if (errors) {
175 PyObject *v;
176
177 v = PyString_FromString(errors);
178 if (v == NULL) {
179 Py_DECREF(args);
180 return NULL;
181 }
182 PyTuple_SET_ITEM(args, 1, v);
183 }
184 return args;
185}
186
187/* Build a codec by calling factory(stream[,errors]) or just
188 factory(errors) depending on whether the given parameters are
189 non-NULL. */
190
191static
192PyObject *build_stream_codec(PyObject *factory,
193 PyObject *stream,
194 const char *errors)
195{
196 PyObject *args, *codec;
197
198 args = args_tuple(stream, errors);
199 if (args == NULL)
200 return NULL;
201
202 codec = PyEval_CallObject(factory, args);
203 Py_DECREF(args);
204 return codec;
205}
206
207/* Convenience APIs to query the Codec registry.
208
209 All APIs return a codec object with incremented refcount.
210
211 */
212
213PyObject *PyCodec_Encoder(const char *encoding)
214{
215 PyObject *codecs;
216 PyObject *v;
217
218 codecs = _PyCodec_Lookup(encoding);
219 if (codecs == NULL)
220 goto onError;
221 v = PyTuple_GET_ITEM(codecs,0);
222 Py_INCREF(v);
223 return v;
224
225 onError:
226 return NULL;
227}
228
229PyObject *PyCodec_Decoder(const char *encoding)
230{
231 PyObject *codecs;
232 PyObject *v;
233
234 codecs = _PyCodec_Lookup(encoding);
235 if (codecs == NULL)
236 goto onError;
237 v = PyTuple_GET_ITEM(codecs,1);
238 Py_INCREF(v);
239 return v;
240
241 onError:
242 return NULL;
243}
244
245PyObject *PyCodec_StreamReader(const char *encoding,
246 PyObject *stream,
247 const char *errors)
248{
249 PyObject *codecs;
250
251 codecs = _PyCodec_Lookup(encoding);
252 if (codecs == NULL)
253 goto onError;
254 return build_stream_codec(PyTuple_GET_ITEM(codecs,2),stream,errors);
255
256 onError:
257 return NULL;
258}
259
260PyObject *PyCodec_StreamWriter(const char *encoding,
261 PyObject *stream,
262 const char *errors)
263{
264 PyObject *codecs;
265
266 codecs = _PyCodec_Lookup(encoding);
267 if (codecs == NULL)
268 goto onError;
269 return build_stream_codec(PyTuple_GET_ITEM(codecs,3),stream,errors);
270
271 onError:
272 return NULL;
273}
274
275/* Encode an object (e.g. an Unicode object) using the given encoding
276 and return the resulting encoded object (usually a Python string).
277
278 errors is passed to the encoder factory as argument if non-NULL. */
279
280PyObject *PyCodec_Encode(PyObject *object,
281 const char *encoding,
282 const char *errors)
283{
284 PyObject *encoder = NULL;
285 PyObject *args = NULL, *result;
286 PyObject *v;
287
288 encoder = PyCodec_Encoder(encoding);
289 if (encoder == NULL)
290 goto onError;
291
292 args = args_tuple(object, errors);
293 if (args == NULL)
294 goto onError;
295
296 result = PyEval_CallObject(encoder,args);
297 if (result == NULL)
298 goto onError;
299
300 if (!PyTuple_Check(result) ||
301 PyTuple_GET_SIZE(result) != 2) {
302 PyErr_SetString(PyExc_TypeError,
303 "encoder must return a tuple (object,integer)");
304 goto onError;
305 }
306 v = PyTuple_GET_ITEM(result,0);
307 Py_INCREF(v);
308 /* We don't check or use the second (integer) entry. */
309
310 Py_DECREF(args);
311 Py_DECREF(encoder);
312 Py_DECREF(result);
313 return v;
314
315 onError:
316 Py_XDECREF(args);
317 Py_XDECREF(encoder);
318 return NULL;
319}
320
321/* Decode an object (usually a Python string) using the given encoding
322 and return an equivalent object (e.g. an Unicode object).
323
324 errors is passed to the decoder factory as argument if non-NULL. */
325
326PyObject *PyCodec_Decode(PyObject *object,
327 const char *encoding,
328 const char *errors)
329{
330 PyObject *decoder = NULL;
331 PyObject *args = NULL, *result = NULL;
332 PyObject *v;
333
334 decoder = PyCodec_Decoder(encoding);
335 if (decoder == NULL)
336 goto onError;
337
338 args = args_tuple(object, errors);
339 if (args == NULL)
340 goto onError;
341
342 result = PyEval_CallObject(decoder,args);
343 if (result == NULL)
344 goto onError;
345 if (!PyTuple_Check(result) ||
346 PyTuple_GET_SIZE(result) != 2) {
347 PyErr_SetString(PyExc_TypeError,
348 "decoder must return a tuple (object,integer)");
349 goto onError;
350 }
351 v = PyTuple_GET_ITEM(result,0);
352 Py_INCREF(v);
353 /* We don't check or use the second (integer) entry. */
354
355 Py_DECREF(args);
356 Py_DECREF(decoder);
357 Py_DECREF(result);
358 return v;
359
360 onError:
361 Py_XDECREF(args);
362 Py_XDECREF(decoder);
363 Py_XDECREF(result);
364 return NULL;
365}
366
367void _PyCodecRegistry_Init()
368{
369 if (_PyCodec_SearchPath == NULL)
370 _PyCodec_SearchPath = PyList_New(0);
371 if (_PyCodec_SearchCache == NULL)
372 _PyCodec_SearchCache = PyDict_New();
373 if (_PyCodec_SearchPath == NULL ||
374 _PyCodec_SearchCache == NULL)
375 Py_FatalError("can't intialize codec registry");
376}
377
378void _PyCodecRegistry_Fini()
379{
380 Py_XDECREF(_PyCodec_SearchPath);
381 Py_XDECREF(_PyCodec_SearchCache);
382}