blob: a208898e8c07aaa9c78f0d4e0026fb7d185c6514 [file] [log] [blame]
Guido van Rossumfeee4b92000-03-10 22:57:27 +00001/* ------------------------------------------------------------------------
2
3 Python Codec Registry and support functions
4
5Written by Marc-Andre Lemburg (mal@lemburg.com).
6
Guido van Rossum16b1ad92000-08-03 16:24:25 +00007Copyright (c) Corporation for National Research Initiatives.
Guido van Rossumfeee4b92000-03-10 22:57:27 +00008
9 ------------------------------------------------------------------------ */
10
11#include "Python.h"
12#include <ctype.h>
13
Guido van Rossumfeee4b92000-03-10 22:57:27 +000014/* --- Codec Registry ----------------------------------------------------- */
15
/* _PyCodecRegistry_Init() imports the standard encodings package, which
   registers the first codec search function.

   This is done lazily so that the Unicode implementation does not slow
   down the startup of scripts that never need it.

   ImportErrors raised by that import are silently ignored, and the
   import is attempted only once per interpreter.

*/
26
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +000027static int _PyCodecRegistry_Init(void); /* Forward */
Guido van Rossumfeee4b92000-03-10 22:57:27 +000028
Guido van Rossumfeee4b92000-03-10 22:57:27 +000029int PyCodec_Register(PyObject *search_function)
30{
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +000031 PyInterpreterState *interp = PyThreadState_Get()->interp;
32 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
33 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000034 if (search_function == NULL) {
35 PyErr_BadArgument();
Guido van Rossumb95de4f2000-03-31 17:25:23 +000036 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000037 }
38 if (!PyCallable_Check(search_function)) {
39 PyErr_SetString(PyExc_TypeError,
40 "argument must be callable");
Guido van Rossumb95de4f2000-03-31 17:25:23 +000041 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000042 }
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +000043 return PyList_Append(interp->codec_search_path, search_function);
Guido van Rossumb95de4f2000-03-31 17:25:23 +000044
45 onError:
46 return -1;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000047}
48
Guido van Rossum9e896b32000-04-05 20:11:21 +000049/* Convert a string to a normalized Python string: all characters are
50 converted to lower case, spaces are replaced with underscores. */
51
Guido van Rossumfeee4b92000-03-10 22:57:27 +000052static
Guido van Rossum9e896b32000-04-05 20:11:21 +000053PyObject *normalizestring(const char *string)
Guido van Rossumfeee4b92000-03-10 22:57:27 +000054{
Guido van Rossum33831132000-06-29 14:50:15 +000055 register size_t i;
Guido van Rossum582acec2000-06-28 22:07:35 +000056 size_t len = strlen(string);
Guido van Rossumfeee4b92000-03-10 22:57:27 +000057 char *p;
58 PyObject *v;
59
Guido van Rossum582acec2000-06-28 22:07:35 +000060 if (len > INT_MAX) {
61 PyErr_SetString(PyExc_OverflowError, "string is too large");
62 return NULL;
63 }
64
65 v = PyString_FromStringAndSize(NULL, (int)len);
Guido van Rossumfeee4b92000-03-10 22:57:27 +000066 if (v == NULL)
67 return NULL;
68 p = PyString_AS_STRING(v);
Guido van Rossum9e896b32000-04-05 20:11:21 +000069 for (i = 0; i < len; i++) {
70 register char ch = string[i];
71 if (ch == ' ')
72 ch = '-';
73 else
74 ch = tolower(ch);
75 p[i] = ch;
76 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +000077 return v;
78}
79
80/* Lookup the given encoding and return a tuple providing the codec
81 facilities.
82
83 The encoding string is looked up converted to all lower-case
84 characters. This makes encodings looked up through this mechanism
85 effectively case-insensitive.
86
Fred Drake766de832000-05-09 19:55:59 +000087 If no codec is found, a LookupError is set and NULL returned.
Guido van Rossumb95de4f2000-03-31 17:25:23 +000088
89 As side effect, this tries to load the encodings package, if not
90 yet done. This is part of the lazy load strategy for the encodings
91 package.
92
93*/
Guido van Rossumfeee4b92000-03-10 22:57:27 +000094
95PyObject *_PyCodec_Lookup(const char *encoding)
96{
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +000097 PyInterpreterState *interp;
Guido van Rossum5ba3c842000-03-24 20:52:23 +000098 PyObject *result, *args = NULL, *v;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000099 int i, len;
100
Fred Drake766de832000-05-09 19:55:59 +0000101 if (encoding == NULL) {
102 PyErr_BadArgument();
103 goto onError;
104 }
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000105
106 interp = PyThreadState_Get()->interp;
107 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
Barry Warsaw51ac5802000-03-20 16:36:48 +0000108 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000109
Guido van Rossum9e896b32000-04-05 20:11:21 +0000110 /* Convert the encoding to a normalized Python string: all
Thomas Wouters7e474022000-07-16 12:04:32 +0000111 characters are converted to lower case, spaces and hyphens are
Guido van Rossum9e896b32000-04-05 20:11:21 +0000112 replaced with underscores. */
113 v = normalizestring(encoding);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000114 if (v == NULL)
115 goto onError;
116 PyString_InternInPlace(&v);
117
118 /* First, try to lookup the name in the registry dictionary */
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000119 result = PyDict_GetItem(interp->codec_search_cache, v);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000120 if (result != NULL) {
121 Py_INCREF(result);
Barry Warsaw51ac5802000-03-20 16:36:48 +0000122 Py_DECREF(v);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000123 return result;
124 }
125
126 /* Next, scan the search functions in order of registration */
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000127 args = PyTuple_New(1);
128 if (args == NULL)
129 goto onError;
130 PyTuple_SET_ITEM(args,0,v);
Guido van Rossum5ba3c842000-03-24 20:52:23 +0000131
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000132 len = PyList_Size(interp->codec_search_path);
Guido van Rossum5ba3c842000-03-24 20:52:23 +0000133 if (len < 0)
134 goto onError;
Guido van Rossumb95de4f2000-03-31 17:25:23 +0000135 if (len == 0) {
136 PyErr_SetString(PyExc_LookupError,
137 "no codec search functions registered: "
138 "can't find encoding");
139 goto onError;
140 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000141
142 for (i = 0; i < len; i++) {
143 PyObject *func;
144
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000145 func = PyList_GetItem(interp->codec_search_path, i);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000146 if (func == NULL)
147 goto onError;
Guido van Rossum5ba3c842000-03-24 20:52:23 +0000148 result = PyEval_CallObject(func, args);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000149 if (result == NULL)
150 goto onError;
151 if (result == Py_None) {
152 Py_DECREF(result);
153 continue;
154 }
155 if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
156 PyErr_SetString(PyExc_TypeError,
157 "codec search functions must return 4-tuples");
158 Py_DECREF(result);
159 goto onError;
160 }
161 break;
162 }
163 if (i == len) {
164 /* XXX Perhaps we should cache misses too ? */
Martin v. Löwiseb42b022002-09-26 16:01:24 +0000165 PyErr_Format(PyExc_LookupError,
166 "unknown encoding: %s", encoding);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000167 goto onError;
168 }
169
170 /* Cache and return the result */
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000171 PyDict_SetItem(interp->codec_search_cache, v, result);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000172 Py_DECREF(args);
173 return result;
174
175 onError:
176 Py_XDECREF(args);
177 return NULL;
178}
179
180static
181PyObject *args_tuple(PyObject *object,
182 const char *errors)
183{
184 PyObject *args;
185
186 args = PyTuple_New(1 + (errors != NULL));
187 if (args == NULL)
188 return NULL;
189 Py_INCREF(object);
190 PyTuple_SET_ITEM(args,0,object);
191 if (errors) {
192 PyObject *v;
193
194 v = PyString_FromString(errors);
195 if (v == NULL) {
196 Py_DECREF(args);
197 return NULL;
198 }
199 PyTuple_SET_ITEM(args, 1, v);
200 }
201 return args;
202}
203
204/* Build a codec by calling factory(stream[,errors]) or just
205 factory(errors) depending on whether the given parameters are
206 non-NULL. */
207
208static
209PyObject *build_stream_codec(PyObject *factory,
210 PyObject *stream,
211 const char *errors)
212{
213 PyObject *args, *codec;
214
215 args = args_tuple(stream, errors);
216 if (args == NULL)
217 return NULL;
218
219 codec = PyEval_CallObject(factory, args);
220 Py_DECREF(args);
221 return codec;
222}
223
224/* Convenience APIs to query the Codec registry.
225
226 All APIs return a codec object with incremented refcount.
227
228 */
229
230PyObject *PyCodec_Encoder(const char *encoding)
231{
232 PyObject *codecs;
233 PyObject *v;
234
235 codecs = _PyCodec_Lookup(encoding);
236 if (codecs == NULL)
237 goto onError;
238 v = PyTuple_GET_ITEM(codecs,0);
Mark Hammonde21262c2002-07-18 23:06:17 +0000239 Py_DECREF(codecs);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000240 Py_INCREF(v);
241 return v;
242
243 onError:
244 return NULL;
245}
246
247PyObject *PyCodec_Decoder(const char *encoding)
248{
249 PyObject *codecs;
250 PyObject *v;
251
252 codecs = _PyCodec_Lookup(encoding);
253 if (codecs == NULL)
254 goto onError;
255 v = PyTuple_GET_ITEM(codecs,1);
Mark Hammonde21262c2002-07-18 23:06:17 +0000256 Py_DECREF(codecs);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000257 Py_INCREF(v);
258 return v;
259
260 onError:
261 return NULL;
262}
263
264PyObject *PyCodec_StreamReader(const char *encoding,
265 PyObject *stream,
266 const char *errors)
267{
Mark Hammonde21262c2002-07-18 23:06:17 +0000268 PyObject *codecs, *ret;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000269
270 codecs = _PyCodec_Lookup(encoding);
271 if (codecs == NULL)
272 goto onError;
Mark Hammonde21262c2002-07-18 23:06:17 +0000273 ret = build_stream_codec(PyTuple_GET_ITEM(codecs,2),stream,errors);
274 Py_DECREF(codecs);
275 return ret;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000276
277 onError:
278 return NULL;
279}
280
281PyObject *PyCodec_StreamWriter(const char *encoding,
282 PyObject *stream,
283 const char *errors)
284{
Mark Hammonde21262c2002-07-18 23:06:17 +0000285 PyObject *codecs, *ret;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000286
287 codecs = _PyCodec_Lookup(encoding);
288 if (codecs == NULL)
289 goto onError;
Mark Hammonde21262c2002-07-18 23:06:17 +0000290 ret = build_stream_codec(PyTuple_GET_ITEM(codecs,3),stream,errors);
291 Py_DECREF(codecs);
292 return ret;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000293
294 onError:
295 return NULL;
296}
297
298/* Encode an object (e.g. an Unicode object) using the given encoding
299 and return the resulting encoded object (usually a Python string).
300
301 errors is passed to the encoder factory as argument if non-NULL. */
302
303PyObject *PyCodec_Encode(PyObject *object,
304 const char *encoding,
305 const char *errors)
306{
307 PyObject *encoder = NULL;
308 PyObject *args = NULL, *result;
309 PyObject *v;
310
311 encoder = PyCodec_Encoder(encoding);
312 if (encoder == NULL)
313 goto onError;
314
315 args = args_tuple(object, errors);
316 if (args == NULL)
317 goto onError;
318
319 result = PyEval_CallObject(encoder,args);
320 if (result == NULL)
321 goto onError;
322
323 if (!PyTuple_Check(result) ||
324 PyTuple_GET_SIZE(result) != 2) {
325 PyErr_SetString(PyExc_TypeError,
326 "encoder must return a tuple (object,integer)");
327 goto onError;
328 }
329 v = PyTuple_GET_ITEM(result,0);
330 Py_INCREF(v);
331 /* We don't check or use the second (integer) entry. */
332
333 Py_DECREF(args);
334 Py_DECREF(encoder);
335 Py_DECREF(result);
336 return v;
337
338 onError:
339 Py_XDECREF(args);
340 Py_XDECREF(encoder);
341 return NULL;
342}
343
344/* Decode an object (usually a Python string) using the given encoding
345 and return an equivalent object (e.g. an Unicode object).
346
347 errors is passed to the decoder factory as argument if non-NULL. */
348
349PyObject *PyCodec_Decode(PyObject *object,
350 const char *encoding,
351 const char *errors)
352{
353 PyObject *decoder = NULL;
354 PyObject *args = NULL, *result = NULL;
355 PyObject *v;
356
357 decoder = PyCodec_Decoder(encoding);
358 if (decoder == NULL)
359 goto onError;
360
361 args = args_tuple(object, errors);
362 if (args == NULL)
363 goto onError;
364
365 result = PyEval_CallObject(decoder,args);
366 if (result == NULL)
367 goto onError;
368 if (!PyTuple_Check(result) ||
369 PyTuple_GET_SIZE(result) != 2) {
370 PyErr_SetString(PyExc_TypeError,
371 "decoder must return a tuple (object,integer)");
372 goto onError;
373 }
374 v = PyTuple_GET_ITEM(result,0);
375 Py_INCREF(v);
376 /* We don't check or use the second (integer) entry. */
377
378 Py_DECREF(args);
379 Py_DECREF(decoder);
380 Py_DECREF(result);
381 return v;
382
383 onError:
384 Py_XDECREF(args);
385 Py_XDECREF(decoder);
386 Py_XDECREF(result);
387 return NULL;
388}
389
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000390/* Register the error handling callback function error under the name
391 name. This function will be called by the codec when it encounters
392 an unencodable characters/undecodable bytes and doesn't know the
393 callback name, when name is specified as the error parameter
394 in the call to the encode/decode function.
395 Return 0 on success, -1 on error */
396int PyCodec_RegisterError(const char *name, PyObject *error)
397{
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000398 PyInterpreterState *interp = PyThreadState_Get()->interp;
399 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
400 return -1;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000401 if (!PyCallable_Check(error)) {
402 PyErr_SetString(PyExc_TypeError, "handler must be callable");
403 return -1;
404 }
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000405 return PyDict_SetItemString(interp->codec_error_registry,
406 (char *)name, error);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000407}
408
409/* Lookup the error handling callback function registered under the
410 name error. As a special case NULL can be passed, in which case
411 the error handling callback for strict encoding will be returned. */
412PyObject *PyCodec_LookupError(const char *name)
413{
414 PyObject *handler = NULL;
415
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000416 PyInterpreterState *interp = PyThreadState_Get()->interp;
417 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
418 return NULL;
419
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000420 if (name==NULL)
421 name = "strict";
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000422 handler = PyDict_GetItemString(interp->codec_error_registry, (char *)name);
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000423 if (!handler)
424 PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
425 else
426 Py_INCREF(handler);
427 return handler;
428}
429
430static void wrong_exception_type(PyObject *exc)
431{
432 PyObject *type = PyObject_GetAttrString(exc, "__class__");
433 if (type != NULL) {
434 PyObject *name = PyObject_GetAttrString(type, "__name__");
435 Py_DECREF(type);
436 if (name != NULL) {
437 PyObject *string = PyObject_Str(name);
438 Py_DECREF(name);
Walter Dörwaldf7bcd1d2002-09-02 18:22:32 +0000439 if (string != NULL) {
440 PyErr_Format(PyExc_TypeError,
441 "don't know how to handle %.400s in error callback",
442 PyString_AS_STRING(string));
443 Py_DECREF(string);
444 }
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000445 }
446 }
447}
448
449PyObject *PyCodec_StrictErrors(PyObject *exc)
450{
451 if (PyInstance_Check(exc))
452 PyErr_SetObject((PyObject*)((PyInstanceObject*)exc)->in_class,
453 exc);
454 else
455 PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
456 return NULL;
457}
458
459
Walter Dörwaldbf73db82002-11-21 20:08:33 +0000460#ifdef Py_USING_UNICODE
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000461PyObject *PyCodec_IgnoreErrors(PyObject *exc)
462{
463 int end;
464 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
465 if (PyUnicodeEncodeError_GetEnd(exc, &end))
466 return NULL;
467 }
468 else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
469 if (PyUnicodeDecodeError_GetEnd(exc, &end))
470 return NULL;
471 }
472 else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
473 if (PyUnicodeTranslateError_GetEnd(exc, &end))
474 return NULL;
475 }
476 else {
477 wrong_exception_type(exc);
478 return NULL;
479 }
480 /* ouch: passing NULL, 0, pos gives None instead of u'' */
481 return Py_BuildValue("(u#i)", &end, 0, end);
482}
483
484
485PyObject *PyCodec_ReplaceErrors(PyObject *exc)
486{
487 PyObject *restuple;
488 int start;
489 int end;
490 int i;
491
492 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
493 PyObject *res;
494 Py_UNICODE *p;
495 if (PyUnicodeEncodeError_GetStart(exc, &start))
496 return NULL;
497 if (PyUnicodeEncodeError_GetEnd(exc, &end))
498 return NULL;
499 res = PyUnicode_FromUnicode(NULL, end-start);
500 if (res == NULL)
501 return NULL;
502 for (p = PyUnicode_AS_UNICODE(res), i = start;
503 i<end; ++p, ++i)
504 *p = '?';
505 restuple = Py_BuildValue("(Oi)", res, end);
506 Py_DECREF(res);
507 return restuple;
508 }
509 else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
510 Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
511 if (PyUnicodeDecodeError_GetEnd(exc, &end))
512 return NULL;
513 return Py_BuildValue("(u#i)", &res, 1, end);
514 }
515 else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
516 PyObject *res;
517 Py_UNICODE *p;
518 if (PyUnicodeTranslateError_GetStart(exc, &start))
519 return NULL;
520 if (PyUnicodeTranslateError_GetEnd(exc, &end))
521 return NULL;
522 res = PyUnicode_FromUnicode(NULL, end-start);
523 if (res == NULL)
524 return NULL;
525 for (p = PyUnicode_AS_UNICODE(res), i = start;
526 i<end; ++p, ++i)
527 *p = Py_UNICODE_REPLACEMENT_CHARACTER;
528 restuple = Py_BuildValue("(Oi)", res, end);
529 Py_DECREF(res);
530 return restuple;
531 }
532 else {
533 wrong_exception_type(exc);
534 return NULL;
535 }
536}
537
538PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
539{
540 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
541 PyObject *restuple;
542 PyObject *object;
543 int start;
544 int end;
545 PyObject *res;
546 Py_UNICODE *p;
547 Py_UNICODE *startp;
548 Py_UNICODE *outp;
549 int ressize;
550 if (PyUnicodeEncodeError_GetStart(exc, &start))
551 return NULL;
552 if (PyUnicodeEncodeError_GetEnd(exc, &end))
553 return NULL;
554 if (!(object = PyUnicodeEncodeError_GetObject(exc)))
555 return NULL;
556 startp = PyUnicode_AS_UNICODE(object);
557 for (p = startp+start, ressize = 0; p < startp+end; ++p) {
558 if (*p<10)
559 ressize += 2+1+1;
560 else if (*p<100)
561 ressize += 2+2+1;
562 else if (*p<1000)
563 ressize += 2+3+1;
564 else if (*p<10000)
565 ressize += 2+4+1;
Hye-Shik Chang7db07e62003-12-29 01:36:01 +0000566#ifndef Py_UNICODE_WIDE
567 else
568 ressize += 2+5+1;
569#else
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000570 else if (*p<100000)
571 ressize += 2+5+1;
572 else if (*p<1000000)
573 ressize += 2+6+1;
574 else
575 ressize += 2+7+1;
Hye-Shik Chang7db07e62003-12-29 01:36:01 +0000576#endif
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000577 }
578 /* allocate replacement */
579 res = PyUnicode_FromUnicode(NULL, ressize);
580 if (res == NULL) {
581 Py_DECREF(object);
582 return NULL;
583 }
584 /* generate replacement */
585 for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
586 p < startp+end; ++p) {
587 Py_UNICODE c = *p;
588 int digits;
589 int base;
590 *outp++ = '&';
591 *outp++ = '#';
592 if (*p<10) {
593 digits = 1;
594 base = 1;
595 }
596 else if (*p<100) {
597 digits = 2;
598 base = 10;
599 }
600 else if (*p<1000) {
601 digits = 3;
602 base = 100;
603 }
604 else if (*p<10000) {
605 digits = 4;
606 base = 1000;
607 }
Hye-Shik Chang7db07e62003-12-29 01:36:01 +0000608#ifndef Py_UNICODE_WIDE
609 else {
610 digits = 5;
611 base = 10000;
612 }
613#else
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000614 else if (*p<100000) {
615 digits = 5;
616 base = 10000;
617 }
618 else if (*p<1000000) {
619 digits = 6;
620 base = 100000;
621 }
622 else {
623 digits = 7;
624 base = 1000000;
625 }
Hye-Shik Chang7db07e62003-12-29 01:36:01 +0000626#endif
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000627 while (digits-->0) {
628 *outp++ = '0' + c/base;
629 c %= base;
630 base /= 10;
631 }
632 *outp++ = ';';
633 }
634 restuple = Py_BuildValue("(Oi)", res, end);
635 Py_DECREF(res);
636 Py_DECREF(object);
637 return restuple;
638 }
639 else {
640 wrong_exception_type(exc);
641 return NULL;
642 }
643}
644
645static Py_UNICODE hexdigits[] = {
646 '0', '1', '2', '3', '4', '5', '6', '7',
647 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
648};
649
650PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
651{
652 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
653 PyObject *restuple;
654 PyObject *object;
655 int start;
656 int end;
657 PyObject *res;
658 Py_UNICODE *p;
659 Py_UNICODE *startp;
660 Py_UNICODE *outp;
661 int ressize;
662 if (PyUnicodeEncodeError_GetStart(exc, &start))
663 return NULL;
664 if (PyUnicodeEncodeError_GetEnd(exc, &end))
665 return NULL;
666 if (!(object = PyUnicodeEncodeError_GetObject(exc)))
667 return NULL;
668 startp = PyUnicode_AS_UNICODE(object);
669 for (p = startp+start, ressize = 0; p < startp+end; ++p) {
Hye-Shik Chang7db07e62003-12-29 01:36:01 +0000670#ifdef Py_UNICODE_WIDE
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000671 if (*p >= 0x00010000)
672 ressize += 1+1+8;
Hye-Shik Chang7db07e62003-12-29 01:36:01 +0000673 else
674#endif
675 if (*p >= 0x100) {
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000676 ressize += 1+1+4;
677 }
678 else
679 ressize += 1+1+2;
680 }
681 res = PyUnicode_FromUnicode(NULL, ressize);
682 if (res==NULL)
683 return NULL;
684 for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
685 p < startp+end; ++p) {
686 Py_UNICODE c = *p;
687 *outp++ = '\\';
Hye-Shik Chang7db07e62003-12-29 01:36:01 +0000688#ifdef Py_UNICODE_WIDE
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000689 if (c >= 0x00010000) {
690 *outp++ = 'U';
691 *outp++ = hexdigits[(c>>28)&0xf];
692 *outp++ = hexdigits[(c>>24)&0xf];
693 *outp++ = hexdigits[(c>>20)&0xf];
694 *outp++ = hexdigits[(c>>16)&0xf];
695 *outp++ = hexdigits[(c>>12)&0xf];
696 *outp++ = hexdigits[(c>>8)&0xf];
697 }
Hye-Shik Chang7db07e62003-12-29 01:36:01 +0000698 else
699#endif
700 if (c >= 0x100) {
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000701 *outp++ = 'u';
702 *outp++ = hexdigits[(c>>12)&0xf];
703 *outp++ = hexdigits[(c>>8)&0xf];
704 }
705 else
706 *outp++ = 'x';
707 *outp++ = hexdigits[(c>>4)&0xf];
708 *outp++ = hexdigits[c&0xf];
709 }
710
711 restuple = Py_BuildValue("(Oi)", res, end);
712 Py_DECREF(res);
713 Py_DECREF(object);
714 return restuple;
715 }
716 else {
717 wrong_exception_type(exc);
718 return NULL;
719 }
720}
Walter Dörwaldbf73db82002-11-21 20:08:33 +0000721#endif
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000722
723static PyObject *strict_errors(PyObject *self, PyObject *exc)
724{
725 return PyCodec_StrictErrors(exc);
726}
727
728
Walter Dörwaldbf73db82002-11-21 20:08:33 +0000729#ifdef Py_USING_UNICODE
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000730static PyObject *ignore_errors(PyObject *self, PyObject *exc)
731{
732 return PyCodec_IgnoreErrors(exc);
733}
734
735
736static PyObject *replace_errors(PyObject *self, PyObject *exc)
737{
738 return PyCodec_ReplaceErrors(exc);
739}
740
741
742static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
743{
744 return PyCodec_XMLCharRefReplaceErrors(exc);
745}
746
747
748static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
749{
750 return PyCodec_BackslashReplaceErrors(exc);
751}
Walter Dörwaldbf73db82002-11-21 20:08:33 +0000752#endif
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000753
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000754static int _PyCodecRegistry_Init(void)
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000755{
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000756 static struct {
757 char *name;
758 PyMethodDef def;
759 } methods[] =
760 {
761 {
762 "strict",
763 {
764 "strict_errors",
765 strict_errors,
766 METH_O
767 }
768 },
Walter Dörwaldbf73db82002-11-21 20:08:33 +0000769#ifdef Py_USING_UNICODE
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000770 {
771 "ignore",
772 {
773 "ignore_errors",
774 ignore_errors,
775 METH_O
776 }
777 },
778 {
779 "replace",
780 {
781 "replace_errors",
782 replace_errors,
783 METH_O
784 }
785 },
786 {
787 "xmlcharrefreplace",
788 {
789 "xmlcharrefreplace_errors",
790 xmlcharrefreplace_errors,
791 METH_O
792 }
793 },
794 {
795 "backslashreplace",
796 {
797 "backslashreplace_errors",
798 backslashreplace_errors,
799 METH_O
800 }
801 }
Walter Dörwaldbf73db82002-11-21 20:08:33 +0000802#endif
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000803 };
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000804
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000805 PyInterpreterState *interp = PyThreadState_Get()->interp;
806 PyObject *mod;
807 int i;
808
809 if (interp->codec_search_path != NULL)
810 return 0;
811
812 interp->codec_search_path = PyList_New(0);
813 interp->codec_search_cache = PyDict_New();
814 interp->codec_error_registry = PyDict_New();
815
816 if (interp->codec_error_registry) {
817 for (i = 0; i < sizeof(methods)/sizeof(methods[0]); ++i) {
818 PyObject *func = PyCFunction_New(&methods[i].def, NULL);
819 int res;
820 if (!func)
821 Py_FatalError("can't initialize codec error registry");
822 res = PyCodec_RegisterError(methods[i].name, func);
823 Py_DECREF(func);
824 if (res)
825 Py_FatalError("can't initialize codec error registry");
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000826 }
827 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000828
Gustavo Niemeyer5ddd4c32003-03-19 00:35:36 +0000829 if (interp->codec_search_path == NULL ||
830 interp->codec_search_cache == NULL ||
831 interp->codec_error_registry == NULL)
832 Py_FatalError("can't initialize codec registry");
833
834 mod = PyImport_ImportModuleEx("encodings", NULL, NULL, NULL);
835 if (mod == NULL) {
836 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
837 /* Ignore ImportErrors... this is done so that
838 distributions can disable the encodings package. Note
839 that other errors are not masked, e.g. SystemErrors
840 raised to inform the user of an error in the Python
841 configuration are still reported back to the user. */
842 PyErr_Clear();
843 return 0;
844 }
845 return -1;
846 }
847 Py_DECREF(mod);
848 return 0;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000849}