blob: ec0daf380bf108100c56090f0eea9dc00f309c87 [file] [log] [blame]
Guido van Rossumfeee4b92000-03-10 22:57:27 +00001/* ------------------------------------------------------------------------
2
3 Python Codec Registry and support functions
4
5Written by Marc-Andre Lemburg (mal@lemburg.com).
6
Guido van Rossum16b1ad92000-08-03 16:24:25 +00007Copyright (c) Corporation for National Research Initiatives.
Guido van Rossumfeee4b92000-03-10 22:57:27 +00008
9 ------------------------------------------------------------------------ */
10
11#include "Python.h"
12#include <ctype.h>
13
14/* --- Globals ------------------------------------------------------------ */
15
16static PyObject *_PyCodec_SearchPath;
17static PyObject *_PyCodec_SearchCache;
18
19/* Flag used for lazy import of the standard encodings package */
20static int import_encodings_called = 0;
21
22/* --- Codec Registry ----------------------------------------------------- */
23
24/* Import the standard encodings package which will register the first
25 codec search function.
26
27 This is done in a lazy way so that the Unicode implementation does
28 not downgrade startup time of scripts not needing it.
29
Guido van Rossumb95de4f2000-03-31 17:25:23 +000030 ImportErrors are silently ignored by this function. Only one try is
31 made.
Guido van Rossumfeee4b92000-03-10 22:57:27 +000032
33*/
34
35static
Thomas Woutersf70ef4f2000-07-22 18:47:25 +000036int import_encodings(void)
Guido van Rossumfeee4b92000-03-10 22:57:27 +000037{
38 PyObject *mod;
39
40 import_encodings_called = 1;
41 mod = PyImport_ImportModule("encodings");
42 if (mod == NULL) {
Guido van Rossumb95de4f2000-03-31 17:25:23 +000043 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
44 /* Ignore ImportErrors... this is done so that
45 distributions can disable the encodings package. Note
46 that other errors are not masked, e.g. SystemErrors
47 raised to inform the user of an error in the Python
48 configuration are still reported back to the user. */
49 PyErr_Clear();
50 return 0;
51 }
52 return -1;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000053 }
54 Py_DECREF(mod);
Guido van Rossumb95de4f2000-03-31 17:25:23 +000055 return 0;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000056}
57
Guido van Rossumfeee4b92000-03-10 22:57:27 +000058int PyCodec_Register(PyObject *search_function)
59{
Guido van Rossumb95de4f2000-03-31 17:25:23 +000060 if (!import_encodings_called) {
61 if (import_encodings())
62 goto onError;
63 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +000064 if (search_function == NULL) {
65 PyErr_BadArgument();
Guido van Rossumb95de4f2000-03-31 17:25:23 +000066 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000067 }
68 if (!PyCallable_Check(search_function)) {
69 PyErr_SetString(PyExc_TypeError,
70 "argument must be callable");
Guido van Rossumb95de4f2000-03-31 17:25:23 +000071 goto onError;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000072 }
73 return PyList_Append(_PyCodec_SearchPath, search_function);
Guido van Rossumb95de4f2000-03-31 17:25:23 +000074
75 onError:
76 return -1;
Guido van Rossumfeee4b92000-03-10 22:57:27 +000077}
78
Guido van Rossum9e896b32000-04-05 20:11:21 +000079/* Convert a string to a normalized Python string: all characters are
80 converted to lower case, spaces are replaced with underscores. */
81
Guido van Rossumfeee4b92000-03-10 22:57:27 +000082static
Guido van Rossum9e896b32000-04-05 20:11:21 +000083PyObject *normalizestring(const char *string)
Guido van Rossumfeee4b92000-03-10 22:57:27 +000084{
Guido van Rossum33831132000-06-29 14:50:15 +000085 register size_t i;
Guido van Rossum582acec2000-06-28 22:07:35 +000086 size_t len = strlen(string);
Guido van Rossumfeee4b92000-03-10 22:57:27 +000087 char *p;
88 PyObject *v;
89
Guido van Rossum582acec2000-06-28 22:07:35 +000090 if (len > INT_MAX) {
91 PyErr_SetString(PyExc_OverflowError, "string is too large");
92 return NULL;
93 }
94
95 v = PyString_FromStringAndSize(NULL, (int)len);
Guido van Rossumfeee4b92000-03-10 22:57:27 +000096 if (v == NULL)
97 return NULL;
98 p = PyString_AS_STRING(v);
Guido van Rossum9e896b32000-04-05 20:11:21 +000099 for (i = 0; i < len; i++) {
100 register char ch = string[i];
101 if (ch == ' ')
102 ch = '-';
103 else
104 ch = tolower(ch);
105 p[i] = ch;
106 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000107 return v;
108}
109
110/* Lookup the given encoding and return a tuple providing the codec
111 facilities.
112
113 The encoding string is looked up converted to all lower-case
114 characters. This makes encodings looked up through this mechanism
115 effectively case-insensitive.
116
Fred Drake766de832000-05-09 19:55:59 +0000117 If no codec is found, a LookupError is set and NULL returned.
Guido van Rossumb95de4f2000-03-31 17:25:23 +0000118
119 As side effect, this tries to load the encodings package, if not
120 yet done. This is part of the lazy load strategy for the encodings
121 package.
122
123*/
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000124
125PyObject *_PyCodec_Lookup(const char *encoding)
126{
Guido van Rossum5ba3c842000-03-24 20:52:23 +0000127 PyObject *result, *args = NULL, *v;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000128 int i, len;
129
Fred Drake766de832000-05-09 19:55:59 +0000130 if (encoding == NULL) {
131 PyErr_BadArgument();
132 goto onError;
133 }
Guido van Rossumb95de4f2000-03-31 17:25:23 +0000134 if (_PyCodec_SearchCache == NULL ||
135 _PyCodec_SearchPath == NULL) {
Barry Warsaw51ac5802000-03-20 16:36:48 +0000136 PyErr_SetString(PyExc_SystemError,
137 "codec module not properly initialized");
138 goto onError;
139 }
Guido van Rossumb95de4f2000-03-31 17:25:23 +0000140 if (!import_encodings_called) {
141 if (import_encodings())
142 goto onError;
143 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000144
Guido van Rossum9e896b32000-04-05 20:11:21 +0000145 /* Convert the encoding to a normalized Python string: all
Thomas Wouters7e474022000-07-16 12:04:32 +0000146 characters are converted to lower case, spaces and hyphens are
Guido van Rossum9e896b32000-04-05 20:11:21 +0000147 replaced with underscores. */
148 v = normalizestring(encoding);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000149 if (v == NULL)
150 goto onError;
151 PyString_InternInPlace(&v);
152
153 /* First, try to lookup the name in the registry dictionary */
154 result = PyDict_GetItem(_PyCodec_SearchCache, v);
155 if (result != NULL) {
156 Py_INCREF(result);
Barry Warsaw51ac5802000-03-20 16:36:48 +0000157 Py_DECREF(v);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000158 return result;
159 }
160
161 /* Next, scan the search functions in order of registration */
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000162 args = PyTuple_New(1);
163 if (args == NULL)
164 goto onError;
165 PyTuple_SET_ITEM(args,0,v);
Guido van Rossum5ba3c842000-03-24 20:52:23 +0000166
167 len = PyList_Size(_PyCodec_SearchPath);
168 if (len < 0)
169 goto onError;
Guido van Rossumb95de4f2000-03-31 17:25:23 +0000170 if (len == 0) {
171 PyErr_SetString(PyExc_LookupError,
172 "no codec search functions registered: "
173 "can't find encoding");
174 goto onError;
175 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000176
177 for (i = 0; i < len; i++) {
178 PyObject *func;
179
180 func = PyList_GetItem(_PyCodec_SearchPath, i);
181 if (func == NULL)
182 goto onError;
Guido van Rossum5ba3c842000-03-24 20:52:23 +0000183 result = PyEval_CallObject(func, args);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000184 if (result == NULL)
185 goto onError;
186 if (result == Py_None) {
187 Py_DECREF(result);
188 continue;
189 }
190 if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
191 PyErr_SetString(PyExc_TypeError,
192 "codec search functions must return 4-tuples");
193 Py_DECREF(result);
194 goto onError;
195 }
196 break;
197 }
198 if (i == len) {
199 /* XXX Perhaps we should cache misses too ? */
Martin v. Löwiseb42b022002-09-26 16:01:24 +0000200 PyErr_Format(PyExc_LookupError,
201 "unknown encoding: %s", encoding);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000202 goto onError;
203 }
204
205 /* Cache and return the result */
206 PyDict_SetItem(_PyCodec_SearchCache, v, result);
207 Py_DECREF(args);
208 return result;
209
210 onError:
211 Py_XDECREF(args);
212 return NULL;
213}
214
215static
216PyObject *args_tuple(PyObject *object,
217 const char *errors)
218{
219 PyObject *args;
220
221 args = PyTuple_New(1 + (errors != NULL));
222 if (args == NULL)
223 return NULL;
224 Py_INCREF(object);
225 PyTuple_SET_ITEM(args,0,object);
226 if (errors) {
227 PyObject *v;
228
229 v = PyString_FromString(errors);
230 if (v == NULL) {
231 Py_DECREF(args);
232 return NULL;
233 }
234 PyTuple_SET_ITEM(args, 1, v);
235 }
236 return args;
237}
238
239/* Build a codec by calling factory(stream[,errors]) or just
240 factory(errors) depending on whether the given parameters are
241 non-NULL. */
242
243static
244PyObject *build_stream_codec(PyObject *factory,
245 PyObject *stream,
246 const char *errors)
247{
248 PyObject *args, *codec;
249
250 args = args_tuple(stream, errors);
251 if (args == NULL)
252 return NULL;
253
254 codec = PyEval_CallObject(factory, args);
255 Py_DECREF(args);
256 return codec;
257}
258
259/* Convenience APIs to query the Codec registry.
260
261 All APIs return a codec object with incremented refcount.
262
263 */
264
265PyObject *PyCodec_Encoder(const char *encoding)
266{
267 PyObject *codecs;
268 PyObject *v;
269
270 codecs = _PyCodec_Lookup(encoding);
271 if (codecs == NULL)
272 goto onError;
273 v = PyTuple_GET_ITEM(codecs,0);
Mark Hammonde21262c2002-07-18 23:06:17 +0000274 Py_DECREF(codecs);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000275 Py_INCREF(v);
276 return v;
277
278 onError:
279 return NULL;
280}
281
282PyObject *PyCodec_Decoder(const char *encoding)
283{
284 PyObject *codecs;
285 PyObject *v;
286
287 codecs = _PyCodec_Lookup(encoding);
288 if (codecs == NULL)
289 goto onError;
290 v = PyTuple_GET_ITEM(codecs,1);
Mark Hammonde21262c2002-07-18 23:06:17 +0000291 Py_DECREF(codecs);
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000292 Py_INCREF(v);
293 return v;
294
295 onError:
296 return NULL;
297}
298
299PyObject *PyCodec_StreamReader(const char *encoding,
300 PyObject *stream,
301 const char *errors)
302{
Mark Hammonde21262c2002-07-18 23:06:17 +0000303 PyObject *codecs, *ret;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000304
305 codecs = _PyCodec_Lookup(encoding);
306 if (codecs == NULL)
307 goto onError;
Mark Hammonde21262c2002-07-18 23:06:17 +0000308 ret = build_stream_codec(PyTuple_GET_ITEM(codecs,2),stream,errors);
309 Py_DECREF(codecs);
310 return ret;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000311
312 onError:
313 return NULL;
314}
315
316PyObject *PyCodec_StreamWriter(const char *encoding,
317 PyObject *stream,
318 const char *errors)
319{
Mark Hammonde21262c2002-07-18 23:06:17 +0000320 PyObject *codecs, *ret;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000321
322 codecs = _PyCodec_Lookup(encoding);
323 if (codecs == NULL)
324 goto onError;
Mark Hammonde21262c2002-07-18 23:06:17 +0000325 ret = build_stream_codec(PyTuple_GET_ITEM(codecs,3),stream,errors);
326 Py_DECREF(codecs);
327 return ret;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000328
329 onError:
330 return NULL;
331}
332
333/* Encode an object (e.g. an Unicode object) using the given encoding
334 and return the resulting encoded object (usually a Python string).
335
336 errors is passed to the encoder factory as argument if non-NULL. */
337
338PyObject *PyCodec_Encode(PyObject *object,
339 const char *encoding,
340 const char *errors)
341{
342 PyObject *encoder = NULL;
343 PyObject *args = NULL, *result;
344 PyObject *v;
345
346 encoder = PyCodec_Encoder(encoding);
347 if (encoder == NULL)
348 goto onError;
349
350 args = args_tuple(object, errors);
351 if (args == NULL)
352 goto onError;
353
354 result = PyEval_CallObject(encoder,args);
355 if (result == NULL)
356 goto onError;
357
358 if (!PyTuple_Check(result) ||
359 PyTuple_GET_SIZE(result) != 2) {
360 PyErr_SetString(PyExc_TypeError,
361 "encoder must return a tuple (object,integer)");
362 goto onError;
363 }
364 v = PyTuple_GET_ITEM(result,0);
365 Py_INCREF(v);
366 /* We don't check or use the second (integer) entry. */
367
368 Py_DECREF(args);
369 Py_DECREF(encoder);
370 Py_DECREF(result);
371 return v;
372
373 onError:
374 Py_XDECREF(args);
375 Py_XDECREF(encoder);
376 return NULL;
377}
378
379/* Decode an object (usually a Python string) using the given encoding
380 and return an equivalent object (e.g. an Unicode object).
381
382 errors is passed to the decoder factory as argument if non-NULL. */
383
384PyObject *PyCodec_Decode(PyObject *object,
385 const char *encoding,
386 const char *errors)
387{
388 PyObject *decoder = NULL;
389 PyObject *args = NULL, *result = NULL;
390 PyObject *v;
391
392 decoder = PyCodec_Decoder(encoding);
393 if (decoder == NULL)
394 goto onError;
395
396 args = args_tuple(object, errors);
397 if (args == NULL)
398 goto onError;
399
400 result = PyEval_CallObject(decoder,args);
401 if (result == NULL)
402 goto onError;
403 if (!PyTuple_Check(result) ||
404 PyTuple_GET_SIZE(result) != 2) {
405 PyErr_SetString(PyExc_TypeError,
406 "decoder must return a tuple (object,integer)");
407 goto onError;
408 }
409 v = PyTuple_GET_ITEM(result,0);
410 Py_INCREF(v);
411 /* We don't check or use the second (integer) entry. */
412
413 Py_DECREF(args);
414 Py_DECREF(decoder);
415 Py_DECREF(result);
416 return v;
417
418 onError:
419 Py_XDECREF(args);
420 Py_XDECREF(decoder);
421 Py_XDECREF(result);
422 return NULL;
423}
424
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000425static PyObject *_PyCodec_ErrorRegistry;
426
427/* Register the error handling callback function error under the name
428 name. This function will be called by the codec when it encounters
429 an unencodable characters/undecodable bytes and doesn't know the
430 callback name, when name is specified as the error parameter
431 in the call to the encode/decode function.
432 Return 0 on success, -1 on error */
433int PyCodec_RegisterError(const char *name, PyObject *error)
434{
435 if (!PyCallable_Check(error)) {
436 PyErr_SetString(PyExc_TypeError, "handler must be callable");
437 return -1;
438 }
439 return PyDict_SetItemString( _PyCodec_ErrorRegistry, (char *)name, error);
440}
441
442/* Lookup the error handling callback function registered under the
443 name error. As a special case NULL can be passed, in which case
444 the error handling callback for strict encoding will be returned. */
445PyObject *PyCodec_LookupError(const char *name)
446{
447 PyObject *handler = NULL;
448
449 if (name==NULL)
450 name = "strict";
451 handler = PyDict_GetItemString(_PyCodec_ErrorRegistry, (char *)name);
452 if (!handler)
453 PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
454 else
455 Py_INCREF(handler);
456 return handler;
457}
458
459static void wrong_exception_type(PyObject *exc)
460{
461 PyObject *type = PyObject_GetAttrString(exc, "__class__");
462 if (type != NULL) {
463 PyObject *name = PyObject_GetAttrString(type, "__name__");
464 Py_DECREF(type);
465 if (name != NULL) {
466 PyObject *string = PyObject_Str(name);
467 Py_DECREF(name);
Walter Dörwaldf7bcd1d2002-09-02 18:22:32 +0000468 if (string != NULL) {
469 PyErr_Format(PyExc_TypeError,
470 "don't know how to handle %.400s in error callback",
471 PyString_AS_STRING(string));
472 Py_DECREF(string);
473 }
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000474 }
475 }
476}
477
478PyObject *PyCodec_StrictErrors(PyObject *exc)
479{
480 if (PyInstance_Check(exc))
481 PyErr_SetObject((PyObject*)((PyInstanceObject*)exc)->in_class,
482 exc);
483 else
484 PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
485 return NULL;
486}
487
488
Walter Dörwaldbf73db82002-11-21 20:08:33 +0000489#ifdef Py_USING_UNICODE
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000490PyObject *PyCodec_IgnoreErrors(PyObject *exc)
491{
492 int end;
493 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
494 if (PyUnicodeEncodeError_GetEnd(exc, &end))
495 return NULL;
496 }
497 else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
498 if (PyUnicodeDecodeError_GetEnd(exc, &end))
499 return NULL;
500 }
501 else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
502 if (PyUnicodeTranslateError_GetEnd(exc, &end))
503 return NULL;
504 }
505 else {
506 wrong_exception_type(exc);
507 return NULL;
508 }
509 /* ouch: passing NULL, 0, pos gives None instead of u'' */
510 return Py_BuildValue("(u#i)", &end, 0, end);
511}
512
513
514PyObject *PyCodec_ReplaceErrors(PyObject *exc)
515{
516 PyObject *restuple;
517 int start;
518 int end;
519 int i;
520
521 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
522 PyObject *res;
523 Py_UNICODE *p;
524 if (PyUnicodeEncodeError_GetStart(exc, &start))
525 return NULL;
526 if (PyUnicodeEncodeError_GetEnd(exc, &end))
527 return NULL;
528 res = PyUnicode_FromUnicode(NULL, end-start);
529 if (res == NULL)
530 return NULL;
531 for (p = PyUnicode_AS_UNICODE(res), i = start;
532 i<end; ++p, ++i)
533 *p = '?';
534 restuple = Py_BuildValue("(Oi)", res, end);
535 Py_DECREF(res);
536 return restuple;
537 }
538 else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
539 Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
540 if (PyUnicodeDecodeError_GetEnd(exc, &end))
541 return NULL;
542 return Py_BuildValue("(u#i)", &res, 1, end);
543 }
544 else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
545 PyObject *res;
546 Py_UNICODE *p;
547 if (PyUnicodeTranslateError_GetStart(exc, &start))
548 return NULL;
549 if (PyUnicodeTranslateError_GetEnd(exc, &end))
550 return NULL;
551 res = PyUnicode_FromUnicode(NULL, end-start);
552 if (res == NULL)
553 return NULL;
554 for (p = PyUnicode_AS_UNICODE(res), i = start;
555 i<end; ++p, ++i)
556 *p = Py_UNICODE_REPLACEMENT_CHARACTER;
557 restuple = Py_BuildValue("(Oi)", res, end);
558 Py_DECREF(res);
559 return restuple;
560 }
561 else {
562 wrong_exception_type(exc);
563 return NULL;
564 }
565}
566
567PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
568{
569 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
570 PyObject *restuple;
571 PyObject *object;
572 int start;
573 int end;
574 PyObject *res;
575 Py_UNICODE *p;
576 Py_UNICODE *startp;
577 Py_UNICODE *outp;
578 int ressize;
579 if (PyUnicodeEncodeError_GetStart(exc, &start))
580 return NULL;
581 if (PyUnicodeEncodeError_GetEnd(exc, &end))
582 return NULL;
583 if (!(object = PyUnicodeEncodeError_GetObject(exc)))
584 return NULL;
585 startp = PyUnicode_AS_UNICODE(object);
586 for (p = startp+start, ressize = 0; p < startp+end; ++p) {
587 if (*p<10)
588 ressize += 2+1+1;
589 else if (*p<100)
590 ressize += 2+2+1;
591 else if (*p<1000)
592 ressize += 2+3+1;
593 else if (*p<10000)
594 ressize += 2+4+1;
595 else if (*p<100000)
596 ressize += 2+5+1;
597 else if (*p<1000000)
598 ressize += 2+6+1;
599 else
600 ressize += 2+7+1;
601 }
602 /* allocate replacement */
603 res = PyUnicode_FromUnicode(NULL, ressize);
604 if (res == NULL) {
605 Py_DECREF(object);
606 return NULL;
607 }
608 /* generate replacement */
609 for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
610 p < startp+end; ++p) {
611 Py_UNICODE c = *p;
612 int digits;
613 int base;
614 *outp++ = '&';
615 *outp++ = '#';
616 if (*p<10) {
617 digits = 1;
618 base = 1;
619 }
620 else if (*p<100) {
621 digits = 2;
622 base = 10;
623 }
624 else if (*p<1000) {
625 digits = 3;
626 base = 100;
627 }
628 else if (*p<10000) {
629 digits = 4;
630 base = 1000;
631 }
632 else if (*p<100000) {
633 digits = 5;
634 base = 10000;
635 }
636 else if (*p<1000000) {
637 digits = 6;
638 base = 100000;
639 }
640 else {
641 digits = 7;
642 base = 1000000;
643 }
644 while (digits-->0) {
645 *outp++ = '0' + c/base;
646 c %= base;
647 base /= 10;
648 }
649 *outp++ = ';';
650 }
651 restuple = Py_BuildValue("(Oi)", res, end);
652 Py_DECREF(res);
653 Py_DECREF(object);
654 return restuple;
655 }
656 else {
657 wrong_exception_type(exc);
658 return NULL;
659 }
660}
661
662static Py_UNICODE hexdigits[] = {
663 '0', '1', '2', '3', '4', '5', '6', '7',
664 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
665};
666
667PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
668{
669 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
670 PyObject *restuple;
671 PyObject *object;
672 int start;
673 int end;
674 PyObject *res;
675 Py_UNICODE *p;
676 Py_UNICODE *startp;
677 Py_UNICODE *outp;
678 int ressize;
679 if (PyUnicodeEncodeError_GetStart(exc, &start))
680 return NULL;
681 if (PyUnicodeEncodeError_GetEnd(exc, &end))
682 return NULL;
683 if (!(object = PyUnicodeEncodeError_GetObject(exc)))
684 return NULL;
685 startp = PyUnicode_AS_UNICODE(object);
686 for (p = startp+start, ressize = 0; p < startp+end; ++p) {
687 if (*p >= 0x00010000)
688 ressize += 1+1+8;
689 else if (*p >= 0x100) {
690 ressize += 1+1+4;
691 }
692 else
693 ressize += 1+1+2;
694 }
695 res = PyUnicode_FromUnicode(NULL, ressize);
696 if (res==NULL)
697 return NULL;
698 for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
699 p < startp+end; ++p) {
700 Py_UNICODE c = *p;
701 *outp++ = '\\';
702 if (c >= 0x00010000) {
703 *outp++ = 'U';
704 *outp++ = hexdigits[(c>>28)&0xf];
705 *outp++ = hexdigits[(c>>24)&0xf];
706 *outp++ = hexdigits[(c>>20)&0xf];
707 *outp++ = hexdigits[(c>>16)&0xf];
708 *outp++ = hexdigits[(c>>12)&0xf];
709 *outp++ = hexdigits[(c>>8)&0xf];
710 }
711 else if (c >= 0x100) {
712 *outp++ = 'u';
713 *outp++ = hexdigits[(c>>12)&0xf];
714 *outp++ = hexdigits[(c>>8)&0xf];
715 }
716 else
717 *outp++ = 'x';
718 *outp++ = hexdigits[(c>>4)&0xf];
719 *outp++ = hexdigits[c&0xf];
720 }
721
722 restuple = Py_BuildValue("(Oi)", res, end);
723 Py_DECREF(res);
724 Py_DECREF(object);
725 return restuple;
726 }
727 else {
728 wrong_exception_type(exc);
729 return NULL;
730 }
731}
Walter Dörwaldbf73db82002-11-21 20:08:33 +0000732#endif
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000733
734static PyObject *strict_errors(PyObject *self, PyObject *exc)
735{
736 return PyCodec_StrictErrors(exc);
737}
738
739
Walter Dörwaldbf73db82002-11-21 20:08:33 +0000740#ifdef Py_USING_UNICODE
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000741static PyObject *ignore_errors(PyObject *self, PyObject *exc)
742{
743 return PyCodec_IgnoreErrors(exc);
744}
745
746
747static PyObject *replace_errors(PyObject *self, PyObject *exc)
748{
749 return PyCodec_ReplaceErrors(exc);
750}
751
752
753static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
754{
755 return PyCodec_XMLCharRefReplaceErrors(exc);
756}
757
758
759static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
760{
761 return PyCodec_BackslashReplaceErrors(exc);
762}
Walter Dörwaldbf73db82002-11-21 20:08:33 +0000763#endif
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000764
765
Thomas Woutersf70ef4f2000-07-22 18:47:25 +0000766void _PyCodecRegistry_Init(void)
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000767{
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000768 static struct {
769 char *name;
770 PyMethodDef def;
771 } methods[] =
772 {
773 {
774 "strict",
775 {
776 "strict_errors",
777 strict_errors,
778 METH_O
779 }
780 },
Walter Dörwaldbf73db82002-11-21 20:08:33 +0000781#ifdef Py_USING_UNICODE
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000782 {
783 "ignore",
784 {
785 "ignore_errors",
786 ignore_errors,
787 METH_O
788 }
789 },
790 {
791 "replace",
792 {
793 "replace_errors",
794 replace_errors,
795 METH_O
796 }
797 },
798 {
799 "xmlcharrefreplace",
800 {
801 "xmlcharrefreplace_errors",
802 xmlcharrefreplace_errors,
803 METH_O
804 }
805 },
806 {
807 "backslashreplace",
808 {
809 "backslashreplace_errors",
810 backslashreplace_errors,
811 METH_O
812 }
813 }
Walter Dörwaldbf73db82002-11-21 20:08:33 +0000814#endif
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000815 };
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000816 if (_PyCodec_SearchPath == NULL)
817 _PyCodec_SearchPath = PyList_New(0);
818 if (_PyCodec_SearchCache == NULL)
819 _PyCodec_SearchCache = PyDict_New();
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000820 if (_PyCodec_ErrorRegistry == NULL) {
821 int i;
822 _PyCodec_ErrorRegistry = PyDict_New();
823
824 if (_PyCodec_ErrorRegistry) {
Martin v. Löwisba2cf072002-11-21 23:53:25 +0000825 for (i = 0; i < sizeof(methods)/sizeof(methods[0]); ++i) {
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000826 PyObject *func = PyCFunction_New(&methods[i].def, NULL);
827 int res;
828 if (!func)
829 Py_FatalError("can't initialize codec error registry");
830 res = PyCodec_RegisterError(methods[i].name, func);
831 Py_DECREF(func);
832 if (res)
833 Py_FatalError("can't initialize codec error registry");
834 }
835 }
836 }
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000837 if (_PyCodec_SearchPath == NULL ||
838 _PyCodec_SearchCache == NULL)
Thomas Wouters7e474022000-07-16 12:04:32 +0000839 Py_FatalError("can't initialize codec registry");
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000840}
841
Thomas Woutersf70ef4f2000-07-22 18:47:25 +0000842void _PyCodecRegistry_Fini(void)
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000843{
844 Py_XDECREF(_PyCodec_SearchPath);
Barry Warsaw51ac5802000-03-20 16:36:48 +0000845 _PyCodec_SearchPath = NULL;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000846 Py_XDECREF(_PyCodec_SearchCache);
Barry Warsaw51ac5802000-03-20 16:36:48 +0000847 _PyCodec_SearchCache = NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000848 Py_XDECREF(_PyCodec_ErrorRegistry);
849 _PyCodec_ErrorRegistry = NULL;
Guido van Rossumfeee4b92000-03-10 22:57:27 +0000850}