blob: 7509c1b571826a8b7b2e7f6f0e68d7db064c74e2 [file] [log] [blame]
/* ------------------------------------------------------------------------

   _codecs -- Provides access to the codec registry and the builtin
              codecs.

   This module should never be imported directly. The standard library
   module "codecs" wraps this builtin module for use within Python.

   The codec registry is accessible via:

     register(search_function) -> None

     lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)

   The builtin Unicode codecs use the following interface:

     <encoding>_encode(Unicode_object[,errors='strict']) ->
        (string object, bytes consumed)

     <encoding>_decode(char_buffer_obj[,errors='strict']) ->
        (Unicode object, bytes consumed)

   <encoding>_encode() interfaces also accept non-Unicode objects as
   input. The objects are then converted to Unicode using
   PyUnicode_FromObject() prior to applying the conversion.

   These <encoding>s are available: utf_7, utf_8, utf_16 (plus the
   utf_16_le, utf_16_be and decode-only utf_16_ex variants),
   unicode_escape, raw_unicode_escape, unicode_internal, latin_1,
   ascii (7-bit), charmap, mbcs (on win32).


Written by Marc-Andre Lemburg (mal@lemburg.com).

Copyright (c) Corporation for National Research Initiatives.

   ------------------------------------------------------------------------ */
37
38#include "Python.h"
39
40/* --- Registry ----------------------------------------------------------- */
41
Walter Dörwald0ae29812002-10-31 13:36:29 +000042PyDoc_STRVAR(register__doc__,
43"register(search_function)\n\
44\n\
45Register a codec search function. Search functions are expected to take\n\
46one argument, the encoding name in all lower case letters, and return\n\
47a tuple of functions (encoder, decoder, stream_reader, stream_writer).");
48
Guido van Rossume2d67f92000-03-10 23:09:23 +000049static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000050PyObject *codec_register(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000051{
52 PyObject *search_function;
53
54 if (!PyArg_ParseTuple(args, "O:register", &search_function))
55 goto onError;
56
57 if (PyCodec_Register(search_function))
58 goto onError;
59
60 Py_INCREF(Py_None);
61 return Py_None;
62
63 onError:
64 return NULL;
65}
66
Walter Dörwald0ae29812002-10-31 13:36:29 +000067PyDoc_STRVAR(lookup__doc__,
68"lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)\n\
69\n\
70Looks up a codec tuple in the Python codec registry and returns\n\
71a tuple of functions.");
72
Guido van Rossume2d67f92000-03-10 23:09:23 +000073static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000074PyObject *codec_lookup(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000075{
76 char *encoding;
77
78 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
79 goto onError;
80
81 return _PyCodec_Lookup(encoding);
82
83 onError:
84 return NULL;
85}
86
Marc-André Lemburg3f419742004-07-10 12:06:10 +000087PyDoc_STRVAR(encode__doc__,
88"encode(obj, [encoding[,errors]]) -> object\n\
89\n\
90Encodes obj using the codec registered for encoding. encoding defaults\n\
91to the default encoding. errors may be given to set a different error\n\
92handling scheme. Default is 'strict' meaning that encoding errors raise\n\
93a ValueError. Other possible values are 'ignore', 'replace' and\n\
94'xmlcharrefreplace' as well as any other name registered with\n\
95codecs.register_error that can handle ValueErrors.");
96
97static PyObject *
98codec_encode(PyObject *self, PyObject *args)
99{
Brett Cannon3e377de2004-07-10 21:41:14 +0000100 const char *encoding = NULL;
101 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000102 PyObject *v;
103
104 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
105 return NULL;
106
107 if (encoding == NULL)
108 encoding = PyUnicode_GetDefaultEncoding();
109
110 /* Encode via the codec registry */
111 v = PyCodec_Encode(v, encoding, errors);
112 if (v == NULL)
113 goto onError;
114 return v;
115
116 onError:
117 return NULL;
118}
119
120PyDoc_STRVAR(decode__doc__,
121"decode(obj, [encoding[,errors]]) -> object\n\
122\n\
123Decodes obj using the codec registered for encoding. encoding defaults\n\
124to the default encoding. errors may be given to set a different error\n\
125handling scheme. Default is 'strict' meaning that encoding errors raise\n\
126a ValueError. Other possible values are 'ignore' and 'replace'\n\
127as well as any other name registerd with codecs.register_error that is\n\
128able to handle ValueErrors.");
129
130static PyObject *
131codec_decode(PyObject *self, PyObject *args)
132{
Brett Cannon3e377de2004-07-10 21:41:14 +0000133 const char *encoding = NULL;
134 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000135 PyObject *v;
136
137 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
138 return NULL;
139
140 if (encoding == NULL)
141 encoding = PyUnicode_GetDefaultEncoding();
142
143 /* Decode via the codec registry */
144 v = PyCodec_Decode(v, encoding, errors);
145 if (v == NULL)
146 goto onError;
147 return v;
148
149 onError:
150 return NULL;
151}
152
Guido van Rossume2d67f92000-03-10 23:09:23 +0000153/* --- Helpers ------------------------------------------------------------ */
154
155static
156PyObject *codec_tuple(PyObject *unicode,
157 int len)
158{
159 PyObject *v,*w;
160
161 if (unicode == NULL)
162 return NULL;
163 v = PyTuple_New(2);
164 if (v == NULL) {
165 Py_DECREF(unicode);
166 return NULL;
167 }
168 PyTuple_SET_ITEM(v,0,unicode);
169 w = PyInt_FromLong(len);
170 if (w == NULL) {
171 Py_DECREF(v);
172 return NULL;
173 }
174 PyTuple_SET_ITEM(v,1,w);
175 return v;
176}
177
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000178/* --- String codecs ------------------------------------------------------ */
179static PyObject *
180escape_decode(PyObject *self,
181 PyObject *args)
182{
183 const char *errors = NULL;
184 const char *data;
185 int size;
186
187 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
188 &data, &size, &errors))
189 return NULL;
190 return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
191 size);
192}
193
194static PyObject *
195escape_encode(PyObject *self,
196 PyObject *args)
197{
198 PyObject *str;
199 const char *errors = NULL;
200 char *buf;
201 int len;
202
203 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
204 &PyString_Type, &str, &errors))
205 return NULL;
206
207 str = PyString_Repr(str, 0);
208 if (!str)
209 return NULL;
210
211 /* The string will be quoted. Unquote, similar to unicode-escape. */
212 buf = PyString_AS_STRING (str);
213 len = PyString_GET_SIZE (str);
214 memmove(buf, buf+1, len-2);
215 _PyString_Resize(&str, len-2);
216
217 return codec_tuple(str, PyString_Size(str));
218}
219
220#ifdef Py_USING_UNICODE
Guido van Rossume2d67f92000-03-10 23:09:23 +0000221/* --- Decoder ------------------------------------------------------------ */
222
223static PyObject *
224unicode_internal_decode(PyObject *self,
225 PyObject *args)
226{
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000227 PyObject *obj;
228 const char *errors = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000229 const char *data;
230 int size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000231
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000232 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
233 &obj, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000234 return NULL;
235
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000236 if (PyUnicode_Check(obj)) {
237 Py_INCREF(obj);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000238 return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000239 }
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000240 else {
241 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
242 return NULL;
243 return codec_tuple(PyUnicode_FromUnicode((Py_UNICODE *)data,
244 size / sizeof(Py_UNICODE)),
245 size);
246 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000247}
248
249static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000250utf_7_decode(PyObject *self,
251 PyObject *args)
252{
253 const char *data;
254 int size;
255 const char *errors = NULL;
256
257 if (!PyArg_ParseTuple(args, "t#|z:utf_7_decode",
258 &data, &size, &errors))
259 return NULL;
260
261 return codec_tuple(PyUnicode_DecodeUTF7(data, size, errors),
262 size);
263}
264
265static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000266utf_8_decode(PyObject *self,
267 PyObject *args)
268{
269 const char *data;
270 int size;
271 const char *errors = NULL;
272
273 if (!PyArg_ParseTuple(args, "t#|z:utf_8_decode",
274 &data, &size, &errors))
275 return NULL;
276
277 return codec_tuple(PyUnicode_DecodeUTF8(data, size, errors),
278 size);
279}
280
281static PyObject *
282utf_16_decode(PyObject *self,
283 PyObject *args)
284{
285 const char *data;
286 int size;
287 const char *errors = NULL;
288 int byteorder = 0;
289
290 if (!PyArg_ParseTuple(args, "t#|z:utf_16_decode",
291 &data, &size, &errors))
292 return NULL;
293 return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
294 size);
295}
296
297static PyObject *
298utf_16_le_decode(PyObject *self,
299 PyObject *args)
300{
301 const char *data;
302 int size;
303 const char *errors = NULL;
304 int byteorder = -1;
305
306 if (!PyArg_ParseTuple(args, "t#|z:utf_16_le_decode",
307 &data, &size, &errors))
308 return NULL;
309 return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
310 size);
311}
312
313static PyObject *
314utf_16_be_decode(PyObject *self,
315 PyObject *args)
316{
317 const char *data;
318 int size;
319 const char *errors = NULL;
320 int byteorder = 1;
321
322 if (!PyArg_ParseTuple(args, "t#|z:utf_16_be_decode",
323 &data, &size, &errors))
324 return NULL;
325 return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
326 size);
327}
328
329/* This non-standard version also provides access to the byteorder
330 parameter of the builtin UTF-16 codec.
331
332 It returns a tuple (unicode, bytesread, byteorder) with byteorder
333 being the value in effect at the end of data.
334
335*/
336
337static PyObject *
338utf_16_ex_decode(PyObject *self,
339 PyObject *args)
340{
341 const char *data;
342 int size;
343 const char *errors = NULL;
344 int byteorder = 0;
345 PyObject *unicode, *tuple;
346
347 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_ex_decode",
348 &data, &size, &errors, &byteorder))
349 return NULL;
350
351 unicode = PyUnicode_DecodeUTF16(data, size, errors, &byteorder);
352 if (unicode == NULL)
353 return NULL;
354 tuple = Py_BuildValue("Oii", unicode, size, byteorder);
355 Py_DECREF(unicode);
356 return tuple;
357}
358
359static PyObject *
360unicode_escape_decode(PyObject *self,
361 PyObject *args)
362{
363 const char *data;
364 int size;
365 const char *errors = NULL;
366
367 if (!PyArg_ParseTuple(args, "t#|z:unicode_escape_decode",
368 &data, &size, &errors))
369 return NULL;
370
371 return codec_tuple(PyUnicode_DecodeUnicodeEscape(data, size, errors),
372 size);
373}
374
375static PyObject *
376raw_unicode_escape_decode(PyObject *self,
377 PyObject *args)
378{
379 const char *data;
380 int size;
381 const char *errors = NULL;
382
383 if (!PyArg_ParseTuple(args, "t#|z:raw_unicode_escape_decode",
384 &data, &size, &errors))
385 return NULL;
386
387 return codec_tuple(PyUnicode_DecodeRawUnicodeEscape(data, size, errors),
388 size);
389}
390
391static PyObject *
392latin_1_decode(PyObject *self,
393 PyObject *args)
394{
395 const char *data;
396 int size;
397 const char *errors = NULL;
398
399 if (!PyArg_ParseTuple(args, "t#|z:latin_1_decode",
400 &data, &size, &errors))
401 return NULL;
402
403 return codec_tuple(PyUnicode_DecodeLatin1(data, size, errors),
404 size);
405}
406
407static PyObject *
408ascii_decode(PyObject *self,
409 PyObject *args)
410{
411 const char *data;
412 int size;
413 const char *errors = NULL;
414
415 if (!PyArg_ParseTuple(args, "t#|z:ascii_decode",
416 &data, &size, &errors))
417 return NULL;
418
419 return codec_tuple(PyUnicode_DecodeASCII(data, size, errors),
420 size);
421}
422
423static PyObject *
424charmap_decode(PyObject *self,
425 PyObject *args)
426{
427 const char *data;
428 int size;
429 const char *errors = NULL;
430 PyObject *mapping = NULL;
431
432 if (!PyArg_ParseTuple(args, "t#|zO:charmap_decode",
433 &data, &size, &errors, &mapping))
434 return NULL;
435 if (mapping == Py_None)
436 mapping = NULL;
437
438 return codec_tuple(PyUnicode_DecodeCharmap(data, size, mapping, errors),
439 size);
440}
441
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000442#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000443
444static PyObject *
445mbcs_decode(PyObject *self,
446 PyObject *args)
447{
448 const char *data;
449 int size;
450 const char *errors = NULL;
451
452 if (!PyArg_ParseTuple(args, "t#|z:mbcs_decode",
453 &data, &size, &errors))
454 return NULL;
455
456 return codec_tuple(PyUnicode_DecodeMBCS(data, size, errors),
457 size);
458}
459
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000460#endif /* MS_WINDOWS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000461
Guido van Rossume2d67f92000-03-10 23:09:23 +0000462/* --- Encoder ------------------------------------------------------------ */
463
464static PyObject *
465readbuffer_encode(PyObject *self,
466 PyObject *args)
467{
468 const char *data;
469 int size;
470 const char *errors = NULL;
471
472 if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
473 &data, &size, &errors))
474 return NULL;
475
476 return codec_tuple(PyString_FromStringAndSize(data, size),
477 size);
478}
479
480static PyObject *
481charbuffer_encode(PyObject *self,
482 PyObject *args)
483{
484 const char *data;
485 int size;
486 const char *errors = NULL;
487
488 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
489 &data, &size, &errors))
490 return NULL;
491
492 return codec_tuple(PyString_FromStringAndSize(data, size),
493 size);
494}
495
496static PyObject *
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000497unicode_internal_encode(PyObject *self,
498 PyObject *args)
499{
500 PyObject *obj;
501 const char *errors = NULL;
502 const char *data;
503 int size;
504
505 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
506 &obj, &errors))
507 return NULL;
508
509 if (PyUnicode_Check(obj)) {
510 data = PyUnicode_AS_DATA(obj);
511 size = PyUnicode_GET_DATA_SIZE(obj);
512 return codec_tuple(PyString_FromStringAndSize(data, size),
513 size);
514 }
515 else {
516 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
517 return NULL;
518 return codec_tuple(PyString_FromStringAndSize(data, size),
519 size);
520 }
521}
522
523static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000524utf_7_encode(PyObject *self,
525 PyObject *args)
526{
527 PyObject *str, *v;
528 const char *errors = NULL;
529
530 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
531 &str, &errors))
532 return NULL;
533
534 str = PyUnicode_FromObject(str);
535 if (str == NULL)
536 return NULL;
537 v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
538 PyUnicode_GET_SIZE(str),
539 0,
540 0,
541 errors),
542 PyUnicode_GET_SIZE(str));
543 Py_DECREF(str);
544 return v;
545}
546
547static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000548utf_8_encode(PyObject *self,
549 PyObject *args)
550{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000551 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000552 const char *errors = NULL;
553
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000554 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000555 &str, &errors))
556 return NULL;
557
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000558 str = PyUnicode_FromObject(str);
559 if (str == NULL)
560 return NULL;
561 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
562 PyUnicode_GET_SIZE(str),
563 errors),
564 PyUnicode_GET_SIZE(str));
565 Py_DECREF(str);
566 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000567}
568
569/* This version provides access to the byteorder parameter of the
570 builtin UTF-16 codecs as optional third argument. It defaults to 0
571 which means: use the native byte order and prepend the data with a
572 BOM mark.
573
574*/
575
576static PyObject *
577utf_16_encode(PyObject *self,
578 PyObject *args)
579{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000580 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000581 const char *errors = NULL;
582 int byteorder = 0;
583
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000584 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000585 &str, &errors, &byteorder))
586 return NULL;
587
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000588 str = PyUnicode_FromObject(str);
589 if (str == NULL)
590 return NULL;
591 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
592 PyUnicode_GET_SIZE(str),
593 errors,
594 byteorder),
595 PyUnicode_GET_SIZE(str));
596 Py_DECREF(str);
597 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000598}
599
600static PyObject *
601utf_16_le_encode(PyObject *self,
602 PyObject *args)
603{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000604 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000605 const char *errors = NULL;
606
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000607 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000608 &str, &errors))
609 return NULL;
610
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000611 str = PyUnicode_FromObject(str);
612 if (str == NULL)
613 return NULL;
614 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000615 PyUnicode_GET_SIZE(str),
616 errors,
617 -1),
618 PyUnicode_GET_SIZE(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000619 Py_DECREF(str);
620 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000621}
622
623static PyObject *
624utf_16_be_encode(PyObject *self,
625 PyObject *args)
626{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000627 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000628 const char *errors = NULL;
629
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000630 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000631 &str, &errors))
632 return NULL;
633
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000634 str = PyUnicode_FromObject(str);
635 if (str == NULL)
636 return NULL;
637 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
638 PyUnicode_GET_SIZE(str),
639 errors,
640 +1),
641 PyUnicode_GET_SIZE(str));
642 Py_DECREF(str);
643 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000644}
645
646static PyObject *
647unicode_escape_encode(PyObject *self,
648 PyObject *args)
649{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000650 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000651 const char *errors = NULL;
652
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000653 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000654 &str, &errors))
655 return NULL;
656
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000657 str = PyUnicode_FromObject(str);
658 if (str == NULL)
659 return NULL;
660 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
661 PyUnicode_GET_SIZE(str)),
662 PyUnicode_GET_SIZE(str));
663 Py_DECREF(str);
664 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000665}
666
667static PyObject *
668raw_unicode_escape_encode(PyObject *self,
669 PyObject *args)
670{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000671 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000672 const char *errors = NULL;
673
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000674 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000675 &str, &errors))
676 return NULL;
677
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000678 str = PyUnicode_FromObject(str);
679 if (str == NULL)
680 return NULL;
681 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
Guido van Rossume2d67f92000-03-10 23:09:23 +0000682 PyUnicode_AS_UNICODE(str),
683 PyUnicode_GET_SIZE(str)),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000684 PyUnicode_GET_SIZE(str));
685 Py_DECREF(str);
686 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000687}
688
689static PyObject *
690latin_1_encode(PyObject *self,
691 PyObject *args)
692{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000693 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000694 const char *errors = NULL;
695
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000696 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000697 &str, &errors))
698 return NULL;
699
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000700 str = PyUnicode_FromObject(str);
701 if (str == NULL)
702 return NULL;
703 v = codec_tuple(PyUnicode_EncodeLatin1(
Guido van Rossume2d67f92000-03-10 23:09:23 +0000704 PyUnicode_AS_UNICODE(str),
705 PyUnicode_GET_SIZE(str),
706 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000707 PyUnicode_GET_SIZE(str));
708 Py_DECREF(str);
709 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000710}
711
712static PyObject *
713ascii_encode(PyObject *self,
714 PyObject *args)
715{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000716 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000717 const char *errors = NULL;
718
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000719 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000720 &str, &errors))
721 return NULL;
722
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000723 str = PyUnicode_FromObject(str);
724 if (str == NULL)
725 return NULL;
726 v = codec_tuple(PyUnicode_EncodeASCII(
Guido van Rossume2d67f92000-03-10 23:09:23 +0000727 PyUnicode_AS_UNICODE(str),
728 PyUnicode_GET_SIZE(str),
729 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000730 PyUnicode_GET_SIZE(str));
731 Py_DECREF(str);
732 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000733}
734
735static PyObject *
736charmap_encode(PyObject *self,
737 PyObject *args)
738{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000739 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000740 const char *errors = NULL;
741 PyObject *mapping = NULL;
742
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000743 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000744 &str, &errors, &mapping))
745 return NULL;
746 if (mapping == Py_None)
747 mapping = NULL;
748
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000749 str = PyUnicode_FromObject(str);
750 if (str == NULL)
751 return NULL;
752 v = codec_tuple(PyUnicode_EncodeCharmap(
Guido van Rossume2d67f92000-03-10 23:09:23 +0000753 PyUnicode_AS_UNICODE(str),
754 PyUnicode_GET_SIZE(str),
755 mapping,
756 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000757 PyUnicode_GET_SIZE(str));
758 Py_DECREF(str);
759 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000760}
761
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000762#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000763
764static PyObject *
765mbcs_encode(PyObject *self,
766 PyObject *args)
767{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000768 PyObject *str, *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000769 const char *errors = NULL;
770
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000771 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
Guido van Rossum24bdb042000-03-28 20:29:59 +0000772 &str, &errors))
773 return NULL;
774
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000775 str = PyUnicode_FromObject(str);
776 if (str == NULL)
777 return NULL;
778 v = codec_tuple(PyUnicode_EncodeMBCS(
Guido van Rossum24bdb042000-03-28 20:29:59 +0000779 PyUnicode_AS_UNICODE(str),
780 PyUnicode_GET_SIZE(str),
781 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000782 PyUnicode_GET_SIZE(str));
783 Py_DECREF(str);
784 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000785}
786
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000787#endif /* MS_WINDOWS */
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000788#endif /* Py_USING_UNICODE */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000789
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000790/* --- Error handler registry --------------------------------------------- */
791
Walter Dörwald0ae29812002-10-31 13:36:29 +0000792PyDoc_STRVAR(register_error__doc__,
793"register_error(errors, handler)\n\
794\n\
795Register the specified error handler under the name\n\
796errors. handler must be a callable object, that\n\
797will be called with an exception instance containing\n\
798information about the location of the encoding/decoding\n\
799error and must return a (replacement, new position) tuple.");
800
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000801static PyObject *register_error(PyObject *self, PyObject *args)
802{
803 const char *name;
804 PyObject *handler;
805
806 if (!PyArg_ParseTuple(args, "sO:register_error",
807 &name, &handler))
808 return NULL;
809 if (PyCodec_RegisterError(name, handler))
810 return NULL;
811 Py_INCREF(Py_None);
812 return Py_None;
813}
814
Walter Dörwald0ae29812002-10-31 13:36:29 +0000815PyDoc_STRVAR(lookup_error__doc__,
816"lookup_error(errors) -> handler\n\
817\n\
818Return the error handler for the specified error handling name\n\
819or raise a LookupError, if no handler exists under this name.");
820
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000821static PyObject *lookup_error(PyObject *self, PyObject *args)
822{
823 const char *name;
824
825 if (!PyArg_ParseTuple(args, "s:lookup_error",
826 &name))
827 return NULL;
828 return PyCodec_LookupError(name);
829}
830
Guido van Rossume2d67f92000-03-10 23:09:23 +0000831/* --- Module API --------------------------------------------------------- */
832
833static PyMethodDef _codecs_functions[] = {
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000834 {"register", codec_register, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +0000835 register__doc__},
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000836 {"lookup", codec_lookup, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +0000837 lookup__doc__},
Brett Cannon3e377de2004-07-10 21:41:14 +0000838 {"encode", codec_encode, METH_VARARGS,
839 encode__doc__},
840 {"decode", codec_decode, METH_VARARGS,
841 decode__doc__},
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000842 {"escape_encode", escape_encode, METH_VARARGS},
843 {"escape_decode", escape_decode, METH_VARARGS},
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000844#ifdef Py_USING_UNICODE
Martin v. Löwis43b936d2002-01-17 23:15:58 +0000845 {"utf_8_encode", utf_8_encode, METH_VARARGS},
846 {"utf_8_decode", utf_8_decode, METH_VARARGS},
847 {"utf_7_encode", utf_7_encode, METH_VARARGS},
848 {"utf_7_decode", utf_7_decode, METH_VARARGS},
849 {"utf_16_encode", utf_16_encode, METH_VARARGS},
850 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
851 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
852 {"utf_16_decode", utf_16_decode, METH_VARARGS},
853 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
854 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
855 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
856 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
857 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
858 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
859 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
860 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
861 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
862 {"latin_1_encode", latin_1_encode, METH_VARARGS},
863 {"latin_1_decode", latin_1_decode, METH_VARARGS},
864 {"ascii_encode", ascii_encode, METH_VARARGS},
865 {"ascii_decode", ascii_decode, METH_VARARGS},
866 {"charmap_encode", charmap_encode, METH_VARARGS},
867 {"charmap_decode", charmap_decode, METH_VARARGS},
868 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
869 {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000870#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Martin v. Löwis43b936d2002-01-17 23:15:58 +0000871 {"mbcs_encode", mbcs_encode, METH_VARARGS},
872 {"mbcs_decode", mbcs_decode, METH_VARARGS},
Guido van Rossum24bdb042000-03-28 20:29:59 +0000873#endif
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000874#endif /* Py_USING_UNICODE */
Walter Dörwald0ae29812002-10-31 13:36:29 +0000875 {"register_error", register_error, METH_VARARGS,
876 register_error__doc__},
877 {"lookup_error", lookup_error, METH_VARARGS,
878 lookup_error__doc__},
Guido van Rossume2d67f92000-03-10 23:09:23 +0000879 {NULL, NULL} /* sentinel */
880};
881
Mark Hammondfe51c6d2002-08-02 02:27:13 +0000882PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +0000883init_codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000884{
885 Py_InitModule("_codecs", _codecs_functions);
886}