blob: ccad827586669e02c1d184bdc06e982d74937c20 [file] [log] [blame]
/* ------------------------------------------------------------------------

   _codecs -- Provides access to the codec registry and the builtin
              codecs.

   This module should never be imported directly. The standard library
   module "codecs" wraps this builtin module for use within Python.

   The codec registry is accessible via:

     register(search_function) -> None

     lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)

   The builtin Unicode codecs use the following interface:

     <encoding>_encode(Unicode_object[,errors='strict']) ->
         (string object, bytes consumed)

     <encoding>_decode(char_buffer_obj[,errors='strict']) ->
         (Unicode object, bytes consumed)

   <encoding>_encode() interfaces also accept non-Unicode objects as
   input. The objects are then converted to Unicode using
   PyUnicode_FromObject() prior to applying the conversion.

   These <encoding>s are available: utf_8, unicode_escape,
   raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
   mbcs (on win32).


Written by Marc-Andre Lemburg (mal@lemburg.com).

Copyright (c) Corporation for National Research Initiatives.

   ------------------------------------------------------------------------ */
37
38#include "Python.h"
39
40/* --- Registry ----------------------------------------------------------- */
41
Walter Dörwald0ae29812002-10-31 13:36:29 +000042PyDoc_STRVAR(register__doc__,
43"register(search_function)\n\
44\n\
45Register a codec search function. Search functions are expected to take\n\
46one argument, the encoding name in all lower case letters, and return\n\
47a tuple of functions (encoder, decoder, stream_reader, stream_writer).");
48
Guido van Rossume2d67f92000-03-10 23:09:23 +000049static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000050PyObject *codec_register(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000051{
52 PyObject *search_function;
53
54 if (!PyArg_ParseTuple(args, "O:register", &search_function))
55 goto onError;
56
57 if (PyCodec_Register(search_function))
58 goto onError;
59
60 Py_INCREF(Py_None);
61 return Py_None;
62
63 onError:
64 return NULL;
65}
66
Walter Dörwald0ae29812002-10-31 13:36:29 +000067PyDoc_STRVAR(lookup__doc__,
68"lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)\n\
69\n\
70Looks up a codec tuple in the Python codec registry and returns\n\
71a tuple of functions.");
72
Guido van Rossume2d67f92000-03-10 23:09:23 +000073static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000074PyObject *codec_lookup(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000075{
76 char *encoding;
77
78 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
79 goto onError;
80
81 return _PyCodec_Lookup(encoding);
82
83 onError:
84 return NULL;
85}
86
Marc-André Lemburg3f419742004-07-10 12:06:10 +000087PyDoc_STRVAR(encode__doc__,
88"encode(obj, [encoding[,errors]]) -> object\n\
89\n\
90Encodes obj using the codec registered for encoding. encoding defaults\n\
91to the default encoding. errors may be given to set a different error\n\
92handling scheme. Default is 'strict' meaning that encoding errors raise\n\
93a ValueError. Other possible values are 'ignore', 'replace' and\n\
94'xmlcharrefreplace' as well as any other name registered with\n\
95codecs.register_error that can handle ValueErrors.");
96
97static PyObject *
98codec_encode(PyObject *self, PyObject *args)
99{
Brett Cannon3e377de2004-07-10 21:41:14 +0000100 const char *encoding = NULL;
101 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000102 PyObject *v;
103
104 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
105 return NULL;
106
107 if (encoding == NULL)
108 encoding = PyUnicode_GetDefaultEncoding();
109
110 /* Encode via the codec registry */
111 v = PyCodec_Encode(v, encoding, errors);
112 if (v == NULL)
113 goto onError;
114 return v;
115
116 onError:
117 return NULL;
118}
119
120PyDoc_STRVAR(decode__doc__,
121"decode(obj, [encoding[,errors]]) -> object\n\
122\n\
123Decodes obj using the codec registered for encoding. encoding defaults\n\
124to the default encoding. errors may be given to set a different error\n\
125handling scheme. Default is 'strict' meaning that encoding errors raise\n\
126a ValueError. Other possible values are 'ignore' and 'replace'\n\
127as well as any other name registerd with codecs.register_error that is\n\
128able to handle ValueErrors.");
129
130static PyObject *
131codec_decode(PyObject *self, PyObject *args)
132{
Brett Cannon3e377de2004-07-10 21:41:14 +0000133 const char *encoding = NULL;
134 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000135 PyObject *v;
136
137 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
138 return NULL;
139
140 if (encoding == NULL)
141 encoding = PyUnicode_GetDefaultEncoding();
142
143 /* Decode via the codec registry */
144 v = PyCodec_Decode(v, encoding, errors);
145 if (v == NULL)
146 goto onError;
147 return v;
148
149 onError:
150 return NULL;
151}
152
Guido van Rossume2d67f92000-03-10 23:09:23 +0000153/* --- Helpers ------------------------------------------------------------ */
154
155static
156PyObject *codec_tuple(PyObject *unicode,
157 int len)
158{
159 PyObject *v,*w;
160
161 if (unicode == NULL)
162 return NULL;
163 v = PyTuple_New(2);
164 if (v == NULL) {
165 Py_DECREF(unicode);
166 return NULL;
167 }
168 PyTuple_SET_ITEM(v,0,unicode);
169 w = PyInt_FromLong(len);
170 if (w == NULL) {
171 Py_DECREF(v);
172 return NULL;
173 }
174 PyTuple_SET_ITEM(v,1,w);
175 return v;
176}
177
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000178/* --- String codecs ------------------------------------------------------ */
179static PyObject *
180escape_decode(PyObject *self,
181 PyObject *args)
182{
183 const char *errors = NULL;
184 const char *data;
185 int size;
186
187 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
188 &data, &size, &errors))
189 return NULL;
190 return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
191 size);
192}
193
194static PyObject *
195escape_encode(PyObject *self,
196 PyObject *args)
197{
198 PyObject *str;
199 const char *errors = NULL;
200 char *buf;
201 int len;
202
203 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
204 &PyString_Type, &str, &errors))
205 return NULL;
206
207 str = PyString_Repr(str, 0);
208 if (!str)
209 return NULL;
210
211 /* The string will be quoted. Unquote, similar to unicode-escape. */
212 buf = PyString_AS_STRING (str);
213 len = PyString_GET_SIZE (str);
214 memmove(buf, buf+1, len-2);
215 _PyString_Resize(&str, len-2);
216
217 return codec_tuple(str, PyString_Size(str));
218}
219
220#ifdef Py_USING_UNICODE
Guido van Rossume2d67f92000-03-10 23:09:23 +0000221/* --- Decoder ------------------------------------------------------------ */
222
223static PyObject *
224unicode_internal_decode(PyObject *self,
225 PyObject *args)
226{
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000227 PyObject *obj;
228 const char *errors = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000229 const char *data;
230 int size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000231
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000232 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
233 &obj, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000234 return NULL;
235
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000236 if (PyUnicode_Check(obj)) {
237 Py_INCREF(obj);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000238 return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000239 }
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000240 else {
241 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
242 return NULL;
243 return codec_tuple(PyUnicode_FromUnicode((Py_UNICODE *)data,
244 size / sizeof(Py_UNICODE)),
245 size);
246 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000247}
248
249static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000250utf_7_decode(PyObject *self,
251 PyObject *args)
252{
253 const char *data;
254 int size;
255 const char *errors = NULL;
256
257 if (!PyArg_ParseTuple(args, "t#|z:utf_7_decode",
258 &data, &size, &errors))
259 return NULL;
260
261 return codec_tuple(PyUnicode_DecodeUTF7(data, size, errors),
262 size);
263}
264
265static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000266utf_8_decode(PyObject *self,
267 PyObject *args)
268{
269 const char *data;
270 int size;
271 const char *errors = NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000272 int final = 0;
273 int consumed;
274 PyObject *decoded = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000275
Walter Dörwald69652032004-09-07 20:24:22 +0000276 if (!PyArg_ParseTuple(args, "t#|zi:utf_8_decode",
277 &data, &size, &errors, &final))
278 return NULL;
279 consumed = size;
280
281 decoded = PyUnicode_DecodeUTF8Stateful(data, size, errors,
282 final ? NULL : &consumed);
283 if (decoded == NULL)
284 return NULL;
285 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000286}
287
288static PyObject *
289utf_16_decode(PyObject *self,
290 PyObject *args)
291{
292 const char *data;
293 int size;
294 const char *errors = NULL;
295 int byteorder = 0;
Walter Dörwald69652032004-09-07 20:24:22 +0000296 int final = 0;
297 int consumed;
298 PyObject *decoded;
299
300 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_decode",
301 &data, &size, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000302 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000303 consumed = size;
304 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
305 final ? NULL : &consumed);
306 if (decoded == NULL)
307 return NULL;
308 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000309}
310
311static PyObject *
312utf_16_le_decode(PyObject *self,
313 PyObject *args)
314{
315 const char *data;
316 int size;
317 const char *errors = NULL;
318 int byteorder = -1;
Walter Dörwald69652032004-09-07 20:24:22 +0000319 int final = 0;
320 int consumed;
321 PyObject *decoded = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000322
Walter Dörwald69652032004-09-07 20:24:22 +0000323 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_le_decode",
324 &data, &size, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000325 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000326 consumed = size;
327 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
328 &byteorder, final ? NULL : &consumed);
329 if (decoded == NULL)
330 return NULL;
331 return codec_tuple(decoded, consumed);
332
Guido van Rossume2d67f92000-03-10 23:09:23 +0000333}
334
335static PyObject *
336utf_16_be_decode(PyObject *self,
337 PyObject *args)
338{
339 const char *data;
340 int size;
341 const char *errors = NULL;
342 int byteorder = 1;
Walter Dörwald69652032004-09-07 20:24:22 +0000343 int final = 0;
344 int consumed;
345 PyObject *decoded = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000346
Walter Dörwald69652032004-09-07 20:24:22 +0000347 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_be_decode",
348 &data, &size, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000349 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000350 consumed = size;
351 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
352 &byteorder, final ? NULL : &consumed);
353 if (decoded == NULL)
354 return NULL;
355 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000356}
357
358/* This non-standard version also provides access to the byteorder
359 parameter of the builtin UTF-16 codec.
360
361 It returns a tuple (unicode, bytesread, byteorder) with byteorder
362 being the value in effect at the end of data.
363
364*/
365
366static PyObject *
367utf_16_ex_decode(PyObject *self,
368 PyObject *args)
369{
370 const char *data;
371 int size;
372 const char *errors = NULL;
373 int byteorder = 0;
374 PyObject *unicode, *tuple;
Walter Dörwald69652032004-09-07 20:24:22 +0000375 int final = 0;
376 int consumed;
377
378 if (!PyArg_ParseTuple(args, "t#|zii:utf_16_ex_decode",
379 &data, &size, &errors, &byteorder, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000380 return NULL;
381
Walter Dörwald69652032004-09-07 20:24:22 +0000382 consumed = size;
383 unicode = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
384 final ? NULL : &consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000385 if (unicode == NULL)
386 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000387 tuple = Py_BuildValue("Oii", unicode, consumed, byteorder);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000388 Py_DECREF(unicode);
389 return tuple;
390}
391
392static PyObject *
393unicode_escape_decode(PyObject *self,
394 PyObject *args)
395{
396 const char *data;
397 int size;
398 const char *errors = NULL;
399
400 if (!PyArg_ParseTuple(args, "t#|z:unicode_escape_decode",
401 &data, &size, &errors))
402 return NULL;
403
404 return codec_tuple(PyUnicode_DecodeUnicodeEscape(data, size, errors),
405 size);
406}
407
408static PyObject *
409raw_unicode_escape_decode(PyObject *self,
410 PyObject *args)
411{
412 const char *data;
413 int size;
414 const char *errors = NULL;
415
416 if (!PyArg_ParseTuple(args, "t#|z:raw_unicode_escape_decode",
417 &data, &size, &errors))
418 return NULL;
419
420 return codec_tuple(PyUnicode_DecodeRawUnicodeEscape(data, size, errors),
421 size);
422}
423
424static PyObject *
425latin_1_decode(PyObject *self,
426 PyObject *args)
427{
428 const char *data;
429 int size;
430 const char *errors = NULL;
431
432 if (!PyArg_ParseTuple(args, "t#|z:latin_1_decode",
433 &data, &size, &errors))
434 return NULL;
435
436 return codec_tuple(PyUnicode_DecodeLatin1(data, size, errors),
437 size);
438}
439
440static PyObject *
441ascii_decode(PyObject *self,
442 PyObject *args)
443{
444 const char *data;
445 int size;
446 const char *errors = NULL;
447
448 if (!PyArg_ParseTuple(args, "t#|z:ascii_decode",
449 &data, &size, &errors))
450 return NULL;
451
452 return codec_tuple(PyUnicode_DecodeASCII(data, size, errors),
453 size);
454}
455
456static PyObject *
457charmap_decode(PyObject *self,
458 PyObject *args)
459{
460 const char *data;
461 int size;
462 const char *errors = NULL;
463 PyObject *mapping = NULL;
464
465 if (!PyArg_ParseTuple(args, "t#|zO:charmap_decode",
466 &data, &size, &errors, &mapping))
467 return NULL;
468 if (mapping == Py_None)
469 mapping = NULL;
470
471 return codec_tuple(PyUnicode_DecodeCharmap(data, size, mapping, errors),
472 size);
473}
474
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000475#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000476
477static PyObject *
478mbcs_decode(PyObject *self,
479 PyObject *args)
480{
481 const char *data;
482 int size;
483 const char *errors = NULL;
484
485 if (!PyArg_ParseTuple(args, "t#|z:mbcs_decode",
486 &data, &size, &errors))
487 return NULL;
488
489 return codec_tuple(PyUnicode_DecodeMBCS(data, size, errors),
490 size);
491}
492
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000493#endif /* MS_WINDOWS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000494
Guido van Rossume2d67f92000-03-10 23:09:23 +0000495/* --- Encoder ------------------------------------------------------------ */
496
497static PyObject *
498readbuffer_encode(PyObject *self,
499 PyObject *args)
500{
501 const char *data;
502 int size;
503 const char *errors = NULL;
504
505 if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
506 &data, &size, &errors))
507 return NULL;
508
509 return codec_tuple(PyString_FromStringAndSize(data, size),
510 size);
511}
512
513static PyObject *
514charbuffer_encode(PyObject *self,
515 PyObject *args)
516{
517 const char *data;
518 int size;
519 const char *errors = NULL;
520
521 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
522 &data, &size, &errors))
523 return NULL;
524
525 return codec_tuple(PyString_FromStringAndSize(data, size),
526 size);
527}
528
529static PyObject *
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000530unicode_internal_encode(PyObject *self,
531 PyObject *args)
532{
533 PyObject *obj;
534 const char *errors = NULL;
535 const char *data;
536 int size;
537
538 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
539 &obj, &errors))
540 return NULL;
541
542 if (PyUnicode_Check(obj)) {
543 data = PyUnicode_AS_DATA(obj);
544 size = PyUnicode_GET_DATA_SIZE(obj);
545 return codec_tuple(PyString_FromStringAndSize(data, size),
546 size);
547 }
548 else {
549 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
550 return NULL;
551 return codec_tuple(PyString_FromStringAndSize(data, size),
552 size);
553 }
554}
555
556static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000557utf_7_encode(PyObject *self,
558 PyObject *args)
559{
560 PyObject *str, *v;
561 const char *errors = NULL;
562
563 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
564 &str, &errors))
565 return NULL;
566
567 str = PyUnicode_FromObject(str);
568 if (str == NULL)
569 return NULL;
570 v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
571 PyUnicode_GET_SIZE(str),
572 0,
573 0,
574 errors),
575 PyUnicode_GET_SIZE(str));
576 Py_DECREF(str);
577 return v;
578}
579
580static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000581utf_8_encode(PyObject *self,
582 PyObject *args)
583{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000584 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000585 const char *errors = NULL;
586
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000587 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000588 &str, &errors))
589 return NULL;
590
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000591 str = PyUnicode_FromObject(str);
592 if (str == NULL)
593 return NULL;
594 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
595 PyUnicode_GET_SIZE(str),
596 errors),
597 PyUnicode_GET_SIZE(str));
598 Py_DECREF(str);
599 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000600}
601
602/* This version provides access to the byteorder parameter of the
603 builtin UTF-16 codecs as optional third argument. It defaults to 0
604 which means: use the native byte order and prepend the data with a
605 BOM mark.
606
607*/
608
609static PyObject *
610utf_16_encode(PyObject *self,
611 PyObject *args)
612{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000613 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000614 const char *errors = NULL;
615 int byteorder = 0;
616
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000617 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000618 &str, &errors, &byteorder))
619 return NULL;
620
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000621 str = PyUnicode_FromObject(str);
622 if (str == NULL)
623 return NULL;
624 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
625 PyUnicode_GET_SIZE(str),
626 errors,
627 byteorder),
628 PyUnicode_GET_SIZE(str));
629 Py_DECREF(str);
630 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000631}
632
633static PyObject *
634utf_16_le_encode(PyObject *self,
635 PyObject *args)
636{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000637 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000638 const char *errors = NULL;
639
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000640 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000641 &str, &errors))
642 return NULL;
643
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000644 str = PyUnicode_FromObject(str);
645 if (str == NULL)
646 return NULL;
647 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000648 PyUnicode_GET_SIZE(str),
649 errors,
650 -1),
651 PyUnicode_GET_SIZE(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000652 Py_DECREF(str);
653 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000654}
655
656static PyObject *
657utf_16_be_encode(PyObject *self,
658 PyObject *args)
659{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000660 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000661 const char *errors = NULL;
662
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000663 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000664 &str, &errors))
665 return NULL;
666
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000667 str = PyUnicode_FromObject(str);
668 if (str == NULL)
669 return NULL;
670 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
671 PyUnicode_GET_SIZE(str),
672 errors,
673 +1),
674 PyUnicode_GET_SIZE(str));
675 Py_DECREF(str);
676 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000677}
678
679static PyObject *
680unicode_escape_encode(PyObject *self,
681 PyObject *args)
682{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000683 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000684 const char *errors = NULL;
685
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000686 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000687 &str, &errors))
688 return NULL;
689
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000690 str = PyUnicode_FromObject(str);
691 if (str == NULL)
692 return NULL;
693 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
694 PyUnicode_GET_SIZE(str)),
695 PyUnicode_GET_SIZE(str));
696 Py_DECREF(str);
697 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000698}
699
700static PyObject *
701raw_unicode_escape_encode(PyObject *self,
702 PyObject *args)
703{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000704 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000705 const char *errors = NULL;
706
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000707 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000708 &str, &errors))
709 return NULL;
710
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000711 str = PyUnicode_FromObject(str);
712 if (str == NULL)
713 return NULL;
714 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
Guido van Rossume2d67f92000-03-10 23:09:23 +0000715 PyUnicode_AS_UNICODE(str),
716 PyUnicode_GET_SIZE(str)),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000717 PyUnicode_GET_SIZE(str));
718 Py_DECREF(str);
719 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000720}
721
722static PyObject *
723latin_1_encode(PyObject *self,
724 PyObject *args)
725{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000726 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000727 const char *errors = NULL;
728
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000729 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000730 &str, &errors))
731 return NULL;
732
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000733 str = PyUnicode_FromObject(str);
734 if (str == NULL)
735 return NULL;
736 v = codec_tuple(PyUnicode_EncodeLatin1(
Guido van Rossume2d67f92000-03-10 23:09:23 +0000737 PyUnicode_AS_UNICODE(str),
738 PyUnicode_GET_SIZE(str),
739 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000740 PyUnicode_GET_SIZE(str));
741 Py_DECREF(str);
742 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000743}
744
745static PyObject *
746ascii_encode(PyObject *self,
747 PyObject *args)
748{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000749 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000750 const char *errors = NULL;
751
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000752 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000753 &str, &errors))
754 return NULL;
755
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000756 str = PyUnicode_FromObject(str);
757 if (str == NULL)
758 return NULL;
759 v = codec_tuple(PyUnicode_EncodeASCII(
Guido van Rossume2d67f92000-03-10 23:09:23 +0000760 PyUnicode_AS_UNICODE(str),
761 PyUnicode_GET_SIZE(str),
762 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000763 PyUnicode_GET_SIZE(str));
764 Py_DECREF(str);
765 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000766}
767
768static PyObject *
769charmap_encode(PyObject *self,
770 PyObject *args)
771{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000772 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000773 const char *errors = NULL;
774 PyObject *mapping = NULL;
775
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000776 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000777 &str, &errors, &mapping))
778 return NULL;
779 if (mapping == Py_None)
780 mapping = NULL;
781
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000782 str = PyUnicode_FromObject(str);
783 if (str == NULL)
784 return NULL;
785 v = codec_tuple(PyUnicode_EncodeCharmap(
Guido van Rossume2d67f92000-03-10 23:09:23 +0000786 PyUnicode_AS_UNICODE(str),
787 PyUnicode_GET_SIZE(str),
788 mapping,
789 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000790 PyUnicode_GET_SIZE(str));
791 Py_DECREF(str);
792 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000793}
794
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000795#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000796
797static PyObject *
798mbcs_encode(PyObject *self,
799 PyObject *args)
800{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000801 PyObject *str, *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000802 const char *errors = NULL;
803
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000804 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
Guido van Rossum24bdb042000-03-28 20:29:59 +0000805 &str, &errors))
806 return NULL;
807
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000808 str = PyUnicode_FromObject(str);
809 if (str == NULL)
810 return NULL;
811 v = codec_tuple(PyUnicode_EncodeMBCS(
Guido van Rossum24bdb042000-03-28 20:29:59 +0000812 PyUnicode_AS_UNICODE(str),
813 PyUnicode_GET_SIZE(str),
814 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000815 PyUnicode_GET_SIZE(str));
816 Py_DECREF(str);
817 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000818}
819
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000820#endif /* MS_WINDOWS */
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000821#endif /* Py_USING_UNICODE */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000822
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000823/* --- Error handler registry --------------------------------------------- */
824
Walter Dörwald0ae29812002-10-31 13:36:29 +0000825PyDoc_STRVAR(register_error__doc__,
826"register_error(errors, handler)\n\
827\n\
828Register the specified error handler under the name\n\
829errors. handler must be a callable object, that\n\
830will be called with an exception instance containing\n\
831information about the location of the encoding/decoding\n\
832error and must return a (replacement, new position) tuple.");
833
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000834static PyObject *register_error(PyObject *self, PyObject *args)
835{
836 const char *name;
837 PyObject *handler;
838
839 if (!PyArg_ParseTuple(args, "sO:register_error",
840 &name, &handler))
841 return NULL;
842 if (PyCodec_RegisterError(name, handler))
843 return NULL;
844 Py_INCREF(Py_None);
845 return Py_None;
846}
847
Walter Dörwald0ae29812002-10-31 13:36:29 +0000848PyDoc_STRVAR(lookup_error__doc__,
849"lookup_error(errors) -> handler\n\
850\n\
851Return the error handler for the specified error handling name\n\
852or raise a LookupError, if no handler exists under this name.");
853
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000854static PyObject *lookup_error(PyObject *self, PyObject *args)
855{
856 const char *name;
857
858 if (!PyArg_ParseTuple(args, "s:lookup_error",
859 &name))
860 return NULL;
861 return PyCodec_LookupError(name);
862}
863
Guido van Rossume2d67f92000-03-10 23:09:23 +0000864/* --- Module API --------------------------------------------------------- */
865
866static PyMethodDef _codecs_functions[] = {
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000867 {"register", codec_register, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +0000868 register__doc__},
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000869 {"lookup", codec_lookup, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +0000870 lookup__doc__},
Brett Cannon3e377de2004-07-10 21:41:14 +0000871 {"encode", codec_encode, METH_VARARGS,
872 encode__doc__},
873 {"decode", codec_decode, METH_VARARGS,
874 decode__doc__},
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000875 {"escape_encode", escape_encode, METH_VARARGS},
876 {"escape_decode", escape_decode, METH_VARARGS},
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000877#ifdef Py_USING_UNICODE
Martin v. Löwis43b936d2002-01-17 23:15:58 +0000878 {"utf_8_encode", utf_8_encode, METH_VARARGS},
879 {"utf_8_decode", utf_8_decode, METH_VARARGS},
880 {"utf_7_encode", utf_7_encode, METH_VARARGS},
881 {"utf_7_decode", utf_7_decode, METH_VARARGS},
882 {"utf_16_encode", utf_16_encode, METH_VARARGS},
883 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
884 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
885 {"utf_16_decode", utf_16_decode, METH_VARARGS},
886 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
887 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
888 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
889 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
890 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
891 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
892 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
893 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
894 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
895 {"latin_1_encode", latin_1_encode, METH_VARARGS},
896 {"latin_1_decode", latin_1_decode, METH_VARARGS},
897 {"ascii_encode", ascii_encode, METH_VARARGS},
898 {"ascii_decode", ascii_decode, METH_VARARGS},
899 {"charmap_encode", charmap_encode, METH_VARARGS},
900 {"charmap_decode", charmap_decode, METH_VARARGS},
901 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
902 {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000903#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Martin v. Löwis43b936d2002-01-17 23:15:58 +0000904 {"mbcs_encode", mbcs_encode, METH_VARARGS},
905 {"mbcs_decode", mbcs_decode, METH_VARARGS},
Guido van Rossum24bdb042000-03-28 20:29:59 +0000906#endif
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000907#endif /* Py_USING_UNICODE */
Walter Dörwald0ae29812002-10-31 13:36:29 +0000908 {"register_error", register_error, METH_VARARGS,
909 register_error__doc__},
910 {"lookup_error", lookup_error, METH_VARARGS,
911 lookup_error__doc__},
Guido van Rossume2d67f92000-03-10 23:09:23 +0000912 {NULL, NULL} /* sentinel */
913};
914
Mark Hammondfe51c6d2002-08-02 02:27:13 +0000915PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +0000916init_codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000917{
918 Py_InitModule("_codecs", _codecs_functions);
919}