blob: 1e3fc5d5b8a1049b60d4ab6ce4eada7a7edeb968 [file] [log] [blame]
/* ------------------------------------------------------------------------

   _codecs -- Provides access to the codec registry and the builtin
              codecs.

   This module should never be imported directly. The standard library
   module "codecs" wraps this builtin module for use within Python.

   The codec registry is accessible via:

     register(search_function) -> None

     lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)

   The builtin Unicode codecs use the following interface:

     <encoding>_encode(Unicode_object[,errors='strict']) ->
        (string object, bytes consumed)

     <encoding>_decode(char_buffer_obj[,errors='strict']) ->
        (Unicode object, bytes consumed)

   <encoding>_encode() interfaces also accept non-Unicode objects as
   input. The objects are then converted to Unicode using
   PyUnicode_FromObject() prior to applying the conversion.

   These <encoding>s are available: utf_8, unicode_escape,
   raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
   mbcs (on win32).


Written by Marc-Andre Lemburg (mal@lemburg.com).

Copyright (c) Corporation for National Research Initiatives.

   ------------------------------------------------------------------------ */
37
38#include "Python.h"
39
40/* --- Registry ----------------------------------------------------------- */
41
42static
43PyObject *codecregister(PyObject *self, PyObject *args)
44{
45 PyObject *search_function;
46
47 if (!PyArg_ParseTuple(args, "O:register", &search_function))
48 goto onError;
49
50 if (PyCodec_Register(search_function))
51 goto onError;
52
53 Py_INCREF(Py_None);
54 return Py_None;
55
56 onError:
57 return NULL;
58}
59
60static
61PyObject *codeclookup(PyObject *self, PyObject *args)
62{
63 char *encoding;
64
65 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
66 goto onError;
67
68 return _PyCodec_Lookup(encoding);
69
70 onError:
71 return NULL;
72}
73
74/* --- Helpers ------------------------------------------------------------ */
75
76static
77PyObject *codec_tuple(PyObject *unicode,
78 int len)
79{
80 PyObject *v,*w;
81
82 if (unicode == NULL)
83 return NULL;
84 v = PyTuple_New(2);
85 if (v == NULL) {
86 Py_DECREF(unicode);
87 return NULL;
88 }
89 PyTuple_SET_ITEM(v,0,unicode);
90 w = PyInt_FromLong(len);
91 if (w == NULL) {
92 Py_DECREF(v);
93 return NULL;
94 }
95 PyTuple_SET_ITEM(v,1,w);
96 return v;
97}
98
Martin v. Löwis8a8da792002-08-14 07:46:28 +000099/* --- String codecs ------------------------------------------------------ */
100static PyObject *
101escape_decode(PyObject *self,
102 PyObject *args)
103{
104 const char *errors = NULL;
105 const char *data;
106 int size;
107
108 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
109 &data, &size, &errors))
110 return NULL;
111 return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
112 size);
113}
114
115static PyObject *
116escape_encode(PyObject *self,
117 PyObject *args)
118{
119 PyObject *str;
120 const char *errors = NULL;
121 char *buf;
122 int len;
123
124 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
125 &PyString_Type, &str, &errors))
126 return NULL;
127
128 str = PyString_Repr(str, 0);
129 if (!str)
130 return NULL;
131
132 /* The string will be quoted. Unquote, similar to unicode-escape. */
133 buf = PyString_AS_STRING (str);
134 len = PyString_GET_SIZE (str);
135 memmove(buf, buf+1, len-2);
136 _PyString_Resize(&str, len-2);
137
138 return codec_tuple(str, PyString_Size(str));
139}
140
141#ifdef Py_USING_UNICODE
Guido van Rossume2d67f92000-03-10 23:09:23 +0000142/* --- Decoder ------------------------------------------------------------ */
143
144static PyObject *
145unicode_internal_decode(PyObject *self,
146 PyObject *args)
147{
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000148 PyObject *obj;
149 const char *errors = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000150 const char *data;
151 int size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000152
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000153 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
154 &obj, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000155 return NULL;
156
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000157 if (PyUnicode_Check(obj))
158 return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
159 else {
160 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
161 return NULL;
162 return codec_tuple(PyUnicode_FromUnicode((Py_UNICODE *)data,
163 size / sizeof(Py_UNICODE)),
164 size);
165 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000166}
167
168static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000169utf_7_decode(PyObject *self,
170 PyObject *args)
171{
172 const char *data;
173 int size;
174 const char *errors = NULL;
175
176 if (!PyArg_ParseTuple(args, "t#|z:utf_7_decode",
177 &data, &size, &errors))
178 return NULL;
179
180 return codec_tuple(PyUnicode_DecodeUTF7(data, size, errors),
181 size);
182}
183
184static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000185utf_8_decode(PyObject *self,
186 PyObject *args)
187{
188 const char *data;
189 int size;
190 const char *errors = NULL;
191
192 if (!PyArg_ParseTuple(args, "t#|z:utf_8_decode",
193 &data, &size, &errors))
194 return NULL;
195
196 return codec_tuple(PyUnicode_DecodeUTF8(data, size, errors),
197 size);
198}
199
200static PyObject *
201utf_16_decode(PyObject *self,
202 PyObject *args)
203{
204 const char *data;
205 int size;
206 const char *errors = NULL;
207 int byteorder = 0;
208
209 if (!PyArg_ParseTuple(args, "t#|z:utf_16_decode",
210 &data, &size, &errors))
211 return NULL;
212 return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
213 size);
214}
215
216static PyObject *
217utf_16_le_decode(PyObject *self,
218 PyObject *args)
219{
220 const char *data;
221 int size;
222 const char *errors = NULL;
223 int byteorder = -1;
224
225 if (!PyArg_ParseTuple(args, "t#|z:utf_16_le_decode",
226 &data, &size, &errors))
227 return NULL;
228 return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
229 size);
230}
231
232static PyObject *
233utf_16_be_decode(PyObject *self,
234 PyObject *args)
235{
236 const char *data;
237 int size;
238 const char *errors = NULL;
239 int byteorder = 1;
240
241 if (!PyArg_ParseTuple(args, "t#|z:utf_16_be_decode",
242 &data, &size, &errors))
243 return NULL;
244 return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
245 size);
246}
247
/* This non-standard version also provides access to the byteorder
   parameter of the builtin UTF-16 codec.

   It returns a tuple (unicode, bytesread, byteorder) with byteorder
   being the value in effect at the end of data.

*/
255
256static PyObject *
257utf_16_ex_decode(PyObject *self,
258 PyObject *args)
259{
260 const char *data;
261 int size;
262 const char *errors = NULL;
263 int byteorder = 0;
264 PyObject *unicode, *tuple;
265
266 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_ex_decode",
267 &data, &size, &errors, &byteorder))
268 return NULL;
269
270 unicode = PyUnicode_DecodeUTF16(data, size, errors, &byteorder);
271 if (unicode == NULL)
272 return NULL;
273 tuple = Py_BuildValue("Oii", unicode, size, byteorder);
274 Py_DECREF(unicode);
275 return tuple;
276}
277
278static PyObject *
279unicode_escape_decode(PyObject *self,
280 PyObject *args)
281{
282 const char *data;
283 int size;
284 const char *errors = NULL;
285
286 if (!PyArg_ParseTuple(args, "t#|z:unicode_escape_decode",
287 &data, &size, &errors))
288 return NULL;
289
290 return codec_tuple(PyUnicode_DecodeUnicodeEscape(data, size, errors),
291 size);
292}
293
294static PyObject *
295raw_unicode_escape_decode(PyObject *self,
296 PyObject *args)
297{
298 const char *data;
299 int size;
300 const char *errors = NULL;
301
302 if (!PyArg_ParseTuple(args, "t#|z:raw_unicode_escape_decode",
303 &data, &size, &errors))
304 return NULL;
305
306 return codec_tuple(PyUnicode_DecodeRawUnicodeEscape(data, size, errors),
307 size);
308}
309
310static PyObject *
311latin_1_decode(PyObject *self,
312 PyObject *args)
313{
314 const char *data;
315 int size;
316 const char *errors = NULL;
317
318 if (!PyArg_ParseTuple(args, "t#|z:latin_1_decode",
319 &data, &size, &errors))
320 return NULL;
321
322 return codec_tuple(PyUnicode_DecodeLatin1(data, size, errors),
323 size);
324}
325
326static PyObject *
327ascii_decode(PyObject *self,
328 PyObject *args)
329{
330 const char *data;
331 int size;
332 const char *errors = NULL;
333
334 if (!PyArg_ParseTuple(args, "t#|z:ascii_decode",
335 &data, &size, &errors))
336 return NULL;
337
338 return codec_tuple(PyUnicode_DecodeASCII(data, size, errors),
339 size);
340}
341
342static PyObject *
343charmap_decode(PyObject *self,
344 PyObject *args)
345{
346 const char *data;
347 int size;
348 const char *errors = NULL;
349 PyObject *mapping = NULL;
350
351 if (!PyArg_ParseTuple(args, "t#|zO:charmap_decode",
352 &data, &size, &errors, &mapping))
353 return NULL;
354 if (mapping == Py_None)
355 mapping = NULL;
356
357 return codec_tuple(PyUnicode_DecodeCharmap(data, size, mapping, errors),
358 size);
359}
360
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000361#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000362
363static PyObject *
364mbcs_decode(PyObject *self,
365 PyObject *args)
366{
367 const char *data;
368 int size;
369 const char *errors = NULL;
370
371 if (!PyArg_ParseTuple(args, "t#|z:mbcs_decode",
372 &data, &size, &errors))
373 return NULL;
374
375 return codec_tuple(PyUnicode_DecodeMBCS(data, size, errors),
376 size);
377}
378
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000379#endif /* MS_WINDOWS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000380
Guido van Rossume2d67f92000-03-10 23:09:23 +0000381/* --- Encoder ------------------------------------------------------------ */
382
383static PyObject *
384readbuffer_encode(PyObject *self,
385 PyObject *args)
386{
387 const char *data;
388 int size;
389 const char *errors = NULL;
390
391 if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
392 &data, &size, &errors))
393 return NULL;
394
395 return codec_tuple(PyString_FromStringAndSize(data, size),
396 size);
397}
398
399static PyObject *
400charbuffer_encode(PyObject *self,
401 PyObject *args)
402{
403 const char *data;
404 int size;
405 const char *errors = NULL;
406
407 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
408 &data, &size, &errors))
409 return NULL;
410
411 return codec_tuple(PyString_FromStringAndSize(data, size),
412 size);
413}
414
415static PyObject *
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000416unicode_internal_encode(PyObject *self,
417 PyObject *args)
418{
419 PyObject *obj;
420 const char *errors = NULL;
421 const char *data;
422 int size;
423
424 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
425 &obj, &errors))
426 return NULL;
427
428 if (PyUnicode_Check(obj)) {
429 data = PyUnicode_AS_DATA(obj);
430 size = PyUnicode_GET_DATA_SIZE(obj);
431 return codec_tuple(PyString_FromStringAndSize(data, size),
432 size);
433 }
434 else {
435 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
436 return NULL;
437 return codec_tuple(PyString_FromStringAndSize(data, size),
438 size);
439 }
440}
441
442static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000443utf_7_encode(PyObject *self,
444 PyObject *args)
445{
446 PyObject *str, *v;
447 const char *errors = NULL;
448
449 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
450 &str, &errors))
451 return NULL;
452
453 str = PyUnicode_FromObject(str);
454 if (str == NULL)
455 return NULL;
456 v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
457 PyUnicode_GET_SIZE(str),
458 0,
459 0,
460 errors),
461 PyUnicode_GET_SIZE(str));
462 Py_DECREF(str);
463 return v;
464}
465
466static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000467utf_8_encode(PyObject *self,
468 PyObject *args)
469{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000470 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000471 const char *errors = NULL;
472
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000473 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000474 &str, &errors))
475 return NULL;
476
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000477 str = PyUnicode_FromObject(str);
478 if (str == NULL)
479 return NULL;
480 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
481 PyUnicode_GET_SIZE(str),
482 errors),
483 PyUnicode_GET_SIZE(str));
484 Py_DECREF(str);
485 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000486}
487
/* This version provides access to the byteorder parameter of the
   builtin UTF-16 codecs as an optional third argument. It defaults to 0,
   which means: use the native byte order and prepend the data with a
   BOM.

*/
494
495static PyObject *
496utf_16_encode(PyObject *self,
497 PyObject *args)
498{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000499 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000500 const char *errors = NULL;
501 int byteorder = 0;
502
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000503 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000504 &str, &errors, &byteorder))
505 return NULL;
506
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000507 str = PyUnicode_FromObject(str);
508 if (str == NULL)
509 return NULL;
510 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
511 PyUnicode_GET_SIZE(str),
512 errors,
513 byteorder),
514 PyUnicode_GET_SIZE(str));
515 Py_DECREF(str);
516 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000517}
518
519static PyObject *
520utf_16_le_encode(PyObject *self,
521 PyObject *args)
522{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000523 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000524 const char *errors = NULL;
525
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000526 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000527 &str, &errors))
528 return NULL;
529
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000530 str = PyUnicode_FromObject(str);
531 if (str == NULL)
532 return NULL;
533 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000534 PyUnicode_GET_SIZE(str),
535 errors,
536 -1),
537 PyUnicode_GET_SIZE(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000538 Py_DECREF(str);
539 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000540}
541
542static PyObject *
543utf_16_be_encode(PyObject *self,
544 PyObject *args)
545{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000546 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000547 const char *errors = NULL;
548
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000549 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000550 &str, &errors))
551 return NULL;
552
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000553 str = PyUnicode_FromObject(str);
554 if (str == NULL)
555 return NULL;
556 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
557 PyUnicode_GET_SIZE(str),
558 errors,
559 +1),
560 PyUnicode_GET_SIZE(str));
561 Py_DECREF(str);
562 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000563}
564
565static PyObject *
566unicode_escape_encode(PyObject *self,
567 PyObject *args)
568{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000569 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000570 const char *errors = NULL;
571
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000572 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000573 &str, &errors))
574 return NULL;
575
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000576 str = PyUnicode_FromObject(str);
577 if (str == NULL)
578 return NULL;
579 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
580 PyUnicode_GET_SIZE(str)),
581 PyUnicode_GET_SIZE(str));
582 Py_DECREF(str);
583 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000584}
585
586static PyObject *
587raw_unicode_escape_encode(PyObject *self,
588 PyObject *args)
589{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000590 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000591 const char *errors = NULL;
592
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000593 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000594 &str, &errors))
595 return NULL;
596
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000597 str = PyUnicode_FromObject(str);
598 if (str == NULL)
599 return NULL;
600 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
Guido van Rossume2d67f92000-03-10 23:09:23 +0000601 PyUnicode_AS_UNICODE(str),
602 PyUnicode_GET_SIZE(str)),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000603 PyUnicode_GET_SIZE(str));
604 Py_DECREF(str);
605 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000606}
607
608static PyObject *
609latin_1_encode(PyObject *self,
610 PyObject *args)
611{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000612 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000613 const char *errors = NULL;
614
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000615 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000616 &str, &errors))
617 return NULL;
618
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000619 str = PyUnicode_FromObject(str);
620 if (str == NULL)
621 return NULL;
622 v = codec_tuple(PyUnicode_EncodeLatin1(
Guido van Rossume2d67f92000-03-10 23:09:23 +0000623 PyUnicode_AS_UNICODE(str),
624 PyUnicode_GET_SIZE(str),
625 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000626 PyUnicode_GET_SIZE(str));
627 Py_DECREF(str);
628 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000629}
630
631static PyObject *
632ascii_encode(PyObject *self,
633 PyObject *args)
634{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000635 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000636 const char *errors = NULL;
637
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000638 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000639 &str, &errors))
640 return NULL;
641
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000642 str = PyUnicode_FromObject(str);
643 if (str == NULL)
644 return NULL;
645 v = codec_tuple(PyUnicode_EncodeASCII(
Guido van Rossume2d67f92000-03-10 23:09:23 +0000646 PyUnicode_AS_UNICODE(str),
647 PyUnicode_GET_SIZE(str),
648 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000649 PyUnicode_GET_SIZE(str));
650 Py_DECREF(str);
651 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000652}
653
654static PyObject *
655charmap_encode(PyObject *self,
656 PyObject *args)
657{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000658 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000659 const char *errors = NULL;
660 PyObject *mapping = NULL;
661
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000662 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000663 &str, &errors, &mapping))
664 return NULL;
665 if (mapping == Py_None)
666 mapping = NULL;
667
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000668 str = PyUnicode_FromObject(str);
669 if (str == NULL)
670 return NULL;
671 v = codec_tuple(PyUnicode_EncodeCharmap(
Guido van Rossume2d67f92000-03-10 23:09:23 +0000672 PyUnicode_AS_UNICODE(str),
673 PyUnicode_GET_SIZE(str),
674 mapping,
675 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000676 PyUnicode_GET_SIZE(str));
677 Py_DECREF(str);
678 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000679}
680
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000681#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000682
683static PyObject *
684mbcs_encode(PyObject *self,
685 PyObject *args)
686{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000687 PyObject *str, *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000688 const char *errors = NULL;
689
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000690 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
Guido van Rossum24bdb042000-03-28 20:29:59 +0000691 &str, &errors))
692 return NULL;
693
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000694 str = PyUnicode_FromObject(str);
695 if (str == NULL)
696 return NULL;
697 v = codec_tuple(PyUnicode_EncodeMBCS(
Guido van Rossum24bdb042000-03-28 20:29:59 +0000698 PyUnicode_AS_UNICODE(str),
699 PyUnicode_GET_SIZE(str),
700 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000701 PyUnicode_GET_SIZE(str));
702 Py_DECREF(str);
703 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000704}
705
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000706#endif /* MS_WINDOWS */
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000707#endif /* Py_USING_UNICODE */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000708
Guido van Rossume2d67f92000-03-10 23:09:23 +0000709/* --- Module API --------------------------------------------------------- */
710
711static PyMethodDef _codecs_functions[] = {
Martin v. Löwis43b936d2002-01-17 23:15:58 +0000712 {"register", codecregister, METH_VARARGS},
713 {"lookup", codeclookup, METH_VARARGS},
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000714 {"escape_encode", escape_encode, METH_VARARGS},
715 {"escape_decode", escape_decode, METH_VARARGS},
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000716#ifdef Py_USING_UNICODE
Martin v. Löwis43b936d2002-01-17 23:15:58 +0000717 {"utf_8_encode", utf_8_encode, METH_VARARGS},
718 {"utf_8_decode", utf_8_decode, METH_VARARGS},
719 {"utf_7_encode", utf_7_encode, METH_VARARGS},
720 {"utf_7_decode", utf_7_decode, METH_VARARGS},
721 {"utf_16_encode", utf_16_encode, METH_VARARGS},
722 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
723 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
724 {"utf_16_decode", utf_16_decode, METH_VARARGS},
725 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
726 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
727 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
728 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
729 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
730 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
731 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
732 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
733 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
734 {"latin_1_encode", latin_1_encode, METH_VARARGS},
735 {"latin_1_decode", latin_1_decode, METH_VARARGS},
736 {"ascii_encode", ascii_encode, METH_VARARGS},
737 {"ascii_decode", ascii_decode, METH_VARARGS},
738 {"charmap_encode", charmap_encode, METH_VARARGS},
739 {"charmap_decode", charmap_decode, METH_VARARGS},
740 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
741 {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000742#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Martin v. Löwis43b936d2002-01-17 23:15:58 +0000743 {"mbcs_encode", mbcs_encode, METH_VARARGS},
744 {"mbcs_decode", mbcs_decode, METH_VARARGS},
Guido van Rossum24bdb042000-03-28 20:29:59 +0000745#endif
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000746#endif /* Py_USING_UNICODE */
Guido van Rossume2d67f92000-03-10 23:09:23 +0000747 {NULL, NULL} /* sentinel */
748};
749
Mark Hammondfe51c6d2002-08-02 02:27:13 +0000750PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +0000751init_codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000752{
753 Py_InitModule("_codecs", _codecs_functions);
754}