blob: cd19ab595468f2e22b1794a8c1c7f241c5000df8 [file] [log] [blame]
Guido van Rossume2d67f92000-03-10 23:09:23 +00001/* ------------------------------------------------------------------------
2
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
5
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
8
9 The codec registry is accessible via:
10
11 register(search_function) -> None
12
13 lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)
14
15 The builtin Unicode codecs use the following interface:
16
17 <encoding>_encode(Unicode_object[,errors='strict']) ->
18 (string object, bytes consumed)
19
20 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
21 (Unicode object, bytes consumed)
22
   <encoding>_encode() interfaces also accept non-Unicode objects as
   input. Such objects are converted to Unicode using
   PyUnicode_FromObject() before the conversion is applied.
26
   These <encoding>s are available: utf_7, utf_8, utf_16, utf_16_le,
   utf_16_be, unicode_escape, raw_unicode_escape, unicode_internal,
   latin_1, ascii (7-bit), charmap, mbcs (on win32).
30
Guido van Rossume2d67f92000-03-10 23:09:23 +000031
32Written by Marc-Andre Lemburg (mal@lemburg.com).
33
Guido van Rossum16b1ad92000-08-03 16:24:25 +000034Copyright (c) Corporation for National Research Initiatives.
Guido van Rossume2d67f92000-03-10 23:09:23 +000035
36 ------------------------------------------------------------------------ */
37
38#include "Python.h"
39
40/* --- Registry ----------------------------------------------------------- */
41
Walter Dörwald0ae29812002-10-31 13:36:29 +000042PyDoc_STRVAR(register__doc__,
43"register(search_function)\n\
44\n\
45Register a codec search function. Search functions are expected to take\n\
46one argument, the encoding name in all lower case letters, and return\n\
47a tuple of functions (encoder, decoder, stream_reader, stream_writer).");
48
Guido van Rossume2d67f92000-03-10 23:09:23 +000049static
50PyObject *codecregister(PyObject *self, PyObject *args)
51{
52 PyObject *search_function;
53
54 if (!PyArg_ParseTuple(args, "O:register", &search_function))
55 goto onError;
56
57 if (PyCodec_Register(search_function))
58 goto onError;
59
60 Py_INCREF(Py_None);
61 return Py_None;
62
63 onError:
64 return NULL;
65}
66
Walter Dörwald0ae29812002-10-31 13:36:29 +000067PyDoc_STRVAR(lookup__doc__,
68"lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)\n\
69\n\
70Looks up a codec tuple in the Python codec registry and returns\n\
71a tuple of functions.");
72
Guido van Rossume2d67f92000-03-10 23:09:23 +000073static
74PyObject *codeclookup(PyObject *self, PyObject *args)
75{
76 char *encoding;
77
78 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
79 goto onError;
80
81 return _PyCodec_Lookup(encoding);
82
83 onError:
84 return NULL;
85}
86
87/* --- Helpers ------------------------------------------------------------ */
88
89static
90PyObject *codec_tuple(PyObject *unicode,
91 int len)
92{
93 PyObject *v,*w;
94
95 if (unicode == NULL)
96 return NULL;
97 v = PyTuple_New(2);
98 if (v == NULL) {
99 Py_DECREF(unicode);
100 return NULL;
101 }
102 PyTuple_SET_ITEM(v,0,unicode);
103 w = PyInt_FromLong(len);
104 if (w == NULL) {
105 Py_DECREF(v);
106 return NULL;
107 }
108 PyTuple_SET_ITEM(v,1,w);
109 return v;
110}
111
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000112/* --- String codecs ------------------------------------------------------ */
113static PyObject *
114escape_decode(PyObject *self,
115 PyObject *args)
116{
117 const char *errors = NULL;
118 const char *data;
119 int size;
120
121 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
122 &data, &size, &errors))
123 return NULL;
124 return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
125 size);
126}
127
128static PyObject *
129escape_encode(PyObject *self,
130 PyObject *args)
131{
132 PyObject *str;
133 const char *errors = NULL;
134 char *buf;
135 int len;
136
137 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
138 &PyString_Type, &str, &errors))
139 return NULL;
140
141 str = PyString_Repr(str, 0);
142 if (!str)
143 return NULL;
144
145 /* The string will be quoted. Unquote, similar to unicode-escape. */
146 buf = PyString_AS_STRING (str);
147 len = PyString_GET_SIZE (str);
148 memmove(buf, buf+1, len-2);
149 _PyString_Resize(&str, len-2);
150
151 return codec_tuple(str, PyString_Size(str));
152}
153
154#ifdef Py_USING_UNICODE
Guido van Rossume2d67f92000-03-10 23:09:23 +0000155/* --- Decoder ------------------------------------------------------------ */
156
157static PyObject *
158unicode_internal_decode(PyObject *self,
159 PyObject *args)
160{
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000161 PyObject *obj;
162 const char *errors = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000163 const char *data;
164 int size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000165
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000166 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
167 &obj, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000168 return NULL;
169
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000170 if (PyUnicode_Check(obj))
171 return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
172 else {
173 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
174 return NULL;
175 return codec_tuple(PyUnicode_FromUnicode((Py_UNICODE *)data,
176 size / sizeof(Py_UNICODE)),
177 size);
178 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000179}
180
181static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000182utf_7_decode(PyObject *self,
183 PyObject *args)
184{
185 const char *data;
186 int size;
187 const char *errors = NULL;
188
189 if (!PyArg_ParseTuple(args, "t#|z:utf_7_decode",
190 &data, &size, &errors))
191 return NULL;
192
193 return codec_tuple(PyUnicode_DecodeUTF7(data, size, errors),
194 size);
195}
196
197static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000198utf_8_decode(PyObject *self,
199 PyObject *args)
200{
201 const char *data;
202 int size;
203 const char *errors = NULL;
204
205 if (!PyArg_ParseTuple(args, "t#|z:utf_8_decode",
206 &data, &size, &errors))
207 return NULL;
208
209 return codec_tuple(PyUnicode_DecodeUTF8(data, size, errors),
210 size);
211}
212
213static PyObject *
214utf_16_decode(PyObject *self,
215 PyObject *args)
216{
217 const char *data;
218 int size;
219 const char *errors = NULL;
220 int byteorder = 0;
221
222 if (!PyArg_ParseTuple(args, "t#|z:utf_16_decode",
223 &data, &size, &errors))
224 return NULL;
225 return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
226 size);
227}
228
229static PyObject *
230utf_16_le_decode(PyObject *self,
231 PyObject *args)
232{
233 const char *data;
234 int size;
235 const char *errors = NULL;
236 int byteorder = -1;
237
238 if (!PyArg_ParseTuple(args, "t#|z:utf_16_le_decode",
239 &data, &size, &errors))
240 return NULL;
241 return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
242 size);
243}
244
245static PyObject *
246utf_16_be_decode(PyObject *self,
247 PyObject *args)
248{
249 const char *data;
250 int size;
251 const char *errors = NULL;
252 int byteorder = 1;
253
254 if (!PyArg_ParseTuple(args, "t#|z:utf_16_be_decode",
255 &data, &size, &errors))
256 return NULL;
257 return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
258 size);
259}
260
261/* This non-standard version also provides access to the byteorder
262 parameter of the builtin UTF-16 codec.
263
264 It returns a tuple (unicode, bytesread, byteorder) with byteorder
265 being the value in effect at the end of data.
266
267*/
268
269static PyObject *
270utf_16_ex_decode(PyObject *self,
271 PyObject *args)
272{
273 const char *data;
274 int size;
275 const char *errors = NULL;
276 int byteorder = 0;
277 PyObject *unicode, *tuple;
278
279 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_ex_decode",
280 &data, &size, &errors, &byteorder))
281 return NULL;
282
283 unicode = PyUnicode_DecodeUTF16(data, size, errors, &byteorder);
284 if (unicode == NULL)
285 return NULL;
286 tuple = Py_BuildValue("Oii", unicode, size, byteorder);
287 Py_DECREF(unicode);
288 return tuple;
289}
290
291static PyObject *
292unicode_escape_decode(PyObject *self,
293 PyObject *args)
294{
295 const char *data;
296 int size;
297 const char *errors = NULL;
298
299 if (!PyArg_ParseTuple(args, "t#|z:unicode_escape_decode",
300 &data, &size, &errors))
301 return NULL;
302
303 return codec_tuple(PyUnicode_DecodeUnicodeEscape(data, size, errors),
304 size);
305}
306
307static PyObject *
308raw_unicode_escape_decode(PyObject *self,
309 PyObject *args)
310{
311 const char *data;
312 int size;
313 const char *errors = NULL;
314
315 if (!PyArg_ParseTuple(args, "t#|z:raw_unicode_escape_decode",
316 &data, &size, &errors))
317 return NULL;
318
319 return codec_tuple(PyUnicode_DecodeRawUnicodeEscape(data, size, errors),
320 size);
321}
322
323static PyObject *
324latin_1_decode(PyObject *self,
325 PyObject *args)
326{
327 const char *data;
328 int size;
329 const char *errors = NULL;
330
331 if (!PyArg_ParseTuple(args, "t#|z:latin_1_decode",
332 &data, &size, &errors))
333 return NULL;
334
335 return codec_tuple(PyUnicode_DecodeLatin1(data, size, errors),
336 size);
337}
338
339static PyObject *
340ascii_decode(PyObject *self,
341 PyObject *args)
342{
343 const char *data;
344 int size;
345 const char *errors = NULL;
346
347 if (!PyArg_ParseTuple(args, "t#|z:ascii_decode",
348 &data, &size, &errors))
349 return NULL;
350
351 return codec_tuple(PyUnicode_DecodeASCII(data, size, errors),
352 size);
353}
354
355static PyObject *
356charmap_decode(PyObject *self,
357 PyObject *args)
358{
359 const char *data;
360 int size;
361 const char *errors = NULL;
362 PyObject *mapping = NULL;
363
364 if (!PyArg_ParseTuple(args, "t#|zO:charmap_decode",
365 &data, &size, &errors, &mapping))
366 return NULL;
367 if (mapping == Py_None)
368 mapping = NULL;
369
370 return codec_tuple(PyUnicode_DecodeCharmap(data, size, mapping, errors),
371 size);
372}
373
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000374#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000375
376static PyObject *
377mbcs_decode(PyObject *self,
378 PyObject *args)
379{
380 const char *data;
381 int size;
382 const char *errors = NULL;
383
384 if (!PyArg_ParseTuple(args, "t#|z:mbcs_decode",
385 &data, &size, &errors))
386 return NULL;
387
388 return codec_tuple(PyUnicode_DecodeMBCS(data, size, errors),
389 size);
390}
391
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000392#endif /* MS_WINDOWS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000393
Guido van Rossume2d67f92000-03-10 23:09:23 +0000394/* --- Encoder ------------------------------------------------------------ */
395
396static PyObject *
397readbuffer_encode(PyObject *self,
398 PyObject *args)
399{
400 const char *data;
401 int size;
402 const char *errors = NULL;
403
404 if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
405 &data, &size, &errors))
406 return NULL;
407
408 return codec_tuple(PyString_FromStringAndSize(data, size),
409 size);
410}
411
412static PyObject *
413charbuffer_encode(PyObject *self,
414 PyObject *args)
415{
416 const char *data;
417 int size;
418 const char *errors = NULL;
419
420 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
421 &data, &size, &errors))
422 return NULL;
423
424 return codec_tuple(PyString_FromStringAndSize(data, size),
425 size);
426}
427
428static PyObject *
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000429unicode_internal_encode(PyObject *self,
430 PyObject *args)
431{
432 PyObject *obj;
433 const char *errors = NULL;
434 const char *data;
435 int size;
436
437 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
438 &obj, &errors))
439 return NULL;
440
441 if (PyUnicode_Check(obj)) {
442 data = PyUnicode_AS_DATA(obj);
443 size = PyUnicode_GET_DATA_SIZE(obj);
444 return codec_tuple(PyString_FromStringAndSize(data, size),
445 size);
446 }
447 else {
448 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
449 return NULL;
450 return codec_tuple(PyString_FromStringAndSize(data, size),
451 size);
452 }
453}
454
455static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000456utf_7_encode(PyObject *self,
457 PyObject *args)
458{
459 PyObject *str, *v;
460 const char *errors = NULL;
461
462 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
463 &str, &errors))
464 return NULL;
465
466 str = PyUnicode_FromObject(str);
467 if (str == NULL)
468 return NULL;
469 v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
470 PyUnicode_GET_SIZE(str),
471 0,
472 0,
473 errors),
474 PyUnicode_GET_SIZE(str));
475 Py_DECREF(str);
476 return v;
477}
478
479static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000480utf_8_encode(PyObject *self,
481 PyObject *args)
482{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000483 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000484 const char *errors = NULL;
485
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000486 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000487 &str, &errors))
488 return NULL;
489
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000490 str = PyUnicode_FromObject(str);
491 if (str == NULL)
492 return NULL;
493 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
494 PyUnicode_GET_SIZE(str),
495 errors),
496 PyUnicode_GET_SIZE(str));
497 Py_DECREF(str);
498 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000499}
500
501/* This version provides access to the byteorder parameter of the
502 builtin UTF-16 codecs as optional third argument. It defaults to 0
503 which means: use the native byte order and prepend the data with a
504 BOM mark.
505
506*/
507
508static PyObject *
509utf_16_encode(PyObject *self,
510 PyObject *args)
511{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000512 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000513 const char *errors = NULL;
514 int byteorder = 0;
515
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000516 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000517 &str, &errors, &byteorder))
518 return NULL;
519
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000520 str = PyUnicode_FromObject(str);
521 if (str == NULL)
522 return NULL;
523 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
524 PyUnicode_GET_SIZE(str),
525 errors,
526 byteorder),
527 PyUnicode_GET_SIZE(str));
528 Py_DECREF(str);
529 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000530}
531
532static PyObject *
533utf_16_le_encode(PyObject *self,
534 PyObject *args)
535{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000536 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000537 const char *errors = NULL;
538
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000539 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000540 &str, &errors))
541 return NULL;
542
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000543 str = PyUnicode_FromObject(str);
544 if (str == NULL)
545 return NULL;
546 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000547 PyUnicode_GET_SIZE(str),
548 errors,
549 -1),
550 PyUnicode_GET_SIZE(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000551 Py_DECREF(str);
552 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000553}
554
555static PyObject *
556utf_16_be_encode(PyObject *self,
557 PyObject *args)
558{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000559 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000560 const char *errors = NULL;
561
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000562 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000563 &str, &errors))
564 return NULL;
565
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000566 str = PyUnicode_FromObject(str);
567 if (str == NULL)
568 return NULL;
569 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
570 PyUnicode_GET_SIZE(str),
571 errors,
572 +1),
573 PyUnicode_GET_SIZE(str));
574 Py_DECREF(str);
575 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000576}
577
578static PyObject *
579unicode_escape_encode(PyObject *self,
580 PyObject *args)
581{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000582 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000583 const char *errors = NULL;
584
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000585 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000586 &str, &errors))
587 return NULL;
588
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000589 str = PyUnicode_FromObject(str);
590 if (str == NULL)
591 return NULL;
592 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
593 PyUnicode_GET_SIZE(str)),
594 PyUnicode_GET_SIZE(str));
595 Py_DECREF(str);
596 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000597}
598
599static PyObject *
600raw_unicode_escape_encode(PyObject *self,
601 PyObject *args)
602{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000603 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000604 const char *errors = NULL;
605
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000606 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000607 &str, &errors))
608 return NULL;
609
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000610 str = PyUnicode_FromObject(str);
611 if (str == NULL)
612 return NULL;
613 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
Guido van Rossume2d67f92000-03-10 23:09:23 +0000614 PyUnicode_AS_UNICODE(str),
615 PyUnicode_GET_SIZE(str)),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000616 PyUnicode_GET_SIZE(str));
617 Py_DECREF(str);
618 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000619}
620
621static PyObject *
622latin_1_encode(PyObject *self,
623 PyObject *args)
624{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000625 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000626 const char *errors = NULL;
627
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000628 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000629 &str, &errors))
630 return NULL;
631
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000632 str = PyUnicode_FromObject(str);
633 if (str == NULL)
634 return NULL;
635 v = codec_tuple(PyUnicode_EncodeLatin1(
Guido van Rossume2d67f92000-03-10 23:09:23 +0000636 PyUnicode_AS_UNICODE(str),
637 PyUnicode_GET_SIZE(str),
638 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000639 PyUnicode_GET_SIZE(str));
640 Py_DECREF(str);
641 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000642}
643
644static PyObject *
645ascii_encode(PyObject *self,
646 PyObject *args)
647{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000648 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000649 const char *errors = NULL;
650
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000651 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000652 &str, &errors))
653 return NULL;
654
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000655 str = PyUnicode_FromObject(str);
656 if (str == NULL)
657 return NULL;
658 v = codec_tuple(PyUnicode_EncodeASCII(
Guido van Rossume2d67f92000-03-10 23:09:23 +0000659 PyUnicode_AS_UNICODE(str),
660 PyUnicode_GET_SIZE(str),
661 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000662 PyUnicode_GET_SIZE(str));
663 Py_DECREF(str);
664 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000665}
666
667static PyObject *
668charmap_encode(PyObject *self,
669 PyObject *args)
670{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000671 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000672 const char *errors = NULL;
673 PyObject *mapping = NULL;
674
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000675 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000676 &str, &errors, &mapping))
677 return NULL;
678 if (mapping == Py_None)
679 mapping = NULL;
680
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000681 str = PyUnicode_FromObject(str);
682 if (str == NULL)
683 return NULL;
684 v = codec_tuple(PyUnicode_EncodeCharmap(
Guido van Rossume2d67f92000-03-10 23:09:23 +0000685 PyUnicode_AS_UNICODE(str),
686 PyUnicode_GET_SIZE(str),
687 mapping,
688 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000689 PyUnicode_GET_SIZE(str));
690 Py_DECREF(str);
691 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000692}
693
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000694#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000695
696static PyObject *
697mbcs_encode(PyObject *self,
698 PyObject *args)
699{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000700 PyObject *str, *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000701 const char *errors = NULL;
702
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000703 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
Guido van Rossum24bdb042000-03-28 20:29:59 +0000704 &str, &errors))
705 return NULL;
706
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000707 str = PyUnicode_FromObject(str);
708 if (str == NULL)
709 return NULL;
710 v = codec_tuple(PyUnicode_EncodeMBCS(
Guido van Rossum24bdb042000-03-28 20:29:59 +0000711 PyUnicode_AS_UNICODE(str),
712 PyUnicode_GET_SIZE(str),
713 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000714 PyUnicode_GET_SIZE(str));
715 Py_DECREF(str);
716 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000717}
718
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000719#endif /* MS_WINDOWS */
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000720#endif /* Py_USING_UNICODE */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000721
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000722/* --- Error handler registry --------------------------------------------- */
723
Walter Dörwald0ae29812002-10-31 13:36:29 +0000724PyDoc_STRVAR(register_error__doc__,
725"register_error(errors, handler)\n\
726\n\
727Register the specified error handler under the name\n\
728errors. handler must be a callable object, that\n\
729will be called with an exception instance containing\n\
730information about the location of the encoding/decoding\n\
731error and must return a (replacement, new position) tuple.");
732
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000733static PyObject *register_error(PyObject *self, PyObject *args)
734{
735 const char *name;
736 PyObject *handler;
737
738 if (!PyArg_ParseTuple(args, "sO:register_error",
739 &name, &handler))
740 return NULL;
741 if (PyCodec_RegisterError(name, handler))
742 return NULL;
743 Py_INCREF(Py_None);
744 return Py_None;
745}
746
Walter Dörwald0ae29812002-10-31 13:36:29 +0000747PyDoc_STRVAR(lookup_error__doc__,
748"lookup_error(errors) -> handler\n\
749\n\
750Return the error handler for the specified error handling name\n\
751or raise a LookupError, if no handler exists under this name.");
752
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000753static PyObject *lookup_error(PyObject *self, PyObject *args)
754{
755 const char *name;
756
757 if (!PyArg_ParseTuple(args, "s:lookup_error",
758 &name))
759 return NULL;
760 return PyCodec_LookupError(name);
761}
762
Guido van Rossume2d67f92000-03-10 23:09:23 +0000763/* --- Module API --------------------------------------------------------- */
764
765static PyMethodDef _codecs_functions[] = {
Walter Dörwald0ae29812002-10-31 13:36:29 +0000766 {"register", codecregister, METH_VARARGS,
767 register__doc__},
768 {"lookup", codeclookup, METH_VARARGS,
769 lookup__doc__},
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000770 {"escape_encode", escape_encode, METH_VARARGS},
771 {"escape_decode", escape_decode, METH_VARARGS},
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000772#ifdef Py_USING_UNICODE
Martin v. Löwis43b936d2002-01-17 23:15:58 +0000773 {"utf_8_encode", utf_8_encode, METH_VARARGS},
774 {"utf_8_decode", utf_8_decode, METH_VARARGS},
775 {"utf_7_encode", utf_7_encode, METH_VARARGS},
776 {"utf_7_decode", utf_7_decode, METH_VARARGS},
777 {"utf_16_encode", utf_16_encode, METH_VARARGS},
778 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
779 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
780 {"utf_16_decode", utf_16_decode, METH_VARARGS},
781 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
782 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
783 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
784 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
785 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
786 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
787 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
788 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
789 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
790 {"latin_1_encode", latin_1_encode, METH_VARARGS},
791 {"latin_1_decode", latin_1_decode, METH_VARARGS},
792 {"ascii_encode", ascii_encode, METH_VARARGS},
793 {"ascii_decode", ascii_decode, METH_VARARGS},
794 {"charmap_encode", charmap_encode, METH_VARARGS},
795 {"charmap_decode", charmap_decode, METH_VARARGS},
796 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
797 {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000798#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Martin v. Löwis43b936d2002-01-17 23:15:58 +0000799 {"mbcs_encode", mbcs_encode, METH_VARARGS},
800 {"mbcs_decode", mbcs_decode, METH_VARARGS},
Guido van Rossum24bdb042000-03-28 20:29:59 +0000801#endif
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000802#endif /* Py_USING_UNICODE */
Walter Dörwald0ae29812002-10-31 13:36:29 +0000803 {"register_error", register_error, METH_VARARGS,
804 register_error__doc__},
805 {"lookup_error", lookup_error, METH_VARARGS,
806 lookup_error__doc__},
Guido van Rossume2d67f92000-03-10 23:09:23 +0000807 {NULL, NULL} /* sentinel */
808};
809
Mark Hammondfe51c6d2002-08-02 02:27:13 +0000810PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +0000811init_codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000812{
813 Py_InitModule("_codecs", _codecs_functions);
814}