blob: 210be516f949254ec32efc51cf4f9e65334f91fc [file] [log] [blame]
Guido van Rossume2d67f92000-03-10 23:09:23 +00001/* ------------------------------------------------------------------------
2
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
5
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
8
9 The codec registry is accessible via:
10
11 register(search_function) -> None
12
13 lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)
14
15 The builtin Unicode codecs use the following interface:
16
17 <encoding>_encode(Unicode_object[,errors='strict']) ->
18 (string object, bytes consumed)
19
20 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
21 (Unicode object, bytes consumed)
22
   The <encoding>_encode() interfaces also accept non-Unicode objects as
   input. Such objects are first converted to Unicode using
   PyUnicode_FromObject() before the conversion is applied.
26
   These <encoding>s are available: utf_7, utf_8, utf_16 (also
   utf_16_le and utf_16_be), unicode_escape, raw_unicode_escape,
   unicode_internal, latin_1, ascii (7-bit), charmap, mbcs (on win32).
30
Guido van Rossume2d67f92000-03-10 23:09:23 +000031
32Written by Marc-Andre Lemburg (mal@lemburg.com).
33
Guido van Rossum16b1ad92000-08-03 16:24:25 +000034Copyright (c) Corporation for National Research Initiatives.
Guido van Rossume2d67f92000-03-10 23:09:23 +000035
36 ------------------------------------------------------------------------ */
37
38#include "Python.h"
39
40/* --- Registry ----------------------------------------------------------- */
41
Walter Dörwald0ae29812002-10-31 13:36:29 +000042PyDoc_STRVAR(register__doc__,
43"register(search_function)\n\
44\n\
45Register a codec search function. Search functions are expected to take\n\
46one argument, the encoding name in all lower case letters, and return\n\
47a tuple of functions (encoder, decoder, stream_reader, stream_writer).");
48
Guido van Rossume2d67f92000-03-10 23:09:23 +000049static
50PyObject *codecregister(PyObject *self, PyObject *args)
51{
52 PyObject *search_function;
53
54 if (!PyArg_ParseTuple(args, "O:register", &search_function))
55 goto onError;
56
57 if (PyCodec_Register(search_function))
58 goto onError;
59
60 Py_INCREF(Py_None);
61 return Py_None;
62
63 onError:
64 return NULL;
65}
66
Walter Dörwald0ae29812002-10-31 13:36:29 +000067PyDoc_STRVAR(lookup__doc__,
68"lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)\n\
69\n\
70Looks up a codec tuple in the Python codec registry and returns\n\
71a tuple of functions.");
72
Guido van Rossume2d67f92000-03-10 23:09:23 +000073static
74PyObject *codeclookup(PyObject *self, PyObject *args)
75{
76 char *encoding;
77
78 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
79 goto onError;
80
81 return _PyCodec_Lookup(encoding);
82
83 onError:
84 return NULL;
85}
86
87/* --- Helpers ------------------------------------------------------------ */
88
89static
90PyObject *codec_tuple(PyObject *unicode,
91 int len)
92{
93 PyObject *v,*w;
94
95 if (unicode == NULL)
96 return NULL;
97 v = PyTuple_New(2);
98 if (v == NULL) {
99 Py_DECREF(unicode);
100 return NULL;
101 }
102 PyTuple_SET_ITEM(v,0,unicode);
103 w = PyInt_FromLong(len);
104 if (w == NULL) {
105 Py_DECREF(v);
106 return NULL;
107 }
108 PyTuple_SET_ITEM(v,1,w);
109 return v;
110}
111
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000112/* --- String codecs ------------------------------------------------------ */
113static PyObject *
114escape_decode(PyObject *self,
115 PyObject *args)
116{
117 const char *errors = NULL;
118 const char *data;
119 int size;
120
121 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
122 &data, &size, &errors))
123 return NULL;
124 return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
125 size);
126}
127
128static PyObject *
129escape_encode(PyObject *self,
130 PyObject *args)
131{
132 PyObject *str;
133 const char *errors = NULL;
134 char *buf;
135 int len;
136
137 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
138 &PyString_Type, &str, &errors))
139 return NULL;
140
141 str = PyString_Repr(str, 0);
142 if (!str)
143 return NULL;
144
145 /* The string will be quoted. Unquote, similar to unicode-escape. */
146 buf = PyString_AS_STRING (str);
147 len = PyString_GET_SIZE (str);
148 memmove(buf, buf+1, len-2);
149 _PyString_Resize(&str, len-2);
150
151 return codec_tuple(str, PyString_Size(str));
152}
153
154#ifdef Py_USING_UNICODE
Guido van Rossume2d67f92000-03-10 23:09:23 +0000155/* --- Decoder ------------------------------------------------------------ */
156
157static PyObject *
158unicode_internal_decode(PyObject *self,
159 PyObject *args)
160{
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000161 PyObject *obj;
162 const char *errors = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000163 const char *data;
164 int size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000165
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000166 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
167 &obj, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000168 return NULL;
169
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000170 if (PyUnicode_Check(obj)) {
171 Py_INCREF(obj);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000172 return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000173 }
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000174 else {
175 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
176 return NULL;
177 return codec_tuple(PyUnicode_FromUnicode((Py_UNICODE *)data,
178 size / sizeof(Py_UNICODE)),
179 size);
180 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000181}
182
183static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000184utf_7_decode(PyObject *self,
185 PyObject *args)
186{
187 const char *data;
188 int size;
189 const char *errors = NULL;
190
191 if (!PyArg_ParseTuple(args, "t#|z:utf_7_decode",
192 &data, &size, &errors))
193 return NULL;
194
195 return codec_tuple(PyUnicode_DecodeUTF7(data, size, errors),
196 size);
197}
198
199static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000200utf_8_decode(PyObject *self,
201 PyObject *args)
202{
203 const char *data;
204 int size;
205 const char *errors = NULL;
206
207 if (!PyArg_ParseTuple(args, "t#|z:utf_8_decode",
208 &data, &size, &errors))
209 return NULL;
210
211 return codec_tuple(PyUnicode_DecodeUTF8(data, size, errors),
212 size);
213}
214
215static PyObject *
216utf_16_decode(PyObject *self,
217 PyObject *args)
218{
219 const char *data;
220 int size;
221 const char *errors = NULL;
222 int byteorder = 0;
223
224 if (!PyArg_ParseTuple(args, "t#|z:utf_16_decode",
225 &data, &size, &errors))
226 return NULL;
227 return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
228 size);
229}
230
231static PyObject *
232utf_16_le_decode(PyObject *self,
233 PyObject *args)
234{
235 const char *data;
236 int size;
237 const char *errors = NULL;
238 int byteorder = -1;
239
240 if (!PyArg_ParseTuple(args, "t#|z:utf_16_le_decode",
241 &data, &size, &errors))
242 return NULL;
243 return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
244 size);
245}
246
247static PyObject *
248utf_16_be_decode(PyObject *self,
249 PyObject *args)
250{
251 const char *data;
252 int size;
253 const char *errors = NULL;
254 int byteorder = 1;
255
256 if (!PyArg_ParseTuple(args, "t#|z:utf_16_be_decode",
257 &data, &size, &errors))
258 return NULL;
259 return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
260 size);
261}
262
263/* This non-standard version also provides access to the byteorder
264 parameter of the builtin UTF-16 codec.
265
266 It returns a tuple (unicode, bytesread, byteorder) with byteorder
267 being the value in effect at the end of data.
268
269*/
270
271static PyObject *
272utf_16_ex_decode(PyObject *self,
273 PyObject *args)
274{
275 const char *data;
276 int size;
277 const char *errors = NULL;
278 int byteorder = 0;
279 PyObject *unicode, *tuple;
280
281 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_ex_decode",
282 &data, &size, &errors, &byteorder))
283 return NULL;
284
285 unicode = PyUnicode_DecodeUTF16(data, size, errors, &byteorder);
286 if (unicode == NULL)
287 return NULL;
288 tuple = Py_BuildValue("Oii", unicode, size, byteorder);
289 Py_DECREF(unicode);
290 return tuple;
291}
292
293static PyObject *
294unicode_escape_decode(PyObject *self,
295 PyObject *args)
296{
297 const char *data;
298 int size;
299 const char *errors = NULL;
300
301 if (!PyArg_ParseTuple(args, "t#|z:unicode_escape_decode",
302 &data, &size, &errors))
303 return NULL;
304
305 return codec_tuple(PyUnicode_DecodeUnicodeEscape(data, size, errors),
306 size);
307}
308
309static PyObject *
310raw_unicode_escape_decode(PyObject *self,
311 PyObject *args)
312{
313 const char *data;
314 int size;
315 const char *errors = NULL;
316
317 if (!PyArg_ParseTuple(args, "t#|z:raw_unicode_escape_decode",
318 &data, &size, &errors))
319 return NULL;
320
321 return codec_tuple(PyUnicode_DecodeRawUnicodeEscape(data, size, errors),
322 size);
323}
324
325static PyObject *
326latin_1_decode(PyObject *self,
327 PyObject *args)
328{
329 const char *data;
330 int size;
331 const char *errors = NULL;
332
333 if (!PyArg_ParseTuple(args, "t#|z:latin_1_decode",
334 &data, &size, &errors))
335 return NULL;
336
337 return codec_tuple(PyUnicode_DecodeLatin1(data, size, errors),
338 size);
339}
340
341static PyObject *
342ascii_decode(PyObject *self,
343 PyObject *args)
344{
345 const char *data;
346 int size;
347 const char *errors = NULL;
348
349 if (!PyArg_ParseTuple(args, "t#|z:ascii_decode",
350 &data, &size, &errors))
351 return NULL;
352
353 return codec_tuple(PyUnicode_DecodeASCII(data, size, errors),
354 size);
355}
356
357static PyObject *
358charmap_decode(PyObject *self,
359 PyObject *args)
360{
361 const char *data;
362 int size;
363 const char *errors = NULL;
364 PyObject *mapping = NULL;
365
366 if (!PyArg_ParseTuple(args, "t#|zO:charmap_decode",
367 &data, &size, &errors, &mapping))
368 return NULL;
369 if (mapping == Py_None)
370 mapping = NULL;
371
372 return codec_tuple(PyUnicode_DecodeCharmap(data, size, mapping, errors),
373 size);
374}
375
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000376#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000377
378static PyObject *
379mbcs_decode(PyObject *self,
380 PyObject *args)
381{
382 const char *data;
383 int size;
384 const char *errors = NULL;
385
386 if (!PyArg_ParseTuple(args, "t#|z:mbcs_decode",
387 &data, &size, &errors))
388 return NULL;
389
390 return codec_tuple(PyUnicode_DecodeMBCS(data, size, errors),
391 size);
392}
393
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000394#endif /* MS_WINDOWS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000395
Guido van Rossume2d67f92000-03-10 23:09:23 +0000396/* --- Encoder ------------------------------------------------------------ */
397
398static PyObject *
399readbuffer_encode(PyObject *self,
400 PyObject *args)
401{
402 const char *data;
403 int size;
404 const char *errors = NULL;
405
406 if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
407 &data, &size, &errors))
408 return NULL;
409
410 return codec_tuple(PyString_FromStringAndSize(data, size),
411 size);
412}
413
414static PyObject *
415charbuffer_encode(PyObject *self,
416 PyObject *args)
417{
418 const char *data;
419 int size;
420 const char *errors = NULL;
421
422 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
423 &data, &size, &errors))
424 return NULL;
425
426 return codec_tuple(PyString_FromStringAndSize(data, size),
427 size);
428}
429
430static PyObject *
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000431unicode_internal_encode(PyObject *self,
432 PyObject *args)
433{
434 PyObject *obj;
435 const char *errors = NULL;
436 const char *data;
437 int size;
438
439 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
440 &obj, &errors))
441 return NULL;
442
443 if (PyUnicode_Check(obj)) {
444 data = PyUnicode_AS_DATA(obj);
445 size = PyUnicode_GET_DATA_SIZE(obj);
446 return codec_tuple(PyString_FromStringAndSize(data, size),
447 size);
448 }
449 else {
450 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
451 return NULL;
452 return codec_tuple(PyString_FromStringAndSize(data, size),
453 size);
454 }
455}
456
457static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000458utf_7_encode(PyObject *self,
459 PyObject *args)
460{
461 PyObject *str, *v;
462 const char *errors = NULL;
463
464 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
465 &str, &errors))
466 return NULL;
467
468 str = PyUnicode_FromObject(str);
469 if (str == NULL)
470 return NULL;
471 v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
472 PyUnicode_GET_SIZE(str),
473 0,
474 0,
475 errors),
476 PyUnicode_GET_SIZE(str));
477 Py_DECREF(str);
478 return v;
479}
480
481static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000482utf_8_encode(PyObject *self,
483 PyObject *args)
484{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000485 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000486 const char *errors = NULL;
487
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000488 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000489 &str, &errors))
490 return NULL;
491
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000492 str = PyUnicode_FromObject(str);
493 if (str == NULL)
494 return NULL;
495 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
496 PyUnicode_GET_SIZE(str),
497 errors),
498 PyUnicode_GET_SIZE(str));
499 Py_DECREF(str);
500 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000501}
502
503/* This version provides access to the byteorder parameter of the
504 builtin UTF-16 codecs as optional third argument. It defaults to 0
505 which means: use the native byte order and prepend the data with a
506 BOM mark.
507
508*/
509
510static PyObject *
511utf_16_encode(PyObject *self,
512 PyObject *args)
513{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000514 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000515 const char *errors = NULL;
516 int byteorder = 0;
517
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000518 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000519 &str, &errors, &byteorder))
520 return NULL;
521
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000522 str = PyUnicode_FromObject(str);
523 if (str == NULL)
524 return NULL;
525 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
526 PyUnicode_GET_SIZE(str),
527 errors,
528 byteorder),
529 PyUnicode_GET_SIZE(str));
530 Py_DECREF(str);
531 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000532}
533
534static PyObject *
535utf_16_le_encode(PyObject *self,
536 PyObject *args)
537{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000538 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000539 const char *errors = NULL;
540
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000541 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000542 &str, &errors))
543 return NULL;
544
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000545 str = PyUnicode_FromObject(str);
546 if (str == NULL)
547 return NULL;
548 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000549 PyUnicode_GET_SIZE(str),
550 errors,
551 -1),
552 PyUnicode_GET_SIZE(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000553 Py_DECREF(str);
554 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000555}
556
557static PyObject *
558utf_16_be_encode(PyObject *self,
559 PyObject *args)
560{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000561 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000562 const char *errors = NULL;
563
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000564 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000565 &str, &errors))
566 return NULL;
567
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000568 str = PyUnicode_FromObject(str);
569 if (str == NULL)
570 return NULL;
571 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
572 PyUnicode_GET_SIZE(str),
573 errors,
574 +1),
575 PyUnicode_GET_SIZE(str));
576 Py_DECREF(str);
577 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000578}
579
580static PyObject *
581unicode_escape_encode(PyObject *self,
582 PyObject *args)
583{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000584 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000585 const char *errors = NULL;
586
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000587 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000588 &str, &errors))
589 return NULL;
590
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000591 str = PyUnicode_FromObject(str);
592 if (str == NULL)
593 return NULL;
594 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
595 PyUnicode_GET_SIZE(str)),
596 PyUnicode_GET_SIZE(str));
597 Py_DECREF(str);
598 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000599}
600
601static PyObject *
602raw_unicode_escape_encode(PyObject *self,
603 PyObject *args)
604{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000605 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000606 const char *errors = NULL;
607
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000608 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000609 &str, &errors))
610 return NULL;
611
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000612 str = PyUnicode_FromObject(str);
613 if (str == NULL)
614 return NULL;
615 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
Guido van Rossume2d67f92000-03-10 23:09:23 +0000616 PyUnicode_AS_UNICODE(str),
617 PyUnicode_GET_SIZE(str)),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000618 PyUnicode_GET_SIZE(str));
619 Py_DECREF(str);
620 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000621}
622
623static PyObject *
624latin_1_encode(PyObject *self,
625 PyObject *args)
626{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000627 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000628 const char *errors = NULL;
629
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000630 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000631 &str, &errors))
632 return NULL;
633
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000634 str = PyUnicode_FromObject(str);
635 if (str == NULL)
636 return NULL;
637 v = codec_tuple(PyUnicode_EncodeLatin1(
Guido van Rossume2d67f92000-03-10 23:09:23 +0000638 PyUnicode_AS_UNICODE(str),
639 PyUnicode_GET_SIZE(str),
640 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000641 PyUnicode_GET_SIZE(str));
642 Py_DECREF(str);
643 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000644}
645
646static PyObject *
647ascii_encode(PyObject *self,
648 PyObject *args)
649{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000650 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000651 const char *errors = NULL;
652
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000653 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000654 &str, &errors))
655 return NULL;
656
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000657 str = PyUnicode_FromObject(str);
658 if (str == NULL)
659 return NULL;
660 v = codec_tuple(PyUnicode_EncodeASCII(
Guido van Rossume2d67f92000-03-10 23:09:23 +0000661 PyUnicode_AS_UNICODE(str),
662 PyUnicode_GET_SIZE(str),
663 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000664 PyUnicode_GET_SIZE(str));
665 Py_DECREF(str);
666 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000667}
668
669static PyObject *
670charmap_encode(PyObject *self,
671 PyObject *args)
672{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000673 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000674 const char *errors = NULL;
675 PyObject *mapping = NULL;
676
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000677 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000678 &str, &errors, &mapping))
679 return NULL;
680 if (mapping == Py_None)
681 mapping = NULL;
682
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000683 str = PyUnicode_FromObject(str);
684 if (str == NULL)
685 return NULL;
686 v = codec_tuple(PyUnicode_EncodeCharmap(
Guido van Rossume2d67f92000-03-10 23:09:23 +0000687 PyUnicode_AS_UNICODE(str),
688 PyUnicode_GET_SIZE(str),
689 mapping,
690 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000691 PyUnicode_GET_SIZE(str));
692 Py_DECREF(str);
693 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000694}
695
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000696#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000697
698static PyObject *
699mbcs_encode(PyObject *self,
700 PyObject *args)
701{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000702 PyObject *str, *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000703 const char *errors = NULL;
704
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000705 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
Guido van Rossum24bdb042000-03-28 20:29:59 +0000706 &str, &errors))
707 return NULL;
708
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000709 str = PyUnicode_FromObject(str);
710 if (str == NULL)
711 return NULL;
712 v = codec_tuple(PyUnicode_EncodeMBCS(
Guido van Rossum24bdb042000-03-28 20:29:59 +0000713 PyUnicode_AS_UNICODE(str),
714 PyUnicode_GET_SIZE(str),
715 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000716 PyUnicode_GET_SIZE(str));
717 Py_DECREF(str);
718 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000719}
720
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000721#endif /* MS_WINDOWS */
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000722#endif /* Py_USING_UNICODE */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000723
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000724/* --- Error handler registry --------------------------------------------- */
725
Walter Dörwald0ae29812002-10-31 13:36:29 +0000726PyDoc_STRVAR(register_error__doc__,
727"register_error(errors, handler)\n\
728\n\
729Register the specified error handler under the name\n\
730errors. handler must be a callable object, that\n\
731will be called with an exception instance containing\n\
732information about the location of the encoding/decoding\n\
733error and must return a (replacement, new position) tuple.");
734
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000735static PyObject *register_error(PyObject *self, PyObject *args)
736{
737 const char *name;
738 PyObject *handler;
739
740 if (!PyArg_ParseTuple(args, "sO:register_error",
741 &name, &handler))
742 return NULL;
743 if (PyCodec_RegisterError(name, handler))
744 return NULL;
745 Py_INCREF(Py_None);
746 return Py_None;
747}
748
Walter Dörwald0ae29812002-10-31 13:36:29 +0000749PyDoc_STRVAR(lookup_error__doc__,
750"lookup_error(errors) -> handler\n\
751\n\
752Return the error handler for the specified error handling name\n\
753or raise a LookupError, if no handler exists under this name.");
754
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000755static PyObject *lookup_error(PyObject *self, PyObject *args)
756{
757 const char *name;
758
759 if (!PyArg_ParseTuple(args, "s:lookup_error",
760 &name))
761 return NULL;
762 return PyCodec_LookupError(name);
763}
764
Guido van Rossume2d67f92000-03-10 23:09:23 +0000765/* --- Module API --------------------------------------------------------- */
766
767static PyMethodDef _codecs_functions[] = {
Walter Dörwald0ae29812002-10-31 13:36:29 +0000768 {"register", codecregister, METH_VARARGS,
769 register__doc__},
770 {"lookup", codeclookup, METH_VARARGS,
771 lookup__doc__},
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000772 {"escape_encode", escape_encode, METH_VARARGS},
773 {"escape_decode", escape_decode, METH_VARARGS},
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000774#ifdef Py_USING_UNICODE
Martin v. Löwis43b936d2002-01-17 23:15:58 +0000775 {"utf_8_encode", utf_8_encode, METH_VARARGS},
776 {"utf_8_decode", utf_8_decode, METH_VARARGS},
777 {"utf_7_encode", utf_7_encode, METH_VARARGS},
778 {"utf_7_decode", utf_7_decode, METH_VARARGS},
779 {"utf_16_encode", utf_16_encode, METH_VARARGS},
780 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
781 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
782 {"utf_16_decode", utf_16_decode, METH_VARARGS},
783 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
784 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
785 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
786 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
787 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
788 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
789 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
790 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
791 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
792 {"latin_1_encode", latin_1_encode, METH_VARARGS},
793 {"latin_1_decode", latin_1_decode, METH_VARARGS},
794 {"ascii_encode", ascii_encode, METH_VARARGS},
795 {"ascii_decode", ascii_decode, METH_VARARGS},
796 {"charmap_encode", charmap_encode, METH_VARARGS},
797 {"charmap_decode", charmap_decode, METH_VARARGS},
798 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
799 {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000800#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Martin v. Löwis43b936d2002-01-17 23:15:58 +0000801 {"mbcs_encode", mbcs_encode, METH_VARARGS},
802 {"mbcs_decode", mbcs_decode, METH_VARARGS},
Guido van Rossum24bdb042000-03-28 20:29:59 +0000803#endif
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000804#endif /* Py_USING_UNICODE */
Walter Dörwald0ae29812002-10-31 13:36:29 +0000805 {"register_error", register_error, METH_VARARGS,
806 register_error__doc__},
807 {"lookup_error", lookup_error, METH_VARARGS,
808 lookup_error__doc__},
Guido van Rossume2d67f92000-03-10 23:09:23 +0000809 {NULL, NULL} /* sentinel */
810};
811
Mark Hammondfe51c6d2002-08-02 02:27:13 +0000812PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +0000813init_codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000814{
815 Py_InitModule("_codecs", _codecs_functions);
816}