blob: a6c42b134bd55a773a8ff2edd394c4da1a47be7b [file] [log] [blame]
Guido van Rossume2d67f92000-03-10 23:09:23 +00001/* ------------------------------------------------------------------------
2
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
5
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
8
9 The codec registry is accessible via:
10
11 register(search_function) -> None
12
13 lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)
14
15 The builtin Unicode codecs use the following interface:
16
17 <encoding>_encode(Unicode_object[,errors='strict']) ->
18 (string object, bytes consumed)
19
20 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
21 (Unicode object, bytes consumed)
22
 <encoding>_encode() interfaces also accept non-Unicode objects as
 input. The objects are then converted to Unicode using
 PyUnicode_FromObject() prior to applying the conversion.
26
Guido van Rossume2d67f92000-03-10 23:09:23 +000027 These <encoding>s are available: utf_8, unicode_escape,
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +000028 raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
29 mbcs (on win32).
30
Guido van Rossume2d67f92000-03-10 23:09:23 +000031
32Written by Marc-Andre Lemburg (mal@lemburg.com).
33
Guido van Rossum16b1ad92000-08-03 16:24:25 +000034Copyright (c) Corporation for National Research Initiatives.
Guido van Rossume2d67f92000-03-10 23:09:23 +000035
36 ------------------------------------------------------------------------ */
37
38#include "Python.h"
39
40/* --- Registry ----------------------------------------------------------- */
41
Walter Dörwald0ae29812002-10-31 13:36:29 +000042PyDoc_STRVAR(register__doc__,
43"register(search_function)\n\
44\n\
45Register a codec search function. Search functions are expected to take\n\
46one argument, the encoding name in all lower case letters, and return\n\
47a tuple of functions (encoder, decoder, stream_reader, stream_writer).");
48
Guido van Rossume2d67f92000-03-10 23:09:23 +000049static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000050PyObject *codec_register(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000051{
52 PyObject *search_function;
53
54 if (!PyArg_ParseTuple(args, "O:register", &search_function))
55 goto onError;
56
57 if (PyCodec_Register(search_function))
58 goto onError;
59
60 Py_INCREF(Py_None);
61 return Py_None;
62
63 onError:
64 return NULL;
65}
66
Walter Dörwald0ae29812002-10-31 13:36:29 +000067PyDoc_STRVAR(lookup__doc__,
68"lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)\n\
69\n\
70Looks up a codec tuple in the Python codec registry and returns\n\
71a tuple of functions.");
72
Guido van Rossume2d67f92000-03-10 23:09:23 +000073static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000074PyObject *codec_lookup(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000075{
76 char *encoding;
77
78 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
79 goto onError;
80
81 return _PyCodec_Lookup(encoding);
82
83 onError:
84 return NULL;
85}
86
Marc-André Lemburg3f419742004-07-10 12:06:10 +000087PyDoc_STRVAR(encode__doc__,
88"encode(obj, [encoding[,errors]]) -> object\n\
89\n\
90Encodes obj using the codec registered for encoding. encoding defaults\n\
91to the default encoding. errors may be given to set a different error\n\
92handling scheme. Default is 'strict' meaning that encoding errors raise\n\
93a ValueError. Other possible values are 'ignore', 'replace' and\n\
94'xmlcharrefreplace' as well as any other name registered with\n\
95codecs.register_error that can handle ValueErrors.");
96
97static PyObject *
98codec_encode(PyObject *self, PyObject *args)
99{
Brett Cannon3e377de2004-07-10 21:41:14 +0000100 const char *encoding = NULL;
101 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000102 PyObject *v;
103
104 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
105 return NULL;
106
Martin v. Löwise2713be2005-03-08 15:03:08 +0000107#ifdef Py_USING_UNICODE
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000108 if (encoding == NULL)
109 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwise2713be2005-03-08 15:03:08 +0000110#else
111 if (encoding == NULL) {
112 PyErr_SetString(PyExc_ValueError, "no encoding specified");
113 return NULL;
114 }
115#endif
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000116
117 /* Encode via the codec registry */
118 v = PyCodec_Encode(v, encoding, errors);
119 if (v == NULL)
120 goto onError;
121 return v;
122
123 onError:
124 return NULL;
125}
126
127PyDoc_STRVAR(decode__doc__,
128"decode(obj, [encoding[,errors]]) -> object\n\
129\n\
130Decodes obj using the codec registered for encoding. encoding defaults\n\
131to the default encoding. errors may be given to set a different error\n\
132handling scheme. Default is 'strict' meaning that encoding errors raise\n\
133a ValueError. Other possible values are 'ignore' and 'replace'\n\
134as well as any other name registerd with codecs.register_error that is\n\
135able to handle ValueErrors.");
136
137static PyObject *
138codec_decode(PyObject *self, PyObject *args)
139{
Brett Cannon3e377de2004-07-10 21:41:14 +0000140 const char *encoding = NULL;
141 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000142 PyObject *v;
143
144 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
145 return NULL;
146
Martin v. Löwise2713be2005-03-08 15:03:08 +0000147#ifdef Py_USING_UNICODE
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000148 if (encoding == NULL)
149 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwise2713be2005-03-08 15:03:08 +0000150#else
151 if (encoding == NULL) {
152 PyErr_SetString(PyExc_ValueError, "no encoding specified");
153 return NULL;
154 }
155#endif
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000156
157 /* Decode via the codec registry */
158 v = PyCodec_Decode(v, encoding, errors);
159 if (v == NULL)
160 goto onError;
161 return v;
162
163 onError:
164 return NULL;
165}
166
Guido van Rossume2d67f92000-03-10 23:09:23 +0000167/* --- Helpers ------------------------------------------------------------ */
168
169static
170PyObject *codec_tuple(PyObject *unicode,
171 int len)
172{
173 PyObject *v,*w;
174
175 if (unicode == NULL)
176 return NULL;
177 v = PyTuple_New(2);
178 if (v == NULL) {
179 Py_DECREF(unicode);
180 return NULL;
181 }
182 PyTuple_SET_ITEM(v,0,unicode);
183 w = PyInt_FromLong(len);
184 if (w == NULL) {
185 Py_DECREF(v);
186 return NULL;
187 }
188 PyTuple_SET_ITEM(v,1,w);
189 return v;
190}
191
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000192/* --- String codecs ------------------------------------------------------ */
193static PyObject *
194escape_decode(PyObject *self,
195 PyObject *args)
196{
197 const char *errors = NULL;
198 const char *data;
199 int size;
200
201 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
202 &data, &size, &errors))
203 return NULL;
204 return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
205 size);
206}
207
208static PyObject *
209escape_encode(PyObject *self,
210 PyObject *args)
211{
212 PyObject *str;
213 const char *errors = NULL;
214 char *buf;
215 int len;
216
217 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
218 &PyString_Type, &str, &errors))
219 return NULL;
220
221 str = PyString_Repr(str, 0);
222 if (!str)
223 return NULL;
224
225 /* The string will be quoted. Unquote, similar to unicode-escape. */
226 buf = PyString_AS_STRING (str);
227 len = PyString_GET_SIZE (str);
228 memmove(buf, buf+1, len-2);
229 _PyString_Resize(&str, len-2);
230
231 return codec_tuple(str, PyString_Size(str));
232}
233
234#ifdef Py_USING_UNICODE
Guido van Rossume2d67f92000-03-10 23:09:23 +0000235/* --- Decoder ------------------------------------------------------------ */
236
237static PyObject *
238unicode_internal_decode(PyObject *self,
239 PyObject *args)
240{
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000241 PyObject *obj;
242 const char *errors = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000243 const char *data;
244 int size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000245
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000246 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
247 &obj, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000248 return NULL;
249
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000250 if (PyUnicode_Check(obj)) {
251 Py_INCREF(obj);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000252 return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000253 }
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000254 else {
255 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
256 return NULL;
257 return codec_tuple(PyUnicode_FromUnicode((Py_UNICODE *)data,
258 size / sizeof(Py_UNICODE)),
259 size);
260 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000261}
262
263static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000264utf_7_decode(PyObject *self,
265 PyObject *args)
266{
267 const char *data;
268 int size;
269 const char *errors = NULL;
270
271 if (!PyArg_ParseTuple(args, "t#|z:utf_7_decode",
272 &data, &size, &errors))
273 return NULL;
274
275 return codec_tuple(PyUnicode_DecodeUTF7(data, size, errors),
276 size);
277}
278
279static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000280utf_8_decode(PyObject *self,
281 PyObject *args)
282{
283 const char *data;
284 int size;
285 const char *errors = NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000286 int final = 0;
287 int consumed;
288 PyObject *decoded = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000289
Walter Dörwald69652032004-09-07 20:24:22 +0000290 if (!PyArg_ParseTuple(args, "t#|zi:utf_8_decode",
291 &data, &size, &errors, &final))
292 return NULL;
293 consumed = size;
294
295 decoded = PyUnicode_DecodeUTF8Stateful(data, size, errors,
296 final ? NULL : &consumed);
297 if (decoded == NULL)
298 return NULL;
299 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000300}
301
302static PyObject *
303utf_16_decode(PyObject *self,
304 PyObject *args)
305{
306 const char *data;
307 int size;
308 const char *errors = NULL;
309 int byteorder = 0;
Walter Dörwald69652032004-09-07 20:24:22 +0000310 int final = 0;
311 int consumed;
312 PyObject *decoded;
313
314 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_decode",
315 &data, &size, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000316 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000317 consumed = size;
318 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
319 final ? NULL : &consumed);
320 if (decoded == NULL)
321 return NULL;
322 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000323}
324
325static PyObject *
326utf_16_le_decode(PyObject *self,
327 PyObject *args)
328{
329 const char *data;
330 int size;
331 const char *errors = NULL;
332 int byteorder = -1;
Walter Dörwald69652032004-09-07 20:24:22 +0000333 int final = 0;
334 int consumed;
335 PyObject *decoded = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000336
Walter Dörwald69652032004-09-07 20:24:22 +0000337 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_le_decode",
338 &data, &size, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000339 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000340 consumed = size;
341 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
342 &byteorder, final ? NULL : &consumed);
343 if (decoded == NULL)
344 return NULL;
345 return codec_tuple(decoded, consumed);
346
Guido van Rossume2d67f92000-03-10 23:09:23 +0000347}
348
349static PyObject *
350utf_16_be_decode(PyObject *self,
351 PyObject *args)
352{
353 const char *data;
354 int size;
355 const char *errors = NULL;
356 int byteorder = 1;
Walter Dörwald69652032004-09-07 20:24:22 +0000357 int final = 0;
358 int consumed;
359 PyObject *decoded = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000360
Walter Dörwald69652032004-09-07 20:24:22 +0000361 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_be_decode",
362 &data, &size, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000363 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000364 consumed = size;
365 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
366 &byteorder, final ? NULL : &consumed);
367 if (decoded == NULL)
368 return NULL;
369 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000370}
371
372/* This non-standard version also provides access to the byteorder
373 parameter of the builtin UTF-16 codec.
374
375 It returns a tuple (unicode, bytesread, byteorder) with byteorder
376 being the value in effect at the end of data.
377
378*/
379
380static PyObject *
381utf_16_ex_decode(PyObject *self,
382 PyObject *args)
383{
384 const char *data;
385 int size;
386 const char *errors = NULL;
387 int byteorder = 0;
388 PyObject *unicode, *tuple;
Walter Dörwald69652032004-09-07 20:24:22 +0000389 int final = 0;
390 int consumed;
391
392 if (!PyArg_ParseTuple(args, "t#|zii:utf_16_ex_decode",
393 &data, &size, &errors, &byteorder, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000394 return NULL;
395
Walter Dörwald69652032004-09-07 20:24:22 +0000396 consumed = size;
397 unicode = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
398 final ? NULL : &consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000399 if (unicode == NULL)
400 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000401 tuple = Py_BuildValue("Oii", unicode, consumed, byteorder);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000402 Py_DECREF(unicode);
403 return tuple;
404}
405
406static PyObject *
407unicode_escape_decode(PyObject *self,
408 PyObject *args)
409{
410 const char *data;
411 int size;
412 const char *errors = NULL;
413
414 if (!PyArg_ParseTuple(args, "t#|z:unicode_escape_decode",
415 &data, &size, &errors))
416 return NULL;
417
418 return codec_tuple(PyUnicode_DecodeUnicodeEscape(data, size, errors),
419 size);
420}
421
422static PyObject *
423raw_unicode_escape_decode(PyObject *self,
424 PyObject *args)
425{
426 const char *data;
427 int size;
428 const char *errors = NULL;
429
430 if (!PyArg_ParseTuple(args, "t#|z:raw_unicode_escape_decode",
431 &data, &size, &errors))
432 return NULL;
433
434 return codec_tuple(PyUnicode_DecodeRawUnicodeEscape(data, size, errors),
435 size);
436}
437
438static PyObject *
439latin_1_decode(PyObject *self,
440 PyObject *args)
441{
442 const char *data;
443 int size;
444 const char *errors = NULL;
445
446 if (!PyArg_ParseTuple(args, "t#|z:latin_1_decode",
447 &data, &size, &errors))
448 return NULL;
449
450 return codec_tuple(PyUnicode_DecodeLatin1(data, size, errors),
451 size);
452}
453
454static PyObject *
455ascii_decode(PyObject *self,
456 PyObject *args)
457{
458 const char *data;
459 int size;
460 const char *errors = NULL;
461
462 if (!PyArg_ParseTuple(args, "t#|z:ascii_decode",
463 &data, &size, &errors))
464 return NULL;
465
466 return codec_tuple(PyUnicode_DecodeASCII(data, size, errors),
467 size);
468}
469
470static PyObject *
471charmap_decode(PyObject *self,
472 PyObject *args)
473{
474 const char *data;
475 int size;
476 const char *errors = NULL;
477 PyObject *mapping = NULL;
478
479 if (!PyArg_ParseTuple(args, "t#|zO:charmap_decode",
480 &data, &size, &errors, &mapping))
481 return NULL;
482 if (mapping == Py_None)
483 mapping = NULL;
484
485 return codec_tuple(PyUnicode_DecodeCharmap(data, size, mapping, errors),
486 size);
487}
488
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000489#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000490
491static PyObject *
492mbcs_decode(PyObject *self,
493 PyObject *args)
494{
495 const char *data;
496 int size;
497 const char *errors = NULL;
498
499 if (!PyArg_ParseTuple(args, "t#|z:mbcs_decode",
500 &data, &size, &errors))
501 return NULL;
502
503 return codec_tuple(PyUnicode_DecodeMBCS(data, size, errors),
504 size);
505}
506
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000507#endif /* MS_WINDOWS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000508
Guido van Rossume2d67f92000-03-10 23:09:23 +0000509/* --- Encoder ------------------------------------------------------------ */
510
511static PyObject *
512readbuffer_encode(PyObject *self,
513 PyObject *args)
514{
515 const char *data;
516 int size;
517 const char *errors = NULL;
518
519 if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
520 &data, &size, &errors))
521 return NULL;
522
523 return codec_tuple(PyString_FromStringAndSize(data, size),
524 size);
525}
526
527static PyObject *
528charbuffer_encode(PyObject *self,
529 PyObject *args)
530{
531 const char *data;
532 int size;
533 const char *errors = NULL;
534
535 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
536 &data, &size, &errors))
537 return NULL;
538
539 return codec_tuple(PyString_FromStringAndSize(data, size),
540 size);
541}
542
543static PyObject *
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000544unicode_internal_encode(PyObject *self,
545 PyObject *args)
546{
547 PyObject *obj;
548 const char *errors = NULL;
549 const char *data;
550 int size;
551
552 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
553 &obj, &errors))
554 return NULL;
555
556 if (PyUnicode_Check(obj)) {
557 data = PyUnicode_AS_DATA(obj);
558 size = PyUnicode_GET_DATA_SIZE(obj);
559 return codec_tuple(PyString_FromStringAndSize(data, size),
560 size);
561 }
562 else {
563 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
564 return NULL;
565 return codec_tuple(PyString_FromStringAndSize(data, size),
566 size);
567 }
568}
569
570static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000571utf_7_encode(PyObject *self,
572 PyObject *args)
573{
574 PyObject *str, *v;
575 const char *errors = NULL;
576
577 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
578 &str, &errors))
579 return NULL;
580
581 str = PyUnicode_FromObject(str);
582 if (str == NULL)
583 return NULL;
584 v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
585 PyUnicode_GET_SIZE(str),
586 0,
587 0,
588 errors),
589 PyUnicode_GET_SIZE(str));
590 Py_DECREF(str);
591 return v;
592}
593
594static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000595utf_8_encode(PyObject *self,
596 PyObject *args)
597{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000598 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000599 const char *errors = NULL;
600
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000601 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000602 &str, &errors))
603 return NULL;
604
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000605 str = PyUnicode_FromObject(str);
606 if (str == NULL)
607 return NULL;
608 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
609 PyUnicode_GET_SIZE(str),
610 errors),
611 PyUnicode_GET_SIZE(str));
612 Py_DECREF(str);
613 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000614}
615
616/* This version provides access to the byteorder parameter of the
617 builtin UTF-16 codecs as optional third argument. It defaults to 0
618 which means: use the native byte order and prepend the data with a
619 BOM mark.
620
621*/
622
623static PyObject *
624utf_16_encode(PyObject *self,
625 PyObject *args)
626{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000627 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000628 const char *errors = NULL;
629 int byteorder = 0;
630
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000631 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000632 &str, &errors, &byteorder))
633 return NULL;
634
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000635 str = PyUnicode_FromObject(str);
636 if (str == NULL)
637 return NULL;
638 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
639 PyUnicode_GET_SIZE(str),
640 errors,
641 byteorder),
642 PyUnicode_GET_SIZE(str));
643 Py_DECREF(str);
644 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000645}
646
647static PyObject *
648utf_16_le_encode(PyObject *self,
649 PyObject *args)
650{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000651 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000652 const char *errors = NULL;
653
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000654 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000655 &str, &errors))
656 return NULL;
657
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000658 str = PyUnicode_FromObject(str);
659 if (str == NULL)
660 return NULL;
661 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000662 PyUnicode_GET_SIZE(str),
663 errors,
664 -1),
665 PyUnicode_GET_SIZE(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000666 Py_DECREF(str);
667 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000668}
669
670static PyObject *
671utf_16_be_encode(PyObject *self,
672 PyObject *args)
673{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000674 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000675 const char *errors = NULL;
676
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000677 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000678 &str, &errors))
679 return NULL;
680
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000681 str = PyUnicode_FromObject(str);
682 if (str == NULL)
683 return NULL;
684 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
685 PyUnicode_GET_SIZE(str),
686 errors,
687 +1),
688 PyUnicode_GET_SIZE(str));
689 Py_DECREF(str);
690 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000691}
692
693static PyObject *
694unicode_escape_encode(PyObject *self,
695 PyObject *args)
696{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000697 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000698 const char *errors = NULL;
699
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000700 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000701 &str, &errors))
702 return NULL;
703
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000704 str = PyUnicode_FromObject(str);
705 if (str == NULL)
706 return NULL;
707 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
708 PyUnicode_GET_SIZE(str)),
709 PyUnicode_GET_SIZE(str));
710 Py_DECREF(str);
711 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000712}
713
714static PyObject *
715raw_unicode_escape_encode(PyObject *self,
716 PyObject *args)
717{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000718 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000719 const char *errors = NULL;
720
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000721 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000722 &str, &errors))
723 return NULL;
724
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000725 str = PyUnicode_FromObject(str);
726 if (str == NULL)
727 return NULL;
728 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
Guido van Rossume2d67f92000-03-10 23:09:23 +0000729 PyUnicode_AS_UNICODE(str),
730 PyUnicode_GET_SIZE(str)),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000731 PyUnicode_GET_SIZE(str));
732 Py_DECREF(str);
733 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000734}
735
736static PyObject *
737latin_1_encode(PyObject *self,
738 PyObject *args)
739{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000740 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000741 const char *errors = NULL;
742
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000743 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000744 &str, &errors))
745 return NULL;
746
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000747 str = PyUnicode_FromObject(str);
748 if (str == NULL)
749 return NULL;
750 v = codec_tuple(PyUnicode_EncodeLatin1(
Guido van Rossume2d67f92000-03-10 23:09:23 +0000751 PyUnicode_AS_UNICODE(str),
752 PyUnicode_GET_SIZE(str),
753 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000754 PyUnicode_GET_SIZE(str));
755 Py_DECREF(str);
756 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000757}
758
759static PyObject *
760ascii_encode(PyObject *self,
761 PyObject *args)
762{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000763 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000764 const char *errors = NULL;
765
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000766 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000767 &str, &errors))
768 return NULL;
769
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000770 str = PyUnicode_FromObject(str);
771 if (str == NULL)
772 return NULL;
773 v = codec_tuple(PyUnicode_EncodeASCII(
Guido van Rossume2d67f92000-03-10 23:09:23 +0000774 PyUnicode_AS_UNICODE(str),
775 PyUnicode_GET_SIZE(str),
776 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000777 PyUnicode_GET_SIZE(str));
778 Py_DECREF(str);
779 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000780}
781
782static PyObject *
783charmap_encode(PyObject *self,
784 PyObject *args)
785{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000786 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000787 const char *errors = NULL;
788 PyObject *mapping = NULL;
789
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000790 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000791 &str, &errors, &mapping))
792 return NULL;
793 if (mapping == Py_None)
794 mapping = NULL;
795
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000796 str = PyUnicode_FromObject(str);
797 if (str == NULL)
798 return NULL;
799 v = codec_tuple(PyUnicode_EncodeCharmap(
Guido van Rossume2d67f92000-03-10 23:09:23 +0000800 PyUnicode_AS_UNICODE(str),
801 PyUnicode_GET_SIZE(str),
802 mapping,
803 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000804 PyUnicode_GET_SIZE(str));
805 Py_DECREF(str);
806 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000807}
808
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000809#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000810
811static PyObject *
812mbcs_encode(PyObject *self,
813 PyObject *args)
814{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000815 PyObject *str, *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000816 const char *errors = NULL;
817
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000818 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
Guido van Rossum24bdb042000-03-28 20:29:59 +0000819 &str, &errors))
820 return NULL;
821
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000822 str = PyUnicode_FromObject(str);
823 if (str == NULL)
824 return NULL;
825 v = codec_tuple(PyUnicode_EncodeMBCS(
Guido van Rossum24bdb042000-03-28 20:29:59 +0000826 PyUnicode_AS_UNICODE(str),
827 PyUnicode_GET_SIZE(str),
828 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000829 PyUnicode_GET_SIZE(str));
830 Py_DECREF(str);
831 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000832}
833
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000834#endif /* MS_WINDOWS */
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000835#endif /* Py_USING_UNICODE */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000836
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000837/* --- Error handler registry --------------------------------------------- */
838
Walter Dörwald0ae29812002-10-31 13:36:29 +0000839PyDoc_STRVAR(register_error__doc__,
840"register_error(errors, handler)\n\
841\n\
842Register the specified error handler under the name\n\
843errors. handler must be a callable object, that\n\
844will be called with an exception instance containing\n\
845information about the location of the encoding/decoding\n\
846error and must return a (replacement, new position) tuple.");
847
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000848static PyObject *register_error(PyObject *self, PyObject *args)
849{
850 const char *name;
851 PyObject *handler;
852
853 if (!PyArg_ParseTuple(args, "sO:register_error",
854 &name, &handler))
855 return NULL;
856 if (PyCodec_RegisterError(name, handler))
857 return NULL;
858 Py_INCREF(Py_None);
859 return Py_None;
860}
861
Walter Dörwald0ae29812002-10-31 13:36:29 +0000862PyDoc_STRVAR(lookup_error__doc__,
863"lookup_error(errors) -> handler\n\
864\n\
865Return the error handler for the specified error handling name\n\
866or raise a LookupError, if no handler exists under this name.");
867
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000868static PyObject *lookup_error(PyObject *self, PyObject *args)
869{
870 const char *name;
871
872 if (!PyArg_ParseTuple(args, "s:lookup_error",
873 &name))
874 return NULL;
875 return PyCodec_LookupError(name);
876}
877
Guido van Rossume2d67f92000-03-10 23:09:23 +0000878/* --- Module API --------------------------------------------------------- */
879
880static PyMethodDef _codecs_functions[] = {
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000881 {"register", codec_register, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +0000882 register__doc__},
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000883 {"lookup", codec_lookup, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +0000884 lookup__doc__},
Brett Cannon3e377de2004-07-10 21:41:14 +0000885 {"encode", codec_encode, METH_VARARGS,
886 encode__doc__},
887 {"decode", codec_decode, METH_VARARGS,
888 decode__doc__},
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000889 {"escape_encode", escape_encode, METH_VARARGS},
890 {"escape_decode", escape_decode, METH_VARARGS},
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000891#ifdef Py_USING_UNICODE
Martin v. Löwis43b936d2002-01-17 23:15:58 +0000892 {"utf_8_encode", utf_8_encode, METH_VARARGS},
893 {"utf_8_decode", utf_8_decode, METH_VARARGS},
894 {"utf_7_encode", utf_7_encode, METH_VARARGS},
895 {"utf_7_decode", utf_7_decode, METH_VARARGS},
896 {"utf_16_encode", utf_16_encode, METH_VARARGS},
897 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
898 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
899 {"utf_16_decode", utf_16_decode, METH_VARARGS},
900 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
901 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
902 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
903 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
904 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
905 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
906 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
907 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
908 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
909 {"latin_1_encode", latin_1_encode, METH_VARARGS},
910 {"latin_1_decode", latin_1_decode, METH_VARARGS},
911 {"ascii_encode", ascii_encode, METH_VARARGS},
912 {"ascii_decode", ascii_decode, METH_VARARGS},
913 {"charmap_encode", charmap_encode, METH_VARARGS},
914 {"charmap_decode", charmap_decode, METH_VARARGS},
915 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
916 {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000917#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Martin v. Löwis43b936d2002-01-17 23:15:58 +0000918 {"mbcs_encode", mbcs_encode, METH_VARARGS},
919 {"mbcs_decode", mbcs_decode, METH_VARARGS},
Guido van Rossum24bdb042000-03-28 20:29:59 +0000920#endif
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000921#endif /* Py_USING_UNICODE */
Walter Dörwald0ae29812002-10-31 13:36:29 +0000922 {"register_error", register_error, METH_VARARGS,
923 register_error__doc__},
924 {"lookup_error", lookup_error, METH_VARARGS,
925 lookup_error__doc__},
Guido van Rossume2d67f92000-03-10 23:09:23 +0000926 {NULL, NULL} /* sentinel */
927};
928
Mark Hammondfe51c6d2002-08-02 02:27:13 +0000929PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +0000930init_codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000931{
932 Py_InitModule("_codecs", _codecs_functions);
933}