blob: 39b443b1861a17ac2557953e52fc7e9f0089752a [file] [log] [blame]
/* ------------------------------------------------------------------------

   _codecs -- Provides access to the codec registry and the builtin
              codecs.

   This module should never be imported directly. The standard library
   module "codecs" wraps this builtin module for use within Python.

   The codec registry is accessible via:

     register(search_function) -> None

     lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)

   The builtin Unicode codecs use the following interface:

     <encoding>_encode(Unicode_object[,errors='strict']) ->
        (string object, bytes consumed)

     <encoding>_decode(char_buffer_obj[,errors='strict']) ->
        (Unicode object, bytes consumed)

   <encoding>_encode() interfaces also accept non-Unicode objects as
   input. The objects are then converted to Unicode using
   PyUnicode_FromObject() prior to applying the conversion.

   These <encoding>s are available: utf_8, unicode_escape,
   raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
   mbcs (on win32).


Written by Marc-Andre Lemburg (mal@lemburg.com).

Copyright (c) Corporation for National Research Initiatives.

   ------------------------------------------------------------------------ */
37
Martin v. Löwis18e16552006-02-15 17:27:45 +000038#define PY_SSIZE_T_CLEAN
Guido van Rossume2d67f92000-03-10 23:09:23 +000039#include "Python.h"
40
/* --- Registry ----------------------------------------------------------- */
42
Walter Dörwald0ae29812002-10-31 13:36:29 +000043PyDoc_STRVAR(register__doc__,
44"register(search_function)\n\
45\n\
46Register a codec search function. Search functions are expected to take\n\
47one argument, the encoding name in all lower case letters, and return\n\
48a tuple of functions (encoder, decoder, stream_reader, stream_writer).");
49
Guido van Rossume2d67f92000-03-10 23:09:23 +000050static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000051PyObject *codec_register(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000052{
53 PyObject *search_function;
54
55 if (!PyArg_ParseTuple(args, "O:register", &search_function))
56 goto onError;
57
58 if (PyCodec_Register(search_function))
59 goto onError;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000060
Guido van Rossume2d67f92000-03-10 23:09:23 +000061 Py_INCREF(Py_None);
62 return Py_None;
63
64 onError:
65 return NULL;
66}
67
Walter Dörwald0ae29812002-10-31 13:36:29 +000068PyDoc_STRVAR(lookup__doc__,
69"lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)\n\
70\n\
71Looks up a codec tuple in the Python codec registry and returns\n\
72a tuple of functions.");
73
Guido van Rossume2d67f92000-03-10 23:09:23 +000074static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000075PyObject *codec_lookup(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000076{
77 char *encoding;
78
79 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
80 goto onError;
81
82 return _PyCodec_Lookup(encoding);
83
84 onError:
85 return NULL;
86}
87
Marc-André Lemburg3f419742004-07-10 12:06:10 +000088PyDoc_STRVAR(encode__doc__,
89"encode(obj, [encoding[,errors]]) -> object\n\
90\n\
91Encodes obj using the codec registered for encoding. encoding defaults\n\
92to the default encoding. errors may be given to set a different error\n\
93handling scheme. Default is 'strict' meaning that encoding errors raise\n\
94a ValueError. Other possible values are 'ignore', 'replace' and\n\
95'xmlcharrefreplace' as well as any other name registered with\n\
96codecs.register_error that can handle ValueErrors.");
97
98static PyObject *
99codec_encode(PyObject *self, PyObject *args)
100{
Brett Cannon3e377de2004-07-10 21:41:14 +0000101 const char *encoding = NULL;
102 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000103 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000104
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000105 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
106 return NULL;
107
Martin v. Löwise2713be2005-03-08 15:03:08 +0000108#ifdef Py_USING_UNICODE
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000109 if (encoding == NULL)
110 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwise2713be2005-03-08 15:03:08 +0000111#else
112 if (encoding == NULL) {
113 PyErr_SetString(PyExc_ValueError, "no encoding specified");
114 return NULL;
115 }
116#endif
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000117
118 /* Encode via the codec registry */
119 v = PyCodec_Encode(v, encoding, errors);
120 if (v == NULL)
121 goto onError;
122 return v;
123
124 onError:
125 return NULL;
126}
127
128PyDoc_STRVAR(decode__doc__,
129"decode(obj, [encoding[,errors]]) -> object\n\
130\n\
131Decodes obj using the codec registered for encoding. encoding defaults\n\
132to the default encoding. errors may be given to set a different error\n\
133handling scheme. Default is 'strict' meaning that encoding errors raise\n\
134a ValueError. Other possible values are 'ignore' and 'replace'\n\
135as well as any other name registerd with codecs.register_error that is\n\
136able to handle ValueErrors.");
137
138static PyObject *
139codec_decode(PyObject *self, PyObject *args)
140{
Brett Cannon3e377de2004-07-10 21:41:14 +0000141 const char *encoding = NULL;
142 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000143 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000144
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000145 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
146 return NULL;
147
Martin v. Löwise2713be2005-03-08 15:03:08 +0000148#ifdef Py_USING_UNICODE
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000149 if (encoding == NULL)
150 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwise2713be2005-03-08 15:03:08 +0000151#else
152 if (encoding == NULL) {
153 PyErr_SetString(PyExc_ValueError, "no encoding specified");
154 return NULL;
155 }
156#endif
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000157
158 /* Decode via the codec registry */
159 v = PyCodec_Decode(v, encoding, errors);
160 if (v == NULL)
161 goto onError;
162 return v;
163
164 onError:
165 return NULL;
166}
167
/* --- Helpers ------------------------------------------------------------ */
169
170static
171PyObject *codec_tuple(PyObject *unicode,
172 int len)
173{
174 PyObject *v,*w;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000175
Guido van Rossume2d67f92000-03-10 23:09:23 +0000176 if (unicode == NULL)
177 return NULL;
178 v = PyTuple_New(2);
179 if (v == NULL) {
180 Py_DECREF(unicode);
181 return NULL;
182 }
183 PyTuple_SET_ITEM(v,0,unicode);
184 w = PyInt_FromLong(len);
185 if (w == NULL) {
186 Py_DECREF(v);
187 return NULL;
188 }
189 PyTuple_SET_ITEM(v,1,w);
190 return v;
191}
192
/* --- String codecs ------------------------------------------------------ */
194static PyObject *
195escape_decode(PyObject *self,
196 PyObject *args)
197{
198 const char *errors = NULL;
199 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000200 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000201
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000202 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
203 &data, &size, &errors))
204 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000205 return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000206 size);
207}
208
209static PyObject *
210escape_encode(PyObject *self,
211 PyObject *args)
212{
213 PyObject *str;
214 const char *errors = NULL;
215 char *buf;
216 int len;
217
218 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
219 &PyString_Type, &str, &errors))
220 return NULL;
221
222 str = PyString_Repr(str, 0);
223 if (!str)
224 return NULL;
225
226 /* The string will be quoted. Unquote, similar to unicode-escape. */
227 buf = PyString_AS_STRING (str);
228 len = PyString_GET_SIZE (str);
229 memmove(buf, buf+1, len-2);
230 _PyString_Resize(&str, len-2);
231
232 return codec_tuple(str, PyString_Size(str));
233}
234
235#ifdef Py_USING_UNICODE
/* --- Decoder ------------------------------------------------------------ */
237
238static PyObject *
239unicode_internal_decode(PyObject *self,
240 PyObject *args)
241{
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000242 PyObject *obj;
243 const char *errors = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000244 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000245 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000246
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000247 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
248 &obj, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000249 return NULL;
250
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000251 if (PyUnicode_Check(obj)) {
252 Py_INCREF(obj);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000253 return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000254 }
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000255 else {
256 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
257 return NULL;
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000258
259 return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000260 size);
261 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000262}
263
264static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000265utf_7_decode(PyObject *self,
266 PyObject *args)
267{
268 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000269 Py_ssize_t size;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000270 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000271
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000272 if (!PyArg_ParseTuple(args, "t#|z:utf_7_decode",
273 &data, &size, &errors))
274 return NULL;
275
276 return codec_tuple(PyUnicode_DecodeUTF7(data, size, errors),
277 size);
278}
279
280static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000281utf_8_decode(PyObject *self,
282 PyObject *args)
283{
284 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000285 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000286 const char *errors = NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000287 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000288 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000289 PyObject *decoded = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000290
Walter Dörwald69652032004-09-07 20:24:22 +0000291 if (!PyArg_ParseTuple(args, "t#|zi:utf_8_decode",
292 &data, &size, &errors, &final))
293 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000294 if (size < 0) {
295 PyErr_SetString(PyExc_ValueError, "negative argument");
296 return 0;
297 }
Walter Dörwald69652032004-09-07 20:24:22 +0000298 consumed = size;
299
300 decoded = PyUnicode_DecodeUTF8Stateful(data, size, errors,
301 final ? NULL : &consumed);
302 if (decoded == NULL)
303 return NULL;
304 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000305}
306
307static PyObject *
308utf_16_decode(PyObject *self,
309 PyObject *args)
310{
311 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000312 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000313 const char *errors = NULL;
314 int byteorder = 0;
Walter Dörwald69652032004-09-07 20:24:22 +0000315 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000316 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000317 PyObject *decoded;
318
319 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_decode",
320 &data, &size, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000321 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000322 /* XXX Why is consumed initialized to size? mvl */
323 if (size < 0) {
324 PyErr_SetString(PyExc_ValueError, "negative argument");
325 return 0;
326 }
Walter Dörwald69652032004-09-07 20:24:22 +0000327 consumed = size;
328 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
329 final ? NULL : &consumed);
330 if (decoded == NULL)
331 return NULL;
332 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000333}
334
335static PyObject *
336utf_16_le_decode(PyObject *self,
337 PyObject *args)
338{
339 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000340 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000341 const char *errors = NULL;
342 int byteorder = -1;
Walter Dörwald69652032004-09-07 20:24:22 +0000343 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000344 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000345 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000346
Walter Dörwald69652032004-09-07 20:24:22 +0000347 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_le_decode",
348 &data, &size, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000349 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000350
351 /* XXX Why is consumed initialized to size? mvl */
352 if (size < 0) {
353 PyErr_SetString(PyExc_ValueError, "negative argument");
354 return 0;
355 }
Walter Dörwald69652032004-09-07 20:24:22 +0000356 consumed = size;
357 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
358 &byteorder, final ? NULL : &consumed);
359 if (decoded == NULL)
360 return NULL;
361 return codec_tuple(decoded, consumed);
362
Guido van Rossume2d67f92000-03-10 23:09:23 +0000363}
364
365static PyObject *
366utf_16_be_decode(PyObject *self,
367 PyObject *args)
368{
369 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000370 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000371 const char *errors = NULL;
372 int byteorder = 1;
Walter Dörwald69652032004-09-07 20:24:22 +0000373 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000374 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000375 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000376
Walter Dörwald69652032004-09-07 20:24:22 +0000377 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_be_decode",
378 &data, &size, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000379 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000380 /* XXX Why is consumed initialized to size? mvl */
381 if (size < 0) {
382 PyErr_SetString(PyExc_ValueError, "negative argument");
383 return 0;
384 }
Walter Dörwald69652032004-09-07 20:24:22 +0000385 consumed = size;
386 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
387 &byteorder, final ? NULL : &consumed);
388 if (decoded == NULL)
389 return NULL;
390 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000391}
392
393/* This non-standard version also provides access to the byteorder
394 parameter of the builtin UTF-16 codec.
395
396 It returns a tuple (unicode, bytesread, byteorder) with byteorder
397 being the value in effect at the end of data.
398
399*/
400
401static PyObject *
402utf_16_ex_decode(PyObject *self,
403 PyObject *args)
404{
405 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000406 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000407 const char *errors = NULL;
408 int byteorder = 0;
409 PyObject *unicode, *tuple;
Walter Dörwald69652032004-09-07 20:24:22 +0000410 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000411 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000412
413 if (!PyArg_ParseTuple(args, "t#|zii:utf_16_ex_decode",
414 &data, &size, &errors, &byteorder, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000415 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000416 /* XXX Why is consumed initialized to size? mvl */
417 if (size < 0) {
418 PyErr_SetString(PyExc_ValueError, "negative argument");
419 return 0;
420 }
Walter Dörwald69652032004-09-07 20:24:22 +0000421 consumed = size;
422 unicode = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
423 final ? NULL : &consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000424 if (unicode == NULL)
425 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000426 tuple = Py_BuildValue("Oii", unicode, consumed, byteorder);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000427 Py_DECREF(unicode);
428 return tuple;
429}
430
431static PyObject *
432unicode_escape_decode(PyObject *self,
433 PyObject *args)
434{
435 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000436 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000437 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000438
Guido van Rossume2d67f92000-03-10 23:09:23 +0000439 if (!PyArg_ParseTuple(args, "t#|z:unicode_escape_decode",
440 &data, &size, &errors))
441 return NULL;
442
443 return codec_tuple(PyUnicode_DecodeUnicodeEscape(data, size, errors),
444 size);
445}
446
447static PyObject *
448raw_unicode_escape_decode(PyObject *self,
449 PyObject *args)
450{
451 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000452 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000453 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000454
Guido van Rossume2d67f92000-03-10 23:09:23 +0000455 if (!PyArg_ParseTuple(args, "t#|z:raw_unicode_escape_decode",
456 &data, &size, &errors))
457 return NULL;
458
459 return codec_tuple(PyUnicode_DecodeRawUnicodeEscape(data, size, errors),
460 size);
461}
462
463static PyObject *
464latin_1_decode(PyObject *self,
465 PyObject *args)
466{
467 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000468 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000469 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000470
Guido van Rossume2d67f92000-03-10 23:09:23 +0000471 if (!PyArg_ParseTuple(args, "t#|z:latin_1_decode",
472 &data, &size, &errors))
473 return NULL;
474
475 return codec_tuple(PyUnicode_DecodeLatin1(data, size, errors),
476 size);
477}
478
479static PyObject *
480ascii_decode(PyObject *self,
481 PyObject *args)
482{
483 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000484 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000485 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000486
Guido van Rossume2d67f92000-03-10 23:09:23 +0000487 if (!PyArg_ParseTuple(args, "t#|z:ascii_decode",
488 &data, &size, &errors))
489 return NULL;
490
491 return codec_tuple(PyUnicode_DecodeASCII(data, size, errors),
492 size);
493}
494
495static PyObject *
496charmap_decode(PyObject *self,
497 PyObject *args)
498{
499 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000500 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000501 const char *errors = NULL;
502 PyObject *mapping = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000503
Guido van Rossume2d67f92000-03-10 23:09:23 +0000504 if (!PyArg_ParseTuple(args, "t#|zO:charmap_decode",
505 &data, &size, &errors, &mapping))
506 return NULL;
507 if (mapping == Py_None)
508 mapping = NULL;
509
510 return codec_tuple(PyUnicode_DecodeCharmap(data, size, mapping, errors),
511 size);
512}
513
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000514#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000515
516static PyObject *
517mbcs_decode(PyObject *self,
518 PyObject *args)
519{
520 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000521 Py_ssize_t size;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000522 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000523
Guido van Rossum24bdb042000-03-28 20:29:59 +0000524 if (!PyArg_ParseTuple(args, "t#|z:mbcs_decode",
525 &data, &size, &errors))
526 return NULL;
527
528 return codec_tuple(PyUnicode_DecodeMBCS(data, size, errors),
529 size);
530}
531
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000532#endif /* MS_WINDOWS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000533
/* --- Encoder ------------------------------------------------------------ */
535
536static PyObject *
537readbuffer_encode(PyObject *self,
538 PyObject *args)
539{
540 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000541 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000542 const char *errors = NULL;
543
544 if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
545 &data, &size, &errors))
546 return NULL;
547
548 return codec_tuple(PyString_FromStringAndSize(data, size),
549 size);
550}
551
552static PyObject *
553charbuffer_encode(PyObject *self,
554 PyObject *args)
555{
556 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000557 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000558 const char *errors = NULL;
559
560 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
561 &data, &size, &errors))
562 return NULL;
563
564 return codec_tuple(PyString_FromStringAndSize(data, size),
565 size);
566}
567
568static PyObject *
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000569unicode_internal_encode(PyObject *self,
570 PyObject *args)
571{
572 PyObject *obj;
573 const char *errors = NULL;
574 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000575 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000576
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000577 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
578 &obj, &errors))
579 return NULL;
580
581 if (PyUnicode_Check(obj)) {
582 data = PyUnicode_AS_DATA(obj);
583 size = PyUnicode_GET_DATA_SIZE(obj);
584 return codec_tuple(PyString_FromStringAndSize(data, size),
585 size);
586 }
587 else {
588 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
589 return NULL;
590 return codec_tuple(PyString_FromStringAndSize(data, size),
591 size);
592 }
593}
594
595static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000596utf_7_encode(PyObject *self,
597 PyObject *args)
598{
599 PyObject *str, *v;
600 const char *errors = NULL;
601
602 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
603 &str, &errors))
604 return NULL;
605
606 str = PyUnicode_FromObject(str);
607 if (str == NULL)
608 return NULL;
609 v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
610 PyUnicode_GET_SIZE(str),
611 0,
612 0,
613 errors),
614 PyUnicode_GET_SIZE(str));
615 Py_DECREF(str);
616 return v;
617}
618
619static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000620utf_8_encode(PyObject *self,
621 PyObject *args)
622{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000623 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000624 const char *errors = NULL;
625
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000626 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000627 &str, &errors))
628 return NULL;
629
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000630 str = PyUnicode_FromObject(str);
631 if (str == NULL)
632 return NULL;
633 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
634 PyUnicode_GET_SIZE(str),
635 errors),
636 PyUnicode_GET_SIZE(str));
637 Py_DECREF(str);
638 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000639}
640
641/* This version provides access to the byteorder parameter of the
642 builtin UTF-16 codecs as optional third argument. It defaults to 0
643 which means: use the native byte order and prepend the data with a
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000644 BOM mark.
Guido van Rossume2d67f92000-03-10 23:09:23 +0000645
646*/
647
648static PyObject *
649utf_16_encode(PyObject *self,
650 PyObject *args)
651{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000652 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000653 const char *errors = NULL;
654 int byteorder = 0;
655
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000656 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000657 &str, &errors, &byteorder))
658 return NULL;
659
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000660 str = PyUnicode_FromObject(str);
661 if (str == NULL)
662 return NULL;
663 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
664 PyUnicode_GET_SIZE(str),
665 errors,
666 byteorder),
667 PyUnicode_GET_SIZE(str));
668 Py_DECREF(str);
669 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000670}
671
672static PyObject *
673utf_16_le_encode(PyObject *self,
674 PyObject *args)
675{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000676 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000677 const char *errors = NULL;
678
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000679 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000680 &str, &errors))
681 return NULL;
682
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000683 str = PyUnicode_FromObject(str);
684 if (str == NULL)
685 return NULL;
686 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000687 PyUnicode_GET_SIZE(str),
688 errors,
689 -1),
690 PyUnicode_GET_SIZE(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000691 Py_DECREF(str);
692 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000693}
694
695static PyObject *
696utf_16_be_encode(PyObject *self,
697 PyObject *args)
698{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000699 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000700 const char *errors = NULL;
701
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000702 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000703 &str, &errors))
704 return NULL;
705
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000706 str = PyUnicode_FromObject(str);
707 if (str == NULL)
708 return NULL;
709 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
710 PyUnicode_GET_SIZE(str),
711 errors,
712 +1),
713 PyUnicode_GET_SIZE(str));
714 Py_DECREF(str);
715 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000716}
717
718static PyObject *
719unicode_escape_encode(PyObject *self,
720 PyObject *args)
721{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000722 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000723 const char *errors = NULL;
724
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000725 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000726 &str, &errors))
727 return NULL;
728
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000729 str = PyUnicode_FromObject(str);
730 if (str == NULL)
731 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000732 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000733 PyUnicode_GET_SIZE(str)),
734 PyUnicode_GET_SIZE(str));
735 Py_DECREF(str);
736 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000737}
738
739static PyObject *
740raw_unicode_escape_encode(PyObject *self,
741 PyObject *args)
742{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000743 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000744 const char *errors = NULL;
745
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000746 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000747 &str, &errors))
748 return NULL;
749
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000750 str = PyUnicode_FromObject(str);
751 if (str == NULL)
752 return NULL;
753 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000754 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000755 PyUnicode_GET_SIZE(str)),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000756 PyUnicode_GET_SIZE(str));
757 Py_DECREF(str);
758 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000759}
760
761static PyObject *
762latin_1_encode(PyObject *self,
763 PyObject *args)
764{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000765 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000766 const char *errors = NULL;
767
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000768 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000769 &str, &errors))
770 return NULL;
771
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000772 str = PyUnicode_FromObject(str);
773 if (str == NULL)
774 return NULL;
775 v = codec_tuple(PyUnicode_EncodeLatin1(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000776 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000777 PyUnicode_GET_SIZE(str),
778 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000779 PyUnicode_GET_SIZE(str));
780 Py_DECREF(str);
781 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000782}
783
784static PyObject *
785ascii_encode(PyObject *self,
786 PyObject *args)
787{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000788 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000789 const char *errors = NULL;
790
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000791 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000792 &str, &errors))
793 return NULL;
794
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000795 str = PyUnicode_FromObject(str);
796 if (str == NULL)
797 return NULL;
798 v = codec_tuple(PyUnicode_EncodeASCII(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000799 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000800 PyUnicode_GET_SIZE(str),
801 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000802 PyUnicode_GET_SIZE(str));
803 Py_DECREF(str);
804 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000805}
806
807static PyObject *
808charmap_encode(PyObject *self,
809 PyObject *args)
810{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000811 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000812 const char *errors = NULL;
813 PyObject *mapping = NULL;
814
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000815 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000816 &str, &errors, &mapping))
817 return NULL;
818 if (mapping == Py_None)
819 mapping = NULL;
820
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000821 str = PyUnicode_FromObject(str);
822 if (str == NULL)
823 return NULL;
824 v = codec_tuple(PyUnicode_EncodeCharmap(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000825 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000826 PyUnicode_GET_SIZE(str),
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000827 mapping,
Guido van Rossume2d67f92000-03-10 23:09:23 +0000828 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000829 PyUnicode_GET_SIZE(str));
830 Py_DECREF(str);
831 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000832}
833
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000834#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000835
836static PyObject *
837mbcs_encode(PyObject *self,
838 PyObject *args)
839{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000840 PyObject *str, *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000841 const char *errors = NULL;
842
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000843 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
Guido van Rossum24bdb042000-03-28 20:29:59 +0000844 &str, &errors))
845 return NULL;
846
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000847 str = PyUnicode_FromObject(str);
848 if (str == NULL)
849 return NULL;
850 v = codec_tuple(PyUnicode_EncodeMBCS(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000851 PyUnicode_AS_UNICODE(str),
Guido van Rossum24bdb042000-03-28 20:29:59 +0000852 PyUnicode_GET_SIZE(str),
853 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000854 PyUnicode_GET_SIZE(str));
855 Py_DECREF(str);
856 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000857}
858
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000859#endif /* MS_WINDOWS */
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000860#endif /* Py_USING_UNICODE */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000861
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000862/* --- Error handler registry --------------------------------------------- */
863
Walter Dörwald0ae29812002-10-31 13:36:29 +0000864PyDoc_STRVAR(register_error__doc__,
865"register_error(errors, handler)\n\
866\n\
867Register the specified error handler under the name\n\
868errors. handler must be a callable object, that\n\
869will be called with an exception instance containing\n\
870information about the location of the encoding/decoding\n\
871error and must return a (replacement, new position) tuple.");
872
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000873static PyObject *register_error(PyObject *self, PyObject *args)
874{
875 const char *name;
876 PyObject *handler;
877
878 if (!PyArg_ParseTuple(args, "sO:register_error",
879 &name, &handler))
880 return NULL;
881 if (PyCodec_RegisterError(name, handler))
882 return NULL;
883 Py_INCREF(Py_None);
884 return Py_None;
885}
886
Walter Dörwald0ae29812002-10-31 13:36:29 +0000887PyDoc_STRVAR(lookup_error__doc__,
888"lookup_error(errors) -> handler\n\
889\n\
890Return the error handler for the specified error handling name\n\
891or raise a LookupError, if no handler exists under this name.");
892
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000893static PyObject *lookup_error(PyObject *self, PyObject *args)
894{
895 const char *name;
896
897 if (!PyArg_ParseTuple(args, "s:lookup_error",
898 &name))
899 return NULL;
900 return PyCodec_LookupError(name);
901}
902
Guido van Rossume2d67f92000-03-10 23:09:23 +0000903/* --- Module API --------------------------------------------------------- */
904
905static PyMethodDef _codecs_functions[] = {
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000906 {"register", codec_register, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +0000907 register__doc__},
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000908 {"lookup", codec_lookup, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +0000909 lookup__doc__},
Brett Cannon3e377de2004-07-10 21:41:14 +0000910 {"encode", codec_encode, METH_VARARGS,
911 encode__doc__},
912 {"decode", codec_decode, METH_VARARGS,
913 decode__doc__},
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000914 {"escape_encode", escape_encode, METH_VARARGS},
915 {"escape_decode", escape_decode, METH_VARARGS},
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000916#ifdef Py_USING_UNICODE
Martin v. Löwis43b936d2002-01-17 23:15:58 +0000917 {"utf_8_encode", utf_8_encode, METH_VARARGS},
918 {"utf_8_decode", utf_8_decode, METH_VARARGS},
919 {"utf_7_encode", utf_7_encode, METH_VARARGS},
920 {"utf_7_decode", utf_7_decode, METH_VARARGS},
921 {"utf_16_encode", utf_16_encode, METH_VARARGS},
922 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
923 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
924 {"utf_16_decode", utf_16_decode, METH_VARARGS},
925 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
926 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
927 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
928 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
929 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
930 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
931 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
932 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
933 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
934 {"latin_1_encode", latin_1_encode, METH_VARARGS},
935 {"latin_1_decode", latin_1_decode, METH_VARARGS},
936 {"ascii_encode", ascii_encode, METH_VARARGS},
937 {"ascii_decode", ascii_decode, METH_VARARGS},
938 {"charmap_encode", charmap_encode, METH_VARARGS},
939 {"charmap_decode", charmap_decode, METH_VARARGS},
940 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
941 {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000942#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Martin v. Löwis43b936d2002-01-17 23:15:58 +0000943 {"mbcs_encode", mbcs_encode, METH_VARARGS},
944 {"mbcs_decode", mbcs_decode, METH_VARARGS},
Guido van Rossum24bdb042000-03-28 20:29:59 +0000945#endif
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000946#endif /* Py_USING_UNICODE */
Walter Dörwald0ae29812002-10-31 13:36:29 +0000947 {"register_error", register_error, METH_VARARGS,
948 register_error__doc__},
949 {"lookup_error", lookup_error, METH_VARARGS,
950 lookup_error__doc__},
Guido van Rossume2d67f92000-03-10 23:09:23 +0000951 {NULL, NULL} /* sentinel */
952};
953
Mark Hammondfe51c6d2002-08-02 02:27:13 +0000954PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +0000955init_codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000956{
957 Py_InitModule("_codecs", _codecs_functions);
958}