blob: caee3fd4117797e2af9e96fd474c998d227fce84 [file] [log] [blame]
/* ------------------------------------------------------------------------

   _codecs -- Provides access to the codec registry and the builtin
              codecs.

   This module should never be imported directly. The standard library
   module "codecs" wraps this builtin module for use within Python.

   The codec registry is accessible via:

     register(search_function) -> None

     lookup(encoding) -> CodecInfo object

   The builtin Unicode codecs use the following interface:

     <encoding>_encode(Unicode_object[,errors='strict']) ->
        (string object, bytes consumed)

     <encoding>_decode(char_buffer_obj[,errors='strict']) ->
        (Unicode object, bytes consumed)

   <encoding>_encode() interfaces also accept non-Unicode objects as
   input. The objects are then converted to Unicode using
   PyUnicode_FromObject() prior to applying the conversion.

   These <encoding>s are available: utf_8, unicode_escape,
   raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
   mbcs (on win32).


Written by Marc-Andre Lemburg (mal@lemburg.com).

Copyright (c) Corporation for National Research Initiatives.

   ------------------------------------------------------------------------ */
37
Martin v. Löwis18e16552006-02-15 17:27:45 +000038#define PY_SSIZE_T_CLEAN
Guido van Rossume2d67f92000-03-10 23:09:23 +000039#include "Python.h"
40
41/* --- Registry ----------------------------------------------------------- */
42
Walter Dörwald0ae29812002-10-31 13:36:29 +000043PyDoc_STRVAR(register__doc__,
44"register(search_function)\n\
45\n\
46Register a codec search function. Search functions are expected to take\n\
47one argument, the encoding name in all lower case letters, and return\n\
Guido van Rossum36e0a922007-07-20 04:05:57 +000048a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
49(or a CodecInfo object).");
Walter Dörwald0ae29812002-10-31 13:36:29 +000050
Guido van Rossume2d67f92000-03-10 23:09:23 +000051static
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000052PyObject *codec_register(PyObject *self, PyObject *search_function)
Guido van Rossume2d67f92000-03-10 23:09:23 +000053{
Guido van Rossume2d67f92000-03-10 23:09:23 +000054 if (PyCodec_Register(search_function))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000055 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000056
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000057 Py_RETURN_NONE;
Guido van Rossume2d67f92000-03-10 23:09:23 +000058}
59
Walter Dörwald0ae29812002-10-31 13:36:29 +000060PyDoc_STRVAR(lookup__doc__,
Guido van Rossum36e0a922007-07-20 04:05:57 +000061"lookup(encoding) -> CodecInfo\n\
Walter Dörwald0ae29812002-10-31 13:36:29 +000062\n\
63Looks up a codec tuple in the Python codec registry and returns\n\
Guido van Rossum36e0a922007-07-20 04:05:57 +000064a tuple of function (or a CodecInfo object).");
Walter Dörwald0ae29812002-10-31 13:36:29 +000065
Guido van Rossume2d67f92000-03-10 23:09:23 +000066static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000067PyObject *codec_lookup(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000068{
69 char *encoding;
70
71 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000072 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +000073
74 return _PyCodec_Lookup(encoding);
Guido van Rossume2d67f92000-03-10 23:09:23 +000075}
76
Marc-André Lemburg3f419742004-07-10 12:06:10 +000077PyDoc_STRVAR(encode__doc__,
78"encode(obj, [encoding[,errors]]) -> object\n\
79\n\
80Encodes obj using the codec registered for encoding. encoding defaults\n\
81to the default encoding. errors may be given to set a different error\n\
82handling scheme. Default is 'strict' meaning that encoding errors raise\n\
83a ValueError. Other possible values are 'ignore', 'replace' and\n\
84'xmlcharrefreplace' as well as any other name registered with\n\
85codecs.register_error that can handle ValueErrors.");
86
87static PyObject *
88codec_encode(PyObject *self, PyObject *args)
89{
Brett Cannon3e377de2004-07-10 21:41:14 +000090 const char *encoding = NULL;
91 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +000092 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000093
Marc-André Lemburg3f419742004-07-10 12:06:10 +000094 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
95 return NULL;
96
97 if (encoding == NULL)
98 encoding = PyUnicode_GetDefaultEncoding();
99
100 /* Encode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000101 return PyCodec_Encode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000102}
103
104PyDoc_STRVAR(decode__doc__,
105"decode(obj, [encoding[,errors]]) -> object\n\
106\n\
107Decodes obj using the codec registered for encoding. encoding defaults\n\
108to the default encoding. errors may be given to set a different error\n\
109handling scheme. Default is 'strict' meaning that encoding errors raise\n\
110a ValueError. Other possible values are 'ignore' and 'replace'\n\
111as well as any other name registerd with codecs.register_error that is\n\
112able to handle ValueErrors.");
113
114static PyObject *
115codec_decode(PyObject *self, PyObject *args)
116{
Brett Cannon3e377de2004-07-10 21:41:14 +0000117 const char *encoding = NULL;
118 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000119 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000120
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000121 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
122 return NULL;
123
124 if (encoding == NULL)
125 encoding = PyUnicode_GetDefaultEncoding();
126
127 /* Decode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000128 return PyCodec_Decode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000129}
130
Guido van Rossume2d67f92000-03-10 23:09:23 +0000131/* --- Helpers ------------------------------------------------------------ */
132
133static
134PyObject *codec_tuple(PyObject *unicode,
Thomas Wouters477c8d52006-05-27 19:21:47 +0000135 Py_ssize_t len)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000136{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000137 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000138 if (unicode == NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000139 return NULL;
140 v = Py_BuildValue("On", unicode, len);
141 Py_DECREF(unicode);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000142 return v;
143}
144
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000145/* --- String codecs ------------------------------------------------------ */
146static PyObject *
147escape_decode(PyObject *self,
148 PyObject *args)
149{
150 const char *errors = NULL;
151 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000152 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000153
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000154 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
155 &data, &size, &errors))
156 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000157 return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000158 size);
159}
160
161static PyObject *
162escape_encode(PyObject *self,
163 PyObject *args)
164{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000165 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000166 PyObject *str;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000167 Py_ssize_t size;
168 Py_ssize_t newsize;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000169 const char *errors = NULL;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000170 PyObject *v;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000171
172 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
173 &PyString_Type, &str, &errors))
174 return NULL;
175
Martin v. Löwis5b222132007-06-10 09:51:05 +0000176 size = PyString_GET_SIZE(str);
Walter Dörwald1ab83302007-05-18 17:15:44 +0000177 newsize = 4*size;
178 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != size) {
179 PyErr_SetString(PyExc_OverflowError,
180 "string is too large to encode");
181 return NULL;
182 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000183 v = PyString_FromStringAndSize(NULL, newsize);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000184
Walter Dörwald1ab83302007-05-18 17:15:44 +0000185 if (v == NULL) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000186 return NULL;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000187 }
188 else {
189 register Py_ssize_t i;
190 register char c;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000191 register char *p = PyString_AS_STRING(v);
Walter Dörwald1ab83302007-05-18 17:15:44 +0000192
193 for (i = 0; i < size; i++) {
194 /* There's at least enough room for a hex escape */
Guido van Rossum98297ee2007-11-06 21:34:58 +0000195 assert(newsize - (p - PyString_AS_STRING(v)) >= 4);
Walter Dörwald1ab83302007-05-18 17:15:44 +0000196 c = PyString_AS_STRING(str)[i];
197 if (c == '\'' || c == '\\')
198 *p++ = '\\', *p++ = c;
199 else if (c == '\t')
200 *p++ = '\\', *p++ = 't';
201 else if (c == '\n')
202 *p++ = '\\', *p++ = 'n';
203 else if (c == '\r')
204 *p++ = '\\', *p++ = 'r';
205 else if (c < ' ' || c >= 0x7f) {
206 *p++ = '\\';
207 *p++ = 'x';
208 *p++ = hexdigits[(c & 0xf0) >> 4];
209 *p++ = hexdigits[c & 0xf];
210 }
211 else
212 *p++ = c;
213 }
214 *p = '\0';
Guido van Rossum98297ee2007-11-06 21:34:58 +0000215 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v)))) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000216 return NULL;
217 }
218 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000219
Guido van Rossum98297ee2007-11-06 21:34:58 +0000220 return codec_tuple(v, PyString_Size(v));
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000221}
222
Guido van Rossume2d67f92000-03-10 23:09:23 +0000223/* --- Decoder ------------------------------------------------------------ */
224
225static PyObject *
226unicode_internal_decode(PyObject *self,
227 PyObject *args)
228{
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000229 PyObject *obj;
230 const char *errors = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000231 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000232 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000233
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000234 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
235 &obj, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000236 return NULL;
237
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000238 if (PyUnicode_Check(obj)) {
239 Py_INCREF(obj);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000240 return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000241 }
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000242 else {
243 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
244 return NULL;
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000245
246 return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000247 size);
248 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000249}
250
251static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000252utf_7_decode(PyObject *self,
253 PyObject *args)
254{
255 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000256 Py_ssize_t size;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000257 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000258
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000259 if (!PyArg_ParseTuple(args, "t#|z:utf_7_decode",
260 &data, &size, &errors))
261 return NULL;
262
263 return codec_tuple(PyUnicode_DecodeUTF7(data, size, errors),
264 size);
265}
266
267static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000268utf_8_decode(PyObject *self,
269 PyObject *args)
270{
271 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000272 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000273 const char *errors = NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000274 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000275 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000276 PyObject *decoded = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000277
Walter Dörwald69652032004-09-07 20:24:22 +0000278 if (!PyArg_ParseTuple(args, "t#|zi:utf_8_decode",
279 &data, &size, &errors, &final))
280 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000281 if (size < 0) {
282 PyErr_SetString(PyExc_ValueError, "negative argument");
283 return 0;
284 }
Walter Dörwald69652032004-09-07 20:24:22 +0000285 consumed = size;
286
287 decoded = PyUnicode_DecodeUTF8Stateful(data, size, errors,
288 final ? NULL : &consumed);
289 if (decoded == NULL)
290 return NULL;
291 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000292}
293
294static PyObject *
295utf_16_decode(PyObject *self,
296 PyObject *args)
297{
298 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000299 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000300 const char *errors = NULL;
301 int byteorder = 0;
Walter Dörwald69652032004-09-07 20:24:22 +0000302 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000303 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000304 PyObject *decoded;
305
306 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_decode",
307 &data, &size, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000308 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000309 if (size < 0) {
310 PyErr_SetString(PyExc_ValueError, "negative argument");
311 return 0;
312 }
Thomas Wouters477c8d52006-05-27 19:21:47 +0000313 consumed = size; /* This is overwritten unless final is true. */
Walter Dörwald69652032004-09-07 20:24:22 +0000314 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
315 final ? NULL : &consumed);
316 if (decoded == NULL)
317 return NULL;
318 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000319}
320
321static PyObject *
322utf_16_le_decode(PyObject *self,
323 PyObject *args)
324{
325 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000326 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000327 const char *errors = NULL;
328 int byteorder = -1;
Walter Dörwald69652032004-09-07 20:24:22 +0000329 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000330 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000331 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000332
Walter Dörwald69652032004-09-07 20:24:22 +0000333 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_le_decode",
334 &data, &size, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000335 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000336
Martin v. Löwis18e16552006-02-15 17:27:45 +0000337 if (size < 0) {
338 PyErr_SetString(PyExc_ValueError, "negative argument");
339 return 0;
340 }
Thomas Wouters477c8d52006-05-27 19:21:47 +0000341 consumed = size; /* This is overwritten unless final is true. */
Walter Dörwald69652032004-09-07 20:24:22 +0000342 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
343 &byteorder, final ? NULL : &consumed);
344 if (decoded == NULL)
345 return NULL;
346 return codec_tuple(decoded, consumed);
347
Guido van Rossume2d67f92000-03-10 23:09:23 +0000348}
349
350static PyObject *
351utf_16_be_decode(PyObject *self,
352 PyObject *args)
353{
354 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000355 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000356 const char *errors = NULL;
357 int byteorder = 1;
Walter Dörwald69652032004-09-07 20:24:22 +0000358 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000359 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000360 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000361
Walter Dörwald69652032004-09-07 20:24:22 +0000362 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_be_decode",
363 &data, &size, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000364 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000365 if (size < 0) {
366 PyErr_SetString(PyExc_ValueError, "negative argument");
367 return 0;
368 }
Thomas Wouters477c8d52006-05-27 19:21:47 +0000369 consumed = size; /* This is overwritten unless final is true. */
Walter Dörwald69652032004-09-07 20:24:22 +0000370 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
371 &byteorder, final ? NULL : &consumed);
372 if (decoded == NULL)
373 return NULL;
374 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000375}
376
377/* This non-standard version also provides access to the byteorder
378 parameter of the builtin UTF-16 codec.
379
380 It returns a tuple (unicode, bytesread, byteorder) with byteorder
381 being the value in effect at the end of data.
382
383*/
384
385static PyObject *
386utf_16_ex_decode(PyObject *self,
387 PyObject *args)
388{
389 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000390 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000391 const char *errors = NULL;
392 int byteorder = 0;
393 PyObject *unicode, *tuple;
Walter Dörwald69652032004-09-07 20:24:22 +0000394 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000395 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000396
397 if (!PyArg_ParseTuple(args, "t#|zii:utf_16_ex_decode",
398 &data, &size, &errors, &byteorder, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000399 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000400 if (size < 0) {
401 PyErr_SetString(PyExc_ValueError, "negative argument");
402 return 0;
403 }
Thomas Wouters477c8d52006-05-27 19:21:47 +0000404 consumed = size; /* This is overwritten unless final is true. */
Walter Dörwald69652032004-09-07 20:24:22 +0000405 unicode = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
406 final ? NULL : &consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000407 if (unicode == NULL)
408 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000409 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000410 Py_DECREF(unicode);
411 return tuple;
412}
413
414static PyObject *
Walter Dörwald41980ca2007-08-16 21:55:45 +0000415utf_32_decode(PyObject *self,
416 PyObject *args)
417{
418 const char *data;
419 Py_ssize_t size;
420 const char *errors = NULL;
421 int byteorder = 0;
422 int final = 0;
423 Py_ssize_t consumed;
424 PyObject *decoded;
425
426 if (!PyArg_ParseTuple(args, "t#|zi:utf_32_decode",
427 &data, &size, &errors, &final))
428 return NULL;
429 if (size < 0) {
430 PyErr_SetString(PyExc_ValueError, "negative argument");
431 return 0;
432 }
433 consumed = size; /* This is overwritten unless final is true. */
434 decoded = PyUnicode_DecodeUTF32Stateful(data, size, errors, &byteorder,
435 final ? NULL : &consumed);
436 if (decoded == NULL)
437 return NULL;
438 return codec_tuple(decoded, consumed);
439}
440
441static PyObject *
442utf_32_le_decode(PyObject *self,
443 PyObject *args)
444{
445 const char *data;
446 Py_ssize_t size;
447 const char *errors = NULL;
448 int byteorder = -1;
449 int final = 0;
450 Py_ssize_t consumed;
451 PyObject *decoded = NULL;
452
453 if (!PyArg_ParseTuple(args, "t#|zi:utf_32_le_decode",
454 &data, &size, &errors, &final))
455 return NULL;
456
457 if (size < 0) {
458 PyErr_SetString(PyExc_ValueError, "negative argument");
459 return 0;
460 }
461 consumed = size; /* This is overwritten unless final is true. */
462 decoded = PyUnicode_DecodeUTF32Stateful(data, size, errors,
463 &byteorder, final ? NULL : &consumed);
464 if (decoded == NULL)
465 return NULL;
466 return codec_tuple(decoded, consumed);
467
468}
469
470static PyObject *
471utf_32_be_decode(PyObject *self,
472 PyObject *args)
473{
474 const char *data;
475 Py_ssize_t size;
476 const char *errors = NULL;
477 int byteorder = 1;
478 int final = 0;
479 Py_ssize_t consumed;
480 PyObject *decoded = NULL;
481
482 if (!PyArg_ParseTuple(args, "t#|zi:utf_32_be_decode",
483 &data, &size, &errors, &final))
484 return NULL;
485 if (size < 0) {
486 PyErr_SetString(PyExc_ValueError, "negative argument");
487 return 0;
488 }
489 consumed = size; /* This is overwritten unless final is true. */
490 decoded = PyUnicode_DecodeUTF32Stateful(data, size, errors,
491 &byteorder, final ? NULL : &consumed);
492 if (decoded == NULL)
493 return NULL;
494 return codec_tuple(decoded, consumed);
495}
496
497/* This non-standard version also provides access to the byteorder
498 parameter of the builtin UTF-32 codec.
499
500 It returns a tuple (unicode, bytesread, byteorder) with byteorder
501 being the value in effect at the end of data.
502
503*/
504
505static PyObject *
506utf_32_ex_decode(PyObject *self,
507 PyObject *args)
508{
509 const char *data;
510 Py_ssize_t size;
511 const char *errors = NULL;
512 int byteorder = 0;
513 PyObject *unicode, *tuple;
514 int final = 0;
515 Py_ssize_t consumed;
516
517 if (!PyArg_ParseTuple(args, "t#|zii:utf_32_ex_decode",
518 &data, &size, &errors, &byteorder, &final))
519 return NULL;
520 if (size < 0) {
521 PyErr_SetString(PyExc_ValueError, "negative argument");
522 return 0;
523 }
524 consumed = size; /* This is overwritten unless final is true. */
525 unicode = PyUnicode_DecodeUTF32Stateful(data, size, errors, &byteorder,
526 final ? NULL : &consumed);
527 if (unicode == NULL)
528 return NULL;
529 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
530 Py_DECREF(unicode);
531 return tuple;
532}
533
534static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000535unicode_escape_decode(PyObject *self,
536 PyObject *args)
537{
538 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000539 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000540 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000541
Guido van Rossume2d67f92000-03-10 23:09:23 +0000542 if (!PyArg_ParseTuple(args, "t#|z:unicode_escape_decode",
543 &data, &size, &errors))
544 return NULL;
545
546 return codec_tuple(PyUnicode_DecodeUnicodeEscape(data, size, errors),
547 size);
548}
549
550static PyObject *
551raw_unicode_escape_decode(PyObject *self,
552 PyObject *args)
553{
554 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000555 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000556 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000557
Guido van Rossume2d67f92000-03-10 23:09:23 +0000558 if (!PyArg_ParseTuple(args, "t#|z:raw_unicode_escape_decode",
559 &data, &size, &errors))
560 return NULL;
561
562 return codec_tuple(PyUnicode_DecodeRawUnicodeEscape(data, size, errors),
563 size);
564}
565
566static PyObject *
567latin_1_decode(PyObject *self,
568 PyObject *args)
569{
570 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000571 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000572 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000573
Guido van Rossume2d67f92000-03-10 23:09:23 +0000574 if (!PyArg_ParseTuple(args, "t#|z:latin_1_decode",
575 &data, &size, &errors))
576 return NULL;
577
578 return codec_tuple(PyUnicode_DecodeLatin1(data, size, errors),
579 size);
580}
581
582static PyObject *
583ascii_decode(PyObject *self,
584 PyObject *args)
585{
586 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000587 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000588 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000589
Guido van Rossume2d67f92000-03-10 23:09:23 +0000590 if (!PyArg_ParseTuple(args, "t#|z:ascii_decode",
591 &data, &size, &errors))
592 return NULL;
593
594 return codec_tuple(PyUnicode_DecodeASCII(data, size, errors),
595 size);
596}
597
598static PyObject *
599charmap_decode(PyObject *self,
600 PyObject *args)
601{
602 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000603 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000604 const char *errors = NULL;
605 PyObject *mapping = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000606
Guido van Rossume2d67f92000-03-10 23:09:23 +0000607 if (!PyArg_ParseTuple(args, "t#|zO:charmap_decode",
608 &data, &size, &errors, &mapping))
609 return NULL;
610 if (mapping == Py_None)
611 mapping = NULL;
612
613 return codec_tuple(PyUnicode_DecodeCharmap(data, size, mapping, errors),
614 size);
615}
616
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000617#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000618
619static PyObject *
620mbcs_decode(PyObject *self,
621 PyObject *args)
622{
623 const char *data;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000624 Py_ssize_t size, consumed;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000625 const char *errors = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000626 int final = 0;
627 PyObject *decoded;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000628
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000629 if (!PyArg_ParseTuple(args, "t#|zi:mbcs_decode",
630 &data, &size, &errors, &final))
Guido van Rossum24bdb042000-03-28 20:29:59 +0000631 return NULL;
632
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000633 decoded = PyUnicode_DecodeMBCSStateful(
634 data, size, errors, final ? NULL : &consumed);
635 if (!decoded)
636 return NULL;
637 return codec_tuple(decoded, final ? size : consumed);
Guido van Rossum24bdb042000-03-28 20:29:59 +0000638}
639
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000640#endif /* MS_WINDOWS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000641
Guido van Rossume2d67f92000-03-10 23:09:23 +0000642/* --- Encoder ------------------------------------------------------------ */
643
644static PyObject *
645readbuffer_encode(PyObject *self,
646 PyObject *args)
647{
648 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000649 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000650 const char *errors = NULL;
651
652 if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
653 &data, &size, &errors))
654 return NULL;
655
Guido van Rossum98297ee2007-11-06 21:34:58 +0000656 return codec_tuple(PyString_FromStringAndSize(data, size), size);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000657}
658
659static PyObject *
660charbuffer_encode(PyObject *self,
661 PyObject *args)
662{
663 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000664 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000665 const char *errors = NULL;
666
667 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
668 &data, &size, &errors))
669 return NULL;
670
Guido van Rossum98297ee2007-11-06 21:34:58 +0000671 return codec_tuple(PyString_FromStringAndSize(data, size), size);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000672}
673
674static PyObject *
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000675unicode_internal_encode(PyObject *self,
676 PyObject *args)
677{
678 PyObject *obj;
679 const char *errors = NULL;
680 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000681 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000682
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000683 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
684 &obj, &errors))
685 return NULL;
686
687 if (PyUnicode_Check(obj)) {
688 data = PyUnicode_AS_DATA(obj);
689 size = PyUnicode_GET_DATA_SIZE(obj);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000690 return codec_tuple(PyString_FromStringAndSize(data, size), size);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000691 }
692 else {
693 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
694 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000695 return codec_tuple(PyString_FromStringAndSize(data, size), size);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000696 }
697}
698
699static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000700utf_7_encode(PyObject *self,
701 PyObject *args)
702{
703 PyObject *str, *v;
704 const char *errors = NULL;
705
706 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
707 &str, &errors))
708 return NULL;
709
710 str = PyUnicode_FromObject(str);
711 if (str == NULL)
712 return NULL;
713 v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
714 PyUnicode_GET_SIZE(str),
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000715 0,
716 0,
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000717 errors),
718 PyUnicode_GET_SIZE(str));
719 Py_DECREF(str);
720 return v;
721}
722
723static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000724utf_8_encode(PyObject *self,
725 PyObject *args)
726{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000727 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000728 const char *errors = NULL;
729
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000730 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000731 &str, &errors))
732 return NULL;
733
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000734 str = PyUnicode_FromObject(str);
735 if (str == NULL)
736 return NULL;
737 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
738 PyUnicode_GET_SIZE(str),
739 errors),
740 PyUnicode_GET_SIZE(str));
741 Py_DECREF(str);
742 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000743}
744
745/* This version provides access to the byteorder parameter of the
746 builtin UTF-16 codecs as optional third argument. It defaults to 0
747 which means: use the native byte order and prepend the data with a
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000748 BOM mark.
Guido van Rossume2d67f92000-03-10 23:09:23 +0000749
750*/
751
752static PyObject *
753utf_16_encode(PyObject *self,
754 PyObject *args)
755{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000756 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000757 const char *errors = NULL;
758 int byteorder = 0;
759
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000760 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000761 &str, &errors, &byteorder))
762 return NULL;
763
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000764 str = PyUnicode_FromObject(str);
765 if (str == NULL)
766 return NULL;
767 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
768 PyUnicode_GET_SIZE(str),
769 errors,
770 byteorder),
771 PyUnicode_GET_SIZE(str));
772 Py_DECREF(str);
773 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000774}
775
776static PyObject *
777utf_16_le_encode(PyObject *self,
778 PyObject *args)
779{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000780 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000781 const char *errors = NULL;
782
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000783 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000784 &str, &errors))
785 return NULL;
786
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000787 str = PyUnicode_FromObject(str);
788 if (str == NULL)
789 return NULL;
790 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000791 PyUnicode_GET_SIZE(str),
792 errors,
793 -1),
794 PyUnicode_GET_SIZE(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000795 Py_DECREF(str);
796 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000797}
798
799static PyObject *
800utf_16_be_encode(PyObject *self,
801 PyObject *args)
802{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000803 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000804 const char *errors = NULL;
805
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000806 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000807 &str, &errors))
808 return NULL;
809
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000810 str = PyUnicode_FromObject(str);
811 if (str == NULL)
812 return NULL;
813 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
814 PyUnicode_GET_SIZE(str),
815 errors,
816 +1),
817 PyUnicode_GET_SIZE(str));
818 Py_DECREF(str);
819 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000820}
821
/* This version provides access to the byteorder parameter of the
   builtin UTF-32 codecs as an optional third argument. It defaults
   to 0, which means: use the native byte order and prepend a BOM to
   the data.

*/
828
829static PyObject *
830utf_32_encode(PyObject *self,
831 PyObject *args)
832{
833 PyObject *str, *v;
834 const char *errors = NULL;
835 int byteorder = 0;
836
837 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
838 &str, &errors, &byteorder))
839 return NULL;
840
841 str = PyUnicode_FromObject(str);
842 if (str == NULL)
843 return NULL;
844 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
845 PyUnicode_GET_SIZE(str),
846 errors,
847 byteorder),
848 PyUnicode_GET_SIZE(str));
849 Py_DECREF(str);
850 return v;
851}
852
853static PyObject *
854utf_32_le_encode(PyObject *self,
855 PyObject *args)
856{
857 PyObject *str, *v;
858 const char *errors = NULL;
859
860 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
861 &str, &errors))
862 return NULL;
863
864 str = PyUnicode_FromObject(str);
865 if (str == NULL)
866 return NULL;
867 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
868 PyUnicode_GET_SIZE(str),
869 errors,
870 -1),
871 PyUnicode_GET_SIZE(str));
872 Py_DECREF(str);
873 return v;
874}
875
876static PyObject *
877utf_32_be_encode(PyObject *self,
878 PyObject *args)
879{
880 PyObject *str, *v;
881 const char *errors = NULL;
882
883 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
884 &str, &errors))
885 return NULL;
886
887 str = PyUnicode_FromObject(str);
888 if (str == NULL)
889 return NULL;
890 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
891 PyUnicode_GET_SIZE(str),
892 errors,
893 +1),
894 PyUnicode_GET_SIZE(str));
895 Py_DECREF(str);
896 return v;
897}
898
Guido van Rossume2d67f92000-03-10 23:09:23 +0000899static PyObject *
900unicode_escape_encode(PyObject *self,
901 PyObject *args)
902{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000903 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000904 const char *errors = NULL;
905
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000906 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000907 &str, &errors))
908 return NULL;
909
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000910 str = PyUnicode_FromObject(str);
911 if (str == NULL)
912 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000913 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000914 PyUnicode_GET_SIZE(str)),
915 PyUnicode_GET_SIZE(str));
916 Py_DECREF(str);
917 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000918}
919
920static PyObject *
921raw_unicode_escape_encode(PyObject *self,
922 PyObject *args)
923{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000924 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000925 const char *errors = NULL;
926
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000927 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000928 &str, &errors))
929 return NULL;
930
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000931 str = PyUnicode_FromObject(str);
932 if (str == NULL)
933 return NULL;
934 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000935 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000936 PyUnicode_GET_SIZE(str)),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000937 PyUnicode_GET_SIZE(str));
938 Py_DECREF(str);
939 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000940}
941
942static PyObject *
943latin_1_encode(PyObject *self,
944 PyObject *args)
945{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000946 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000947 const char *errors = NULL;
948
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000949 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000950 &str, &errors))
951 return NULL;
952
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000953 str = PyUnicode_FromObject(str);
954 if (str == NULL)
955 return NULL;
956 v = codec_tuple(PyUnicode_EncodeLatin1(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000957 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000958 PyUnicode_GET_SIZE(str),
959 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000960 PyUnicode_GET_SIZE(str));
961 Py_DECREF(str);
962 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000963}
964
965static PyObject *
966ascii_encode(PyObject *self,
967 PyObject *args)
968{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000969 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000970 const char *errors = NULL;
971
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000972 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000973 &str, &errors))
974 return NULL;
975
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000976 str = PyUnicode_FromObject(str);
977 if (str == NULL)
978 return NULL;
979 v = codec_tuple(PyUnicode_EncodeASCII(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000980 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000981 PyUnicode_GET_SIZE(str),
982 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000983 PyUnicode_GET_SIZE(str));
984 Py_DECREF(str);
985 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000986}
987
988static PyObject *
989charmap_encode(PyObject *self,
990 PyObject *args)
991{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000992 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000993 const char *errors = NULL;
994 PyObject *mapping = NULL;
995
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000996 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000997 &str, &errors, &mapping))
998 return NULL;
999 if (mapping == Py_None)
1000 mapping = NULL;
1001
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001002 str = PyUnicode_FromObject(str);
1003 if (str == NULL)
1004 return NULL;
1005 v = codec_tuple(PyUnicode_EncodeCharmap(
Walter Dörwald9fd115c2005-11-02 08:30:08 +00001006 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +00001007 PyUnicode_GET_SIZE(str),
Walter Dörwald9fd115c2005-11-02 08:30:08 +00001008 mapping,
Guido van Rossume2d67f92000-03-10 23:09:23 +00001009 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001010 PyUnicode_GET_SIZE(str));
1011 Py_DECREF(str);
1012 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001013}
1014
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001015static PyObject*
1016charmap_build(PyObject *self, PyObject *args)
1017{
1018 PyObject *map;
1019 if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
1020 return NULL;
1021 return PyUnicode_BuildEncodingMap(map);
1022}
1023
Martin v. Löwis6238d2b2002-06-30 15:26:10 +00001024#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +00001025
1026static PyObject *
1027mbcs_encode(PyObject *self,
1028 PyObject *args)
1029{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001030 PyObject *str, *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001031 const char *errors = NULL;
1032
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001033 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
Guido van Rossum24bdb042000-03-28 20:29:59 +00001034 &str, &errors))
1035 return NULL;
1036
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001037 str = PyUnicode_FromObject(str);
1038 if (str == NULL)
1039 return NULL;
1040 v = codec_tuple(PyUnicode_EncodeMBCS(
Walter Dörwald9fd115c2005-11-02 08:30:08 +00001041 PyUnicode_AS_UNICODE(str),
Guido van Rossum24bdb042000-03-28 20:29:59 +00001042 PyUnicode_GET_SIZE(str),
1043 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001044 PyUnicode_GET_SIZE(str));
1045 Py_DECREF(str);
1046 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001047}
1048
Martin v. Löwis6238d2b2002-06-30 15:26:10 +00001049#endif /* MS_WINDOWS */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001050
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001051/* --- Error handler registry --------------------------------------------- */
1052
Walter Dörwald0ae29812002-10-31 13:36:29 +00001053PyDoc_STRVAR(register_error__doc__,
1054"register_error(errors, handler)\n\
1055\n\
1056Register the specified error handler under the name\n\
1057errors. handler must be a callable object, that\n\
1058will be called with an exception instance containing\n\
1059information about the location of the encoding/decoding\n\
1060error and must return a (replacement, new position) tuple.");
1061
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001062static PyObject *register_error(PyObject *self, PyObject *args)
1063{
1064 const char *name;
1065 PyObject *handler;
1066
1067 if (!PyArg_ParseTuple(args, "sO:register_error",
1068 &name, &handler))
1069 return NULL;
1070 if (PyCodec_RegisterError(name, handler))
1071 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001072 Py_RETURN_NONE;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001073}
1074
Walter Dörwald0ae29812002-10-31 13:36:29 +00001075PyDoc_STRVAR(lookup_error__doc__,
1076"lookup_error(errors) -> handler\n\
1077\n\
1078Return the error handler for the specified error handling name\n\
1079or raise a LookupError, if no handler exists under this name.");
1080
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001081static PyObject *lookup_error(PyObject *self, PyObject *args)
1082{
1083 const char *name;
1084
1085 if (!PyArg_ParseTuple(args, "s:lookup_error",
1086 &name))
1087 return NULL;
1088 return PyCodec_LookupError(name);
1089}
1090
Guido van Rossume2d67f92000-03-10 23:09:23 +00001091/* --- Module API --------------------------------------------------------- */
1092
1093static PyMethodDef _codecs_functions[] = {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001094 {"register", codec_register, METH_O,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001095 register__doc__},
Marc-André Lemburg3f419742004-07-10 12:06:10 +00001096 {"lookup", codec_lookup, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001097 lookup__doc__},
Brett Cannon3e377de2004-07-10 21:41:14 +00001098 {"encode", codec_encode, METH_VARARGS,
1099 encode__doc__},
1100 {"decode", codec_decode, METH_VARARGS,
1101 decode__doc__},
Martin v. Löwis8a8da792002-08-14 07:46:28 +00001102 {"escape_encode", escape_encode, METH_VARARGS},
1103 {"escape_decode", escape_decode, METH_VARARGS},
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001104 {"utf_8_encode", utf_8_encode, METH_VARARGS},
1105 {"utf_8_decode", utf_8_decode, METH_VARARGS},
1106 {"utf_7_encode", utf_7_encode, METH_VARARGS},
1107 {"utf_7_decode", utf_7_decode, METH_VARARGS},
1108 {"utf_16_encode", utf_16_encode, METH_VARARGS},
1109 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1110 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1111 {"utf_16_decode", utf_16_decode, METH_VARARGS},
1112 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1113 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1114 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
Walter Dörwald41980ca2007-08-16 21:55:45 +00001115 {"utf_32_encode", utf_32_encode, METH_VARARGS},
1116 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1117 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1118 {"utf_32_decode", utf_32_decode, METH_VARARGS},
1119 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1120 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1121 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001122 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1123 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1124 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1125 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1126 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1127 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1128 {"latin_1_encode", latin_1_encode, METH_VARARGS},
1129 {"latin_1_decode", latin_1_decode, METH_VARARGS},
1130 {"ascii_encode", ascii_encode, METH_VARARGS},
1131 {"ascii_decode", ascii_decode, METH_VARARGS},
1132 {"charmap_encode", charmap_encode, METH_VARARGS},
1133 {"charmap_decode", charmap_decode, METH_VARARGS},
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001134 {"charmap_build", charmap_build, METH_VARARGS},
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001135 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
1136 {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
Martin v. Löwis6238d2b2002-06-30 15:26:10 +00001137#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001138 {"mbcs_encode", mbcs_encode, METH_VARARGS},
1139 {"mbcs_decode", mbcs_decode, METH_VARARGS},
Guido van Rossum24bdb042000-03-28 20:29:59 +00001140#endif
Walter Dörwald0ae29812002-10-31 13:36:29 +00001141 {"register_error", register_error, METH_VARARGS,
1142 register_error__doc__},
1143 {"lookup_error", lookup_error, METH_VARARGS,
1144 lookup_error__doc__},
Guido van Rossume2d67f92000-03-10 23:09:23 +00001145 {NULL, NULL} /* sentinel */
1146};
1147
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001148PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00001149init_codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001150{
1151 Py_InitModule("_codecs", _codecs_functions);
1152}