blob: fc3e3f9bbd66add4a115d409048f5c951a235719 [file] [log] [blame]
/* ------------------------------------------------------------------------

   _codecs -- Provides access to the codec registry and the builtin
              codecs.

   This module should never be imported directly. The standard library
   module "codecs" wraps this builtin module for use within Python.

   The codec registry is accessible via:

     register(search_function) -> None

     lookup(encoding) -> CodecInfo object

   The builtin Unicode codecs use the following interface:

     <encoding>_encode(Unicode_object[,errors='strict']) ->
        (string object, bytes consumed)

     <encoding>_decode(char_buffer_obj[,errors='strict']) ->
        (Unicode object, bytes consumed)

   <encoding>_encode() interfaces also accept non-Unicode objects as
   input. The objects are then converted to Unicode using
   PyUnicode_FromObject() prior to applying the conversion.

   These <encoding>s are available: utf_8, unicode_escape,
   raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
   mbcs (on win32).


Written by Marc-Andre Lemburg (mal@lemburg.com).

Copyright (c) Corporation for National Research Initiatives.

   ------------------------------------------------------------------------ */
37
Martin v. Löwis18e16552006-02-15 17:27:45 +000038#define PY_SSIZE_T_CLEAN
Guido van Rossume2d67f92000-03-10 23:09:23 +000039#include "Python.h"
40
41/* --- Registry ----------------------------------------------------------- */
42
Walter Dörwald0ae29812002-10-31 13:36:29 +000043PyDoc_STRVAR(register__doc__,
44"register(search_function)\n\
45\n\
46Register a codec search function. Search functions are expected to take\n\
47one argument, the encoding name in all lower case letters, and return\n\
Guido van Rossum36e0a922007-07-20 04:05:57 +000048a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
49(or a CodecInfo object).");
Walter Dörwald0ae29812002-10-31 13:36:29 +000050
Guido van Rossume2d67f92000-03-10 23:09:23 +000051static
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000052PyObject *codec_register(PyObject *self, PyObject *search_function)
Guido van Rossume2d67f92000-03-10 23:09:23 +000053{
Guido van Rossume2d67f92000-03-10 23:09:23 +000054 if (PyCodec_Register(search_function))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000055 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000056
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000057 Py_RETURN_NONE;
Guido van Rossume2d67f92000-03-10 23:09:23 +000058}
59
Walter Dörwald0ae29812002-10-31 13:36:29 +000060PyDoc_STRVAR(lookup__doc__,
Guido van Rossum36e0a922007-07-20 04:05:57 +000061"lookup(encoding) -> CodecInfo\n\
Walter Dörwald0ae29812002-10-31 13:36:29 +000062\n\
63Looks up a codec tuple in the Python codec registry and returns\n\
Guido van Rossum36e0a922007-07-20 04:05:57 +000064a tuple of function (or a CodecInfo object).");
Walter Dörwald0ae29812002-10-31 13:36:29 +000065
Guido van Rossume2d67f92000-03-10 23:09:23 +000066static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000067PyObject *codec_lookup(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000068{
69 char *encoding;
70
71 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000072 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +000073
74 return _PyCodec_Lookup(encoding);
Guido van Rossume2d67f92000-03-10 23:09:23 +000075}
76
Marc-André Lemburg3f419742004-07-10 12:06:10 +000077PyDoc_STRVAR(encode__doc__,
78"encode(obj, [encoding[,errors]]) -> object\n\
79\n\
80Encodes obj using the codec registered for encoding. encoding defaults\n\
81to the default encoding. errors may be given to set a different error\n\
82handling scheme. Default is 'strict' meaning that encoding errors raise\n\
83a ValueError. Other possible values are 'ignore', 'replace' and\n\
84'xmlcharrefreplace' as well as any other name registered with\n\
85codecs.register_error that can handle ValueErrors.");
86
87static PyObject *
88codec_encode(PyObject *self, PyObject *args)
89{
Brett Cannon3e377de2004-07-10 21:41:14 +000090 const char *encoding = NULL;
91 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +000092 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000093
Marc-André Lemburg3f419742004-07-10 12:06:10 +000094 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
95 return NULL;
96
97 if (encoding == NULL)
98 encoding = PyUnicode_GetDefaultEncoding();
99
100 /* Encode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000101 return PyCodec_Encode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000102}
103
104PyDoc_STRVAR(decode__doc__,
105"decode(obj, [encoding[,errors]]) -> object\n\
106\n\
107Decodes obj using the codec registered for encoding. encoding defaults\n\
108to the default encoding. errors may be given to set a different error\n\
109handling scheme. Default is 'strict' meaning that encoding errors raise\n\
110a ValueError. Other possible values are 'ignore' and 'replace'\n\
Benjamin Petersonf10a79a2008-10-11 00:49:57 +0000111as well as any other name registered with codecs.register_error that is\n\
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000112able to handle ValueErrors.");
113
114static PyObject *
115codec_decode(PyObject *self, PyObject *args)
116{
Brett Cannon3e377de2004-07-10 21:41:14 +0000117 const char *encoding = NULL;
118 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000119 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000120
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000121 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
122 return NULL;
123
124 if (encoding == NULL)
125 encoding = PyUnicode_GetDefaultEncoding();
126
127 /* Decode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000128 return PyCodec_Decode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000129}
130
Guido van Rossume2d67f92000-03-10 23:09:23 +0000131/* --- Helpers ------------------------------------------------------------ */
132
133static
134PyObject *codec_tuple(PyObject *unicode,
Thomas Wouters477c8d52006-05-27 19:21:47 +0000135 Py_ssize_t len)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000136{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000137 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000138 if (unicode == NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000139 return NULL;
140 v = Py_BuildValue("On", unicode, len);
141 Py_DECREF(unicode);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000142 return v;
143}
144
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000145/* --- String codecs ------------------------------------------------------ */
146static PyObject *
147escape_decode(PyObject *self,
148 PyObject *args)
149{
150 const char *errors = NULL;
151 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000152 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000153
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000154 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
155 &data, &size, &errors))
156 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +0000157 return codec_tuple(PyBytes_DecodeEscape(data, size, errors, 0, NULL),
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000158 size);
159}
160
161static PyObject *
162escape_encode(PyObject *self,
163 PyObject *args)
164{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000165 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000166 PyObject *str;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000167 Py_ssize_t size;
168 Py_ssize_t newsize;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000169 const char *errors = NULL;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000170 PyObject *v;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000171
172 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
Christian Heimes72b710a2008-05-26 13:28:38 +0000173 &PyBytes_Type, &str, &errors))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000174 return NULL;
175
Christian Heimes72b710a2008-05-26 13:28:38 +0000176 size = PyBytes_GET_SIZE(str);
Walter Dörwald1ab83302007-05-18 17:15:44 +0000177 newsize = 4*size;
178 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != size) {
179 PyErr_SetString(PyExc_OverflowError,
180 "string is too large to encode");
181 return NULL;
182 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000183 v = PyBytes_FromStringAndSize(NULL, newsize);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000184
Walter Dörwald1ab83302007-05-18 17:15:44 +0000185 if (v == NULL) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000186 return NULL;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000187 }
188 else {
189 register Py_ssize_t i;
190 register char c;
Christian Heimes72b710a2008-05-26 13:28:38 +0000191 register char *p = PyBytes_AS_STRING(v);
Walter Dörwald1ab83302007-05-18 17:15:44 +0000192
193 for (i = 0; i < size; i++) {
194 /* There's at least enough room for a hex escape */
Christian Heimes72b710a2008-05-26 13:28:38 +0000195 assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
196 c = PyBytes_AS_STRING(str)[i];
Walter Dörwald1ab83302007-05-18 17:15:44 +0000197 if (c == '\'' || c == '\\')
198 *p++ = '\\', *p++ = c;
199 else if (c == '\t')
200 *p++ = '\\', *p++ = 't';
201 else if (c == '\n')
202 *p++ = '\\', *p++ = 'n';
203 else if (c == '\r')
204 *p++ = '\\', *p++ = 'r';
205 else if (c < ' ' || c >= 0x7f) {
206 *p++ = '\\';
207 *p++ = 'x';
208 *p++ = hexdigits[(c & 0xf0) >> 4];
209 *p++ = hexdigits[c & 0xf];
210 }
211 else
212 *p++ = c;
213 }
214 *p = '\0';
Christian Heimes72b710a2008-05-26 13:28:38 +0000215 if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000216 return NULL;
217 }
218 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000219
Christian Heimes72b710a2008-05-26 13:28:38 +0000220 return codec_tuple(v, PyBytes_Size(v));
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000221}
222
Guido van Rossume2d67f92000-03-10 23:09:23 +0000223/* --- Decoder ------------------------------------------------------------ */
224
225static PyObject *
226unicode_internal_decode(PyObject *self,
227 PyObject *args)
228{
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000229 PyObject *obj;
230 const char *errors = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000231 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000232 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000233
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000234 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
235 &obj, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000236 return NULL;
237
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000238 if (PyUnicode_Check(obj)) {
239 Py_INCREF(obj);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000240 return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000241 }
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000242 else {
243 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
244 return NULL;
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000245
246 return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000247 size);
248 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000249}
250
251static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000252utf_7_decode(PyObject *self,
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000253 PyObject *args)
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000254{
Martin v. Löwis423be952008-08-13 15:53:07 +0000255 Py_buffer pbuf;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000256 const char *errors = NULL;
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000257 int final = 0;
258 Py_ssize_t consumed;
259 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000260
Martin v. Löwis423be952008-08-13 15:53:07 +0000261 if (!PyArg_ParseTuple(args, "s*|zi:utf_7_decode",
262 &pbuf, &errors, &final))
263 return NULL;
264 consumed = pbuf.len;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000265
Martin v. Löwis423be952008-08-13 15:53:07 +0000266 decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors,
267 final ? NULL : &consumed);
268 PyBuffer_Release(&pbuf);
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000269 if (decoded == NULL)
270 return NULL;
271 return codec_tuple(decoded, consumed);
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000272}
273
274static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000275utf_8_decode(PyObject *self,
276 PyObject *args)
277{
Martin v. Löwis423be952008-08-13 15:53:07 +0000278 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000279 const char *errors = NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000280 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000281 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000282 PyObject *decoded = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000283
Martin v. Löwis423be952008-08-13 15:53:07 +0000284 if (!PyArg_ParseTuple(args, "s*|zi:utf_8_decode",
285 &pbuf, &errors, &final))
Walter Dörwald69652032004-09-07 20:24:22 +0000286 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000287 consumed = pbuf.len;
288
289 decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors,
Walter Dörwald69652032004-09-07 20:24:22 +0000290 final ? NULL : &consumed);
Martin v. Löwis423be952008-08-13 15:53:07 +0000291 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000292 if (decoded == NULL)
293 return NULL;
294 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000295}
296
297static PyObject *
298utf_16_decode(PyObject *self,
299 PyObject *args)
300{
Martin v. Löwis423be952008-08-13 15:53:07 +0000301 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000302 const char *errors = NULL;
303 int byteorder = 0;
Walter Dörwald69652032004-09-07 20:24:22 +0000304 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000305 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000306 PyObject *decoded;
307
Martin v. Löwis423be952008-08-13 15:53:07 +0000308 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_decode",
309 &pbuf, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000310 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000311 consumed = pbuf.len; /* This is overwritten unless final is true. */
312 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
313 &byteorder, final ? NULL : &consumed);
314 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000315 if (decoded == NULL)
316 return NULL;
317 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000318}
319
320static PyObject *
321utf_16_le_decode(PyObject *self,
322 PyObject *args)
323{
Martin v. Löwis423be952008-08-13 15:53:07 +0000324 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000325 const char *errors = NULL;
326 int byteorder = -1;
Walter Dörwald69652032004-09-07 20:24:22 +0000327 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000328 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000329 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000330
Martin v. Löwis423be952008-08-13 15:53:07 +0000331 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_le_decode",
332 &pbuf, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000333 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000334
Martin v. Löwis423be952008-08-13 15:53:07 +0000335 consumed = pbuf.len; /* This is overwritten unless final is true. */
336 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Walter Dörwald69652032004-09-07 20:24:22 +0000337 &byteorder, final ? NULL : &consumed);
Martin v. Löwis423be952008-08-13 15:53:07 +0000338 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000339 if (decoded == NULL)
340 return NULL;
341 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000342}
343
344static PyObject *
345utf_16_be_decode(PyObject *self,
346 PyObject *args)
347{
Martin v. Löwis423be952008-08-13 15:53:07 +0000348 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000349 const char *errors = NULL;
350 int byteorder = 1;
Walter Dörwald69652032004-09-07 20:24:22 +0000351 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000352 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000353 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000354
Martin v. Löwis423be952008-08-13 15:53:07 +0000355 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_be_decode",
356 &pbuf, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000357 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000358
359 consumed = pbuf.len; /* This is overwritten unless final is true. */
360 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Walter Dörwald69652032004-09-07 20:24:22 +0000361 &byteorder, final ? NULL : &consumed);
Martin v. Löwis423be952008-08-13 15:53:07 +0000362 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000363 if (decoded == NULL)
364 return NULL;
365 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000366}
367
368/* This non-standard version also provides access to the byteorder
369 parameter of the builtin UTF-16 codec.
370
371 It returns a tuple (unicode, bytesread, byteorder) with byteorder
372 being the value in effect at the end of data.
373
374*/
375
376static PyObject *
377utf_16_ex_decode(PyObject *self,
378 PyObject *args)
379{
Martin v. Löwis423be952008-08-13 15:53:07 +0000380 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000381 const char *errors = NULL;
382 int byteorder = 0;
383 PyObject *unicode, *tuple;
Walter Dörwald69652032004-09-07 20:24:22 +0000384 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000385 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000386
Martin v. Löwis423be952008-08-13 15:53:07 +0000387 if (!PyArg_ParseTuple(args, "s*|zii:utf_16_ex_decode",
388 &pbuf, &errors, &byteorder, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000389 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000390 consumed = pbuf.len; /* This is overwritten unless final is true. */
391 unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
392 &byteorder, final ? NULL : &consumed);
393 PyBuffer_Release(&pbuf);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000394 if (unicode == NULL)
395 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000396 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000397 Py_DECREF(unicode);
398 return tuple;
399}
400
401static PyObject *
Walter Dörwald41980ca2007-08-16 21:55:45 +0000402utf_32_decode(PyObject *self,
403 PyObject *args)
404{
Martin v. Löwis423be952008-08-13 15:53:07 +0000405 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000406 const char *errors = NULL;
407 int byteorder = 0;
408 int final = 0;
409 Py_ssize_t consumed;
410 PyObject *decoded;
411
Martin v. Löwis423be952008-08-13 15:53:07 +0000412 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_decode",
413 &pbuf, &errors, &final))
Walter Dörwald41980ca2007-08-16 21:55:45 +0000414 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000415 consumed = pbuf.len; /* This is overwritten unless final is true. */
416 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
417 &byteorder, final ? NULL : &consumed);
418 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000419 if (decoded == NULL)
420 return NULL;
421 return codec_tuple(decoded, consumed);
422}
423
424static PyObject *
425utf_32_le_decode(PyObject *self,
426 PyObject *args)
427{
Martin v. Löwis423be952008-08-13 15:53:07 +0000428 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000429 const char *errors = NULL;
430 int byteorder = -1;
431 int final = 0;
432 Py_ssize_t consumed;
Martin v. Löwis423be952008-08-13 15:53:07 +0000433 PyObject *decoded;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000434
Martin v. Löwis423be952008-08-13 15:53:07 +0000435 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_le_decode",
436 &pbuf, &errors, &final))
Walter Dörwald41980ca2007-08-16 21:55:45 +0000437 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000438 consumed = pbuf.len; /* This is overwritten unless final is true. */
439 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
440 &byteorder, final ? NULL : &consumed);
441 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000442 if (decoded == NULL)
443 return NULL;
444 return codec_tuple(decoded, consumed);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000445}
446
447static PyObject *
448utf_32_be_decode(PyObject *self,
449 PyObject *args)
450{
Martin v. Löwis423be952008-08-13 15:53:07 +0000451 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000452 const char *errors = NULL;
453 int byteorder = 1;
454 int final = 0;
455 Py_ssize_t consumed;
Martin v. Löwis423be952008-08-13 15:53:07 +0000456 PyObject *decoded;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000457
Martin v. Löwis423be952008-08-13 15:53:07 +0000458 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_be_decode",
459 &pbuf, &errors, &final))
Walter Dörwald41980ca2007-08-16 21:55:45 +0000460 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000461 consumed = pbuf.len; /* This is overwritten unless final is true. */
462 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
463 &byteorder, final ? NULL : &consumed);
464 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000465 if (decoded == NULL)
466 return NULL;
467 return codec_tuple(decoded, consumed);
468}
469
470/* This non-standard version also provides access to the byteorder
471 parameter of the builtin UTF-32 codec.
472
473 It returns a tuple (unicode, bytesread, byteorder) with byteorder
474 being the value in effect at the end of data.
475
476*/
477
478static PyObject *
479utf_32_ex_decode(PyObject *self,
480 PyObject *args)
481{
Martin v. Löwis423be952008-08-13 15:53:07 +0000482 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000483 const char *errors = NULL;
484 int byteorder = 0;
485 PyObject *unicode, *tuple;
486 int final = 0;
487 Py_ssize_t consumed;
488
Martin v. Löwis423be952008-08-13 15:53:07 +0000489 if (!PyArg_ParseTuple(args, "s*|zii:utf_32_ex_decode",
490 &pbuf, &errors, &byteorder, &final))
Walter Dörwald41980ca2007-08-16 21:55:45 +0000491 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000492 consumed = pbuf.len; /* This is overwritten unless final is true. */
493 unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
494 &byteorder, final ? NULL : &consumed);
495 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000496 if (unicode == NULL)
497 return NULL;
498 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
499 Py_DECREF(unicode);
500 return tuple;
501}
502
503static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000504unicode_escape_decode(PyObject *self,
505 PyObject *args)
506{
Martin v. Löwis423be952008-08-13 15:53:07 +0000507 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000508 const char *errors = NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000509 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000510
Martin v. Löwis423be952008-08-13 15:53:07 +0000511 if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode",
512 &pbuf, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000513 return NULL;
514
Martin v. Löwis423be952008-08-13 15:53:07 +0000515 unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
516 PyBuffer_Release(&pbuf);
517 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000518}
519
520static PyObject *
521raw_unicode_escape_decode(PyObject *self,
522 PyObject *args)
523{
Martin v. Löwis423be952008-08-13 15:53:07 +0000524 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000525 const char *errors = NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000526 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000527
Martin v. Löwis423be952008-08-13 15:53:07 +0000528 if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode",
529 &pbuf, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000530 return NULL;
531
Martin v. Löwis423be952008-08-13 15:53:07 +0000532 unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
533 PyBuffer_Release(&pbuf);
534 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000535}
536
537static PyObject *
538latin_1_decode(PyObject *self,
539 PyObject *args)
540{
Martin v. Löwis423be952008-08-13 15:53:07 +0000541 Py_buffer pbuf;
542 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000543 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000544
Martin v. Löwis423be952008-08-13 15:53:07 +0000545 if (!PyArg_ParseTuple(args, "s*|z:latin_1_decode",
546 &pbuf, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000547 return NULL;
548
Martin v. Löwis423be952008-08-13 15:53:07 +0000549 unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
550 PyBuffer_Release(&pbuf);
551 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000552}
553
554static PyObject *
555ascii_decode(PyObject *self,
556 PyObject *args)
557{
Martin v. Löwis423be952008-08-13 15:53:07 +0000558 Py_buffer pbuf;
559 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000560 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000561
Martin v. Löwis423be952008-08-13 15:53:07 +0000562 if (!PyArg_ParseTuple(args, "s*|z:ascii_decode",
563 &pbuf, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000564 return NULL;
565
Martin v. Löwis423be952008-08-13 15:53:07 +0000566 unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
567 PyBuffer_Release(&pbuf);
568 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000569}
570
571static PyObject *
572charmap_decode(PyObject *self,
573 PyObject *args)
574{
Martin v. Löwis423be952008-08-13 15:53:07 +0000575 Py_buffer pbuf;
576 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000577 const char *errors = NULL;
578 PyObject *mapping = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000579
Martin v. Löwis423be952008-08-13 15:53:07 +0000580 if (!PyArg_ParseTuple(args, "s*|zO:charmap_decode",
581 &pbuf, &errors, &mapping))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000582 return NULL;
583 if (mapping == Py_None)
584 mapping = NULL;
585
Martin v. Löwis423be952008-08-13 15:53:07 +0000586 unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
587 PyBuffer_Release(&pbuf);
588 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000589}
590
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000591#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000592
593static PyObject *
594mbcs_decode(PyObject *self,
595 PyObject *args)
596{
Martin v. Löwis423be952008-08-13 15:53:07 +0000597 Py_buffer pbuf;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000598 const char *errors = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000599 int final = 0;
Martin v. Löwis423be952008-08-13 15:53:07 +0000600 Py_ssize_t consumed;
601 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000602
Martin v. Löwis423be952008-08-13 15:53:07 +0000603 if (!PyArg_ParseTuple(args, "s*|zi:mbcs_decode",
604 &pbuf, &errors, &final))
Guido van Rossum24bdb042000-03-28 20:29:59 +0000605 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000606 consumed = pbuf.len;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000607
Martin v. Löwis423be952008-08-13 15:53:07 +0000608 decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors,
609 final ? NULL : &consumed);
610 PyBuffer_Release(&pbuf);
611 if (decoded == NULL)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000612 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000613 return codec_tuple(decoded, consumed);
Guido van Rossum24bdb042000-03-28 20:29:59 +0000614}
615
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000616#endif /* MS_WINDOWS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000617
Guido van Rossume2d67f92000-03-10 23:09:23 +0000618/* --- Encoder ------------------------------------------------------------ */
619
620static PyObject *
621readbuffer_encode(PyObject *self,
622 PyObject *args)
623{
Martin v. Löwis423be952008-08-13 15:53:07 +0000624 Py_buffer pdata;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000625 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000626 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000627 const char *errors = NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000628 PyObject *result;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000629
Martin v. Löwis423be952008-08-13 15:53:07 +0000630 if (!PyArg_ParseTuple(args, "s*|z:readbuffer_encode",
631 &pdata, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000632 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000633 data = pdata.buf;
634 size = pdata.len;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000635
Martin v. Löwis423be952008-08-13 15:53:07 +0000636 result = PyBytes_FromStringAndSize(data, size);
637 PyBuffer_Release(&pdata);
638 return codec_tuple(result, size);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000639}
640
641static PyObject *
642charbuffer_encode(PyObject *self,
643 PyObject *args)
644{
645 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000646 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000647 const char *errors = NULL;
648
649 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
650 &data, &size, &errors))
651 return NULL;
652
Christian Heimes72b710a2008-05-26 13:28:38 +0000653 return codec_tuple(PyBytes_FromStringAndSize(data, size), size);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000654}
655
656static PyObject *
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000657unicode_internal_encode(PyObject *self,
658 PyObject *args)
659{
660 PyObject *obj;
661 const char *errors = NULL;
662 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000663 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000664
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000665 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
666 &obj, &errors))
667 return NULL;
668
669 if (PyUnicode_Check(obj)) {
670 data = PyUnicode_AS_DATA(obj);
671 size = PyUnicode_GET_DATA_SIZE(obj);
Christian Heimes72b710a2008-05-26 13:28:38 +0000672 return codec_tuple(PyBytes_FromStringAndSize(data, size), size);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000673 }
674 else {
675 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
676 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +0000677 return codec_tuple(PyBytes_FromStringAndSize(data, size), size);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000678 }
679}
680
681static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000682utf_7_encode(PyObject *self,
683 PyObject *args)
684{
685 PyObject *str, *v;
686 const char *errors = NULL;
687
688 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
689 &str, &errors))
690 return NULL;
691
692 str = PyUnicode_FromObject(str);
693 if (str == NULL)
694 return NULL;
695 v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
696 PyUnicode_GET_SIZE(str),
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000697 0,
698 0,
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000699 errors),
700 PyUnicode_GET_SIZE(str));
701 Py_DECREF(str);
702 return v;
703}
704
705static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000706utf_8_encode(PyObject *self,
707 PyObject *args)
708{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000709 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000710 const char *errors = NULL;
711
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000712 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000713 &str, &errors))
714 return NULL;
715
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000716 str = PyUnicode_FromObject(str);
717 if (str == NULL)
718 return NULL;
719 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
720 PyUnicode_GET_SIZE(str),
721 errors),
722 PyUnicode_GET_SIZE(str));
723 Py_DECREF(str);
724 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000725}
726
727/* This version provides access to the byteorder parameter of the
728 builtin UTF-16 codecs as optional third argument. It defaults to 0
729 which means: use the native byte order and prepend the data with a
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000730 BOM mark.
Guido van Rossume2d67f92000-03-10 23:09:23 +0000731
732*/
733
734static PyObject *
735utf_16_encode(PyObject *self,
736 PyObject *args)
737{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000738 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000739 const char *errors = NULL;
740 int byteorder = 0;
741
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000742 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000743 &str, &errors, &byteorder))
744 return NULL;
745
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000746 str = PyUnicode_FromObject(str);
747 if (str == NULL)
748 return NULL;
749 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
750 PyUnicode_GET_SIZE(str),
751 errors,
752 byteorder),
753 PyUnicode_GET_SIZE(str));
754 Py_DECREF(str);
755 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000756}
757
758static PyObject *
759utf_16_le_encode(PyObject *self,
760 PyObject *args)
761{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000762 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000763 const char *errors = NULL;
764
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000765 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000766 &str, &errors))
767 return NULL;
768
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000769 str = PyUnicode_FromObject(str);
770 if (str == NULL)
771 return NULL;
772 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000773 PyUnicode_GET_SIZE(str),
774 errors,
775 -1),
776 PyUnicode_GET_SIZE(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000777 Py_DECREF(str);
778 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000779}
780
781static PyObject *
782utf_16_be_encode(PyObject *self,
783 PyObject *args)
784{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000785 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000786 const char *errors = NULL;
787
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000788 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000789 &str, &errors))
790 return NULL;
791
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000792 str = PyUnicode_FromObject(str);
793 if (str == NULL)
794 return NULL;
795 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
796 PyUnicode_GET_SIZE(str),
797 errors,
798 +1),
799 PyUnicode_GET_SIZE(str));
800 Py_DECREF(str);
801 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000802}
803
Walter Dörwald41980ca2007-08-16 21:55:45 +0000804/* This version provides access to the byteorder parameter of the
805 builtin UTF-32 codecs as optional third argument. It defaults to 0
806 which means: use the native byte order and prepend the data with a
807 BOM mark.
808
809*/
810
811static PyObject *
812utf_32_encode(PyObject *self,
813 PyObject *args)
814{
815 PyObject *str, *v;
816 const char *errors = NULL;
817 int byteorder = 0;
818
819 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
820 &str, &errors, &byteorder))
821 return NULL;
822
823 str = PyUnicode_FromObject(str);
824 if (str == NULL)
825 return NULL;
826 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
827 PyUnicode_GET_SIZE(str),
828 errors,
829 byteorder),
830 PyUnicode_GET_SIZE(str));
831 Py_DECREF(str);
832 return v;
833}
834
835static PyObject *
836utf_32_le_encode(PyObject *self,
837 PyObject *args)
838{
839 PyObject *str, *v;
840 const char *errors = NULL;
841
842 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
843 &str, &errors))
844 return NULL;
845
846 str = PyUnicode_FromObject(str);
847 if (str == NULL)
848 return NULL;
849 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
850 PyUnicode_GET_SIZE(str),
851 errors,
852 -1),
853 PyUnicode_GET_SIZE(str));
854 Py_DECREF(str);
855 return v;
856}
857
858static PyObject *
859utf_32_be_encode(PyObject *self,
860 PyObject *args)
861{
862 PyObject *str, *v;
863 const char *errors = NULL;
864
865 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
866 &str, &errors))
867 return NULL;
868
869 str = PyUnicode_FromObject(str);
870 if (str == NULL)
871 return NULL;
872 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
873 PyUnicode_GET_SIZE(str),
874 errors,
875 +1),
876 PyUnicode_GET_SIZE(str));
877 Py_DECREF(str);
878 return v;
879}
880
Guido van Rossume2d67f92000-03-10 23:09:23 +0000881static PyObject *
882unicode_escape_encode(PyObject *self,
883 PyObject *args)
884{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000885 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000886 const char *errors = NULL;
887
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000888 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000889 &str, &errors))
890 return NULL;
891
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000892 str = PyUnicode_FromObject(str);
893 if (str == NULL)
894 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000895 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000896 PyUnicode_GET_SIZE(str)),
897 PyUnicode_GET_SIZE(str));
898 Py_DECREF(str);
899 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000900}
901
902static PyObject *
903raw_unicode_escape_encode(PyObject *self,
904 PyObject *args)
905{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000906 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000907 const char *errors = NULL;
908
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000909 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000910 &str, &errors))
911 return NULL;
912
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000913 str = PyUnicode_FromObject(str);
914 if (str == NULL)
915 return NULL;
916 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000917 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000918 PyUnicode_GET_SIZE(str)),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000919 PyUnicode_GET_SIZE(str));
920 Py_DECREF(str);
921 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000922}
923
924static PyObject *
925latin_1_encode(PyObject *self,
926 PyObject *args)
927{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000928 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000929 const char *errors = NULL;
930
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000931 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000932 &str, &errors))
933 return NULL;
934
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000935 str = PyUnicode_FromObject(str);
936 if (str == NULL)
937 return NULL;
938 v = codec_tuple(PyUnicode_EncodeLatin1(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000939 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000940 PyUnicode_GET_SIZE(str),
941 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000942 PyUnicode_GET_SIZE(str));
943 Py_DECREF(str);
944 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000945}
946
947static PyObject *
948ascii_encode(PyObject *self,
949 PyObject *args)
950{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000951 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000952 const char *errors = NULL;
953
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000954 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000955 &str, &errors))
956 return NULL;
957
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000958 str = PyUnicode_FromObject(str);
959 if (str == NULL)
960 return NULL;
961 v = codec_tuple(PyUnicode_EncodeASCII(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000962 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000963 PyUnicode_GET_SIZE(str),
964 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000965 PyUnicode_GET_SIZE(str));
966 Py_DECREF(str);
967 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000968}
969
970static PyObject *
971charmap_encode(PyObject *self,
972 PyObject *args)
973{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000974 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000975 const char *errors = NULL;
976 PyObject *mapping = NULL;
977
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000978 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000979 &str, &errors, &mapping))
980 return NULL;
981 if (mapping == Py_None)
982 mapping = NULL;
983
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000984 str = PyUnicode_FromObject(str);
985 if (str == NULL)
986 return NULL;
987 v = codec_tuple(PyUnicode_EncodeCharmap(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000988 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000989 PyUnicode_GET_SIZE(str),
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000990 mapping,
Guido van Rossume2d67f92000-03-10 23:09:23 +0000991 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000992 PyUnicode_GET_SIZE(str));
993 Py_DECREF(str);
994 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000995}
996
Thomas Wouters73e5a5b2006-06-08 15:35:45 +0000997static PyObject*
998charmap_build(PyObject *self, PyObject *args)
999{
1000 PyObject *map;
1001 if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
1002 return NULL;
1003 return PyUnicode_BuildEncodingMap(map);
1004}
1005
Martin v. Löwis6238d2b2002-06-30 15:26:10 +00001006#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +00001007
1008static PyObject *
1009mbcs_encode(PyObject *self,
1010 PyObject *args)
1011{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001012 PyObject *str, *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001013 const char *errors = NULL;
1014
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001015 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
Guido van Rossum24bdb042000-03-28 20:29:59 +00001016 &str, &errors))
1017 return NULL;
1018
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001019 str = PyUnicode_FromObject(str);
1020 if (str == NULL)
1021 return NULL;
1022 v = codec_tuple(PyUnicode_EncodeMBCS(
Walter Dörwald9fd115c2005-11-02 08:30:08 +00001023 PyUnicode_AS_UNICODE(str),
Guido van Rossum24bdb042000-03-28 20:29:59 +00001024 PyUnicode_GET_SIZE(str),
1025 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001026 PyUnicode_GET_SIZE(str));
1027 Py_DECREF(str);
1028 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001029}
1030
Martin v. Löwis6238d2b2002-06-30 15:26:10 +00001031#endif /* MS_WINDOWS */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001032
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001033/* --- Error handler registry --------------------------------------------- */
1034
Walter Dörwald0ae29812002-10-31 13:36:29 +00001035PyDoc_STRVAR(register_error__doc__,
1036"register_error(errors, handler)\n\
1037\n\
1038Register the specified error handler under the name\n\
1039errors. handler must be a callable object, that\n\
1040will be called with an exception instance containing\n\
1041information about the location of the encoding/decoding\n\
1042error and must return a (replacement, new position) tuple.");
1043
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001044static PyObject *register_error(PyObject *self, PyObject *args)
1045{
1046 const char *name;
1047 PyObject *handler;
1048
1049 if (!PyArg_ParseTuple(args, "sO:register_error",
1050 &name, &handler))
1051 return NULL;
1052 if (PyCodec_RegisterError(name, handler))
1053 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001054 Py_RETURN_NONE;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001055}
1056
Walter Dörwald0ae29812002-10-31 13:36:29 +00001057PyDoc_STRVAR(lookup_error__doc__,
1058"lookup_error(errors) -> handler\n\
1059\n\
1060Return the error handler for the specified error handling name\n\
1061or raise a LookupError, if no handler exists under this name.");
1062
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001063static PyObject *lookup_error(PyObject *self, PyObject *args)
1064{
1065 const char *name;
1066
1067 if (!PyArg_ParseTuple(args, "s:lookup_error",
1068 &name))
1069 return NULL;
1070 return PyCodec_LookupError(name);
1071}
1072
Guido van Rossume2d67f92000-03-10 23:09:23 +00001073/* --- Module API --------------------------------------------------------- */
1074
1075static PyMethodDef _codecs_functions[] = {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001076 {"register", codec_register, METH_O,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001077 register__doc__},
Marc-André Lemburg3f419742004-07-10 12:06:10 +00001078 {"lookup", codec_lookup, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001079 lookup__doc__},
Brett Cannon3e377de2004-07-10 21:41:14 +00001080 {"encode", codec_encode, METH_VARARGS,
1081 encode__doc__},
1082 {"decode", codec_decode, METH_VARARGS,
1083 decode__doc__},
Martin v. Löwis8a8da792002-08-14 07:46:28 +00001084 {"escape_encode", escape_encode, METH_VARARGS},
1085 {"escape_decode", escape_decode, METH_VARARGS},
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001086 {"utf_8_encode", utf_8_encode, METH_VARARGS},
1087 {"utf_8_decode", utf_8_decode, METH_VARARGS},
1088 {"utf_7_encode", utf_7_encode, METH_VARARGS},
1089 {"utf_7_decode", utf_7_decode, METH_VARARGS},
1090 {"utf_16_encode", utf_16_encode, METH_VARARGS},
1091 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1092 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1093 {"utf_16_decode", utf_16_decode, METH_VARARGS},
1094 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1095 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1096 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
Walter Dörwald41980ca2007-08-16 21:55:45 +00001097 {"utf_32_encode", utf_32_encode, METH_VARARGS},
1098 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1099 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1100 {"utf_32_decode", utf_32_decode, METH_VARARGS},
1101 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1102 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1103 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001104 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1105 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1106 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1107 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1108 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1109 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1110 {"latin_1_encode", latin_1_encode, METH_VARARGS},
1111 {"latin_1_decode", latin_1_decode, METH_VARARGS},
1112 {"ascii_encode", ascii_encode, METH_VARARGS},
1113 {"ascii_decode", ascii_decode, METH_VARARGS},
1114 {"charmap_encode", charmap_encode, METH_VARARGS},
1115 {"charmap_decode", charmap_decode, METH_VARARGS},
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001116 {"charmap_build", charmap_build, METH_VARARGS},
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001117 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
1118 {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
Martin v. Löwis6238d2b2002-06-30 15:26:10 +00001119#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001120 {"mbcs_encode", mbcs_encode, METH_VARARGS},
1121 {"mbcs_decode", mbcs_decode, METH_VARARGS},
Guido van Rossum24bdb042000-03-28 20:29:59 +00001122#endif
Walter Dörwald0ae29812002-10-31 13:36:29 +00001123 {"register_error", register_error, METH_VARARGS,
1124 register_error__doc__},
1125 {"lookup_error", lookup_error, METH_VARARGS,
1126 lookup_error__doc__},
Guido van Rossume2d67f92000-03-10 23:09:23 +00001127 {NULL, NULL} /* sentinel */
1128};
1129
Martin v. Löwis1a214512008-06-11 05:26:20 +00001130static struct PyModuleDef codecsmodule = {
1131 PyModuleDef_HEAD_INIT,
1132 "_codecs",
1133 NULL,
1134 -1,
1135 _codecs_functions,
1136 NULL,
1137 NULL,
1138 NULL,
1139 NULL
1140};
1141
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001142PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001143PyInit__codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001144{
Martin v. Löwis1a214512008-06-11 05:26:20 +00001145 return PyModule_Create(&codecsmodule);
Guido van Rossume2d67f92000-03-10 23:09:23 +00001146}