blob: caaac5879b1e313bd2112f9571cb9b84b74633e3 [file] [log] [blame]
Guido van Rossume2d67f92000-03-10 23:09:23 +00001/* ------------------------------------------------------------------------
2
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
5
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
8
9 The codec registry is accessible via:
10
11 register(search_function) -> None
12
Guido van Rossum36e0a922007-07-20 04:05:57 +000013 lookup(encoding) -> CodecInfo object
Guido van Rossume2d67f92000-03-10 23:09:23 +000014
15 The builtin Unicode codecs use the following interface:
16
Walter Dörwald9fd115c2005-11-02 08:30:08 +000017 <encoding>_encode(Unicode_object[,errors='strict']) ->
Guido van Rossume2d67f92000-03-10 23:09:23 +000018 (string object, bytes consumed)
19
Walter Dörwald9fd115c2005-11-02 08:30:08 +000020 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
Guido van Rossume2d67f92000-03-10 23:09:23 +000021 (Unicode object, bytes consumed)
22
  <encoding>_encode() interfaces also accept non-Unicode objects as
  input. The objects are then converted to Unicode using
  PyUnicode_FromObject() prior to applying the conversion.
26
Guido van Rossume2d67f92000-03-10 23:09:23 +000027 These <encoding>s are available: utf_8, unicode_escape,
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +000028 raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
29 mbcs (on win32).
30
Guido van Rossume2d67f92000-03-10 23:09:23 +000031
32Written by Marc-Andre Lemburg (mal@lemburg.com).
33
Guido van Rossum16b1ad92000-08-03 16:24:25 +000034Copyright (c) Corporation for National Research Initiatives.
Guido van Rossume2d67f92000-03-10 23:09:23 +000035
36 ------------------------------------------------------------------------ */
37
Martin v. Löwis18e16552006-02-15 17:27:45 +000038#define PY_SSIZE_T_CLEAN
Guido van Rossume2d67f92000-03-10 23:09:23 +000039#include "Python.h"
40
41/* --- Registry ----------------------------------------------------------- */
42
Walter Dörwald0ae29812002-10-31 13:36:29 +000043PyDoc_STRVAR(register__doc__,
44"register(search_function)\n\
45\n\
46Register a codec search function. Search functions are expected to take\n\
47one argument, the encoding name in all lower case letters, and return\n\
Guido van Rossum36e0a922007-07-20 04:05:57 +000048a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
49(or a CodecInfo object).");
Walter Dörwald0ae29812002-10-31 13:36:29 +000050
Guido van Rossume2d67f92000-03-10 23:09:23 +000051static
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000052PyObject *codec_register(PyObject *self, PyObject *search_function)
Guido van Rossume2d67f92000-03-10 23:09:23 +000053{
Guido van Rossume2d67f92000-03-10 23:09:23 +000054 if (PyCodec_Register(search_function))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000055 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000056
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000057 Py_RETURN_NONE;
Guido van Rossume2d67f92000-03-10 23:09:23 +000058}
59
Walter Dörwald0ae29812002-10-31 13:36:29 +000060PyDoc_STRVAR(lookup__doc__,
Guido van Rossum36e0a922007-07-20 04:05:57 +000061"lookup(encoding) -> CodecInfo\n\
Walter Dörwald0ae29812002-10-31 13:36:29 +000062\n\
63Looks up a codec tuple in the Python codec registry and returns\n\
Guido van Rossum36e0a922007-07-20 04:05:57 +000064a tuple of function (or a CodecInfo object).");
Walter Dörwald0ae29812002-10-31 13:36:29 +000065
Guido van Rossume2d67f92000-03-10 23:09:23 +000066static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000067PyObject *codec_lookup(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000068{
69 char *encoding;
70
71 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000072 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +000073
74 return _PyCodec_Lookup(encoding);
Guido van Rossume2d67f92000-03-10 23:09:23 +000075}
76
Marc-André Lemburg3f419742004-07-10 12:06:10 +000077PyDoc_STRVAR(encode__doc__,
78"encode(obj, [encoding[,errors]]) -> object\n\
79\n\
80Encodes obj using the codec registered for encoding. encoding defaults\n\
81to the default encoding. errors may be given to set a different error\n\
82handling scheme. Default is 'strict' meaning that encoding errors raise\n\
83a ValueError. Other possible values are 'ignore', 'replace' and\n\
84'xmlcharrefreplace' as well as any other name registered with\n\
85codecs.register_error that can handle ValueErrors.");
86
87static PyObject *
88codec_encode(PyObject *self, PyObject *args)
89{
Brett Cannon3e377de2004-07-10 21:41:14 +000090 const char *encoding = NULL;
91 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +000092 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000093
Marc-André Lemburg3f419742004-07-10 12:06:10 +000094 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
95 return NULL;
96
97 if (encoding == NULL)
98 encoding = PyUnicode_GetDefaultEncoding();
99
100 /* Encode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000101 return PyCodec_Encode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000102}
103
104PyDoc_STRVAR(decode__doc__,
105"decode(obj, [encoding[,errors]]) -> object\n\
106\n\
107Decodes obj using the codec registered for encoding. encoding defaults\n\
108to the default encoding. errors may be given to set a different error\n\
109handling scheme. Default is 'strict' meaning that encoding errors raise\n\
110a ValueError. Other possible values are 'ignore' and 'replace'\n\
111as well as any other name registerd with codecs.register_error that is\n\
112able to handle ValueErrors.");
113
114static PyObject *
115codec_decode(PyObject *self, PyObject *args)
116{
Brett Cannon3e377de2004-07-10 21:41:14 +0000117 const char *encoding = NULL;
118 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000119 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000120
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000121 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
122 return NULL;
123
124 if (encoding == NULL)
125 encoding = PyUnicode_GetDefaultEncoding();
126
127 /* Decode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000128 return PyCodec_Decode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000129}
130
Guido van Rossume2d67f92000-03-10 23:09:23 +0000131/* --- Helpers ------------------------------------------------------------ */
132
133static
134PyObject *codec_tuple(PyObject *unicode,
Thomas Wouters477c8d52006-05-27 19:21:47 +0000135 Py_ssize_t len)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000136{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000137 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000138 if (unicode == NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000139 return NULL;
140 v = Py_BuildValue("On", unicode, len);
141 Py_DECREF(unicode);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000142 return v;
143}
144
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000145/* --- String codecs ------------------------------------------------------ */
146static PyObject *
147escape_decode(PyObject *self,
148 PyObject *args)
149{
150 const char *errors = NULL;
151 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000152 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000153
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000154 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
155 &data, &size, &errors))
156 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000157 return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000158 size);
159}
160
161static PyObject *
162escape_encode(PyObject *self,
163 PyObject *args)
164{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000165 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000166 PyObject *str;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000167 Py_ssize_t size;
168 Py_ssize_t newsize;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000169 const char *errors = NULL;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000170 PyObject *v;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000171
172 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
173 &PyString_Type, &str, &errors))
174 return NULL;
175
Martin v. Löwis5b222132007-06-10 09:51:05 +0000176 size = PyString_GET_SIZE(str);
Walter Dörwald1ab83302007-05-18 17:15:44 +0000177 newsize = 4*size;
178 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != size) {
179 PyErr_SetString(PyExc_OverflowError,
180 "string is too large to encode");
181 return NULL;
182 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000183 v = PyString_FromStringAndSize(NULL, newsize);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000184
Walter Dörwald1ab83302007-05-18 17:15:44 +0000185 if (v == NULL) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000186 return NULL;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000187 }
188 else {
189 register Py_ssize_t i;
190 register char c;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000191 register char *p = PyString_AS_STRING(v);
Walter Dörwald1ab83302007-05-18 17:15:44 +0000192
193 for (i = 0; i < size; i++) {
194 /* There's at least enough room for a hex escape */
Guido van Rossum98297ee2007-11-06 21:34:58 +0000195 assert(newsize - (p - PyString_AS_STRING(v)) >= 4);
Walter Dörwald1ab83302007-05-18 17:15:44 +0000196 c = PyString_AS_STRING(str)[i];
197 if (c == '\'' || c == '\\')
198 *p++ = '\\', *p++ = c;
199 else if (c == '\t')
200 *p++ = '\\', *p++ = 't';
201 else if (c == '\n')
202 *p++ = '\\', *p++ = 'n';
203 else if (c == '\r')
204 *p++ = '\\', *p++ = 'r';
205 else if (c < ' ' || c >= 0x7f) {
206 *p++ = '\\';
207 *p++ = 'x';
208 *p++ = hexdigits[(c & 0xf0) >> 4];
209 *p++ = hexdigits[c & 0xf];
210 }
211 else
212 *p++ = c;
213 }
214 *p = '\0';
Guido van Rossum98297ee2007-11-06 21:34:58 +0000215 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v)))) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000216 return NULL;
217 }
218 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000219
Guido van Rossum98297ee2007-11-06 21:34:58 +0000220 return codec_tuple(v, PyString_Size(v));
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000221}
222
Guido van Rossume2d67f92000-03-10 23:09:23 +0000223/* --- Decoder ------------------------------------------------------------ */
224
225static PyObject *
226unicode_internal_decode(PyObject *self,
227 PyObject *args)
228{
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000229 PyObject *obj;
230 const char *errors = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000231 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000232 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000233
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000234 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
235 &obj, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000236 return NULL;
237
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000238 if (PyUnicode_Check(obj)) {
239 Py_INCREF(obj);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000240 return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000241 }
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000242 else {
243 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
244 return NULL;
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000245
246 return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000247 size);
248 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000249}
250
251static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000252utf_7_decode(PyObject *self,
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000253 PyObject *args)
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000254{
255 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000256 Py_ssize_t size;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000257 const char *errors = NULL;
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000258 int final = 0;
259 Py_ssize_t consumed;
260 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000261
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000262 if (!PyArg_ParseTuple(args, "t#|zi:utf_7_decode",
263 &data, &size, &errors, &final))
264 return NULL;
265 consumed = size;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000266
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000267 decoded = PyUnicode_DecodeUTF7Stateful(data, size, errors,
268 final ? NULL : &consumed);
269 if (decoded == NULL)
270 return NULL;
271 return codec_tuple(decoded, consumed);
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000272}
273
274static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000275utf_8_decode(PyObject *self,
276 PyObject *args)
277{
278 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000279 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000280 const char *errors = NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000281 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000282 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000283 PyObject *decoded = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000284
Walter Dörwald69652032004-09-07 20:24:22 +0000285 if (!PyArg_ParseTuple(args, "t#|zi:utf_8_decode",
286 &data, &size, &errors, &final))
287 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000288 if (size < 0) {
289 PyErr_SetString(PyExc_ValueError, "negative argument");
290 return 0;
291 }
Walter Dörwald69652032004-09-07 20:24:22 +0000292 consumed = size;
293
294 decoded = PyUnicode_DecodeUTF8Stateful(data, size, errors,
295 final ? NULL : &consumed);
296 if (decoded == NULL)
297 return NULL;
298 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000299}
300
301static PyObject *
302utf_16_decode(PyObject *self,
303 PyObject *args)
304{
305 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000306 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000307 const char *errors = NULL;
308 int byteorder = 0;
Walter Dörwald69652032004-09-07 20:24:22 +0000309 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000310 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000311 PyObject *decoded;
312
313 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_decode",
314 &data, &size, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000315 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000316 if (size < 0) {
317 PyErr_SetString(PyExc_ValueError, "negative argument");
318 return 0;
319 }
Thomas Wouters477c8d52006-05-27 19:21:47 +0000320 consumed = size; /* This is overwritten unless final is true. */
Walter Dörwald69652032004-09-07 20:24:22 +0000321 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
322 final ? NULL : &consumed);
323 if (decoded == NULL)
324 return NULL;
325 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000326}
327
328static PyObject *
329utf_16_le_decode(PyObject *self,
330 PyObject *args)
331{
332 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000333 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000334 const char *errors = NULL;
335 int byteorder = -1;
Walter Dörwald69652032004-09-07 20:24:22 +0000336 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000337 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000338 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000339
Walter Dörwald69652032004-09-07 20:24:22 +0000340 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_le_decode",
341 &data, &size, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000342 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000343
Martin v. Löwis18e16552006-02-15 17:27:45 +0000344 if (size < 0) {
345 PyErr_SetString(PyExc_ValueError, "negative argument");
346 return 0;
347 }
Thomas Wouters477c8d52006-05-27 19:21:47 +0000348 consumed = size; /* This is overwritten unless final is true. */
Walter Dörwald69652032004-09-07 20:24:22 +0000349 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
350 &byteorder, final ? NULL : &consumed);
351 if (decoded == NULL)
352 return NULL;
353 return codec_tuple(decoded, consumed);
354
Guido van Rossume2d67f92000-03-10 23:09:23 +0000355}
356
357static PyObject *
358utf_16_be_decode(PyObject *self,
359 PyObject *args)
360{
361 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000362 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000363 const char *errors = NULL;
364 int byteorder = 1;
Walter Dörwald69652032004-09-07 20:24:22 +0000365 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000366 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000367 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000368
Walter Dörwald69652032004-09-07 20:24:22 +0000369 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_be_decode",
370 &data, &size, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000371 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000372 if (size < 0) {
373 PyErr_SetString(PyExc_ValueError, "negative argument");
374 return 0;
375 }
Thomas Wouters477c8d52006-05-27 19:21:47 +0000376 consumed = size; /* This is overwritten unless final is true. */
Walter Dörwald69652032004-09-07 20:24:22 +0000377 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
378 &byteorder, final ? NULL : &consumed);
379 if (decoded == NULL)
380 return NULL;
381 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000382}
383
384/* This non-standard version also provides access to the byteorder
385 parameter of the builtin UTF-16 codec.
386
387 It returns a tuple (unicode, bytesread, byteorder) with byteorder
388 being the value in effect at the end of data.
389
390*/
391
392static PyObject *
393utf_16_ex_decode(PyObject *self,
394 PyObject *args)
395{
396 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000397 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000398 const char *errors = NULL;
399 int byteorder = 0;
400 PyObject *unicode, *tuple;
Walter Dörwald69652032004-09-07 20:24:22 +0000401 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000402 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000403
404 if (!PyArg_ParseTuple(args, "t#|zii:utf_16_ex_decode",
405 &data, &size, &errors, &byteorder, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000406 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000407 if (size < 0) {
408 PyErr_SetString(PyExc_ValueError, "negative argument");
409 return 0;
410 }
Thomas Wouters477c8d52006-05-27 19:21:47 +0000411 consumed = size; /* This is overwritten unless final is true. */
Walter Dörwald69652032004-09-07 20:24:22 +0000412 unicode = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
413 final ? NULL : &consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000414 if (unicode == NULL)
415 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000416 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000417 Py_DECREF(unicode);
418 return tuple;
419}
420
421static PyObject *
Walter Dörwald41980ca2007-08-16 21:55:45 +0000422utf_32_decode(PyObject *self,
423 PyObject *args)
424{
425 const char *data;
426 Py_ssize_t size;
427 const char *errors = NULL;
428 int byteorder = 0;
429 int final = 0;
430 Py_ssize_t consumed;
431 PyObject *decoded;
432
433 if (!PyArg_ParseTuple(args, "t#|zi:utf_32_decode",
434 &data, &size, &errors, &final))
435 return NULL;
436 if (size < 0) {
437 PyErr_SetString(PyExc_ValueError, "negative argument");
438 return 0;
439 }
440 consumed = size; /* This is overwritten unless final is true. */
441 decoded = PyUnicode_DecodeUTF32Stateful(data, size, errors, &byteorder,
442 final ? NULL : &consumed);
443 if (decoded == NULL)
444 return NULL;
445 return codec_tuple(decoded, consumed);
446}
447
448static PyObject *
449utf_32_le_decode(PyObject *self,
450 PyObject *args)
451{
452 const char *data;
453 Py_ssize_t size;
454 const char *errors = NULL;
455 int byteorder = -1;
456 int final = 0;
457 Py_ssize_t consumed;
458 PyObject *decoded = NULL;
459
460 if (!PyArg_ParseTuple(args, "t#|zi:utf_32_le_decode",
461 &data, &size, &errors, &final))
462 return NULL;
463
464 if (size < 0) {
465 PyErr_SetString(PyExc_ValueError, "negative argument");
466 return 0;
467 }
468 consumed = size; /* This is overwritten unless final is true. */
469 decoded = PyUnicode_DecodeUTF32Stateful(data, size, errors,
470 &byteorder, final ? NULL : &consumed);
471 if (decoded == NULL)
472 return NULL;
473 return codec_tuple(decoded, consumed);
474
475}
476
477static PyObject *
478utf_32_be_decode(PyObject *self,
479 PyObject *args)
480{
481 const char *data;
482 Py_ssize_t size;
483 const char *errors = NULL;
484 int byteorder = 1;
485 int final = 0;
486 Py_ssize_t consumed;
487 PyObject *decoded = NULL;
488
489 if (!PyArg_ParseTuple(args, "t#|zi:utf_32_be_decode",
490 &data, &size, &errors, &final))
491 return NULL;
492 if (size < 0) {
493 PyErr_SetString(PyExc_ValueError, "negative argument");
494 return 0;
495 }
496 consumed = size; /* This is overwritten unless final is true. */
497 decoded = PyUnicode_DecodeUTF32Stateful(data, size, errors,
498 &byteorder, final ? NULL : &consumed);
499 if (decoded == NULL)
500 return NULL;
501 return codec_tuple(decoded, consumed);
502}
503
504/* This non-standard version also provides access to the byteorder
505 parameter of the builtin UTF-32 codec.
506
507 It returns a tuple (unicode, bytesread, byteorder) with byteorder
508 being the value in effect at the end of data.
509
510*/
511
512static PyObject *
513utf_32_ex_decode(PyObject *self,
514 PyObject *args)
515{
516 const char *data;
517 Py_ssize_t size;
518 const char *errors = NULL;
519 int byteorder = 0;
520 PyObject *unicode, *tuple;
521 int final = 0;
522 Py_ssize_t consumed;
523
524 if (!PyArg_ParseTuple(args, "t#|zii:utf_32_ex_decode",
525 &data, &size, &errors, &byteorder, &final))
526 return NULL;
527 if (size < 0) {
528 PyErr_SetString(PyExc_ValueError, "negative argument");
529 return 0;
530 }
531 consumed = size; /* This is overwritten unless final is true. */
532 unicode = PyUnicode_DecodeUTF32Stateful(data, size, errors, &byteorder,
533 final ? NULL : &consumed);
534 if (unicode == NULL)
535 return NULL;
536 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
537 Py_DECREF(unicode);
538 return tuple;
539}
540
541static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000542unicode_escape_decode(PyObject *self,
543 PyObject *args)
544{
545 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000546 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000547 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000548
Guido van Rossume2d67f92000-03-10 23:09:23 +0000549 if (!PyArg_ParseTuple(args, "t#|z:unicode_escape_decode",
550 &data, &size, &errors))
551 return NULL;
552
553 return codec_tuple(PyUnicode_DecodeUnicodeEscape(data, size, errors),
554 size);
555}
556
557static PyObject *
558raw_unicode_escape_decode(PyObject *self,
559 PyObject *args)
560{
561 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000562 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000563 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000564
Guido van Rossume2d67f92000-03-10 23:09:23 +0000565 if (!PyArg_ParseTuple(args, "t#|z:raw_unicode_escape_decode",
566 &data, &size, &errors))
567 return NULL;
568
569 return codec_tuple(PyUnicode_DecodeRawUnicodeEscape(data, size, errors),
570 size);
571}
572
573static PyObject *
574latin_1_decode(PyObject *self,
575 PyObject *args)
576{
577 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000578 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000579 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000580
Guido van Rossume2d67f92000-03-10 23:09:23 +0000581 if (!PyArg_ParseTuple(args, "t#|z:latin_1_decode",
582 &data, &size, &errors))
583 return NULL;
584
585 return codec_tuple(PyUnicode_DecodeLatin1(data, size, errors),
586 size);
587}
588
589static PyObject *
590ascii_decode(PyObject *self,
591 PyObject *args)
592{
593 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000594 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000595 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000596
Guido van Rossume2d67f92000-03-10 23:09:23 +0000597 if (!PyArg_ParseTuple(args, "t#|z:ascii_decode",
598 &data, &size, &errors))
599 return NULL;
600
601 return codec_tuple(PyUnicode_DecodeASCII(data, size, errors),
602 size);
603}
604
605static PyObject *
606charmap_decode(PyObject *self,
607 PyObject *args)
608{
609 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000610 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000611 const char *errors = NULL;
612 PyObject *mapping = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000613
Guido van Rossume2d67f92000-03-10 23:09:23 +0000614 if (!PyArg_ParseTuple(args, "t#|zO:charmap_decode",
615 &data, &size, &errors, &mapping))
616 return NULL;
617 if (mapping == Py_None)
618 mapping = NULL;
619
620 return codec_tuple(PyUnicode_DecodeCharmap(data, size, mapping, errors),
621 size);
622}
623
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000624#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000625
626static PyObject *
627mbcs_decode(PyObject *self,
628 PyObject *args)
629{
630 const char *data;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000631 Py_ssize_t size, consumed;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000632 const char *errors = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000633 int final = 0;
634 PyObject *decoded;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000635
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000636 if (!PyArg_ParseTuple(args, "t#|zi:mbcs_decode",
637 &data, &size, &errors, &final))
Guido van Rossum24bdb042000-03-28 20:29:59 +0000638 return NULL;
639
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000640 decoded = PyUnicode_DecodeMBCSStateful(
641 data, size, errors, final ? NULL : &consumed);
642 if (!decoded)
643 return NULL;
644 return codec_tuple(decoded, final ? size : consumed);
Guido van Rossum24bdb042000-03-28 20:29:59 +0000645}
646
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000647#endif /* MS_WINDOWS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000648
Guido van Rossume2d67f92000-03-10 23:09:23 +0000649/* --- Encoder ------------------------------------------------------------ */
650
651static PyObject *
652readbuffer_encode(PyObject *self,
653 PyObject *args)
654{
655 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000656 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000657 const char *errors = NULL;
658
659 if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
660 &data, &size, &errors))
661 return NULL;
662
Guido van Rossum98297ee2007-11-06 21:34:58 +0000663 return codec_tuple(PyString_FromStringAndSize(data, size), size);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000664}
665
666static PyObject *
667charbuffer_encode(PyObject *self,
668 PyObject *args)
669{
670 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000671 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000672 const char *errors = NULL;
673
674 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
675 &data, &size, &errors))
676 return NULL;
677
Guido van Rossum98297ee2007-11-06 21:34:58 +0000678 return codec_tuple(PyString_FromStringAndSize(data, size), size);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000679}
680
681static PyObject *
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000682unicode_internal_encode(PyObject *self,
683 PyObject *args)
684{
685 PyObject *obj;
686 const char *errors = NULL;
687 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000688 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000689
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000690 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
691 &obj, &errors))
692 return NULL;
693
694 if (PyUnicode_Check(obj)) {
695 data = PyUnicode_AS_DATA(obj);
696 size = PyUnicode_GET_DATA_SIZE(obj);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000697 return codec_tuple(PyString_FromStringAndSize(data, size), size);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000698 }
699 else {
700 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
701 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000702 return codec_tuple(PyString_FromStringAndSize(data, size), size);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000703 }
704}
705
706static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000707utf_7_encode(PyObject *self,
708 PyObject *args)
709{
710 PyObject *str, *v;
711 const char *errors = NULL;
712
713 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
714 &str, &errors))
715 return NULL;
716
717 str = PyUnicode_FromObject(str);
718 if (str == NULL)
719 return NULL;
720 v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
721 PyUnicode_GET_SIZE(str),
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000722 0,
723 0,
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000724 errors),
725 PyUnicode_GET_SIZE(str));
726 Py_DECREF(str);
727 return v;
728}
729
730static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000731utf_8_encode(PyObject *self,
732 PyObject *args)
733{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000734 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000735 const char *errors = NULL;
736
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000737 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000738 &str, &errors))
739 return NULL;
740
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000741 str = PyUnicode_FromObject(str);
742 if (str == NULL)
743 return NULL;
744 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
745 PyUnicode_GET_SIZE(str),
746 errors),
747 PyUnicode_GET_SIZE(str));
748 Py_DECREF(str);
749 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000750}
751
752/* This version provides access to the byteorder parameter of the
753 builtin UTF-16 codecs as optional third argument. It defaults to 0
754 which means: use the native byte order and prepend the data with a
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000755 BOM mark.
Guido van Rossume2d67f92000-03-10 23:09:23 +0000756
757*/
758
759static PyObject *
760utf_16_encode(PyObject *self,
761 PyObject *args)
762{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000763 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000764 const char *errors = NULL;
765 int byteorder = 0;
766
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000767 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000768 &str, &errors, &byteorder))
769 return NULL;
770
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000771 str = PyUnicode_FromObject(str);
772 if (str == NULL)
773 return NULL;
774 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
775 PyUnicode_GET_SIZE(str),
776 errors,
777 byteorder),
778 PyUnicode_GET_SIZE(str));
779 Py_DECREF(str);
780 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000781}
782
783static PyObject *
784utf_16_le_encode(PyObject *self,
785 PyObject *args)
786{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000787 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000788 const char *errors = NULL;
789
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000790 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000791 &str, &errors))
792 return NULL;
793
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000794 str = PyUnicode_FromObject(str);
795 if (str == NULL)
796 return NULL;
797 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000798 PyUnicode_GET_SIZE(str),
799 errors,
800 -1),
801 PyUnicode_GET_SIZE(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000802 Py_DECREF(str);
803 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000804}
805
806static PyObject *
807utf_16_be_encode(PyObject *self,
808 PyObject *args)
809{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000810 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000811 const char *errors = NULL;
812
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000813 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000814 &str, &errors))
815 return NULL;
816
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000817 str = PyUnicode_FromObject(str);
818 if (str == NULL)
819 return NULL;
820 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
821 PyUnicode_GET_SIZE(str),
822 errors,
823 +1),
824 PyUnicode_GET_SIZE(str));
825 Py_DECREF(str);
826 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000827}
828
/* This version provides access to the byteorder parameter of the
   builtin UTF-32 codecs as an optional third argument. It defaults
   to 0, which means: use the native byte order and prepend a byte
   order mark (BOM) to the data.

*/
835
836static PyObject *
837utf_32_encode(PyObject *self,
838 PyObject *args)
839{
840 PyObject *str, *v;
841 const char *errors = NULL;
842 int byteorder = 0;
843
844 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
845 &str, &errors, &byteorder))
846 return NULL;
847
848 str = PyUnicode_FromObject(str);
849 if (str == NULL)
850 return NULL;
851 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
852 PyUnicode_GET_SIZE(str),
853 errors,
854 byteorder),
855 PyUnicode_GET_SIZE(str));
856 Py_DECREF(str);
857 return v;
858}
859
860static PyObject *
861utf_32_le_encode(PyObject *self,
862 PyObject *args)
863{
864 PyObject *str, *v;
865 const char *errors = NULL;
866
867 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
868 &str, &errors))
869 return NULL;
870
871 str = PyUnicode_FromObject(str);
872 if (str == NULL)
873 return NULL;
874 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
875 PyUnicode_GET_SIZE(str),
876 errors,
877 -1),
878 PyUnicode_GET_SIZE(str));
879 Py_DECREF(str);
880 return v;
881}
882
883static PyObject *
884utf_32_be_encode(PyObject *self,
885 PyObject *args)
886{
887 PyObject *str, *v;
888 const char *errors = NULL;
889
890 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
891 &str, &errors))
892 return NULL;
893
894 str = PyUnicode_FromObject(str);
895 if (str == NULL)
896 return NULL;
897 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
898 PyUnicode_GET_SIZE(str),
899 errors,
900 +1),
901 PyUnicode_GET_SIZE(str));
902 Py_DECREF(str);
903 return v;
904}
905
Guido van Rossume2d67f92000-03-10 23:09:23 +0000906static PyObject *
907unicode_escape_encode(PyObject *self,
908 PyObject *args)
909{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000910 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000911 const char *errors = NULL;
912
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000913 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000914 &str, &errors))
915 return NULL;
916
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000917 str = PyUnicode_FromObject(str);
918 if (str == NULL)
919 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000920 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000921 PyUnicode_GET_SIZE(str)),
922 PyUnicode_GET_SIZE(str));
923 Py_DECREF(str);
924 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000925}
926
927static PyObject *
928raw_unicode_escape_encode(PyObject *self,
929 PyObject *args)
930{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000931 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000932 const char *errors = NULL;
933
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000934 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000935 &str, &errors))
936 return NULL;
937
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000938 str = PyUnicode_FromObject(str);
939 if (str == NULL)
940 return NULL;
941 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000942 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000943 PyUnicode_GET_SIZE(str)),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000944 PyUnicode_GET_SIZE(str));
945 Py_DECREF(str);
946 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000947}
948
949static PyObject *
950latin_1_encode(PyObject *self,
951 PyObject *args)
952{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000953 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000954 const char *errors = NULL;
955
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000956 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000957 &str, &errors))
958 return NULL;
959
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000960 str = PyUnicode_FromObject(str);
961 if (str == NULL)
962 return NULL;
963 v = codec_tuple(PyUnicode_EncodeLatin1(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000964 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000965 PyUnicode_GET_SIZE(str),
966 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000967 PyUnicode_GET_SIZE(str));
968 Py_DECREF(str);
969 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000970}
971
972static PyObject *
973ascii_encode(PyObject *self,
974 PyObject *args)
975{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000976 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000977 const char *errors = NULL;
978
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000979 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000980 &str, &errors))
981 return NULL;
982
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000983 str = PyUnicode_FromObject(str);
984 if (str == NULL)
985 return NULL;
986 v = codec_tuple(PyUnicode_EncodeASCII(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000987 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000988 PyUnicode_GET_SIZE(str),
989 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000990 PyUnicode_GET_SIZE(str));
991 Py_DECREF(str);
992 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000993}
994
995static PyObject *
996charmap_encode(PyObject *self,
997 PyObject *args)
998{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000999 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001000 const char *errors = NULL;
1001 PyObject *mapping = NULL;
1002
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001003 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +00001004 &str, &errors, &mapping))
1005 return NULL;
1006 if (mapping == Py_None)
1007 mapping = NULL;
1008
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001009 str = PyUnicode_FromObject(str);
1010 if (str == NULL)
1011 return NULL;
1012 v = codec_tuple(PyUnicode_EncodeCharmap(
Walter Dörwald9fd115c2005-11-02 08:30:08 +00001013 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +00001014 PyUnicode_GET_SIZE(str),
Walter Dörwald9fd115c2005-11-02 08:30:08 +00001015 mapping,
Guido van Rossume2d67f92000-03-10 23:09:23 +00001016 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001017 PyUnicode_GET_SIZE(str));
1018 Py_DECREF(str);
1019 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001020}
1021
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001022static PyObject*
1023charmap_build(PyObject *self, PyObject *args)
1024{
1025 PyObject *map;
1026 if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
1027 return NULL;
1028 return PyUnicode_BuildEncodingMap(map);
1029}
1030
Martin v. Löwis6238d2b2002-06-30 15:26:10 +00001031#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +00001032
1033static PyObject *
1034mbcs_encode(PyObject *self,
1035 PyObject *args)
1036{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001037 PyObject *str, *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001038 const char *errors = NULL;
1039
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001040 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
Guido van Rossum24bdb042000-03-28 20:29:59 +00001041 &str, &errors))
1042 return NULL;
1043
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001044 str = PyUnicode_FromObject(str);
1045 if (str == NULL)
1046 return NULL;
1047 v = codec_tuple(PyUnicode_EncodeMBCS(
Walter Dörwald9fd115c2005-11-02 08:30:08 +00001048 PyUnicode_AS_UNICODE(str),
Guido van Rossum24bdb042000-03-28 20:29:59 +00001049 PyUnicode_GET_SIZE(str),
1050 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001051 PyUnicode_GET_SIZE(str));
1052 Py_DECREF(str);
1053 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001054}
1055
#endif /* MS_WINDOWS && HAVE_USABLE_WCHAR_T */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001057
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001058/* --- Error handler registry --------------------------------------------- */
1059
Walter Dörwald0ae29812002-10-31 13:36:29 +00001060PyDoc_STRVAR(register_error__doc__,
1061"register_error(errors, handler)\n\
1062\n\
1063Register the specified error handler under the name\n\
1064errors. handler must be a callable object, that\n\
1065will be called with an exception instance containing\n\
1066information about the location of the encoding/decoding\n\
1067error and must return a (replacement, new position) tuple.");
1068
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001069static PyObject *register_error(PyObject *self, PyObject *args)
1070{
1071 const char *name;
1072 PyObject *handler;
1073
1074 if (!PyArg_ParseTuple(args, "sO:register_error",
1075 &name, &handler))
1076 return NULL;
1077 if (PyCodec_RegisterError(name, handler))
1078 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001079 Py_RETURN_NONE;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001080}
1081
Walter Dörwald0ae29812002-10-31 13:36:29 +00001082PyDoc_STRVAR(lookup_error__doc__,
1083"lookup_error(errors) -> handler\n\
1084\n\
1085Return the error handler for the specified error handling name\n\
1086or raise a LookupError, if no handler exists under this name.");
1087
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001088static PyObject *lookup_error(PyObject *self, PyObject *args)
1089{
1090 const char *name;
1091
1092 if (!PyArg_ParseTuple(args, "s:lookup_error",
1093 &name))
1094 return NULL;
1095 return PyCodec_LookupError(name);
1096}
1097
Guido van Rossume2d67f92000-03-10 23:09:23 +00001098/* --- Module API --------------------------------------------------------- */
1099
1100static PyMethodDef _codecs_functions[] = {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001101 {"register", codec_register, METH_O,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001102 register__doc__},
Marc-André Lemburg3f419742004-07-10 12:06:10 +00001103 {"lookup", codec_lookup, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001104 lookup__doc__},
Brett Cannon3e377de2004-07-10 21:41:14 +00001105 {"encode", codec_encode, METH_VARARGS,
1106 encode__doc__},
1107 {"decode", codec_decode, METH_VARARGS,
1108 decode__doc__},
Martin v. Löwis8a8da792002-08-14 07:46:28 +00001109 {"escape_encode", escape_encode, METH_VARARGS},
1110 {"escape_decode", escape_decode, METH_VARARGS},
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001111 {"utf_8_encode", utf_8_encode, METH_VARARGS},
1112 {"utf_8_decode", utf_8_decode, METH_VARARGS},
1113 {"utf_7_encode", utf_7_encode, METH_VARARGS},
1114 {"utf_7_decode", utf_7_decode, METH_VARARGS},
1115 {"utf_16_encode", utf_16_encode, METH_VARARGS},
1116 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1117 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1118 {"utf_16_decode", utf_16_decode, METH_VARARGS},
1119 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1120 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1121 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
Walter Dörwald41980ca2007-08-16 21:55:45 +00001122 {"utf_32_encode", utf_32_encode, METH_VARARGS},
1123 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1124 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1125 {"utf_32_decode", utf_32_decode, METH_VARARGS},
1126 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1127 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1128 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001129 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1130 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1131 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1132 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1133 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1134 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1135 {"latin_1_encode", latin_1_encode, METH_VARARGS},
1136 {"latin_1_decode", latin_1_decode, METH_VARARGS},
1137 {"ascii_encode", ascii_encode, METH_VARARGS},
1138 {"ascii_decode", ascii_decode, METH_VARARGS},
1139 {"charmap_encode", charmap_encode, METH_VARARGS},
1140 {"charmap_decode", charmap_decode, METH_VARARGS},
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001141 {"charmap_build", charmap_build, METH_VARARGS},
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001142 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
1143 {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
Martin v. Löwis6238d2b2002-06-30 15:26:10 +00001144#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001145 {"mbcs_encode", mbcs_encode, METH_VARARGS},
1146 {"mbcs_decode", mbcs_decode, METH_VARARGS},
Guido van Rossum24bdb042000-03-28 20:29:59 +00001147#endif
Walter Dörwald0ae29812002-10-31 13:36:29 +00001148 {"register_error", register_error, METH_VARARGS,
1149 register_error__doc__},
1150 {"lookup_error", lookup_error, METH_VARARGS,
1151 lookup_error__doc__},
Guido van Rossume2d67f92000-03-10 23:09:23 +00001152 {NULL, NULL} /* sentinel */
1153};
1154
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001155PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00001156init_codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001157{
1158 Py_InitModule("_codecs", _codecs_functions);
1159}