blob: e3933e7e23fc5a9041f1787cccc6d8e6d0b861ab [file] [log] [blame]
Guido van Rossume2d67f92000-03-10 23:09:23 +00001/* ------------------------------------------------------------------------
2
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
5
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
8
9 The codec registry is accessible via:
10
11 register(search_function) -> None
12
Guido van Rossum36e0a922007-07-20 04:05:57 +000013 lookup(encoding) -> CodecInfo object
Guido van Rossume2d67f92000-03-10 23:09:23 +000014
15 The builtin Unicode codecs use the following interface:
16
Walter Dörwald9fd115c2005-11-02 08:30:08 +000017 <encoding>_encode(Unicode_object[,errors='strict']) ->
Guido van Rossume2d67f92000-03-10 23:09:23 +000018 (string object, bytes consumed)
19
Walter Dörwald9fd115c2005-11-02 08:30:08 +000020 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
Guido van Rossume2d67f92000-03-10 23:09:23 +000021 (Unicode object, bytes consumed)
22
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +000023 <encoding>_encode() interfaces also accept non-Unicode object as
24 input. The objects are then converted to Unicode using
25 PyUnicode_FromObject() prior to applying the conversion.
26
Guido van Rossume2d67f92000-03-10 23:09:23 +000027 These <encoding>s are available: utf_8, unicode_escape,
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +000028 raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
29 mbcs (on win32).
30
Guido van Rossume2d67f92000-03-10 23:09:23 +000031
32Written by Marc-Andre Lemburg (mal@lemburg.com).
33
Guido van Rossum16b1ad92000-08-03 16:24:25 +000034Copyright (c) Corporation for National Research Initiatives.
Guido van Rossume2d67f92000-03-10 23:09:23 +000035
36 ------------------------------------------------------------------------ */
37
Martin v. Löwis18e16552006-02-15 17:27:45 +000038#define PY_SSIZE_T_CLEAN
Guido van Rossume2d67f92000-03-10 23:09:23 +000039#include "Python.h"
40
41/* --- Registry ----------------------------------------------------------- */
42
Walter Dörwald0ae29812002-10-31 13:36:29 +000043PyDoc_STRVAR(register__doc__,
44"register(search_function)\n\
45\n\
46Register a codec search function. Search functions are expected to take\n\
47one argument, the encoding name in all lower case letters, and return\n\
Guido van Rossum36e0a922007-07-20 04:05:57 +000048a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
49(or a CodecInfo object).");
Walter Dörwald0ae29812002-10-31 13:36:29 +000050
Guido van Rossume2d67f92000-03-10 23:09:23 +000051static
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000052PyObject *codec_register(PyObject *self, PyObject *search_function)
Guido van Rossume2d67f92000-03-10 23:09:23 +000053{
Guido van Rossume2d67f92000-03-10 23:09:23 +000054 if (PyCodec_Register(search_function))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000055 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000056
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000057 Py_RETURN_NONE;
Guido van Rossume2d67f92000-03-10 23:09:23 +000058}
59
Walter Dörwald0ae29812002-10-31 13:36:29 +000060PyDoc_STRVAR(lookup__doc__,
Guido van Rossum36e0a922007-07-20 04:05:57 +000061"lookup(encoding) -> CodecInfo\n\
Walter Dörwald0ae29812002-10-31 13:36:29 +000062\n\
63Looks up a codec tuple in the Python codec registry and returns\n\
Guido van Rossum36e0a922007-07-20 04:05:57 +000064a tuple of function (or a CodecInfo object).");
Walter Dörwald0ae29812002-10-31 13:36:29 +000065
Guido van Rossume2d67f92000-03-10 23:09:23 +000066static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000067PyObject *codec_lookup(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000068{
69 char *encoding;
70
71 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000072 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +000073
74 return _PyCodec_Lookup(encoding);
Guido van Rossume2d67f92000-03-10 23:09:23 +000075}
76
Marc-André Lemburg3f419742004-07-10 12:06:10 +000077PyDoc_STRVAR(encode__doc__,
78"encode(obj, [encoding[,errors]]) -> object\n\
79\n\
80Encodes obj using the codec registered for encoding. encoding defaults\n\
81to the default encoding. errors may be given to set a different error\n\
82handling scheme. Default is 'strict' meaning that encoding errors raise\n\
83a ValueError. Other possible values are 'ignore', 'replace' and\n\
84'xmlcharrefreplace' as well as any other name registered with\n\
85codecs.register_error that can handle ValueErrors.");
86
87static PyObject *
88codec_encode(PyObject *self, PyObject *args)
89{
Brett Cannon3e377de2004-07-10 21:41:14 +000090 const char *encoding = NULL;
91 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +000092 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000093
Marc-André Lemburg3f419742004-07-10 12:06:10 +000094 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
95 return NULL;
96
97 if (encoding == NULL)
98 encoding = PyUnicode_GetDefaultEncoding();
99
100 /* Encode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000101 return PyCodec_Encode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000102}
103
104PyDoc_STRVAR(decode__doc__,
105"decode(obj, [encoding[,errors]]) -> object\n\
106\n\
107Decodes obj using the codec registered for encoding. encoding defaults\n\
108to the default encoding. errors may be given to set a different error\n\
109handling scheme. Default is 'strict' meaning that encoding errors raise\n\
110a ValueError. Other possible values are 'ignore' and 'replace'\n\
111as well as any other name registerd with codecs.register_error that is\n\
112able to handle ValueErrors.");
113
114static PyObject *
115codec_decode(PyObject *self, PyObject *args)
116{
Brett Cannon3e377de2004-07-10 21:41:14 +0000117 const char *encoding = NULL;
118 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000119 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000120
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000121 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
122 return NULL;
123
124 if (encoding == NULL)
125 encoding = PyUnicode_GetDefaultEncoding();
126
127 /* Decode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000128 return PyCodec_Decode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000129}
130
Guido van Rossume2d67f92000-03-10 23:09:23 +0000131/* --- Helpers ------------------------------------------------------------ */
132
133static
134PyObject *codec_tuple(PyObject *unicode,
Thomas Wouters477c8d52006-05-27 19:21:47 +0000135 Py_ssize_t len)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000136{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000137 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000138 if (unicode == NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000139 return NULL;
140 v = Py_BuildValue("On", unicode, len);
141 Py_DECREF(unicode);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000142 return v;
143}
144
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000145/* --- String codecs ------------------------------------------------------ */
146static PyObject *
147escape_decode(PyObject *self,
148 PyObject *args)
149{
150 const char *errors = NULL;
151 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000152 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000153
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000154 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
155 &data, &size, &errors))
156 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000157 return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000158 size);
159}
160
161static PyObject *
162escape_encode(PyObject *self,
163 PyObject *args)
164{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000165 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000166 PyObject *str;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000167 Py_ssize_t size;
168 Py_ssize_t newsize;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000169 const char *errors = NULL;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000170 PyObject *v;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000171
172 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
173 &PyString_Type, &str, &errors))
174 return NULL;
175
Martin v. Löwis5b222132007-06-10 09:51:05 +0000176 size = PyString_GET_SIZE(str);
Walter Dörwald1ab83302007-05-18 17:15:44 +0000177 newsize = 4*size;
178 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != size) {
179 PyErr_SetString(PyExc_OverflowError,
180 "string is too large to encode");
181 return NULL;
182 }
183 v = PyBytes_FromStringAndSize(NULL, newsize);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000184
Walter Dörwald1ab83302007-05-18 17:15:44 +0000185 if (v == NULL) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000186 return NULL;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000187 }
188 else {
189 register Py_ssize_t i;
190 register char c;
191 register char *p = PyBytes_AS_STRING(v);
192
193 for (i = 0; i < size; i++) {
194 /* There's at least enough room for a hex escape */
195 assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
196 c = PyString_AS_STRING(str)[i];
197 if (c == '\'' || c == '\\')
198 *p++ = '\\', *p++ = c;
199 else if (c == '\t')
200 *p++ = '\\', *p++ = 't';
201 else if (c == '\n')
202 *p++ = '\\', *p++ = 'n';
203 else if (c == '\r')
204 *p++ = '\\', *p++ = 'r';
205 else if (c < ' ' || c >= 0x7f) {
206 *p++ = '\\';
207 *p++ = 'x';
208 *p++ = hexdigits[(c & 0xf0) >> 4];
209 *p++ = hexdigits[c & 0xf];
210 }
211 else
212 *p++ = c;
213 }
214 *p = '\0';
215 if (PyBytes_Resize(v, (p - PyBytes_AS_STRING(v)))) {
216 Py_DECREF(v);
217 return NULL;
218 }
219 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000220
Walter Dörwald1ab83302007-05-18 17:15:44 +0000221 return codec_tuple(v, PyBytes_Size(v));
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000222}
223
Guido van Rossume2d67f92000-03-10 23:09:23 +0000224/* --- Decoder ------------------------------------------------------------ */
225
226static PyObject *
227unicode_internal_decode(PyObject *self,
228 PyObject *args)
229{
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000230 PyObject *obj;
231 const char *errors = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000232 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000233 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000234
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000235 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
236 &obj, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000237 return NULL;
238
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000239 if (PyUnicode_Check(obj)) {
240 Py_INCREF(obj);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000241 return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000242 }
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000243 else {
244 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
245 return NULL;
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000246
247 return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000248 size);
249 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000250}
251
252static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000253utf_7_decode(PyObject *self,
254 PyObject *args)
255{
256 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000257 Py_ssize_t size;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000258 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000259
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000260 if (!PyArg_ParseTuple(args, "t#|z:utf_7_decode",
261 &data, &size, &errors))
262 return NULL;
263
264 return codec_tuple(PyUnicode_DecodeUTF7(data, size, errors),
265 size);
266}
267
268static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000269utf_8_decode(PyObject *self,
270 PyObject *args)
271{
272 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000273 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000274 const char *errors = NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000275 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000276 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000277 PyObject *decoded = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000278
Walter Dörwald69652032004-09-07 20:24:22 +0000279 if (!PyArg_ParseTuple(args, "t#|zi:utf_8_decode",
280 &data, &size, &errors, &final))
281 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000282 if (size < 0) {
283 PyErr_SetString(PyExc_ValueError, "negative argument");
284 return 0;
285 }
Walter Dörwald69652032004-09-07 20:24:22 +0000286 consumed = size;
287
288 decoded = PyUnicode_DecodeUTF8Stateful(data, size, errors,
289 final ? NULL : &consumed);
290 if (decoded == NULL)
291 return NULL;
292 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000293}
294
295static PyObject *
296utf_16_decode(PyObject *self,
297 PyObject *args)
298{
299 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000300 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000301 const char *errors = NULL;
302 int byteorder = 0;
Walter Dörwald69652032004-09-07 20:24:22 +0000303 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000304 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000305 PyObject *decoded;
306
307 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_decode",
308 &data, &size, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000309 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000310 if (size < 0) {
311 PyErr_SetString(PyExc_ValueError, "negative argument");
312 return 0;
313 }
Thomas Wouters477c8d52006-05-27 19:21:47 +0000314 consumed = size; /* This is overwritten unless final is true. */
Walter Dörwald69652032004-09-07 20:24:22 +0000315 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
316 final ? NULL : &consumed);
317 if (decoded == NULL)
318 return NULL;
319 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000320}
321
322static PyObject *
323utf_16_le_decode(PyObject *self,
324 PyObject *args)
325{
326 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000327 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000328 const char *errors = NULL;
329 int byteorder = -1;
Walter Dörwald69652032004-09-07 20:24:22 +0000330 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000331 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000332 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000333
Walter Dörwald69652032004-09-07 20:24:22 +0000334 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_le_decode",
335 &data, &size, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000336 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000337
Martin v. Löwis18e16552006-02-15 17:27:45 +0000338 if (size < 0) {
339 PyErr_SetString(PyExc_ValueError, "negative argument");
340 return 0;
341 }
Thomas Wouters477c8d52006-05-27 19:21:47 +0000342 consumed = size; /* This is overwritten unless final is true. */
Walter Dörwald69652032004-09-07 20:24:22 +0000343 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
344 &byteorder, final ? NULL : &consumed);
345 if (decoded == NULL)
346 return NULL;
347 return codec_tuple(decoded, consumed);
348
Guido van Rossume2d67f92000-03-10 23:09:23 +0000349}
350
351static PyObject *
352utf_16_be_decode(PyObject *self,
353 PyObject *args)
354{
355 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000357 const char *errors = NULL;
358 int byteorder = 1;
Walter Dörwald69652032004-09-07 20:24:22 +0000359 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000360 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000361 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000362
Walter Dörwald69652032004-09-07 20:24:22 +0000363 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_be_decode",
364 &data, &size, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000365 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000366 if (size < 0) {
367 PyErr_SetString(PyExc_ValueError, "negative argument");
368 return 0;
369 }
Thomas Wouters477c8d52006-05-27 19:21:47 +0000370 consumed = size; /* This is overwritten unless final is true. */
Walter Dörwald69652032004-09-07 20:24:22 +0000371 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
372 &byteorder, final ? NULL : &consumed);
373 if (decoded == NULL)
374 return NULL;
375 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000376}
377
378/* This non-standard version also provides access to the byteorder
379 parameter of the builtin UTF-16 codec.
380
381 It returns a tuple (unicode, bytesread, byteorder) with byteorder
382 being the value in effect at the end of data.
383
384*/
385
386static PyObject *
387utf_16_ex_decode(PyObject *self,
388 PyObject *args)
389{
390 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000391 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000392 const char *errors = NULL;
393 int byteorder = 0;
394 PyObject *unicode, *tuple;
Walter Dörwald69652032004-09-07 20:24:22 +0000395 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000396 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000397
398 if (!PyArg_ParseTuple(args, "t#|zii:utf_16_ex_decode",
399 &data, &size, &errors, &byteorder, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000400 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000401 if (size < 0) {
402 PyErr_SetString(PyExc_ValueError, "negative argument");
403 return 0;
404 }
Thomas Wouters477c8d52006-05-27 19:21:47 +0000405 consumed = size; /* This is overwritten unless final is true. */
Walter Dörwald69652032004-09-07 20:24:22 +0000406 unicode = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
407 final ? NULL : &consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000408 if (unicode == NULL)
409 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000410 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000411 Py_DECREF(unicode);
412 return tuple;
413}
414
415static PyObject *
Walter Dörwald41980ca2007-08-16 21:55:45 +0000416utf_32_decode(PyObject *self,
417 PyObject *args)
418{
419 const char *data;
420 Py_ssize_t size;
421 const char *errors = NULL;
422 int byteorder = 0;
423 int final = 0;
424 Py_ssize_t consumed;
425 PyObject *decoded;
426
427 if (!PyArg_ParseTuple(args, "t#|zi:utf_32_decode",
428 &data, &size, &errors, &final))
429 return NULL;
430 if (size < 0) {
431 PyErr_SetString(PyExc_ValueError, "negative argument");
432 return 0;
433 }
434 consumed = size; /* This is overwritten unless final is true. */
435 decoded = PyUnicode_DecodeUTF32Stateful(data, size, errors, &byteorder,
436 final ? NULL : &consumed);
437 if (decoded == NULL)
438 return NULL;
439 return codec_tuple(decoded, consumed);
440}
441
442static PyObject *
443utf_32_le_decode(PyObject *self,
444 PyObject *args)
445{
446 const char *data;
447 Py_ssize_t size;
448 const char *errors = NULL;
449 int byteorder = -1;
450 int final = 0;
451 Py_ssize_t consumed;
452 PyObject *decoded = NULL;
453
454 if (!PyArg_ParseTuple(args, "t#|zi:utf_32_le_decode",
455 &data, &size, &errors, &final))
456 return NULL;
457
458 if (size < 0) {
459 PyErr_SetString(PyExc_ValueError, "negative argument");
460 return 0;
461 }
462 consumed = size; /* This is overwritten unless final is true. */
463 decoded = PyUnicode_DecodeUTF32Stateful(data, size, errors,
464 &byteorder, final ? NULL : &consumed);
465 if (decoded == NULL)
466 return NULL;
467 return codec_tuple(decoded, consumed);
468
469}
470
471static PyObject *
472utf_32_be_decode(PyObject *self,
473 PyObject *args)
474{
475 const char *data;
476 Py_ssize_t size;
477 const char *errors = NULL;
478 int byteorder = 1;
479 int final = 0;
480 Py_ssize_t consumed;
481 PyObject *decoded = NULL;
482
483 if (!PyArg_ParseTuple(args, "t#|zi:utf_32_be_decode",
484 &data, &size, &errors, &final))
485 return NULL;
486 if (size < 0) {
487 PyErr_SetString(PyExc_ValueError, "negative argument");
488 return 0;
489 }
490 consumed = size; /* This is overwritten unless final is true. */
491 decoded = PyUnicode_DecodeUTF32Stateful(data, size, errors,
492 &byteorder, final ? NULL : &consumed);
493 if (decoded == NULL)
494 return NULL;
495 return codec_tuple(decoded, consumed);
496}
497
498/* This non-standard version also provides access to the byteorder
499 parameter of the builtin UTF-32 codec.
500
501 It returns a tuple (unicode, bytesread, byteorder) with byteorder
502 being the value in effect at the end of data.
503
504*/
505
506static PyObject *
507utf_32_ex_decode(PyObject *self,
508 PyObject *args)
509{
510 const char *data;
511 Py_ssize_t size;
512 const char *errors = NULL;
513 int byteorder = 0;
514 PyObject *unicode, *tuple;
515 int final = 0;
516 Py_ssize_t consumed;
517
518 if (!PyArg_ParseTuple(args, "t#|zii:utf_32_ex_decode",
519 &data, &size, &errors, &byteorder, &final))
520 return NULL;
521 if (size < 0) {
522 PyErr_SetString(PyExc_ValueError, "negative argument");
523 return 0;
524 }
525 consumed = size; /* This is overwritten unless final is true. */
526 unicode = PyUnicode_DecodeUTF32Stateful(data, size, errors, &byteorder,
527 final ? NULL : &consumed);
528 if (unicode == NULL)
529 return NULL;
530 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
531 Py_DECREF(unicode);
532 return tuple;
533}
534
535static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000536unicode_escape_decode(PyObject *self,
537 PyObject *args)
538{
539 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000540 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000541 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000542
Guido van Rossume2d67f92000-03-10 23:09:23 +0000543 if (!PyArg_ParseTuple(args, "t#|z:unicode_escape_decode",
544 &data, &size, &errors))
545 return NULL;
546
547 return codec_tuple(PyUnicode_DecodeUnicodeEscape(data, size, errors),
548 size);
549}
550
551static PyObject *
552raw_unicode_escape_decode(PyObject *self,
553 PyObject *args)
554{
555 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000556 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000557 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000558
Guido van Rossume2d67f92000-03-10 23:09:23 +0000559 if (!PyArg_ParseTuple(args, "t#|z:raw_unicode_escape_decode",
560 &data, &size, &errors))
561 return NULL;
562
563 return codec_tuple(PyUnicode_DecodeRawUnicodeEscape(data, size, errors),
564 size);
565}
566
567static PyObject *
568latin_1_decode(PyObject *self,
569 PyObject *args)
570{
571 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000572 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000573 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000574
Guido van Rossume2d67f92000-03-10 23:09:23 +0000575 if (!PyArg_ParseTuple(args, "t#|z:latin_1_decode",
576 &data, &size, &errors))
577 return NULL;
578
579 return codec_tuple(PyUnicode_DecodeLatin1(data, size, errors),
580 size);
581}
582
583static PyObject *
584ascii_decode(PyObject *self,
585 PyObject *args)
586{
587 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000588 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000589 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000590
Guido van Rossume2d67f92000-03-10 23:09:23 +0000591 if (!PyArg_ParseTuple(args, "t#|z:ascii_decode",
592 &data, &size, &errors))
593 return NULL;
594
595 return codec_tuple(PyUnicode_DecodeASCII(data, size, errors),
596 size);
597}
598
599static PyObject *
600charmap_decode(PyObject *self,
601 PyObject *args)
602{
603 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000604 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000605 const char *errors = NULL;
606 PyObject *mapping = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000607
Guido van Rossume2d67f92000-03-10 23:09:23 +0000608 if (!PyArg_ParseTuple(args, "t#|zO:charmap_decode",
609 &data, &size, &errors, &mapping))
610 return NULL;
611 if (mapping == Py_None)
612 mapping = NULL;
613
614 return codec_tuple(PyUnicode_DecodeCharmap(data, size, mapping, errors),
615 size);
616}
617
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000618#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000619
620static PyObject *
621mbcs_decode(PyObject *self,
622 PyObject *args)
623{
624 const char *data;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000625 Py_ssize_t size, consumed;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000626 const char *errors = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000627 int final = 0;
628 PyObject *decoded;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000629
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000630 if (!PyArg_ParseTuple(args, "t#|zi:mbcs_decode",
631 &data, &size, &errors, &final))
Guido van Rossum24bdb042000-03-28 20:29:59 +0000632 return NULL;
633
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000634 decoded = PyUnicode_DecodeMBCSStateful(
635 data, size, errors, final ? NULL : &consumed);
636 if (!decoded)
637 return NULL;
638 return codec_tuple(decoded, final ? size : consumed);
Guido van Rossum24bdb042000-03-28 20:29:59 +0000639}
640
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000641#endif /* MS_WINDOWS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000642
Guido van Rossume2d67f92000-03-10 23:09:23 +0000643/* --- Encoder ------------------------------------------------------------ */
644
645static PyObject *
646readbuffer_encode(PyObject *self,
647 PyObject *args)
648{
649 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000650 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000651 const char *errors = NULL;
652
653 if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
654 &data, &size, &errors))
655 return NULL;
656
Walter Dörwald2233d272007-06-22 12:17:08 +0000657 return codec_tuple(PyBytes_FromStringAndSize(data, size), size);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000658}
659
660static PyObject *
661charbuffer_encode(PyObject *self,
662 PyObject *args)
663{
664 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000665 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000666 const char *errors = NULL;
667
668 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
669 &data, &size, &errors))
670 return NULL;
671
Walter Dörwald2233d272007-06-22 12:17:08 +0000672 return codec_tuple(PyBytes_FromStringAndSize(data, size), size);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000673}
674
675static PyObject *
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000676unicode_internal_encode(PyObject *self,
677 PyObject *args)
678{
679 PyObject *obj;
680 const char *errors = NULL;
681 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000682 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000683
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000684 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
685 &obj, &errors))
686 return NULL;
687
688 if (PyUnicode_Check(obj)) {
689 data = PyUnicode_AS_DATA(obj);
690 size = PyUnicode_GET_DATA_SIZE(obj);
Walter Dörwald2233d272007-06-22 12:17:08 +0000691 return codec_tuple(PyBytes_FromStringAndSize(data, size), size);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000692 }
693 else {
694 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
695 return NULL;
Walter Dörwald2233d272007-06-22 12:17:08 +0000696 return codec_tuple(PyBytes_FromStringAndSize(data, size), size);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000697 }
698}
699
700static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000701utf_7_encode(PyObject *self,
702 PyObject *args)
703{
704 PyObject *str, *v;
705 const char *errors = NULL;
706
707 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
708 &str, &errors))
709 return NULL;
710
711 str = PyUnicode_FromObject(str);
712 if (str == NULL)
713 return NULL;
714 v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
715 PyUnicode_GET_SIZE(str),
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000716 0,
717 0,
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000718 errors),
719 PyUnicode_GET_SIZE(str));
720 Py_DECREF(str);
721 return v;
722}
723
724static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000725utf_8_encode(PyObject *self,
726 PyObject *args)
727{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000728 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000729 const char *errors = NULL;
730
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000731 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000732 &str, &errors))
733 return NULL;
734
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000735 str = PyUnicode_FromObject(str);
736 if (str == NULL)
737 return NULL;
738 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
739 PyUnicode_GET_SIZE(str),
740 errors),
741 PyUnicode_GET_SIZE(str));
742 Py_DECREF(str);
743 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000744}
745
746/* This version provides access to the byteorder parameter of the
747 builtin UTF-16 codecs as optional third argument. It defaults to 0
748 which means: use the native byte order and prepend the data with a
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000749 BOM mark.
Guido van Rossume2d67f92000-03-10 23:09:23 +0000750
751*/
752
753static PyObject *
754utf_16_encode(PyObject *self,
755 PyObject *args)
756{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000757 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000758 const char *errors = NULL;
759 int byteorder = 0;
760
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000761 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000762 &str, &errors, &byteorder))
763 return NULL;
764
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000765 str = PyUnicode_FromObject(str);
766 if (str == NULL)
767 return NULL;
768 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
769 PyUnicode_GET_SIZE(str),
770 errors,
771 byteorder),
772 PyUnicode_GET_SIZE(str));
773 Py_DECREF(str);
774 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000775}
776
777static PyObject *
778utf_16_le_encode(PyObject *self,
779 PyObject *args)
780{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000781 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000782 const char *errors = NULL;
783
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000784 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000785 &str, &errors))
786 return NULL;
787
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000788 str = PyUnicode_FromObject(str);
789 if (str == NULL)
790 return NULL;
791 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000792 PyUnicode_GET_SIZE(str),
793 errors,
794 -1),
795 PyUnicode_GET_SIZE(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000796 Py_DECREF(str);
797 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000798}
799
800static PyObject *
801utf_16_be_encode(PyObject *self,
802 PyObject *args)
803{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000804 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000805 const char *errors = NULL;
806
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000807 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000808 &str, &errors))
809 return NULL;
810
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000811 str = PyUnicode_FromObject(str);
812 if (str == NULL)
813 return NULL;
814 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
815 PyUnicode_GET_SIZE(str),
816 errors,
817 +1),
818 PyUnicode_GET_SIZE(str));
819 Py_DECREF(str);
820 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000821}
822
Walter Dörwald41980ca2007-08-16 21:55:45 +0000823/* This version provides access to the byteorder parameter of the
824 builtin UTF-32 codecs as optional third argument. It defaults to 0
825 which means: use the native byte order and prepend the data with a
826 BOM mark.
827
828*/
829
830static PyObject *
831utf_32_encode(PyObject *self,
832 PyObject *args)
833{
834 PyObject *str, *v;
835 const char *errors = NULL;
836 int byteorder = 0;
837
838 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
839 &str, &errors, &byteorder))
840 return NULL;
841
842 str = PyUnicode_FromObject(str);
843 if (str == NULL)
844 return NULL;
845 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
846 PyUnicode_GET_SIZE(str),
847 errors,
848 byteorder),
849 PyUnicode_GET_SIZE(str));
850 Py_DECREF(str);
851 return v;
852}
853
854static PyObject *
855utf_32_le_encode(PyObject *self,
856 PyObject *args)
857{
858 PyObject *str, *v;
859 const char *errors = NULL;
860
861 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
862 &str, &errors))
863 return NULL;
864
865 str = PyUnicode_FromObject(str);
866 if (str == NULL)
867 return NULL;
868 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
869 PyUnicode_GET_SIZE(str),
870 errors,
871 -1),
872 PyUnicode_GET_SIZE(str));
873 Py_DECREF(str);
874 return v;
875}
876
877static PyObject *
878utf_32_be_encode(PyObject *self,
879 PyObject *args)
880{
881 PyObject *str, *v;
882 const char *errors = NULL;
883
884 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
885 &str, &errors))
886 return NULL;
887
888 str = PyUnicode_FromObject(str);
889 if (str == NULL)
890 return NULL;
891 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
892 PyUnicode_GET_SIZE(str),
893 errors,
894 +1),
895 PyUnicode_GET_SIZE(str));
896 Py_DECREF(str);
897 return v;
898}
899
Guido van Rossume2d67f92000-03-10 23:09:23 +0000900static PyObject *
901unicode_escape_encode(PyObject *self,
902 PyObject *args)
903{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000904 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000905 const char *errors = NULL;
906
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000907 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000908 &str, &errors))
909 return NULL;
910
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000911 str = PyUnicode_FromObject(str);
912 if (str == NULL)
913 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000914 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000915 PyUnicode_GET_SIZE(str)),
916 PyUnicode_GET_SIZE(str));
917 Py_DECREF(str);
918 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000919}
920
921static PyObject *
922raw_unicode_escape_encode(PyObject *self,
923 PyObject *args)
924{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000925 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000926 const char *errors = NULL;
927
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000928 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000929 &str, &errors))
930 return NULL;
931
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000932 str = PyUnicode_FromObject(str);
933 if (str == NULL)
934 return NULL;
935 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000936 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000937 PyUnicode_GET_SIZE(str)),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000938 PyUnicode_GET_SIZE(str));
939 Py_DECREF(str);
940 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000941}
942
943static PyObject *
944latin_1_encode(PyObject *self,
945 PyObject *args)
946{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000947 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000948 const char *errors = NULL;
949
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000950 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000951 &str, &errors))
952 return NULL;
953
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000954 str = PyUnicode_FromObject(str);
955 if (str == NULL)
956 return NULL;
957 v = codec_tuple(PyUnicode_EncodeLatin1(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000958 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000959 PyUnicode_GET_SIZE(str),
960 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000961 PyUnicode_GET_SIZE(str));
962 Py_DECREF(str);
963 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000964}
965
966static PyObject *
967ascii_encode(PyObject *self,
968 PyObject *args)
969{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000970 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000971 const char *errors = NULL;
972
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000973 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000974 &str, &errors))
975 return NULL;
976
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000977 str = PyUnicode_FromObject(str);
978 if (str == NULL)
979 return NULL;
980 v = codec_tuple(PyUnicode_EncodeASCII(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000981 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000982 PyUnicode_GET_SIZE(str),
983 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000984 PyUnicode_GET_SIZE(str));
985 Py_DECREF(str);
986 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000987}
988
989static PyObject *
990charmap_encode(PyObject *self,
991 PyObject *args)
992{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000993 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000994 const char *errors = NULL;
995 PyObject *mapping = NULL;
996
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000997 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000998 &str, &errors, &mapping))
999 return NULL;
1000 if (mapping == Py_None)
1001 mapping = NULL;
1002
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001003 str = PyUnicode_FromObject(str);
1004 if (str == NULL)
1005 return NULL;
1006 v = codec_tuple(PyUnicode_EncodeCharmap(
Walter Dörwald9fd115c2005-11-02 08:30:08 +00001007 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +00001008 PyUnicode_GET_SIZE(str),
Walter Dörwald9fd115c2005-11-02 08:30:08 +00001009 mapping,
Guido van Rossume2d67f92000-03-10 23:09:23 +00001010 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001011 PyUnicode_GET_SIZE(str));
1012 Py_DECREF(str);
1013 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001014}
1015
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001016static PyObject*
1017charmap_build(PyObject *self, PyObject *args)
1018{
1019 PyObject *map;
1020 if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
1021 return NULL;
1022 return PyUnicode_BuildEncodingMap(map);
1023}
1024
Martin v. Löwis6238d2b2002-06-30 15:26:10 +00001025#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +00001026
1027static PyObject *
1028mbcs_encode(PyObject *self,
1029 PyObject *args)
1030{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001031 PyObject *str, *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001032 const char *errors = NULL;
1033
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001034 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
Guido van Rossum24bdb042000-03-28 20:29:59 +00001035 &str, &errors))
1036 return NULL;
1037
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001038 str = PyUnicode_FromObject(str);
1039 if (str == NULL)
1040 return NULL;
1041 v = codec_tuple(PyUnicode_EncodeMBCS(
Walter Dörwald9fd115c2005-11-02 08:30:08 +00001042 PyUnicode_AS_UNICODE(str),
Guido van Rossum24bdb042000-03-28 20:29:59 +00001043 PyUnicode_GET_SIZE(str),
1044 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001045 PyUnicode_GET_SIZE(str));
1046 Py_DECREF(str);
1047 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001048}
1049
Martin v. Löwis6238d2b2002-06-30 15:26:10 +00001050#endif /* MS_WINDOWS */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001051
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001052/* --- Error handler registry --------------------------------------------- */
1053
Walter Dörwald0ae29812002-10-31 13:36:29 +00001054PyDoc_STRVAR(register_error__doc__,
1055"register_error(errors, handler)\n\
1056\n\
1057Register the specified error handler under the name\n\
1058errors. handler must be a callable object, that\n\
1059will be called with an exception instance containing\n\
1060information about the location of the encoding/decoding\n\
1061error and must return a (replacement, new position) tuple.");
1062
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001063static PyObject *register_error(PyObject *self, PyObject *args)
1064{
1065 const char *name;
1066 PyObject *handler;
1067
1068 if (!PyArg_ParseTuple(args, "sO:register_error",
1069 &name, &handler))
1070 return NULL;
1071 if (PyCodec_RegisterError(name, handler))
1072 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001073 Py_RETURN_NONE;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001074}
1075
Walter Dörwald0ae29812002-10-31 13:36:29 +00001076PyDoc_STRVAR(lookup_error__doc__,
1077"lookup_error(errors) -> handler\n\
1078\n\
1079Return the error handler for the specified error handling name\n\
1080or raise a LookupError, if no handler exists under this name.");
1081
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001082static PyObject *lookup_error(PyObject *self, PyObject *args)
1083{
1084 const char *name;
1085
1086 if (!PyArg_ParseTuple(args, "s:lookup_error",
1087 &name))
1088 return NULL;
1089 return PyCodec_LookupError(name);
1090}
1091
Guido van Rossume2d67f92000-03-10 23:09:23 +00001092/* --- Module API --------------------------------------------------------- */
1093
1094static PyMethodDef _codecs_functions[] = {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001095 {"register", codec_register, METH_O,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001096 register__doc__},
Marc-André Lemburg3f419742004-07-10 12:06:10 +00001097 {"lookup", codec_lookup, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001098 lookup__doc__},
Brett Cannon3e377de2004-07-10 21:41:14 +00001099 {"encode", codec_encode, METH_VARARGS,
1100 encode__doc__},
1101 {"decode", codec_decode, METH_VARARGS,
1102 decode__doc__},
Martin v. Löwis8a8da792002-08-14 07:46:28 +00001103 {"escape_encode", escape_encode, METH_VARARGS},
1104 {"escape_decode", escape_decode, METH_VARARGS},
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001105 {"utf_8_encode", utf_8_encode, METH_VARARGS},
1106 {"utf_8_decode", utf_8_decode, METH_VARARGS},
1107 {"utf_7_encode", utf_7_encode, METH_VARARGS},
1108 {"utf_7_decode", utf_7_decode, METH_VARARGS},
1109 {"utf_16_encode", utf_16_encode, METH_VARARGS},
1110 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1111 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1112 {"utf_16_decode", utf_16_decode, METH_VARARGS},
1113 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1114 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1115 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
Walter Dörwald41980ca2007-08-16 21:55:45 +00001116 {"utf_32_encode", utf_32_encode, METH_VARARGS},
1117 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1118 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1119 {"utf_32_decode", utf_32_decode, METH_VARARGS},
1120 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1121 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1122 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001123 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1124 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1125 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1126 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1127 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1128 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1129 {"latin_1_encode", latin_1_encode, METH_VARARGS},
1130 {"latin_1_decode", latin_1_decode, METH_VARARGS},
1131 {"ascii_encode", ascii_encode, METH_VARARGS},
1132 {"ascii_decode", ascii_decode, METH_VARARGS},
1133 {"charmap_encode", charmap_encode, METH_VARARGS},
1134 {"charmap_decode", charmap_decode, METH_VARARGS},
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001135 {"charmap_build", charmap_build, METH_VARARGS},
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001136 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
1137 {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
Martin v. Löwis6238d2b2002-06-30 15:26:10 +00001138#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001139 {"mbcs_encode", mbcs_encode, METH_VARARGS},
1140 {"mbcs_decode", mbcs_decode, METH_VARARGS},
Guido van Rossum24bdb042000-03-28 20:29:59 +00001141#endif
Walter Dörwald0ae29812002-10-31 13:36:29 +00001142 {"register_error", register_error, METH_VARARGS,
1143 register_error__doc__},
1144 {"lookup_error", lookup_error, METH_VARARGS,
1145 lookup_error__doc__},
Guido van Rossume2d67f92000-03-10 23:09:23 +00001146 {NULL, NULL} /* sentinel */
1147};
1148
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001149PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00001150init_codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001151{
1152 Py_InitModule("_codecs", _codecs_functions);
1153}