blob: d4eb0d5b9c0026cb5c8409ed9e7bd7087d153b03 [file] [log] [blame]
Guido van Rossume2d67f92000-03-10 23:09:23 +00001/* ------------------------------------------------------------------------
2
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
5
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
8
9 The codec registry is accessible via:
10
11 register(search_function) -> None
12
Walter Dörwald219336a2007-07-19 13:04:38 +000013 lookup(encoding) -> CodecInfo object
Guido van Rossume2d67f92000-03-10 23:09:23 +000014
15 The builtin Unicode codecs use the following interface:
16
Walter Dörwald9fd115c2005-11-02 08:30:08 +000017 <encoding>_encode(Unicode_object[,errors='strict']) ->
Guido van Rossume2d67f92000-03-10 23:09:23 +000018 (string object, bytes consumed)
19
Walter Dörwald9fd115c2005-11-02 08:30:08 +000020 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
Guido van Rossume2d67f92000-03-10 23:09:23 +000021 (Unicode object, bytes consumed)
22
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +000023 <encoding>_encode() interfaces also accept non-Unicode object as
24 input. The objects are then converted to Unicode using
25 PyUnicode_FromObject() prior to applying the conversion.
26
Guido van Rossume2d67f92000-03-10 23:09:23 +000027 These <encoding>s are available: utf_8, unicode_escape,
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +000028 raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
29 mbcs (on win32).
30
Guido van Rossume2d67f92000-03-10 23:09:23 +000031
32Written by Marc-Andre Lemburg (mal@lemburg.com).
33
Guido van Rossum16b1ad92000-08-03 16:24:25 +000034Copyright (c) Corporation for National Research Initiatives.
Guido van Rossume2d67f92000-03-10 23:09:23 +000035
36 ------------------------------------------------------------------------ */
37
Martin v. Löwis18e16552006-02-15 17:27:45 +000038#define PY_SSIZE_T_CLEAN
Guido van Rossume2d67f92000-03-10 23:09:23 +000039#include "Python.h"
40
41/* --- Registry ----------------------------------------------------------- */
42
Walter Dörwald0ae29812002-10-31 13:36:29 +000043PyDoc_STRVAR(register__doc__,
44"register(search_function)\n\
45\n\
46Register a codec search function. Search functions are expected to take\n\
47one argument, the encoding name in all lower case letters, and return\n\
Walter Dörwald219336a2007-07-19 13:04:38 +000048a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
49(or a CodecInfo object).");
Walter Dörwald0ae29812002-10-31 13:36:29 +000050
Guido van Rossume2d67f92000-03-10 23:09:23 +000051static
Georg Brandl96a8c392006-05-29 21:04:52 +000052PyObject *codec_register(PyObject *self, PyObject *search_function)
Guido van Rossume2d67f92000-03-10 23:09:23 +000053{
Guido van Rossume2d67f92000-03-10 23:09:23 +000054 if (PyCodec_Register(search_function))
Georg Brandl96a8c392006-05-29 21:04:52 +000055 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000056
Georg Brandl96a8c392006-05-29 21:04:52 +000057 Py_RETURN_NONE;
Guido van Rossume2d67f92000-03-10 23:09:23 +000058}
59
Walter Dörwald0ae29812002-10-31 13:36:29 +000060PyDoc_STRVAR(lookup__doc__,
Walter Dörwald219336a2007-07-19 13:04:38 +000061"lookup(encoding) -> CodecInfo\n\
Walter Dörwald0ae29812002-10-31 13:36:29 +000062\n\
63Looks up a codec tuple in the Python codec registry and returns\n\
Walter Dörwald219336a2007-07-19 13:04:38 +000064a tuple of function (or a CodecInfo object).");
Walter Dörwald0ae29812002-10-31 13:36:29 +000065
Guido van Rossume2d67f92000-03-10 23:09:23 +000066static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000067PyObject *codec_lookup(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000068{
69 char *encoding;
70
71 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
Georg Brandl96a8c392006-05-29 21:04:52 +000072 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +000073
74 return _PyCodec_Lookup(encoding);
Guido van Rossume2d67f92000-03-10 23:09:23 +000075}
76
Marc-André Lemburg3f419742004-07-10 12:06:10 +000077PyDoc_STRVAR(encode__doc__,
78"encode(obj, [encoding[,errors]]) -> object\n\
79\n\
80Encodes obj using the codec registered for encoding. encoding defaults\n\
81to the default encoding. errors may be given to set a different error\n\
82handling scheme. Default is 'strict' meaning that encoding errors raise\n\
83a ValueError. Other possible values are 'ignore', 'replace' and\n\
84'xmlcharrefreplace' as well as any other name registered with\n\
85codecs.register_error that can handle ValueErrors.");
86
87static PyObject *
88codec_encode(PyObject *self, PyObject *args)
89{
Brett Cannon3e377de2004-07-10 21:41:14 +000090 const char *encoding = NULL;
91 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +000092 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000093
Marc-André Lemburg3f419742004-07-10 12:06:10 +000094 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
95 return NULL;
96
Martin v. Löwise2713be2005-03-08 15:03:08 +000097#ifdef Py_USING_UNICODE
Marc-André Lemburg3f419742004-07-10 12:06:10 +000098 if (encoding == NULL)
99 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwise2713be2005-03-08 15:03:08 +0000100#else
101 if (encoding == NULL) {
102 PyErr_SetString(PyExc_ValueError, "no encoding specified");
103 return NULL;
104 }
105#endif
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000106
107 /* Encode via the codec registry */
Georg Brandl96a8c392006-05-29 21:04:52 +0000108 return PyCodec_Encode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000109}
110
111PyDoc_STRVAR(decode__doc__,
112"decode(obj, [encoding[,errors]]) -> object\n\
113\n\
114Decodes obj using the codec registered for encoding. encoding defaults\n\
115to the default encoding. errors may be given to set a different error\n\
116handling scheme. Default is 'strict' meaning that encoding errors raise\n\
117a ValueError. Other possible values are 'ignore' and 'replace'\n\
118as well as any other name registerd with codecs.register_error that is\n\
119able to handle ValueErrors.");
120
121static PyObject *
122codec_decode(PyObject *self, PyObject *args)
123{
Brett Cannon3e377de2004-07-10 21:41:14 +0000124 const char *encoding = NULL;
125 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000126 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000127
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000128 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
129 return NULL;
130
Martin v. Löwise2713be2005-03-08 15:03:08 +0000131#ifdef Py_USING_UNICODE
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000132 if (encoding == NULL)
133 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwise2713be2005-03-08 15:03:08 +0000134#else
135 if (encoding == NULL) {
136 PyErr_SetString(PyExc_ValueError, "no encoding specified");
137 return NULL;
138 }
139#endif
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000140
141 /* Decode via the codec registry */
Georg Brandl96a8c392006-05-29 21:04:52 +0000142 return PyCodec_Decode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000143}
144
Guido van Rossume2d67f92000-03-10 23:09:23 +0000145/* --- Helpers ------------------------------------------------------------ */
146
147static
148PyObject *codec_tuple(PyObject *unicode,
Martin v. Löwis66851282006-04-22 11:40:03 +0000149 Py_ssize_t len)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000150{
Georg Brandl96a8c392006-05-29 21:04:52 +0000151 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000152 if (unicode == NULL)
Georg Brandl96a8c392006-05-29 21:04:52 +0000153 return NULL;
154 v = Py_BuildValue("On", unicode, len);
155 Py_DECREF(unicode);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000156 return v;
157}
158
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000159/* --- String codecs ------------------------------------------------------ */
160static PyObject *
161escape_decode(PyObject *self,
162 PyObject *args)
163{
164 const char *errors = NULL;
165 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000166 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000167
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000168 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
169 &data, &size, &errors))
170 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000171 return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000172 size);
173}
174
175static PyObject *
176escape_encode(PyObject *self,
177 PyObject *args)
178{
179 PyObject *str;
180 const char *errors = NULL;
181 char *buf;
Martin v. Löwis66851282006-04-22 11:40:03 +0000182 Py_ssize_t len;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000183
184 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
185 &PyString_Type, &str, &errors))
186 return NULL;
187
188 str = PyString_Repr(str, 0);
189 if (!str)
190 return NULL;
191
192 /* The string will be quoted. Unquote, similar to unicode-escape. */
193 buf = PyString_AS_STRING (str);
194 len = PyString_GET_SIZE (str);
195 memmove(buf, buf+1, len-2);
Neal Norwitz6f5ff3f2006-08-12 01:43:40 +0000196 if (_PyString_Resize(&str, len-2) < 0)
197 return NULL;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000198
199 return codec_tuple(str, PyString_Size(str));
200}
201
202#ifdef Py_USING_UNICODE
Guido van Rossume2d67f92000-03-10 23:09:23 +0000203/* --- Decoder ------------------------------------------------------------ */
204
205static PyObject *
206unicode_internal_decode(PyObject *self,
207 PyObject *args)
208{
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000209 PyObject *obj;
210 const char *errors = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000211 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000212 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000213
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000214 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
215 &obj, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000216 return NULL;
217
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000218 if (PyUnicode_Check(obj)) {
219 Py_INCREF(obj);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000220 return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000221 }
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000222 else {
223 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
224 return NULL;
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000225
226 return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000227 size);
228 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000229}
230
231static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000232utf_7_decode(PyObject *self,
Amaury Forgeot d'Arc50879802007-11-20 23:31:27 +0000233 PyObject *args)
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000234{
235 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000236 Py_ssize_t size;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000237 const char *errors = NULL;
Amaury Forgeot d'Arc50879802007-11-20 23:31:27 +0000238 int final = 0;
239 Py_ssize_t consumed;
240 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000241
Amaury Forgeot d'Arc50879802007-11-20 23:31:27 +0000242 if (!PyArg_ParseTuple(args, "t#|zi:utf_7_decode",
243 &data, &size, &errors, &final))
244 return NULL;
245 consumed = size;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000246
Amaury Forgeot d'Arc50879802007-11-20 23:31:27 +0000247 decoded = PyUnicode_DecodeUTF7Stateful(data, size, errors,
248 final ? NULL : &consumed);
249 if (decoded == NULL)
250 return NULL;
251 return codec_tuple(decoded, consumed);
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000252}
253
254static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000255utf_8_decode(PyObject *self,
256 PyObject *args)
257{
258 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000259 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000260 const char *errors = NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000261 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000262 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000263 PyObject *decoded = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000264
Walter Dörwald69652032004-09-07 20:24:22 +0000265 if (!PyArg_ParseTuple(args, "t#|zi:utf_8_decode",
266 &data, &size, &errors, &final))
267 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000268 if (size < 0) {
269 PyErr_SetString(PyExc_ValueError, "negative argument");
270 return 0;
271 }
Walter Dörwald69652032004-09-07 20:24:22 +0000272 consumed = size;
273
274 decoded = PyUnicode_DecodeUTF8Stateful(data, size, errors,
275 final ? NULL : &consumed);
276 if (decoded == NULL)
277 return NULL;
278 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000279}
280
281static PyObject *
282utf_16_decode(PyObject *self,
283 PyObject *args)
284{
285 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000286 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000287 const char *errors = NULL;
288 int byteorder = 0;
Walter Dörwald69652032004-09-07 20:24:22 +0000289 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000290 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000291 PyObject *decoded;
292
293 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_decode",
294 &data, &size, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000295 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000296 if (size < 0) {
297 PyErr_SetString(PyExc_ValueError, "negative argument");
298 return 0;
299 }
Martin v. Löwisd532ba02006-05-27 08:54:29 +0000300 consumed = size; /* This is overwritten unless final is true. */
Walter Dörwald69652032004-09-07 20:24:22 +0000301 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
302 final ? NULL : &consumed);
303 if (decoded == NULL)
304 return NULL;
305 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000306}
307
308static PyObject *
309utf_16_le_decode(PyObject *self,
310 PyObject *args)
311{
312 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000313 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000314 const char *errors = NULL;
315 int byteorder = -1;
Walter Dörwald69652032004-09-07 20:24:22 +0000316 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000317 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000318 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000319
Walter Dörwald69652032004-09-07 20:24:22 +0000320 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_le_decode",
321 &data, &size, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000322 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000323
Martin v. Löwis18e16552006-02-15 17:27:45 +0000324 if (size < 0) {
325 PyErr_SetString(PyExc_ValueError, "negative argument");
326 return 0;
327 }
Martin v. Löwisd532ba02006-05-27 08:54:29 +0000328 consumed = size; /* This is overwritten unless final is true. */
Walter Dörwald69652032004-09-07 20:24:22 +0000329 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
330 &byteorder, final ? NULL : &consumed);
331 if (decoded == NULL)
332 return NULL;
333 return codec_tuple(decoded, consumed);
334
Guido van Rossume2d67f92000-03-10 23:09:23 +0000335}
336
337static PyObject *
338utf_16_be_decode(PyObject *self,
339 PyObject *args)
340{
341 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000342 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000343 const char *errors = NULL;
344 int byteorder = 1;
Walter Dörwald69652032004-09-07 20:24:22 +0000345 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000346 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000347 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000348
Walter Dörwald69652032004-09-07 20:24:22 +0000349 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_be_decode",
350 &data, &size, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000351 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000352 if (size < 0) {
353 PyErr_SetString(PyExc_ValueError, "negative argument");
354 return 0;
355 }
Martin v. Löwisd532ba02006-05-27 08:54:29 +0000356 consumed = size; /* This is overwritten unless final is true. */
Walter Dörwald69652032004-09-07 20:24:22 +0000357 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
358 &byteorder, final ? NULL : &consumed);
359 if (decoded == NULL)
360 return NULL;
361 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000362}
363
364/* This non-standard version also provides access to the byteorder
365 parameter of the builtin UTF-16 codec.
366
367 It returns a tuple (unicode, bytesread, byteorder) with byteorder
368 being the value in effect at the end of data.
369
370*/
371
372static PyObject *
373utf_16_ex_decode(PyObject *self,
374 PyObject *args)
375{
376 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000377 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000378 const char *errors = NULL;
379 int byteorder = 0;
380 PyObject *unicode, *tuple;
Walter Dörwald69652032004-09-07 20:24:22 +0000381 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000382 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000383
384 if (!PyArg_ParseTuple(args, "t#|zii:utf_16_ex_decode",
385 &data, &size, &errors, &byteorder, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000386 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000387 if (size < 0) {
388 PyErr_SetString(PyExc_ValueError, "negative argument");
389 return 0;
390 }
Martin v. Löwisd532ba02006-05-27 08:54:29 +0000391 consumed = size; /* This is overwritten unless final is true. */
Walter Dörwald69652032004-09-07 20:24:22 +0000392 unicode = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
393 final ? NULL : &consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000394 if (unicode == NULL)
395 return NULL;
Georg Brandl96a8c392006-05-29 21:04:52 +0000396 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000397 Py_DECREF(unicode);
398 return tuple;
399}
400
401static PyObject *
Walter Dörwald6e390802007-08-17 16:41:28 +0000402utf_32_decode(PyObject *self,
403 PyObject *args)
404{
405 const char *data;
406 Py_ssize_t size;
407 const char *errors = NULL;
408 int byteorder = 0;
409 int final = 0;
410 Py_ssize_t consumed;
411 PyObject *decoded;
412
413 if (!PyArg_ParseTuple(args, "t#|zi:utf_32_decode",
414 &data, &size, &errors, &final))
415 return NULL;
416 if (size < 0) {
417 PyErr_SetString(PyExc_ValueError, "negative argument");
418 return 0;
419 }
420 consumed = size; /* This is overwritten unless final is true. */
421 decoded = PyUnicode_DecodeUTF32Stateful(data, size, errors, &byteorder,
422 final ? NULL : &consumed);
423 if (decoded == NULL)
424 return NULL;
425 return codec_tuple(decoded, consumed);
426}
427
428static PyObject *
429utf_32_le_decode(PyObject *self,
430 PyObject *args)
431{
432 const char *data;
433 Py_ssize_t size;
434 const char *errors = NULL;
435 int byteorder = -1;
436 int final = 0;
437 Py_ssize_t consumed;
438 PyObject *decoded = NULL;
439
440 if (!PyArg_ParseTuple(args, "t#|zi:utf_32_le_decode",
441 &data, &size, &errors, &final))
442 return NULL;
443
444 if (size < 0) {
445 PyErr_SetString(PyExc_ValueError, "negative argument");
446 return 0;
447 }
448 consumed = size; /* This is overwritten unless final is true. */
449 decoded = PyUnicode_DecodeUTF32Stateful(data, size, errors,
450 &byteorder, final ? NULL : &consumed);
451 if (decoded == NULL)
452 return NULL;
453 return codec_tuple(decoded, consumed);
454
455}
456
457static PyObject *
458utf_32_be_decode(PyObject *self,
459 PyObject *args)
460{
461 const char *data;
462 Py_ssize_t size;
463 const char *errors = NULL;
464 int byteorder = 1;
465 int final = 0;
466 Py_ssize_t consumed;
467 PyObject *decoded = NULL;
468
469 if (!PyArg_ParseTuple(args, "t#|zi:utf_32_be_decode",
470 &data, &size, &errors, &final))
471 return NULL;
472 if (size < 0) {
473 PyErr_SetString(PyExc_ValueError, "negative argument");
474 return 0;
475 }
476 consumed = size; /* This is overwritten unless final is true. */
477 decoded = PyUnicode_DecodeUTF32Stateful(data, size, errors,
478 &byteorder, final ? NULL : &consumed);
479 if (decoded == NULL)
480 return NULL;
481 return codec_tuple(decoded, consumed);
482}
483
484/* This non-standard version also provides access to the byteorder
485 parameter of the builtin UTF-32 codec.
486
487 It returns a tuple (unicode, bytesread, byteorder) with byteorder
488 being the value in effect at the end of data.
489
490*/
491
492static PyObject *
493utf_32_ex_decode(PyObject *self,
494 PyObject *args)
495{
496 const char *data;
497 Py_ssize_t size;
498 const char *errors = NULL;
499 int byteorder = 0;
500 PyObject *unicode, *tuple;
501 int final = 0;
502 Py_ssize_t consumed;
503
504 if (!PyArg_ParseTuple(args, "t#|zii:utf_32_ex_decode",
505 &data, &size, &errors, &byteorder, &final))
506 return NULL;
507 if (size < 0) {
508 PyErr_SetString(PyExc_ValueError, "negative argument");
509 return 0;
510 }
511 consumed = size; /* This is overwritten unless final is true. */
512 unicode = PyUnicode_DecodeUTF32Stateful(data, size, errors, &byteorder,
513 final ? NULL : &consumed);
514 if (unicode == NULL)
515 return NULL;
516 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
517 Py_DECREF(unicode);
518 return tuple;
519}
520
521static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000522unicode_escape_decode(PyObject *self,
523 PyObject *args)
524{
525 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000526 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000527 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000528
Guido van Rossume2d67f92000-03-10 23:09:23 +0000529 if (!PyArg_ParseTuple(args, "t#|z:unicode_escape_decode",
530 &data, &size, &errors))
531 return NULL;
532
533 return codec_tuple(PyUnicode_DecodeUnicodeEscape(data, size, errors),
534 size);
535}
536
537static PyObject *
538raw_unicode_escape_decode(PyObject *self,
539 PyObject *args)
540{
541 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000542 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000543 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000544
Guido van Rossume2d67f92000-03-10 23:09:23 +0000545 if (!PyArg_ParseTuple(args, "t#|z:raw_unicode_escape_decode",
546 &data, &size, &errors))
547 return NULL;
548
549 return codec_tuple(PyUnicode_DecodeRawUnicodeEscape(data, size, errors),
550 size);
551}
552
553static PyObject *
554latin_1_decode(PyObject *self,
555 PyObject *args)
556{
557 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000558 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000559 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000560
Guido van Rossume2d67f92000-03-10 23:09:23 +0000561 if (!PyArg_ParseTuple(args, "t#|z:latin_1_decode",
562 &data, &size, &errors))
563 return NULL;
564
565 return codec_tuple(PyUnicode_DecodeLatin1(data, size, errors),
566 size);
567}
568
569static PyObject *
570ascii_decode(PyObject *self,
571 PyObject *args)
572{
573 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000574 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000575 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000576
Guido van Rossume2d67f92000-03-10 23:09:23 +0000577 if (!PyArg_ParseTuple(args, "t#|z:ascii_decode",
578 &data, &size, &errors))
579 return NULL;
580
581 return codec_tuple(PyUnicode_DecodeASCII(data, size, errors),
582 size);
583}
584
585static PyObject *
586charmap_decode(PyObject *self,
587 PyObject *args)
588{
589 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000590 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000591 const char *errors = NULL;
592 PyObject *mapping = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000593
Guido van Rossume2d67f92000-03-10 23:09:23 +0000594 if (!PyArg_ParseTuple(args, "t#|zO:charmap_decode",
595 &data, &size, &errors, &mapping))
596 return NULL;
597 if (mapping == Py_None)
598 mapping = NULL;
599
600 return codec_tuple(PyUnicode_DecodeCharmap(data, size, mapping, errors),
601 size);
602}
603
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000604#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000605
606static PyObject *
607mbcs_decode(PyObject *self,
608 PyObject *args)
609{
610 const char *data;
Martin v. Löwisd8251432006-06-14 05:21:04 +0000611 Py_ssize_t size, consumed;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000612 const char *errors = NULL;
Martin v. Löwis961b91b2006-08-02 13:53:55 +0000613 int final = 0;
Martin v. Löwisd8251432006-06-14 05:21:04 +0000614 PyObject *decoded;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000615
Martin v. Löwisd8251432006-06-14 05:21:04 +0000616 if (!PyArg_ParseTuple(args, "t#|zi:mbcs_decode",
617 &data, &size, &errors, &final))
Guido van Rossum24bdb042000-03-28 20:29:59 +0000618 return NULL;
619
Martin v. Löwisd8251432006-06-14 05:21:04 +0000620 decoded = PyUnicode_DecodeMBCSStateful(
621 data, size, errors, final ? NULL : &consumed);
622 if (!decoded)
623 return NULL;
624 return codec_tuple(decoded, final ? size : consumed);
Guido van Rossum24bdb042000-03-28 20:29:59 +0000625}
626
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000627#endif /* MS_WINDOWS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000628
Guido van Rossume2d67f92000-03-10 23:09:23 +0000629/* --- Encoder ------------------------------------------------------------ */
630
631static PyObject *
632readbuffer_encode(PyObject *self,
633 PyObject *args)
634{
635 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000636 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000637 const char *errors = NULL;
638
639 if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
640 &data, &size, &errors))
641 return NULL;
642
643 return codec_tuple(PyString_FromStringAndSize(data, size),
644 size);
645}
646
647static PyObject *
648charbuffer_encode(PyObject *self,
649 PyObject *args)
650{
651 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000652 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000653 const char *errors = NULL;
654
655 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
656 &data, &size, &errors))
657 return NULL;
658
659 return codec_tuple(PyString_FromStringAndSize(data, size),
660 size);
661}
662
663static PyObject *
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000664unicode_internal_encode(PyObject *self,
665 PyObject *args)
666{
667 PyObject *obj;
668 const char *errors = NULL;
669 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000670 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000671
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000672 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
673 &obj, &errors))
674 return NULL;
675
676 if (PyUnicode_Check(obj)) {
677 data = PyUnicode_AS_DATA(obj);
678 size = PyUnicode_GET_DATA_SIZE(obj);
679 return codec_tuple(PyString_FromStringAndSize(data, size),
680 size);
681 }
682 else {
683 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
684 return NULL;
685 return codec_tuple(PyString_FromStringAndSize(data, size),
686 size);
687 }
688}
689
690static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000691utf_7_encode(PyObject *self,
692 PyObject *args)
693{
694 PyObject *str, *v;
695 const char *errors = NULL;
696
697 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
698 &str, &errors))
699 return NULL;
700
701 str = PyUnicode_FromObject(str);
702 if (str == NULL)
703 return NULL;
704 v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
705 PyUnicode_GET_SIZE(str),
Georg Brandl96a8c392006-05-29 21:04:52 +0000706 0,
707 0,
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000708 errors),
709 PyUnicode_GET_SIZE(str));
710 Py_DECREF(str);
711 return v;
712}
713
714static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000715utf_8_encode(PyObject *self,
716 PyObject *args)
717{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000718 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000719 const char *errors = NULL;
720
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000721 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000722 &str, &errors))
723 return NULL;
724
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000725 str = PyUnicode_FromObject(str);
726 if (str == NULL)
727 return NULL;
728 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
729 PyUnicode_GET_SIZE(str),
730 errors),
731 PyUnicode_GET_SIZE(str));
732 Py_DECREF(str);
733 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000734}
735
736/* This version provides access to the byteorder parameter of the
737 builtin UTF-16 codecs as optional third argument. It defaults to 0
738 which means: use the native byte order and prepend the data with a
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000739 BOM mark.
Guido van Rossume2d67f92000-03-10 23:09:23 +0000740
741*/
742
743static PyObject *
744utf_16_encode(PyObject *self,
745 PyObject *args)
746{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000747 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000748 const char *errors = NULL;
749 int byteorder = 0;
750
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000751 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000752 &str, &errors, &byteorder))
753 return NULL;
754
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000755 str = PyUnicode_FromObject(str);
756 if (str == NULL)
757 return NULL;
758 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
759 PyUnicode_GET_SIZE(str),
760 errors,
761 byteorder),
762 PyUnicode_GET_SIZE(str));
763 Py_DECREF(str);
764 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000765}
766
767static PyObject *
768utf_16_le_encode(PyObject *self,
769 PyObject *args)
770{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000771 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000772 const char *errors = NULL;
773
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000774 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000775 &str, &errors))
776 return NULL;
777
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000778 str = PyUnicode_FromObject(str);
779 if (str == NULL)
780 return NULL;
781 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000782 PyUnicode_GET_SIZE(str),
783 errors,
784 -1),
785 PyUnicode_GET_SIZE(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000786 Py_DECREF(str);
787 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000788}
789
790static PyObject *
791utf_16_be_encode(PyObject *self,
792 PyObject *args)
793{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000794 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000795 const char *errors = NULL;
796
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000797 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000798 &str, &errors))
799 return NULL;
800
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000801 str = PyUnicode_FromObject(str);
802 if (str == NULL)
803 return NULL;
804 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
805 PyUnicode_GET_SIZE(str),
806 errors,
807 +1),
808 PyUnicode_GET_SIZE(str));
809 Py_DECREF(str);
810 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000811}
812
Walter Dörwald6e390802007-08-17 16:41:28 +0000813/* This version provides access to the byteorder parameter of the
814 builtin UTF-32 codecs as optional third argument. It defaults to 0
815 which means: use the native byte order and prepend the data with a
816 BOM mark.
817
818*/
819
820static PyObject *
821utf_32_encode(PyObject *self,
822 PyObject *args)
823{
824 PyObject *str, *v;
825 const char *errors = NULL;
826 int byteorder = 0;
827
828 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
829 &str, &errors, &byteorder))
830 return NULL;
831
832 str = PyUnicode_FromObject(str);
833 if (str == NULL)
834 return NULL;
835 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
836 PyUnicode_GET_SIZE(str),
837 errors,
838 byteorder),
839 PyUnicode_GET_SIZE(str));
840 Py_DECREF(str);
841 return v;
842}
843
844static PyObject *
845utf_32_le_encode(PyObject *self,
846 PyObject *args)
847{
848 PyObject *str, *v;
849 const char *errors = NULL;
850
851 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
852 &str, &errors))
853 return NULL;
854
855 str = PyUnicode_FromObject(str);
856 if (str == NULL)
857 return NULL;
858 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
859 PyUnicode_GET_SIZE(str),
860 errors,
861 -1),
862 PyUnicode_GET_SIZE(str));
863 Py_DECREF(str);
864 return v;
865}
866
867static PyObject *
868utf_32_be_encode(PyObject *self,
869 PyObject *args)
870{
871 PyObject *str, *v;
872 const char *errors = NULL;
873
874 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
875 &str, &errors))
876 return NULL;
877
878 str = PyUnicode_FromObject(str);
879 if (str == NULL)
880 return NULL;
881 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
882 PyUnicode_GET_SIZE(str),
883 errors,
884 +1),
885 PyUnicode_GET_SIZE(str));
886 Py_DECREF(str);
887 return v;
888}
889
Guido van Rossume2d67f92000-03-10 23:09:23 +0000890static PyObject *
891unicode_escape_encode(PyObject *self,
892 PyObject *args)
893{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000894 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000895 const char *errors = NULL;
896
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000897 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000898 &str, &errors))
899 return NULL;
900
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000901 str = PyUnicode_FromObject(str);
902 if (str == NULL)
903 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000904 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000905 PyUnicode_GET_SIZE(str)),
906 PyUnicode_GET_SIZE(str));
907 Py_DECREF(str);
908 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000909}
910
911static PyObject *
912raw_unicode_escape_encode(PyObject *self,
913 PyObject *args)
914{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000915 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000916 const char *errors = NULL;
917
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000918 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000919 &str, &errors))
920 return NULL;
921
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000922 str = PyUnicode_FromObject(str);
923 if (str == NULL)
924 return NULL;
925 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000926 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000927 PyUnicode_GET_SIZE(str)),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000928 PyUnicode_GET_SIZE(str));
929 Py_DECREF(str);
930 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000931}
932
933static PyObject *
934latin_1_encode(PyObject *self,
935 PyObject *args)
936{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000937 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000938 const char *errors = NULL;
939
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000940 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000941 &str, &errors))
942 return NULL;
943
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000944 str = PyUnicode_FromObject(str);
945 if (str == NULL)
946 return NULL;
947 v = codec_tuple(PyUnicode_EncodeLatin1(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000948 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000949 PyUnicode_GET_SIZE(str),
950 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000951 PyUnicode_GET_SIZE(str));
952 Py_DECREF(str);
953 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000954}
955
956static PyObject *
957ascii_encode(PyObject *self,
958 PyObject *args)
959{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000960 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000961 const char *errors = NULL;
962
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000963 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000964 &str, &errors))
965 return NULL;
966
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000967 str = PyUnicode_FromObject(str);
968 if (str == NULL)
969 return NULL;
970 v = codec_tuple(PyUnicode_EncodeASCII(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000971 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000972 PyUnicode_GET_SIZE(str),
973 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000974 PyUnicode_GET_SIZE(str));
975 Py_DECREF(str);
976 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000977}
978
979static PyObject *
980charmap_encode(PyObject *self,
981 PyObject *args)
982{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000983 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000984 const char *errors = NULL;
985 PyObject *mapping = NULL;
986
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000987 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000988 &str, &errors, &mapping))
989 return NULL;
990 if (mapping == Py_None)
991 mapping = NULL;
992
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000993 str = PyUnicode_FromObject(str);
994 if (str == NULL)
995 return NULL;
996 v = codec_tuple(PyUnicode_EncodeCharmap(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000997 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000998 PyUnicode_GET_SIZE(str),
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000999 mapping,
Guido van Rossume2d67f92000-03-10 23:09:23 +00001000 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001001 PyUnicode_GET_SIZE(str));
1002 Py_DECREF(str);
1003 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001004}
1005
Martin v. Löwis3f767792006-06-04 19:36:28 +00001006static PyObject*
1007charmap_build(PyObject *self, PyObject *args)
1008{
1009 PyObject *map;
1010 if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
1011 return NULL;
1012 return PyUnicode_BuildEncodingMap(map);
1013}
1014
Martin v. Löwis6238d2b2002-06-30 15:26:10 +00001015#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +00001016
1017static PyObject *
1018mbcs_encode(PyObject *self,
1019 PyObject *args)
1020{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001021 PyObject *str, *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001022 const char *errors = NULL;
1023
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001024 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
Guido van Rossum24bdb042000-03-28 20:29:59 +00001025 &str, &errors))
1026 return NULL;
1027
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001028 str = PyUnicode_FromObject(str);
1029 if (str == NULL)
1030 return NULL;
1031 v = codec_tuple(PyUnicode_EncodeMBCS(
Walter Dörwald9fd115c2005-11-02 08:30:08 +00001032 PyUnicode_AS_UNICODE(str),
Guido van Rossum24bdb042000-03-28 20:29:59 +00001033 PyUnicode_GET_SIZE(str),
1034 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001035 PyUnicode_GET_SIZE(str));
1036 Py_DECREF(str);
1037 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001038}
1039
Martin v. Löwis6238d2b2002-06-30 15:26:10 +00001040#endif /* MS_WINDOWS */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001041#endif /* Py_USING_UNICODE */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001042
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001043/* --- Error handler registry --------------------------------------------- */
1044
Walter Dörwald0ae29812002-10-31 13:36:29 +00001045PyDoc_STRVAR(register_error__doc__,
1046"register_error(errors, handler)\n\
1047\n\
1048Register the specified error handler under the name\n\
1049errors. handler must be a callable object, that\n\
1050will be called with an exception instance containing\n\
1051information about the location of the encoding/decoding\n\
1052error and must return a (replacement, new position) tuple.");
1053
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001054static PyObject *register_error(PyObject *self, PyObject *args)
1055{
1056 const char *name;
1057 PyObject *handler;
1058
1059 if (!PyArg_ParseTuple(args, "sO:register_error",
1060 &name, &handler))
1061 return NULL;
1062 if (PyCodec_RegisterError(name, handler))
1063 return NULL;
Georg Brandl96a8c392006-05-29 21:04:52 +00001064 Py_RETURN_NONE;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001065}
1066
Walter Dörwald0ae29812002-10-31 13:36:29 +00001067PyDoc_STRVAR(lookup_error__doc__,
1068"lookup_error(errors) -> handler\n\
1069\n\
1070Return the error handler for the specified error handling name\n\
1071or raise a LookupError, if no handler exists under this name.");
1072
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001073static PyObject *lookup_error(PyObject *self, PyObject *args)
1074{
1075 const char *name;
1076
1077 if (!PyArg_ParseTuple(args, "s:lookup_error",
1078 &name))
1079 return NULL;
1080 return PyCodec_LookupError(name);
1081}
1082
Guido van Rossume2d67f92000-03-10 23:09:23 +00001083/* --- Module API --------------------------------------------------------- */
1084
1085static PyMethodDef _codecs_functions[] = {
Georg Brandl96a8c392006-05-29 21:04:52 +00001086 {"register", codec_register, METH_O,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001087 register__doc__},
Marc-André Lemburg3f419742004-07-10 12:06:10 +00001088 {"lookup", codec_lookup, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001089 lookup__doc__},
Brett Cannon3e377de2004-07-10 21:41:14 +00001090 {"encode", codec_encode, METH_VARARGS,
1091 encode__doc__},
1092 {"decode", codec_decode, METH_VARARGS,
1093 decode__doc__},
Martin v. Löwis8a8da792002-08-14 07:46:28 +00001094 {"escape_encode", escape_encode, METH_VARARGS},
1095 {"escape_decode", escape_decode, METH_VARARGS},
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001096#ifdef Py_USING_UNICODE
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001097 {"utf_8_encode", utf_8_encode, METH_VARARGS},
1098 {"utf_8_decode", utf_8_decode, METH_VARARGS},
1099 {"utf_7_encode", utf_7_encode, METH_VARARGS},
1100 {"utf_7_decode", utf_7_decode, METH_VARARGS},
1101 {"utf_16_encode", utf_16_encode, METH_VARARGS},
1102 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1103 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1104 {"utf_16_decode", utf_16_decode, METH_VARARGS},
1105 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1106 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1107 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
Walter Dörwald6e390802007-08-17 16:41:28 +00001108 {"utf_32_encode", utf_32_encode, METH_VARARGS},
1109 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1110 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1111 {"utf_32_decode", utf_32_decode, METH_VARARGS},
1112 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1113 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1114 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001115 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1116 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1117 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1118 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1119 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1120 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1121 {"latin_1_encode", latin_1_encode, METH_VARARGS},
1122 {"latin_1_decode", latin_1_decode, METH_VARARGS},
1123 {"ascii_encode", ascii_encode, METH_VARARGS},
1124 {"ascii_decode", ascii_decode, METH_VARARGS},
1125 {"charmap_encode", charmap_encode, METH_VARARGS},
1126 {"charmap_decode", charmap_decode, METH_VARARGS},
Martin v. Löwis3f767792006-06-04 19:36:28 +00001127 {"charmap_build", charmap_build, METH_VARARGS},
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001128 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
1129 {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
Martin v. Löwis6238d2b2002-06-30 15:26:10 +00001130#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001131 {"mbcs_encode", mbcs_encode, METH_VARARGS},
1132 {"mbcs_decode", mbcs_decode, METH_VARARGS},
Guido van Rossum24bdb042000-03-28 20:29:59 +00001133#endif
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001134#endif /* Py_USING_UNICODE */
Walter Dörwald0ae29812002-10-31 13:36:29 +00001135 {"register_error", register_error, METH_VARARGS,
1136 register_error__doc__},
1137 {"lookup_error", lookup_error, METH_VARARGS,
1138 lookup_error__doc__},
Guido van Rossume2d67f92000-03-10 23:09:23 +00001139 {NULL, NULL} /* sentinel */
1140};
1141
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001142PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00001143init_codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001144{
1145 Py_InitModule("_codecs", _codecs_functions);
1146}