blob: c0c8857866480c5f8c4e2d2f8ed9d40b765701e5 [file] [log] [blame]
/* ------------------------------------------------------------------------

   _codecs -- Provides access to the codec registry and the builtin
              codecs.

   This module should never be imported directly. The standard library
   module "codecs" wraps this builtin module for use within Python.

   The codec registry is accessible via:

     register(search_function) -> None

     lookup(encoding) -> CodecInfo object

   The builtin Unicode codecs use the following interface:

     <encoding>_encode(Unicode_object[,errors='strict']) ->
        (string object, bytes consumed)

     <encoding>_decode(char_buffer_obj[,errors='strict']) ->
        (Unicode object, bytes consumed)

   <encoding>_encode() interfaces also accept non-Unicode objects as
   input. The objects are then converted to Unicode using
   PyUnicode_FromObject() prior to applying the conversion.

   These <encoding>s are available: utf_8, unicode_escape,
   raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
   mbcs (on win32).


Written by Marc-Andre Lemburg (mal@lemburg.com).

Copyright (c) Corporation for National Research Initiatives.

   ------------------------------------------------------------------------ */
37
Martin v. Löwis18e16552006-02-15 17:27:45 +000038#define PY_SSIZE_T_CLEAN
Guido van Rossume2d67f92000-03-10 23:09:23 +000039#include "Python.h"
40
41/* --- Registry ----------------------------------------------------------- */
42
Walter Dörwald0ae29812002-10-31 13:36:29 +000043PyDoc_STRVAR(register__doc__,
44"register(search_function)\n\
45\n\
46Register a codec search function. Search functions are expected to take\n\
47one argument, the encoding name in all lower case letters, and return\n\
Walter Dörwald219336a2007-07-19 13:04:38 +000048a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
49(or a CodecInfo object).");
Walter Dörwald0ae29812002-10-31 13:36:29 +000050
Guido van Rossume2d67f92000-03-10 23:09:23 +000051static
Georg Brandl96a8c392006-05-29 21:04:52 +000052PyObject *codec_register(PyObject *self, PyObject *search_function)
Guido van Rossume2d67f92000-03-10 23:09:23 +000053{
Guido van Rossume2d67f92000-03-10 23:09:23 +000054 if (PyCodec_Register(search_function))
Georg Brandl96a8c392006-05-29 21:04:52 +000055 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000056
Georg Brandl96a8c392006-05-29 21:04:52 +000057 Py_RETURN_NONE;
Guido van Rossume2d67f92000-03-10 23:09:23 +000058}
59
Walter Dörwald0ae29812002-10-31 13:36:29 +000060PyDoc_STRVAR(lookup__doc__,
Walter Dörwald219336a2007-07-19 13:04:38 +000061"lookup(encoding) -> CodecInfo\n\
Walter Dörwald0ae29812002-10-31 13:36:29 +000062\n\
63Looks up a codec tuple in the Python codec registry and returns\n\
Georg Brandl4c95e092009-03-15 21:32:06 +000064a CodecInfo object.");
Walter Dörwald0ae29812002-10-31 13:36:29 +000065
Guido van Rossume2d67f92000-03-10 23:09:23 +000066static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000067PyObject *codec_lookup(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000068{
69 char *encoding;
70
71 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
Georg Brandl96a8c392006-05-29 21:04:52 +000072 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +000073
74 return _PyCodec_Lookup(encoding);
Guido van Rossume2d67f92000-03-10 23:09:23 +000075}
76
Marc-André Lemburg3f419742004-07-10 12:06:10 +000077PyDoc_STRVAR(encode__doc__,
78"encode(obj, [encoding[,errors]]) -> object\n\
79\n\
80Encodes obj using the codec registered for encoding. encoding defaults\n\
81to the default encoding. errors may be given to set a different error\n\
82handling scheme. Default is 'strict' meaning that encoding errors raise\n\
83a ValueError. Other possible values are 'ignore', 'replace' and\n\
84'xmlcharrefreplace' as well as any other name registered with\n\
85codecs.register_error that can handle ValueErrors.");
86
87static PyObject *
88codec_encode(PyObject *self, PyObject *args)
89{
Brett Cannon3e377de2004-07-10 21:41:14 +000090 const char *encoding = NULL;
91 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +000092 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000093
Marc-André Lemburg3f419742004-07-10 12:06:10 +000094 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
95 return NULL;
96
Martin v. Löwise2713be2005-03-08 15:03:08 +000097#ifdef Py_USING_UNICODE
Marc-André Lemburg3f419742004-07-10 12:06:10 +000098 if (encoding == NULL)
99 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwise2713be2005-03-08 15:03:08 +0000100#else
101 if (encoding == NULL) {
102 PyErr_SetString(PyExc_ValueError, "no encoding specified");
103 return NULL;
104 }
105#endif
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000106
107 /* Encode via the codec registry */
Georg Brandl96a8c392006-05-29 21:04:52 +0000108 return PyCodec_Encode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000109}
110
111PyDoc_STRVAR(decode__doc__,
112"decode(obj, [encoding[,errors]]) -> object\n\
113\n\
114Decodes obj using the codec registered for encoding. encoding defaults\n\
115to the default encoding. errors may be given to set a different error\n\
116handling scheme. Default is 'strict' meaning that encoding errors raise\n\
117a ValueError. Other possible values are 'ignore' and 'replace'\n\
Andrew M. Kuchling658c45f2008-10-03 12:26:42 +0000118as well as any other name registered with codecs.register_error that is\n\
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000119able to handle ValueErrors.");
120
121static PyObject *
122codec_decode(PyObject *self, PyObject *args)
123{
Brett Cannon3e377de2004-07-10 21:41:14 +0000124 const char *encoding = NULL;
125 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000126 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000127
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000128 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
129 return NULL;
130
Martin v. Löwise2713be2005-03-08 15:03:08 +0000131#ifdef Py_USING_UNICODE
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000132 if (encoding == NULL)
133 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwise2713be2005-03-08 15:03:08 +0000134#else
135 if (encoding == NULL) {
136 PyErr_SetString(PyExc_ValueError, "no encoding specified");
137 return NULL;
138 }
139#endif
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000140
141 /* Decode via the codec registry */
Georg Brandl96a8c392006-05-29 21:04:52 +0000142 return PyCodec_Decode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000143}
144
Guido van Rossume2d67f92000-03-10 23:09:23 +0000145/* --- Helpers ------------------------------------------------------------ */
146
147static
148PyObject *codec_tuple(PyObject *unicode,
Martin v. Löwis66851282006-04-22 11:40:03 +0000149 Py_ssize_t len)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000150{
Georg Brandl96a8c392006-05-29 21:04:52 +0000151 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000152 if (unicode == NULL)
Georg Brandl96a8c392006-05-29 21:04:52 +0000153 return NULL;
154 v = Py_BuildValue("On", unicode, len);
155 Py_DECREF(unicode);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000156 return v;
157}
158
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000159/* --- String codecs ------------------------------------------------------ */
160static PyObject *
161escape_decode(PyObject *self,
162 PyObject *args)
163{
164 const char *errors = NULL;
165 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000166 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000167
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000168 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
169 &data, &size, &errors))
170 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000171 return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000172 size);
173}
174
175static PyObject *
176escape_encode(PyObject *self,
177 PyObject *args)
178{
179 PyObject *str;
180 const char *errors = NULL;
181 char *buf;
Philip Jenvey034b0ac2010-04-05 02:51:51 +0000182 Py_ssize_t consumed, len;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000183
Philip Jenvey034b0ac2010-04-05 02:51:51 +0000184 if (!PyArg_ParseTuple(args, "S|z:escape_encode",
185 &str, &errors))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000186 return NULL;
187
Philip Jenvey034b0ac2010-04-05 02:51:51 +0000188 consumed = PyString_GET_SIZE(str);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000189 str = PyString_Repr(str, 0);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000190 if (!str)
191 return NULL;
192
193 /* The string will be quoted. Unquote, similar to unicode-escape. */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000194 buf = PyString_AS_STRING (str);
195 len = PyString_GET_SIZE (str);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000196 memmove(buf, buf+1, len-2);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000197 if (_PyString_Resize(&str, len-2) < 0)
Neal Norwitz6f5ff3f2006-08-12 01:43:40 +0000198 return NULL;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000199
Philip Jenvey034b0ac2010-04-05 02:51:51 +0000200 return codec_tuple(str, consumed);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000201}
202
203#ifdef Py_USING_UNICODE
Guido van Rossume2d67f92000-03-10 23:09:23 +0000204/* --- Decoder ------------------------------------------------------------ */
205
206static PyObject *
207unicode_internal_decode(PyObject *self,
208 PyObject *args)
209{
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000210 PyObject *obj;
211 const char *errors = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000212 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000213 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000214
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000215 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
216 &obj, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000217 return NULL;
218
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000219 if (PyUnicode_Check(obj)) {
220 Py_INCREF(obj);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000221 return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000222 }
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000223 else {
224 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
225 return NULL;
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000226
227 return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000228 size);
229 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000230}
231
232static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000233utf_7_decode(PyObject *self,
Amaury Forgeot d'Arc50879802007-11-20 23:31:27 +0000234 PyObject *args)
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000235{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000236 Py_buffer pbuf;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000237 const char *errors = NULL;
Amaury Forgeot d'Arc50879802007-11-20 23:31:27 +0000238 int final = 0;
239 Py_ssize_t consumed;
240 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000241
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000242 if (!PyArg_ParseTuple(args, "s*|zi:utf_7_decode",
243 &pbuf, &errors, &final))
244 return NULL;
245 consumed = pbuf.len;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000246
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000247 decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors,
248 final ? NULL : &consumed);
249 PyBuffer_Release(&pbuf);
Amaury Forgeot d'Arc50879802007-11-20 23:31:27 +0000250 if (decoded == NULL)
251 return NULL;
252 return codec_tuple(decoded, consumed);
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000253}
254
255static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000256utf_8_decode(PyObject *self,
257 PyObject *args)
258{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000259 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000260 const char *errors = NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000261 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000262 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000263 PyObject *decoded = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000264
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000265 if (!PyArg_ParseTuple(args, "s*|zi:utf_8_decode",
266 &pbuf, &errors, &final))
Walter Dörwald69652032004-09-07 20:24:22 +0000267 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000268 consumed = pbuf.len;
269
270 decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors,
Walter Dörwald69652032004-09-07 20:24:22 +0000271 final ? NULL : &consumed);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000272 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000273 if (decoded == NULL)
274 return NULL;
275 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000276}
277
278static PyObject *
279utf_16_decode(PyObject *self,
280 PyObject *args)
281{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000282 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000283 const char *errors = NULL;
284 int byteorder = 0;
Walter Dörwald69652032004-09-07 20:24:22 +0000285 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000286 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000287 PyObject *decoded;
288
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000289 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_decode",
290 &pbuf, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000291 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000292 consumed = pbuf.len; /* This is overwritten unless final is true. */
293 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
294 &byteorder, final ? NULL : &consumed);
295 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000296 if (decoded == NULL)
297 return NULL;
298 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000299}
300
301static PyObject *
302utf_16_le_decode(PyObject *self,
303 PyObject *args)
304{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000305 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000306 const char *errors = NULL;
307 int byteorder = -1;
Walter Dörwald69652032004-09-07 20:24:22 +0000308 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000309 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000310 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000311
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000312 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_le_decode",
313 &pbuf, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000314 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000315
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000316 consumed = pbuf.len; /* This is overwritten unless final is true. */
317 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Walter Dörwald69652032004-09-07 20:24:22 +0000318 &byteorder, final ? NULL : &consumed);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000319 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000320 if (decoded == NULL)
321 return NULL;
322 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000323}
324
325static PyObject *
326utf_16_be_decode(PyObject *self,
327 PyObject *args)
328{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000329 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000330 const char *errors = NULL;
331 int byteorder = 1;
Walter Dörwald69652032004-09-07 20:24:22 +0000332 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000333 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000334 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000335
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000336 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_be_decode",
337 &pbuf, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000338 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000339
340 consumed = pbuf.len; /* This is overwritten unless final is true. */
341 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Walter Dörwald69652032004-09-07 20:24:22 +0000342 &byteorder, final ? NULL : &consumed);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000343 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000344 if (decoded == NULL)
345 return NULL;
346 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000347}
348
349/* This non-standard version also provides access to the byteorder
350 parameter of the builtin UTF-16 codec.
351
352 It returns a tuple (unicode, bytesread, byteorder) with byteorder
353 being the value in effect at the end of data.
354
355*/
356
357static PyObject *
358utf_16_ex_decode(PyObject *self,
359 PyObject *args)
360{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000361 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000362 const char *errors = NULL;
363 int byteorder = 0;
364 PyObject *unicode, *tuple;
Walter Dörwald69652032004-09-07 20:24:22 +0000365 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000366 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000367
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000368 if (!PyArg_ParseTuple(args, "s*|zii:utf_16_ex_decode",
369 &pbuf, &errors, &byteorder, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000370 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000371 consumed = pbuf.len; /* This is overwritten unless final is true. */
372 unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
373 &byteorder, final ? NULL : &consumed);
374 PyBuffer_Release(&pbuf);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000375 if (unicode == NULL)
376 return NULL;
Georg Brandl96a8c392006-05-29 21:04:52 +0000377 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000378 Py_DECREF(unicode);
379 return tuple;
380}
381
382static PyObject *
Walter Dörwald6e390802007-08-17 16:41:28 +0000383utf_32_decode(PyObject *self,
384 PyObject *args)
385{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000386 Py_buffer pbuf;
Walter Dörwald6e390802007-08-17 16:41:28 +0000387 const char *errors = NULL;
388 int byteorder = 0;
389 int final = 0;
390 Py_ssize_t consumed;
391 PyObject *decoded;
392
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000393 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_decode",
394 &pbuf, &errors, &final))
Walter Dörwald6e390802007-08-17 16:41:28 +0000395 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000396 consumed = pbuf.len; /* This is overwritten unless final is true. */
397 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
398 &byteorder, final ? NULL : &consumed);
399 PyBuffer_Release(&pbuf);
Walter Dörwald6e390802007-08-17 16:41:28 +0000400 if (decoded == NULL)
401 return NULL;
402 return codec_tuple(decoded, consumed);
403}
404
405static PyObject *
406utf_32_le_decode(PyObject *self,
407 PyObject *args)
408{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000409 Py_buffer pbuf;
Walter Dörwald6e390802007-08-17 16:41:28 +0000410 const char *errors = NULL;
411 int byteorder = -1;
412 int final = 0;
413 Py_ssize_t consumed;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000414 PyObject *decoded;
Walter Dörwald6e390802007-08-17 16:41:28 +0000415
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000416 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_le_decode",
417 &pbuf, &errors, &final))
Walter Dörwald6e390802007-08-17 16:41:28 +0000418 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000419 consumed = pbuf.len; /* This is overwritten unless final is true. */
420 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
421 &byteorder, final ? NULL : &consumed);
422 PyBuffer_Release(&pbuf);
Walter Dörwald6e390802007-08-17 16:41:28 +0000423 if (decoded == NULL)
424 return NULL;
425 return codec_tuple(decoded, consumed);
Walter Dörwald6e390802007-08-17 16:41:28 +0000426}
427
428static PyObject *
429utf_32_be_decode(PyObject *self,
430 PyObject *args)
431{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000432 Py_buffer pbuf;
Walter Dörwald6e390802007-08-17 16:41:28 +0000433 const char *errors = NULL;
434 int byteorder = 1;
435 int final = 0;
436 Py_ssize_t consumed;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000437 PyObject *decoded;
Walter Dörwald6e390802007-08-17 16:41:28 +0000438
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000439 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_be_decode",
440 &pbuf, &errors, &final))
Walter Dörwald6e390802007-08-17 16:41:28 +0000441 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000442 consumed = pbuf.len; /* This is overwritten unless final is true. */
443 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
444 &byteorder, final ? NULL : &consumed);
445 PyBuffer_Release(&pbuf);
Walter Dörwald6e390802007-08-17 16:41:28 +0000446 if (decoded == NULL)
447 return NULL;
448 return codec_tuple(decoded, consumed);
449}
450
451/* This non-standard version also provides access to the byteorder
452 parameter of the builtin UTF-32 codec.
453
454 It returns a tuple (unicode, bytesread, byteorder) with byteorder
455 being the value in effect at the end of data.
456
457*/
458
459static PyObject *
460utf_32_ex_decode(PyObject *self,
461 PyObject *args)
462{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000463 Py_buffer pbuf;
Walter Dörwald6e390802007-08-17 16:41:28 +0000464 const char *errors = NULL;
465 int byteorder = 0;
466 PyObject *unicode, *tuple;
467 int final = 0;
468 Py_ssize_t consumed;
469
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000470 if (!PyArg_ParseTuple(args, "s*|zii:utf_32_ex_decode",
471 &pbuf, &errors, &byteorder, &final))
Walter Dörwald6e390802007-08-17 16:41:28 +0000472 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000473 consumed = pbuf.len; /* This is overwritten unless final is true. */
474 unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
475 &byteorder, final ? NULL : &consumed);
476 PyBuffer_Release(&pbuf);
Walter Dörwald6e390802007-08-17 16:41:28 +0000477 if (unicode == NULL)
478 return NULL;
479 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
480 Py_DECREF(unicode);
481 return tuple;
482}
483
484static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000485unicode_escape_decode(PyObject *self,
486 PyObject *args)
487{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000488 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000489 const char *errors = NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000490 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000491
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000492 if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode",
493 &pbuf, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000494 return NULL;
495
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000496 unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
497 PyBuffer_Release(&pbuf);
498 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000499}
500
501static PyObject *
502raw_unicode_escape_decode(PyObject *self,
503 PyObject *args)
504{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000505 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000506 const char *errors = NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000507 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000508
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000509 if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode",
510 &pbuf, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000511 return NULL;
512
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000513 unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
514 PyBuffer_Release(&pbuf);
515 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000516}
517
518static PyObject *
519latin_1_decode(PyObject *self,
520 PyObject *args)
521{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000522 Py_buffer pbuf;
523 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000524 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000525
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000526 if (!PyArg_ParseTuple(args, "s*|z:latin_1_decode",
527 &pbuf, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000528 return NULL;
529
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000530 unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
531 PyBuffer_Release(&pbuf);
532 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000533}
534
535static PyObject *
536ascii_decode(PyObject *self,
537 PyObject *args)
538{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000539 Py_buffer pbuf;
540 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000541 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000542
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000543 if (!PyArg_ParseTuple(args, "s*|z:ascii_decode",
544 &pbuf, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000545 return NULL;
546
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000547 unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
548 PyBuffer_Release(&pbuf);
549 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000550}
551
552static PyObject *
553charmap_decode(PyObject *self,
554 PyObject *args)
555{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000556 Py_buffer pbuf;
557 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000558 const char *errors = NULL;
559 PyObject *mapping = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000560
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000561 if (!PyArg_ParseTuple(args, "s*|zO:charmap_decode",
562 &pbuf, &errors, &mapping))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000563 return NULL;
564 if (mapping == Py_None)
565 mapping = NULL;
566
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000567 unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
568 PyBuffer_Release(&pbuf);
569 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000570}
571
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000572#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000573
574static PyObject *
575mbcs_decode(PyObject *self,
576 PyObject *args)
577{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000578 Py_buffer pbuf;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000579 const char *errors = NULL;
Martin v. Löwis961b91b2006-08-02 13:53:55 +0000580 int final = 0;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000581 Py_ssize_t consumed;
582 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000583
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000584 if (!PyArg_ParseTuple(args, "s*|zi:mbcs_decode",
585 &pbuf, &errors, &final))
Guido van Rossum24bdb042000-03-28 20:29:59 +0000586 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000587 consumed = pbuf.len;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000588
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000589 decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors,
590 final ? NULL : &consumed);
591 PyBuffer_Release(&pbuf);
592 if (decoded == NULL)
Martin v. Löwisd8251432006-06-14 05:21:04 +0000593 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000594 return codec_tuple(decoded, consumed);
Guido van Rossum24bdb042000-03-28 20:29:59 +0000595}
596
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000597#endif /* MS_WINDOWS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000598
Guido van Rossume2d67f92000-03-10 23:09:23 +0000599/* --- Encoder ------------------------------------------------------------ */
600
601static PyObject *
602readbuffer_encode(PyObject *self,
603 PyObject *args)
604{
605 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000606 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000607 const char *errors = NULL;
608
609 if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
610 &data, &size, &errors))
611 return NULL;
612
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000613 return codec_tuple(PyString_FromStringAndSize(data, size),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000614 size);
615}
616
617static PyObject *
618charbuffer_encode(PyObject *self,
619 PyObject *args)
620{
621 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000622 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000623 const char *errors = NULL;
624
625 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
626 &data, &size, &errors))
627 return NULL;
628
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000629 return codec_tuple(PyString_FromStringAndSize(data, size),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000630 size);
631}
632
633static PyObject *
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000634unicode_internal_encode(PyObject *self,
635 PyObject *args)
636{
637 PyObject *obj;
638 const char *errors = NULL;
639 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000640 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000641
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000642 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
643 &obj, &errors))
644 return NULL;
645
646 if (PyUnicode_Check(obj)) {
647 data = PyUnicode_AS_DATA(obj);
648 size = PyUnicode_GET_DATA_SIZE(obj);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000649 return codec_tuple(PyString_FromStringAndSize(data, size),
Walter Dörwalda7fb4082009-05-06 14:28:24 +0000650 PyUnicode_GET_SIZE(obj));
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000651 }
652 else {
653 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
654 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000655 return codec_tuple(PyString_FromStringAndSize(data, size),
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000656 size);
657 }
658}
659
660static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000661utf_7_encode(PyObject *self,
662 PyObject *args)
663{
664 PyObject *str, *v;
665 const char *errors = NULL;
666
667 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
668 &str, &errors))
669 return NULL;
670
671 str = PyUnicode_FromObject(str);
672 if (str == NULL)
673 return NULL;
674 v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
675 PyUnicode_GET_SIZE(str),
Georg Brandl96a8c392006-05-29 21:04:52 +0000676 0,
677 0,
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000678 errors),
679 PyUnicode_GET_SIZE(str));
680 Py_DECREF(str);
681 return v;
682}
683
684static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000685utf_8_encode(PyObject *self,
686 PyObject *args)
687{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000688 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000689 const char *errors = NULL;
690
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000691 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000692 &str, &errors))
693 return NULL;
694
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000695 str = PyUnicode_FromObject(str);
696 if (str == NULL)
697 return NULL;
698 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
699 PyUnicode_GET_SIZE(str),
700 errors),
701 PyUnicode_GET_SIZE(str));
702 Py_DECREF(str);
703 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000704}
705
706/* This version provides access to the byteorder parameter of the
707 builtin UTF-16 codecs as optional third argument. It defaults to 0
708 which means: use the native byte order and prepend the data with a
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000709 BOM mark.
Guido van Rossume2d67f92000-03-10 23:09:23 +0000710
711*/
712
713static PyObject *
714utf_16_encode(PyObject *self,
715 PyObject *args)
716{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000717 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000718 const char *errors = NULL;
719 int byteorder = 0;
720
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000721 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000722 &str, &errors, &byteorder))
723 return NULL;
724
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000725 str = PyUnicode_FromObject(str);
726 if (str == NULL)
727 return NULL;
728 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
729 PyUnicode_GET_SIZE(str),
730 errors,
731 byteorder),
732 PyUnicode_GET_SIZE(str));
733 Py_DECREF(str);
734 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000735}
736
737static PyObject *
738utf_16_le_encode(PyObject *self,
739 PyObject *args)
740{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000741 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000742 const char *errors = NULL;
743
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000744 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000745 &str, &errors))
746 return NULL;
747
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000748 str = PyUnicode_FromObject(str);
749 if (str == NULL)
750 return NULL;
751 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000752 PyUnicode_GET_SIZE(str),
753 errors,
754 -1),
755 PyUnicode_GET_SIZE(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000756 Py_DECREF(str);
757 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000758}
759
760static PyObject *
761utf_16_be_encode(PyObject *self,
762 PyObject *args)
763{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000764 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000765 const char *errors = NULL;
766
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000767 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000768 &str, &errors))
769 return NULL;
770
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000771 str = PyUnicode_FromObject(str);
772 if (str == NULL)
773 return NULL;
774 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
775 PyUnicode_GET_SIZE(str),
776 errors,
777 +1),
778 PyUnicode_GET_SIZE(str));
779 Py_DECREF(str);
780 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000781}
782
Walter Dörwald6e390802007-08-17 16:41:28 +0000783/* This version provides access to the byteorder parameter of the
784 builtin UTF-32 codecs as optional third argument. It defaults to 0
785 which means: use the native byte order and prepend the data with a
786 BOM mark.
787
788*/
789
790static PyObject *
791utf_32_encode(PyObject *self,
792 PyObject *args)
793{
794 PyObject *str, *v;
795 const char *errors = NULL;
796 int byteorder = 0;
797
798 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
799 &str, &errors, &byteorder))
800 return NULL;
801
802 str = PyUnicode_FromObject(str);
803 if (str == NULL)
804 return NULL;
805 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
806 PyUnicode_GET_SIZE(str),
807 errors,
808 byteorder),
809 PyUnicode_GET_SIZE(str));
810 Py_DECREF(str);
811 return v;
812}
813
814static PyObject *
815utf_32_le_encode(PyObject *self,
816 PyObject *args)
817{
818 PyObject *str, *v;
819 const char *errors = NULL;
820
821 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
822 &str, &errors))
823 return NULL;
824
825 str = PyUnicode_FromObject(str);
826 if (str == NULL)
827 return NULL;
828 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
829 PyUnicode_GET_SIZE(str),
830 errors,
831 -1),
832 PyUnicode_GET_SIZE(str));
833 Py_DECREF(str);
834 return v;
835}
836
837static PyObject *
838utf_32_be_encode(PyObject *self,
839 PyObject *args)
840{
841 PyObject *str, *v;
842 const char *errors = NULL;
843
844 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
845 &str, &errors))
846 return NULL;
847
848 str = PyUnicode_FromObject(str);
849 if (str == NULL)
850 return NULL;
851 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
852 PyUnicode_GET_SIZE(str),
853 errors,
854 +1),
855 PyUnicode_GET_SIZE(str));
856 Py_DECREF(str);
857 return v;
858}
859
Guido van Rossume2d67f92000-03-10 23:09:23 +0000860static PyObject *
861unicode_escape_encode(PyObject *self,
862 PyObject *args)
863{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000864 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000865 const char *errors = NULL;
866
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000867 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000868 &str, &errors))
869 return NULL;
870
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000871 str = PyUnicode_FromObject(str);
872 if (str == NULL)
873 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000874 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000875 PyUnicode_GET_SIZE(str)),
876 PyUnicode_GET_SIZE(str));
877 Py_DECREF(str);
878 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000879}
880
881static PyObject *
882raw_unicode_escape_encode(PyObject *self,
883 PyObject *args)
884{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000885 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000886 const char *errors = NULL;
887
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000888 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000889 &str, &errors))
890 return NULL;
891
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000892 str = PyUnicode_FromObject(str);
893 if (str == NULL)
894 return NULL;
895 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000896 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000897 PyUnicode_GET_SIZE(str)),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000898 PyUnicode_GET_SIZE(str));
899 Py_DECREF(str);
900 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000901}
902
903static PyObject *
904latin_1_encode(PyObject *self,
905 PyObject *args)
906{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000907 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000908 const char *errors = NULL;
909
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000910 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000911 &str, &errors))
912 return NULL;
913
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000914 str = PyUnicode_FromObject(str);
915 if (str == NULL)
916 return NULL;
917 v = codec_tuple(PyUnicode_EncodeLatin1(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000918 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000919 PyUnicode_GET_SIZE(str),
920 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000921 PyUnicode_GET_SIZE(str));
922 Py_DECREF(str);
923 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000924}
925
926static PyObject *
927ascii_encode(PyObject *self,
928 PyObject *args)
929{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000930 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000931 const char *errors = NULL;
932
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000933 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000934 &str, &errors))
935 return NULL;
936
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000937 str = PyUnicode_FromObject(str);
938 if (str == NULL)
939 return NULL;
940 v = codec_tuple(PyUnicode_EncodeASCII(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000941 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000942 PyUnicode_GET_SIZE(str),
943 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000944 PyUnicode_GET_SIZE(str));
945 Py_DECREF(str);
946 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000947}
948
949static PyObject *
950charmap_encode(PyObject *self,
951 PyObject *args)
952{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000953 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000954 const char *errors = NULL;
955 PyObject *mapping = NULL;
956
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000957 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000958 &str, &errors, &mapping))
959 return NULL;
960 if (mapping == Py_None)
961 mapping = NULL;
962
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000963 str = PyUnicode_FromObject(str);
964 if (str == NULL)
965 return NULL;
966 v = codec_tuple(PyUnicode_EncodeCharmap(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000967 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000968 PyUnicode_GET_SIZE(str),
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000969 mapping,
Guido van Rossume2d67f92000-03-10 23:09:23 +0000970 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000971 PyUnicode_GET_SIZE(str));
972 Py_DECREF(str);
973 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000974}
975
Martin v. Löwis3f767792006-06-04 19:36:28 +0000976static PyObject*
977charmap_build(PyObject *self, PyObject *args)
978{
979 PyObject *map;
980 if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
981 return NULL;
982 return PyUnicode_BuildEncodingMap(map);
983}
984
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000985#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000986
987static PyObject *
988mbcs_encode(PyObject *self,
989 PyObject *args)
990{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000991 PyObject *str, *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000992 const char *errors = NULL;
993
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000994 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
Guido van Rossum24bdb042000-03-28 20:29:59 +0000995 &str, &errors))
996 return NULL;
997
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000998 str = PyUnicode_FromObject(str);
999 if (str == NULL)
1000 return NULL;
1001 v = codec_tuple(PyUnicode_EncodeMBCS(
Walter Dörwald9fd115c2005-11-02 08:30:08 +00001002 PyUnicode_AS_UNICODE(str),
Guido van Rossum24bdb042000-03-28 20:29:59 +00001003 PyUnicode_GET_SIZE(str),
1004 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001005 PyUnicode_GET_SIZE(str));
1006 Py_DECREF(str);
1007 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001008}
1009
Martin v. Löwis6238d2b2002-06-30 15:26:10 +00001010#endif /* MS_WINDOWS */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001011#endif /* Py_USING_UNICODE */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001012
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001013/* --- Error handler registry --------------------------------------------- */
1014
Walter Dörwald0ae29812002-10-31 13:36:29 +00001015PyDoc_STRVAR(register_error__doc__,
1016"register_error(errors, handler)\n\
1017\n\
1018Register the specified error handler under the name\n\
1019errors. handler must be a callable object, that\n\
1020will be called with an exception instance containing\n\
1021information about the location of the encoding/decoding\n\
1022error and must return a (replacement, new position) tuple.");
1023
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001024static PyObject *register_error(PyObject *self, PyObject *args)
1025{
1026 const char *name;
1027 PyObject *handler;
1028
1029 if (!PyArg_ParseTuple(args, "sO:register_error",
1030 &name, &handler))
1031 return NULL;
1032 if (PyCodec_RegisterError(name, handler))
1033 return NULL;
Georg Brandl96a8c392006-05-29 21:04:52 +00001034 Py_RETURN_NONE;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001035}
1036
Walter Dörwald0ae29812002-10-31 13:36:29 +00001037PyDoc_STRVAR(lookup_error__doc__,
1038"lookup_error(errors) -> handler\n\
1039\n\
1040Return the error handler for the specified error handling name\n\
1041or raise a LookupError, if no handler exists under this name.");
1042
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001043static PyObject *lookup_error(PyObject *self, PyObject *args)
1044{
1045 const char *name;
1046
1047 if (!PyArg_ParseTuple(args, "s:lookup_error",
1048 &name))
1049 return NULL;
1050 return PyCodec_LookupError(name);
1051}
1052
Guido van Rossume2d67f92000-03-10 23:09:23 +00001053/* --- Module API --------------------------------------------------------- */
1054
1055static PyMethodDef _codecs_functions[] = {
Georg Brandl96a8c392006-05-29 21:04:52 +00001056 {"register", codec_register, METH_O,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001057 register__doc__},
Marc-André Lemburg3f419742004-07-10 12:06:10 +00001058 {"lookup", codec_lookup, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001059 lookup__doc__},
Brett Cannon3e377de2004-07-10 21:41:14 +00001060 {"encode", codec_encode, METH_VARARGS,
1061 encode__doc__},
1062 {"decode", codec_decode, METH_VARARGS,
1063 decode__doc__},
Martin v. Löwis8a8da792002-08-14 07:46:28 +00001064 {"escape_encode", escape_encode, METH_VARARGS},
1065 {"escape_decode", escape_decode, METH_VARARGS},
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001066#ifdef Py_USING_UNICODE
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001067 {"utf_8_encode", utf_8_encode, METH_VARARGS},
1068 {"utf_8_decode", utf_8_decode, METH_VARARGS},
1069 {"utf_7_encode", utf_7_encode, METH_VARARGS},
1070 {"utf_7_decode", utf_7_decode, METH_VARARGS},
1071 {"utf_16_encode", utf_16_encode, METH_VARARGS},
1072 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1073 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1074 {"utf_16_decode", utf_16_decode, METH_VARARGS},
1075 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1076 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1077 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
Walter Dörwald6e390802007-08-17 16:41:28 +00001078 {"utf_32_encode", utf_32_encode, METH_VARARGS},
1079 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1080 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1081 {"utf_32_decode", utf_32_decode, METH_VARARGS},
1082 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1083 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1084 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001085 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1086 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1087 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1088 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1089 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1090 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1091 {"latin_1_encode", latin_1_encode, METH_VARARGS},
1092 {"latin_1_decode", latin_1_decode, METH_VARARGS},
1093 {"ascii_encode", ascii_encode, METH_VARARGS},
1094 {"ascii_decode", ascii_decode, METH_VARARGS},
1095 {"charmap_encode", charmap_encode, METH_VARARGS},
1096 {"charmap_decode", charmap_decode, METH_VARARGS},
Martin v. Löwis3f767792006-06-04 19:36:28 +00001097 {"charmap_build", charmap_build, METH_VARARGS},
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001098 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
1099 {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
Martin v. Löwis6238d2b2002-06-30 15:26:10 +00001100#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001101 {"mbcs_encode", mbcs_encode, METH_VARARGS},
1102 {"mbcs_decode", mbcs_decode, METH_VARARGS},
Guido van Rossum24bdb042000-03-28 20:29:59 +00001103#endif
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001104#endif /* Py_USING_UNICODE */
Walter Dörwald0ae29812002-10-31 13:36:29 +00001105 {"register_error", register_error, METH_VARARGS,
1106 register_error__doc__},
1107 {"lookup_error", lookup_error, METH_VARARGS,
1108 lookup_error__doc__},
Guido van Rossume2d67f92000-03-10 23:09:23 +00001109 {NULL, NULL} /* sentinel */
1110};
1111
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001112PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00001113init_codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001114{
1115 Py_InitModule("_codecs", _codecs_functions);
1116}