blob: 9a1470021e21ddd8385664839ed7517067fd71dd [file] [log] [blame]
Guido van Rossume2d67f92000-03-10 23:09:23 +00001/* ------------------------------------------------------------------------
2
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
5
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
8
9 The codec registry is accessible via:
10
11 register(search_function) -> None
12
Walter Dörwald219336a2007-07-19 13:04:38 +000013 lookup(encoding) -> CodecInfo object
Guido van Rossume2d67f92000-03-10 23:09:23 +000014
15 The builtin Unicode codecs use the following interface:
16
Walter Dörwald9fd115c2005-11-02 08:30:08 +000017 <encoding>_encode(Unicode_object[,errors='strict']) ->
Guido van Rossume2d67f92000-03-10 23:09:23 +000018 (string object, bytes consumed)
19
Walter Dörwald9fd115c2005-11-02 08:30:08 +000020 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
Guido van Rossume2d67f92000-03-10 23:09:23 +000021 (Unicode object, bytes consumed)
22
   <encoding>_encode() interfaces also accept non-Unicode objects as
   input. The objects are then converted to Unicode using
   PyUnicode_FromObject() prior to applying the conversion.
26
Guido van Rossume2d67f92000-03-10 23:09:23 +000027 These <encoding>s are available: utf_8, unicode_escape,
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +000028 raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
29 mbcs (on win32).
30
Guido van Rossume2d67f92000-03-10 23:09:23 +000031
32Written by Marc-Andre Lemburg (mal@lemburg.com).
33
Guido van Rossum16b1ad92000-08-03 16:24:25 +000034Copyright (c) Corporation for National Research Initiatives.
Guido van Rossume2d67f92000-03-10 23:09:23 +000035
36 ------------------------------------------------------------------------ */
37
Martin v. Löwis18e16552006-02-15 17:27:45 +000038#define PY_SSIZE_T_CLEAN
Guido van Rossume2d67f92000-03-10 23:09:23 +000039#include "Python.h"
40
41/* --- Registry ----------------------------------------------------------- */
42
Walter Dörwald0ae29812002-10-31 13:36:29 +000043PyDoc_STRVAR(register__doc__,
44"register(search_function)\n\
45\n\
46Register a codec search function. Search functions are expected to take\n\
47one argument, the encoding name in all lower case letters, and return\n\
Walter Dörwald219336a2007-07-19 13:04:38 +000048a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
49(or a CodecInfo object).");
Walter Dörwald0ae29812002-10-31 13:36:29 +000050
Guido van Rossume2d67f92000-03-10 23:09:23 +000051static
Georg Brandl96a8c392006-05-29 21:04:52 +000052PyObject *codec_register(PyObject *self, PyObject *search_function)
Guido van Rossume2d67f92000-03-10 23:09:23 +000053{
Guido van Rossume2d67f92000-03-10 23:09:23 +000054 if (PyCodec_Register(search_function))
Georg Brandl96a8c392006-05-29 21:04:52 +000055 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000056
Georg Brandl96a8c392006-05-29 21:04:52 +000057 Py_RETURN_NONE;
Guido van Rossume2d67f92000-03-10 23:09:23 +000058}
59
Walter Dörwald0ae29812002-10-31 13:36:29 +000060PyDoc_STRVAR(lookup__doc__,
Walter Dörwald219336a2007-07-19 13:04:38 +000061"lookup(encoding) -> CodecInfo\n\
Walter Dörwald0ae29812002-10-31 13:36:29 +000062\n\
63Looks up a codec tuple in the Python codec registry and returns\n\
Walter Dörwald219336a2007-07-19 13:04:38 +000064a tuple of function (or a CodecInfo object).");
Walter Dörwald0ae29812002-10-31 13:36:29 +000065
Guido van Rossume2d67f92000-03-10 23:09:23 +000066static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000067PyObject *codec_lookup(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000068{
69 char *encoding;
70
71 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
Georg Brandl96a8c392006-05-29 21:04:52 +000072 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +000073
74 return _PyCodec_Lookup(encoding);
Guido van Rossume2d67f92000-03-10 23:09:23 +000075}
76
Marc-André Lemburg3f419742004-07-10 12:06:10 +000077PyDoc_STRVAR(encode__doc__,
78"encode(obj, [encoding[,errors]]) -> object\n\
79\n\
80Encodes obj using the codec registered for encoding. encoding defaults\n\
81to the default encoding. errors may be given to set a different error\n\
82handling scheme. Default is 'strict' meaning that encoding errors raise\n\
83a ValueError. Other possible values are 'ignore', 'replace' and\n\
84'xmlcharrefreplace' as well as any other name registered with\n\
85codecs.register_error that can handle ValueErrors.");
86
87static PyObject *
88codec_encode(PyObject *self, PyObject *args)
89{
Brett Cannon3e377de2004-07-10 21:41:14 +000090 const char *encoding = NULL;
91 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +000092 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000093
Marc-André Lemburg3f419742004-07-10 12:06:10 +000094 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
95 return NULL;
96
Martin v. Löwise2713be2005-03-08 15:03:08 +000097#ifdef Py_USING_UNICODE
Marc-André Lemburg3f419742004-07-10 12:06:10 +000098 if (encoding == NULL)
99 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwise2713be2005-03-08 15:03:08 +0000100#else
101 if (encoding == NULL) {
102 PyErr_SetString(PyExc_ValueError, "no encoding specified");
103 return NULL;
104 }
105#endif
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000106
107 /* Encode via the codec registry */
Georg Brandl96a8c392006-05-29 21:04:52 +0000108 return PyCodec_Encode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000109}
110
111PyDoc_STRVAR(decode__doc__,
112"decode(obj, [encoding[,errors]]) -> object\n\
113\n\
114Decodes obj using the codec registered for encoding. encoding defaults\n\
115to the default encoding. errors may be given to set a different error\n\
116handling scheme. Default is 'strict' meaning that encoding errors raise\n\
117a ValueError. Other possible values are 'ignore' and 'replace'\n\
Andrew M. Kuchling658c45f2008-10-03 12:26:42 +0000118as well as any other name registered with codecs.register_error that is\n\
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000119able to handle ValueErrors.");
120
121static PyObject *
122codec_decode(PyObject *self, PyObject *args)
123{
Brett Cannon3e377de2004-07-10 21:41:14 +0000124 const char *encoding = NULL;
125 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000126 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000127
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000128 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
129 return NULL;
130
Martin v. Löwise2713be2005-03-08 15:03:08 +0000131#ifdef Py_USING_UNICODE
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000132 if (encoding == NULL)
133 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwise2713be2005-03-08 15:03:08 +0000134#else
135 if (encoding == NULL) {
136 PyErr_SetString(PyExc_ValueError, "no encoding specified");
137 return NULL;
138 }
139#endif
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000140
141 /* Decode via the codec registry */
Georg Brandl96a8c392006-05-29 21:04:52 +0000142 return PyCodec_Decode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000143}
144
Guido van Rossume2d67f92000-03-10 23:09:23 +0000145/* --- Helpers ------------------------------------------------------------ */
146
147static
148PyObject *codec_tuple(PyObject *unicode,
Martin v. Löwis66851282006-04-22 11:40:03 +0000149 Py_ssize_t len)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000150{
Georg Brandl96a8c392006-05-29 21:04:52 +0000151 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000152 if (unicode == NULL)
Georg Brandl96a8c392006-05-29 21:04:52 +0000153 return NULL;
154 v = Py_BuildValue("On", unicode, len);
155 Py_DECREF(unicode);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000156 return v;
157}
158
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000159/* --- String codecs ------------------------------------------------------ */
160static PyObject *
161escape_decode(PyObject *self,
162 PyObject *args)
163{
164 const char *errors = NULL;
165 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000166 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000167
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000168 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
169 &data, &size, &errors))
170 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000171 return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000172 size);
173}
174
175static PyObject *
176escape_encode(PyObject *self,
177 PyObject *args)
178{
179 PyObject *str;
180 const char *errors = NULL;
181 char *buf;
Martin v. Löwis66851282006-04-22 11:40:03 +0000182 Py_ssize_t len;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000183
184 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000185 &PyString_Type, &str, &errors))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000186 return NULL;
187
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000188 str = PyString_Repr(str, 0);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000189 if (!str)
190 return NULL;
191
192 /* The string will be quoted. Unquote, similar to unicode-escape. */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000193 buf = PyString_AS_STRING (str);
194 len = PyString_GET_SIZE (str);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000195 memmove(buf, buf+1, len-2);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000196 if (_PyString_Resize(&str, len-2) < 0)
Neal Norwitz6f5ff3f2006-08-12 01:43:40 +0000197 return NULL;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000198
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000199 return codec_tuple(str, PyString_Size(str));
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000200}
201
202#ifdef Py_USING_UNICODE
Guido van Rossume2d67f92000-03-10 23:09:23 +0000203/* --- Decoder ------------------------------------------------------------ */
204
205static PyObject *
206unicode_internal_decode(PyObject *self,
207 PyObject *args)
208{
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000209 PyObject *obj;
210 const char *errors = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000211 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000212 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000213
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000214 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
215 &obj, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000216 return NULL;
217
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000218 if (PyUnicode_Check(obj)) {
219 Py_INCREF(obj);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000220 return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000221 }
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000222 else {
223 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
224 return NULL;
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000225
226 return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000227 size);
228 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000229}
230
231static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000232utf_7_decode(PyObject *self,
Amaury Forgeot d'Arc50879802007-11-20 23:31:27 +0000233 PyObject *args)
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000234{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000235 Py_buffer pbuf;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000236 const char *errors = NULL;
Amaury Forgeot d'Arc50879802007-11-20 23:31:27 +0000237 int final = 0;
238 Py_ssize_t consumed;
239 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000240
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000241 if (!PyArg_ParseTuple(args, "s*|zi:utf_7_decode",
242 &pbuf, &errors, &final))
243 return NULL;
244 consumed = pbuf.len;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000245
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000246 decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors,
247 final ? NULL : &consumed);
248 PyBuffer_Release(&pbuf);
Amaury Forgeot d'Arc50879802007-11-20 23:31:27 +0000249 if (decoded == NULL)
250 return NULL;
251 return codec_tuple(decoded, consumed);
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000252}
253
254static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000255utf_8_decode(PyObject *self,
256 PyObject *args)
257{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000258 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000259 const char *errors = NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000260 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000261 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000262 PyObject *decoded = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000263
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000264 if (!PyArg_ParseTuple(args, "s*|zi:utf_8_decode",
265 &pbuf, &errors, &final))
Walter Dörwald69652032004-09-07 20:24:22 +0000266 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000267 consumed = pbuf.len;
268
269 decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors,
Walter Dörwald69652032004-09-07 20:24:22 +0000270 final ? NULL : &consumed);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000271 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000272 if (decoded == NULL)
273 return NULL;
274 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000275}
276
277static PyObject *
278utf_16_decode(PyObject *self,
279 PyObject *args)
280{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000281 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000282 const char *errors = NULL;
283 int byteorder = 0;
Walter Dörwald69652032004-09-07 20:24:22 +0000284 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000285 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000286 PyObject *decoded;
287
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000288 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_decode",
289 &pbuf, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000290 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000291 consumed = pbuf.len; /* This is overwritten unless final is true. */
292 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
293 &byteorder, final ? NULL : &consumed);
294 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000295 if (decoded == NULL)
296 return NULL;
297 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000298}
299
300static PyObject *
301utf_16_le_decode(PyObject *self,
302 PyObject *args)
303{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000304 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000305 const char *errors = NULL;
306 int byteorder = -1;
Walter Dörwald69652032004-09-07 20:24:22 +0000307 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000308 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000309 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000310
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000311 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_le_decode",
312 &pbuf, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000313 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000314
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000315 consumed = pbuf.len; /* This is overwritten unless final is true. */
316 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Walter Dörwald69652032004-09-07 20:24:22 +0000317 &byteorder, final ? NULL : &consumed);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000318 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000319 if (decoded == NULL)
320 return NULL;
321 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000322}
323
324static PyObject *
325utf_16_be_decode(PyObject *self,
326 PyObject *args)
327{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000328 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000329 const char *errors = NULL;
330 int byteorder = 1;
Walter Dörwald69652032004-09-07 20:24:22 +0000331 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000332 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000333 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000334
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000335 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_be_decode",
336 &pbuf, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000337 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000338
339 consumed = pbuf.len; /* This is overwritten unless final is true. */
340 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Walter Dörwald69652032004-09-07 20:24:22 +0000341 &byteorder, final ? NULL : &consumed);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000342 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000343 if (decoded == NULL)
344 return NULL;
345 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000346}
347
348/* This non-standard version also provides access to the byteorder
349 parameter of the builtin UTF-16 codec.
350
351 It returns a tuple (unicode, bytesread, byteorder) with byteorder
352 being the value in effect at the end of data.
353
354*/
355
356static PyObject *
357utf_16_ex_decode(PyObject *self,
358 PyObject *args)
359{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000360 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000361 const char *errors = NULL;
362 int byteorder = 0;
363 PyObject *unicode, *tuple;
Walter Dörwald69652032004-09-07 20:24:22 +0000364 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000365 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000366
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000367 if (!PyArg_ParseTuple(args, "s*|zii:utf_16_ex_decode",
368 &pbuf, &errors, &byteorder, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000369 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000370 consumed = pbuf.len; /* This is overwritten unless final is true. */
371 unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
372 &byteorder, final ? NULL : &consumed);
373 PyBuffer_Release(&pbuf);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000374 if (unicode == NULL)
375 return NULL;
Georg Brandl96a8c392006-05-29 21:04:52 +0000376 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000377 Py_DECREF(unicode);
378 return tuple;
379}
380
381static PyObject *
Walter Dörwald6e390802007-08-17 16:41:28 +0000382utf_32_decode(PyObject *self,
383 PyObject *args)
384{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000385 Py_buffer pbuf;
Walter Dörwald6e390802007-08-17 16:41:28 +0000386 const char *errors = NULL;
387 int byteorder = 0;
388 int final = 0;
389 Py_ssize_t consumed;
390 PyObject *decoded;
391
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000392 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_decode",
393 &pbuf, &errors, &final))
Walter Dörwald6e390802007-08-17 16:41:28 +0000394 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000395 consumed = pbuf.len; /* This is overwritten unless final is true. */
396 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
397 &byteorder, final ? NULL : &consumed);
398 PyBuffer_Release(&pbuf);
Walter Dörwald6e390802007-08-17 16:41:28 +0000399 if (decoded == NULL)
400 return NULL;
401 return codec_tuple(decoded, consumed);
402}
403
404static PyObject *
405utf_32_le_decode(PyObject *self,
406 PyObject *args)
407{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000408 Py_buffer pbuf;
Walter Dörwald6e390802007-08-17 16:41:28 +0000409 const char *errors = NULL;
410 int byteorder = -1;
411 int final = 0;
412 Py_ssize_t consumed;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000413 PyObject *decoded;
Walter Dörwald6e390802007-08-17 16:41:28 +0000414
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000415 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_le_decode",
416 &pbuf, &errors, &final))
Walter Dörwald6e390802007-08-17 16:41:28 +0000417 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000418 consumed = pbuf.len; /* This is overwritten unless final is true. */
419 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
420 &byteorder, final ? NULL : &consumed);
421 PyBuffer_Release(&pbuf);
Walter Dörwald6e390802007-08-17 16:41:28 +0000422 if (decoded == NULL)
423 return NULL;
424 return codec_tuple(decoded, consumed);
Walter Dörwald6e390802007-08-17 16:41:28 +0000425}
426
427static PyObject *
428utf_32_be_decode(PyObject *self,
429 PyObject *args)
430{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000431 Py_buffer pbuf;
Walter Dörwald6e390802007-08-17 16:41:28 +0000432 const char *errors = NULL;
433 int byteorder = 1;
434 int final = 0;
435 Py_ssize_t consumed;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000436 PyObject *decoded;
Walter Dörwald6e390802007-08-17 16:41:28 +0000437
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000438 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_be_decode",
439 &pbuf, &errors, &final))
Walter Dörwald6e390802007-08-17 16:41:28 +0000440 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000441 consumed = pbuf.len; /* This is overwritten unless final is true. */
442 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
443 &byteorder, final ? NULL : &consumed);
444 PyBuffer_Release(&pbuf);
Walter Dörwald6e390802007-08-17 16:41:28 +0000445 if (decoded == NULL)
446 return NULL;
447 return codec_tuple(decoded, consumed);
448}
449
450/* This non-standard version also provides access to the byteorder
451 parameter of the builtin UTF-32 codec.
452
453 It returns a tuple (unicode, bytesread, byteorder) with byteorder
454 being the value in effect at the end of data.
455
456*/
457
458static PyObject *
459utf_32_ex_decode(PyObject *self,
460 PyObject *args)
461{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000462 Py_buffer pbuf;
Walter Dörwald6e390802007-08-17 16:41:28 +0000463 const char *errors = NULL;
464 int byteorder = 0;
465 PyObject *unicode, *tuple;
466 int final = 0;
467 Py_ssize_t consumed;
468
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000469 if (!PyArg_ParseTuple(args, "s*|zii:utf_32_ex_decode",
470 &pbuf, &errors, &byteorder, &final))
Walter Dörwald6e390802007-08-17 16:41:28 +0000471 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000472 consumed = pbuf.len; /* This is overwritten unless final is true. */
473 unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
474 &byteorder, final ? NULL : &consumed);
475 PyBuffer_Release(&pbuf);
Walter Dörwald6e390802007-08-17 16:41:28 +0000476 if (unicode == NULL)
477 return NULL;
478 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
479 Py_DECREF(unicode);
480 return tuple;
481}
482
483static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000484unicode_escape_decode(PyObject *self,
485 PyObject *args)
486{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000487 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000488 const char *errors = NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000489 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000490
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000491 if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode",
492 &pbuf, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000493 return NULL;
494
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000495 unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
496 PyBuffer_Release(&pbuf);
497 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000498}
499
500static PyObject *
501raw_unicode_escape_decode(PyObject *self,
502 PyObject *args)
503{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000504 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000505 const char *errors = NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000506 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000507
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000508 if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode",
509 &pbuf, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000510 return NULL;
511
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000512 unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
513 PyBuffer_Release(&pbuf);
514 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000515}
516
517static PyObject *
518latin_1_decode(PyObject *self,
519 PyObject *args)
520{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000521 Py_buffer pbuf;
522 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000523 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000524
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000525 if (!PyArg_ParseTuple(args, "s*|z:latin_1_decode",
526 &pbuf, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000527 return NULL;
528
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000529 unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
530 PyBuffer_Release(&pbuf);
531 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000532}
533
534static PyObject *
535ascii_decode(PyObject *self,
536 PyObject *args)
537{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000538 Py_buffer pbuf;
539 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000540 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000541
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000542 if (!PyArg_ParseTuple(args, "s*|z:ascii_decode",
543 &pbuf, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000544 return NULL;
545
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000546 unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
547 PyBuffer_Release(&pbuf);
548 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000549}
550
551static PyObject *
552charmap_decode(PyObject *self,
553 PyObject *args)
554{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000555 Py_buffer pbuf;
556 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000557 const char *errors = NULL;
558 PyObject *mapping = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000559
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000560 if (!PyArg_ParseTuple(args, "s*|zO:charmap_decode",
561 &pbuf, &errors, &mapping))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000562 return NULL;
563 if (mapping == Py_None)
564 mapping = NULL;
565
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000566 unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
567 PyBuffer_Release(&pbuf);
568 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000569}
570
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000571#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000572
573static PyObject *
574mbcs_decode(PyObject *self,
575 PyObject *args)
576{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000577 Py_buffer pbuf;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000578 const char *errors = NULL;
Martin v. Löwis961b91b2006-08-02 13:53:55 +0000579 int final = 0;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000580 Py_ssize_t consumed;
581 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000582
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000583 if (!PyArg_ParseTuple(args, "s*|zi:mbcs_decode",
584 &pbuf, &errors, &final))
Guido van Rossum24bdb042000-03-28 20:29:59 +0000585 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000586 consumed = pbuf.len;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000587
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000588 decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors,
589 final ? NULL : &consumed);
590 PyBuffer_Release(&pbuf);
591 if (decoded == NULL)
Martin v. Löwisd8251432006-06-14 05:21:04 +0000592 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000593 return codec_tuple(decoded, consumed);
Guido van Rossum24bdb042000-03-28 20:29:59 +0000594}
595
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000596#endif /* MS_WINDOWS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000597
Guido van Rossume2d67f92000-03-10 23:09:23 +0000598/* --- Encoder ------------------------------------------------------------ */
599
600static PyObject *
601readbuffer_encode(PyObject *self,
602 PyObject *args)
603{
604 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000605 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000606 const char *errors = NULL;
607
608 if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
609 &data, &size, &errors))
610 return NULL;
611
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000612 return codec_tuple(PyString_FromStringAndSize(data, size),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000613 size);
614}
615
616static PyObject *
617charbuffer_encode(PyObject *self,
618 PyObject *args)
619{
620 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000621 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000622 const char *errors = NULL;
623
624 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
625 &data, &size, &errors))
626 return NULL;
627
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000628 return codec_tuple(PyString_FromStringAndSize(data, size),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000629 size);
630}
631
632static PyObject *
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000633unicode_internal_encode(PyObject *self,
634 PyObject *args)
635{
636 PyObject *obj;
637 const char *errors = NULL;
638 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000639 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000640
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000641 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
642 &obj, &errors))
643 return NULL;
644
645 if (PyUnicode_Check(obj)) {
646 data = PyUnicode_AS_DATA(obj);
647 size = PyUnicode_GET_DATA_SIZE(obj);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000648 return codec_tuple(PyString_FromStringAndSize(data, size),
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000649 size);
650 }
651 else {
652 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
653 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000654 return codec_tuple(PyString_FromStringAndSize(data, size),
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000655 size);
656 }
657}
658
659static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000660utf_7_encode(PyObject *self,
661 PyObject *args)
662{
663 PyObject *str, *v;
664 const char *errors = NULL;
665
666 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
667 &str, &errors))
668 return NULL;
669
670 str = PyUnicode_FromObject(str);
671 if (str == NULL)
672 return NULL;
673 v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
674 PyUnicode_GET_SIZE(str),
Georg Brandl96a8c392006-05-29 21:04:52 +0000675 0,
676 0,
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000677 errors),
678 PyUnicode_GET_SIZE(str));
679 Py_DECREF(str);
680 return v;
681}
682
683static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000684utf_8_encode(PyObject *self,
685 PyObject *args)
686{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000687 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000688 const char *errors = NULL;
689
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000690 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000691 &str, &errors))
692 return NULL;
693
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000694 str = PyUnicode_FromObject(str);
695 if (str == NULL)
696 return NULL;
697 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
698 PyUnicode_GET_SIZE(str),
699 errors),
700 PyUnicode_GET_SIZE(str));
701 Py_DECREF(str);
702 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000703}
704
/* This version provides access to the byteorder parameter of the
   builtin UTF-16 codecs as an optional third argument. It defaults
   to 0, which means: use the native byte order and prepend a BOM
   to the data.

*/
711
712static PyObject *
713utf_16_encode(PyObject *self,
714 PyObject *args)
715{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000716 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000717 const char *errors = NULL;
718 int byteorder = 0;
719
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000720 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000721 &str, &errors, &byteorder))
722 return NULL;
723
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000724 str = PyUnicode_FromObject(str);
725 if (str == NULL)
726 return NULL;
727 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
728 PyUnicode_GET_SIZE(str),
729 errors,
730 byteorder),
731 PyUnicode_GET_SIZE(str));
732 Py_DECREF(str);
733 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000734}
735
736static PyObject *
737utf_16_le_encode(PyObject *self,
738 PyObject *args)
739{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000740 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000741 const char *errors = NULL;
742
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000743 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000744 &str, &errors))
745 return NULL;
746
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000747 str = PyUnicode_FromObject(str);
748 if (str == NULL)
749 return NULL;
750 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000751 PyUnicode_GET_SIZE(str),
752 errors,
753 -1),
754 PyUnicode_GET_SIZE(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000755 Py_DECREF(str);
756 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000757}
758
759static PyObject *
760utf_16_be_encode(PyObject *self,
761 PyObject *args)
762{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000763 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000764 const char *errors = NULL;
765
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000766 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000767 &str, &errors))
768 return NULL;
769
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000770 str = PyUnicode_FromObject(str);
771 if (str == NULL)
772 return NULL;
773 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
774 PyUnicode_GET_SIZE(str),
775 errors,
776 +1),
777 PyUnicode_GET_SIZE(str));
778 Py_DECREF(str);
779 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000780}
781
/* This version provides access to the byteorder parameter of the
   builtin UTF-32 codecs as an optional third argument. It defaults
   to 0, which means: use the native byte order and prepend a BOM
   to the data.

*/
788
789static PyObject *
790utf_32_encode(PyObject *self,
791 PyObject *args)
792{
793 PyObject *str, *v;
794 const char *errors = NULL;
795 int byteorder = 0;
796
797 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
798 &str, &errors, &byteorder))
799 return NULL;
800
801 str = PyUnicode_FromObject(str);
802 if (str == NULL)
803 return NULL;
804 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
805 PyUnicode_GET_SIZE(str),
806 errors,
807 byteorder),
808 PyUnicode_GET_SIZE(str));
809 Py_DECREF(str);
810 return v;
811}
812
813static PyObject *
814utf_32_le_encode(PyObject *self,
815 PyObject *args)
816{
817 PyObject *str, *v;
818 const char *errors = NULL;
819
820 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
821 &str, &errors))
822 return NULL;
823
824 str = PyUnicode_FromObject(str);
825 if (str == NULL)
826 return NULL;
827 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
828 PyUnicode_GET_SIZE(str),
829 errors,
830 -1),
831 PyUnicode_GET_SIZE(str));
832 Py_DECREF(str);
833 return v;
834}
835
836static PyObject *
837utf_32_be_encode(PyObject *self,
838 PyObject *args)
839{
840 PyObject *str, *v;
841 const char *errors = NULL;
842
843 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
844 &str, &errors))
845 return NULL;
846
847 str = PyUnicode_FromObject(str);
848 if (str == NULL)
849 return NULL;
850 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
851 PyUnicode_GET_SIZE(str),
852 errors,
853 +1),
854 PyUnicode_GET_SIZE(str));
855 Py_DECREF(str);
856 return v;
857}
858
Guido van Rossume2d67f92000-03-10 23:09:23 +0000859static PyObject *
860unicode_escape_encode(PyObject *self,
861 PyObject *args)
862{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000863 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000864 const char *errors = NULL;
865
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000866 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000867 &str, &errors))
868 return NULL;
869
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000870 str = PyUnicode_FromObject(str);
871 if (str == NULL)
872 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000873 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000874 PyUnicode_GET_SIZE(str)),
875 PyUnicode_GET_SIZE(str));
876 Py_DECREF(str);
877 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000878}
879
880static PyObject *
881raw_unicode_escape_encode(PyObject *self,
882 PyObject *args)
883{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000884 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000885 const char *errors = NULL;
886
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000887 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000888 &str, &errors))
889 return NULL;
890
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000891 str = PyUnicode_FromObject(str);
892 if (str == NULL)
893 return NULL;
894 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000895 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000896 PyUnicode_GET_SIZE(str)),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000897 PyUnicode_GET_SIZE(str));
898 Py_DECREF(str);
899 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000900}
901
902static PyObject *
903latin_1_encode(PyObject *self,
904 PyObject *args)
905{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000906 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000907 const char *errors = NULL;
908
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000909 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000910 &str, &errors))
911 return NULL;
912
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000913 str = PyUnicode_FromObject(str);
914 if (str == NULL)
915 return NULL;
916 v = codec_tuple(PyUnicode_EncodeLatin1(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000917 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000918 PyUnicode_GET_SIZE(str),
919 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000920 PyUnicode_GET_SIZE(str));
921 Py_DECREF(str);
922 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000923}
924
925static PyObject *
926ascii_encode(PyObject *self,
927 PyObject *args)
928{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000929 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000930 const char *errors = NULL;
931
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000932 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000933 &str, &errors))
934 return NULL;
935
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000936 str = PyUnicode_FromObject(str);
937 if (str == NULL)
938 return NULL;
939 v = codec_tuple(PyUnicode_EncodeASCII(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000940 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000941 PyUnicode_GET_SIZE(str),
942 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000943 PyUnicode_GET_SIZE(str));
944 Py_DECREF(str);
945 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000946}
947
948static PyObject *
949charmap_encode(PyObject *self,
950 PyObject *args)
951{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000952 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000953 const char *errors = NULL;
954 PyObject *mapping = NULL;
955
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000956 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000957 &str, &errors, &mapping))
958 return NULL;
959 if (mapping == Py_None)
960 mapping = NULL;
961
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000962 str = PyUnicode_FromObject(str);
963 if (str == NULL)
964 return NULL;
965 v = codec_tuple(PyUnicode_EncodeCharmap(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000966 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000967 PyUnicode_GET_SIZE(str),
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000968 mapping,
Guido van Rossume2d67f92000-03-10 23:09:23 +0000969 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000970 PyUnicode_GET_SIZE(str));
971 Py_DECREF(str);
972 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000973}
974
Martin v. Löwis3f767792006-06-04 19:36:28 +0000975static PyObject*
976charmap_build(PyObject *self, PyObject *args)
977{
978 PyObject *map;
979 if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
980 return NULL;
981 return PyUnicode_BuildEncodingMap(map);
982}
983
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000984#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000985
986static PyObject *
987mbcs_encode(PyObject *self,
988 PyObject *args)
989{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000990 PyObject *str, *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000991 const char *errors = NULL;
992
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000993 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
Guido van Rossum24bdb042000-03-28 20:29:59 +0000994 &str, &errors))
995 return NULL;
996
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000997 str = PyUnicode_FromObject(str);
998 if (str == NULL)
999 return NULL;
1000 v = codec_tuple(PyUnicode_EncodeMBCS(
Walter Dörwald9fd115c2005-11-02 08:30:08 +00001001 PyUnicode_AS_UNICODE(str),
Guido van Rossum24bdb042000-03-28 20:29:59 +00001002 PyUnicode_GET_SIZE(str),
1003 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001004 PyUnicode_GET_SIZE(str));
1005 Py_DECREF(str);
1006 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001007}
1008
Martin v. Löwis6238d2b2002-06-30 15:26:10 +00001009#endif /* MS_WINDOWS */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001010#endif /* Py_USING_UNICODE */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001011
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001012/* --- Error handler registry --------------------------------------------- */
1013
Walter Dörwald0ae29812002-10-31 13:36:29 +00001014PyDoc_STRVAR(register_error__doc__,
1015"register_error(errors, handler)\n\
1016\n\
1017Register the specified error handler under the name\n\
1018errors. handler must be a callable object, that\n\
1019will be called with an exception instance containing\n\
1020information about the location of the encoding/decoding\n\
1021error and must return a (replacement, new position) tuple.");
1022
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001023static PyObject *register_error(PyObject *self, PyObject *args)
1024{
1025 const char *name;
1026 PyObject *handler;
1027
1028 if (!PyArg_ParseTuple(args, "sO:register_error",
1029 &name, &handler))
1030 return NULL;
1031 if (PyCodec_RegisterError(name, handler))
1032 return NULL;
Georg Brandl96a8c392006-05-29 21:04:52 +00001033 Py_RETURN_NONE;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001034}
1035
Walter Dörwald0ae29812002-10-31 13:36:29 +00001036PyDoc_STRVAR(lookup_error__doc__,
1037"lookup_error(errors) -> handler\n\
1038\n\
1039Return the error handler for the specified error handling name\n\
1040or raise a LookupError, if no handler exists under this name.");
1041
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001042static PyObject *lookup_error(PyObject *self, PyObject *args)
1043{
1044 const char *name;
1045
1046 if (!PyArg_ParseTuple(args, "s:lookup_error",
1047 &name))
1048 return NULL;
1049 return PyCodec_LookupError(name);
1050}
1051
Guido van Rossume2d67f92000-03-10 23:09:23 +00001052/* --- Module API --------------------------------------------------------- */
1053
1054static PyMethodDef _codecs_functions[] = {
Georg Brandl96a8c392006-05-29 21:04:52 +00001055 {"register", codec_register, METH_O,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001056 register__doc__},
Marc-André Lemburg3f419742004-07-10 12:06:10 +00001057 {"lookup", codec_lookup, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001058 lookup__doc__},
Brett Cannon3e377de2004-07-10 21:41:14 +00001059 {"encode", codec_encode, METH_VARARGS,
1060 encode__doc__},
1061 {"decode", codec_decode, METH_VARARGS,
1062 decode__doc__},
Martin v. Löwis8a8da792002-08-14 07:46:28 +00001063 {"escape_encode", escape_encode, METH_VARARGS},
1064 {"escape_decode", escape_decode, METH_VARARGS},
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001065#ifdef Py_USING_UNICODE
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001066 {"utf_8_encode", utf_8_encode, METH_VARARGS},
1067 {"utf_8_decode", utf_8_decode, METH_VARARGS},
1068 {"utf_7_encode", utf_7_encode, METH_VARARGS},
1069 {"utf_7_decode", utf_7_decode, METH_VARARGS},
1070 {"utf_16_encode", utf_16_encode, METH_VARARGS},
1071 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1072 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1073 {"utf_16_decode", utf_16_decode, METH_VARARGS},
1074 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1075 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1076 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
Walter Dörwald6e390802007-08-17 16:41:28 +00001077 {"utf_32_encode", utf_32_encode, METH_VARARGS},
1078 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1079 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1080 {"utf_32_decode", utf_32_decode, METH_VARARGS},
1081 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1082 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1083 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001084 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1085 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1086 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1087 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1088 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1089 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1090 {"latin_1_encode", latin_1_encode, METH_VARARGS},
1091 {"latin_1_decode", latin_1_decode, METH_VARARGS},
1092 {"ascii_encode", ascii_encode, METH_VARARGS},
1093 {"ascii_decode", ascii_decode, METH_VARARGS},
1094 {"charmap_encode", charmap_encode, METH_VARARGS},
1095 {"charmap_decode", charmap_decode, METH_VARARGS},
Martin v. Löwis3f767792006-06-04 19:36:28 +00001096 {"charmap_build", charmap_build, METH_VARARGS},
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001097 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
1098 {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
Martin v. Löwis6238d2b2002-06-30 15:26:10 +00001099#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001100 {"mbcs_encode", mbcs_encode, METH_VARARGS},
1101 {"mbcs_decode", mbcs_decode, METH_VARARGS},
Guido van Rossum24bdb042000-03-28 20:29:59 +00001102#endif
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001103#endif /* Py_USING_UNICODE */
Walter Dörwald0ae29812002-10-31 13:36:29 +00001104 {"register_error", register_error, METH_VARARGS,
1105 register_error__doc__},
1106 {"lookup_error", lookup_error, METH_VARARGS,
1107 lookup_error__doc__},
Guido van Rossume2d67f92000-03-10 23:09:23 +00001108 {NULL, NULL} /* sentinel */
1109};
1110
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001111PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00001112init_codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001113{
1114 Py_InitModule("_codecs", _codecs_functions);
1115}