blob: 11d7cd0ad593d7ee54c7428b1c5748bf785cc57a [file] [log] [blame]
Guido van Rossume2d67f92000-03-10 23:09:23 +00001/* ------------------------------------------------------------------------
2
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
5
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
8
9 The codec registry is accessible via:
10
11 register(search_function) -> None
12
Walter Dörwald219336a2007-07-19 13:04:38 +000013 lookup(encoding) -> CodecInfo object
Guido van Rossume2d67f92000-03-10 23:09:23 +000014
15 The builtin Unicode codecs use the following interface:
16
Walter Dörwald9fd115c2005-11-02 08:30:08 +000017 <encoding>_encode(Unicode_object[,errors='strict']) ->
Antoine Pitrouc83ea132010-05-09 14:46:46 +000018 (string object, bytes consumed)
Guido van Rossume2d67f92000-03-10 23:09:23 +000019
Walter Dörwald9fd115c2005-11-02 08:30:08 +000020 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
Guido van Rossume2d67f92000-03-10 23:09:23 +000021 (Unicode object, bytes consumed)
22
  <encoding>_encode() interfaces also accept non-Unicode objects as
  input. The objects are then converted to Unicode using
  PyUnicode_FromObject() prior to applying the conversion.
26
Guido van Rossume2d67f92000-03-10 23:09:23 +000027 These <encoding>s are available: utf_8, unicode_escape,
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +000028 raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
29 mbcs (on win32).
30
Guido van Rossume2d67f92000-03-10 23:09:23 +000031
32Written by Marc-Andre Lemburg (mal@lemburg.com).
33
Guido van Rossum16b1ad92000-08-03 16:24:25 +000034Copyright (c) Corporation for National Research Initiatives.
Guido van Rossume2d67f92000-03-10 23:09:23 +000035
36 ------------------------------------------------------------------------ */
37
Martin v. Löwis18e16552006-02-15 17:27:45 +000038#define PY_SSIZE_T_CLEAN
Guido van Rossume2d67f92000-03-10 23:09:23 +000039#include "Python.h"
40
41/* --- Registry ----------------------------------------------------------- */
42
Walter Dörwald0ae29812002-10-31 13:36:29 +000043PyDoc_STRVAR(register__doc__,
44"register(search_function)\n\
45\n\
46Register a codec search function. Search functions are expected to take\n\
47one argument, the encoding name in all lower case letters, and return\n\
Walter Dörwald219336a2007-07-19 13:04:38 +000048a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
49(or a CodecInfo object).");
Walter Dörwald0ae29812002-10-31 13:36:29 +000050
Guido van Rossume2d67f92000-03-10 23:09:23 +000051static
Georg Brandl96a8c392006-05-29 21:04:52 +000052PyObject *codec_register(PyObject *self, PyObject *search_function)
Guido van Rossume2d67f92000-03-10 23:09:23 +000053{
Guido van Rossume2d67f92000-03-10 23:09:23 +000054 if (PyCodec_Register(search_function))
Georg Brandl96a8c392006-05-29 21:04:52 +000055 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000056
Georg Brandl96a8c392006-05-29 21:04:52 +000057 Py_RETURN_NONE;
Guido van Rossume2d67f92000-03-10 23:09:23 +000058}
59
Walter Dörwald0ae29812002-10-31 13:36:29 +000060PyDoc_STRVAR(lookup__doc__,
Walter Dörwald219336a2007-07-19 13:04:38 +000061"lookup(encoding) -> CodecInfo\n\
Walter Dörwald0ae29812002-10-31 13:36:29 +000062\n\
63Looks up a codec tuple in the Python codec registry and returns\n\
Georg Brandl4c95e092009-03-15 21:32:06 +000064a CodecInfo object.");
Walter Dörwald0ae29812002-10-31 13:36:29 +000065
Guido van Rossume2d67f92000-03-10 23:09:23 +000066static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000067PyObject *codec_lookup(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000068{
69 char *encoding;
70
71 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
Georg Brandl96a8c392006-05-29 21:04:52 +000072 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +000073
74 return _PyCodec_Lookup(encoding);
Guido van Rossume2d67f92000-03-10 23:09:23 +000075}
76
Marc-André Lemburg3f419742004-07-10 12:06:10 +000077PyDoc_STRVAR(encode__doc__,
78"encode(obj, [encoding[,errors]]) -> object\n\
79\n\
80Encodes obj using the codec registered for encoding. encoding defaults\n\
81to the default encoding. errors may be given to set a different error\n\
82handling scheme. Default is 'strict' meaning that encoding errors raise\n\
83a ValueError. Other possible values are 'ignore', 'replace' and\n\
84'xmlcharrefreplace' as well as any other name registered with\n\
85codecs.register_error that can handle ValueErrors.");
86
87static PyObject *
88codec_encode(PyObject *self, PyObject *args)
89{
Brett Cannon3e377de2004-07-10 21:41:14 +000090 const char *encoding = NULL;
91 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +000092 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000093
Marc-André Lemburg3f419742004-07-10 12:06:10 +000094 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
95 return NULL;
96
Martin v. Löwise2713be2005-03-08 15:03:08 +000097#ifdef Py_USING_UNICODE
Marc-André Lemburg3f419742004-07-10 12:06:10 +000098 if (encoding == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +000099 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwise2713be2005-03-08 15:03:08 +0000100#else
101 if (encoding == NULL) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000102 PyErr_SetString(PyExc_ValueError, "no encoding specified");
103 return NULL;
Martin v. Löwise2713be2005-03-08 15:03:08 +0000104 }
105#endif
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000106
107 /* Encode via the codec registry */
Georg Brandl96a8c392006-05-29 21:04:52 +0000108 return PyCodec_Encode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000109}
110
111PyDoc_STRVAR(decode__doc__,
112"decode(obj, [encoding[,errors]]) -> object\n\
113\n\
114Decodes obj using the codec registered for encoding. encoding defaults\n\
115to the default encoding. errors may be given to set a different error\n\
116handling scheme. Default is 'strict' meaning that encoding errors raise\n\
117a ValueError. Other possible values are 'ignore' and 'replace'\n\
Andrew M. Kuchling658c45f2008-10-03 12:26:42 +0000118as well as any other name registered with codecs.register_error that is\n\
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000119able to handle ValueErrors.");
120
121static PyObject *
122codec_decode(PyObject *self, PyObject *args)
123{
Brett Cannon3e377de2004-07-10 21:41:14 +0000124 const char *encoding = NULL;
125 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000126 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000127
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000128 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
129 return NULL;
130
Martin v. Löwise2713be2005-03-08 15:03:08 +0000131#ifdef Py_USING_UNICODE
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000132 if (encoding == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000133 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwise2713be2005-03-08 15:03:08 +0000134#else
135 if (encoding == NULL) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000136 PyErr_SetString(PyExc_ValueError, "no encoding specified");
137 return NULL;
Martin v. Löwise2713be2005-03-08 15:03:08 +0000138 }
139#endif
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000140
141 /* Decode via the codec registry */
Georg Brandl96a8c392006-05-29 21:04:52 +0000142 return PyCodec_Decode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000143}
144
Guido van Rossume2d67f92000-03-10 23:09:23 +0000145/* --- Helpers ------------------------------------------------------------ */
146
147static
148PyObject *codec_tuple(PyObject *unicode,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000149 Py_ssize_t len)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000150{
Georg Brandl96a8c392006-05-29 21:04:52 +0000151 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000152 if (unicode == NULL)
Georg Brandl96a8c392006-05-29 21:04:52 +0000153 return NULL;
154 v = Py_BuildValue("On", unicode, len);
155 Py_DECREF(unicode);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000156 return v;
157}
158
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000159/* --- String codecs ------------------------------------------------------ */
160static PyObject *
161escape_decode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000162 PyObject *args)
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000163{
164 const char *errors = NULL;
165 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000166 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000167
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000168 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000169 &data, &size, &errors))
170 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000171 return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000172 size);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000173}
174
175static PyObject *
176escape_encode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000177 PyObject *args)
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000178{
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000179 PyObject *str;
180 const char *errors = NULL;
181 char *buf;
182 Py_ssize_t consumed, len;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000183
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000184 if (!PyArg_ParseTuple(args, "S|z:escape_encode",
185 &str, &errors))
186 return NULL;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000187
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000188 consumed = PyString_GET_SIZE(str);
189 str = PyString_Repr(str, 0);
190 if (!str)
191 return NULL;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000192
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000193 /* The string will be quoted. Unquote, similar to unicode-escape. */
194 buf = PyString_AS_STRING (str);
195 len = PyString_GET_SIZE (str);
196 memmove(buf, buf+1, len-2);
197 if (_PyString_Resize(&str, len-2) < 0)
198 return NULL;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000199
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000200 return codec_tuple(str, consumed);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000201}
202
203#ifdef Py_USING_UNICODE
Guido van Rossume2d67f92000-03-10 23:09:23 +0000204/* --- Decoder ------------------------------------------------------------ */
205
206static PyObject *
207unicode_internal_decode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000208 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000209{
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000210 PyObject *obj;
211 const char *errors = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000212 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000213 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000214
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000215 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000216 &obj, &errors))
217 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000218
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000219 if (PyUnicode_Check(obj)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000220 Py_INCREF(obj);
221 return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000222 }
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000223 else {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000224 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
225 return NULL;
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000226
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000227 return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
228 size);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000229 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000230}
231
232static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000233utf_7_decode(PyObject *self,
Amaury Forgeot d'Arc50879802007-11-20 23:31:27 +0000234 PyObject *args)
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000235{
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000236 Py_buffer pbuf;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000237 const char *errors = NULL;
Amaury Forgeot d'Arc50879802007-11-20 23:31:27 +0000238 int final = 0;
239 Py_ssize_t consumed;
240 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000241
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000242 if (!PyArg_ParseTuple(args, "s*|zi:utf_7_decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000243 &pbuf, &errors, &final))
244 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000245 consumed = pbuf.len;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000246
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000247 decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000248 final ? NULL : &consumed);
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000249 PyBuffer_Release(&pbuf);
Amaury Forgeot d'Arc50879802007-11-20 23:31:27 +0000250 if (decoded == NULL)
251 return NULL;
252 return codec_tuple(decoded, consumed);
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000253}
254
255static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000256utf_8_decode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000257 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000258{
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000259 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000260 const char *errors = NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000261 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000262 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000263 PyObject *decoded = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000264
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000265 if (!PyArg_ParseTuple(args, "s*|zi:utf_8_decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000266 &pbuf, &errors, &final))
267 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000268 consumed = pbuf.len;
269
270 decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000271 final ? NULL : &consumed);
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000272 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000273 if (decoded == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000274 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000275 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000276}
277
278static PyObject *
279utf_16_decode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000280 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000281{
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000282 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000283 const char *errors = NULL;
284 int byteorder = 0;
Walter Dörwald69652032004-09-07 20:24:22 +0000285 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000286 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000287 PyObject *decoded;
288
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000289 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000290 &pbuf, &errors, &final))
291 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000292 consumed = pbuf.len; /* This is overwritten unless final is true. */
293 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000294 &byteorder, final ? NULL : &consumed);
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000295 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000296 if (decoded == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000297 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000298 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000299}
300
301static PyObject *
302utf_16_le_decode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000303 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000304{
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000305 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000306 const char *errors = NULL;
307 int byteorder = -1;
Walter Dörwald69652032004-09-07 20:24:22 +0000308 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000309 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000310 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000311
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000312 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_le_decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000313 &pbuf, &errors, &final))
314 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000315
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000316 consumed = pbuf.len; /* This is overwritten unless final is true. */
317 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000318 &byteorder, final ? NULL : &consumed);
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000319 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000320 if (decoded == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000321 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000322 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000323}
324
325static PyObject *
326utf_16_be_decode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000327 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000328{
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000329 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000330 const char *errors = NULL;
331 int byteorder = 1;
Walter Dörwald69652032004-09-07 20:24:22 +0000332 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000333 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000334 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000335
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000336 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_be_decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000337 &pbuf, &errors, &final))
338 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000339
340 consumed = pbuf.len; /* This is overwritten unless final is true. */
341 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000342 &byteorder, final ? NULL : &consumed);
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000343 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000344 if (decoded == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000345 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000346 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000347}
348
349/* This non-standard version also provides access to the byteorder
350 parameter of the builtin UTF-16 codec.
351
352 It returns a tuple (unicode, bytesread, byteorder) with byteorder
353 being the value in effect at the end of data.
354
355*/
356
357static PyObject *
358utf_16_ex_decode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000359 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000360{
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000361 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000362 const char *errors = NULL;
363 int byteorder = 0;
364 PyObject *unicode, *tuple;
Walter Dörwald69652032004-09-07 20:24:22 +0000365 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000366 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000367
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000368 if (!PyArg_ParseTuple(args, "s*|zii:utf_16_ex_decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000369 &pbuf, &errors, &byteorder, &final))
370 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000371 consumed = pbuf.len; /* This is overwritten unless final is true. */
372 unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000373 &byteorder, final ? NULL : &consumed);
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000374 PyBuffer_Release(&pbuf);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000375 if (unicode == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000376 return NULL;
Georg Brandl96a8c392006-05-29 21:04:52 +0000377 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000378 Py_DECREF(unicode);
379 return tuple;
380}
381
382static PyObject *
Walter Dörwald6e390802007-08-17 16:41:28 +0000383utf_32_decode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000384 PyObject *args)
Walter Dörwald6e390802007-08-17 16:41:28 +0000385{
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000386 Py_buffer pbuf;
Walter Dörwald6e390802007-08-17 16:41:28 +0000387 const char *errors = NULL;
388 int byteorder = 0;
389 int final = 0;
390 Py_ssize_t consumed;
391 PyObject *decoded;
392
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000393 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000394 &pbuf, &errors, &final))
395 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000396 consumed = pbuf.len; /* This is overwritten unless final is true. */
397 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000398 &byteorder, final ? NULL : &consumed);
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000399 PyBuffer_Release(&pbuf);
Walter Dörwald6e390802007-08-17 16:41:28 +0000400 if (decoded == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000401 return NULL;
Walter Dörwald6e390802007-08-17 16:41:28 +0000402 return codec_tuple(decoded, consumed);
403}
404
405static PyObject *
406utf_32_le_decode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000407 PyObject *args)
Walter Dörwald6e390802007-08-17 16:41:28 +0000408{
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000409 Py_buffer pbuf;
Walter Dörwald6e390802007-08-17 16:41:28 +0000410 const char *errors = NULL;
411 int byteorder = -1;
412 int final = 0;
413 Py_ssize_t consumed;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000414 PyObject *decoded;
Walter Dörwald6e390802007-08-17 16:41:28 +0000415
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000416 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_le_decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000417 &pbuf, &errors, &final))
418 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000419 consumed = pbuf.len; /* This is overwritten unless final is true. */
420 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000421 &byteorder, final ? NULL : &consumed);
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000422 PyBuffer_Release(&pbuf);
Walter Dörwald6e390802007-08-17 16:41:28 +0000423 if (decoded == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000424 return NULL;
Walter Dörwald6e390802007-08-17 16:41:28 +0000425 return codec_tuple(decoded, consumed);
Walter Dörwald6e390802007-08-17 16:41:28 +0000426}
427
428static PyObject *
429utf_32_be_decode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000430 PyObject *args)
Walter Dörwald6e390802007-08-17 16:41:28 +0000431{
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000432 Py_buffer pbuf;
Walter Dörwald6e390802007-08-17 16:41:28 +0000433 const char *errors = NULL;
434 int byteorder = 1;
435 int final = 0;
436 Py_ssize_t consumed;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000437 PyObject *decoded;
Walter Dörwald6e390802007-08-17 16:41:28 +0000438
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000439 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_be_decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000440 &pbuf, &errors, &final))
441 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000442 consumed = pbuf.len; /* This is overwritten unless final is true. */
443 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000444 &byteorder, final ? NULL : &consumed);
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000445 PyBuffer_Release(&pbuf);
Walter Dörwald6e390802007-08-17 16:41:28 +0000446 if (decoded == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000447 return NULL;
Walter Dörwald6e390802007-08-17 16:41:28 +0000448 return codec_tuple(decoded, consumed);
449}
450
451/* This non-standard version also provides access to the byteorder
452 parameter of the builtin UTF-32 codec.
453
454 It returns a tuple (unicode, bytesread, byteorder) with byteorder
455 being the value in effect at the end of data.
456
457*/
458
459static PyObject *
460utf_32_ex_decode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000461 PyObject *args)
Walter Dörwald6e390802007-08-17 16:41:28 +0000462{
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000463 Py_buffer pbuf;
Walter Dörwald6e390802007-08-17 16:41:28 +0000464 const char *errors = NULL;
465 int byteorder = 0;
466 PyObject *unicode, *tuple;
467 int final = 0;
468 Py_ssize_t consumed;
469
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000470 if (!PyArg_ParseTuple(args, "s*|zii:utf_32_ex_decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000471 &pbuf, &errors, &byteorder, &final))
472 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000473 consumed = pbuf.len; /* This is overwritten unless final is true. */
474 unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000475 &byteorder, final ? NULL : &consumed);
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000476 PyBuffer_Release(&pbuf);
Walter Dörwald6e390802007-08-17 16:41:28 +0000477 if (unicode == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000478 return NULL;
Walter Dörwald6e390802007-08-17 16:41:28 +0000479 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
480 Py_DECREF(unicode);
481 return tuple;
482}
483
484static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000485unicode_escape_decode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000486 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000487{
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000488 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000489 const char *errors = NULL;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000490 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000491
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000492 if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000493 &pbuf, &errors))
494 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000495
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000496 unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
497 PyBuffer_Release(&pbuf);
498 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000499}
500
501static PyObject *
502raw_unicode_escape_decode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000503 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000504{
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000505 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000506 const char *errors = NULL;
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000507 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000508
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000509 if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000510 &pbuf, &errors))
511 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000512
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000513 unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
514 PyBuffer_Release(&pbuf);
515 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000516}
517
518static PyObject *
519latin_1_decode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000520 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000521{
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000522 Py_buffer pbuf;
523 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000524 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000525
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000526 if (!PyArg_ParseTuple(args, "s*|z:latin_1_decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000527 &pbuf, &errors))
528 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000529
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000530 unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
531 PyBuffer_Release(&pbuf);
532 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000533}
534
535static PyObject *
536ascii_decode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000537 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000538{
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000539 Py_buffer pbuf;
540 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000541 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000542
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000543 if (!PyArg_ParseTuple(args, "s*|z:ascii_decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000544 &pbuf, &errors))
545 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000546
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000547 unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
548 PyBuffer_Release(&pbuf);
549 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000550}
551
552static PyObject *
553charmap_decode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000554 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000555{
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000556 Py_buffer pbuf;
557 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000558 const char *errors = NULL;
559 PyObject *mapping = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000560
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000561 if (!PyArg_ParseTuple(args, "s*|zO:charmap_decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000562 &pbuf, &errors, &mapping))
563 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000564 if (mapping == Py_None)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000565 mapping = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000566
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000567 unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
568 PyBuffer_Release(&pbuf);
569 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000570}
571
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000572#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000573
574static PyObject *
575mbcs_decode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000576 PyObject *args)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000577{
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000578 Py_buffer pbuf;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000579 const char *errors = NULL;
Martin v. Löwis961b91b2006-08-02 13:53:55 +0000580 int final = 0;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000581 Py_ssize_t consumed;
582 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000583
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000584 if (!PyArg_ParseTuple(args, "s*|zi:mbcs_decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000585 &pbuf, &errors, &final))
586 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000587 consumed = pbuf.len;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000588
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000589 decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000590 final ? NULL : &consumed);
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000591 PyBuffer_Release(&pbuf);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000592 if (decoded == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000593 return NULL;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000594 return codec_tuple(decoded, consumed);
Guido van Rossum24bdb042000-03-28 20:29:59 +0000595}
596
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000597#endif /* MS_WINDOWS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000598
Guido van Rossume2d67f92000-03-10 23:09:23 +0000599/* --- Encoder ------------------------------------------------------------ */
600
601static PyObject *
602readbuffer_encode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000603 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000604{
605 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000606 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000607 const char *errors = NULL;
608
609 if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000610 &data, &size, &errors))
611 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000612
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000613 return codec_tuple(PyString_FromStringAndSize(data, size),
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000614 size);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000615}
616
617static PyObject *
618charbuffer_encode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000619 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000620{
621 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000622 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000623 const char *errors = NULL;
624
625 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000626 &data, &size, &errors))
627 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000628
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000629 return codec_tuple(PyString_FromStringAndSize(data, size),
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000630 size);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000631}
632
633static PyObject *
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000634unicode_internal_encode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000635 PyObject *args)
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000636{
637 PyObject *obj;
638 const char *errors = NULL;
639 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000640 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000641
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000642 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000643 &obj, &errors))
644 return NULL;
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000645
646 if (PyUnicode_Check(obj)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000647 data = PyUnicode_AS_DATA(obj);
648 size = PyUnicode_GET_DATA_SIZE(obj);
649 return codec_tuple(PyString_FromStringAndSize(data, size),
650 PyUnicode_GET_SIZE(obj));
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000651 }
652 else {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000653 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
654 return NULL;
655 return codec_tuple(PyString_FromStringAndSize(data, size),
656 size);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000657 }
658}
659
660static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000661utf_7_encode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000662 PyObject *args)
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000663{
664 PyObject *str, *v;
665 const char *errors = NULL;
666
667 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000668 &str, &errors))
669 return NULL;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000670
671 str = PyUnicode_FromObject(str);
672 if (str == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000673 return NULL;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000674 v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000675 PyUnicode_GET_SIZE(str),
676 0,
677 0,
678 errors),
679 PyUnicode_GET_SIZE(str));
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000680 Py_DECREF(str);
681 return v;
682}
683
684static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000685utf_8_encode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000686 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000687{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000688 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000689 const char *errors = NULL;
690
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000691 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000692 &str, &errors))
693 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000694
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000695 str = PyUnicode_FromObject(str);
696 if (str == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000697 return NULL;
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000698 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000699 PyUnicode_GET_SIZE(str),
700 errors),
701 PyUnicode_GET_SIZE(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000702 Py_DECREF(str);
703 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000704}
705
706/* This version provides access to the byteorder parameter of the
707 builtin UTF-16 codecs as optional third argument. It defaults to 0
708 which means: use the native byte order and prepend the data with a
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000709 BOM mark.
Guido van Rossume2d67f92000-03-10 23:09:23 +0000710
711*/
712
713static PyObject *
714utf_16_encode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000715 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000716{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000717 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000718 const char *errors = NULL;
719 int byteorder = 0;
720
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000721 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000722 &str, &errors, &byteorder))
723 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000724
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000725 str = PyUnicode_FromObject(str);
726 if (str == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000727 return NULL;
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000728 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000729 PyUnicode_GET_SIZE(str),
730 errors,
731 byteorder),
732 PyUnicode_GET_SIZE(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000733 Py_DECREF(str);
734 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000735}
736
737static PyObject *
738utf_16_le_encode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000739 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000740{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000741 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000742 const char *errors = NULL;
743
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000744 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000745 &str, &errors))
746 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000747
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000748 str = PyUnicode_FromObject(str);
749 if (str == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000750 return NULL;
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000751 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000752 PyUnicode_GET_SIZE(str),
753 errors,
754 -1),
755 PyUnicode_GET_SIZE(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000756 Py_DECREF(str);
757 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000758}
759
760static PyObject *
761utf_16_be_encode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000762 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000763{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000764 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000765 const char *errors = NULL;
766
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000767 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000768 &str, &errors))
769 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000770
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000771 str = PyUnicode_FromObject(str);
772 if (str == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000773 return NULL;
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000774 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000775 PyUnicode_GET_SIZE(str),
776 errors,
777 +1),
778 PyUnicode_GET_SIZE(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000779 Py_DECREF(str);
780 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000781}
782
Walter Dörwald6e390802007-08-17 16:41:28 +0000783/* This version provides access to the byteorder parameter of the
784 builtin UTF-32 codecs as optional third argument. It defaults to 0
785 which means: use the native byte order and prepend the data with a
786 BOM mark.
787
788*/
789
790static PyObject *
791utf_32_encode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000792 PyObject *args)
Walter Dörwald6e390802007-08-17 16:41:28 +0000793{
794 PyObject *str, *v;
795 const char *errors = NULL;
796 int byteorder = 0;
797
798 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000799 &str, &errors, &byteorder))
800 return NULL;
Walter Dörwald6e390802007-08-17 16:41:28 +0000801
802 str = PyUnicode_FromObject(str);
803 if (str == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000804 return NULL;
Walter Dörwald6e390802007-08-17 16:41:28 +0000805 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000806 PyUnicode_GET_SIZE(str),
807 errors,
808 byteorder),
809 PyUnicode_GET_SIZE(str));
Walter Dörwald6e390802007-08-17 16:41:28 +0000810 Py_DECREF(str);
811 return v;
812}
813
814static PyObject *
815utf_32_le_encode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000816 PyObject *args)
Walter Dörwald6e390802007-08-17 16:41:28 +0000817{
818 PyObject *str, *v;
819 const char *errors = NULL;
820
821 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000822 &str, &errors))
823 return NULL;
Walter Dörwald6e390802007-08-17 16:41:28 +0000824
825 str = PyUnicode_FromObject(str);
826 if (str == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000827 return NULL;
Walter Dörwald6e390802007-08-17 16:41:28 +0000828 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000829 PyUnicode_GET_SIZE(str),
830 errors,
831 -1),
832 PyUnicode_GET_SIZE(str));
Walter Dörwald6e390802007-08-17 16:41:28 +0000833 Py_DECREF(str);
834 return v;
835}
836
837static PyObject *
838utf_32_be_encode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000839 PyObject *args)
Walter Dörwald6e390802007-08-17 16:41:28 +0000840{
841 PyObject *str, *v;
842 const char *errors = NULL;
843
844 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000845 &str, &errors))
846 return NULL;
Walter Dörwald6e390802007-08-17 16:41:28 +0000847
848 str = PyUnicode_FromObject(str);
849 if (str == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000850 return NULL;
Walter Dörwald6e390802007-08-17 16:41:28 +0000851 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000852 PyUnicode_GET_SIZE(str),
853 errors,
854 +1),
855 PyUnicode_GET_SIZE(str));
Walter Dörwald6e390802007-08-17 16:41:28 +0000856 Py_DECREF(str);
857 return v;
858}
859
Guido van Rossume2d67f92000-03-10 23:09:23 +0000860static PyObject *
861unicode_escape_encode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000862 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000863{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000864 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000865 const char *errors = NULL;
866
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000867 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000868 &str, &errors))
869 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000870
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000871 str = PyUnicode_FromObject(str);
872 if (str == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000873 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000874 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000875 PyUnicode_GET_SIZE(str)),
876 PyUnicode_GET_SIZE(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000877 Py_DECREF(str);
878 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000879}
880
881static PyObject *
882raw_unicode_escape_encode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000883 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000884{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000885 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000886 const char *errors = NULL;
887
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000888 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000889 &str, &errors))
890 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000891
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000892 str = PyUnicode_FromObject(str);
893 if (str == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000894 return NULL;
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000895 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000896 PyUnicode_AS_UNICODE(str),
897 PyUnicode_GET_SIZE(str)),
898 PyUnicode_GET_SIZE(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000899 Py_DECREF(str);
900 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000901}
902
903static PyObject *
904latin_1_encode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000905 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000906{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000907 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000908 const char *errors = NULL;
909
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000910 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000911 &str, &errors))
912 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000913
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000914 str = PyUnicode_FromObject(str);
915 if (str == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000916 return NULL;
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000917 v = codec_tuple(PyUnicode_EncodeLatin1(
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000918 PyUnicode_AS_UNICODE(str),
919 PyUnicode_GET_SIZE(str),
920 errors),
921 PyUnicode_GET_SIZE(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000922 Py_DECREF(str);
923 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000924}
925
926static PyObject *
927ascii_encode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000928 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000929{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000930 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000931 const char *errors = NULL;
932
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000933 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000934 &str, &errors))
935 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000936
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000937 str = PyUnicode_FromObject(str);
938 if (str == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000939 return NULL;
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000940 v = codec_tuple(PyUnicode_EncodeASCII(
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000941 PyUnicode_AS_UNICODE(str),
942 PyUnicode_GET_SIZE(str),
943 errors),
944 PyUnicode_GET_SIZE(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000945 Py_DECREF(str);
946 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000947}
948
949static PyObject *
950charmap_encode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000951 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000952{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000953 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000954 const char *errors = NULL;
955 PyObject *mapping = NULL;
956
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000957 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000958 &str, &errors, &mapping))
959 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000960 if (mapping == Py_None)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000961 mapping = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000962
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000963 str = PyUnicode_FromObject(str);
964 if (str == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000965 return NULL;
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000966 v = codec_tuple(PyUnicode_EncodeCharmap(
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000967 PyUnicode_AS_UNICODE(str),
968 PyUnicode_GET_SIZE(str),
969 mapping,
970 errors),
971 PyUnicode_GET_SIZE(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000972 Py_DECREF(str);
973 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000974}
975
Martin v. Löwis3f767792006-06-04 19:36:28 +0000976static PyObject*
977charmap_build(PyObject *self, PyObject *args)
978{
979 PyObject *map;
980 if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
981 return NULL;
982 return PyUnicode_BuildEncodingMap(map);
983}
984
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000985#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000986
987static PyObject *
988mbcs_encode(PyObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000989 PyObject *args)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000990{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000991 PyObject *str, *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000992 const char *errors = NULL;
993
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000994 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000995 &str, &errors))
996 return NULL;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000997
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000998 str = PyUnicode_FromObject(str);
999 if (str == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001000 return NULL;
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001001 v = codec_tuple(PyUnicode_EncodeMBCS(
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001002 PyUnicode_AS_UNICODE(str),
1003 PyUnicode_GET_SIZE(str),
1004 errors),
1005 PyUnicode_GET_SIZE(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001006 Py_DECREF(str);
1007 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001008}
1009
Martin v. Löwis6238d2b2002-06-30 15:26:10 +00001010#endif /* MS_WINDOWS */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001011#endif /* Py_USING_UNICODE */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001012
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001013/* --- Error handler registry --------------------------------------------- */
1014
Walter Dörwald0ae29812002-10-31 13:36:29 +00001015PyDoc_STRVAR(register_error__doc__,
1016"register_error(errors, handler)\n\
1017\n\
1018Register the specified error handler under the name\n\
1019errors. handler must be a callable object, that\n\
1020will be called with an exception instance containing\n\
1021information about the location of the encoding/decoding\n\
1022error and must return a (replacement, new position) tuple.");
1023
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001024static PyObject *register_error(PyObject *self, PyObject *args)
1025{
1026 const char *name;
1027 PyObject *handler;
1028
1029 if (!PyArg_ParseTuple(args, "sO:register_error",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001030 &name, &handler))
1031 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001032 if (PyCodec_RegisterError(name, handler))
1033 return NULL;
Georg Brandl96a8c392006-05-29 21:04:52 +00001034 Py_RETURN_NONE;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001035}
1036
Walter Dörwald0ae29812002-10-31 13:36:29 +00001037PyDoc_STRVAR(lookup_error__doc__,
1038"lookup_error(errors) -> handler\n\
1039\n\
1040Return the error handler for the specified error handling name\n\
1041or raise a LookupError, if no handler exists under this name.");
1042
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001043static PyObject *lookup_error(PyObject *self, PyObject *args)
1044{
1045 const char *name;
1046
1047 if (!PyArg_ParseTuple(args, "s:lookup_error",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001048 &name))
1049 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001050 return PyCodec_LookupError(name);
1051}
1052
Guido van Rossume2d67f92000-03-10 23:09:23 +00001053/* --- Module API --------------------------------------------------------- */
1054
1055static PyMethodDef _codecs_functions[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001056 {"register", codec_register, METH_O,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001057 register__doc__},
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001058 {"lookup", codec_lookup, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001059 lookup__doc__},
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001060 {"encode", codec_encode, METH_VARARGS,
1061 encode__doc__},
1062 {"decode", codec_decode, METH_VARARGS,
1063 decode__doc__},
1064 {"escape_encode", escape_encode, METH_VARARGS},
1065 {"escape_decode", escape_decode, METH_VARARGS},
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001066#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001067 {"utf_8_encode", utf_8_encode, METH_VARARGS},
1068 {"utf_8_decode", utf_8_decode, METH_VARARGS},
1069 {"utf_7_encode", utf_7_encode, METH_VARARGS},
1070 {"utf_7_decode", utf_7_decode, METH_VARARGS},
1071 {"utf_16_encode", utf_16_encode, METH_VARARGS},
1072 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1073 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1074 {"utf_16_decode", utf_16_decode, METH_VARARGS},
1075 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1076 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1077 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
1078 {"utf_32_encode", utf_32_encode, METH_VARARGS},
1079 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1080 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1081 {"utf_32_decode", utf_32_decode, METH_VARARGS},
1082 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1083 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1084 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
1085 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1086 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1087 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1088 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1089 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1090 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1091 {"latin_1_encode", latin_1_encode, METH_VARARGS},
1092 {"latin_1_decode", latin_1_decode, METH_VARARGS},
1093 {"ascii_encode", ascii_encode, METH_VARARGS},
1094 {"ascii_decode", ascii_decode, METH_VARARGS},
1095 {"charmap_encode", charmap_encode, METH_VARARGS},
1096 {"charmap_decode", charmap_decode, METH_VARARGS},
1097 {"charmap_build", charmap_build, METH_VARARGS},
1098 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
1099 {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
Martin v. Löwis6238d2b2002-06-30 15:26:10 +00001100#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001101 {"mbcs_encode", mbcs_encode, METH_VARARGS},
1102 {"mbcs_decode", mbcs_decode, METH_VARARGS},
Guido van Rossum24bdb042000-03-28 20:29:59 +00001103#endif
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001104#endif /* Py_USING_UNICODE */
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001105 {"register_error", register_error, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001106 register_error__doc__},
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001107 {"lookup_error", lookup_error, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001108 lookup_error__doc__},
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001109 {NULL, NULL} /* sentinel */
Guido van Rossume2d67f92000-03-10 23:09:23 +00001110};
1111
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001112PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00001113init_codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001114{
1115 Py_InitModule("_codecs", _codecs_functions);
1116}