blob: 93cb1b702b5a816e7dbb7235957f27fee1055a50 [file] [log] [blame]
/* ------------------------------------------------------------------------

   _codecs -- Provides access to the codec registry and the builtin
              codecs.

   This module should never be imported directly. The standard library
   module "codecs" wraps this builtin module for use within Python.

   The codec registry is accessible via:

     register(search_function) -> None

     lookup(encoding) -> CodecInfo object

   The builtin Unicode codecs use the following interface:

     <encoding>_encode(Unicode_object[,errors='strict']) ->
        (string object, bytes consumed)

     <encoding>_decode(char_buffer_obj[,errors='strict']) ->
        (Unicode object, bytes consumed)

   <encoding>_encode() interfaces also accept non-Unicode objects as
   input. The objects are then converted to Unicode using
   PyUnicode_FromObject() prior to applying the conversion.

   These <encoding>s are available: utf_8, unicode_escape,
   raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
   mbcs (on win32).


Written by Marc-Andre Lemburg (mal@lemburg.com).

Copyright (c) Corporation for National Research Initiatives.

   ------------------------------------------------------------------------ */
37
Martin v. Löwis18e16552006-02-15 17:27:45 +000038#define PY_SSIZE_T_CLEAN
Guido van Rossume2d67f92000-03-10 23:09:23 +000039#include "Python.h"
40
Victor Stinner87a7c822011-11-10 20:05:55 +010041#ifdef MS_WINDOWS
42#include <windows.h>
43#endif
44
/* --- Registry ----------------------------------------------------------- */
46
Walter Dörwald0ae29812002-10-31 13:36:29 +000047PyDoc_STRVAR(register__doc__,
48"register(search_function)\n\
49\n\
50Register a codec search function. Search functions are expected to take\n\
51one argument, the encoding name in all lower case letters, and return\n\
Guido van Rossum36e0a922007-07-20 04:05:57 +000052a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
53(or a CodecInfo object).");
Walter Dörwald0ae29812002-10-31 13:36:29 +000054
Guido van Rossume2d67f92000-03-10 23:09:23 +000055static
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000056PyObject *codec_register(PyObject *self, PyObject *search_function)
Guido van Rossume2d67f92000-03-10 23:09:23 +000057{
Guido van Rossume2d67f92000-03-10 23:09:23 +000058 if (PyCodec_Register(search_function))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000059 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000060
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000061 Py_RETURN_NONE;
Guido van Rossume2d67f92000-03-10 23:09:23 +000062}
63
Walter Dörwald0ae29812002-10-31 13:36:29 +000064PyDoc_STRVAR(lookup__doc__,
Guido van Rossum36e0a922007-07-20 04:05:57 +000065"lookup(encoding) -> CodecInfo\n\
Walter Dörwald0ae29812002-10-31 13:36:29 +000066\n\
67Looks up a codec tuple in the Python codec registry and returns\n\
Benjamin Petersonf07d0022009-03-21 17:31:58 +000068a CodecInfo object.");
Walter Dörwald0ae29812002-10-31 13:36:29 +000069
Guido van Rossume2d67f92000-03-10 23:09:23 +000070static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000071PyObject *codec_lookup(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000072{
73 char *encoding;
74
75 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000076 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +000077
78 return _PyCodec_Lookup(encoding);
Guido van Rossume2d67f92000-03-10 23:09:23 +000079}
80
Marc-André Lemburg3f419742004-07-10 12:06:10 +000081PyDoc_STRVAR(encode__doc__,
82"encode(obj, [encoding[,errors]]) -> object\n\
83\n\
84Encodes obj using the codec registered for encoding. encoding defaults\n\
85to the default encoding. errors may be given to set a different error\n\
86handling scheme. Default is 'strict' meaning that encoding errors raise\n\
87a ValueError. Other possible values are 'ignore', 'replace' and\n\
88'xmlcharrefreplace' as well as any other name registered with\n\
89codecs.register_error that can handle ValueErrors.");
90
91static PyObject *
92codec_encode(PyObject *self, PyObject *args)
93{
Brett Cannon3e377de2004-07-10 21:41:14 +000094 const char *encoding = NULL;
95 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +000096 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000097
Marc-André Lemburg3f419742004-07-10 12:06:10 +000098 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
99 return NULL;
100
101 if (encoding == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000102 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000103
104 /* Encode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000105 return PyCodec_Encode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000106}
107
108PyDoc_STRVAR(decode__doc__,
109"decode(obj, [encoding[,errors]]) -> object\n\
110\n\
111Decodes obj using the codec registered for encoding. encoding defaults\n\
112to the default encoding. errors may be given to set a different error\n\
113handling scheme. Default is 'strict' meaning that encoding errors raise\n\
114a ValueError. Other possible values are 'ignore' and 'replace'\n\
Benjamin Petersonf10a79a2008-10-11 00:49:57 +0000115as well as any other name registered with codecs.register_error that is\n\
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000116able to handle ValueErrors.");
117
118static PyObject *
119codec_decode(PyObject *self, PyObject *args)
120{
Brett Cannon3e377de2004-07-10 21:41:14 +0000121 const char *encoding = NULL;
122 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000123 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000124
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000125 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
126 return NULL;
127
128 if (encoding == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000130
131 /* Decode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000132 return PyCodec_Decode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000133}
134
/* --- Helpers ------------------------------------------------------------ */
136
137static
138PyObject *codec_tuple(PyObject *unicode,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 Py_ssize_t len)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000140{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000141 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000142 if (unicode == NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000143 return NULL;
144 v = Py_BuildValue("On", unicode, len);
145 Py_DECREF(unicode);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000146 return v;
147}
148
/* --- String codecs ------------------------------------------------------ */
150static PyObject *
151escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 PyObject *args)
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000153{
154 const char *errors = NULL;
155 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000156 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000157
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000158 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000159 &data, &size, &errors))
160 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +0000161 return codec_tuple(PyBytes_DecodeEscape(data, size, errors, 0, NULL),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000162 size);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000163}
164
165static PyObject *
166escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000167 PyObject *args)
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000168{
Antoine Pitroud1188562010-06-09 16:38:55 +0000169 PyObject *str;
170 Py_ssize_t size;
171 Py_ssize_t newsize;
172 const char *errors = NULL;
173 PyObject *v;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000174
Antoine Pitroud1188562010-06-09 16:38:55 +0000175 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
176 &PyBytes_Type, &str, &errors))
177 return NULL;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000178
Antoine Pitroud1188562010-06-09 16:38:55 +0000179 size = PyBytes_GET_SIZE(str);
180 newsize = 4*size;
181 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != size) {
182 PyErr_SetString(PyExc_OverflowError,
183 "string is too large to encode");
184 return NULL;
185 }
186 v = PyBytes_FromStringAndSize(NULL, newsize);
187
188 if (v == NULL) {
189 return NULL;
190 }
191 else {
192 register Py_ssize_t i;
193 register char c;
194 register char *p = PyBytes_AS_STRING(v);
195
196 for (i = 0; i < size; i++) {
197 /* There's at least enough room for a hex escape */
198 assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
199 c = PyBytes_AS_STRING(str)[i];
200 if (c == '\'' || c == '\\')
201 *p++ = '\\', *p++ = c;
202 else if (c == '\t')
203 *p++ = '\\', *p++ = 't';
204 else if (c == '\n')
205 *p++ = '\\', *p++ = 'n';
206 else if (c == '\r')
207 *p++ = '\\', *p++ = 'r';
208 else if (c < ' ' || c >= 0x7f) {
209 *p++ = '\\';
210 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200211 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
212 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitroud1188562010-06-09 16:38:55 +0000213 }
214 else
215 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000216 }
Antoine Pitroud1188562010-06-09 16:38:55 +0000217 *p = '\0';
218 if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
219 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000220 }
Antoine Pitroud1188562010-06-09 16:38:55 +0000221 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000222
Antoine Pitroud1188562010-06-09 16:38:55 +0000223 return codec_tuple(v, size);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000224}
225
/* --- Decoder ------------------------------------------------------------ */
227
228static PyObject *
229unicode_internal_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000230 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000231{
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000232 PyObject *obj;
233 const char *errors = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000234 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000235 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000236
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000237 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000238 &obj, &errors))
239 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000240
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000241 if (PyUnicode_Check(obj)) {
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100242 if (PyUnicode_READY(obj) < 0)
243 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000244 Py_INCREF(obj);
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100245 return codec_tuple(obj, PyUnicode_GET_LENGTH(obj));
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000246 }
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000247 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
249 return NULL;
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000250
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000251 return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
252 size);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000253 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000254}
255
256static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000257utf_7_decode(PyObject *self,
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000258 PyObject *args)
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000259{
Antoine Pitroud1188562010-06-09 16:38:55 +0000260 Py_buffer pbuf;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000261 const char *errors = NULL;
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000262 int final = 0;
263 Py_ssize_t consumed;
264 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000265
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000266 if (!PyArg_ParseTuple(args, "y*|zi:utf_7_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000267 &pbuf, &errors, &final))
268 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000269 consumed = pbuf.len;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000270
Martin v. Löwis423be952008-08-13 15:53:07 +0000271 decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000273 PyBuffer_Release(&pbuf);
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000274 if (decoded == NULL)
275 return NULL;
276 return codec_tuple(decoded, consumed);
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000277}
278
279static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000280utf_8_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000281 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000282{
Antoine Pitroud1188562010-06-09 16:38:55 +0000283 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000284 const char *errors = NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000285 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000286 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000287 PyObject *decoded = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000288
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000289 if (!PyArg_ParseTuple(args, "y*|zi:utf_8_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000290 &pbuf, &errors, &final))
291 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000292 consumed = pbuf.len;
293
294 decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000295 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000296 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000297 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000298 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000299 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000300}
301
302static PyObject *
303utf_16_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000304 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000305{
Antoine Pitroud1188562010-06-09 16:38:55 +0000306 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000307 const char *errors = NULL;
308 int byteorder = 0;
Walter Dörwald69652032004-09-07 20:24:22 +0000309 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000310 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000311 PyObject *decoded;
312
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000313 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000314 &pbuf, &errors, &final))
315 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000316 consumed = pbuf.len; /* This is overwritten unless final is true. */
317 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000318 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000319 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000320 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000321 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000322 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000323}
324
325static PyObject *
326utf_16_le_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000327 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000328{
Antoine Pitroud1188562010-06-09 16:38:55 +0000329 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000330 const char *errors = NULL;
331 int byteorder = -1;
Walter Dörwald69652032004-09-07 20:24:22 +0000332 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000333 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000334 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000335
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000336 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_le_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000337 &pbuf, &errors, &final))
338 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000339
Martin v. Löwis423be952008-08-13 15:53:07 +0000340 consumed = pbuf.len; /* This is overwritten unless final is true. */
341 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000342 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000343 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000344 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000345 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000346 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000347}
348
349static PyObject *
350utf_16_be_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000351 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000352{
Antoine Pitroud1188562010-06-09 16:38:55 +0000353 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000354 const char *errors = NULL;
355 int byteorder = 1;
Walter Dörwald69652032004-09-07 20:24:22 +0000356 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000357 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000358 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000359
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000360 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_be_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000361 &pbuf, &errors, &final))
362 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000363
364 consumed = pbuf.len; /* This is overwritten unless final is true. */
365 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000367 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000368 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000369 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000370 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000371}
372
373/* This non-standard version also provides access to the byteorder
374 parameter of the builtin UTF-16 codec.
375
376 It returns a tuple (unicode, bytesread, byteorder) with byteorder
377 being the value in effect at the end of data.
378
379*/
380
381static PyObject *
382utf_16_ex_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000383 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000384{
Antoine Pitroud1188562010-06-09 16:38:55 +0000385 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000386 const char *errors = NULL;
387 int byteorder = 0;
388 PyObject *unicode, *tuple;
Walter Dörwald69652032004-09-07 20:24:22 +0000389 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000390 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000391
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000392 if (!PyArg_ParseTuple(args, "y*|zii:utf_16_ex_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000393 &pbuf, &errors, &byteorder, &final))
394 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000395 consumed = pbuf.len; /* This is overwritten unless final is true. */
396 unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000397 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000398 PyBuffer_Release(&pbuf);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000399 if (unicode == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000400 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000401 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000402 Py_DECREF(unicode);
403 return tuple;
404}
405
406static PyObject *
Walter Dörwald41980ca2007-08-16 21:55:45 +0000407utf_32_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000408 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000409{
Antoine Pitroud1188562010-06-09 16:38:55 +0000410 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000411 const char *errors = NULL;
412 int byteorder = 0;
413 int final = 0;
414 Py_ssize_t consumed;
415 PyObject *decoded;
416
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000417 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000418 &pbuf, &errors, &final))
419 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000420 consumed = pbuf.len; /* This is overwritten unless final is true. */
421 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000422 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000423 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000424 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000425 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000426 return codec_tuple(decoded, consumed);
427}
428
429static PyObject *
430utf_32_le_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000431 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000432{
Antoine Pitroud1188562010-06-09 16:38:55 +0000433 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000434 const char *errors = NULL;
435 int byteorder = -1;
436 int final = 0;
437 Py_ssize_t consumed;
Martin v. Löwis423be952008-08-13 15:53:07 +0000438 PyObject *decoded;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000439
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000440 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_le_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000441 &pbuf, &errors, &final))
442 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000443 consumed = pbuf.len; /* This is overwritten unless final is true. */
444 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000445 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000446 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000447 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000448 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000449 return codec_tuple(decoded, consumed);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000450}
451
452static PyObject *
453utf_32_be_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000454 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000455{
Antoine Pitroud1188562010-06-09 16:38:55 +0000456 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000457 const char *errors = NULL;
458 int byteorder = 1;
459 int final = 0;
460 Py_ssize_t consumed;
Martin v. Löwis423be952008-08-13 15:53:07 +0000461 PyObject *decoded;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000462
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000463 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_be_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000464 &pbuf, &errors, &final))
465 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000466 consumed = pbuf.len; /* This is overwritten unless final is true. */
467 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000468 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000469 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000470 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000471 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000472 return codec_tuple(decoded, consumed);
473}
474
475/* This non-standard version also provides access to the byteorder
476 parameter of the builtin UTF-32 codec.
477
478 It returns a tuple (unicode, bytesread, byteorder) with byteorder
479 being the value in effect at the end of data.
480
481*/
482
483static PyObject *
484utf_32_ex_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000485 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000486{
Antoine Pitroud1188562010-06-09 16:38:55 +0000487 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000488 const char *errors = NULL;
489 int byteorder = 0;
490 PyObject *unicode, *tuple;
491 int final = 0;
492 Py_ssize_t consumed;
493
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000494 if (!PyArg_ParseTuple(args, "y*|zii:utf_32_ex_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000495 &pbuf, &errors, &byteorder, &final))
496 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000497 consumed = pbuf.len; /* This is overwritten unless final is true. */
498 unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000499 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000500 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000501 if (unicode == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000502 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000503 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
504 Py_DECREF(unicode);
505 return tuple;
506}
507
508static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000509unicode_escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000510 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000511{
Antoine Pitroud1188562010-06-09 16:38:55 +0000512 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000513 const char *errors = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000514 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000515
Martin v. Löwis423be952008-08-13 15:53:07 +0000516 if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000517 &pbuf, &errors))
518 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000519
Antoine Pitroud1188562010-06-09 16:38:55 +0000520 unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
521 PyBuffer_Release(&pbuf);
522 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000523}
524
525static PyObject *
526raw_unicode_escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000527 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000528{
Antoine Pitroud1188562010-06-09 16:38:55 +0000529 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000530 const char *errors = NULL;
Antoine Pitroud1188562010-06-09 16:38:55 +0000531 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000532
Martin v. Löwis423be952008-08-13 15:53:07 +0000533 if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000534 &pbuf, &errors))
535 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000536
Antoine Pitroud1188562010-06-09 16:38:55 +0000537 unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
538 PyBuffer_Release(&pbuf);
539 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000540}
541
542static PyObject *
543latin_1_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000544 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000545{
Antoine Pitroud1188562010-06-09 16:38:55 +0000546 Py_buffer pbuf;
547 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000548 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000549
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000550 if (!PyArg_ParseTuple(args, "y*|z:latin_1_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000551 &pbuf, &errors))
552 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000553
Antoine Pitroud1188562010-06-09 16:38:55 +0000554 unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
555 PyBuffer_Release(&pbuf);
556 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000557}
558
559static PyObject *
560ascii_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000561 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000562{
Antoine Pitroud1188562010-06-09 16:38:55 +0000563 Py_buffer pbuf;
564 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000565 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000566
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000567 if (!PyArg_ParseTuple(args, "y*|z:ascii_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000568 &pbuf, &errors))
569 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000570
Antoine Pitroud1188562010-06-09 16:38:55 +0000571 unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
572 PyBuffer_Release(&pbuf);
573 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000574}
575
576static PyObject *
577charmap_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000578 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000579{
Antoine Pitroud1188562010-06-09 16:38:55 +0000580 Py_buffer pbuf;
581 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000582 const char *errors = NULL;
583 PyObject *mapping = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000584
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000585 if (!PyArg_ParseTuple(args, "y*|zO:charmap_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000586 &pbuf, &errors, &mapping))
587 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000588 if (mapping == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000589 mapping = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000590
Antoine Pitroud1188562010-06-09 16:38:55 +0000591 unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
592 PyBuffer_Release(&pbuf);
593 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000594}
595
Victor Stinner99b95382011-07-04 14:23:54 +0200596#ifdef HAVE_MBCS
Guido van Rossum24bdb042000-03-28 20:29:59 +0000597
598static PyObject *
599mbcs_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000600 PyObject *args)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000601{
Antoine Pitroud1188562010-06-09 16:38:55 +0000602 Py_buffer pbuf;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000603 const char *errors = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000604 int final = 0;
Martin v. Löwis423be952008-08-13 15:53:07 +0000605 Py_ssize_t consumed;
606 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000607
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000608 if (!PyArg_ParseTuple(args, "y*|zi:mbcs_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000609 &pbuf, &errors, &final))
610 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000611 consumed = pbuf.len;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000612
Martin v. Löwis423be952008-08-13 15:53:07 +0000613 decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000614 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000615 PyBuffer_Release(&pbuf);
Martin v. Löwis423be952008-08-13 15:53:07 +0000616 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000617 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000618 return codec_tuple(decoded, consumed);
Guido van Rossum24bdb042000-03-28 20:29:59 +0000619}
620
Victor Stinner3a50e702011-10-18 21:21:00 +0200621static PyObject *
622code_page_decode(PyObject *self,
623 PyObject *args)
624{
625 Py_buffer pbuf;
626 const char *errors = NULL;
627 int final = 0;
628 Py_ssize_t consumed;
629 PyObject *decoded = NULL;
630 int code_page;
631
632 if (!PyArg_ParseTuple(args, "iy*|zi:code_page_decode",
633 &code_page, &pbuf, &errors, &final))
634 return NULL;
635 consumed = pbuf.len;
636
637 decoded = PyUnicode_DecodeCodePageStateful(code_page,
638 pbuf.buf, pbuf.len, errors,
639 final ? NULL : &consumed);
640 PyBuffer_Release(&pbuf);
641 if (decoded == NULL)
642 return NULL;
643 return codec_tuple(decoded, consumed);
644}
645
Victor Stinner99b95382011-07-04 14:23:54 +0200646#endif /* HAVE_MBCS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000647
Guido van Rossume2d67f92000-03-10 23:09:23 +0000648/* --- Encoder ------------------------------------------------------------ */
649
650static PyObject *
651readbuffer_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000652 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000653{
Martin v. Löwis423be952008-08-13 15:53:07 +0000654 Py_buffer pdata;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000655 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000656 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000657 const char *errors = NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000658 PyObject *result;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000659
Martin v. Löwis423be952008-08-13 15:53:07 +0000660 if (!PyArg_ParseTuple(args, "s*|z:readbuffer_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000661 &pdata, &errors))
662 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000663 data = pdata.buf;
664 size = pdata.len;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000665
Martin v. Löwis423be952008-08-13 15:53:07 +0000666 result = PyBytes_FromStringAndSize(data, size);
667 PyBuffer_Release(&pdata);
668 return codec_tuple(result, size);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000669}
670
671static PyObject *
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000672unicode_internal_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000673 PyObject *args)
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000674{
675 PyObject *obj;
676 const char *errors = NULL;
677 const char *data;
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100678 Py_ssize_t len, size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000679
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000680 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000681 &obj, &errors))
682 return NULL;
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000683
684 if (PyUnicode_Check(obj)) {
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100685 Py_UNICODE *u;
686
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100687 if (PyUnicode_READY(obj) < 0)
688 return NULL;
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100689
690 if (PyErr_WarnEx(PyExc_DeprecationWarning,
691 "unicode_internal codecs has been deprecated",
692 1))
693 return NULL;
694
695 u = PyUnicode_AsUnicodeAndSize(obj, &len);
696 if (u == NULL)
697 return NULL;
698 if (len > PY_SSIZE_T_MAX / sizeof(Py_UNICODE))
699 return PyErr_NoMemory();
700 size = len * sizeof(Py_UNICODE);
701 return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size),
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100702 PyUnicode_GET_LENGTH(obj));
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000703 }
704 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000705 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
706 return NULL;
707 return codec_tuple(PyBytes_FromStringAndSize(data, size), size);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000708 }
709}
710
711static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000712utf_7_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000713 PyObject *args)
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000714{
715 PyObject *str, *v;
716 const char *errors = NULL;
717
718 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000719 &str, &errors))
720 return NULL;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000721
722 str = PyUnicode_FromObject(str);
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100723 if (str == NULL || PyUnicode_READY(str) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000724 return NULL;
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100725 v = codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
726 PyUnicode_GET_LENGTH(str));
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000727 Py_DECREF(str);
728 return v;
729}
730
731static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000732utf_8_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000733 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000734{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000735 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000736 const char *errors = NULL;
737
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000738 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000739 &str, &errors))
740 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000741
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000742 str = PyUnicode_FromObject(str);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200743 if (str == NULL || PyUnicode_READY(str) == -1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000744 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200745 v = codec_tuple(PyUnicode_AsEncodedString(str, "utf-8", errors),
746 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000747 Py_DECREF(str);
748 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000749}
750
751/* This version provides access to the byteorder parameter of the
752 builtin UTF-16 codecs as optional third argument. It defaults to 0
753 which means: use the native byte order and prepend the data with a
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000754 BOM mark.
Guido van Rossume2d67f92000-03-10 23:09:23 +0000755
756*/
757
758static PyObject *
759utf_16_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000760 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000761{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000762 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000763 const char *errors = NULL;
764 int byteorder = 0;
765
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000766 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000767 &str, &errors, &byteorder))
768 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000769
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000770 str = PyUnicode_FromObject(str);
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100771 if (str == NULL || PyUnicode_READY(str) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000772 return NULL;
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100773 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
774 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000775 Py_DECREF(str);
776 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000777}
778
779static PyObject *
780utf_16_le_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000781 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000782{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000783 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000784 const char *errors = NULL;
785
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000786 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000787 &str, &errors))
788 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000789
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000790 str = PyUnicode_FromObject(str);
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100791 if (str == NULL || PyUnicode_READY(str) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000792 return NULL;
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100793 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
794 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000795 Py_DECREF(str);
796 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000797}
798
799static PyObject *
800utf_16_be_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000801 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000802{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000803 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000804 const char *errors = NULL;
805
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000806 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000807 &str, &errors))
808 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000809
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000810 str = PyUnicode_FromObject(str);
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100811 if (str == NULL || PyUnicode_READY(str) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000812 return NULL;
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100813 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
814 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000815 Py_DECREF(str);
816 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000817}
818
Walter Dörwald41980ca2007-08-16 21:55:45 +0000819/* This version provides access to the byteorder parameter of the
820 builtin UTF-32 codecs as optional third argument. It defaults to 0
821 which means: use the native byte order and prepend the data with a
822 BOM mark.
823
824*/
825
826static PyObject *
827utf_32_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000828 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000829{
830 PyObject *str, *v;
831 const char *errors = NULL;
832 int byteorder = 0;
833
834 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000835 &str, &errors, &byteorder))
836 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000837
838 str = PyUnicode_FromObject(str);
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100839 if (str == NULL || PyUnicode_READY(str) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000840 return NULL;
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100841 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
842 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000843 Py_DECREF(str);
844 return v;
845}
846
847static PyObject *
848utf_32_le_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000849 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000850{
851 PyObject *str, *v;
852 const char *errors = NULL;
853
854 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000855 &str, &errors))
856 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000857
858 str = PyUnicode_FromObject(str);
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100859 if (str == NULL || PyUnicode_READY(str) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000860 return NULL;
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100861 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
862 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000863 Py_DECREF(str);
864 return v;
865}
866
867static PyObject *
868utf_32_be_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000869 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000870{
871 PyObject *str, *v;
872 const char *errors = NULL;
873
874 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000875 &str, &errors))
876 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000877
878 str = PyUnicode_FromObject(str);
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100879 if (str == NULL || PyUnicode_READY(str) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000880 return NULL;
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100881 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
882 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000883 Py_DECREF(str);
884 return v;
885}
886
Guido van Rossume2d67f92000-03-10 23:09:23 +0000887static PyObject *
888unicode_escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000889 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000890{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000891 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000892 const char *errors = NULL;
893
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000894 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000895 &str, &errors))
896 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000897
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000898 str = PyUnicode_FromObject(str);
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100899 if (str == NULL || PyUnicode_READY(str) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000900 return NULL;
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100901 v = codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
902 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000903 Py_DECREF(str);
904 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000905}
906
907static PyObject *
908raw_unicode_escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000909 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000910{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000911 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000912 const char *errors = NULL;
913
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000914 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000915 &str, &errors))
916 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000917
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000918 str = PyUnicode_FromObject(str);
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100919 if (str == NULL || PyUnicode_READY(str) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000920 return NULL;
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100921 v = codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
922 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000923 Py_DECREF(str);
924 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000925}
926
927static PyObject *
928latin_1_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000929 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000930{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000931 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000932 const char *errors = NULL;
933
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000934 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000935 &str, &errors))
936 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000937
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000938 str = PyUnicode_FromObject(str);
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100939 if (str == NULL || PyUnicode_READY(str) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000940 return NULL;
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100941 v = codec_tuple(_PyUnicode_AsLatin1String(str, errors),
942 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000943 Py_DECREF(str);
944 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000945}
946
947static PyObject *
948ascii_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000949 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000950{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000951 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000952 const char *errors = NULL;
953
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000954 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000955 &str, &errors))
956 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000957
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000958 str = PyUnicode_FromObject(str);
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100959 if (str == NULL || PyUnicode_READY(str) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000960 return NULL;
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100961 v = codec_tuple(_PyUnicode_AsASCIIString(str, errors),
962 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000963 Py_DECREF(str);
964 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000965}
966
967static PyObject *
968charmap_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000969 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000970{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000971 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000972 const char *errors = NULL;
973 PyObject *mapping = NULL;
974
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000975 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000976 &str, &errors, &mapping))
977 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000978 if (mapping == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000979 mapping = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000980
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000981 str = PyUnicode_FromObject(str);
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100982 if (str == NULL || PyUnicode_READY(str) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000983 return NULL;
Martin v. Löwis23e275b2011-11-02 18:02:51 +0100984 v = codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100985 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000986 Py_DECREF(str);
987 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000988}
989
Thomas Wouters73e5a5b2006-06-08 15:35:45 +0000990static PyObject*
991charmap_build(PyObject *self, PyObject *args)
992{
993 PyObject *map;
994 if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
995 return NULL;
996 return PyUnicode_BuildEncodingMap(map);
997}
998
Victor Stinner99b95382011-07-04 14:23:54 +0200999#ifdef HAVE_MBCS
Guido van Rossum24bdb042000-03-28 20:29:59 +00001000
1001static PyObject *
1002mbcs_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001003 PyObject *args)
Guido van Rossum24bdb042000-03-28 20:29:59 +00001004{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001005 PyObject *str, *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001006 const char *errors = NULL;
1007
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001008 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001009 &str, &errors))
1010 return NULL;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001011
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001012 str = PyUnicode_FromObject(str);
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001013 if (str == NULL || PyUnicode_READY(str) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001014 return NULL;
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001015 v = codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
1016 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001017 Py_DECREF(str);
1018 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001019}
1020
Victor Stinner3a50e702011-10-18 21:21:00 +02001021static PyObject *
1022code_page_encode(PyObject *self,
1023 PyObject *args)
1024{
1025 PyObject *str, *v;
1026 const char *errors = NULL;
1027 int code_page;
1028
1029 if (!PyArg_ParseTuple(args, "iO|z:code_page_encode",
1030 &code_page, &str, &errors))
1031 return NULL;
1032
1033 str = PyUnicode_FromObject(str);
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001034 if (str == NULL || PyUnicode_READY(str) < 0)
Victor Stinner3a50e702011-10-18 21:21:00 +02001035 return NULL;
1036 v = codec_tuple(PyUnicode_EncodeCodePage(code_page,
1037 str,
1038 errors),
1039 PyUnicode_GET_LENGTH(str));
1040 Py_DECREF(str);
1041 return v;
1042}
1043
Victor Stinner99b95382011-07-04 14:23:54 +02001044#endif /* HAVE_MBCS */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001045
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001046/* --- Error handler registry --------------------------------------------- */
1047
Walter Dörwald0ae29812002-10-31 13:36:29 +00001048PyDoc_STRVAR(register_error__doc__,
1049"register_error(errors, handler)\n\
1050\n\
1051Register the specified error handler under the name\n\
1052errors. handler must be a callable object, that\n\
1053will be called with an exception instance containing\n\
1054information about the location of the encoding/decoding\n\
1055error and must return a (replacement, new position) tuple.");
1056
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001057static PyObject *register_error(PyObject *self, PyObject *args)
1058{
1059 const char *name;
1060 PyObject *handler;
1061
1062 if (!PyArg_ParseTuple(args, "sO:register_error",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001063 &name, &handler))
1064 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001065 if (PyCodec_RegisterError(name, handler))
1066 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001067 Py_RETURN_NONE;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001068}
1069
Walter Dörwald0ae29812002-10-31 13:36:29 +00001070PyDoc_STRVAR(lookup_error__doc__,
1071"lookup_error(errors) -> handler\n\
1072\n\
1073Return the error handler for the specified error handling name\n\
1074or raise a LookupError, if no handler exists under this name.");
1075
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001076static PyObject *lookup_error(PyObject *self, PyObject *args)
1077{
1078 const char *name;
1079
1080 if (!PyArg_ParseTuple(args, "s:lookup_error",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001081 &name))
1082 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001083 return PyCodec_LookupError(name);
1084}
1085
Guido van Rossume2d67f92000-03-10 23:09:23 +00001086/* --- Module API --------------------------------------------------------- */
1087
1088static PyMethodDef _codecs_functions[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001089 {"register", codec_register, METH_O,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001090 register__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001091 {"lookup", codec_lookup, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001092 lookup__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001093 {"encode", codec_encode, METH_VARARGS,
1094 encode__doc__},
1095 {"decode", codec_decode, METH_VARARGS,
1096 decode__doc__},
1097 {"escape_encode", escape_encode, METH_VARARGS},
1098 {"escape_decode", escape_decode, METH_VARARGS},
1099 {"utf_8_encode", utf_8_encode, METH_VARARGS},
1100 {"utf_8_decode", utf_8_decode, METH_VARARGS},
1101 {"utf_7_encode", utf_7_encode, METH_VARARGS},
1102 {"utf_7_decode", utf_7_decode, METH_VARARGS},
1103 {"utf_16_encode", utf_16_encode, METH_VARARGS},
1104 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1105 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1106 {"utf_16_decode", utf_16_decode, METH_VARARGS},
1107 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1108 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1109 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
1110 {"utf_32_encode", utf_32_encode, METH_VARARGS},
1111 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1112 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1113 {"utf_32_decode", utf_32_decode, METH_VARARGS},
1114 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1115 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1116 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
1117 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1118 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1119 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1120 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1121 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1122 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1123 {"latin_1_encode", latin_1_encode, METH_VARARGS},
1124 {"latin_1_decode", latin_1_decode, METH_VARARGS},
1125 {"ascii_encode", ascii_encode, METH_VARARGS},
1126 {"ascii_decode", ascii_decode, METH_VARARGS},
1127 {"charmap_encode", charmap_encode, METH_VARARGS},
1128 {"charmap_decode", charmap_decode, METH_VARARGS},
1129 {"charmap_build", charmap_build, METH_VARARGS},
1130 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
Victor Stinner99b95382011-07-04 14:23:54 +02001131#ifdef HAVE_MBCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001132 {"mbcs_encode", mbcs_encode, METH_VARARGS},
1133 {"mbcs_decode", mbcs_decode, METH_VARARGS},
Victor Stinner3a50e702011-10-18 21:21:00 +02001134 {"code_page_encode", code_page_encode, METH_VARARGS},
1135 {"code_page_decode", code_page_decode, METH_VARARGS},
Guido van Rossum24bdb042000-03-28 20:29:59 +00001136#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001137 {"register_error", register_error, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001138 register_error__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001139 {"lookup_error", lookup_error, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001140 lookup_error__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001141 {NULL, NULL} /* sentinel */
Guido van Rossume2d67f92000-03-10 23:09:23 +00001142};
1143
Martin v. Löwis1a214512008-06-11 05:26:20 +00001144static struct PyModuleDef codecsmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001145 PyModuleDef_HEAD_INIT,
1146 "_codecs",
1147 NULL,
1148 -1,
1149 _codecs_functions,
1150 NULL,
1151 NULL,
1152 NULL,
1153 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001154};
1155
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001156PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001157PyInit__codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001158{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001159 return PyModule_Create(&codecsmodule);
Guido van Rossume2d67f92000-03-10 23:09:23 +00001160}