blob: 9b1194eae8cdf2740871b8905aa1434271014eb6 [file] [log] [blame]
Guido van Rossume2d67f92000-03-10 23:09:23 +00001/* ------------------------------------------------------------------------
2
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
5
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
8
9 The codec registry is accessible via:
10
11 register(search_function) -> None
12
Guido van Rossum36e0a922007-07-20 04:05:57 +000013 lookup(encoding) -> CodecInfo object
Guido van Rossume2d67f92000-03-10 23:09:23 +000014
15 The builtin Unicode codecs use the following interface:
16
Walter Dörwald9fd115c2005-11-02 08:30:08 +000017 <encoding>_encode(Unicode_object[,errors='strict']) ->
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000018 (string object, bytes consumed)
Guido van Rossume2d67f92000-03-10 23:09:23 +000019
Walter Dörwald9fd115c2005-11-02 08:30:08 +000020 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
Guido van Rossume2d67f92000-03-10 23:09:23 +000021 (Unicode object, bytes consumed)
22
   <encoding>_encode() interfaces also accept non-Unicode objects as
   input. The objects are then converted to Unicode using
   PyUnicode_FromObject() prior to applying the conversion.
26
Guido van Rossume2d67f92000-03-10 23:09:23 +000027 These <encoding>s are available: utf_8, unicode_escape,
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +000028 raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
29 mbcs (on win32).
30
Guido van Rossume2d67f92000-03-10 23:09:23 +000031
32Written by Marc-Andre Lemburg (mal@lemburg.com).
33
Guido van Rossum16b1ad92000-08-03 16:24:25 +000034Copyright (c) Corporation for National Research Initiatives.
Guido van Rossume2d67f92000-03-10 23:09:23 +000035
36 ------------------------------------------------------------------------ */
37
Martin v. Löwis18e16552006-02-15 17:27:45 +000038#define PY_SSIZE_T_CLEAN
Guido van Rossume2d67f92000-03-10 23:09:23 +000039#include "Python.h"
40
Victor Stinner87a7c822011-11-10 20:05:55 +010041#ifdef MS_WINDOWS
42#include <windows.h>
43#endif
44
Nick Coghlan8fad1672014-09-15 23:50:44 +120045/*[clinic input]
46module _codecs
47[clinic start generated code]*/
48/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
49
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030050#include "clinic/_codecsmodule.c.h"
Nick Coghlan8fad1672014-09-15 23:50:44 +120051
Guido van Rossume2d67f92000-03-10 23:09:23 +000052/* --- Registry ----------------------------------------------------------- */
53
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +030054/*[clinic input]
55_codecs.register
56 search_function: object
57 /
Walter Dörwald0ae29812002-10-31 13:36:29 +000058
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +030059Register a codec search function.
60
61Search functions are expected to take one argument, the encoding name in
62all lower case letters, and either return None, or a tuple of functions
63(encoder, decoder, stream_reader, stream_writer) (or a CodecInfo object).
64[clinic start generated code]*/
65
66static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +030067_codecs_register(PyObject *module, PyObject *search_function)
68/*[clinic end generated code: output=d1bf21e99db7d6d3 input=369578467955cae4]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +000069{
Guido van Rossume2d67f92000-03-10 23:09:23 +000070 if (PyCodec_Register(search_function))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000071 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000072
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000073 Py_RETURN_NONE;
Guido van Rossume2d67f92000-03-10 23:09:23 +000074}
75
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +030076/*[clinic input]
77_codecs.lookup
78 encoding: str
79 /
Walter Dörwald0ae29812002-10-31 13:36:29 +000080
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +030081Looks up a codec tuple in the Python codec registry and returns a CodecInfo object.
82[clinic start generated code]*/
83
84static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +030085_codecs_lookup_impl(PyObject *module, const char *encoding)
86/*[clinic end generated code: output=9f0afa572080c36d input=3c572c0db3febe9c]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +000087{
Guido van Rossume2d67f92000-03-10 23:09:23 +000088 return _PyCodec_Lookup(encoding);
Guido van Rossume2d67f92000-03-10 23:09:23 +000089}
90
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +030091/*[clinic input]
92_codecs.encode
93 obj: object
Serhiy Storchakac97a9622015-08-09 12:23:08 +030094 encoding: str(c_default="NULL") = "utf-8"
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +030095 errors: str(c_default="NULL") = "strict"
96
97Encodes obj using the codec registered for encoding.
98
Serhiy Storchakac97a9622015-08-09 12:23:08 +030099The default encoding is 'utf-8'. errors may be given to set a
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300100different error handling scheme. Default is 'strict' meaning that encoding
101errors raise a ValueError. Other possible values are 'ignore', 'replace'
102and 'backslashreplace' as well as any other name registered with
103codecs.register_error that can handle ValueErrors.
104[clinic start generated code]*/
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000105
106static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300107_codecs_encode_impl(PyObject *module, PyObject *obj, const char *encoding,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300108 const char *errors)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300109/*[clinic end generated code: output=385148eb9a067c86 input=cd5b685040ff61f0]*/
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000110{
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000111 if (encoding == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000112 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000113
114 /* Encode via the codec registry */
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300115 return PyCodec_Encode(obj, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000116}
117
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300118/*[clinic input]
119_codecs.decode
120 obj: object
Serhiy Storchakac97a9622015-08-09 12:23:08 +0300121 encoding: str(c_default="NULL") = "utf-8"
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300122 errors: str(c_default="NULL") = "strict"
123
124Decodes obj using the codec registered for encoding.
125
Serhiy Storchakac97a9622015-08-09 12:23:08 +0300126Default encoding is 'utf-8'. errors may be given to set a
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300127different error handling scheme. Default is 'strict' meaning that encoding
128errors raise a ValueError. Other possible values are 'ignore', 'replace'
129and 'backslashreplace' as well as any other name registered with
130codecs.register_error that can handle ValueErrors.
131[clinic start generated code]*/
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000132
133static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300134_codecs_decode_impl(PyObject *module, PyObject *obj, const char *encoding,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300135 const char *errors)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300136/*[clinic end generated code: output=679882417dc3a0bd input=7702c0cc2fa1add6]*/
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000137{
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000138 if (encoding == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000140
141 /* Decode via the codec registry */
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300142 return PyCodec_Decode(obj, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000143}
144
Guido van Rossume2d67f92000-03-10 23:09:23 +0000145/* --- Helpers ------------------------------------------------------------ */
146
Nick Coghlan8fad1672014-09-15 23:50:44 +1200147/*[clinic input]
148_codecs._forget_codec
149
150 encoding: str
151 /
152
153Purge the named codec from the internal codec lookup cache
154[clinic start generated code]*/
155
Nick Coghlan8fad1672014-09-15 23:50:44 +1200156static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300157_codecs__forget_codec_impl(PyObject *module, const char *encoding)
158/*[clinic end generated code: output=0bde9f0a5b084aa2 input=18d5d92d0e386c38]*/
Nick Coghlan8fad1672014-09-15 23:50:44 +1200159{
160 if (_PyCodec_Forget(encoding) < 0) {
161 return NULL;
162 };
163 Py_RETURN_NONE;
164}
165
Guido van Rossume2d67f92000-03-10 23:09:23 +0000166static
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300167PyObject *codec_tuple(PyObject *decoded,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000168 Py_ssize_t len)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000169{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300170 if (decoded == NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000171 return NULL;
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300172 return Py_BuildValue("Nn", decoded, len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000173}
174
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000175/* --- String codecs ------------------------------------------------------ */
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300176/*[clinic input]
177_codecs.escape_decode
178 data: Py_buffer(accept={str, buffer})
179 errors: str(accept={str, NoneType}) = NULL
180 /
181[clinic start generated code]*/
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000182
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300183static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300184_codecs_escape_decode_impl(PyObject *module, Py_buffer *data,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300185 const char *errors)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300186/*[clinic end generated code: output=505200ba8056979a input=0018edfd99db714d]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300187{
188 PyObject *decoded = PyBytes_DecodeEscape(data->buf, data->len,
189 errors, 0, NULL);
190 return codec_tuple(decoded, data->len);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000191}
192
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300193/*[clinic input]
194_codecs.escape_encode
195 data: object(subclass_of='&PyBytes_Type')
196 errors: str(accept={str, NoneType}) = NULL
197 /
198[clinic start generated code]*/
199
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000200static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300201_codecs_escape_encode_impl(PyObject *module, PyObject *data,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300202 const char *errors)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300203/*[clinic end generated code: output=4af1d477834bab34 input=da9ded00992f32f2]*/
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000204{
Antoine Pitroud1188562010-06-09 16:38:55 +0000205 Py_ssize_t size;
206 Py_ssize_t newsize;
Antoine Pitroud1188562010-06-09 16:38:55 +0000207 PyObject *v;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000208
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300209 size = PyBytes_GET_SIZE(data);
Mark Dickinsonc04ddff2012-10-06 18:04:49 +0100210 if (size > PY_SSIZE_T_MAX / 4) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000211 PyErr_SetString(PyExc_OverflowError,
212 "string is too large to encode");
213 return NULL;
214 }
Mark Dickinsonc04ddff2012-10-06 18:04:49 +0100215 newsize = 4*size;
Antoine Pitroud1188562010-06-09 16:38:55 +0000216 v = PyBytes_FromStringAndSize(NULL, newsize);
217
218 if (v == NULL) {
219 return NULL;
220 }
221 else {
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200222 Py_ssize_t i;
223 char c;
224 char *p = PyBytes_AS_STRING(v);
Antoine Pitroud1188562010-06-09 16:38:55 +0000225
226 for (i = 0; i < size; i++) {
227 /* There's at least enough room for a hex escape */
228 assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300229 c = PyBytes_AS_STRING(data)[i];
Antoine Pitroud1188562010-06-09 16:38:55 +0000230 if (c == '\'' || c == '\\')
231 *p++ = '\\', *p++ = c;
232 else if (c == '\t')
233 *p++ = '\\', *p++ = 't';
234 else if (c == '\n')
235 *p++ = '\\', *p++ = 'n';
236 else if (c == '\r')
237 *p++ = '\\', *p++ = 'r';
238 else if (c < ' ' || c >= 0x7f) {
239 *p++ = '\\';
240 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200241 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
242 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitroud1188562010-06-09 16:38:55 +0000243 }
244 else
245 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000246 }
Antoine Pitroud1188562010-06-09 16:38:55 +0000247 *p = '\0';
248 if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
249 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 }
Antoine Pitroud1188562010-06-09 16:38:55 +0000251 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000252
Antoine Pitroud1188562010-06-09 16:38:55 +0000253 return codec_tuple(v, size);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000254}
255
Guido van Rossume2d67f92000-03-10 23:09:23 +0000256/* --- Decoder ------------------------------------------------------------ */
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300257/*[clinic input]
258_codecs.unicode_internal_decode
259 obj: object
260 errors: str(accept={str, NoneType}) = NULL
261 /
262[clinic start generated code]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000263
264static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300265_codecs_unicode_internal_decode_impl(PyObject *module, PyObject *obj,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300266 const char *errors)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300267/*[clinic end generated code: output=edbfe175e09eff9a input=8d57930aeda170c6]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000268{
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000269 if (PyUnicode_Check(obj)) {
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100270 if (PyUnicode_READY(obj) < 0)
271 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 Py_INCREF(obj);
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100273 return codec_tuple(obj, PyUnicode_GET_LENGTH(obj));
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000274 }
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000275 else {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200276 Py_buffer view;
277 PyObject *result;
278 if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000279 return NULL;
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000280
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200281 result = codec_tuple(
282 _PyUnicode_DecodeUnicodeInternal(view.buf, view.len, errors),
283 view.len);
284 PyBuffer_Release(&view);
285 return result;
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000286 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000287}
288
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300289/*[clinic input]
290_codecs.utf_7_decode
291 data: Py_buffer
292 errors: str(accept={str, NoneType}) = NULL
293 final: int(c_default="0") = False
294 /
295[clinic start generated code]*/
296
Guido van Rossume2d67f92000-03-10 23:09:23 +0000297static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300298_codecs_utf_7_decode_impl(PyObject *module, Py_buffer *data,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300299 const char *errors, int final)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300300/*[clinic end generated code: output=0cd3a944a32a4089 input=bc4d6247ecdb01e6]*/
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000301{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300302 Py_ssize_t consumed = data->len;
303 PyObject *decoded = PyUnicode_DecodeUTF7Stateful(data->buf, data->len,
304 errors,
305 final ? NULL : &consumed);
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000306 return codec_tuple(decoded, consumed);
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000307}
308
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300309/*[clinic input]
310_codecs.utf_8_decode
311 data: Py_buffer
312 errors: str(accept={str, NoneType}) = NULL
313 final: int(c_default="0") = False
314 /
315[clinic start generated code]*/
316
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000317static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300318_codecs_utf_8_decode_impl(PyObject *module, Py_buffer *data,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300319 const char *errors, int final)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300320/*[clinic end generated code: output=10f74dec8d9bb8bf input=39161d71e7422ee2]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000321{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300322 Py_ssize_t consumed = data->len;
323 PyObject *decoded = PyUnicode_DecodeUTF8Stateful(data->buf, data->len,
324 errors,
325 final ? NULL : &consumed);
Walter Dörwald69652032004-09-07 20:24:22 +0000326 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000327}
328
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300329/*[clinic input]
330_codecs.utf_16_decode
331 data: Py_buffer
332 errors: str(accept={str, NoneType}) = NULL
333 final: int(c_default="0") = False
334 /
335[clinic start generated code]*/
336
Guido van Rossume2d67f92000-03-10 23:09:23 +0000337static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300338_codecs_utf_16_decode_impl(PyObject *module, Py_buffer *data,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300339 const char *errors, int final)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300340/*[clinic end generated code: output=783b442abcbcc2d0 input=f3cf01d1461007ce]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000341{
Guido van Rossume2d67f92000-03-10 23:09:23 +0000342 int byteorder = 0;
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300343 /* This is overwritten unless final is true. */
344 Py_ssize_t consumed = data->len;
345 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
346 errors, &byteorder,
347 final ? NULL : &consumed);
Walter Dörwald69652032004-09-07 20:24:22 +0000348 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000349}
350
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300351/*[clinic input]
352_codecs.utf_16_le_decode
353 data: Py_buffer
354 errors: str(accept={str, NoneType}) = NULL
355 final: int(c_default="0") = False
356 /
357[clinic start generated code]*/
358
Guido van Rossume2d67f92000-03-10 23:09:23 +0000359static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300360_codecs_utf_16_le_decode_impl(PyObject *module, Py_buffer *data,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300361 const char *errors, int final)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300362/*[clinic end generated code: output=899b9e6364379dcd input=a77e3bf97335d94e]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000363{
Guido van Rossume2d67f92000-03-10 23:09:23 +0000364 int byteorder = -1;
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300365 /* This is overwritten unless final is true. */
366 Py_ssize_t consumed = data->len;
367 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
368 errors, &byteorder,
369 final ? NULL : &consumed);
Walter Dörwald69652032004-09-07 20:24:22 +0000370 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000371}
372
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300373/*[clinic input]
374_codecs.utf_16_be_decode
375 data: Py_buffer
376 errors: str(accept={str, NoneType}) = NULL
377 final: int(c_default="0") = False
378 /
379[clinic start generated code]*/
380
Guido van Rossume2d67f92000-03-10 23:09:23 +0000381static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300382_codecs_utf_16_be_decode_impl(PyObject *module, Py_buffer *data,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300383 const char *errors, int final)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300384/*[clinic end generated code: output=49f6465ea07669c8 input=606f69fae91b5563]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000385{
Guido van Rossume2d67f92000-03-10 23:09:23 +0000386 int byteorder = 1;
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300387 /* This is overwritten unless final is true. */
388 Py_ssize_t consumed = data->len;
389 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
390 errors, &byteorder,
391 final ? NULL : &consumed);
Walter Dörwald69652032004-09-07 20:24:22 +0000392 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000393}
394
395/* This non-standard version also provides access to the byteorder
396 parameter of the builtin UTF-16 codec.
397
398 It returns a tuple (unicode, bytesread, byteorder) with byteorder
399 being the value in effect at the end of data.
400
401*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300402/*[clinic input]
403_codecs.utf_16_ex_decode
404 data: Py_buffer
405 errors: str(accept={str, NoneType}) = NULL
406 byteorder: int = 0
407 final: int(c_default="0") = False
408 /
409[clinic start generated code]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000410
411static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300412_codecs_utf_16_ex_decode_impl(PyObject *module, Py_buffer *data,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300413 const char *errors, int byteorder, int final)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300414/*[clinic end generated code: output=0f385f251ecc1988 input=f6e7f697658c013e]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000415{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300416 /* This is overwritten unless final is true. */
417 Py_ssize_t consumed = data->len;
Walter Dörwald69652032004-09-07 20:24:22 +0000418
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300419 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
420 errors, &byteorder,
421 final ? NULL : &consumed);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000422 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000423 return NULL;
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300424 return Py_BuildValue("Nni", decoded, consumed, byteorder);
425}
426
427/*[clinic input]
428_codecs.utf_32_decode
429 data: Py_buffer
430 errors: str(accept={str, NoneType}) = NULL
431 final: int(c_default="0") = False
432 /
433[clinic start generated code]*/
434
435static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300436_codecs_utf_32_decode_impl(PyObject *module, Py_buffer *data,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300437 const char *errors, int final)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300438/*[clinic end generated code: output=2fc961807f7b145f input=86d4f41c6c2e763d]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300439{
440 int byteorder = 0;
441 /* This is overwritten unless final is true. */
442 Py_ssize_t consumed = data->len;
443 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
444 errors, &byteorder,
445 final ? NULL : &consumed);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000446 return codec_tuple(decoded, consumed);
447}
448
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300449/*[clinic input]
450_codecs.utf_32_le_decode
451 data: Py_buffer
452 errors: str(accept={str, NoneType}) = NULL
453 final: int(c_default="0") = False
454 /
455[clinic start generated code]*/
456
Walter Dörwald41980ca2007-08-16 21:55:45 +0000457static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300458_codecs_utf_32_le_decode_impl(PyObject *module, Py_buffer *data,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300459 const char *errors, int final)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300460/*[clinic end generated code: output=ec8f46b67a94f3e6 input=d18b650772d188ba]*/
Walter Dörwald41980ca2007-08-16 21:55:45 +0000461{
Walter Dörwald41980ca2007-08-16 21:55:45 +0000462 int byteorder = -1;
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300463 /* This is overwritten unless final is true. */
464 Py_ssize_t consumed = data->len;
465 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
466 errors, &byteorder,
467 final ? NULL : &consumed);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000468 return codec_tuple(decoded, consumed);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000469}
470
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300471/*[clinic input]
472_codecs.utf_32_be_decode
473 data: Py_buffer
474 errors: str(accept={str, NoneType}) = NULL
475 final: int(c_default="0") = False
476 /
477[clinic start generated code]*/
Walter Dörwald41980ca2007-08-16 21:55:45 +0000478
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300479static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300480_codecs_utf_32_be_decode_impl(PyObject *module, Py_buffer *data,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300481 const char *errors, int final)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300482/*[clinic end generated code: output=ff82bae862c92c4e input=19c271b5d34926d8]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300483{
484 int byteorder = 1;
485 /* This is overwritten unless final is true. */
486 Py_ssize_t consumed = data->len;
487 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
488 errors, &byteorder,
489 final ? NULL : &consumed);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000490 return codec_tuple(decoded, consumed);
491}
492
493/* This non-standard version also provides access to the byteorder
494 parameter of the builtin UTF-32 codec.
495
496 It returns a tuple (unicode, bytesread, byteorder) with byteorder
497 being the value in effect at the end of data.
498
499*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300500/*[clinic input]
501_codecs.utf_32_ex_decode
502 data: Py_buffer
503 errors: str(accept={str, NoneType}) = NULL
504 byteorder: int = 0
505 final: int(c_default="0") = False
506 /
507[clinic start generated code]*/
Walter Dörwald41980ca2007-08-16 21:55:45 +0000508
509static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300510_codecs_utf_32_ex_decode_impl(PyObject *module, Py_buffer *data,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300511 const char *errors, int byteorder, int final)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300512/*[clinic end generated code: output=6bfb177dceaf4848 input=4af3e6ccfe34a076]*/
Walter Dörwald41980ca2007-08-16 21:55:45 +0000513{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300514 Py_ssize_t consumed = data->len;
515 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
516 errors, &byteorder,
517 final ? NULL : &consumed);
518 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000519 return NULL;
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300520 return Py_BuildValue("Nni", decoded, consumed, byteorder);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000521}
522
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300523/*[clinic input]
524_codecs.unicode_escape_decode
525 data: Py_buffer(accept={str, buffer})
526 errors: str(accept={str, NoneType}) = NULL
527 /
528[clinic start generated code]*/
529
Walter Dörwald41980ca2007-08-16 21:55:45 +0000530static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300531_codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300532 const char *errors)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300533/*[clinic end generated code: output=3ca3c917176b82ab input=49fd27d06813a7f5]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000534{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300535 PyObject *decoded = PyUnicode_DecodeUnicodeEscape(data->buf, data->len,
536 errors);
537 return codec_tuple(decoded, data->len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000538}
539
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300540/*[clinic input]
541_codecs.raw_unicode_escape_decode
542 data: Py_buffer(accept={str, buffer})
543 errors: str(accept={str, NoneType}) = NULL
544 /
545[clinic start generated code]*/
546
Guido van Rossume2d67f92000-03-10 23:09:23 +0000547static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300548_codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300549 const char *errors)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300550/*[clinic end generated code: output=c98eeb56028070a6 input=770903a211434ebc]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000551{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300552 PyObject *decoded = PyUnicode_DecodeRawUnicodeEscape(data->buf, data->len,
553 errors);
554 return codec_tuple(decoded, data->len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000555}
556
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300557/*[clinic input]
558_codecs.latin_1_decode
559 data: Py_buffer
560 errors: str(accept={str, NoneType}) = NULL
561 /
562[clinic start generated code]*/
563
Guido van Rossume2d67f92000-03-10 23:09:23 +0000564static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300565_codecs_latin_1_decode_impl(PyObject *module, Py_buffer *data,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300566 const char *errors)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300567/*[clinic end generated code: output=07f3dfa3f72c7d8f input=5cad0f1759c618ec]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000568{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300569 PyObject *decoded = PyUnicode_DecodeLatin1(data->buf, data->len, errors);
570 return codec_tuple(decoded, data->len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000571}
572
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300573/*[clinic input]
574_codecs.ascii_decode
575 data: Py_buffer
576 errors: str(accept={str, NoneType}) = NULL
577 /
578[clinic start generated code]*/
579
Guido van Rossume2d67f92000-03-10 23:09:23 +0000580static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300581_codecs_ascii_decode_impl(PyObject *module, Py_buffer *data,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300582 const char *errors)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300583/*[clinic end generated code: output=2627d72058d42429 input=ad1106f64037bd16]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000584{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300585 PyObject *decoded = PyUnicode_DecodeASCII(data->buf, data->len, errors);
586 return codec_tuple(decoded, data->len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000587}
588
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300589/*[clinic input]
590_codecs.charmap_decode
591 data: Py_buffer
592 errors: str(accept={str, NoneType}) = NULL
593 mapping: object = NULL
594 /
595[clinic start generated code]*/
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000596
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300597static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300598_codecs_charmap_decode_impl(PyObject *module, Py_buffer *data,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300599 const char *errors, PyObject *mapping)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300600/*[clinic end generated code: output=2c335b09778cf895 input=19712ca35c5a80e2]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300601{
602 PyObject *decoded;
603
Guido van Rossume2d67f92000-03-10 23:09:23 +0000604 if (mapping == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000605 mapping = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000606
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300607 decoded = PyUnicode_DecodeCharmap(data->buf, data->len, mapping, errors);
608 return codec_tuple(decoded, data->len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000609}
610
Victor Stinner99b95382011-07-04 14:23:54 +0200611#ifdef HAVE_MBCS
Guido van Rossum24bdb042000-03-28 20:29:59 +0000612
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300613/*[clinic input]
614_codecs.mbcs_decode
615 data: Py_buffer
616 errors: str(accept={str, NoneType}) = NULL
617 final: int(c_default="0") = False
618 /
619[clinic start generated code]*/
620
Guido van Rossum24bdb042000-03-28 20:29:59 +0000621static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300622_codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300623 const char *errors, int final)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300624/*[clinic end generated code: output=39b65b8598938c4b input=d492c1ca64f4fa8a]*/
Guido van Rossum24bdb042000-03-28 20:29:59 +0000625{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300626 Py_ssize_t consumed = data->len;
627 PyObject *decoded = PyUnicode_DecodeMBCSStateful(data->buf, data->len,
628 errors, final ? NULL : &consumed);
Martin v. Löwis423be952008-08-13 15:53:07 +0000629 return codec_tuple(decoded, consumed);
Guido van Rossum24bdb042000-03-28 20:29:59 +0000630}
631
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300632/*[clinic input]
633_codecs.code_page_decode
634 codepage: int
635 data: Py_buffer
636 errors: str(accept={str, NoneType}) = NULL
637 final: int(c_default="0") = False
638 /
639[clinic start generated code]*/
640
Victor Stinner3a50e702011-10-18 21:21:00 +0200641static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300642_codecs_code_page_decode_impl(PyObject *module, int codepage,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300643 Py_buffer *data, const char *errors, int final)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300644/*[clinic end generated code: output=53008ea967da3fff input=4f3152a304e21d51]*/
Victor Stinner3a50e702011-10-18 21:21:00 +0200645{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300646 Py_ssize_t consumed = data->len;
Serhiy Storchaka1aa5e1d2015-05-12 14:00:22 +0300647 PyObject *decoded = PyUnicode_DecodeCodePageStateful(codepage,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300648 data->buf, data->len,
649 errors,
650 final ? NULL : &consumed);
Victor Stinner3a50e702011-10-18 21:21:00 +0200651 return codec_tuple(decoded, consumed);
652}
653
Victor Stinner99b95382011-07-04 14:23:54 +0200654#endif /* HAVE_MBCS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000655
Guido van Rossume2d67f92000-03-10 23:09:23 +0000656/* --- Encoder ------------------------------------------------------------ */
657
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300658/*[clinic input]
659_codecs.readbuffer_encode
660 data: Py_buffer(accept={str, buffer})
661 errors: str(accept={str, NoneType}) = NULL
662 /
663[clinic start generated code]*/
664
Guido van Rossume2d67f92000-03-10 23:09:23 +0000665static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300666_codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300667 const char *errors)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300668/*[clinic end generated code: output=c645ea7cdb3d6e86 input=b7c322b89d4ab923]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000669{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300670 PyObject *result = PyBytes_FromStringAndSize(data->buf, data->len);
671 return codec_tuple(result, data->len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000672}
673
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300674/*[clinic input]
675_codecs.unicode_internal_encode
676 obj: object
677 errors: str(accept={str, NoneType}) = NULL
678 /
679[clinic start generated code]*/
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000680
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300681static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300682_codecs_unicode_internal_encode_impl(PyObject *module, PyObject *obj,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300683 const char *errors)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300684/*[clinic end generated code: output=a72507dde4ea558f input=8628f0280cf5ba61]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300685{
Ezio Melottiadc417c2011-11-17 12:23:34 +0200686 if (PyErr_WarnEx(PyExc_DeprecationWarning,
687 "unicode_internal codec has been deprecated",
688 1))
689 return NULL;
690
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000691 if (PyUnicode_Check(obj)) {
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100692 Py_UNICODE *u;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200693 Py_ssize_t len, size;
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100694
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100695 if (PyUnicode_READY(obj) < 0)
696 return NULL;
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100697
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100698 u = PyUnicode_AsUnicodeAndSize(obj, &len);
699 if (u == NULL)
700 return NULL;
Victor Stinner049e5092014-08-17 22:20:00 +0200701 if ((size_t)len > (size_t)PY_SSIZE_T_MAX / sizeof(Py_UNICODE))
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100702 return PyErr_NoMemory();
703 size = len * sizeof(Py_UNICODE);
704 return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size),
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100705 PyUnicode_GET_LENGTH(obj));
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000706 }
707 else {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200708 Py_buffer view;
709 PyObject *result;
710 if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000711 return NULL;
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300712 result = codec_tuple(PyBytes_FromStringAndSize(view.buf, view.len),
713 view.len);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200714 PyBuffer_Release(&view);
715 return result;
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000716 }
717}
718
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300719/*[clinic input]
720_codecs.utf_7_encode
721 str: object
722 errors: str(accept={str, NoneType}) = NULL
723 /
724[clinic start generated code]*/
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000725
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300726static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300727_codecs_utf_7_encode_impl(PyObject *module, PyObject *str,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300728 const char *errors)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300729/*[clinic end generated code: output=0feda21ffc921bc8 input=fd91a78f103b0421]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300730{
731 PyObject *v;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000732
733 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100734 if (str == NULL || PyUnicode_READY(str) < 0) {
735 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000736 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100737 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100738 v = codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
739 PyUnicode_GET_LENGTH(str));
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000740 Py_DECREF(str);
741 return v;
742}
743
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300744/*[clinic input]
745_codecs.utf_8_encode
746 str: object
747 errors: str(accept={str, NoneType}) = NULL
748 /
749[clinic start generated code]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000750
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300751static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300752_codecs_utf_8_encode_impl(PyObject *module, PyObject *str,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300753 const char *errors)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300754/*[clinic end generated code: output=02bf47332b9c796c input=2c22d40532f071f3]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300755{
756 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000757
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000758 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100759 if (str == NULL || PyUnicode_READY(str) < 0) {
760 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000761 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100762 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200763 v = codec_tuple(PyUnicode_AsEncodedString(str, "utf-8", errors),
764 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000765 Py_DECREF(str);
766 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000767}
768
769/* This version provides access to the byteorder parameter of the
770 builtin UTF-16 codecs as optional third argument. It defaults to 0
771 which means: use the native byte order and prepend the data with a
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000772 BOM mark.
Guido van Rossume2d67f92000-03-10 23:09:23 +0000773
774*/
775
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300776/*[clinic input]
777_codecs.utf_16_encode
778 str: object
779 errors: str(accept={str, NoneType}) = NULL
780 byteorder: int = 0
781 /
782[clinic start generated code]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000783
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300784static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300785_codecs_utf_16_encode_impl(PyObject *module, PyObject *str,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300786 const char *errors, int byteorder)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300787/*[clinic end generated code: output=c654e13efa2e64e4 input=3935a489b2d5385e]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300788{
789 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000790
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000791 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100792 if (str == NULL || PyUnicode_READY(str) < 0) {
793 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000794 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100795 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100796 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
797 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000798 Py_DECREF(str);
799 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000800}
801
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300802/*[clinic input]
803_codecs.utf_16_le_encode
804 str: object
805 errors: str(accept={str, NoneType}) = NULL
806 /
807[clinic start generated code]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000808
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300809static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300810_codecs_utf_16_le_encode_impl(PyObject *module, PyObject *str,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300811 const char *errors)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300812/*[clinic end generated code: output=431b01e55f2d4995 input=bc27df05d1d20dfe]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300813{
814 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000815
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000816 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100817 if (str == NULL || PyUnicode_READY(str) < 0) {
818 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000819 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100820 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100821 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
822 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000823 Py_DECREF(str);
824 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000825}
826
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300827/*[clinic input]
828_codecs.utf_16_be_encode
829 str: object
830 errors: str(accept={str, NoneType}) = NULL
831 /
832[clinic start generated code]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000833
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300834static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300835_codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300836 const char *errors)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300837/*[clinic end generated code: output=96886a6fd54dcae3 input=5a69d4112763462b]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300838{
839 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000840
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000841 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100842 if (str == NULL || PyUnicode_READY(str) < 0) {
843 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000844 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100845 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100846 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
847 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000848 Py_DECREF(str);
849 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000850}
851
Walter Dörwald41980ca2007-08-16 21:55:45 +0000852/* This version provides access to the byteorder parameter of the
853 builtin UTF-32 codecs as optional third argument. It defaults to 0
854 which means: use the native byte order and prepend the data with a
855 BOM mark.
856
857*/
858
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300859/*[clinic input]
860_codecs.utf_32_encode
861 str: object
862 errors: str(accept={str, NoneType}) = NULL
863 byteorder: int = 0
864 /
865[clinic start generated code]*/
Walter Dörwald41980ca2007-08-16 21:55:45 +0000866
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300867static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300868_codecs_utf_32_encode_impl(PyObject *module, PyObject *str,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300869 const char *errors, int byteorder)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300870/*[clinic end generated code: output=5c760da0c09a8b83 input=434a1efa492b8d58]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300871{
872 PyObject *v;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000873
874 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100875 if (str == NULL || PyUnicode_READY(str) < 0) {
876 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000877 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100878 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100879 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
880 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000881 Py_DECREF(str);
882 return v;
883}
884
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300885/*[clinic input]
886_codecs.utf_32_le_encode
887 str: object
888 errors: str(accept={str, NoneType}) = NULL
889 /
890[clinic start generated code]*/
Walter Dörwald41980ca2007-08-16 21:55:45 +0000891
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300892static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300893_codecs_utf_32_le_encode_impl(PyObject *module, PyObject *str,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300894 const char *errors)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300895/*[clinic end generated code: output=b65cd176de8e36d6 input=dfa2d7dc78b99422]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300896{
897 PyObject *v;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000898
899 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100900 if (str == NULL || PyUnicode_READY(str) < 0) {
901 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000902 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100903 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100904 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
905 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000906 Py_DECREF(str);
907 return v;
908}
909
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300910/*[clinic input]
911_codecs.utf_32_be_encode
912 str: object
913 errors: str(accept={str, NoneType}) = NULL
914 /
915[clinic start generated code]*/
Walter Dörwald41980ca2007-08-16 21:55:45 +0000916
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300917static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300918_codecs_utf_32_be_encode_impl(PyObject *module, PyObject *str,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300919 const char *errors)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300920/*[clinic end generated code: output=1d9e71a9358709e9 input=4595617b18169002]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300921{
922 PyObject *v;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000923
924 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100925 if (str == NULL || PyUnicode_READY(str) < 0) {
926 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000927 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100928 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100929 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
930 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000931 Py_DECREF(str);
932 return v;
933}
934
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300935/*[clinic input]
936_codecs.unicode_escape_encode
937 str: object
938 errors: str(accept={str, NoneType}) = NULL
939 /
940[clinic start generated code]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000941
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300942static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300943_codecs_unicode_escape_encode_impl(PyObject *module, PyObject *str,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300944 const char *errors)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300945/*[clinic end generated code: output=66271b30bc4f7a3c input=8273506f14076912]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300946{
947 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000948
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000949 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100950 if (str == NULL || PyUnicode_READY(str) < 0) {
951 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000952 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100953 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100954 v = codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
955 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000956 Py_DECREF(str);
957 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000958}
959
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300960/*[clinic input]
961_codecs.raw_unicode_escape_encode
962 str: object
963 errors: str(accept={str, NoneType}) = NULL
964 /
965[clinic start generated code]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000966
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300967static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300968_codecs_raw_unicode_escape_encode_impl(PyObject *module, PyObject *str,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300969 const char *errors)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300970/*[clinic end generated code: output=a66a806ed01c830a input=181755d5dfacef3c]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300971{
972 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000973
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000974 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100975 if (str == NULL || PyUnicode_READY(str) < 0) {
976 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000977 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100978 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100979 v = codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
980 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000981 Py_DECREF(str);
982 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000983}
984
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300985/*[clinic input]
986_codecs.latin_1_encode
987 str: object
988 errors: str(accept={str, NoneType}) = NULL
989 /
990[clinic start generated code]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000991
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300992static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300993_codecs_latin_1_encode_impl(PyObject *module, PyObject *str,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300994 const char *errors)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300995/*[clinic end generated code: output=2c28c83a27884e08 input=f03f6dcf1d84bee4]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300996{
997 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000998
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000999 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001000 if (str == NULL || PyUnicode_READY(str) < 0) {
1001 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001002 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001003 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001004 v = codec_tuple(_PyUnicode_AsLatin1String(str, errors),
1005 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001006 Py_DECREF(str);
1007 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001008}
1009
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001010/*[clinic input]
1011_codecs.ascii_encode
1012 str: object
1013 errors: str(accept={str, NoneType}) = NULL
1014 /
1015[clinic start generated code]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +00001016
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001017static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001018_codecs_ascii_encode_impl(PyObject *module, PyObject *str,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001019 const char *errors)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001020/*[clinic end generated code: output=b5e035182d33befc input=d87e25a10a593fee]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001021{
1022 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001023
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001024 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001025 if (str == NULL || PyUnicode_READY(str) < 0) {
1026 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001027 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001028 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001029 v = codec_tuple(_PyUnicode_AsASCIIString(str, errors),
1030 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001031 Py_DECREF(str);
1032 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001033}
1034
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001035/*[clinic input]
1036_codecs.charmap_encode
1037 str: object
1038 errors: str(accept={str, NoneType}) = NULL
1039 mapping: object = NULL
1040 /
1041[clinic start generated code]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +00001042
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001043static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001044_codecs_charmap_encode_impl(PyObject *module, PyObject *str,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001045 const char *errors, PyObject *mapping)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001046/*[clinic end generated code: output=047476f48495a9e9 input=85f4172661e8dad9]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001047{
1048 PyObject *v;
1049
Guido van Rossume2d67f92000-03-10 23:09:23 +00001050 if (mapping == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001051 mapping = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001052
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001053 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001054 if (str == NULL || PyUnicode_READY(str) < 0) {
1055 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001056 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001057 }
Martin v. Löwis23e275b2011-11-02 18:02:51 +01001058 v = codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001059 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001060 Py_DECREF(str);
1061 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001062}
1063
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001064/*[clinic input]
1065_codecs.charmap_build
1066 map: unicode
1067 /
1068[clinic start generated code]*/
1069
1070static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001071_codecs_charmap_build_impl(PyObject *module, PyObject *map)
1072/*[clinic end generated code: output=bb073c27031db9ac input=d91a91d1717dbc6d]*/
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001073{
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001074 return PyUnicode_BuildEncodingMap(map);
1075}
1076
Victor Stinner99b95382011-07-04 14:23:54 +02001077#ifdef HAVE_MBCS
Guido van Rossum24bdb042000-03-28 20:29:59 +00001078
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001079/*[clinic input]
1080_codecs.mbcs_encode
1081 str: object
1082 errors: str(accept={str, NoneType}) = NULL
1083 /
1084[clinic start generated code]*/
Guido van Rossum24bdb042000-03-28 20:29:59 +00001085
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001086static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001087_codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors)
1088/*[clinic end generated code: output=76e2e170c966c080 input=65c09ee1e4203263]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001089{
1090 PyObject *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001091
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001092 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001093 if (str == NULL || PyUnicode_READY(str) < 0) {
1094 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001095 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001096 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001097 v = codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
1098 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001099 Py_DECREF(str);
1100 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001101}
1102
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001103/*[clinic input]
1104_codecs.code_page_encode
1105 code_page: int
1106 str: object
1107 errors: str(accept={str, NoneType}) = NULL
1108 /
1109[clinic start generated code]*/
Victor Stinner3a50e702011-10-18 21:21:00 +02001110
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001111static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001112_codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str,
1113 const char *errors)
1114/*[clinic end generated code: output=45673f6085657a9e input=c8562ec460c2e309]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001115{
1116 PyObject *v;
Victor Stinner3a50e702011-10-18 21:21:00 +02001117
1118 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001119 if (str == NULL || PyUnicode_READY(str) < 0) {
1120 Py_XDECREF(str);
Victor Stinner3a50e702011-10-18 21:21:00 +02001121 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001122 }
Victor Stinner3a50e702011-10-18 21:21:00 +02001123 v = codec_tuple(PyUnicode_EncodeCodePage(code_page,
1124 str,
1125 errors),
1126 PyUnicode_GET_LENGTH(str));
1127 Py_DECREF(str);
1128 return v;
1129}
1130
Victor Stinner99b95382011-07-04 14:23:54 +02001131#endif /* HAVE_MBCS */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001132
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001133/* --- Error handler registry --------------------------------------------- */
1134
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001135/*[clinic input]
1136_codecs.register_error
1137 errors: str
1138 handler: object
1139 /
Walter Dörwald0ae29812002-10-31 13:36:29 +00001140
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001141Register the specified error handler under the name errors.
1142
1143handler must be a callable object, that will be called with an exception
1144instance containing information about the location of the encoding/decoding
1145error and must return a (replacement, new position) tuple.
1146[clinic start generated code]*/
1147
1148static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001149_codecs_register_error_impl(PyObject *module, const char *errors,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001150 PyObject *handler)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001151/*[clinic end generated code: output=fa2f7d1879b3067d input=5e6709203c2e33fe]*/
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001152{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001153 if (PyCodec_RegisterError(errors, handler))
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001154 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001155 Py_RETURN_NONE;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001156}
1157
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001158/*[clinic input]
1159_codecs.lookup_error
1160 name: str
1161 /
Walter Dörwald0ae29812002-10-31 13:36:29 +00001162
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001163lookup_error(errors) -> handler
1164
1165Return the error handler for the specified error handling name or raise a
1166LookupError, if no handler exists under this name.
1167[clinic start generated code]*/
1168
1169static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001170_codecs_lookup_error_impl(PyObject *module, const char *name)
1171/*[clinic end generated code: output=087f05dc0c9a98cc input=4775dd65e6235aba]*/
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001172{
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001173 return PyCodec_LookupError(name);
1174}
1175
Guido van Rossume2d67f92000-03-10 23:09:23 +00001176/* --- Module API --------------------------------------------------------- */
1177
1178static PyMethodDef _codecs_functions[] = {
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001179 _CODECS_REGISTER_METHODDEF
1180 _CODECS_LOOKUP_METHODDEF
1181 _CODECS_ENCODE_METHODDEF
1182 _CODECS_DECODE_METHODDEF
1183 _CODECS_ESCAPE_ENCODE_METHODDEF
1184 _CODECS_ESCAPE_DECODE_METHODDEF
1185 _CODECS_UTF_8_ENCODE_METHODDEF
1186 _CODECS_UTF_8_DECODE_METHODDEF
1187 _CODECS_UTF_7_ENCODE_METHODDEF
1188 _CODECS_UTF_7_DECODE_METHODDEF
1189 _CODECS_UTF_16_ENCODE_METHODDEF
1190 _CODECS_UTF_16_LE_ENCODE_METHODDEF
1191 _CODECS_UTF_16_BE_ENCODE_METHODDEF
1192 _CODECS_UTF_16_DECODE_METHODDEF
1193 _CODECS_UTF_16_LE_DECODE_METHODDEF
1194 _CODECS_UTF_16_BE_DECODE_METHODDEF
1195 _CODECS_UTF_16_EX_DECODE_METHODDEF
1196 _CODECS_UTF_32_ENCODE_METHODDEF
1197 _CODECS_UTF_32_LE_ENCODE_METHODDEF
1198 _CODECS_UTF_32_BE_ENCODE_METHODDEF
1199 _CODECS_UTF_32_DECODE_METHODDEF
1200 _CODECS_UTF_32_LE_DECODE_METHODDEF
1201 _CODECS_UTF_32_BE_DECODE_METHODDEF
1202 _CODECS_UTF_32_EX_DECODE_METHODDEF
1203 _CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF
1204 _CODECS_UNICODE_ESCAPE_DECODE_METHODDEF
1205 _CODECS_UNICODE_INTERNAL_ENCODE_METHODDEF
1206 _CODECS_UNICODE_INTERNAL_DECODE_METHODDEF
1207 _CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF
1208 _CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF
1209 _CODECS_LATIN_1_ENCODE_METHODDEF
1210 _CODECS_LATIN_1_DECODE_METHODDEF
1211 _CODECS_ASCII_ENCODE_METHODDEF
1212 _CODECS_ASCII_DECODE_METHODDEF
1213 _CODECS_CHARMAP_ENCODE_METHODDEF
1214 _CODECS_CHARMAP_DECODE_METHODDEF
1215 _CODECS_CHARMAP_BUILD_METHODDEF
1216 _CODECS_READBUFFER_ENCODE_METHODDEF
1217 _CODECS_MBCS_ENCODE_METHODDEF
1218 _CODECS_MBCS_DECODE_METHODDEF
1219 _CODECS_CODE_PAGE_ENCODE_METHODDEF
1220 _CODECS_CODE_PAGE_DECODE_METHODDEF
1221 _CODECS_REGISTER_ERROR_METHODDEF
1222 _CODECS_LOOKUP_ERROR_METHODDEF
Nick Coghlan8fad1672014-09-15 23:50:44 +12001223 _CODECS__FORGET_CODEC_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001224 {NULL, NULL} /* sentinel */
Guido van Rossume2d67f92000-03-10 23:09:23 +00001225};
1226
Martin v. Löwis1a214512008-06-11 05:26:20 +00001227static struct PyModuleDef codecsmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001228 PyModuleDef_HEAD_INIT,
1229 "_codecs",
1230 NULL,
1231 -1,
1232 _codecs_functions,
1233 NULL,
1234 NULL,
1235 NULL,
1236 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001237};
1238
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001239PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001240PyInit__codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001241{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001242 return PyModule_Create(&codecsmodule);
Guido van Rossume2d67f92000-03-10 23:09:23 +00001243}