blob: 611ca82cac86cca44fc77bb4d0c4961517de9946 [file] [log] [blame]
Guido van Rossume2d67f92000-03-10 23:09:23 +00001/* ------------------------------------------------------------------------
2
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
5
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
8
9 The codec registry is accessible via:
10
11 register(search_function) -> None
12
Guido van Rossum36e0a922007-07-20 04:05:57 +000013 lookup(encoding) -> CodecInfo object
Guido van Rossume2d67f92000-03-10 23:09:23 +000014
15 The builtin Unicode codecs use the following interface:
16
Walter Dörwald9fd115c2005-11-02 08:30:08 +000017 <encoding>_encode(Unicode_object[,errors='strict']) ->
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000018 (string object, bytes consumed)
Guido van Rossume2d67f92000-03-10 23:09:23 +000019
Walter Dörwald9fd115c2005-11-02 08:30:08 +000020 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
Guido van Rossume2d67f92000-03-10 23:09:23 +000021 (Unicode object, bytes consumed)
22
23 These <encoding>s are available: utf_8, unicode_escape,
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +000024 raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
25 mbcs (on win32).
26
Guido van Rossume2d67f92000-03-10 23:09:23 +000027
28Written by Marc-Andre Lemburg (mal@lemburg.com).
29
Guido van Rossum16b1ad92000-08-03 16:24:25 +000030Copyright (c) Corporation for National Research Initiatives.
Guido van Rossume2d67f92000-03-10 23:09:23 +000031
32 ------------------------------------------------------------------------ */
33
Martin v. Löwis18e16552006-02-15 17:27:45 +000034#define PY_SSIZE_T_CLEAN
Guido van Rossume2d67f92000-03-10 23:09:23 +000035#include "Python.h"
36
Victor Stinner87a7c822011-11-10 20:05:55 +010037#ifdef MS_WINDOWS
38#include <windows.h>
39#endif
40
Nick Coghlan8fad1672014-09-15 23:50:44 +120041/*[clinic input]
42module _codecs
43[clinic start generated code]*/
44/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
45
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030046#include "clinic/_codecsmodule.c.h"
Nick Coghlan8fad1672014-09-15 23:50:44 +120047
Guido van Rossume2d67f92000-03-10 23:09:23 +000048/* --- Registry ----------------------------------------------------------- */
49
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +030050/*[clinic input]
51_codecs.register
52 search_function: object
53 /
Walter Dörwald0ae29812002-10-31 13:36:29 +000054
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +030055Register a codec search function.
56
57Search functions are expected to take one argument, the encoding name in
58all lower case letters, and either return None, or a tuple of functions
59(encoder, decoder, stream_reader, stream_writer) (or a CodecInfo object).
60[clinic start generated code]*/
61
62static PyObject *
63_codecs_register(PyModuleDef *module, PyObject *search_function)
64/*[clinic end generated code: output=d17608b6ad380eb8 input=369578467955cae4]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +000065{
Guido van Rossume2d67f92000-03-10 23:09:23 +000066 if (PyCodec_Register(search_function))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000067 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000068
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000069 Py_RETURN_NONE;
Guido van Rossume2d67f92000-03-10 23:09:23 +000070}
71
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +030072/*[clinic input]
73_codecs.lookup
74 encoding: str
75 /
Walter Dörwald0ae29812002-10-31 13:36:29 +000076
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +030077Looks up a codec tuple in the Python codec registry and returns a CodecInfo object.
78[clinic start generated code]*/
79
80static PyObject *
81_codecs_lookup_impl(PyModuleDef *module, const char *encoding)
82/*[clinic end generated code: output=798e41aff0c04ef6 input=3c572c0db3febe9c]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +000083{
Guido van Rossume2d67f92000-03-10 23:09:23 +000084 return _PyCodec_Lookup(encoding);
Guido van Rossume2d67f92000-03-10 23:09:23 +000085}
86
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +030087/*[clinic input]
88_codecs.encode
89 obj: object
Serhiy Storchakac97a9622015-08-09 12:23:08 +030090 encoding: str(c_default="NULL") = "utf-8"
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +030091 errors: str(c_default="NULL") = "strict"
92
93Encodes obj using the codec registered for encoding.
94
Serhiy Storchakac97a9622015-08-09 12:23:08 +030095The default encoding is 'utf-8'. errors may be given to set a
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +030096different error handling scheme. Default is 'strict' meaning that encoding
97errors raise a ValueError. Other possible values are 'ignore', 'replace'
98and 'backslashreplace' as well as any other name registered with
99codecs.register_error that can handle ValueErrors.
100[clinic start generated code]*/
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000101
102static PyObject *
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300103_codecs_encode_impl(PyModuleDef *module, PyObject *obj, const char *encoding,
104 const char *errors)
Serhiy Storchakac97a9622015-08-09 12:23:08 +0300105/*[clinic end generated code: output=5c073f62249c8d7c input=cd5b685040ff61f0]*/
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000106{
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000107 if (encoding == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000108 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000109
110 /* Encode via the codec registry */
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300111 return PyCodec_Encode(obj, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000112}
113
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300114/*[clinic input]
115_codecs.decode
116 obj: object
Serhiy Storchakac97a9622015-08-09 12:23:08 +0300117 encoding: str(c_default="NULL") = "utf-8"
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300118 errors: str(c_default="NULL") = "strict"
119
120Decodes obj using the codec registered for encoding.
121
Serhiy Storchakac97a9622015-08-09 12:23:08 +0300122Default encoding is 'utf-8'. errors may be given to set a
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300123different error handling scheme. Default is 'strict' meaning that encoding
124errors raise a ValueError. Other possible values are 'ignore', 'replace'
125and 'backslashreplace' as well as any other name registered with
126codecs.register_error that can handle ValueErrors.
127[clinic start generated code]*/
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000128
129static PyObject *
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300130_codecs_decode_impl(PyModuleDef *module, PyObject *obj, const char *encoding,
131 const char *errors)
Serhiy Storchakac97a9622015-08-09 12:23:08 +0300132/*[clinic end generated code: output=c81cbf6189a7f878 input=7702c0cc2fa1add6]*/
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000133{
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000134 if (encoding == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000135 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000136
137 /* Decode via the codec registry */
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300138 return PyCodec_Decode(obj, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000139}
140
Guido van Rossume2d67f92000-03-10 23:09:23 +0000141/* --- Helpers ------------------------------------------------------------ */
142
Nick Coghlan8fad1672014-09-15 23:50:44 +1200143/*[clinic input]
144_codecs._forget_codec
145
146 encoding: str
147 /
148
149Purge the named codec from the internal codec lookup cache
150[clinic start generated code]*/
151
Nick Coghlan8fad1672014-09-15 23:50:44 +1200152static PyObject *
153_codecs__forget_codec_impl(PyModuleDef *module, const char *encoding)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300154/*[clinic end generated code: output=b56a9b99d2d28080 input=18d5d92d0e386c38]*/
Nick Coghlan8fad1672014-09-15 23:50:44 +1200155{
156 if (_PyCodec_Forget(encoding) < 0) {
157 return NULL;
158 };
159 Py_RETURN_NONE;
160}
161
Guido van Rossume2d67f92000-03-10 23:09:23 +0000162static
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300163PyObject *codec_tuple(PyObject *decoded,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000164 Py_ssize_t len)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000165{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300166 if (decoded == NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000167 return NULL;
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300168 return Py_BuildValue("Nn", decoded, len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000169}
170
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000171/* --- String codecs ------------------------------------------------------ */
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300172/*[clinic input]
173_codecs.escape_decode
174 data: Py_buffer(accept={str, buffer})
175 errors: str(accept={str, NoneType}) = NULL
176 /
177[clinic start generated code]*/
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000178
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300179static PyObject *
180_codecs_escape_decode_impl(PyModuleDef *module, Py_buffer *data,
181 const char *errors)
182/*[clinic end generated code: output=648fa3e78d03e658 input=0018edfd99db714d]*/
183{
184 PyObject *decoded = PyBytes_DecodeEscape(data->buf, data->len,
185 errors, 0, NULL);
186 return codec_tuple(decoded, data->len);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000187}
188
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300189/*[clinic input]
190_codecs.escape_encode
191 data: object(subclass_of='&PyBytes_Type')
192 errors: str(accept={str, NoneType}) = NULL
193 /
194[clinic start generated code]*/
195
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000196static PyObject *
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300197_codecs_escape_encode_impl(PyModuleDef *module, PyObject *data,
198 const char *errors)
199/*[clinic end generated code: output=fcd6f34fe4111c50 input=da9ded00992f32f2]*/
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000200{
Antoine Pitroud1188562010-06-09 16:38:55 +0000201 Py_ssize_t size;
202 Py_ssize_t newsize;
Antoine Pitroud1188562010-06-09 16:38:55 +0000203 PyObject *v;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000204
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300205 size = PyBytes_GET_SIZE(data);
Mark Dickinsonc04ddff2012-10-06 18:04:49 +0100206 if (size > PY_SSIZE_T_MAX / 4) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000207 PyErr_SetString(PyExc_OverflowError,
208 "string is too large to encode");
209 return NULL;
210 }
Mark Dickinsonc04ddff2012-10-06 18:04:49 +0100211 newsize = 4*size;
Antoine Pitroud1188562010-06-09 16:38:55 +0000212 v = PyBytes_FromStringAndSize(NULL, newsize);
213
214 if (v == NULL) {
215 return NULL;
216 }
217 else {
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200218 Py_ssize_t i;
219 char c;
220 char *p = PyBytes_AS_STRING(v);
Antoine Pitroud1188562010-06-09 16:38:55 +0000221
222 for (i = 0; i < size; i++) {
223 /* There's at least enough room for a hex escape */
224 assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300225 c = PyBytes_AS_STRING(data)[i];
Antoine Pitroud1188562010-06-09 16:38:55 +0000226 if (c == '\'' || c == '\\')
227 *p++ = '\\', *p++ = c;
228 else if (c == '\t')
229 *p++ = '\\', *p++ = 't';
230 else if (c == '\n')
231 *p++ = '\\', *p++ = 'n';
232 else if (c == '\r')
233 *p++ = '\\', *p++ = 'r';
234 else if (c < ' ' || c >= 0x7f) {
235 *p++ = '\\';
236 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200237 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
238 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitroud1188562010-06-09 16:38:55 +0000239 }
240 else
241 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000242 }
Antoine Pitroud1188562010-06-09 16:38:55 +0000243 *p = '\0';
244 if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
245 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000246 }
Antoine Pitroud1188562010-06-09 16:38:55 +0000247 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000248
Antoine Pitroud1188562010-06-09 16:38:55 +0000249 return codec_tuple(v, size);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000250}
251
Guido van Rossume2d67f92000-03-10 23:09:23 +0000252/* --- Decoder ------------------------------------------------------------ */
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300253/*[clinic input]
254_codecs.unicode_internal_decode
255 obj: object
256 errors: str(accept={str, NoneType}) = NULL
257 /
258[clinic start generated code]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000259
260static PyObject *
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300261_codecs_unicode_internal_decode_impl(PyModuleDef *module, PyObject *obj,
262 const char *errors)
263/*[clinic end generated code: output=9fe47c2cd8807d92 input=8d57930aeda170c6]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000264{
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000265 if (PyUnicode_Check(obj)) {
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100266 if (PyUnicode_READY(obj) < 0)
267 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000268 Py_INCREF(obj);
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100269 return codec_tuple(obj, PyUnicode_GET_LENGTH(obj));
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000270 }
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000271 else {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200272 Py_buffer view;
273 PyObject *result;
274 if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 return NULL;
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000276
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200277 result = codec_tuple(
278 _PyUnicode_DecodeUnicodeInternal(view.buf, view.len, errors),
279 view.len);
280 PyBuffer_Release(&view);
281 return result;
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000282 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000283}
284
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300285/*[clinic input]
286_codecs.utf_7_decode
287 data: Py_buffer
288 errors: str(accept={str, NoneType}) = NULL
289 final: int(c_default="0") = False
290 /
291[clinic start generated code]*/
292
Guido van Rossume2d67f92000-03-10 23:09:23 +0000293static PyObject *
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300294_codecs_utf_7_decode_impl(PyModuleDef *module, Py_buffer *data,
295 const char *errors, int final)
296/*[clinic end generated code: output=ca945e907e72e827 input=bc4d6247ecdb01e6]*/
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000297{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300298 Py_ssize_t consumed = data->len;
299 PyObject *decoded = PyUnicode_DecodeUTF7Stateful(data->buf, data->len,
300 errors,
301 final ? NULL : &consumed);
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000302 return codec_tuple(decoded, consumed);
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000303}
304
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300305/*[clinic input]
306_codecs.utf_8_decode
307 data: Py_buffer
308 errors: str(accept={str, NoneType}) = NULL
309 final: int(c_default="0") = False
310 /
311[clinic start generated code]*/
312
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000313static PyObject *
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300314_codecs_utf_8_decode_impl(PyModuleDef *module, Py_buffer *data,
315 const char *errors, int final)
316/*[clinic end generated code: output=7309f9ff4ef5c9b6 input=39161d71e7422ee2]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000317{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300318 Py_ssize_t consumed = data->len;
319 PyObject *decoded = PyUnicode_DecodeUTF8Stateful(data->buf, data->len,
320 errors,
321 final ? NULL : &consumed);
Walter Dörwald69652032004-09-07 20:24:22 +0000322 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000323}
324
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300325/*[clinic input]
326_codecs.utf_16_decode
327 data: Py_buffer
328 errors: str(accept={str, NoneType}) = NULL
329 final: int(c_default="0") = False
330 /
331[clinic start generated code]*/
332
Guido van Rossume2d67f92000-03-10 23:09:23 +0000333static PyObject *
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300334_codecs_utf_16_decode_impl(PyModuleDef *module, Py_buffer *data,
335 const char *errors, int final)
336/*[clinic end generated code: output=8d2fa0507d9bef2c input=f3cf01d1461007ce]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000337{
Guido van Rossume2d67f92000-03-10 23:09:23 +0000338 int byteorder = 0;
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300339 /* This is overwritten unless final is true. */
340 Py_ssize_t consumed = data->len;
341 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
342 errors, &byteorder,
343 final ? NULL : &consumed);
Walter Dörwald69652032004-09-07 20:24:22 +0000344 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000345}
346
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300347/*[clinic input]
348_codecs.utf_16_le_decode
349 data: Py_buffer
350 errors: str(accept={str, NoneType}) = NULL
351 final: int(c_default="0") = False
352 /
353[clinic start generated code]*/
354
Guido van Rossume2d67f92000-03-10 23:09:23 +0000355static PyObject *
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300356_codecs_utf_16_le_decode_impl(PyModuleDef *module, Py_buffer *data,
357 const char *errors, int final)
358/*[clinic end generated code: output=4fd621515ef4ce18 input=a77e3bf97335d94e]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000359{
Guido van Rossume2d67f92000-03-10 23:09:23 +0000360 int byteorder = -1;
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300361 /* This is overwritten unless final is true. */
362 Py_ssize_t consumed = data->len;
363 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
364 errors, &byteorder,
365 final ? NULL : &consumed);
Walter Dörwald69652032004-09-07 20:24:22 +0000366 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000367}
368
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300369/*[clinic input]
370_codecs.utf_16_be_decode
371 data: Py_buffer
372 errors: str(accept={str, NoneType}) = NULL
373 final: int(c_default="0") = False
374 /
375[clinic start generated code]*/
376
Guido van Rossume2d67f92000-03-10 23:09:23 +0000377static PyObject *
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300378_codecs_utf_16_be_decode_impl(PyModuleDef *module, Py_buffer *data,
379 const char *errors, int final)
380/*[clinic end generated code: output=792f4eacb3e1fa05 input=606f69fae91b5563]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000381{
Guido van Rossume2d67f92000-03-10 23:09:23 +0000382 int byteorder = 1;
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300383 /* This is overwritten unless final is true. */
384 Py_ssize_t consumed = data->len;
385 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
386 errors, &byteorder,
387 final ? NULL : &consumed);
Walter Dörwald69652032004-09-07 20:24:22 +0000388 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000389}
390
391/* This non-standard version also provides access to the byteorder
392 parameter of the builtin UTF-16 codec.
393
394 It returns a tuple (unicode, bytesread, byteorder) with byteorder
395 being the value in effect at the end of data.
396
397*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300398/*[clinic input]
399_codecs.utf_16_ex_decode
400 data: Py_buffer
401 errors: str(accept={str, NoneType}) = NULL
402 byteorder: int = 0
403 final: int(c_default="0") = False
404 /
405[clinic start generated code]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000406
407static PyObject *
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300408_codecs_utf_16_ex_decode_impl(PyModuleDef *module, Py_buffer *data,
409 const char *errors, int byteorder, int final)
410/*[clinic end generated code: output=f136a186dc2defa0 input=f6e7f697658c013e]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000411{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300412 /* This is overwritten unless final is true. */
413 Py_ssize_t consumed = data->len;
Walter Dörwald69652032004-09-07 20:24:22 +0000414
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300415 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
416 errors, &byteorder,
417 final ? NULL : &consumed);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000418 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000419 return NULL;
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300420 return Py_BuildValue("Nni", decoded, consumed, byteorder);
421}
422
423/*[clinic input]
424_codecs.utf_32_decode
425 data: Py_buffer
426 errors: str(accept={str, NoneType}) = NULL
427 final: int(c_default="0") = False
428 /
429[clinic start generated code]*/
430
431static PyObject *
432_codecs_utf_32_decode_impl(PyModuleDef *module, Py_buffer *data,
433 const char *errors, int final)
434/*[clinic end generated code: output=b7635e55857e8efb input=86d4f41c6c2e763d]*/
435{
436 int byteorder = 0;
437 /* This is overwritten unless final is true. */
438 Py_ssize_t consumed = data->len;
439 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
440 errors, &byteorder,
441 final ? NULL : &consumed);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000442 return codec_tuple(decoded, consumed);
443}
444
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300445/*[clinic input]
446_codecs.utf_32_le_decode
447 data: Py_buffer
448 errors: str(accept={str, NoneType}) = NULL
449 final: int(c_default="0") = False
450 /
451[clinic start generated code]*/
452
Walter Dörwald41980ca2007-08-16 21:55:45 +0000453static PyObject *
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300454_codecs_utf_32_le_decode_impl(PyModuleDef *module, Py_buffer *data,
455 const char *errors, int final)
456/*[clinic end generated code: output=a79d1787d8ddf988 input=d18b650772d188ba]*/
Walter Dörwald41980ca2007-08-16 21:55:45 +0000457{
Walter Dörwald41980ca2007-08-16 21:55:45 +0000458 int byteorder = -1;
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300459 /* This is overwritten unless final is true. */
460 Py_ssize_t consumed = data->len;
461 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
462 errors, &byteorder,
463 final ? NULL : &consumed);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000464 return codec_tuple(decoded, consumed);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000465}
466
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300467/*[clinic input]
468_codecs.utf_32_be_decode
469 data: Py_buffer
470 errors: str(accept={str, NoneType}) = NULL
471 final: int(c_default="0") = False
472 /
473[clinic start generated code]*/
Walter Dörwald41980ca2007-08-16 21:55:45 +0000474
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300475static PyObject *
476_codecs_utf_32_be_decode_impl(PyModuleDef *module, Py_buffer *data,
477 const char *errors, int final)
478/*[clinic end generated code: output=a8356b0f36779981 input=19c271b5d34926d8]*/
479{
480 int byteorder = 1;
481 /* This is overwritten unless final is true. */
482 Py_ssize_t consumed = data->len;
483 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
484 errors, &byteorder,
485 final ? NULL : &consumed);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000486 return codec_tuple(decoded, consumed);
487}
488
489/* This non-standard version also provides access to the byteorder
490 parameter of the builtin UTF-32 codec.
491
492 It returns a tuple (unicode, bytesread, byteorder) with byteorder
493 being the value in effect at the end of data.
494
495*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300496/*[clinic input]
497_codecs.utf_32_ex_decode
498 data: Py_buffer
499 errors: str(accept={str, NoneType}) = NULL
500 byteorder: int = 0
501 final: int(c_default="0") = False
502 /
503[clinic start generated code]*/
Walter Dörwald41980ca2007-08-16 21:55:45 +0000504
505static PyObject *
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300506_codecs_utf_32_ex_decode_impl(PyModuleDef *module, Py_buffer *data,
507 const char *errors, int byteorder, int final)
508/*[clinic end generated code: output=ab8c70977c1992f5 input=4af3e6ccfe34a076]*/
Walter Dörwald41980ca2007-08-16 21:55:45 +0000509{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300510 Py_ssize_t consumed = data->len;
511 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
512 errors, &byteorder,
513 final ? NULL : &consumed);
514 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000515 return NULL;
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300516 return Py_BuildValue("Nni", decoded, consumed, byteorder);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000517}
518
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300519/*[clinic input]
520_codecs.unicode_escape_decode
521 data: Py_buffer(accept={str, buffer})
522 errors: str(accept={str, NoneType}) = NULL
523 /
524[clinic start generated code]*/
525
Walter Dörwald41980ca2007-08-16 21:55:45 +0000526static PyObject *
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300527_codecs_unicode_escape_decode_impl(PyModuleDef *module, Py_buffer *data,
528 const char *errors)
529/*[clinic end generated code: output=d1aa63f2620c4999 input=49fd27d06813a7f5]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000530{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300531 PyObject *decoded = PyUnicode_DecodeUnicodeEscape(data->buf, data->len,
532 errors);
533 return codec_tuple(decoded, data->len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000534}
535
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300536/*[clinic input]
537_codecs.raw_unicode_escape_decode
538 data: Py_buffer(accept={str, buffer})
539 errors: str(accept={str, NoneType}) = NULL
540 /
541[clinic start generated code]*/
542
Guido van Rossume2d67f92000-03-10 23:09:23 +0000543static PyObject *
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300544_codecs_raw_unicode_escape_decode_impl(PyModuleDef *module, Py_buffer *data,
545 const char *errors)
546/*[clinic end generated code: output=0bf96cc182d81379 input=770903a211434ebc]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000547{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300548 PyObject *decoded = PyUnicode_DecodeRawUnicodeEscape(data->buf, data->len,
549 errors);
550 return codec_tuple(decoded, data->len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000551}
552
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300553/*[clinic input]
554_codecs.latin_1_decode
555 data: Py_buffer
556 errors: str(accept={str, NoneType}) = NULL
557 /
558[clinic start generated code]*/
559
Guido van Rossume2d67f92000-03-10 23:09:23 +0000560static PyObject *
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300561_codecs_latin_1_decode_impl(PyModuleDef *module, Py_buffer *data,
562 const char *errors)
563/*[clinic end generated code: output=66b916f5055aaf13 input=5cad0f1759c618ec]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000564{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300565 PyObject *decoded = PyUnicode_DecodeLatin1(data->buf, data->len, errors);
566 return codec_tuple(decoded, data->len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000567}
568
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300569/*[clinic input]
570_codecs.ascii_decode
571 data: Py_buffer
572 errors: str(accept={str, NoneType}) = NULL
573 /
574[clinic start generated code]*/
575
Guido van Rossume2d67f92000-03-10 23:09:23 +0000576static PyObject *
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300577_codecs_ascii_decode_impl(PyModuleDef *module, Py_buffer *data,
578 const char *errors)
579/*[clinic end generated code: output=7f213a1b5cdafc65 input=ad1106f64037bd16]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000580{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300581 PyObject *decoded = PyUnicode_DecodeASCII(data->buf, data->len, errors);
582 return codec_tuple(decoded, data->len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000583}
584
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300585/*[clinic input]
586_codecs.charmap_decode
587 data: Py_buffer
588 errors: str(accept={str, NoneType}) = NULL
589 mapping: object = NULL
590 /
591[clinic start generated code]*/
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000592
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300593static PyObject *
594_codecs_charmap_decode_impl(PyModuleDef *module, Py_buffer *data,
595 const char *errors, PyObject *mapping)
596/*[clinic end generated code: output=87d27f365098bbae input=19712ca35c5a80e2]*/
597{
598 PyObject *decoded;
599
Guido van Rossume2d67f92000-03-10 23:09:23 +0000600 if (mapping == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000601 mapping = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000602
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300603 decoded = PyUnicode_DecodeCharmap(data->buf, data->len, mapping, errors);
604 return codec_tuple(decoded, data->len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000605}
606
Victor Stinner99b95382011-07-04 14:23:54 +0200607#ifdef HAVE_MBCS
Guido van Rossum24bdb042000-03-28 20:29:59 +0000608
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300609/*[clinic input]
610_codecs.mbcs_decode
611 data: Py_buffer
612 errors: str(accept={str, NoneType}) = NULL
613 final: int(c_default="0") = False
614 /
615[clinic start generated code]*/
616
Guido van Rossum24bdb042000-03-28 20:29:59 +0000617static PyObject *
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300618_codecs_mbcs_decode_impl(PyModuleDef *module, Py_buffer *data,
619 const char *errors, int final)
620/*[clinic end generated code: output=0ebaf3a5b20e53fa input=d492c1ca64f4fa8a]*/
Guido van Rossum24bdb042000-03-28 20:29:59 +0000621{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300622 Py_ssize_t consumed = data->len;
623 PyObject *decoded = PyUnicode_DecodeMBCSStateful(data->buf, data->len,
624 errors, final ? NULL : &consumed);
Martin v. Löwis423be952008-08-13 15:53:07 +0000625 return codec_tuple(decoded, consumed);
Guido van Rossum24bdb042000-03-28 20:29:59 +0000626}
627
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300628/*[clinic input]
629_codecs.code_page_decode
630 codepage: int
631 data: Py_buffer
632 errors: str(accept={str, NoneType}) = NULL
633 final: int(c_default="0") = False
634 /
635[clinic start generated code]*/
636
Victor Stinner3a50e702011-10-18 21:21:00 +0200637static PyObject *
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300638_codecs_code_page_decode_impl(PyModuleDef *module, int codepage,
639 Py_buffer *data, const char *errors, int final)
640/*[clinic end generated code: output=4318e3d9971e31ba input=4f3152a304e21d51]*/
Victor Stinner3a50e702011-10-18 21:21:00 +0200641{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300642 Py_ssize_t consumed = data->len;
Serhiy Storchaka1aa5e1d2015-05-12 14:00:22 +0300643 PyObject *decoded = PyUnicode_DecodeCodePageStateful(codepage,
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300644 data->buf, data->len,
645 errors,
646 final ? NULL : &consumed);
Victor Stinner3a50e702011-10-18 21:21:00 +0200647 return codec_tuple(decoded, consumed);
648}
649
Victor Stinner99b95382011-07-04 14:23:54 +0200650#endif /* HAVE_MBCS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000651
Guido van Rossume2d67f92000-03-10 23:09:23 +0000652/* --- Encoder ------------------------------------------------------------ */
653
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300654/*[clinic input]
655_codecs.readbuffer_encode
656 data: Py_buffer(accept={str, buffer})
657 errors: str(accept={str, NoneType}) = NULL
658 /
659[clinic start generated code]*/
660
Guido van Rossume2d67f92000-03-10 23:09:23 +0000661static PyObject *
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300662_codecs_readbuffer_encode_impl(PyModuleDef *module, Py_buffer *data,
663 const char *errors)
664/*[clinic end generated code: output=319cc24083299859 input=b7c322b89d4ab923]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000665{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300666 PyObject *result = PyBytes_FromStringAndSize(data->buf, data->len);
667 return codec_tuple(result, data->len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000668}
669
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300670/*[clinic input]
671_codecs.unicode_internal_encode
672 obj: object
673 errors: str(accept={str, NoneType}) = NULL
674 /
675[clinic start generated code]*/
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000676
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300677static PyObject *
678_codecs_unicode_internal_encode_impl(PyModuleDef *module, PyObject *obj,
679 const char *errors)
680/*[clinic end generated code: output=be08457068ad503b input=8628f0280cf5ba61]*/
681{
Ezio Melottiadc417c2011-11-17 12:23:34 +0200682 if (PyErr_WarnEx(PyExc_DeprecationWarning,
683 "unicode_internal codec has been deprecated",
684 1))
685 return NULL;
686
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000687 if (PyUnicode_Check(obj)) {
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100688 Py_UNICODE *u;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200689 Py_ssize_t len, size;
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100690
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100691 if (PyUnicode_READY(obj) < 0)
692 return NULL;
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100693
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100694 u = PyUnicode_AsUnicodeAndSize(obj, &len);
695 if (u == NULL)
696 return NULL;
Victor Stinner049e5092014-08-17 22:20:00 +0200697 if ((size_t)len > (size_t)PY_SSIZE_T_MAX / sizeof(Py_UNICODE))
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100698 return PyErr_NoMemory();
699 size = len * sizeof(Py_UNICODE);
700 return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size),
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100701 PyUnicode_GET_LENGTH(obj));
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000702 }
703 else {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200704 Py_buffer view;
705 PyObject *result;
706 if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000707 return NULL;
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300708 result = codec_tuple(PyBytes_FromStringAndSize(view.buf, view.len),
709 view.len);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200710 PyBuffer_Release(&view);
711 return result;
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000712 }
713}
714
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300715/*[clinic input]
716_codecs.utf_7_encode
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300717 str: unicode
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300718 errors: str(accept={str, NoneType}) = NULL
719 /
720[clinic start generated code]*/
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000721
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300722static PyObject *
723_codecs_utf_7_encode_impl(PyModuleDef *module, PyObject *str,
724 const char *errors)
Serhiy Storchaka2eb6b0d2016-04-14 12:30:54 +0300725/*[clinic end generated code: output=a7accc496a32b759 input=d1a47579e79cbe15]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300726{
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300727 return codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
728 PyUnicode_GET_LENGTH(str));
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000729}
730
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300731/*[clinic input]
732_codecs.utf_8_encode
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300733 str: unicode
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300734 errors: str(accept={str, NoneType}) = NULL
735 /
736[clinic start generated code]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000737
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300738static PyObject *
739_codecs_utf_8_encode_impl(PyModuleDef *module, PyObject *str,
740 const char *errors)
Serhiy Storchaka2eb6b0d2016-04-14 12:30:54 +0300741/*[clinic end generated code: output=ec831d80e7aedede input=42e3ba73c4392eef]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300742{
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300743 return codec_tuple(_PyUnicode_AsUTF8String(str, errors),
744 PyUnicode_GET_LENGTH(str));
Guido van Rossume2d67f92000-03-10 23:09:23 +0000745}
746
/* This version exposes the byteorder parameter of the builtin UTF-16
   codecs as an optional third argument. It defaults to 0, which means:
   use the native byte order and prepend a BOM (byte order mark) to the
   data.

*/
753
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300754/*[clinic input]
755_codecs.utf_16_encode
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300756 str: unicode
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300757 errors: str(accept={str, NoneType}) = NULL
758 byteorder: int = 0
759 /
760[clinic start generated code]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000761
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300762static PyObject *
763_codecs_utf_16_encode_impl(PyModuleDef *module, PyObject *str,
764 const char *errors, int byteorder)
Serhiy Storchaka2eb6b0d2016-04-14 12:30:54 +0300765/*[clinic end generated code: output=93ac58e960a9ee4d input=ff46416b04edb944]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300766{
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300767 return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
768 PyUnicode_GET_LENGTH(str));
Guido van Rossume2d67f92000-03-10 23:09:23 +0000769}
770
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300771/*[clinic input]
772_codecs.utf_16_le_encode
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300773 str: unicode
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300774 errors: str(accept={str, NoneType}) = NULL
775 /
776[clinic start generated code]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000777
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300778static PyObject *
779_codecs_utf_16_le_encode_impl(PyModuleDef *module, PyObject *str,
780 const char *errors)
Serhiy Storchaka2eb6b0d2016-04-14 12:30:54 +0300781/*[clinic end generated code: output=422bedb8da34fb66 input=cb385455ea8f2fe0]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300782{
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300783 return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
784 PyUnicode_GET_LENGTH(str));
Guido van Rossume2d67f92000-03-10 23:09:23 +0000785}
786
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300787/*[clinic input]
788_codecs.utf_16_be_encode
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300789 str: unicode
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300790 errors: str(accept={str, NoneType}) = NULL
791 /
792[clinic start generated code]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000793
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300794static PyObject *
795_codecs_utf_16_be_encode_impl(PyModuleDef *module, PyObject *str,
796 const char *errors)
Serhiy Storchaka2eb6b0d2016-04-14 12:30:54 +0300797/*[clinic end generated code: output=3aa7ee9502acdd77 input=9119997066bdaefd]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300798{
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300799 return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
800 PyUnicode_GET_LENGTH(str));
Guido van Rossume2d67f92000-03-10 23:09:23 +0000801}
802
/* This version exposes the byteorder parameter of the builtin UTF-32
   codecs as an optional third argument. It defaults to 0, which means:
   use the native byte order and prepend a BOM (byte order mark) to the
   data.

*/
809
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300810/*[clinic input]
811_codecs.utf_32_encode
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300812 str: unicode
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300813 errors: str(accept={str, NoneType}) = NULL
814 byteorder: int = 0
815 /
816[clinic start generated code]*/
Walter Dörwald41980ca2007-08-16 21:55:45 +0000817
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300818static PyObject *
819_codecs_utf_32_encode_impl(PyModuleDef *module, PyObject *str,
820 const char *errors, int byteorder)
Serhiy Storchaka2eb6b0d2016-04-14 12:30:54 +0300821/*[clinic end generated code: output=3e7d5a003b02baed input=c5e77da82fbe5c2a]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300822{
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300823 return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
824 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000825}
826
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300827/*[clinic input]
828_codecs.utf_32_le_encode
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300829 str: unicode
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300830 errors: str(accept={str, NoneType}) = NULL
831 /
832[clinic start generated code]*/
Walter Dörwald41980ca2007-08-16 21:55:45 +0000833
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300834static PyObject *
835_codecs_utf_32_le_encode_impl(PyModuleDef *module, PyObject *str,
836 const char *errors)
Serhiy Storchaka2eb6b0d2016-04-14 12:30:54 +0300837/*[clinic end generated code: output=5dda641cd33dbfc2 input=9993b25fe0877848]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300838{
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300839 return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
840 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000841}
842
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300843/*[clinic input]
844_codecs.utf_32_be_encode
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300845 str: unicode
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300846 errors: str(accept={str, NoneType}) = NULL
847 /
848[clinic start generated code]*/
Walter Dörwald41980ca2007-08-16 21:55:45 +0000849
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300850static PyObject *
851_codecs_utf_32_be_encode_impl(PyModuleDef *module, PyObject *str,
852 const char *errors)
Serhiy Storchaka2eb6b0d2016-04-14 12:30:54 +0300853/*[clinic end generated code: output=ccca8b44d91a7c7a input=d3e0ccaa02920431]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300854{
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300855 return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
856 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000857}
858
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300859/*[clinic input]
860_codecs.unicode_escape_encode
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300861 str: unicode
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300862 errors: str(accept={str, NoneType}) = NULL
863 /
864[clinic start generated code]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000865
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300866static PyObject *
867_codecs_unicode_escape_encode_impl(PyModuleDef *module, PyObject *str,
868 const char *errors)
Serhiy Storchaka2eb6b0d2016-04-14 12:30:54 +0300869/*[clinic end generated code: output=389f23d2b8f8d80b input=65d9eefca65b455a]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300870{
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300871 return codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
872 PyUnicode_GET_LENGTH(str));
Guido van Rossume2d67f92000-03-10 23:09:23 +0000873}
874
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300875/*[clinic input]
876_codecs.raw_unicode_escape_encode
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300877 str: unicode
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300878 errors: str(accept={str, NoneType}) = NULL
879 /
880[clinic start generated code]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000881
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300882static PyObject *
883_codecs_raw_unicode_escape_encode_impl(PyModuleDef *module, PyObject *str,
884 const char *errors)
Serhiy Storchaka2eb6b0d2016-04-14 12:30:54 +0300885/*[clinic end generated code: output=fec4e39d6ec37a62 input=5aa33e4a133391ab]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300886{
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300887 return codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
888 PyUnicode_GET_LENGTH(str));
Guido van Rossume2d67f92000-03-10 23:09:23 +0000889}
890
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300891/*[clinic input]
892_codecs.latin_1_encode
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300893 str: unicode
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300894 errors: str(accept={str, NoneType}) = NULL
895 /
896[clinic start generated code]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000897
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300898static PyObject *
899_codecs_latin_1_encode_impl(PyModuleDef *module, PyObject *str,
900 const char *errors)
Serhiy Storchaka2eb6b0d2016-04-14 12:30:54 +0300901/*[clinic end generated code: output=ecf00eb8e48c889c input=30b11c9e49a65150]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300902{
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300903 return codec_tuple(_PyUnicode_AsLatin1String(str, errors),
904 PyUnicode_GET_LENGTH(str));
Guido van Rossume2d67f92000-03-10 23:09:23 +0000905}
906
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300907/*[clinic input]
908_codecs.ascii_encode
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300909 str: unicode
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300910 errors: str(accept={str, NoneType}) = NULL
911 /
912[clinic start generated code]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000913
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300914static PyObject *
915_codecs_ascii_encode_impl(PyModuleDef *module, PyObject *str,
916 const char *errors)
Serhiy Storchaka2eb6b0d2016-04-14 12:30:54 +0300917/*[clinic end generated code: output=a9d18fc6b6b91cfb input=843a1d268e6dfa8e]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300918{
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300919 return codec_tuple(_PyUnicode_AsASCIIString(str, errors),
920 PyUnicode_GET_LENGTH(str));
Guido van Rossume2d67f92000-03-10 23:09:23 +0000921}
922
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300923/*[clinic input]
924_codecs.charmap_encode
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300925 str: unicode
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300926 errors: str(accept={str, NoneType}) = NULL
927 mapping: object = NULL
928 /
929[clinic start generated code]*/
Guido van Rossume2d67f92000-03-10 23:09:23 +0000930
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300931static PyObject *
932_codecs_charmap_encode_impl(PyModuleDef *module, PyObject *str,
933 const char *errors, PyObject *mapping)
Serhiy Storchaka2eb6b0d2016-04-14 12:30:54 +0300934/*[clinic end generated code: output=14ca42b83853c643 input=0752cde07a6d6d00]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300935{
Guido van Rossume2d67f92000-03-10 23:09:23 +0000936 if (mapping == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000937 mapping = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000938
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300939 return codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
940 PyUnicode_GET_LENGTH(str));
Guido van Rossume2d67f92000-03-10 23:09:23 +0000941}
942
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300943/*[clinic input]
944_codecs.charmap_build
945 map: unicode
946 /
947[clinic start generated code]*/
948
949static PyObject *
950_codecs_charmap_build_impl(PyModuleDef *module, PyObject *map)
951/*[clinic end generated code: output=9485b58fa44afa6a input=d91a91d1717dbc6d]*/
Thomas Wouters73e5a5b2006-06-08 15:35:45 +0000952{
Thomas Wouters73e5a5b2006-06-08 15:35:45 +0000953 return PyUnicode_BuildEncodingMap(map);
954}
955
Victor Stinner99b95382011-07-04 14:23:54 +0200956#ifdef HAVE_MBCS
Guido van Rossum24bdb042000-03-28 20:29:59 +0000957
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300958/*[clinic input]
959_codecs.mbcs_encode
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300960 str: unicode
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300961 errors: str(accept={str, NoneType}) = NULL
962 /
963[clinic start generated code]*/
Guido van Rossum24bdb042000-03-28 20:29:59 +0000964
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300965static PyObject *
966_codecs_mbcs_encode_impl(PyModuleDef *module, PyObject *str,
967 const char *errors)
Serhiy Storchaka2eb6b0d2016-04-14 12:30:54 +0300968/*[clinic end generated code: output=d1a013bc68798bd7 input=de471e0815947553]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300969{
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300970 return codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
971 PyUnicode_GET_LENGTH(str));
Guido van Rossum24bdb042000-03-28 20:29:59 +0000972}
973
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300974/*[clinic input]
975_codecs.code_page_encode
976 code_page: int
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300977 str: unicode
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300978 errors: str(accept={str, NoneType}) = NULL
979 /
980[clinic start generated code]*/
Victor Stinner3a50e702011-10-18 21:21:00 +0200981
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300982static PyObject *
983_codecs_code_page_encode_impl(PyModuleDef *module, int code_page,
984 PyObject *str, const char *errors)
Serhiy Storchaka2eb6b0d2016-04-14 12:30:54 +0300985/*[clinic end generated code: output=3b406618dbfbce25 input=786421ae617d680b]*/
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300986{
Serhiy Storchaka21a663e2016-04-13 15:37:23 +0300987 return codec_tuple(PyUnicode_EncodeCodePage(code_page, str, errors),
988 PyUnicode_GET_LENGTH(str));
Victor Stinner3a50e702011-10-18 21:21:00 +0200989}
990
Victor Stinner99b95382011-07-04 14:23:54 +0200991#endif /* HAVE_MBCS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000992
Walter Dörwald3aeb6322002-09-02 13:14:32 +0000993/* --- Error handler registry --------------------------------------------- */
994
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +0300995/*[clinic input]
996_codecs.register_error
997 errors: str
998 handler: object
999 /
Walter Dörwald0ae29812002-10-31 13:36:29 +00001000
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001001Register the specified error handler under the name errors.
1002
1003handler must be a callable object, that will be called with an exception
1004instance containing information about the location of the encoding/decoding
1005error and must return a (replacement, new position) tuple.
1006[clinic start generated code]*/
1007
1008static PyObject *
1009_codecs_register_error_impl(PyModuleDef *module, const char *errors,
1010 PyObject *handler)
1011/*[clinic end generated code: output=be00d3b1849ce68a input=5e6709203c2e33fe]*/
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001012{
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001013 if (PyCodec_RegisterError(errors, handler))
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001014 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001015 Py_RETURN_NONE;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001016}
1017
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001018/*[clinic input]
1019_codecs.lookup_error
1020 name: str
1021 /
Walter Dörwald0ae29812002-10-31 13:36:29 +00001022
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001023lookup_error(errors) -> handler
1024
1025Return the error handler for the specified error handling name or raise a
1026LookupError, if no handler exists under this name.
1027[clinic start generated code]*/
1028
1029static PyObject *
1030_codecs_lookup_error_impl(PyModuleDef *module, const char *name)
1031/*[clinic end generated code: output=731e6df8c83c6158 input=4775dd65e6235aba]*/
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001032{
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001033 return PyCodec_LookupError(name);
1034}
1035
Guido van Rossume2d67f92000-03-10 23:09:23 +00001036/* --- Module API --------------------------------------------------------- */
1037
1038static PyMethodDef _codecs_functions[] = {
Serhiy Storchaka0c59ff62015-05-12 13:15:57 +03001039 _CODECS_REGISTER_METHODDEF
1040 _CODECS_LOOKUP_METHODDEF
1041 _CODECS_ENCODE_METHODDEF
1042 _CODECS_DECODE_METHODDEF
1043 _CODECS_ESCAPE_ENCODE_METHODDEF
1044 _CODECS_ESCAPE_DECODE_METHODDEF
1045 _CODECS_UTF_8_ENCODE_METHODDEF
1046 _CODECS_UTF_8_DECODE_METHODDEF
1047 _CODECS_UTF_7_ENCODE_METHODDEF
1048 _CODECS_UTF_7_DECODE_METHODDEF
1049 _CODECS_UTF_16_ENCODE_METHODDEF
1050 _CODECS_UTF_16_LE_ENCODE_METHODDEF
1051 _CODECS_UTF_16_BE_ENCODE_METHODDEF
1052 _CODECS_UTF_16_DECODE_METHODDEF
1053 _CODECS_UTF_16_LE_DECODE_METHODDEF
1054 _CODECS_UTF_16_BE_DECODE_METHODDEF
1055 _CODECS_UTF_16_EX_DECODE_METHODDEF
1056 _CODECS_UTF_32_ENCODE_METHODDEF
1057 _CODECS_UTF_32_LE_ENCODE_METHODDEF
1058 _CODECS_UTF_32_BE_ENCODE_METHODDEF
1059 _CODECS_UTF_32_DECODE_METHODDEF
1060 _CODECS_UTF_32_LE_DECODE_METHODDEF
1061 _CODECS_UTF_32_BE_DECODE_METHODDEF
1062 _CODECS_UTF_32_EX_DECODE_METHODDEF
1063 _CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF
1064 _CODECS_UNICODE_ESCAPE_DECODE_METHODDEF
1065 _CODECS_UNICODE_INTERNAL_ENCODE_METHODDEF
1066 _CODECS_UNICODE_INTERNAL_DECODE_METHODDEF
1067 _CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF
1068 _CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF
1069 _CODECS_LATIN_1_ENCODE_METHODDEF
1070 _CODECS_LATIN_1_DECODE_METHODDEF
1071 _CODECS_ASCII_ENCODE_METHODDEF
1072 _CODECS_ASCII_DECODE_METHODDEF
1073 _CODECS_CHARMAP_ENCODE_METHODDEF
1074 _CODECS_CHARMAP_DECODE_METHODDEF
1075 _CODECS_CHARMAP_BUILD_METHODDEF
1076 _CODECS_READBUFFER_ENCODE_METHODDEF
1077 _CODECS_MBCS_ENCODE_METHODDEF
1078 _CODECS_MBCS_DECODE_METHODDEF
1079 _CODECS_CODE_PAGE_ENCODE_METHODDEF
1080 _CODECS_CODE_PAGE_DECODE_METHODDEF
1081 _CODECS_REGISTER_ERROR_METHODDEF
1082 _CODECS_LOOKUP_ERROR_METHODDEF
Nick Coghlan8fad1672014-09-15 23:50:44 +12001083 _CODECS__FORGET_CODEC_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001084 {NULL, NULL} /* sentinel */
Guido van Rossume2d67f92000-03-10 23:09:23 +00001085};
1086
Martin v. Löwis1a214512008-06-11 05:26:20 +00001087static struct PyModuleDef codecsmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001088 PyModuleDef_HEAD_INIT,
1089 "_codecs",
1090 NULL,
1091 -1,
1092 _codecs_functions,
1093 NULL,
1094 NULL,
1095 NULL,
1096 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001097};
1098
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001099PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001100PyInit__codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001101{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001102 return PyModule_Create(&codecsmodule);
Guido van Rossume2d67f92000-03-10 23:09:23 +00001103}