blob: 52f34793c416c55a113ab836aef9fad1e38cd656 [file] [log] [blame]
/* ------------------------------------------------------------------------

   _codecs -- Provides access to the codec registry and the builtin
              codecs.

   This module should never be imported directly. The standard library
   module "codecs" wraps this builtin module for use within Python.

   The codec registry is accessible via:

     register(search_function) -> None

     lookup(encoding) -> CodecInfo object

   The builtin Unicode codecs use the following interface:

     <encoding>_encode(Unicode_object[,errors='strict']) ->
        (string object, bytes consumed)

     <encoding>_decode(char_buffer_obj[,errors='strict']) ->
        (Unicode object, bytes consumed)

   <encoding>_encode() interfaces also accept non-Unicode objects as
   input. The objects are then converted to Unicode using
   PyUnicode_FromObject() prior to applying the conversion.

   These <encoding>s are available: utf_8, unicode_escape,
   raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
   mbcs (on win32).


Written by Marc-Andre Lemburg (mal@lemburg.com).

Copyright (c) Corporation for National Research Initiatives.

   ------------------------------------------------------------------------ */
37
Martin v. Löwis18e16552006-02-15 17:27:45 +000038#define PY_SSIZE_T_CLEAN
Guido van Rossume2d67f92000-03-10 23:09:23 +000039#include "Python.h"
40
Victor Stinner87a7c822011-11-10 20:05:55 +010041#ifdef MS_WINDOWS
42#include <windows.h>
43#endif
44
Nick Coghlan8fad1672014-09-15 23:50:44 +120045/*[clinic input]
46module _codecs
47[clinic start generated code]*/
48/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
49
50
Guido van Rossume2d67f92000-03-10 23:09:23 +000051/* --- Registry ----------------------------------------------------------- */
52
Walter Dörwald0ae29812002-10-31 13:36:29 +000053PyDoc_STRVAR(register__doc__,
54"register(search_function)\n\
55\n\
56Register a codec search function. Search functions are expected to take\n\
Nick Coghlanb9fdb7a2015-01-07 00:22:00 +100057one argument, the encoding name in all lower case letters, and either\n\
58return None, or a tuple of functions (encoder, decoder, stream_reader,\n\
59stream_writer) (or a CodecInfo object).");
Walter Dörwald0ae29812002-10-31 13:36:29 +000060
Guido van Rossume2d67f92000-03-10 23:09:23 +000061static
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000062PyObject *codec_register(PyObject *self, PyObject *search_function)
Guido van Rossume2d67f92000-03-10 23:09:23 +000063{
Guido van Rossume2d67f92000-03-10 23:09:23 +000064 if (PyCodec_Register(search_function))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000065 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000066
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000067 Py_RETURN_NONE;
Guido van Rossume2d67f92000-03-10 23:09:23 +000068}
69
Walter Dörwald0ae29812002-10-31 13:36:29 +000070PyDoc_STRVAR(lookup__doc__,
Guido van Rossum36e0a922007-07-20 04:05:57 +000071"lookup(encoding) -> CodecInfo\n\
Walter Dörwald0ae29812002-10-31 13:36:29 +000072\n\
73Looks up a codec tuple in the Python codec registry and returns\n\
Benjamin Petersonf07d0022009-03-21 17:31:58 +000074a CodecInfo object.");
Walter Dörwald0ae29812002-10-31 13:36:29 +000075
Guido van Rossume2d67f92000-03-10 23:09:23 +000076static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000077PyObject *codec_lookup(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000078{
79 char *encoding;
80
81 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000082 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +000083
84 return _PyCodec_Lookup(encoding);
Guido van Rossume2d67f92000-03-10 23:09:23 +000085}
86
Marc-André Lemburg3f419742004-07-10 12:06:10 +000087PyDoc_STRVAR(encode__doc__,
88"encode(obj, [encoding[,errors]]) -> object\n\
89\n\
90Encodes obj using the codec registered for encoding. encoding defaults\n\
91to the default encoding. errors may be given to set a different error\n\
92handling scheme. Default is 'strict' meaning that encoding errors raise\n\
93a ValueError. Other possible values are 'ignore', 'replace' and\n\
94'xmlcharrefreplace' as well as any other name registered with\n\
95codecs.register_error that can handle ValueErrors.");
96
97static PyObject *
98codec_encode(PyObject *self, PyObject *args)
99{
Brett Cannon3e377de2004-07-10 21:41:14 +0000100 const char *encoding = NULL;
101 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000102 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000103
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000104 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
105 return NULL;
106
107 if (encoding == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000108 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000109
110 /* Encode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000111 return PyCodec_Encode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000112}
113
114PyDoc_STRVAR(decode__doc__,
115"decode(obj, [encoding[,errors]]) -> object\n\
116\n\
117Decodes obj using the codec registered for encoding. encoding defaults\n\
118to the default encoding. errors may be given to set a different error\n\
119handling scheme. Default is 'strict' meaning that encoding errors raise\n\
120a ValueError. Other possible values are 'ignore' and 'replace'\n\
Benjamin Petersonf10a79a2008-10-11 00:49:57 +0000121as well as any other name registered with codecs.register_error that is\n\
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000122able to handle ValueErrors.");
123
124static PyObject *
125codec_decode(PyObject *self, PyObject *args)
126{
Brett Cannon3e377de2004-07-10 21:41:14 +0000127 const char *encoding = NULL;
128 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000129 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000130
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000131 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
132 return NULL;
133
134 if (encoding == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000135 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000136
137 /* Decode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000138 return PyCodec_Decode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000139}
140
Guido van Rossume2d67f92000-03-10 23:09:23 +0000141/* --- Helpers ------------------------------------------------------------ */
142
Nick Coghlan8fad1672014-09-15 23:50:44 +1200143/*[clinic input]
144_codecs._forget_codec
145
146 encoding: str
147 /
148
149Purge the named codec from the internal codec lookup cache
150[clinic start generated code]*/
151
152PyDoc_STRVAR(_codecs__forget_codec__doc__,
153"_forget_codec($module, encoding, /)\n"
154"--\n"
155"\n"
156"Purge the named codec from the internal codec lookup cache");
157
158#define _CODECS__FORGET_CODEC_METHODDEF \
159 {"_forget_codec", (PyCFunction)_codecs__forget_codec, METH_VARARGS, _codecs__forget_codec__doc__},
160
161static PyObject *
162_codecs__forget_codec_impl(PyModuleDef *module, const char *encoding);
163
164static PyObject *
165_codecs__forget_codec(PyModuleDef *module, PyObject *args)
166{
167 PyObject *return_value = NULL;
168 const char *encoding;
169
170 if (!PyArg_ParseTuple(args,
171 "s:_forget_codec",
172 &encoding))
173 goto exit;
174 return_value = _codecs__forget_codec_impl(module, encoding);
175
176exit:
177 return return_value;
178}
179
180static PyObject *
181_codecs__forget_codec_impl(PyModuleDef *module, const char *encoding)
182/*[clinic end generated code: output=a75e631591702a5c input=18d5d92d0e386c38]*/
183{
184 if (_PyCodec_Forget(encoding) < 0) {
185 return NULL;
186 };
187 Py_RETURN_NONE;
188}
189
Guido van Rossume2d67f92000-03-10 23:09:23 +0000190static
191PyObject *codec_tuple(PyObject *unicode,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000192 Py_ssize_t len)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000193{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000194 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000195 if (unicode == NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000196 return NULL;
197 v = Py_BuildValue("On", unicode, len);
198 Py_DECREF(unicode);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000199 return v;
200}
201
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000202/* --- String codecs ------------------------------------------------------ */
203static PyObject *
204escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000205 PyObject *args)
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000206{
207 const char *errors = NULL;
208 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000209 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000210
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000211 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000212 &data, &size, &errors))
213 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +0000214 return codec_tuple(PyBytes_DecodeEscape(data, size, errors, 0, NULL),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000215 size);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000216}
217
218static PyObject *
219escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000220 PyObject *args)
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000221{
Antoine Pitroud1188562010-06-09 16:38:55 +0000222 PyObject *str;
223 Py_ssize_t size;
224 Py_ssize_t newsize;
225 const char *errors = NULL;
226 PyObject *v;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000227
Antoine Pitroud1188562010-06-09 16:38:55 +0000228 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
229 &PyBytes_Type, &str, &errors))
230 return NULL;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000231
Antoine Pitroud1188562010-06-09 16:38:55 +0000232 size = PyBytes_GET_SIZE(str);
Mark Dickinsonc04ddff2012-10-06 18:04:49 +0100233 if (size > PY_SSIZE_T_MAX / 4) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000234 PyErr_SetString(PyExc_OverflowError,
235 "string is too large to encode");
236 return NULL;
237 }
Mark Dickinsonc04ddff2012-10-06 18:04:49 +0100238 newsize = 4*size;
Antoine Pitroud1188562010-06-09 16:38:55 +0000239 v = PyBytes_FromStringAndSize(NULL, newsize);
240
241 if (v == NULL) {
242 return NULL;
243 }
244 else {
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200245 Py_ssize_t i;
246 char c;
247 char *p = PyBytes_AS_STRING(v);
Antoine Pitroud1188562010-06-09 16:38:55 +0000248
249 for (i = 0; i < size; i++) {
250 /* There's at least enough room for a hex escape */
251 assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
252 c = PyBytes_AS_STRING(str)[i];
253 if (c == '\'' || c == '\\')
254 *p++ = '\\', *p++ = c;
255 else if (c == '\t')
256 *p++ = '\\', *p++ = 't';
257 else if (c == '\n')
258 *p++ = '\\', *p++ = 'n';
259 else if (c == '\r')
260 *p++ = '\\', *p++ = 'r';
261 else if (c < ' ' || c >= 0x7f) {
262 *p++ = '\\';
263 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200264 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
265 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitroud1188562010-06-09 16:38:55 +0000266 }
267 else
268 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000269 }
Antoine Pitroud1188562010-06-09 16:38:55 +0000270 *p = '\0';
271 if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
272 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 }
Antoine Pitroud1188562010-06-09 16:38:55 +0000274 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000275
Antoine Pitroud1188562010-06-09 16:38:55 +0000276 return codec_tuple(v, size);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000277}
278
Guido van Rossume2d67f92000-03-10 23:09:23 +0000279/* --- Decoder ------------------------------------------------------------ */
280
281static PyObject *
282unicode_internal_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000283 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000284{
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000285 PyObject *obj;
286 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000287
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000288 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000289 &obj, &errors))
290 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000291
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000292 if (PyUnicode_Check(obj)) {
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100293 if (PyUnicode_READY(obj) < 0)
294 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000295 Py_INCREF(obj);
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100296 return codec_tuple(obj, PyUnicode_GET_LENGTH(obj));
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000297 }
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000298 else {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200299 Py_buffer view;
300 PyObject *result;
301 if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000302 return NULL;
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000303
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200304 result = codec_tuple(
305 _PyUnicode_DecodeUnicodeInternal(view.buf, view.len, errors),
306 view.len);
307 PyBuffer_Release(&view);
308 return result;
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000309 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000310}
311
312static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000313utf_7_decode(PyObject *self,
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000314 PyObject *args)
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000315{
Antoine Pitroud1188562010-06-09 16:38:55 +0000316 Py_buffer pbuf;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000317 const char *errors = NULL;
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000318 int final = 0;
319 Py_ssize_t consumed;
320 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000321
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000322 if (!PyArg_ParseTuple(args, "y*|zi:utf_7_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000323 &pbuf, &errors, &final))
324 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000325 consumed = pbuf.len;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000326
Martin v. Löwis423be952008-08-13 15:53:07 +0000327 decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000328 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000329 PyBuffer_Release(&pbuf);
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000330 if (decoded == NULL)
331 return NULL;
332 return codec_tuple(decoded, consumed);
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000333}
334
335static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000336utf_8_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000337 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000338{
Antoine Pitroud1188562010-06-09 16:38:55 +0000339 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000340 const char *errors = NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000341 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000342 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000343 PyObject *decoded = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000344
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000345 if (!PyArg_ParseTuple(args, "y*|zi:utf_8_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 &pbuf, &errors, &final))
347 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000348 consumed = pbuf.len;
349
350 decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000351 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000352 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000353 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000354 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000355 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000356}
357
358static PyObject *
359utf_16_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000360 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000361{
Antoine Pitroud1188562010-06-09 16:38:55 +0000362 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000363 const char *errors = NULL;
364 int byteorder = 0;
Walter Dörwald69652032004-09-07 20:24:22 +0000365 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000366 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000367 PyObject *decoded;
368
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000369 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 &pbuf, &errors, &final))
371 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000372 consumed = pbuf.len; /* This is overwritten unless final is true. */
373 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000374 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000375 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000376 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000377 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000378 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000379}
380
381static PyObject *
382utf_16_le_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000383 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000384{
Antoine Pitroud1188562010-06-09 16:38:55 +0000385 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000386 const char *errors = NULL;
387 int byteorder = -1;
Walter Dörwald69652032004-09-07 20:24:22 +0000388 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000389 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000390 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000391
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000392 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_le_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000393 &pbuf, &errors, &final))
394 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000395
Martin v. Löwis423be952008-08-13 15:53:07 +0000396 consumed = pbuf.len; /* This is overwritten unless final is true. */
397 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000398 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000399 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000400 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000401 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000402 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000403}
404
405static PyObject *
406utf_16_be_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000407 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000408{
Antoine Pitroud1188562010-06-09 16:38:55 +0000409 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000410 const char *errors = NULL;
411 int byteorder = 1;
Walter Dörwald69652032004-09-07 20:24:22 +0000412 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000413 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000414 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000415
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000416 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_be_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000417 &pbuf, &errors, &final))
418 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000419
420 consumed = pbuf.len; /* This is overwritten unless final is true. */
421 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000422 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000423 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000424 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000425 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000426 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000427}
428
429/* This non-standard version also provides access to the byteorder
430 parameter of the builtin UTF-16 codec.
431
432 It returns a tuple (unicode, bytesread, byteorder) with byteorder
433 being the value in effect at the end of data.
434
435*/
436
437static PyObject *
438utf_16_ex_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000439 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000440{
Antoine Pitroud1188562010-06-09 16:38:55 +0000441 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000442 const char *errors = NULL;
443 int byteorder = 0;
444 PyObject *unicode, *tuple;
Walter Dörwald69652032004-09-07 20:24:22 +0000445 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000446 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000447
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000448 if (!PyArg_ParseTuple(args, "y*|zii:utf_16_ex_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000449 &pbuf, &errors, &byteorder, &final))
450 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000451 consumed = pbuf.len; /* This is overwritten unless final is true. */
452 unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000453 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000454 PyBuffer_Release(&pbuf);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000455 if (unicode == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000456 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000457 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000458 Py_DECREF(unicode);
459 return tuple;
460}
461
462static PyObject *
Walter Dörwald41980ca2007-08-16 21:55:45 +0000463utf_32_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000464 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000465{
Antoine Pitroud1188562010-06-09 16:38:55 +0000466 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000467 const char *errors = NULL;
468 int byteorder = 0;
469 int final = 0;
470 Py_ssize_t consumed;
471 PyObject *decoded;
472
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000473 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000474 &pbuf, &errors, &final))
475 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000476 consumed = pbuf.len; /* This is overwritten unless final is true. */
477 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000478 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000479 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000480 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000481 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000482 return codec_tuple(decoded, consumed);
483}
484
485static PyObject *
486utf_32_le_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000487 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000488{
Antoine Pitroud1188562010-06-09 16:38:55 +0000489 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000490 const char *errors = NULL;
491 int byteorder = -1;
492 int final = 0;
493 Py_ssize_t consumed;
Martin v. Löwis423be952008-08-13 15:53:07 +0000494 PyObject *decoded;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000495
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000496 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_le_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000497 &pbuf, &errors, &final))
498 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000499 consumed = pbuf.len; /* This is overwritten unless final is true. */
500 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000501 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000502 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000503 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000504 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000505 return codec_tuple(decoded, consumed);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000506}
507
508static PyObject *
509utf_32_be_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000510 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000511{
Antoine Pitroud1188562010-06-09 16:38:55 +0000512 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000513 const char *errors = NULL;
514 int byteorder = 1;
515 int final = 0;
516 Py_ssize_t consumed;
Martin v. Löwis423be952008-08-13 15:53:07 +0000517 PyObject *decoded;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000518
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000519 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_be_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000520 &pbuf, &errors, &final))
521 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000522 consumed = pbuf.len; /* This is overwritten unless final is true. */
523 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000524 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000525 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000526 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000527 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000528 return codec_tuple(decoded, consumed);
529}
530
531/* This non-standard version also provides access to the byteorder
532 parameter of the builtin UTF-32 codec.
533
534 It returns a tuple (unicode, bytesread, byteorder) with byteorder
535 being the value in effect at the end of data.
536
537*/
538
539static PyObject *
540utf_32_ex_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000541 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000542{
Antoine Pitroud1188562010-06-09 16:38:55 +0000543 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000544 const char *errors = NULL;
545 int byteorder = 0;
546 PyObject *unicode, *tuple;
547 int final = 0;
548 Py_ssize_t consumed;
549
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000550 if (!PyArg_ParseTuple(args, "y*|zii:utf_32_ex_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000551 &pbuf, &errors, &byteorder, &final))
552 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000553 consumed = pbuf.len; /* This is overwritten unless final is true. */
554 unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000555 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000556 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000557 if (unicode == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000558 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000559 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
560 Py_DECREF(unicode);
561 return tuple;
562}
563
564static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000565unicode_escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000566 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000567{
Antoine Pitroud1188562010-06-09 16:38:55 +0000568 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000569 const char *errors = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000570 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000571
Martin v. Löwis423be952008-08-13 15:53:07 +0000572 if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000573 &pbuf, &errors))
574 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000575
Antoine Pitroud1188562010-06-09 16:38:55 +0000576 unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
577 PyBuffer_Release(&pbuf);
578 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000579}
580
581static PyObject *
582raw_unicode_escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000583 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000584{
Antoine Pitroud1188562010-06-09 16:38:55 +0000585 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000586 const char *errors = NULL;
Antoine Pitroud1188562010-06-09 16:38:55 +0000587 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000588
Martin v. Löwis423be952008-08-13 15:53:07 +0000589 if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000590 &pbuf, &errors))
591 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000592
Antoine Pitroud1188562010-06-09 16:38:55 +0000593 unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
594 PyBuffer_Release(&pbuf);
595 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000596}
597
598static PyObject *
599latin_1_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000600 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000601{
Antoine Pitroud1188562010-06-09 16:38:55 +0000602 Py_buffer pbuf;
603 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000604 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000605
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000606 if (!PyArg_ParseTuple(args, "y*|z:latin_1_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000607 &pbuf, &errors))
608 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000609
Antoine Pitroud1188562010-06-09 16:38:55 +0000610 unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
611 PyBuffer_Release(&pbuf);
612 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000613}
614
615static PyObject *
616ascii_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000617 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000618{
Antoine Pitroud1188562010-06-09 16:38:55 +0000619 Py_buffer pbuf;
620 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000621 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000622
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000623 if (!PyArg_ParseTuple(args, "y*|z:ascii_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000624 &pbuf, &errors))
625 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000626
Antoine Pitroud1188562010-06-09 16:38:55 +0000627 unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
628 PyBuffer_Release(&pbuf);
629 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000630}
631
632static PyObject *
633charmap_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000634 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000635{
Antoine Pitroud1188562010-06-09 16:38:55 +0000636 Py_buffer pbuf;
637 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000638 const char *errors = NULL;
639 PyObject *mapping = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000640
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000641 if (!PyArg_ParseTuple(args, "y*|zO:charmap_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000642 &pbuf, &errors, &mapping))
643 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000644 if (mapping == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000645 mapping = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000646
Antoine Pitroud1188562010-06-09 16:38:55 +0000647 unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
648 PyBuffer_Release(&pbuf);
649 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000650}
651
Victor Stinner99b95382011-07-04 14:23:54 +0200652#ifdef HAVE_MBCS
Guido van Rossum24bdb042000-03-28 20:29:59 +0000653
654static PyObject *
655mbcs_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000656 PyObject *args)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000657{
Antoine Pitroud1188562010-06-09 16:38:55 +0000658 Py_buffer pbuf;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000659 const char *errors = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000660 int final = 0;
Martin v. Löwis423be952008-08-13 15:53:07 +0000661 Py_ssize_t consumed;
662 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000663
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000664 if (!PyArg_ParseTuple(args, "y*|zi:mbcs_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000665 &pbuf, &errors, &final))
666 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000667 consumed = pbuf.len;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000668
Martin v. Löwis423be952008-08-13 15:53:07 +0000669 decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000670 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000671 PyBuffer_Release(&pbuf);
Martin v. Löwis423be952008-08-13 15:53:07 +0000672 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000673 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000674 return codec_tuple(decoded, consumed);
Guido van Rossum24bdb042000-03-28 20:29:59 +0000675}
676
Victor Stinner3a50e702011-10-18 21:21:00 +0200677static PyObject *
678code_page_decode(PyObject *self,
679 PyObject *args)
680{
681 Py_buffer pbuf;
682 const char *errors = NULL;
683 int final = 0;
684 Py_ssize_t consumed;
685 PyObject *decoded = NULL;
686 int code_page;
687
688 if (!PyArg_ParseTuple(args, "iy*|zi:code_page_decode",
689 &code_page, &pbuf, &errors, &final))
690 return NULL;
691 consumed = pbuf.len;
692
693 decoded = PyUnicode_DecodeCodePageStateful(code_page,
694 pbuf.buf, pbuf.len, errors,
695 final ? NULL : &consumed);
696 PyBuffer_Release(&pbuf);
697 if (decoded == NULL)
698 return NULL;
699 return codec_tuple(decoded, consumed);
700}
701
Victor Stinner99b95382011-07-04 14:23:54 +0200702#endif /* HAVE_MBCS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000703
Guido van Rossume2d67f92000-03-10 23:09:23 +0000704/* --- Encoder ------------------------------------------------------------ */
705
706static PyObject *
707readbuffer_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000708 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000709{
Martin v. Löwis423be952008-08-13 15:53:07 +0000710 Py_buffer pdata;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000711 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000712 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000713 const char *errors = NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000714 PyObject *result;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000715
Martin v. Löwis423be952008-08-13 15:53:07 +0000716 if (!PyArg_ParseTuple(args, "s*|z:readbuffer_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000717 &pdata, &errors))
718 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000719 data = pdata.buf;
720 size = pdata.len;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000721
Martin v. Löwis423be952008-08-13 15:53:07 +0000722 result = PyBytes_FromStringAndSize(data, size);
723 PyBuffer_Release(&pdata);
724 return codec_tuple(result, size);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000725}
726
727static PyObject *
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000728unicode_internal_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000729 PyObject *args)
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000730{
731 PyObject *obj;
732 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000733
Ezio Melottiadc417c2011-11-17 12:23:34 +0200734 if (PyErr_WarnEx(PyExc_DeprecationWarning,
735 "unicode_internal codec has been deprecated",
736 1))
737 return NULL;
738
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000739 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000740 &obj, &errors))
741 return NULL;
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000742
743 if (PyUnicode_Check(obj)) {
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100744 Py_UNICODE *u;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200745 Py_ssize_t len, size;
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100746
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100747 if (PyUnicode_READY(obj) < 0)
748 return NULL;
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100749
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100750 u = PyUnicode_AsUnicodeAndSize(obj, &len);
751 if (u == NULL)
752 return NULL;
753 if (len > PY_SSIZE_T_MAX / sizeof(Py_UNICODE))
754 return PyErr_NoMemory();
755 size = len * sizeof(Py_UNICODE);
756 return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size),
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100757 PyUnicode_GET_LENGTH(obj));
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000758 }
759 else {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200760 Py_buffer view;
761 PyObject *result;
762 if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000763 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200764 result = codec_tuple(PyBytes_FromStringAndSize(view.buf, view.len), view.len);
765 PyBuffer_Release(&view);
766 return result;
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000767 }
768}
769
770static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000771utf_7_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000772 PyObject *args)
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000773{
774 PyObject *str, *v;
775 const char *errors = NULL;
776
777 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000778 &str, &errors))
779 return NULL;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000780
781 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100782 if (str == NULL || PyUnicode_READY(str) < 0) {
783 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000784 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100785 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100786 v = codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
787 PyUnicode_GET_LENGTH(str));
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000788 Py_DECREF(str);
789 return v;
790}
791
792static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000793utf_8_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000794 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000795{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000796 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000797 const char *errors = NULL;
798
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000799 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000800 &str, &errors))
801 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000802
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000803 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100804 if (str == NULL || PyUnicode_READY(str) < 0) {
805 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000806 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100807 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200808 v = codec_tuple(PyUnicode_AsEncodedString(str, "utf-8", errors),
809 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000810 Py_DECREF(str);
811 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000812}
813
814/* This version provides access to the byteorder parameter of the
815 builtin UTF-16 codecs as optional third argument. It defaults to 0
816 which means: use the native byte order and prepend the data with a
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000817 BOM mark.
Guido van Rossume2d67f92000-03-10 23:09:23 +0000818
819*/
820
821static PyObject *
822utf_16_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000823 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000824{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000825 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000826 const char *errors = NULL;
827 int byteorder = 0;
828
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000829 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000830 &str, &errors, &byteorder))
831 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000832
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000833 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100834 if (str == NULL || PyUnicode_READY(str) < 0) {
835 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000836 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100837 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100838 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
839 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000840 Py_DECREF(str);
841 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000842}
843
844static PyObject *
845utf_16_le_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000846 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000847{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000848 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000849 const char *errors = NULL;
850
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000851 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000852 &str, &errors))
853 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000854
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000855 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100856 if (str == NULL || PyUnicode_READY(str) < 0) {
857 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000858 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100859 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100860 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
861 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000862 Py_DECREF(str);
863 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000864}
865
866static PyObject *
867utf_16_be_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000868 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000869{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000870 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000871 const char *errors = NULL;
872
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000873 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000874 &str, &errors))
875 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000876
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000877 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100878 if (str == NULL || PyUnicode_READY(str) < 0) {
879 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000880 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100881 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100882 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
883 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000884 Py_DECREF(str);
885 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000886}
887
Walter Dörwald41980ca2007-08-16 21:55:45 +0000888/* This version provides access to the byteorder parameter of the
889 builtin UTF-32 codecs as optional third argument. It defaults to 0
890 which means: use the native byte order and prepend the data with a
891 BOM mark.
892
893*/
894
895static PyObject *
896utf_32_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000897 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000898{
899 PyObject *str, *v;
900 const char *errors = NULL;
901 int byteorder = 0;
902
903 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000904 &str, &errors, &byteorder))
905 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000906
907 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100908 if (str == NULL || PyUnicode_READY(str) < 0) {
909 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000910 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100911 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100912 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
913 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000914 Py_DECREF(str);
915 return v;
916}
917
918static PyObject *
919utf_32_le_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000920 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000921{
922 PyObject *str, *v;
923 const char *errors = NULL;
924
925 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000926 &str, &errors))
927 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000928
929 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100930 if (str == NULL || PyUnicode_READY(str) < 0) {
931 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000932 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100933 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100934 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
935 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000936 Py_DECREF(str);
937 return v;
938}
939
940static PyObject *
941utf_32_be_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000942 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000943{
944 PyObject *str, *v;
945 const char *errors = NULL;
946
947 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000948 &str, &errors))
949 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000950
951 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100952 if (str == NULL || PyUnicode_READY(str) < 0) {
953 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000954 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100955 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100956 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
957 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000958 Py_DECREF(str);
959 return v;
960}
961
Guido van Rossume2d67f92000-03-10 23:09:23 +0000962static PyObject *
963unicode_escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000964 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000965{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000966 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000967 const char *errors = NULL;
968
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000969 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000970 &str, &errors))
971 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000972
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000973 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100974 if (str == NULL || PyUnicode_READY(str) < 0) {
975 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000976 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100977 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100978 v = codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
979 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000980 Py_DECREF(str);
981 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000982}
983
984static PyObject *
985raw_unicode_escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000986 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000987{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000988 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000989 const char *errors = NULL;
990
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000991 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000992 &str, &errors))
993 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000994
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000995 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100996 if (str == NULL || PyUnicode_READY(str) < 0) {
997 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000998 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100999 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001000 v = codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
1001 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001002 Py_DECREF(str);
1003 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001004}
1005
1006static PyObject *
1007latin_1_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001008 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001009{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001010 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001011 const char *errors = NULL;
1012
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001013 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001014 &str, &errors))
1015 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001016
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001017 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001018 if (str == NULL || PyUnicode_READY(str) < 0) {
1019 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001021 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001022 v = codec_tuple(_PyUnicode_AsLatin1String(str, errors),
1023 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001024 Py_DECREF(str);
1025 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001026}
1027
1028static PyObject *
1029ascii_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001030 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001031{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001032 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001033 const char *errors = NULL;
1034
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001035 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001036 &str, &errors))
1037 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001038
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001039 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001040 if (str == NULL || PyUnicode_READY(str) < 0) {
1041 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001042 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001043 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001044 v = codec_tuple(_PyUnicode_AsASCIIString(str, errors),
1045 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001046 Py_DECREF(str);
1047 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001048}
1049
1050static PyObject *
1051charmap_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001052 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001053{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001054 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001055 const char *errors = NULL;
1056 PyObject *mapping = NULL;
1057
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001058 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001059 &str, &errors, &mapping))
1060 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001061 if (mapping == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001062 mapping = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001063
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001064 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001065 if (str == NULL || PyUnicode_READY(str) < 0) {
1066 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001067 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001068 }
Martin v. Löwis23e275b2011-11-02 18:02:51 +01001069 v = codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001070 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001071 Py_DECREF(str);
1072 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001073}
1074
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001075static PyObject*
1076charmap_build(PyObject *self, PyObject *args)
1077{
1078 PyObject *map;
1079 if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
1080 return NULL;
1081 return PyUnicode_BuildEncodingMap(map);
1082}
1083
Victor Stinner99b95382011-07-04 14:23:54 +02001084#ifdef HAVE_MBCS
Guido van Rossum24bdb042000-03-28 20:29:59 +00001085
1086static PyObject *
1087mbcs_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001088 PyObject *args)
Guido van Rossum24bdb042000-03-28 20:29:59 +00001089{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001090 PyObject *str, *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001091 const char *errors = NULL;
1092
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001093 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001094 &str, &errors))
1095 return NULL;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001096
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001097 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001098 if (str == NULL || PyUnicode_READY(str) < 0) {
1099 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001100 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001101 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001102 v = codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
1103 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001104 Py_DECREF(str);
1105 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001106}
1107
Victor Stinner3a50e702011-10-18 21:21:00 +02001108static PyObject *
1109code_page_encode(PyObject *self,
1110 PyObject *args)
1111{
1112 PyObject *str, *v;
1113 const char *errors = NULL;
1114 int code_page;
1115
1116 if (!PyArg_ParseTuple(args, "iO|z:code_page_encode",
1117 &code_page, &str, &errors))
1118 return NULL;
1119
1120 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001121 if (str == NULL || PyUnicode_READY(str) < 0) {
1122 Py_XDECREF(str);
Victor Stinner3a50e702011-10-18 21:21:00 +02001123 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001124 }
Victor Stinner3a50e702011-10-18 21:21:00 +02001125 v = codec_tuple(PyUnicode_EncodeCodePage(code_page,
1126 str,
1127 errors),
1128 PyUnicode_GET_LENGTH(str));
1129 Py_DECREF(str);
1130 return v;
1131}
1132
Victor Stinner99b95382011-07-04 14:23:54 +02001133#endif /* HAVE_MBCS */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001134
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001135/* --- Error handler registry --------------------------------------------- */
1136
Walter Dörwald0ae29812002-10-31 13:36:29 +00001137PyDoc_STRVAR(register_error__doc__,
1138"register_error(errors, handler)\n\
1139\n\
1140Register the specified error handler under the name\n\
1141errors. handler must be a callable object, that\n\
1142will be called with an exception instance containing\n\
1143information about the location of the encoding/decoding\n\
1144error and must return a (replacement, new position) tuple.");
1145
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001146static PyObject *register_error(PyObject *self, PyObject *args)
1147{
1148 const char *name;
1149 PyObject *handler;
1150
1151 if (!PyArg_ParseTuple(args, "sO:register_error",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001152 &name, &handler))
1153 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001154 if (PyCodec_RegisterError(name, handler))
1155 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001156 Py_RETURN_NONE;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001157}
1158
Walter Dörwald0ae29812002-10-31 13:36:29 +00001159PyDoc_STRVAR(lookup_error__doc__,
1160"lookup_error(errors) -> handler\n\
1161\n\
1162Return the error handler for the specified error handling name\n\
1163or raise a LookupError, if no handler exists under this name.");
1164
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001165static PyObject *lookup_error(PyObject *self, PyObject *args)
1166{
1167 const char *name;
1168
1169 if (!PyArg_ParseTuple(args, "s:lookup_error",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001170 &name))
1171 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001172 return PyCodec_LookupError(name);
1173}
1174
Guido van Rossume2d67f92000-03-10 23:09:23 +00001175/* --- Module API --------------------------------------------------------- */
1176
1177static PyMethodDef _codecs_functions[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001178 {"register", codec_register, METH_O,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001179 register__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001180 {"lookup", codec_lookup, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001181 lookup__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001182 {"encode", codec_encode, METH_VARARGS,
1183 encode__doc__},
1184 {"decode", codec_decode, METH_VARARGS,
1185 decode__doc__},
1186 {"escape_encode", escape_encode, METH_VARARGS},
1187 {"escape_decode", escape_decode, METH_VARARGS},
1188 {"utf_8_encode", utf_8_encode, METH_VARARGS},
1189 {"utf_8_decode", utf_8_decode, METH_VARARGS},
1190 {"utf_7_encode", utf_7_encode, METH_VARARGS},
1191 {"utf_7_decode", utf_7_decode, METH_VARARGS},
1192 {"utf_16_encode", utf_16_encode, METH_VARARGS},
1193 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1194 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1195 {"utf_16_decode", utf_16_decode, METH_VARARGS},
1196 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1197 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1198 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
1199 {"utf_32_encode", utf_32_encode, METH_VARARGS},
1200 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1201 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1202 {"utf_32_decode", utf_32_decode, METH_VARARGS},
1203 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1204 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1205 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
1206 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1207 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1208 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1209 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1210 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1211 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1212 {"latin_1_encode", latin_1_encode, METH_VARARGS},
1213 {"latin_1_decode", latin_1_decode, METH_VARARGS},
1214 {"ascii_encode", ascii_encode, METH_VARARGS},
1215 {"ascii_decode", ascii_decode, METH_VARARGS},
1216 {"charmap_encode", charmap_encode, METH_VARARGS},
1217 {"charmap_decode", charmap_decode, METH_VARARGS},
1218 {"charmap_build", charmap_build, METH_VARARGS},
1219 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
Victor Stinner99b95382011-07-04 14:23:54 +02001220#ifdef HAVE_MBCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001221 {"mbcs_encode", mbcs_encode, METH_VARARGS},
1222 {"mbcs_decode", mbcs_decode, METH_VARARGS},
Victor Stinner3a50e702011-10-18 21:21:00 +02001223 {"code_page_encode", code_page_encode, METH_VARARGS},
1224 {"code_page_decode", code_page_decode, METH_VARARGS},
Guido van Rossum24bdb042000-03-28 20:29:59 +00001225#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001226 {"register_error", register_error, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001227 register_error__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001228 {"lookup_error", lookup_error, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001229 lookup_error__doc__},
Nick Coghlan8fad1672014-09-15 23:50:44 +12001230 _CODECS__FORGET_CODEC_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001231 {NULL, NULL} /* sentinel */
Guido van Rossume2d67f92000-03-10 23:09:23 +00001232};
1233
Martin v. Löwis1a214512008-06-11 05:26:20 +00001234static struct PyModuleDef codecsmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001235 PyModuleDef_HEAD_INIT,
1236 "_codecs",
1237 NULL,
1238 -1,
1239 _codecs_functions,
1240 NULL,
1241 NULL,
1242 NULL,
1243 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001244};
1245
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001246PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001247PyInit__codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001248{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001249 return PyModule_Create(&codecsmodule);
Guido van Rossume2d67f92000-03-10 23:09:23 +00001250}