blob: ac25998d7baa3451ca61cef4dd46abe0c48195a6 [file] [log] [blame]
/* ------------------------------------------------------------------------

   _codecs -- Provides access to the codec registry and the builtin
              codecs.

   This module should never be imported directly. The standard library
   module "codecs" wraps this builtin module for use within Python.

   The codec registry is accessible via:

     register(search_function) -> None

     lookup(encoding) -> CodecInfo object

   The builtin Unicode codecs use the following interface:

     <encoding>_encode(Unicode_object[,errors='strict']) ->
        (string object, bytes consumed)

     <encoding>_decode(char_buffer_obj[,errors='strict']) ->
        (Unicode object, bytes consumed)

   <encoding>_encode() interfaces also accept non-Unicode objects as
   input. The objects are then converted to Unicode using
   PyUnicode_FromObject() prior to applying the conversion.

   These <encoding>s are available: utf_8, unicode_escape,
   raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
   mbcs (on win32).


Written by Marc-Andre Lemburg (mal@lemburg.com).

Copyright (c) Corporation for National Research Initiatives.

   ------------------------------------------------------------------------ */
37
Martin v. Löwis18e16552006-02-15 17:27:45 +000038#define PY_SSIZE_T_CLEAN
Guido van Rossume2d67f92000-03-10 23:09:23 +000039#include "Python.h"
40
Victor Stinner87a7c822011-11-10 20:05:55 +010041#ifdef MS_WINDOWS
42#include <windows.h>
43#endif
44
Guido van Rossume2d67f92000-03-10 23:09:23 +000045/* --- Registry ----------------------------------------------------------- */
46
Walter Dörwald0ae29812002-10-31 13:36:29 +000047PyDoc_STRVAR(register__doc__,
48"register(search_function)\n\
49\n\
50Register a codec search function. Search functions are expected to take\n\
51one argument, the encoding name in all lower case letters, and return\n\
Guido van Rossum36e0a922007-07-20 04:05:57 +000052a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
53(or a CodecInfo object).");
Walter Dörwald0ae29812002-10-31 13:36:29 +000054
Guido van Rossume2d67f92000-03-10 23:09:23 +000055static
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000056PyObject *codec_register(PyObject *self, PyObject *search_function)
Guido van Rossume2d67f92000-03-10 23:09:23 +000057{
Guido van Rossume2d67f92000-03-10 23:09:23 +000058 if (PyCodec_Register(search_function))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000059 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000060
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000061 Py_RETURN_NONE;
Guido van Rossume2d67f92000-03-10 23:09:23 +000062}
63
Walter Dörwald0ae29812002-10-31 13:36:29 +000064PyDoc_STRVAR(lookup__doc__,
Guido van Rossum36e0a922007-07-20 04:05:57 +000065"lookup(encoding) -> CodecInfo\n\
Walter Dörwald0ae29812002-10-31 13:36:29 +000066\n\
67Looks up a codec tuple in the Python codec registry and returns\n\
Benjamin Petersonf07d0022009-03-21 17:31:58 +000068a CodecInfo object.");
Walter Dörwald0ae29812002-10-31 13:36:29 +000069
Guido van Rossume2d67f92000-03-10 23:09:23 +000070static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000071PyObject *codec_lookup(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000072{
73 char *encoding;
74
75 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000076 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +000077
78 return _PyCodec_Lookup(encoding);
Guido van Rossume2d67f92000-03-10 23:09:23 +000079}
80
Marc-André Lemburg3f419742004-07-10 12:06:10 +000081PyDoc_STRVAR(encode__doc__,
82"encode(obj, [encoding[,errors]]) -> object\n\
83\n\
84Encodes obj using the codec registered for encoding. encoding defaults\n\
85to the default encoding. errors may be given to set a different error\n\
86handling scheme. Default is 'strict' meaning that encoding errors raise\n\
87a ValueError. Other possible values are 'ignore', 'replace' and\n\
88'xmlcharrefreplace' as well as any other name registered with\n\
89codecs.register_error that can handle ValueErrors.");
90
91static PyObject *
Victor Stinnera57dfd02014-05-14 17:13:14 +020092codec_encode(PyObject *self, PyObject *args, PyObject *kwargs)
Marc-André Lemburg3f419742004-07-10 12:06:10 +000093{
Victor Stinnera57dfd02014-05-14 17:13:14 +020094 static char *kwlist[] = {"obj", "encoding", "errors", NULL};
Brett Cannon3e377de2004-07-10 21:41:14 +000095 const char *encoding = NULL;
96 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +000097 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000098
Victor Stinnera57dfd02014-05-14 17:13:14 +020099 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|ss:encode", kwlist,
100 &v, &encoding, &errors))
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000101 return NULL;
102
103 if (encoding == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000104 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000105
106 /* Encode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000107 return PyCodec_Encode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000108}
109
110PyDoc_STRVAR(decode__doc__,
111"decode(obj, [encoding[,errors]]) -> object\n\
112\n\
113Decodes obj using the codec registered for encoding. encoding defaults\n\
114to the default encoding. errors may be given to set a different error\n\
115handling scheme. Default is 'strict' meaning that encoding errors raise\n\
116a ValueError. Other possible values are 'ignore' and 'replace'\n\
Benjamin Petersonf10a79a2008-10-11 00:49:57 +0000117as well as any other name registered with codecs.register_error that is\n\
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000118able to handle ValueErrors.");
119
120static PyObject *
Victor Stinnera57dfd02014-05-14 17:13:14 +0200121codec_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000122{
Victor Stinnera57dfd02014-05-14 17:13:14 +0200123 static char *kwlist[] = {"obj", "encoding", "errors", NULL};
Brett Cannon3e377de2004-07-10 21:41:14 +0000124 const char *encoding = NULL;
125 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000126 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000127
Victor Stinnera57dfd02014-05-14 17:13:14 +0200128 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|ss:decode", kwlist,
129 &v, &encoding, &errors))
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000130 return NULL;
131
132 if (encoding == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000133 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000134
135 /* Decode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000136 return PyCodec_Decode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000137}
138
Guido van Rossume2d67f92000-03-10 23:09:23 +0000139/* --- Helpers ------------------------------------------------------------ */
140
141static
142PyObject *codec_tuple(PyObject *unicode,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 Py_ssize_t len)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000144{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000145 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000146 if (unicode == NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000147 return NULL;
148 v = Py_BuildValue("On", unicode, len);
149 Py_DECREF(unicode);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000150 return v;
151}
152
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000153/* --- String codecs ------------------------------------------------------ */
154static PyObject *
155escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 PyObject *args)
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000157{
158 const char *errors = NULL;
159 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000160 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000161
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000162 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000163 &data, &size, &errors))
164 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +0000165 return codec_tuple(PyBytes_DecodeEscape(data, size, errors, 0, NULL),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000166 size);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000167}
168
169static PyObject *
170escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000171 PyObject *args)
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000172{
Antoine Pitroud1188562010-06-09 16:38:55 +0000173 PyObject *str;
174 Py_ssize_t size;
175 Py_ssize_t newsize;
176 const char *errors = NULL;
177 PyObject *v;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000178
Antoine Pitroud1188562010-06-09 16:38:55 +0000179 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
180 &PyBytes_Type, &str, &errors))
181 return NULL;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000182
Antoine Pitroud1188562010-06-09 16:38:55 +0000183 size = PyBytes_GET_SIZE(str);
Mark Dickinsonc04ddff2012-10-06 18:04:49 +0100184 if (size > PY_SSIZE_T_MAX / 4) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000185 PyErr_SetString(PyExc_OverflowError,
186 "string is too large to encode");
187 return NULL;
188 }
Mark Dickinsonc04ddff2012-10-06 18:04:49 +0100189 newsize = 4*size;
Antoine Pitroud1188562010-06-09 16:38:55 +0000190 v = PyBytes_FromStringAndSize(NULL, newsize);
191
192 if (v == NULL) {
193 return NULL;
194 }
195 else {
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200196 Py_ssize_t i;
197 char c;
198 char *p = PyBytes_AS_STRING(v);
Antoine Pitroud1188562010-06-09 16:38:55 +0000199
200 for (i = 0; i < size; i++) {
201 /* There's at least enough room for a hex escape */
202 assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
203 c = PyBytes_AS_STRING(str)[i];
204 if (c == '\'' || c == '\\')
205 *p++ = '\\', *p++ = c;
206 else if (c == '\t')
207 *p++ = '\\', *p++ = 't';
208 else if (c == '\n')
209 *p++ = '\\', *p++ = 'n';
210 else if (c == '\r')
211 *p++ = '\\', *p++ = 'r';
212 else if (c < ' ' || c >= 0x7f) {
213 *p++ = '\\';
214 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200215 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
216 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitroud1188562010-06-09 16:38:55 +0000217 }
218 else
219 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000220 }
Antoine Pitroud1188562010-06-09 16:38:55 +0000221 *p = '\0';
222 if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
223 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000224 }
Antoine Pitroud1188562010-06-09 16:38:55 +0000225 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000226
Antoine Pitroud1188562010-06-09 16:38:55 +0000227 return codec_tuple(v, size);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000228}
229
Guido van Rossume2d67f92000-03-10 23:09:23 +0000230/* --- Decoder ------------------------------------------------------------ */
231
232static PyObject *
233unicode_internal_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000234 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000235{
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000236 PyObject *obj;
237 const char *errors = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000238 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000240
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000241 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000242 &obj, &errors))
243 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000244
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000245 if (PyUnicode_Check(obj)) {
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100246 if (PyUnicode_READY(obj) < 0)
247 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 Py_INCREF(obj);
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100249 return codec_tuple(obj, PyUnicode_GET_LENGTH(obj));
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000250 }
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000251 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
253 return NULL;
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000254
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
256 size);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000257 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000258}
259
260static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000261utf_7_decode(PyObject *self,
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000262 PyObject *args)
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000263{
Antoine Pitroud1188562010-06-09 16:38:55 +0000264 Py_buffer pbuf;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000265 const char *errors = NULL;
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000266 int final = 0;
267 Py_ssize_t consumed;
268 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000269
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000270 if (!PyArg_ParseTuple(args, "y*|zi:utf_7_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000271 &pbuf, &errors, &final))
272 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000273 consumed = pbuf.len;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000274
Martin v. Löwis423be952008-08-13 15:53:07 +0000275 decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000276 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000277 PyBuffer_Release(&pbuf);
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000278 if (decoded == NULL)
279 return NULL;
280 return codec_tuple(decoded, consumed);
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000281}
282
283static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000284utf_8_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000285 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000286{
Antoine Pitroud1188562010-06-09 16:38:55 +0000287 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000288 const char *errors = NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000289 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000290 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000291 PyObject *decoded = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000292
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000293 if (!PyArg_ParseTuple(args, "y*|zi:utf_8_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000294 &pbuf, &errors, &final))
295 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000296 consumed = pbuf.len;
297
298 decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000299 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000300 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000301 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000302 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000303 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000304}
305
306static PyObject *
307utf_16_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000308 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000309{
Antoine Pitroud1188562010-06-09 16:38:55 +0000310 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000311 const char *errors = NULL;
312 int byteorder = 0;
Walter Dörwald69652032004-09-07 20:24:22 +0000313 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000314 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000315 PyObject *decoded;
316
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000317 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000318 &pbuf, &errors, &final))
319 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000320 consumed = pbuf.len; /* This is overwritten unless final is true. */
321 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000322 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000323 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000324 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000325 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000326 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000327}
328
329static PyObject *
330utf_16_le_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000331 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000332{
Antoine Pitroud1188562010-06-09 16:38:55 +0000333 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000334 const char *errors = NULL;
335 int byteorder = -1;
Walter Dörwald69652032004-09-07 20:24:22 +0000336 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000337 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000338 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000339
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000340 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_le_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000341 &pbuf, &errors, &final))
342 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000343
Martin v. Löwis423be952008-08-13 15:53:07 +0000344 consumed = pbuf.len; /* This is overwritten unless final is true. */
345 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000347 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000348 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000349 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000350 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000351}
352
353static PyObject *
354utf_16_be_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000355 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000356{
Antoine Pitroud1188562010-06-09 16:38:55 +0000357 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000358 const char *errors = NULL;
359 int byteorder = 1;
Walter Dörwald69652032004-09-07 20:24:22 +0000360 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000361 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000362 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000363
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000364 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_be_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000365 &pbuf, &errors, &final))
366 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000367
368 consumed = pbuf.len; /* This is overwritten unless final is true. */
369 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000371 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000372 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000373 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000374 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000375}
376
377/* This non-standard version also provides access to the byteorder
378 parameter of the builtin UTF-16 codec.
379
380 It returns a tuple (unicode, bytesread, byteorder) with byteorder
381 being the value in effect at the end of data.
382
383*/
384
385static PyObject *
386utf_16_ex_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000387 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000388{
Antoine Pitroud1188562010-06-09 16:38:55 +0000389 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000390 const char *errors = NULL;
391 int byteorder = 0;
392 PyObject *unicode, *tuple;
Walter Dörwald69652032004-09-07 20:24:22 +0000393 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000394 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000395
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000396 if (!PyArg_ParseTuple(args, "y*|zii:utf_16_ex_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000397 &pbuf, &errors, &byteorder, &final))
398 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000399 consumed = pbuf.len; /* This is overwritten unless final is true. */
400 unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000401 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000402 PyBuffer_Release(&pbuf);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000403 if (unicode == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000404 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000405 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000406 Py_DECREF(unicode);
407 return tuple;
408}
409
410static PyObject *
Walter Dörwald41980ca2007-08-16 21:55:45 +0000411utf_32_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000412 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000413{
Antoine Pitroud1188562010-06-09 16:38:55 +0000414 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000415 const char *errors = NULL;
416 int byteorder = 0;
417 int final = 0;
418 Py_ssize_t consumed;
419 PyObject *decoded;
420
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000421 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000422 &pbuf, &errors, &final))
423 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000424 consumed = pbuf.len; /* This is overwritten unless final is true. */
425 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000426 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000427 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000428 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000429 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000430 return codec_tuple(decoded, consumed);
431}
432
433static PyObject *
434utf_32_le_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000435 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000436{
Antoine Pitroud1188562010-06-09 16:38:55 +0000437 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000438 const char *errors = NULL;
439 int byteorder = -1;
440 int final = 0;
441 Py_ssize_t consumed;
Martin v. Löwis423be952008-08-13 15:53:07 +0000442 PyObject *decoded;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000443
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000444 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_le_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000445 &pbuf, &errors, &final))
446 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000447 consumed = pbuf.len; /* This is overwritten unless final is true. */
448 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000449 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000450 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000451 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000453 return codec_tuple(decoded, consumed);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000454}
455
456static PyObject *
457utf_32_be_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000458 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000459{
Antoine Pitroud1188562010-06-09 16:38:55 +0000460 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000461 const char *errors = NULL;
462 int byteorder = 1;
463 int final = 0;
464 Py_ssize_t consumed;
Martin v. Löwis423be952008-08-13 15:53:07 +0000465 PyObject *decoded;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000466
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000467 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_be_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000468 &pbuf, &errors, &final))
469 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000470 consumed = pbuf.len; /* This is overwritten unless final is true. */
471 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000472 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000473 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000474 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000475 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000476 return codec_tuple(decoded, consumed);
477}
478
479/* This non-standard version also provides access to the byteorder
480 parameter of the builtin UTF-32 codec.
481
482 It returns a tuple (unicode, bytesread, byteorder) with byteorder
483 being the value in effect at the end of data.
484
485*/
486
487static PyObject *
488utf_32_ex_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000489 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000490{
Antoine Pitroud1188562010-06-09 16:38:55 +0000491 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000492 const char *errors = NULL;
493 int byteorder = 0;
494 PyObject *unicode, *tuple;
495 int final = 0;
496 Py_ssize_t consumed;
497
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000498 if (!PyArg_ParseTuple(args, "y*|zii:utf_32_ex_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000499 &pbuf, &errors, &byteorder, &final))
500 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000501 consumed = pbuf.len; /* This is overwritten unless final is true. */
502 unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000503 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000504 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000505 if (unicode == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000506 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000507 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
508 Py_DECREF(unicode);
509 return tuple;
510}
511
512static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000513unicode_escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000514 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000515{
Antoine Pitroud1188562010-06-09 16:38:55 +0000516 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000517 const char *errors = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000518 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000519
Martin v. Löwis423be952008-08-13 15:53:07 +0000520 if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000521 &pbuf, &errors))
522 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000523
Antoine Pitroud1188562010-06-09 16:38:55 +0000524 unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
525 PyBuffer_Release(&pbuf);
526 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000527}
528
529static PyObject *
530raw_unicode_escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000531 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000532{
Antoine Pitroud1188562010-06-09 16:38:55 +0000533 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000534 const char *errors = NULL;
Antoine Pitroud1188562010-06-09 16:38:55 +0000535 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000536
Martin v. Löwis423be952008-08-13 15:53:07 +0000537 if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000538 &pbuf, &errors))
539 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000540
Antoine Pitroud1188562010-06-09 16:38:55 +0000541 unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
542 PyBuffer_Release(&pbuf);
543 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000544}
545
546static PyObject *
547latin_1_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000548 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000549{
Antoine Pitroud1188562010-06-09 16:38:55 +0000550 Py_buffer pbuf;
551 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000552 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000553
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000554 if (!PyArg_ParseTuple(args, "y*|z:latin_1_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000555 &pbuf, &errors))
556 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000557
Antoine Pitroud1188562010-06-09 16:38:55 +0000558 unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
559 PyBuffer_Release(&pbuf);
560 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000561}
562
563static PyObject *
564ascii_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000565 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000566{
Antoine Pitroud1188562010-06-09 16:38:55 +0000567 Py_buffer pbuf;
568 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000569 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000570
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000571 if (!PyArg_ParseTuple(args, "y*|z:ascii_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000572 &pbuf, &errors))
573 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000574
Antoine Pitroud1188562010-06-09 16:38:55 +0000575 unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
576 PyBuffer_Release(&pbuf);
577 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000578}
579
580static PyObject *
581charmap_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000582 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000583{
Antoine Pitroud1188562010-06-09 16:38:55 +0000584 Py_buffer pbuf;
585 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000586 const char *errors = NULL;
587 PyObject *mapping = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000588
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000589 if (!PyArg_ParseTuple(args, "y*|zO:charmap_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000590 &pbuf, &errors, &mapping))
591 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000592 if (mapping == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000593 mapping = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000594
Antoine Pitroud1188562010-06-09 16:38:55 +0000595 unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
596 PyBuffer_Release(&pbuf);
597 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000598}
599
Victor Stinner99b95382011-07-04 14:23:54 +0200600#ifdef HAVE_MBCS
Guido van Rossum24bdb042000-03-28 20:29:59 +0000601
602static PyObject *
603mbcs_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000604 PyObject *args)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000605{
Antoine Pitroud1188562010-06-09 16:38:55 +0000606 Py_buffer pbuf;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000607 const char *errors = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000608 int final = 0;
Martin v. Löwis423be952008-08-13 15:53:07 +0000609 Py_ssize_t consumed;
610 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000611
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000612 if (!PyArg_ParseTuple(args, "y*|zi:mbcs_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000613 &pbuf, &errors, &final))
614 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000615 consumed = pbuf.len;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000616
Martin v. Löwis423be952008-08-13 15:53:07 +0000617 decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000618 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000619 PyBuffer_Release(&pbuf);
Martin v. Löwis423be952008-08-13 15:53:07 +0000620 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000621 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000622 return codec_tuple(decoded, consumed);
Guido van Rossum24bdb042000-03-28 20:29:59 +0000623}
624
Victor Stinner3a50e702011-10-18 21:21:00 +0200625static PyObject *
626code_page_decode(PyObject *self,
627 PyObject *args)
628{
629 Py_buffer pbuf;
630 const char *errors = NULL;
631 int final = 0;
632 Py_ssize_t consumed;
633 PyObject *decoded = NULL;
634 int code_page;
635
636 if (!PyArg_ParseTuple(args, "iy*|zi:code_page_decode",
637 &code_page, &pbuf, &errors, &final))
638 return NULL;
639 consumed = pbuf.len;
640
641 decoded = PyUnicode_DecodeCodePageStateful(code_page,
642 pbuf.buf, pbuf.len, errors,
643 final ? NULL : &consumed);
644 PyBuffer_Release(&pbuf);
645 if (decoded == NULL)
646 return NULL;
647 return codec_tuple(decoded, consumed);
648}
649
Victor Stinner99b95382011-07-04 14:23:54 +0200650#endif /* HAVE_MBCS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000651
Guido van Rossume2d67f92000-03-10 23:09:23 +0000652/* --- Encoder ------------------------------------------------------------ */
653
654static PyObject *
655readbuffer_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000656 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000657{
Martin v. Löwis423be952008-08-13 15:53:07 +0000658 Py_buffer pdata;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000659 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000660 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000661 const char *errors = NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000662 PyObject *result;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000663
Martin v. Löwis423be952008-08-13 15:53:07 +0000664 if (!PyArg_ParseTuple(args, "s*|z:readbuffer_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000665 &pdata, &errors))
666 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000667 data = pdata.buf;
668 size = pdata.len;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000669
Martin v. Löwis423be952008-08-13 15:53:07 +0000670 result = PyBytes_FromStringAndSize(data, size);
671 PyBuffer_Release(&pdata);
672 return codec_tuple(result, size);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000673}
674
675static PyObject *
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000676unicode_internal_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000677 PyObject *args)
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000678{
679 PyObject *obj;
680 const char *errors = NULL;
681 const char *data;
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100682 Py_ssize_t len, size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000683
Ezio Melottiadc417c2011-11-17 12:23:34 +0200684 if (PyErr_WarnEx(PyExc_DeprecationWarning,
685 "unicode_internal codec has been deprecated",
686 1))
687 return NULL;
688
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000689 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000690 &obj, &errors))
691 return NULL;
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000692
693 if (PyUnicode_Check(obj)) {
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100694 Py_UNICODE *u;
695
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100696 if (PyUnicode_READY(obj) < 0)
697 return NULL;
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100698
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100699 u = PyUnicode_AsUnicodeAndSize(obj, &len);
700 if (u == NULL)
701 return NULL;
Victor Stinner049e5092014-08-17 22:20:00 +0200702 if ((size_t)len > (size_t)PY_SSIZE_T_MAX / sizeof(Py_UNICODE))
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100703 return PyErr_NoMemory();
704 size = len * sizeof(Py_UNICODE);
705 return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size),
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100706 PyUnicode_GET_LENGTH(obj));
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000707 }
708 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000709 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
710 return NULL;
711 return codec_tuple(PyBytes_FromStringAndSize(data, size), size);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000712 }
713}
714
715static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000716utf_7_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000717 PyObject *args)
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000718{
719 PyObject *str, *v;
720 const char *errors = NULL;
721
722 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000723 &str, &errors))
724 return NULL;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000725
726 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100727 if (str == NULL || PyUnicode_READY(str) < 0) {
728 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000729 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100730 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100731 v = codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
732 PyUnicode_GET_LENGTH(str));
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000733 Py_DECREF(str);
734 return v;
735}
736
737static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000738utf_8_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000739 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000740{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000741 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000742 const char *errors = NULL;
743
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000744 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000745 &str, &errors))
746 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000747
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000748 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100749 if (str == NULL || PyUnicode_READY(str) < 0) {
750 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000751 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100752 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200753 v = codec_tuple(PyUnicode_AsEncodedString(str, "utf-8", errors),
754 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000755 Py_DECREF(str);
756 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000757}
758
759/* This version provides access to the byteorder parameter of the
760 builtin UTF-16 codecs as optional third argument. It defaults to 0
761 which means: use the native byte order and prepend the data with a
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000762 BOM mark.
Guido van Rossume2d67f92000-03-10 23:09:23 +0000763
764*/
765
766static PyObject *
767utf_16_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000768 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000769{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000770 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000771 const char *errors = NULL;
772 int byteorder = 0;
773
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000774 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000775 &str, &errors, &byteorder))
776 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000777
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000778 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100779 if (str == NULL || PyUnicode_READY(str) < 0) {
780 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000781 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100782 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100783 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
784 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000785 Py_DECREF(str);
786 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000787}
788
789static PyObject *
790utf_16_le_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000791 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000792{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000793 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000794 const char *errors = NULL;
795
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000796 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000797 &str, &errors))
798 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000799
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000800 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100801 if (str == NULL || PyUnicode_READY(str) < 0) {
802 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000803 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100804 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100805 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
806 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000807 Py_DECREF(str);
808 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000809}
810
811static PyObject *
812utf_16_be_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000813 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000814{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000815 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000816 const char *errors = NULL;
817
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000818 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000819 &str, &errors))
820 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000821
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000822 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100823 if (str == NULL || PyUnicode_READY(str) < 0) {
824 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000825 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100826 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100827 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
828 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000829 Py_DECREF(str);
830 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000831}
832
Walter Dörwald41980ca2007-08-16 21:55:45 +0000833/* This version provides access to the byteorder parameter of the
834 builtin UTF-32 codecs as optional third argument. It defaults to 0
835 which means: use the native byte order and prepend the data with a
836 BOM mark.
837
838*/
839
840static PyObject *
841utf_32_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000842 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000843{
844 PyObject *str, *v;
845 const char *errors = NULL;
846 int byteorder = 0;
847
848 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000849 &str, &errors, &byteorder))
850 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000851
852 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100853 if (str == NULL || PyUnicode_READY(str) < 0) {
854 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000855 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100856 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100857 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
858 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000859 Py_DECREF(str);
860 return v;
861}
862
863static PyObject *
864utf_32_le_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000865 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000866{
867 PyObject *str, *v;
868 const char *errors = NULL;
869
870 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000871 &str, &errors))
872 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000873
874 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100875 if (str == NULL || PyUnicode_READY(str) < 0) {
876 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000877 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100878 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100879 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
880 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000881 Py_DECREF(str);
882 return v;
883}
884
885static PyObject *
886utf_32_be_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000887 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000888{
889 PyObject *str, *v;
890 const char *errors = NULL;
891
892 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000893 &str, &errors))
894 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000895
896 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100897 if (str == NULL || PyUnicode_READY(str) < 0) {
898 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000899 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100900 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100901 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
902 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000903 Py_DECREF(str);
904 return v;
905}
906
Guido van Rossume2d67f92000-03-10 23:09:23 +0000907static PyObject *
908unicode_escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000909 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000910{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000911 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000912 const char *errors = NULL;
913
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000914 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000915 &str, &errors))
916 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000917
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000918 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100919 if (str == NULL || PyUnicode_READY(str) < 0) {
920 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000921 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100922 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100923 v = codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
924 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000925 Py_DECREF(str);
926 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000927}
928
929static PyObject *
930raw_unicode_escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000931 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000932{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000933 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000934 const char *errors = NULL;
935
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000936 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000937 &str, &errors))
938 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000939
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000940 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100941 if (str == NULL || PyUnicode_READY(str) < 0) {
942 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000943 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100944 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100945 v = codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
946 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000947 Py_DECREF(str);
948 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000949}
950
951static PyObject *
952latin_1_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000953 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000954{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000955 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000956 const char *errors = NULL;
957
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000958 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000959 &str, &errors))
960 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000961
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000962 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100963 if (str == NULL || PyUnicode_READY(str) < 0) {
964 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000965 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100966 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100967 v = codec_tuple(_PyUnicode_AsLatin1String(str, errors),
968 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000969 Py_DECREF(str);
970 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000971}
972
973static PyObject *
974ascii_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000975 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000976{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000977 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000978 const char *errors = NULL;
979
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000980 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000981 &str, &errors))
982 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000983
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000984 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100985 if (str == NULL || PyUnicode_READY(str) < 0) {
986 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000987 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100988 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100989 v = codec_tuple(_PyUnicode_AsASCIIString(str, errors),
990 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000991 Py_DECREF(str);
992 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000993}
994
995static PyObject *
996charmap_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000997 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000998{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000999 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001000 const char *errors = NULL;
1001 PyObject *mapping = NULL;
1002
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001003 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001004 &str, &errors, &mapping))
1005 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001006 if (mapping == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001007 mapping = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001008
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001009 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001010 if (str == NULL || PyUnicode_READY(str) < 0) {
1011 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001012 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001013 }
Martin v. Löwis23e275b2011-11-02 18:02:51 +01001014 v = codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001015 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001016 Py_DECREF(str);
1017 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001018}
1019
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001020static PyObject*
1021charmap_build(PyObject *self, PyObject *args)
1022{
1023 PyObject *map;
1024 if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
1025 return NULL;
1026 return PyUnicode_BuildEncodingMap(map);
1027}
1028
Victor Stinner99b95382011-07-04 14:23:54 +02001029#ifdef HAVE_MBCS
Guido van Rossum24bdb042000-03-28 20:29:59 +00001030
1031static PyObject *
1032mbcs_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001033 PyObject *args)
Guido van Rossum24bdb042000-03-28 20:29:59 +00001034{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001035 PyObject *str, *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001036 const char *errors = NULL;
1037
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001038 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001039 &str, &errors))
1040 return NULL;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001041
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001042 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001043 if (str == NULL || PyUnicode_READY(str) < 0) {
1044 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001045 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001046 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001047 v = codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
1048 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001049 Py_DECREF(str);
1050 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001051}
1052
Victor Stinner3a50e702011-10-18 21:21:00 +02001053static PyObject *
1054code_page_encode(PyObject *self,
1055 PyObject *args)
1056{
1057 PyObject *str, *v;
1058 const char *errors = NULL;
1059 int code_page;
1060
1061 if (!PyArg_ParseTuple(args, "iO|z:code_page_encode",
1062 &code_page, &str, &errors))
1063 return NULL;
1064
1065 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001066 if (str == NULL || PyUnicode_READY(str) < 0) {
1067 Py_XDECREF(str);
Victor Stinner3a50e702011-10-18 21:21:00 +02001068 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001069 }
Victor Stinner3a50e702011-10-18 21:21:00 +02001070 v = codec_tuple(PyUnicode_EncodeCodePage(code_page,
1071 str,
1072 errors),
1073 PyUnicode_GET_LENGTH(str));
1074 Py_DECREF(str);
1075 return v;
1076}
1077
Victor Stinner99b95382011-07-04 14:23:54 +02001078#endif /* HAVE_MBCS */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001079
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001080/* --- Error handler registry --------------------------------------------- */
1081
Walter Dörwald0ae29812002-10-31 13:36:29 +00001082PyDoc_STRVAR(register_error__doc__,
1083"register_error(errors, handler)\n\
1084\n\
1085Register the specified error handler under the name\n\
1086errors. handler must be a callable object, that\n\
1087will be called with an exception instance containing\n\
1088information about the location of the encoding/decoding\n\
1089error and must return a (replacement, new position) tuple.");
1090
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001091static PyObject *register_error(PyObject *self, PyObject *args)
1092{
1093 const char *name;
1094 PyObject *handler;
1095
1096 if (!PyArg_ParseTuple(args, "sO:register_error",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 &name, &handler))
1098 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001099 if (PyCodec_RegisterError(name, handler))
1100 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001101 Py_RETURN_NONE;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001102}
1103
Walter Dörwald0ae29812002-10-31 13:36:29 +00001104PyDoc_STRVAR(lookup_error__doc__,
1105"lookup_error(errors) -> handler\n\
1106\n\
1107Return the error handler for the specified error handling name\n\
1108or raise a LookupError, if no handler exists under this name.");
1109
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001110static PyObject *lookup_error(PyObject *self, PyObject *args)
1111{
1112 const char *name;
1113
1114 if (!PyArg_ParseTuple(args, "s:lookup_error",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001115 &name))
1116 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001117 return PyCodec_LookupError(name);
1118}
1119
Guido van Rossume2d67f92000-03-10 23:09:23 +00001120/* --- Module API --------------------------------------------------------- */
1121
1122static PyMethodDef _codecs_functions[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001123 {"register", codec_register, METH_O,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001124 register__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001125 {"lookup", codec_lookup, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001126 lookup__doc__},
Victor Stinnera57dfd02014-05-14 17:13:14 +02001127 {"encode", (PyCFunction)codec_encode, METH_VARARGS|METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001128 encode__doc__},
Victor Stinnera57dfd02014-05-14 17:13:14 +02001129 {"decode", (PyCFunction)codec_decode, METH_VARARGS|METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001130 decode__doc__},
1131 {"escape_encode", escape_encode, METH_VARARGS},
1132 {"escape_decode", escape_decode, METH_VARARGS},
1133 {"utf_8_encode", utf_8_encode, METH_VARARGS},
1134 {"utf_8_decode", utf_8_decode, METH_VARARGS},
1135 {"utf_7_encode", utf_7_encode, METH_VARARGS},
1136 {"utf_7_decode", utf_7_decode, METH_VARARGS},
1137 {"utf_16_encode", utf_16_encode, METH_VARARGS},
1138 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1139 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1140 {"utf_16_decode", utf_16_decode, METH_VARARGS},
1141 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1142 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1143 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
1144 {"utf_32_encode", utf_32_encode, METH_VARARGS},
1145 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1146 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1147 {"utf_32_decode", utf_32_decode, METH_VARARGS},
1148 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1149 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1150 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
1151 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1152 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1153 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1154 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1155 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1156 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1157 {"latin_1_encode", latin_1_encode, METH_VARARGS},
1158 {"latin_1_decode", latin_1_decode, METH_VARARGS},
1159 {"ascii_encode", ascii_encode, METH_VARARGS},
1160 {"ascii_decode", ascii_decode, METH_VARARGS},
1161 {"charmap_encode", charmap_encode, METH_VARARGS},
1162 {"charmap_decode", charmap_decode, METH_VARARGS},
1163 {"charmap_build", charmap_build, METH_VARARGS},
1164 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
Victor Stinner99b95382011-07-04 14:23:54 +02001165#ifdef HAVE_MBCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001166 {"mbcs_encode", mbcs_encode, METH_VARARGS},
1167 {"mbcs_decode", mbcs_decode, METH_VARARGS},
Victor Stinner3a50e702011-10-18 21:21:00 +02001168 {"code_page_encode", code_page_encode, METH_VARARGS},
1169 {"code_page_decode", code_page_decode, METH_VARARGS},
Guido van Rossum24bdb042000-03-28 20:29:59 +00001170#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001171 {"register_error", register_error, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001172 register_error__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001173 {"lookup_error", lookup_error, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001174 lookup_error__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001175 {NULL, NULL} /* sentinel */
Guido van Rossume2d67f92000-03-10 23:09:23 +00001176};
1177
Martin v. Löwis1a214512008-06-11 05:26:20 +00001178static struct PyModuleDef codecsmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001179 PyModuleDef_HEAD_INIT,
1180 "_codecs",
1181 NULL,
1182 -1,
1183 _codecs_functions,
1184 NULL,
1185 NULL,
1186 NULL,
1187 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001188};
1189
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001190PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001191PyInit__codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001192{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001193 return PyModule_Create(&codecsmodule);
Guido van Rossume2d67f92000-03-10 23:09:23 +00001194}