blob: d8b4bbda28a3f7384fe12edf5422a696f6f6a302 [file] [log] [blame]
Guido van Rossume2d67f92000-03-10 23:09:23 +00001/* ------------------------------------------------------------------------
2
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
5
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
8
9 The codec registry is accessible via:
10
11 register(search_function) -> None
12
Guido van Rossum36e0a922007-07-20 04:05:57 +000013 lookup(encoding) -> CodecInfo object
Guido van Rossume2d67f92000-03-10 23:09:23 +000014
15 The builtin Unicode codecs use the following interface:
16
Walter Dörwald9fd115c2005-11-02 08:30:08 +000017 <encoding>_encode(Unicode_object[,errors='strict']) ->
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000018 (string object, bytes consumed)
Guido van Rossume2d67f92000-03-10 23:09:23 +000019
Walter Dörwald9fd115c2005-11-02 08:30:08 +000020 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
Guido van Rossume2d67f92000-03-10 23:09:23 +000021 (Unicode object, bytes consumed)
22
   <encoding>_encode() interfaces also accept non-Unicode objects as
   input. The objects are then converted to Unicode using
   PyUnicode_FromObject() prior to applying the conversion.
26
Guido van Rossume2d67f92000-03-10 23:09:23 +000027 These <encoding>s are available: utf_8, unicode_escape,
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +000028 raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
29 mbcs (on win32).
30
Guido van Rossume2d67f92000-03-10 23:09:23 +000031
32Written by Marc-Andre Lemburg (mal@lemburg.com).
33
Guido van Rossum16b1ad92000-08-03 16:24:25 +000034Copyright (c) Corporation for National Research Initiatives.
Guido van Rossume2d67f92000-03-10 23:09:23 +000035
36 ------------------------------------------------------------------------ */
37
Martin v. Löwis18e16552006-02-15 17:27:45 +000038#define PY_SSIZE_T_CLEAN
Guido van Rossume2d67f92000-03-10 23:09:23 +000039#include "Python.h"
40
Victor Stinner87a7c822011-11-10 20:05:55 +010041#ifdef MS_WINDOWS
42#include <windows.h>
43#endif
44
Nick Coghlan8fad1672014-09-15 23:50:44 +120045/*[clinic input]
46module _codecs
47[clinic start generated code]*/
48/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
49
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030050#include "clinic/_codecsmodule.c.h"
Nick Coghlan8fad1672014-09-15 23:50:44 +120051
Guido van Rossume2d67f92000-03-10 23:09:23 +000052/* --- Registry ----------------------------------------------------------- */
53
Walter Dörwald0ae29812002-10-31 13:36:29 +000054PyDoc_STRVAR(register__doc__,
55"register(search_function)\n\
56\n\
57Register a codec search function. Search functions are expected to take\n\
Nick Coghlanb9fdb7a2015-01-07 00:22:00 +100058one argument, the encoding name in all lower case letters, and either\n\
59return None, or a tuple of functions (encoder, decoder, stream_reader,\n\
60stream_writer) (or a CodecInfo object).");
Walter Dörwald0ae29812002-10-31 13:36:29 +000061
Guido van Rossume2d67f92000-03-10 23:09:23 +000062static
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000063PyObject *codec_register(PyObject *self, PyObject *search_function)
Guido van Rossume2d67f92000-03-10 23:09:23 +000064{
Guido van Rossume2d67f92000-03-10 23:09:23 +000065 if (PyCodec_Register(search_function))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000066 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000067
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000068 Py_RETURN_NONE;
Guido van Rossume2d67f92000-03-10 23:09:23 +000069}
70
Walter Dörwald0ae29812002-10-31 13:36:29 +000071PyDoc_STRVAR(lookup__doc__,
Guido van Rossum36e0a922007-07-20 04:05:57 +000072"lookup(encoding) -> CodecInfo\n\
Walter Dörwald0ae29812002-10-31 13:36:29 +000073\n\
74Looks up a codec tuple in the Python codec registry and returns\n\
Benjamin Petersonf07d0022009-03-21 17:31:58 +000075a CodecInfo object.");
Walter Dörwald0ae29812002-10-31 13:36:29 +000076
Guido van Rossume2d67f92000-03-10 23:09:23 +000077static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000078PyObject *codec_lookup(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000079{
80 char *encoding;
81
82 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000083 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +000084
85 return _PyCodec_Lookup(encoding);
Guido van Rossume2d67f92000-03-10 23:09:23 +000086}
87
Marc-André Lemburg3f419742004-07-10 12:06:10 +000088PyDoc_STRVAR(encode__doc__,
89"encode(obj, [encoding[,errors]]) -> object\n\
90\n\
91Encodes obj using the codec registered for encoding. encoding defaults\n\
92to the default encoding. errors may be given to set a different error\n\
93handling scheme. Default is 'strict' meaning that encoding errors raise\n\
94a ValueError. Other possible values are 'ignore', 'replace' and\n\
95'xmlcharrefreplace' as well as any other name registered with\n\
96codecs.register_error that can handle ValueErrors.");
97
98static PyObject *
Victor Stinnera57dfd02014-05-14 17:13:14 +020099codec_encode(PyObject *self, PyObject *args, PyObject *kwargs)
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000100{
Victor Stinnera57dfd02014-05-14 17:13:14 +0200101 static char *kwlist[] = {"obj", "encoding", "errors", NULL};
Brett Cannon3e377de2004-07-10 21:41:14 +0000102 const char *encoding = NULL;
103 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000104 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000105
Victor Stinnera57dfd02014-05-14 17:13:14 +0200106 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|ss:encode", kwlist,
107 &v, &encoding, &errors))
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000108 return NULL;
109
110 if (encoding == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000112
113 /* Encode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000114 return PyCodec_Encode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000115}
116
117PyDoc_STRVAR(decode__doc__,
118"decode(obj, [encoding[,errors]]) -> object\n\
119\n\
120Decodes obj using the codec registered for encoding. encoding defaults\n\
121to the default encoding. errors may be given to set a different error\n\
122handling scheme. Default is 'strict' meaning that encoding errors raise\n\
123a ValueError. Other possible values are 'ignore' and 'replace'\n\
Benjamin Petersonf10a79a2008-10-11 00:49:57 +0000124as well as any other name registered with codecs.register_error that is\n\
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000125able to handle ValueErrors.");
126
127static PyObject *
Victor Stinnera57dfd02014-05-14 17:13:14 +0200128codec_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000129{
Victor Stinnera57dfd02014-05-14 17:13:14 +0200130 static char *kwlist[] = {"obj", "encoding", "errors", NULL};
Brett Cannon3e377de2004-07-10 21:41:14 +0000131 const char *encoding = NULL;
132 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000133 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000134
Victor Stinnera57dfd02014-05-14 17:13:14 +0200135 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|ss:decode", kwlist,
136 &v, &encoding, &errors))
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000137 return NULL;
138
139 if (encoding == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000140 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000141
142 /* Decode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000143 return PyCodec_Decode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000144}
145
Guido van Rossume2d67f92000-03-10 23:09:23 +0000146/* --- Helpers ------------------------------------------------------------ */
147
Nick Coghlan8fad1672014-09-15 23:50:44 +1200148/*[clinic input]
149_codecs._forget_codec
150
151 encoding: str
152 /
153
154Purge the named codec from the internal codec lookup cache
155[clinic start generated code]*/
156
Nick Coghlan8fad1672014-09-15 23:50:44 +1200157static PyObject *
158_codecs__forget_codec_impl(PyModuleDef *module, const char *encoding)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300159/*[clinic end generated code: output=b56a9b99d2d28080 input=18d5d92d0e386c38]*/
Nick Coghlan8fad1672014-09-15 23:50:44 +1200160{
161 if (_PyCodec_Forget(encoding) < 0) {
162 return NULL;
163 };
164 Py_RETURN_NONE;
165}
166
Guido van Rossume2d67f92000-03-10 23:09:23 +0000167static
168PyObject *codec_tuple(PyObject *unicode,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000169 Py_ssize_t len)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000170{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000171 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000172 if (unicode == NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000173 return NULL;
174 v = Py_BuildValue("On", unicode, len);
175 Py_DECREF(unicode);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000176 return v;
177}
178
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000179/* --- String codecs ------------------------------------------------------ */
180static PyObject *
181escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 PyObject *args)
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000183{
Serhiy Storchaka8490f5a2015-03-20 09:00:36 +0200184 Py_buffer pbuf;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000185 const char *errors = NULL;
Serhiy Storchaka8490f5a2015-03-20 09:00:36 +0200186 PyObject *result;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000187
Serhiy Storchaka8490f5a2015-03-20 09:00:36 +0200188 if (!PyArg_ParseTuple(args, "s*|z:escape_decode",
189 &pbuf, &errors))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000190 return NULL;
Serhiy Storchaka8490f5a2015-03-20 09:00:36 +0200191 result = codec_tuple(
192 PyBytes_DecodeEscape(pbuf.buf, pbuf.len, errors, 0, NULL),
193 pbuf.len);
194 PyBuffer_Release(&pbuf);
195 return result;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000196}
197
198static PyObject *
199escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000200 PyObject *args)
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000201{
Antoine Pitroud1188562010-06-09 16:38:55 +0000202 PyObject *str;
203 Py_ssize_t size;
204 Py_ssize_t newsize;
205 const char *errors = NULL;
206 PyObject *v;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000207
Antoine Pitroud1188562010-06-09 16:38:55 +0000208 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
209 &PyBytes_Type, &str, &errors))
210 return NULL;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000211
Antoine Pitroud1188562010-06-09 16:38:55 +0000212 size = PyBytes_GET_SIZE(str);
Mark Dickinsonc04ddff2012-10-06 18:04:49 +0100213 if (size > PY_SSIZE_T_MAX / 4) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000214 PyErr_SetString(PyExc_OverflowError,
215 "string is too large to encode");
216 return NULL;
217 }
Mark Dickinsonc04ddff2012-10-06 18:04:49 +0100218 newsize = 4*size;
Antoine Pitroud1188562010-06-09 16:38:55 +0000219 v = PyBytes_FromStringAndSize(NULL, newsize);
220
221 if (v == NULL) {
222 return NULL;
223 }
224 else {
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200225 Py_ssize_t i;
226 char c;
227 char *p = PyBytes_AS_STRING(v);
Antoine Pitroud1188562010-06-09 16:38:55 +0000228
229 for (i = 0; i < size; i++) {
230 /* There's at least enough room for a hex escape */
231 assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
232 c = PyBytes_AS_STRING(str)[i];
233 if (c == '\'' || c == '\\')
234 *p++ = '\\', *p++ = c;
235 else if (c == '\t')
236 *p++ = '\\', *p++ = 't';
237 else if (c == '\n')
238 *p++ = '\\', *p++ = 'n';
239 else if (c == '\r')
240 *p++ = '\\', *p++ = 'r';
241 else if (c < ' ' || c >= 0x7f) {
242 *p++ = '\\';
243 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200244 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
245 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitroud1188562010-06-09 16:38:55 +0000246 }
247 else
248 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000249 }
Antoine Pitroud1188562010-06-09 16:38:55 +0000250 *p = '\0';
251 if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
252 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000253 }
Antoine Pitroud1188562010-06-09 16:38:55 +0000254 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000255
Antoine Pitroud1188562010-06-09 16:38:55 +0000256 return codec_tuple(v, size);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000257}
258
Guido van Rossume2d67f92000-03-10 23:09:23 +0000259/* --- Decoder ------------------------------------------------------------ */
260
261static PyObject *
262unicode_internal_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000263 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000264{
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000265 PyObject *obj;
266 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000267
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000268 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000269 &obj, &errors))
270 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000271
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000272 if (PyUnicode_Check(obj)) {
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100273 if (PyUnicode_READY(obj) < 0)
274 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 Py_INCREF(obj);
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100276 return codec_tuple(obj, PyUnicode_GET_LENGTH(obj));
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000277 }
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000278 else {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200279 Py_buffer view;
280 PyObject *result;
281 if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000282 return NULL;
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000283
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200284 result = codec_tuple(
285 _PyUnicode_DecodeUnicodeInternal(view.buf, view.len, errors),
286 view.len);
287 PyBuffer_Release(&view);
288 return result;
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000289 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000290}
291
292static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000293utf_7_decode(PyObject *self,
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000294 PyObject *args)
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000295{
Antoine Pitroud1188562010-06-09 16:38:55 +0000296 Py_buffer pbuf;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000297 const char *errors = NULL;
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000298 int final = 0;
299 Py_ssize_t consumed;
300 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000301
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000302 if (!PyArg_ParseTuple(args, "y*|zi:utf_7_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000303 &pbuf, &errors, &final))
304 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000305 consumed = pbuf.len;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000306
Martin v. Löwis423be952008-08-13 15:53:07 +0000307 decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000308 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000309 PyBuffer_Release(&pbuf);
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000310 if (decoded == NULL)
311 return NULL;
312 return codec_tuple(decoded, consumed);
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000313}
314
315static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000316utf_8_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000317 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000318{
Antoine Pitroud1188562010-06-09 16:38:55 +0000319 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000320 const char *errors = NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000321 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000322 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000323 PyObject *decoded = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000324
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000325 if (!PyArg_ParseTuple(args, "y*|zi:utf_8_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000326 &pbuf, &errors, &final))
327 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000328 consumed = pbuf.len;
329
330 decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000331 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000332 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000333 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000334 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000335 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000336}
337
338static PyObject *
339utf_16_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000340 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000341{
Antoine Pitroud1188562010-06-09 16:38:55 +0000342 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000343 const char *errors = NULL;
344 int byteorder = 0;
Walter Dörwald69652032004-09-07 20:24:22 +0000345 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000346 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000347 PyObject *decoded;
348
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000349 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 &pbuf, &errors, &final))
351 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000352 consumed = pbuf.len; /* This is overwritten unless final is true. */
353 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000354 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000355 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000356 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000357 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000358 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000359}
360
361static PyObject *
362utf_16_le_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000363 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000364{
Antoine Pitroud1188562010-06-09 16:38:55 +0000365 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000366 const char *errors = NULL;
367 int byteorder = -1;
Walter Dörwald69652032004-09-07 20:24:22 +0000368 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000369 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000370 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000371
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000372 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_le_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000373 &pbuf, &errors, &final))
374 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000375
Martin v. Löwis423be952008-08-13 15:53:07 +0000376 consumed = pbuf.len; /* This is overwritten unless final is true. */
377 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000378 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000379 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000380 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000382 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000383}
384
385static PyObject *
386utf_16_be_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000387 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000388{
Antoine Pitroud1188562010-06-09 16:38:55 +0000389 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000390 const char *errors = NULL;
391 int byteorder = 1;
Walter Dörwald69652032004-09-07 20:24:22 +0000392 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000393 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000394 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000395
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000396 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_be_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000397 &pbuf, &errors, &final))
398 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000399
400 consumed = pbuf.len; /* This is overwritten unless final is true. */
401 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000402 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000403 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000404 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000405 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000406 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000407}
408
409/* This non-standard version also provides access to the byteorder
410 parameter of the builtin UTF-16 codec.
411
412 It returns a tuple (unicode, bytesread, byteorder) with byteorder
413 being the value in effect at the end of data.
414
415*/
416
417static PyObject *
418utf_16_ex_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000419 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000420{
Antoine Pitroud1188562010-06-09 16:38:55 +0000421 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000422 const char *errors = NULL;
423 int byteorder = 0;
424 PyObject *unicode, *tuple;
Walter Dörwald69652032004-09-07 20:24:22 +0000425 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000426 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000427
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000428 if (!PyArg_ParseTuple(args, "y*|zii:utf_16_ex_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000429 &pbuf, &errors, &byteorder, &final))
430 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000431 consumed = pbuf.len; /* This is overwritten unless final is true. */
432 unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000433 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000434 PyBuffer_Release(&pbuf);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000435 if (unicode == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000436 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000437 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000438 Py_DECREF(unicode);
439 return tuple;
440}
441
442static PyObject *
Walter Dörwald41980ca2007-08-16 21:55:45 +0000443utf_32_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000444 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000445{
Antoine Pitroud1188562010-06-09 16:38:55 +0000446 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000447 const char *errors = NULL;
448 int byteorder = 0;
449 int final = 0;
450 Py_ssize_t consumed;
451 PyObject *decoded;
452
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000453 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000454 &pbuf, &errors, &final))
455 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000456 consumed = pbuf.len; /* This is overwritten unless final is true. */
457 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000458 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000459 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000460 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000461 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000462 return codec_tuple(decoded, consumed);
463}
464
465static PyObject *
466utf_32_le_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000467 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000468{
Antoine Pitroud1188562010-06-09 16:38:55 +0000469 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000470 const char *errors = NULL;
471 int byteorder = -1;
472 int final = 0;
473 Py_ssize_t consumed;
Martin v. Löwis423be952008-08-13 15:53:07 +0000474 PyObject *decoded;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000475
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000476 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_le_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000477 &pbuf, &errors, &final))
478 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000479 consumed = pbuf.len; /* This is overwritten unless final is true. */
480 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000481 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000482 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000483 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000484 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000485 return codec_tuple(decoded, consumed);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000486}
487
488static PyObject *
489utf_32_be_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000490 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000491{
Antoine Pitroud1188562010-06-09 16:38:55 +0000492 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000493 const char *errors = NULL;
494 int byteorder = 1;
495 int final = 0;
496 Py_ssize_t consumed;
Martin v. Löwis423be952008-08-13 15:53:07 +0000497 PyObject *decoded;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000498
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000499 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_be_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000500 &pbuf, &errors, &final))
501 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000502 consumed = pbuf.len; /* This is overwritten unless final is true. */
503 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000504 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000505 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000506 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000507 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000508 return codec_tuple(decoded, consumed);
509}
510
511/* This non-standard version also provides access to the byteorder
512 parameter of the builtin UTF-32 codec.
513
514 It returns a tuple (unicode, bytesread, byteorder) with byteorder
515 being the value in effect at the end of data.
516
517*/
518
519static PyObject *
520utf_32_ex_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000521 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000522{
Antoine Pitroud1188562010-06-09 16:38:55 +0000523 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000524 const char *errors = NULL;
525 int byteorder = 0;
526 PyObject *unicode, *tuple;
527 int final = 0;
528 Py_ssize_t consumed;
529
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000530 if (!PyArg_ParseTuple(args, "y*|zii:utf_32_ex_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000531 &pbuf, &errors, &byteorder, &final))
532 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000533 consumed = pbuf.len; /* This is overwritten unless final is true. */
534 unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000535 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000536 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000537 if (unicode == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000538 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000539 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
540 Py_DECREF(unicode);
541 return tuple;
542}
543
544static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000545unicode_escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000546 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000547{
Antoine Pitroud1188562010-06-09 16:38:55 +0000548 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000549 const char *errors = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000550 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000551
Martin v. Löwis423be952008-08-13 15:53:07 +0000552 if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000553 &pbuf, &errors))
554 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000555
Antoine Pitroud1188562010-06-09 16:38:55 +0000556 unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
557 PyBuffer_Release(&pbuf);
558 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000559}
560
561static PyObject *
562raw_unicode_escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000563 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000564{
Antoine Pitroud1188562010-06-09 16:38:55 +0000565 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000566 const char *errors = NULL;
Antoine Pitroud1188562010-06-09 16:38:55 +0000567 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000568
Martin v. Löwis423be952008-08-13 15:53:07 +0000569 if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000570 &pbuf, &errors))
571 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000572
Antoine Pitroud1188562010-06-09 16:38:55 +0000573 unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
574 PyBuffer_Release(&pbuf);
575 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000576}
577
578static PyObject *
579latin_1_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000580 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000581{
Antoine Pitroud1188562010-06-09 16:38:55 +0000582 Py_buffer pbuf;
583 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000584 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000585
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000586 if (!PyArg_ParseTuple(args, "y*|z:latin_1_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000587 &pbuf, &errors))
588 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000589
Antoine Pitroud1188562010-06-09 16:38:55 +0000590 unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
591 PyBuffer_Release(&pbuf);
592 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000593}
594
595static PyObject *
596ascii_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000597 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000598{
Antoine Pitroud1188562010-06-09 16:38:55 +0000599 Py_buffer pbuf;
600 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000601 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000602
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000603 if (!PyArg_ParseTuple(args, "y*|z:ascii_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000604 &pbuf, &errors))
605 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000606
Antoine Pitroud1188562010-06-09 16:38:55 +0000607 unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
608 PyBuffer_Release(&pbuf);
609 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000610}
611
612static PyObject *
613charmap_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000614 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000615{
Antoine Pitroud1188562010-06-09 16:38:55 +0000616 Py_buffer pbuf;
617 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000618 const char *errors = NULL;
619 PyObject *mapping = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000620
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000621 if (!PyArg_ParseTuple(args, "y*|zO:charmap_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000622 &pbuf, &errors, &mapping))
623 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000624 if (mapping == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000625 mapping = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000626
Antoine Pitroud1188562010-06-09 16:38:55 +0000627 unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
628 PyBuffer_Release(&pbuf);
629 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000630}
631
Victor Stinner99b95382011-07-04 14:23:54 +0200632#ifdef HAVE_MBCS
Guido van Rossum24bdb042000-03-28 20:29:59 +0000633
634static PyObject *
635mbcs_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000636 PyObject *args)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000637{
Antoine Pitroud1188562010-06-09 16:38:55 +0000638 Py_buffer pbuf;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000639 const char *errors = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000640 int final = 0;
Martin v. Löwis423be952008-08-13 15:53:07 +0000641 Py_ssize_t consumed;
642 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000643
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000644 if (!PyArg_ParseTuple(args, "y*|zi:mbcs_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000645 &pbuf, &errors, &final))
646 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000647 consumed = pbuf.len;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000648
Martin v. Löwis423be952008-08-13 15:53:07 +0000649 decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000650 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000651 PyBuffer_Release(&pbuf);
Martin v. Löwis423be952008-08-13 15:53:07 +0000652 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000653 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000654 return codec_tuple(decoded, consumed);
Guido van Rossum24bdb042000-03-28 20:29:59 +0000655}
656
Victor Stinner3a50e702011-10-18 21:21:00 +0200657static PyObject *
658code_page_decode(PyObject *self,
659 PyObject *args)
660{
661 Py_buffer pbuf;
662 const char *errors = NULL;
663 int final = 0;
664 Py_ssize_t consumed;
665 PyObject *decoded = NULL;
666 int code_page;
667
668 if (!PyArg_ParseTuple(args, "iy*|zi:code_page_decode",
669 &code_page, &pbuf, &errors, &final))
670 return NULL;
671 consumed = pbuf.len;
672
673 decoded = PyUnicode_DecodeCodePageStateful(code_page,
674 pbuf.buf, pbuf.len, errors,
675 final ? NULL : &consumed);
676 PyBuffer_Release(&pbuf);
677 if (decoded == NULL)
678 return NULL;
679 return codec_tuple(decoded, consumed);
680}
681
Victor Stinner99b95382011-07-04 14:23:54 +0200682#endif /* HAVE_MBCS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000683
Guido van Rossume2d67f92000-03-10 23:09:23 +0000684/* --- Encoder ------------------------------------------------------------ */
685
686static PyObject *
687readbuffer_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000688 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000689{
Martin v. Löwis423be952008-08-13 15:53:07 +0000690 Py_buffer pdata;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000691 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000692 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000693 const char *errors = NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000694 PyObject *result;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000695
Martin v. Löwis423be952008-08-13 15:53:07 +0000696 if (!PyArg_ParseTuple(args, "s*|z:readbuffer_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000697 &pdata, &errors))
698 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000699 data = pdata.buf;
700 size = pdata.len;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000701
Martin v. Löwis423be952008-08-13 15:53:07 +0000702 result = PyBytes_FromStringAndSize(data, size);
703 PyBuffer_Release(&pdata);
704 return codec_tuple(result, size);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000705}
706
707static PyObject *
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000708unicode_internal_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000709 PyObject *args)
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000710{
711 PyObject *obj;
712 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000713
Ezio Melottiadc417c2011-11-17 12:23:34 +0200714 if (PyErr_WarnEx(PyExc_DeprecationWarning,
715 "unicode_internal codec has been deprecated",
716 1))
717 return NULL;
718
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000719 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000720 &obj, &errors))
721 return NULL;
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000722
723 if (PyUnicode_Check(obj)) {
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100724 Py_UNICODE *u;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200725 Py_ssize_t len, size;
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100726
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100727 if (PyUnicode_READY(obj) < 0)
728 return NULL;
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100729
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100730 u = PyUnicode_AsUnicodeAndSize(obj, &len);
731 if (u == NULL)
732 return NULL;
Victor Stinner049e5092014-08-17 22:20:00 +0200733 if ((size_t)len > (size_t)PY_SSIZE_T_MAX / sizeof(Py_UNICODE))
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100734 return PyErr_NoMemory();
735 size = len * sizeof(Py_UNICODE);
736 return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size),
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100737 PyUnicode_GET_LENGTH(obj));
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000738 }
739 else {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200740 Py_buffer view;
741 PyObject *result;
742 if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000743 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200744 result = codec_tuple(PyBytes_FromStringAndSize(view.buf, view.len), view.len);
745 PyBuffer_Release(&view);
746 return result;
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000747 }
748}
749
750static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000751utf_7_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000752 PyObject *args)
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000753{
754 PyObject *str, *v;
755 const char *errors = NULL;
756
757 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000758 &str, &errors))
759 return NULL;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000760
761 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100762 if (str == NULL || PyUnicode_READY(str) < 0) {
763 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000764 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100765 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100766 v = codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
767 PyUnicode_GET_LENGTH(str));
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000768 Py_DECREF(str);
769 return v;
770}
771
772static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000773utf_8_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000774 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000775{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000776 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000777 const char *errors = NULL;
778
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000779 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000780 &str, &errors))
781 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000782
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000783 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100784 if (str == NULL || PyUnicode_READY(str) < 0) {
785 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000786 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100787 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200788 v = codec_tuple(PyUnicode_AsEncodedString(str, "utf-8", errors),
789 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000790 Py_DECREF(str);
791 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000792}
793
794/* This version provides access to the byteorder parameter of the
795 builtin UTF-16 codecs as optional third argument. It defaults to 0
796 which means: use the native byte order and prepend the data with a
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000797 BOM mark.
Guido van Rossume2d67f92000-03-10 23:09:23 +0000798
799*/
800
801static PyObject *
802utf_16_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000803 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000804{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000805 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000806 const char *errors = NULL;
807 int byteorder = 0;
808
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000809 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000810 &str, &errors, &byteorder))
811 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000812
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000813 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100814 if (str == NULL || PyUnicode_READY(str) < 0) {
815 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000816 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100817 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100818 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
819 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000820 Py_DECREF(str);
821 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000822}
823
824static PyObject *
825utf_16_le_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000826 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000827{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000828 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000829 const char *errors = NULL;
830
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000831 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000832 &str, &errors))
833 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000834
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000835 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100836 if (str == NULL || PyUnicode_READY(str) < 0) {
837 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000838 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100839 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100840 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
841 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000842 Py_DECREF(str);
843 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000844}
845
846static PyObject *
847utf_16_be_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000848 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000849{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000850 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000851 const char *errors = NULL;
852
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000853 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000854 &str, &errors))
855 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000856
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000857 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100858 if (str == NULL || PyUnicode_READY(str) < 0) {
859 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000860 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100861 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100862 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
863 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000864 Py_DECREF(str);
865 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000866}
867
Walter Dörwald41980ca2007-08-16 21:55:45 +0000868/* This version provides access to the byteorder parameter of the
869 builtin UTF-32 codecs as optional third argument. It defaults to 0
870 which means: use the native byte order and prepend the data with a
871 BOM mark.
872
873*/
874
875static PyObject *
876utf_32_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000877 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000878{
879 PyObject *str, *v;
880 const char *errors = NULL;
881 int byteorder = 0;
882
883 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000884 &str, &errors, &byteorder))
885 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000886
887 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100888 if (str == NULL || PyUnicode_READY(str) < 0) {
889 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000890 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100891 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100892 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
893 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000894 Py_DECREF(str);
895 return v;
896}
897
898static PyObject *
899utf_32_le_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000900 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000901{
902 PyObject *str, *v;
903 const char *errors = NULL;
904
905 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000906 &str, &errors))
907 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000908
909 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100910 if (str == NULL || PyUnicode_READY(str) < 0) {
911 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000912 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100913 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100914 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
915 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000916 Py_DECREF(str);
917 return v;
918}
919
920static PyObject *
921utf_32_be_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000922 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000923{
924 PyObject *str, *v;
925 const char *errors = NULL;
926
927 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000928 &str, &errors))
929 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000930
931 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100932 if (str == NULL || PyUnicode_READY(str) < 0) {
933 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000934 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100935 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100936 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
937 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000938 Py_DECREF(str);
939 return v;
940}
941
Guido van Rossume2d67f92000-03-10 23:09:23 +0000942static PyObject *
943unicode_escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000944 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000945{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000946 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000947 const char *errors = NULL;
948
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000949 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000950 &str, &errors))
951 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000952
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000953 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100954 if (str == NULL || PyUnicode_READY(str) < 0) {
955 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000956 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100957 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100958 v = codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
959 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000960 Py_DECREF(str);
961 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000962}
963
964static PyObject *
965raw_unicode_escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000966 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000967{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000968 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000969 const char *errors = NULL;
970
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000971 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000972 &str, &errors))
973 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000974
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000975 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100976 if (str == NULL || PyUnicode_READY(str) < 0) {
977 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000978 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100979 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100980 v = codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
981 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000982 Py_DECREF(str);
983 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000984}
985
986static PyObject *
987latin_1_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000988 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000989{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000990 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000991 const char *errors = NULL;
992
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000993 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000994 &str, &errors))
995 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000996
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000997 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100998 if (str == NULL || PyUnicode_READY(str) < 0) {
999 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001000 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001001 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001002 v = codec_tuple(_PyUnicode_AsLatin1String(str, errors),
1003 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001004 Py_DECREF(str);
1005 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001006}
1007
1008static PyObject *
1009ascii_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001010 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001011{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001012 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001013 const char *errors = NULL;
1014
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001015 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001016 &str, &errors))
1017 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001018
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001019 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001020 if (str == NULL || PyUnicode_READY(str) < 0) {
1021 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001022 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001023 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001024 v = codec_tuple(_PyUnicode_AsASCIIString(str, errors),
1025 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001026 Py_DECREF(str);
1027 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001028}
1029
1030static PyObject *
1031charmap_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001032 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001033{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001034 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001035 const char *errors = NULL;
1036 PyObject *mapping = NULL;
1037
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001038 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001039 &str, &errors, &mapping))
1040 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001041 if (mapping == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001042 mapping = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001043
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001044 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001045 if (str == NULL || PyUnicode_READY(str) < 0) {
1046 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001047 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001048 }
Martin v. Löwis23e275b2011-11-02 18:02:51 +01001049 v = codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001050 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001051 Py_DECREF(str);
1052 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001053}
1054
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001055static PyObject*
1056charmap_build(PyObject *self, PyObject *args)
1057{
1058 PyObject *map;
1059 if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
1060 return NULL;
1061 return PyUnicode_BuildEncodingMap(map);
1062}
1063
Victor Stinner99b95382011-07-04 14:23:54 +02001064#ifdef HAVE_MBCS
Guido van Rossum24bdb042000-03-28 20:29:59 +00001065
1066static PyObject *
1067mbcs_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001068 PyObject *args)
Guido van Rossum24bdb042000-03-28 20:29:59 +00001069{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001070 PyObject *str, *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001071 const char *errors = NULL;
1072
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001073 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001074 &str, &errors))
1075 return NULL;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001076
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001077 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001078 if (str == NULL || PyUnicode_READY(str) < 0) {
1079 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001080 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001081 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001082 v = codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
1083 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001084 Py_DECREF(str);
1085 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001086}
1087
Victor Stinner3a50e702011-10-18 21:21:00 +02001088static PyObject *
1089code_page_encode(PyObject *self,
1090 PyObject *args)
1091{
1092 PyObject *str, *v;
1093 const char *errors = NULL;
1094 int code_page;
1095
1096 if (!PyArg_ParseTuple(args, "iO|z:code_page_encode",
1097 &code_page, &str, &errors))
1098 return NULL;
1099
1100 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001101 if (str == NULL || PyUnicode_READY(str) < 0) {
1102 Py_XDECREF(str);
Victor Stinner3a50e702011-10-18 21:21:00 +02001103 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001104 }
Victor Stinner3a50e702011-10-18 21:21:00 +02001105 v = codec_tuple(PyUnicode_EncodeCodePage(code_page,
1106 str,
1107 errors),
1108 PyUnicode_GET_LENGTH(str));
1109 Py_DECREF(str);
1110 return v;
1111}
1112
Victor Stinner99b95382011-07-04 14:23:54 +02001113#endif /* HAVE_MBCS */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001114
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001115/* --- Error handler registry --------------------------------------------- */
1116
Walter Dörwald0ae29812002-10-31 13:36:29 +00001117PyDoc_STRVAR(register_error__doc__,
1118"register_error(errors, handler)\n\
1119\n\
1120Register the specified error handler under the name\n\
1121errors. handler must be a callable object, that\n\
1122will be called with an exception instance containing\n\
1123information about the location of the encoding/decoding\n\
1124error and must return a (replacement, new position) tuple.");
1125
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001126static PyObject *register_error(PyObject *self, PyObject *args)
1127{
1128 const char *name;
1129 PyObject *handler;
1130
1131 if (!PyArg_ParseTuple(args, "sO:register_error",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001132 &name, &handler))
1133 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001134 if (PyCodec_RegisterError(name, handler))
1135 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001136 Py_RETURN_NONE;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001137}
1138
Walter Dörwald0ae29812002-10-31 13:36:29 +00001139PyDoc_STRVAR(lookup_error__doc__,
1140"lookup_error(errors) -> handler\n\
1141\n\
1142Return the error handler for the specified error handling name\n\
1143or raise a LookupError, if no handler exists under this name.");
1144
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001145static PyObject *lookup_error(PyObject *self, PyObject *args)
1146{
1147 const char *name;
1148
1149 if (!PyArg_ParseTuple(args, "s:lookup_error",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001150 &name))
1151 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001152 return PyCodec_LookupError(name);
1153}
1154
Guido van Rossume2d67f92000-03-10 23:09:23 +00001155/* --- Module API --------------------------------------------------------- */
1156
1157static PyMethodDef _codecs_functions[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001158 {"register", codec_register, METH_O,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001159 register__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001160 {"lookup", codec_lookup, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001161 lookup__doc__},
Victor Stinnera57dfd02014-05-14 17:13:14 +02001162 {"encode", (PyCFunction)codec_encode, METH_VARARGS|METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001163 encode__doc__},
Victor Stinnera57dfd02014-05-14 17:13:14 +02001164 {"decode", (PyCFunction)codec_decode, METH_VARARGS|METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001165 decode__doc__},
1166 {"escape_encode", escape_encode, METH_VARARGS},
1167 {"escape_decode", escape_decode, METH_VARARGS},
1168 {"utf_8_encode", utf_8_encode, METH_VARARGS},
1169 {"utf_8_decode", utf_8_decode, METH_VARARGS},
1170 {"utf_7_encode", utf_7_encode, METH_VARARGS},
1171 {"utf_7_decode", utf_7_decode, METH_VARARGS},
1172 {"utf_16_encode", utf_16_encode, METH_VARARGS},
1173 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1174 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1175 {"utf_16_decode", utf_16_decode, METH_VARARGS},
1176 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1177 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1178 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
1179 {"utf_32_encode", utf_32_encode, METH_VARARGS},
1180 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1181 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1182 {"utf_32_decode", utf_32_decode, METH_VARARGS},
1183 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1184 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1185 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
1186 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1187 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1188 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1189 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1190 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1191 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1192 {"latin_1_encode", latin_1_encode, METH_VARARGS},
1193 {"latin_1_decode", latin_1_decode, METH_VARARGS},
1194 {"ascii_encode", ascii_encode, METH_VARARGS},
1195 {"ascii_decode", ascii_decode, METH_VARARGS},
1196 {"charmap_encode", charmap_encode, METH_VARARGS},
1197 {"charmap_decode", charmap_decode, METH_VARARGS},
1198 {"charmap_build", charmap_build, METH_VARARGS},
1199 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
Victor Stinner99b95382011-07-04 14:23:54 +02001200#ifdef HAVE_MBCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001201 {"mbcs_encode", mbcs_encode, METH_VARARGS},
1202 {"mbcs_decode", mbcs_decode, METH_VARARGS},
Victor Stinner3a50e702011-10-18 21:21:00 +02001203 {"code_page_encode", code_page_encode, METH_VARARGS},
1204 {"code_page_decode", code_page_decode, METH_VARARGS},
Guido van Rossum24bdb042000-03-28 20:29:59 +00001205#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001206 {"register_error", register_error, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001207 register_error__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001208 {"lookup_error", lookup_error, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001209 lookup_error__doc__},
Nick Coghlan8fad1672014-09-15 23:50:44 +12001210 _CODECS__FORGET_CODEC_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001211 {NULL, NULL} /* sentinel */
Guido van Rossume2d67f92000-03-10 23:09:23 +00001212};
1213
Martin v. Löwis1a214512008-06-11 05:26:20 +00001214static struct PyModuleDef codecsmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001215 PyModuleDef_HEAD_INIT,
1216 "_codecs",
1217 NULL,
1218 -1,
1219 _codecs_functions,
1220 NULL,
1221 NULL,
1222 NULL,
1223 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001224};
1225
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001226PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001227PyInit__codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001228{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001229 return PyModule_Create(&codecsmodule);
Guido van Rossume2d67f92000-03-10 23:09:23 +00001230}