blob: b9268cec2f146bde8d212ae595e5a7ec62d567a7 [file] [log] [blame]
Guido van Rossume2d67f92000-03-10 23:09:23 +00001/* ------------------------------------------------------------------------
2
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
5
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
8
9 The codec registry is accessible via:
10
11 register(search_function) -> None
12
Guido van Rossum36e0a922007-07-20 04:05:57 +000013 lookup(encoding) -> CodecInfo object
Guido van Rossume2d67f92000-03-10 23:09:23 +000014
15 The builtin Unicode codecs use the following interface:
16
Walter Dörwald9fd115c2005-11-02 08:30:08 +000017 <encoding>_encode(Unicode_object[,errors='strict']) ->
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000018 (string object, bytes consumed)
Guido van Rossume2d67f92000-03-10 23:09:23 +000019
Walter Dörwald9fd115c2005-11-02 08:30:08 +000020 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
Guido van Rossume2d67f92000-03-10 23:09:23 +000021 (Unicode object, bytes consumed)
22
   <encoding>_encode() interfaces also accept non-Unicode objects as
   input. The objects are then converted to Unicode using
   PyUnicode_FromObject() prior to applying the conversion.
26
Guido van Rossume2d67f92000-03-10 23:09:23 +000027 These <encoding>s are available: utf_8, unicode_escape,
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +000028 raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
29 mbcs (on win32).
30
Guido van Rossume2d67f92000-03-10 23:09:23 +000031
32Written by Marc-Andre Lemburg (mal@lemburg.com).
33
Guido van Rossum16b1ad92000-08-03 16:24:25 +000034Copyright (c) Corporation for National Research Initiatives.
Guido van Rossume2d67f92000-03-10 23:09:23 +000035
36 ------------------------------------------------------------------------ */
37
Martin v. Löwis18e16552006-02-15 17:27:45 +000038#define PY_SSIZE_T_CLEAN
Guido van Rossume2d67f92000-03-10 23:09:23 +000039#include "Python.h"
40
Victor Stinner87a7c822011-11-10 20:05:55 +010041#ifdef MS_WINDOWS
42#include <windows.h>
43#endif
44
Nick Coghlan8fad1672014-09-15 23:50:44 +120045/*[clinic input]
46module _codecs
47[clinic start generated code]*/
48/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
49
50
Guido van Rossume2d67f92000-03-10 23:09:23 +000051/* --- Registry ----------------------------------------------------------- */
52
Walter Dörwald0ae29812002-10-31 13:36:29 +000053PyDoc_STRVAR(register__doc__,
54"register(search_function)\n\
55\n\
56Register a codec search function. Search functions are expected to take\n\
Nick Coghlanb9fdb7a2015-01-07 00:22:00 +100057one argument, the encoding name in all lower case letters, and either\n\
58return None, or a tuple of functions (encoder, decoder, stream_reader,\n\
59stream_writer) (or a CodecInfo object).");
Walter Dörwald0ae29812002-10-31 13:36:29 +000060
Guido van Rossume2d67f92000-03-10 23:09:23 +000061static
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000062PyObject *codec_register(PyObject *self, PyObject *search_function)
Guido van Rossume2d67f92000-03-10 23:09:23 +000063{
Guido van Rossume2d67f92000-03-10 23:09:23 +000064 if (PyCodec_Register(search_function))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000065 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000066
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000067 Py_RETURN_NONE;
Guido van Rossume2d67f92000-03-10 23:09:23 +000068}
69
Walter Dörwald0ae29812002-10-31 13:36:29 +000070PyDoc_STRVAR(lookup__doc__,
Guido van Rossum36e0a922007-07-20 04:05:57 +000071"lookup(encoding) -> CodecInfo\n\
Walter Dörwald0ae29812002-10-31 13:36:29 +000072\n\
73Looks up a codec tuple in the Python codec registry and returns\n\
Benjamin Petersonf07d0022009-03-21 17:31:58 +000074a CodecInfo object.");
Walter Dörwald0ae29812002-10-31 13:36:29 +000075
Guido van Rossume2d67f92000-03-10 23:09:23 +000076static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000077PyObject *codec_lookup(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000078{
79 char *encoding;
80
81 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000082 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +000083
84 return _PyCodec_Lookup(encoding);
Guido van Rossume2d67f92000-03-10 23:09:23 +000085}
86
Marc-André Lemburg3f419742004-07-10 12:06:10 +000087PyDoc_STRVAR(encode__doc__,
88"encode(obj, [encoding[,errors]]) -> object\n\
89\n\
90Encodes obj using the codec registered for encoding. encoding defaults\n\
91to the default encoding. errors may be given to set a different error\n\
92handling scheme. Default is 'strict' meaning that encoding errors raise\n\
93a ValueError. Other possible values are 'ignore', 'replace' and\n\
94'xmlcharrefreplace' as well as any other name registered with\n\
95codecs.register_error that can handle ValueErrors.");
96
97static PyObject *
Victor Stinnera57dfd02014-05-14 17:13:14 +020098codec_encode(PyObject *self, PyObject *args, PyObject *kwargs)
Marc-André Lemburg3f419742004-07-10 12:06:10 +000099{
Victor Stinnera57dfd02014-05-14 17:13:14 +0200100 static char *kwlist[] = {"obj", "encoding", "errors", NULL};
Brett Cannon3e377de2004-07-10 21:41:14 +0000101 const char *encoding = NULL;
102 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000103 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000104
Victor Stinnera57dfd02014-05-14 17:13:14 +0200105 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|ss:encode", kwlist,
106 &v, &encoding, &errors))
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000107 return NULL;
108
109 if (encoding == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000110 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000111
112 /* Encode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000113 return PyCodec_Encode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000114}
115
116PyDoc_STRVAR(decode__doc__,
117"decode(obj, [encoding[,errors]]) -> object\n\
118\n\
119Decodes obj using the codec registered for encoding. encoding defaults\n\
120to the default encoding. errors may be given to set a different error\n\
121handling scheme. Default is 'strict' meaning that encoding errors raise\n\
122a ValueError. Other possible values are 'ignore' and 'replace'\n\
Benjamin Petersonf10a79a2008-10-11 00:49:57 +0000123as well as any other name registered with codecs.register_error that is\n\
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000124able to handle ValueErrors.");
125
126static PyObject *
Victor Stinnera57dfd02014-05-14 17:13:14 +0200127codec_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000128{
Victor Stinnera57dfd02014-05-14 17:13:14 +0200129 static char *kwlist[] = {"obj", "encoding", "errors", NULL};
Brett Cannon3e377de2004-07-10 21:41:14 +0000130 const char *encoding = NULL;
131 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000132 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000133
Victor Stinnera57dfd02014-05-14 17:13:14 +0200134 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|ss:decode", kwlist,
135 &v, &encoding, &errors))
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000136 return NULL;
137
138 if (encoding == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000140
141 /* Decode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000142 return PyCodec_Decode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000143}
144
Guido van Rossume2d67f92000-03-10 23:09:23 +0000145/* --- Helpers ------------------------------------------------------------ */
146
Nick Coghlan8fad1672014-09-15 23:50:44 +1200147/*[clinic input]
148_codecs._forget_codec
149
150 encoding: str
151 /
152
153Purge the named codec from the internal codec lookup cache
154[clinic start generated code]*/
155
156PyDoc_STRVAR(_codecs__forget_codec__doc__,
157"_forget_codec($module, encoding, /)\n"
158"--\n"
159"\n"
160"Purge the named codec from the internal codec lookup cache");
161
162#define _CODECS__FORGET_CODEC_METHODDEF \
163 {"_forget_codec", (PyCFunction)_codecs__forget_codec, METH_VARARGS, _codecs__forget_codec__doc__},
164
165static PyObject *
166_codecs__forget_codec_impl(PyModuleDef *module, const char *encoding);
167
168static PyObject *
169_codecs__forget_codec(PyModuleDef *module, PyObject *args)
170{
171 PyObject *return_value = NULL;
172 const char *encoding;
173
174 if (!PyArg_ParseTuple(args,
175 "s:_forget_codec",
176 &encoding))
177 goto exit;
178 return_value = _codecs__forget_codec_impl(module, encoding);
179
180exit:
181 return return_value;
182}
183
184static PyObject *
185_codecs__forget_codec_impl(PyModuleDef *module, const char *encoding)
186/*[clinic end generated code: output=a75e631591702a5c input=18d5d92d0e386c38]*/
187{
188 if (_PyCodec_Forget(encoding) < 0) {
189 return NULL;
190 };
191 Py_RETURN_NONE;
192}
193
Guido van Rossume2d67f92000-03-10 23:09:23 +0000194static
195PyObject *codec_tuple(PyObject *unicode,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000196 Py_ssize_t len)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000197{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000198 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000199 if (unicode == NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000200 return NULL;
201 v = Py_BuildValue("On", unicode, len);
202 Py_DECREF(unicode);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000203 return v;
204}
205
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000206/* --- String codecs ------------------------------------------------------ */
207static PyObject *
208escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000209 PyObject *args)
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000210{
Serhiy Storchaka8490f5a2015-03-20 09:00:36 +0200211 Py_buffer pbuf;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000212 const char *errors = NULL;
Serhiy Storchaka8490f5a2015-03-20 09:00:36 +0200213 PyObject *result;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000214
Serhiy Storchaka8490f5a2015-03-20 09:00:36 +0200215 if (!PyArg_ParseTuple(args, "s*|z:escape_decode",
216 &pbuf, &errors))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000217 return NULL;
Serhiy Storchaka8490f5a2015-03-20 09:00:36 +0200218 result = codec_tuple(
219 PyBytes_DecodeEscape(pbuf.buf, pbuf.len, errors, 0, NULL),
220 pbuf.len);
221 PyBuffer_Release(&pbuf);
222 return result;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000223}
224
225static PyObject *
226escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000227 PyObject *args)
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000228{
Antoine Pitroud1188562010-06-09 16:38:55 +0000229 PyObject *str;
230 Py_ssize_t size;
231 Py_ssize_t newsize;
232 const char *errors = NULL;
233 PyObject *v;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000234
Antoine Pitroud1188562010-06-09 16:38:55 +0000235 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
236 &PyBytes_Type, &str, &errors))
237 return NULL;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000238
Antoine Pitroud1188562010-06-09 16:38:55 +0000239 size = PyBytes_GET_SIZE(str);
Mark Dickinsonc04ddff2012-10-06 18:04:49 +0100240 if (size > PY_SSIZE_T_MAX / 4) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000241 PyErr_SetString(PyExc_OverflowError,
242 "string is too large to encode");
243 return NULL;
244 }
Mark Dickinsonc04ddff2012-10-06 18:04:49 +0100245 newsize = 4*size;
Antoine Pitroud1188562010-06-09 16:38:55 +0000246 v = PyBytes_FromStringAndSize(NULL, newsize);
247
248 if (v == NULL) {
249 return NULL;
250 }
251 else {
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200252 Py_ssize_t i;
253 char c;
254 char *p = PyBytes_AS_STRING(v);
Antoine Pitroud1188562010-06-09 16:38:55 +0000255
256 for (i = 0; i < size; i++) {
257 /* There's at least enough room for a hex escape */
258 assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
259 c = PyBytes_AS_STRING(str)[i];
260 if (c == '\'' || c == '\\')
261 *p++ = '\\', *p++ = c;
262 else if (c == '\t')
263 *p++ = '\\', *p++ = 't';
264 else if (c == '\n')
265 *p++ = '\\', *p++ = 'n';
266 else if (c == '\r')
267 *p++ = '\\', *p++ = 'r';
268 else if (c < ' ' || c >= 0x7f) {
269 *p++ = '\\';
270 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200271 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
272 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitroud1188562010-06-09 16:38:55 +0000273 }
274 else
275 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000276 }
Antoine Pitroud1188562010-06-09 16:38:55 +0000277 *p = '\0';
278 if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
279 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000280 }
Antoine Pitroud1188562010-06-09 16:38:55 +0000281 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000282
Antoine Pitroud1188562010-06-09 16:38:55 +0000283 return codec_tuple(v, size);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000284}
285
Guido van Rossume2d67f92000-03-10 23:09:23 +0000286/* --- Decoder ------------------------------------------------------------ */
287
288static PyObject *
289unicode_internal_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000290 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000291{
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000292 PyObject *obj;
293 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000294
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000295 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 &obj, &errors))
297 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000298
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000299 if (PyUnicode_Check(obj)) {
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100300 if (PyUnicode_READY(obj) < 0)
301 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000302 Py_INCREF(obj);
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100303 return codec_tuple(obj, PyUnicode_GET_LENGTH(obj));
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000304 }
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000305 else {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200306 Py_buffer view;
307 PyObject *result;
308 if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000309 return NULL;
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000310
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200311 result = codec_tuple(
312 _PyUnicode_DecodeUnicodeInternal(view.buf, view.len, errors),
313 view.len);
314 PyBuffer_Release(&view);
315 return result;
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000316 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000317}
318
319static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000320utf_7_decode(PyObject *self,
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000321 PyObject *args)
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000322{
Antoine Pitroud1188562010-06-09 16:38:55 +0000323 Py_buffer pbuf;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000324 const char *errors = NULL;
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000325 int final = 0;
326 Py_ssize_t consumed;
327 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000328
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000329 if (!PyArg_ParseTuple(args, "y*|zi:utf_7_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000330 &pbuf, &errors, &final))
331 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000332 consumed = pbuf.len;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000333
Martin v. Löwis423be952008-08-13 15:53:07 +0000334 decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000335 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000336 PyBuffer_Release(&pbuf);
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000337 if (decoded == NULL)
338 return NULL;
339 return codec_tuple(decoded, consumed);
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000340}
341
342static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000343utf_8_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000345{
Antoine Pitroud1188562010-06-09 16:38:55 +0000346 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000347 const char *errors = NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000348 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000349 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000350 PyObject *decoded = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000351
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000352 if (!PyArg_ParseTuple(args, "y*|zi:utf_8_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000353 &pbuf, &errors, &final))
354 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000355 consumed = pbuf.len;
356
357 decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000358 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000359 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000360 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000361 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000362 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000363}
364
365static PyObject *
366utf_16_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000368{
Antoine Pitroud1188562010-06-09 16:38:55 +0000369 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000370 const char *errors = NULL;
371 int byteorder = 0;
Walter Dörwald69652032004-09-07 20:24:22 +0000372 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000373 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000374 PyObject *decoded;
375
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000376 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000377 &pbuf, &errors, &final))
378 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000379 consumed = pbuf.len; /* This is overwritten unless final is true. */
380 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000382 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000383 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000384 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000385 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000386}
387
388static PyObject *
389utf_16_le_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000390 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000391{
Antoine Pitroud1188562010-06-09 16:38:55 +0000392 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000393 const char *errors = NULL;
394 int byteorder = -1;
Walter Dörwald69652032004-09-07 20:24:22 +0000395 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000396 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000397 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000398
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000399 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_le_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000400 &pbuf, &errors, &final))
401 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000402
Martin v. Löwis423be952008-08-13 15:53:07 +0000403 consumed = pbuf.len; /* This is overwritten unless final is true. */
404 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000405 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000406 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000407 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000408 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000409 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000410}
411
412static PyObject *
413utf_16_be_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000414 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000415{
Antoine Pitroud1188562010-06-09 16:38:55 +0000416 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000417 const char *errors = NULL;
418 int byteorder = 1;
Walter Dörwald69652032004-09-07 20:24:22 +0000419 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000420 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000421 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000422
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000423 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_be_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000424 &pbuf, &errors, &final))
425 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000426
427 consumed = pbuf.len; /* This is overwritten unless final is true. */
428 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000429 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000430 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000431 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000432 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000433 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000434}
435
436/* This non-standard version also provides access to the byteorder
437 parameter of the builtin UTF-16 codec.
438
439 It returns a tuple (unicode, bytesread, byteorder) with byteorder
440 being the value in effect at the end of data.
441
442*/
443
444static PyObject *
445utf_16_ex_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000446 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000447{
Antoine Pitroud1188562010-06-09 16:38:55 +0000448 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000449 const char *errors = NULL;
450 int byteorder = 0;
451 PyObject *unicode, *tuple;
Walter Dörwald69652032004-09-07 20:24:22 +0000452 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000453 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000454
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000455 if (!PyArg_ParseTuple(args, "y*|zii:utf_16_ex_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000456 &pbuf, &errors, &byteorder, &final))
457 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000458 consumed = pbuf.len; /* This is overwritten unless final is true. */
459 unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000460 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000461 PyBuffer_Release(&pbuf);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000462 if (unicode == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000463 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000464 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000465 Py_DECREF(unicode);
466 return tuple;
467}
468
469static PyObject *
Walter Dörwald41980ca2007-08-16 21:55:45 +0000470utf_32_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000471 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000472{
Antoine Pitroud1188562010-06-09 16:38:55 +0000473 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000474 const char *errors = NULL;
475 int byteorder = 0;
476 int final = 0;
477 Py_ssize_t consumed;
478 PyObject *decoded;
479
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000480 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000481 &pbuf, &errors, &final))
482 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000483 consumed = pbuf.len; /* This is overwritten unless final is true. */
484 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000485 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000486 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000487 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000488 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000489 return codec_tuple(decoded, consumed);
490}
491
492static PyObject *
493utf_32_le_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000494 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000495{
Antoine Pitroud1188562010-06-09 16:38:55 +0000496 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000497 const char *errors = NULL;
498 int byteorder = -1;
499 int final = 0;
500 Py_ssize_t consumed;
Martin v. Löwis423be952008-08-13 15:53:07 +0000501 PyObject *decoded;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000502
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000503 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_le_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000504 &pbuf, &errors, &final))
505 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000506 consumed = pbuf.len; /* This is overwritten unless final is true. */
507 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000508 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000509 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000510 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000511 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000512 return codec_tuple(decoded, consumed);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000513}
514
515static PyObject *
516utf_32_be_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000517 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000518{
Antoine Pitroud1188562010-06-09 16:38:55 +0000519 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000520 const char *errors = NULL;
521 int byteorder = 1;
522 int final = 0;
523 Py_ssize_t consumed;
Martin v. Löwis423be952008-08-13 15:53:07 +0000524 PyObject *decoded;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000525
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000526 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_be_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000527 &pbuf, &errors, &final))
528 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000529 consumed = pbuf.len; /* This is overwritten unless final is true. */
530 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000531 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000532 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000533 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000534 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000535 return codec_tuple(decoded, consumed);
536}
537
538/* This non-standard version also provides access to the byteorder
539 parameter of the builtin UTF-32 codec.
540
541 It returns a tuple (unicode, bytesread, byteorder) with byteorder
542 being the value in effect at the end of data.
543
544*/
545
546static PyObject *
547utf_32_ex_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000548 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000549{
Antoine Pitroud1188562010-06-09 16:38:55 +0000550 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000551 const char *errors = NULL;
552 int byteorder = 0;
553 PyObject *unicode, *tuple;
554 int final = 0;
555 Py_ssize_t consumed;
556
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000557 if (!PyArg_ParseTuple(args, "y*|zii:utf_32_ex_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000558 &pbuf, &errors, &byteorder, &final))
559 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000560 consumed = pbuf.len; /* This is overwritten unless final is true. */
561 unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000562 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000563 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000564 if (unicode == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000565 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000566 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
567 Py_DECREF(unicode);
568 return tuple;
569}
570
571static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000572unicode_escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000573 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000574{
Antoine Pitroud1188562010-06-09 16:38:55 +0000575 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000576 const char *errors = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000577 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000578
Martin v. Löwis423be952008-08-13 15:53:07 +0000579 if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000580 &pbuf, &errors))
581 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000582
Antoine Pitroud1188562010-06-09 16:38:55 +0000583 unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
584 PyBuffer_Release(&pbuf);
585 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000586}
587
588static PyObject *
589raw_unicode_escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000590 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000591{
Antoine Pitroud1188562010-06-09 16:38:55 +0000592 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000593 const char *errors = NULL;
Antoine Pitroud1188562010-06-09 16:38:55 +0000594 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000595
Martin v. Löwis423be952008-08-13 15:53:07 +0000596 if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000597 &pbuf, &errors))
598 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000599
Antoine Pitroud1188562010-06-09 16:38:55 +0000600 unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
601 PyBuffer_Release(&pbuf);
602 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000603}
604
605static PyObject *
606latin_1_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000607 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000608{
Antoine Pitroud1188562010-06-09 16:38:55 +0000609 Py_buffer pbuf;
610 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000611 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000612
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000613 if (!PyArg_ParseTuple(args, "y*|z:latin_1_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000614 &pbuf, &errors))
615 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000616
Antoine Pitroud1188562010-06-09 16:38:55 +0000617 unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
618 PyBuffer_Release(&pbuf);
619 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000620}
621
622static PyObject *
623ascii_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000624 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000625{
Antoine Pitroud1188562010-06-09 16:38:55 +0000626 Py_buffer pbuf;
627 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000628 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000629
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000630 if (!PyArg_ParseTuple(args, "y*|z:ascii_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000631 &pbuf, &errors))
632 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000633
Antoine Pitroud1188562010-06-09 16:38:55 +0000634 unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
635 PyBuffer_Release(&pbuf);
636 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000637}
638
639static PyObject *
640charmap_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000641 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000642{
Antoine Pitroud1188562010-06-09 16:38:55 +0000643 Py_buffer pbuf;
644 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000645 const char *errors = NULL;
646 PyObject *mapping = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000647
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000648 if (!PyArg_ParseTuple(args, "y*|zO:charmap_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000649 &pbuf, &errors, &mapping))
650 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000651 if (mapping == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000652 mapping = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000653
Antoine Pitroud1188562010-06-09 16:38:55 +0000654 unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
655 PyBuffer_Release(&pbuf);
656 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000657}
658
Victor Stinner99b95382011-07-04 14:23:54 +0200659#ifdef HAVE_MBCS
Guido van Rossum24bdb042000-03-28 20:29:59 +0000660
661static PyObject *
662mbcs_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000663 PyObject *args)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000664{
Antoine Pitroud1188562010-06-09 16:38:55 +0000665 Py_buffer pbuf;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000666 const char *errors = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000667 int final = 0;
Martin v. Löwis423be952008-08-13 15:53:07 +0000668 Py_ssize_t consumed;
669 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000670
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000671 if (!PyArg_ParseTuple(args, "y*|zi:mbcs_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000672 &pbuf, &errors, &final))
673 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000674 consumed = pbuf.len;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000675
Martin v. Löwis423be952008-08-13 15:53:07 +0000676 decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000677 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000678 PyBuffer_Release(&pbuf);
Martin v. Löwis423be952008-08-13 15:53:07 +0000679 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000680 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000681 return codec_tuple(decoded, consumed);
Guido van Rossum24bdb042000-03-28 20:29:59 +0000682}
683
Victor Stinner3a50e702011-10-18 21:21:00 +0200684static PyObject *
685code_page_decode(PyObject *self,
686 PyObject *args)
687{
688 Py_buffer pbuf;
689 const char *errors = NULL;
690 int final = 0;
691 Py_ssize_t consumed;
692 PyObject *decoded = NULL;
693 int code_page;
694
695 if (!PyArg_ParseTuple(args, "iy*|zi:code_page_decode",
696 &code_page, &pbuf, &errors, &final))
697 return NULL;
698 consumed = pbuf.len;
699
700 decoded = PyUnicode_DecodeCodePageStateful(code_page,
701 pbuf.buf, pbuf.len, errors,
702 final ? NULL : &consumed);
703 PyBuffer_Release(&pbuf);
704 if (decoded == NULL)
705 return NULL;
706 return codec_tuple(decoded, consumed);
707}
708
Victor Stinner99b95382011-07-04 14:23:54 +0200709#endif /* HAVE_MBCS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000710
Guido van Rossume2d67f92000-03-10 23:09:23 +0000711/* --- Encoder ------------------------------------------------------------ */
712
713static PyObject *
714readbuffer_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000715 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000716{
Martin v. Löwis423be952008-08-13 15:53:07 +0000717 Py_buffer pdata;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000718 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000719 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000720 const char *errors = NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000721 PyObject *result;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000722
Martin v. Löwis423be952008-08-13 15:53:07 +0000723 if (!PyArg_ParseTuple(args, "s*|z:readbuffer_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000724 &pdata, &errors))
725 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000726 data = pdata.buf;
727 size = pdata.len;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000728
Martin v. Löwis423be952008-08-13 15:53:07 +0000729 result = PyBytes_FromStringAndSize(data, size);
730 PyBuffer_Release(&pdata);
731 return codec_tuple(result, size);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000732}
733
734static PyObject *
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000735unicode_internal_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000736 PyObject *args)
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000737{
738 PyObject *obj;
739 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000740
Ezio Melottiadc417c2011-11-17 12:23:34 +0200741 if (PyErr_WarnEx(PyExc_DeprecationWarning,
742 "unicode_internal codec has been deprecated",
743 1))
744 return NULL;
745
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000746 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000747 &obj, &errors))
748 return NULL;
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000749
750 if (PyUnicode_Check(obj)) {
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100751 Py_UNICODE *u;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200752 Py_ssize_t len, size;
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100753
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100754 if (PyUnicode_READY(obj) < 0)
755 return NULL;
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100756
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100757 u = PyUnicode_AsUnicodeAndSize(obj, &len);
758 if (u == NULL)
759 return NULL;
Victor Stinner049e5092014-08-17 22:20:00 +0200760 if ((size_t)len > (size_t)PY_SSIZE_T_MAX / sizeof(Py_UNICODE))
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100761 return PyErr_NoMemory();
762 size = len * sizeof(Py_UNICODE);
763 return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size),
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100764 PyUnicode_GET_LENGTH(obj));
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000765 }
766 else {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200767 Py_buffer view;
768 PyObject *result;
769 if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000770 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200771 result = codec_tuple(PyBytes_FromStringAndSize(view.buf, view.len), view.len);
772 PyBuffer_Release(&view);
773 return result;
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000774 }
775}
776
777static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000778utf_7_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000779 PyObject *args)
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000780{
781 PyObject *str, *v;
782 const char *errors = NULL;
783
784 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000785 &str, &errors))
786 return NULL;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000787
788 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100789 if (str == NULL || PyUnicode_READY(str) < 0) {
790 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000791 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100792 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100793 v = codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
794 PyUnicode_GET_LENGTH(str));
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000795 Py_DECREF(str);
796 return v;
797}
798
799static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000800utf_8_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000801 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000802{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000803 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000804 const char *errors = NULL;
805
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000806 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000807 &str, &errors))
808 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000809
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000810 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100811 if (str == NULL || PyUnicode_READY(str) < 0) {
812 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000813 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100814 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200815 v = codec_tuple(PyUnicode_AsEncodedString(str, "utf-8", errors),
816 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000817 Py_DECREF(str);
818 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000819}
820
821/* This version provides access to the byteorder parameter of the
822 builtin UTF-16 codecs as optional third argument. It defaults to 0
823 which means: use the native byte order and prepend the data with a
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000824 BOM mark.
Guido van Rossume2d67f92000-03-10 23:09:23 +0000825
826*/
827
828static PyObject *
829utf_16_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000830 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000831{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000832 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000833 const char *errors = NULL;
834 int byteorder = 0;
835
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000836 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000837 &str, &errors, &byteorder))
838 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000839
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000840 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100841 if (str == NULL || PyUnicode_READY(str) < 0) {
842 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000843 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100844 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100845 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
846 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000847 Py_DECREF(str);
848 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000849}
850
851static PyObject *
852utf_16_le_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000853 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000854{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000855 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000856 const char *errors = NULL;
857
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000858 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000859 &str, &errors))
860 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000861
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000862 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100863 if (str == NULL || PyUnicode_READY(str) < 0) {
864 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000865 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100866 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100867 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
868 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000869 Py_DECREF(str);
870 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000871}
872
873static PyObject *
874utf_16_be_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000875 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000876{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000877 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000878 const char *errors = NULL;
879
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000880 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000881 &str, &errors))
882 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000883
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000884 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100885 if (str == NULL || PyUnicode_READY(str) < 0) {
886 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000887 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100888 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100889 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
890 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000891 Py_DECREF(str);
892 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000893}
894
Walter Dörwald41980ca2007-08-16 21:55:45 +0000895/* This version provides access to the byteorder parameter of the
896 builtin UTF-32 codecs as optional third argument. It defaults to 0
897 which means: use the native byte order and prepend the data with a
898 BOM mark.
899
900*/
901
902static PyObject *
903utf_32_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000904 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000905{
906 PyObject *str, *v;
907 const char *errors = NULL;
908 int byteorder = 0;
909
910 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000911 &str, &errors, &byteorder))
912 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000913
914 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100915 if (str == NULL || PyUnicode_READY(str) < 0) {
916 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000917 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100918 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100919 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
920 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000921 Py_DECREF(str);
922 return v;
923}
924
925static PyObject *
926utf_32_le_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000927 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000928{
929 PyObject *str, *v;
930 const char *errors = NULL;
931
932 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000933 &str, &errors))
934 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000935
936 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100937 if (str == NULL || PyUnicode_READY(str) < 0) {
938 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000939 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100940 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100941 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
942 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000943 Py_DECREF(str);
944 return v;
945}
946
947static PyObject *
948utf_32_be_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000949 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000950{
951 PyObject *str, *v;
952 const char *errors = NULL;
953
954 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000955 &str, &errors))
956 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000957
958 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100959 if (str == NULL || PyUnicode_READY(str) < 0) {
960 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000961 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100962 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100963 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
964 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000965 Py_DECREF(str);
966 return v;
967}
968
Guido van Rossume2d67f92000-03-10 23:09:23 +0000969static PyObject *
970unicode_escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000971 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000972{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000973 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000974 const char *errors = NULL;
975
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000976 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000977 &str, &errors))
978 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000979
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000980 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100981 if (str == NULL || PyUnicode_READY(str) < 0) {
982 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000983 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100984 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100985 v = codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
986 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000987 Py_DECREF(str);
988 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000989}
990
991static PyObject *
992raw_unicode_escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000993 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000994{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000995 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000996 const char *errors = NULL;
997
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000998 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000999 &str, &errors))
1000 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001001
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001002 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001003 if (str == NULL || PyUnicode_READY(str) < 0) {
1004 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001005 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001006 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001007 v = codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
1008 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001009 Py_DECREF(str);
1010 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001011}
1012
1013static PyObject *
1014latin_1_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001015 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001016{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001017 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001018 const char *errors = NULL;
1019
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001020 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001021 &str, &errors))
1022 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001023
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001024 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001025 if (str == NULL || PyUnicode_READY(str) < 0) {
1026 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001027 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001028 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001029 v = codec_tuple(_PyUnicode_AsLatin1String(str, errors),
1030 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001031 Py_DECREF(str);
1032 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001033}
1034
1035static PyObject *
1036ascii_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001037 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001038{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001039 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001040 const char *errors = NULL;
1041
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001042 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001043 &str, &errors))
1044 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001045
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001046 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001047 if (str == NULL || PyUnicode_READY(str) < 0) {
1048 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001049 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001050 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001051 v = codec_tuple(_PyUnicode_AsASCIIString(str, errors),
1052 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001053 Py_DECREF(str);
1054 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001055}
1056
1057static PyObject *
1058charmap_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001059 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001060{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001061 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001062 const char *errors = NULL;
1063 PyObject *mapping = NULL;
1064
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001065 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001066 &str, &errors, &mapping))
1067 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001068 if (mapping == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001069 mapping = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001070
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001071 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001072 if (str == NULL || PyUnicode_READY(str) < 0) {
1073 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001074 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001075 }
Martin v. Löwis23e275b2011-11-02 18:02:51 +01001076 v = codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001077 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001078 Py_DECREF(str);
1079 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001080}
1081
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001082static PyObject*
1083charmap_build(PyObject *self, PyObject *args)
1084{
1085 PyObject *map;
1086 if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
1087 return NULL;
1088 return PyUnicode_BuildEncodingMap(map);
1089}
1090
Victor Stinner99b95382011-07-04 14:23:54 +02001091#ifdef HAVE_MBCS
Guido van Rossum24bdb042000-03-28 20:29:59 +00001092
1093static PyObject *
1094mbcs_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001095 PyObject *args)
Guido van Rossum24bdb042000-03-28 20:29:59 +00001096{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001097 PyObject *str, *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001098 const char *errors = NULL;
1099
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001100 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001101 &str, &errors))
1102 return NULL;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001103
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001104 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001105 if (str == NULL || PyUnicode_READY(str) < 0) {
1106 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001107 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001108 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001109 v = codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
1110 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001111 Py_DECREF(str);
1112 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001113}
1114
Victor Stinner3a50e702011-10-18 21:21:00 +02001115static PyObject *
1116code_page_encode(PyObject *self,
1117 PyObject *args)
1118{
1119 PyObject *str, *v;
1120 const char *errors = NULL;
1121 int code_page;
1122
1123 if (!PyArg_ParseTuple(args, "iO|z:code_page_encode",
1124 &code_page, &str, &errors))
1125 return NULL;
1126
1127 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001128 if (str == NULL || PyUnicode_READY(str) < 0) {
1129 Py_XDECREF(str);
Victor Stinner3a50e702011-10-18 21:21:00 +02001130 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001131 }
Victor Stinner3a50e702011-10-18 21:21:00 +02001132 v = codec_tuple(PyUnicode_EncodeCodePage(code_page,
1133 str,
1134 errors),
1135 PyUnicode_GET_LENGTH(str));
1136 Py_DECREF(str);
1137 return v;
1138}
1139
Victor Stinner99b95382011-07-04 14:23:54 +02001140#endif /* HAVE_MBCS */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001141
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001142/* --- Error handler registry --------------------------------------------- */
1143
Walter Dörwald0ae29812002-10-31 13:36:29 +00001144PyDoc_STRVAR(register_error__doc__,
1145"register_error(errors, handler)\n\
1146\n\
1147Register the specified error handler under the name\n\
1148errors. handler must be a callable object, that\n\
1149will be called with an exception instance containing\n\
1150information about the location of the encoding/decoding\n\
1151error and must return a (replacement, new position) tuple.");
1152
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001153static PyObject *register_error(PyObject *self, PyObject *args)
1154{
1155 const char *name;
1156 PyObject *handler;
1157
1158 if (!PyArg_ParseTuple(args, "sO:register_error",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001159 &name, &handler))
1160 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001161 if (PyCodec_RegisterError(name, handler))
1162 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001163 Py_RETURN_NONE;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001164}
1165
Walter Dörwald0ae29812002-10-31 13:36:29 +00001166PyDoc_STRVAR(lookup_error__doc__,
1167"lookup_error(errors) -> handler\n\
1168\n\
1169Return the error handler for the specified error handling name\n\
1170or raise a LookupError, if no handler exists under this name.");
1171
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001172static PyObject *lookup_error(PyObject *self, PyObject *args)
1173{
1174 const char *name;
1175
1176 if (!PyArg_ParseTuple(args, "s:lookup_error",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001177 &name))
1178 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001179 return PyCodec_LookupError(name);
1180}
1181
Guido van Rossume2d67f92000-03-10 23:09:23 +00001182/* --- Module API --------------------------------------------------------- */
1183
1184static PyMethodDef _codecs_functions[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001185 {"register", codec_register, METH_O,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001186 register__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001187 {"lookup", codec_lookup, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001188 lookup__doc__},
Victor Stinnera57dfd02014-05-14 17:13:14 +02001189 {"encode", (PyCFunction)codec_encode, METH_VARARGS|METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001190 encode__doc__},
Victor Stinnera57dfd02014-05-14 17:13:14 +02001191 {"decode", (PyCFunction)codec_decode, METH_VARARGS|METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001192 decode__doc__},
1193 {"escape_encode", escape_encode, METH_VARARGS},
1194 {"escape_decode", escape_decode, METH_VARARGS},
1195 {"utf_8_encode", utf_8_encode, METH_VARARGS},
1196 {"utf_8_decode", utf_8_decode, METH_VARARGS},
1197 {"utf_7_encode", utf_7_encode, METH_VARARGS},
1198 {"utf_7_decode", utf_7_decode, METH_VARARGS},
1199 {"utf_16_encode", utf_16_encode, METH_VARARGS},
1200 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1201 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1202 {"utf_16_decode", utf_16_decode, METH_VARARGS},
1203 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1204 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1205 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
1206 {"utf_32_encode", utf_32_encode, METH_VARARGS},
1207 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1208 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1209 {"utf_32_decode", utf_32_decode, METH_VARARGS},
1210 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1211 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1212 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
1213 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1214 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1215 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1216 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1217 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1218 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1219 {"latin_1_encode", latin_1_encode, METH_VARARGS},
1220 {"latin_1_decode", latin_1_decode, METH_VARARGS},
1221 {"ascii_encode", ascii_encode, METH_VARARGS},
1222 {"ascii_decode", ascii_decode, METH_VARARGS},
1223 {"charmap_encode", charmap_encode, METH_VARARGS},
1224 {"charmap_decode", charmap_decode, METH_VARARGS},
1225 {"charmap_build", charmap_build, METH_VARARGS},
1226 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
Victor Stinner99b95382011-07-04 14:23:54 +02001227#ifdef HAVE_MBCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001228 {"mbcs_encode", mbcs_encode, METH_VARARGS},
1229 {"mbcs_decode", mbcs_decode, METH_VARARGS},
Victor Stinner3a50e702011-10-18 21:21:00 +02001230 {"code_page_encode", code_page_encode, METH_VARARGS},
1231 {"code_page_decode", code_page_decode, METH_VARARGS},
Guido van Rossum24bdb042000-03-28 20:29:59 +00001232#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001233 {"register_error", register_error, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001234 register_error__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001235 {"lookup_error", lookup_error, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001236 lookup_error__doc__},
Nick Coghlan8fad1672014-09-15 23:50:44 +12001237 _CODECS__FORGET_CODEC_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001238 {NULL, NULL} /* sentinel */
Guido van Rossume2d67f92000-03-10 23:09:23 +00001239};
1240
Martin v. Löwis1a214512008-06-11 05:26:20 +00001241static struct PyModuleDef codecsmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001242 PyModuleDef_HEAD_INIT,
1243 "_codecs",
1244 NULL,
1245 -1,
1246 _codecs_functions,
1247 NULL,
1248 NULL,
1249 NULL,
1250 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001251};
1252
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001253PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001254PyInit__codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001255{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001256 return PyModule_Create(&codecsmodule);
Guido van Rossume2d67f92000-03-10 23:09:23 +00001257}