blob: bf408afeca983b9df8cd07eb0ce4317167a335d6 [file] [log] [blame]
Guido van Rossume2d67f92000-03-10 23:09:23 +00001/* ------------------------------------------------------------------------
2
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
5
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
8
9 The codec registry is accessible via:
10
11 register(search_function) -> None
12
Guido van Rossum36e0a922007-07-20 04:05:57 +000013 lookup(encoding) -> CodecInfo object
Guido van Rossume2d67f92000-03-10 23:09:23 +000014
15 The builtin Unicode codecs use the following interface:
16
Walter Dörwald9fd115c2005-11-02 08:30:08 +000017 <encoding>_encode(Unicode_object[,errors='strict']) ->
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000018 (string object, bytes consumed)
Guido van Rossume2d67f92000-03-10 23:09:23 +000019
Walter Dörwald9fd115c2005-11-02 08:30:08 +000020 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
Guido van Rossume2d67f92000-03-10 23:09:23 +000021 (Unicode object, bytes consumed)
22
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +000023 <encoding>_encode() interfaces also accept non-Unicode object as
24 input. The objects are then converted to Unicode using
25 PyUnicode_FromObject() prior to applying the conversion.
26
Guido van Rossume2d67f92000-03-10 23:09:23 +000027 These <encoding>s are available: utf_8, unicode_escape,
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +000028 raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
29 mbcs (on win32).
30
Guido van Rossume2d67f92000-03-10 23:09:23 +000031
32Written by Marc-Andre Lemburg (mal@lemburg.com).
33
Guido van Rossum16b1ad92000-08-03 16:24:25 +000034Copyright (c) Corporation for National Research Initiatives.
Guido van Rossume2d67f92000-03-10 23:09:23 +000035
36 ------------------------------------------------------------------------ */
37
Martin v. Löwis18e16552006-02-15 17:27:45 +000038#define PY_SSIZE_T_CLEAN
Guido van Rossume2d67f92000-03-10 23:09:23 +000039#include "Python.h"
40
Victor Stinner87a7c822011-11-10 20:05:55 +010041#ifdef MS_WINDOWS
42#include <windows.h>
43#endif
44
Nick Coghlan8fad1672014-09-15 23:50:44 +120045/*[clinic input]
46module _codecs
47[clinic start generated code]*/
48/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
49
50
Guido van Rossume2d67f92000-03-10 23:09:23 +000051/* --- Registry ----------------------------------------------------------- */
52
Walter Dörwald0ae29812002-10-31 13:36:29 +000053PyDoc_STRVAR(register__doc__,
54"register(search_function)\n\
55\n\
56Register a codec search function. Search functions are expected to take\n\
Nick Coghlanb9fdb7a2015-01-07 00:22:00 +100057one argument, the encoding name in all lower case letters, and either\n\
58return None, or a tuple of functions (encoder, decoder, stream_reader,\n\
59stream_writer) (or a CodecInfo object).");
Walter Dörwald0ae29812002-10-31 13:36:29 +000060
Guido van Rossume2d67f92000-03-10 23:09:23 +000061static
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000062PyObject *codec_register(PyObject *self, PyObject *search_function)
Guido van Rossume2d67f92000-03-10 23:09:23 +000063{
Guido van Rossume2d67f92000-03-10 23:09:23 +000064 if (PyCodec_Register(search_function))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000065 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000066
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000067 Py_RETURN_NONE;
Guido van Rossume2d67f92000-03-10 23:09:23 +000068}
69
Walter Dörwald0ae29812002-10-31 13:36:29 +000070PyDoc_STRVAR(lookup__doc__,
Guido van Rossum36e0a922007-07-20 04:05:57 +000071"lookup(encoding) -> CodecInfo\n\
Walter Dörwald0ae29812002-10-31 13:36:29 +000072\n\
73Looks up a codec tuple in the Python codec registry and returns\n\
Benjamin Petersonf07d0022009-03-21 17:31:58 +000074a CodecInfo object.");
Walter Dörwald0ae29812002-10-31 13:36:29 +000075
Guido van Rossume2d67f92000-03-10 23:09:23 +000076static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000077PyObject *codec_lookup(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000078{
79 char *encoding;
80
81 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000082 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +000083
84 return _PyCodec_Lookup(encoding);
Guido van Rossume2d67f92000-03-10 23:09:23 +000085}
86
Marc-André Lemburg3f419742004-07-10 12:06:10 +000087PyDoc_STRVAR(encode__doc__,
88"encode(obj, [encoding[,errors]]) -> object\n\
89\n\
90Encodes obj using the codec registered for encoding. encoding defaults\n\
91to the default encoding. errors may be given to set a different error\n\
92handling scheme. Default is 'strict' meaning that encoding errors raise\n\
93a ValueError. Other possible values are 'ignore', 'replace' and\n\
94'xmlcharrefreplace' as well as any other name registered with\n\
95codecs.register_error that can handle ValueErrors.");
96
97static PyObject *
Victor Stinnera57dfd02014-05-14 17:13:14 +020098codec_encode(PyObject *self, PyObject *args, PyObject *kwargs)
Marc-André Lemburg3f419742004-07-10 12:06:10 +000099{
Victor Stinnera57dfd02014-05-14 17:13:14 +0200100 static char *kwlist[] = {"obj", "encoding", "errors", NULL};
Brett Cannon3e377de2004-07-10 21:41:14 +0000101 const char *encoding = NULL;
102 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000103 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000104
Victor Stinnera57dfd02014-05-14 17:13:14 +0200105 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|ss:encode", kwlist,
106 &v, &encoding, &errors))
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000107 return NULL;
108
109 if (encoding == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000110 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000111
112 /* Encode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000113 return PyCodec_Encode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000114}
115
116PyDoc_STRVAR(decode__doc__,
117"decode(obj, [encoding[,errors]]) -> object\n\
118\n\
119Decodes obj using the codec registered for encoding. encoding defaults\n\
120to the default encoding. errors may be given to set a different error\n\
121handling scheme. Default is 'strict' meaning that encoding errors raise\n\
122a ValueError. Other possible values are 'ignore' and 'replace'\n\
Benjamin Petersonf10a79a2008-10-11 00:49:57 +0000123as well as any other name registered with codecs.register_error that is\n\
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000124able to handle ValueErrors.");
125
126static PyObject *
Victor Stinnera57dfd02014-05-14 17:13:14 +0200127codec_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000128{
Victor Stinnera57dfd02014-05-14 17:13:14 +0200129 static char *kwlist[] = {"obj", "encoding", "errors", NULL};
Brett Cannon3e377de2004-07-10 21:41:14 +0000130 const char *encoding = NULL;
131 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000132 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000133
Victor Stinnera57dfd02014-05-14 17:13:14 +0200134 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|ss:decode", kwlist,
135 &v, &encoding, &errors))
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000136 return NULL;
137
138 if (encoding == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000140
141 /* Decode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000142 return PyCodec_Decode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000143}
144
Guido van Rossume2d67f92000-03-10 23:09:23 +0000145/* --- Helpers ------------------------------------------------------------ */
146
Nick Coghlan8fad1672014-09-15 23:50:44 +1200147/*[clinic input]
148_codecs._forget_codec
149
150 encoding: str
151 /
152
153Purge the named codec from the internal codec lookup cache
154[clinic start generated code]*/
155
156PyDoc_STRVAR(_codecs__forget_codec__doc__,
157"_forget_codec($module, encoding, /)\n"
158"--\n"
159"\n"
160"Purge the named codec from the internal codec lookup cache");
161
162#define _CODECS__FORGET_CODEC_METHODDEF \
163 {"_forget_codec", (PyCFunction)_codecs__forget_codec, METH_VARARGS, _codecs__forget_codec__doc__},
164
165static PyObject *
166_codecs__forget_codec_impl(PyModuleDef *module, const char *encoding);
167
168static PyObject *
169_codecs__forget_codec(PyModuleDef *module, PyObject *args)
170{
171 PyObject *return_value = NULL;
172 const char *encoding;
173
174 if (!PyArg_ParseTuple(args,
175 "s:_forget_codec",
176 &encoding))
177 goto exit;
178 return_value = _codecs__forget_codec_impl(module, encoding);
179
180exit:
181 return return_value;
182}
183
184static PyObject *
185_codecs__forget_codec_impl(PyModuleDef *module, const char *encoding)
186/*[clinic end generated code: output=a75e631591702a5c input=18d5d92d0e386c38]*/
187{
188 if (_PyCodec_Forget(encoding) < 0) {
189 return NULL;
190 };
191 Py_RETURN_NONE;
192}
193
Guido van Rossume2d67f92000-03-10 23:09:23 +0000194static
195PyObject *codec_tuple(PyObject *unicode,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000196 Py_ssize_t len)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000197{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000198 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000199 if (unicode == NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000200 return NULL;
201 v = Py_BuildValue("On", unicode, len);
202 Py_DECREF(unicode);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000203 return v;
204}
205
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000206/* --- String codecs ------------------------------------------------------ */
207static PyObject *
208escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000209 PyObject *args)
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000210{
211 const char *errors = NULL;
212 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000213 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000214
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000215 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000216 &data, &size, &errors))
217 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +0000218 return codec_tuple(PyBytes_DecodeEscape(data, size, errors, 0, NULL),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000219 size);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000220}
221
222static PyObject *
223escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000224 PyObject *args)
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000225{
Antoine Pitroud1188562010-06-09 16:38:55 +0000226 PyObject *str;
227 Py_ssize_t size;
228 Py_ssize_t newsize;
229 const char *errors = NULL;
230 PyObject *v;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000231
Antoine Pitroud1188562010-06-09 16:38:55 +0000232 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
233 &PyBytes_Type, &str, &errors))
234 return NULL;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000235
Antoine Pitroud1188562010-06-09 16:38:55 +0000236 size = PyBytes_GET_SIZE(str);
Mark Dickinsonc04ddff2012-10-06 18:04:49 +0100237 if (size > PY_SSIZE_T_MAX / 4) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000238 PyErr_SetString(PyExc_OverflowError,
239 "string is too large to encode");
240 return NULL;
241 }
Mark Dickinsonc04ddff2012-10-06 18:04:49 +0100242 newsize = 4*size;
Antoine Pitroud1188562010-06-09 16:38:55 +0000243 v = PyBytes_FromStringAndSize(NULL, newsize);
244
245 if (v == NULL) {
246 return NULL;
247 }
248 else {
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200249 Py_ssize_t i;
250 char c;
251 char *p = PyBytes_AS_STRING(v);
Antoine Pitroud1188562010-06-09 16:38:55 +0000252
253 for (i = 0; i < size; i++) {
254 /* There's at least enough room for a hex escape */
255 assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
256 c = PyBytes_AS_STRING(str)[i];
257 if (c == '\'' || c == '\\')
258 *p++ = '\\', *p++ = c;
259 else if (c == '\t')
260 *p++ = '\\', *p++ = 't';
261 else if (c == '\n')
262 *p++ = '\\', *p++ = 'n';
263 else if (c == '\r')
264 *p++ = '\\', *p++ = 'r';
265 else if (c < ' ' || c >= 0x7f) {
266 *p++ = '\\';
267 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200268 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
269 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitroud1188562010-06-09 16:38:55 +0000270 }
271 else
272 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 }
Antoine Pitroud1188562010-06-09 16:38:55 +0000274 *p = '\0';
275 if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
276 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000277 }
Antoine Pitroud1188562010-06-09 16:38:55 +0000278 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000279
Antoine Pitroud1188562010-06-09 16:38:55 +0000280 return codec_tuple(v, size);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000281}
282
Guido van Rossume2d67f92000-03-10 23:09:23 +0000283/* --- Decoder ------------------------------------------------------------ */
284
285static PyObject *
286unicode_internal_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000287 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000288{
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000289 PyObject *obj;
290 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000291
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000292 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000293 &obj, &errors))
294 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000295
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000296 if (PyUnicode_Check(obj)) {
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100297 if (PyUnicode_READY(obj) < 0)
298 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000299 Py_INCREF(obj);
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100300 return codec_tuple(obj, PyUnicode_GET_LENGTH(obj));
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000301 }
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000302 else {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200303 Py_buffer view;
304 PyObject *result;
305 if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000306 return NULL;
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000307
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200308 result = codec_tuple(
309 _PyUnicode_DecodeUnicodeInternal(view.buf, view.len, errors),
310 view.len);
311 PyBuffer_Release(&view);
312 return result;
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000313 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000314}
315
316static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000317utf_7_decode(PyObject *self,
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000318 PyObject *args)
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000319{
Antoine Pitroud1188562010-06-09 16:38:55 +0000320 Py_buffer pbuf;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000321 const char *errors = NULL;
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000322 int final = 0;
323 Py_ssize_t consumed;
324 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000325
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000326 if (!PyArg_ParseTuple(args, "y*|zi:utf_7_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000327 &pbuf, &errors, &final))
328 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000329 consumed = pbuf.len;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000330
Martin v. Löwis423be952008-08-13 15:53:07 +0000331 decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000332 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000333 PyBuffer_Release(&pbuf);
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000334 if (decoded == NULL)
335 return NULL;
336 return codec_tuple(decoded, consumed);
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000337}
338
339static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000340utf_8_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000341 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000342{
Antoine Pitroud1188562010-06-09 16:38:55 +0000343 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000344 const char *errors = NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000345 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000346 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000347 PyObject *decoded = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000348
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000349 if (!PyArg_ParseTuple(args, "y*|zi:utf_8_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 &pbuf, &errors, &final))
351 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000352 consumed = pbuf.len;
353
354 decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000355 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000356 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000357 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000358 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000359 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000360}
361
362static PyObject *
363utf_16_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000364 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000365{
Antoine Pitroud1188562010-06-09 16:38:55 +0000366 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000367 const char *errors = NULL;
368 int byteorder = 0;
Walter Dörwald69652032004-09-07 20:24:22 +0000369 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000370 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000371 PyObject *decoded;
372
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000373 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000374 &pbuf, &errors, &final))
375 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000376 consumed = pbuf.len; /* This is overwritten unless final is true. */
377 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000378 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000379 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000380 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000382 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000383}
384
385static PyObject *
386utf_16_le_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000387 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000388{
Antoine Pitroud1188562010-06-09 16:38:55 +0000389 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000390 const char *errors = NULL;
391 int byteorder = -1;
Walter Dörwald69652032004-09-07 20:24:22 +0000392 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000393 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000394 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000395
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000396 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_le_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000397 &pbuf, &errors, &final))
398 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000399
Martin v. Löwis423be952008-08-13 15:53:07 +0000400 consumed = pbuf.len; /* This is overwritten unless final is true. */
401 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000402 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000403 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000404 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000405 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000406 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000407}
408
409static PyObject *
410utf_16_be_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000411 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000412{
Antoine Pitroud1188562010-06-09 16:38:55 +0000413 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000414 const char *errors = NULL;
415 int byteorder = 1;
Walter Dörwald69652032004-09-07 20:24:22 +0000416 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000417 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000418 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000419
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000420 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_be_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000421 &pbuf, &errors, &final))
422 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000423
424 consumed = pbuf.len; /* This is overwritten unless final is true. */
425 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000426 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000427 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000428 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000429 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000430 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000431}
432
433/* This non-standard version also provides access to the byteorder
434 parameter of the builtin UTF-16 codec.
435
436 It returns a tuple (unicode, bytesread, byteorder) with byteorder
437 being the value in effect at the end of data.
438
439*/
440
441static PyObject *
442utf_16_ex_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000443 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000444{
Antoine Pitroud1188562010-06-09 16:38:55 +0000445 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000446 const char *errors = NULL;
447 int byteorder = 0;
448 PyObject *unicode, *tuple;
Walter Dörwald69652032004-09-07 20:24:22 +0000449 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000450 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000451
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000452 if (!PyArg_ParseTuple(args, "y*|zii:utf_16_ex_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000453 &pbuf, &errors, &byteorder, &final))
454 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000455 consumed = pbuf.len; /* This is overwritten unless final is true. */
456 unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000457 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000458 PyBuffer_Release(&pbuf);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000459 if (unicode == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000460 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000461 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000462 Py_DECREF(unicode);
463 return tuple;
464}
465
466static PyObject *
Walter Dörwald41980ca2007-08-16 21:55:45 +0000467utf_32_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000468 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000469{
Antoine Pitroud1188562010-06-09 16:38:55 +0000470 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000471 const char *errors = NULL;
472 int byteorder = 0;
473 int final = 0;
474 Py_ssize_t consumed;
475 PyObject *decoded;
476
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000477 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000478 &pbuf, &errors, &final))
479 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000480 consumed = pbuf.len; /* This is overwritten unless final is true. */
481 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000482 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000483 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000484 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000485 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000486 return codec_tuple(decoded, consumed);
487}
488
489static PyObject *
490utf_32_le_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000491 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000492{
Antoine Pitroud1188562010-06-09 16:38:55 +0000493 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000494 const char *errors = NULL;
495 int byteorder = -1;
496 int final = 0;
497 Py_ssize_t consumed;
Martin v. Löwis423be952008-08-13 15:53:07 +0000498 PyObject *decoded;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000499
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000500 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_le_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000501 &pbuf, &errors, &final))
502 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000503 consumed = pbuf.len; /* This is overwritten unless final is true. */
504 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000505 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000506 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000507 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000508 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000509 return codec_tuple(decoded, consumed);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000510}
511
512static PyObject *
513utf_32_be_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000514 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000515{
Antoine Pitroud1188562010-06-09 16:38:55 +0000516 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000517 const char *errors = NULL;
518 int byteorder = 1;
519 int final = 0;
520 Py_ssize_t consumed;
Martin v. Löwis423be952008-08-13 15:53:07 +0000521 PyObject *decoded;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000522
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000523 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_be_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000524 &pbuf, &errors, &final))
525 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000526 consumed = pbuf.len; /* This is overwritten unless final is true. */
527 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000528 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000529 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000530 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000531 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000532 return codec_tuple(decoded, consumed);
533}
534
535/* This non-standard version also provides access to the byteorder
536 parameter of the builtin UTF-32 codec.
537
538 It returns a tuple (unicode, bytesread, byteorder) with byteorder
539 being the value in effect at the end of data.
540
541*/
542
543static PyObject *
544utf_32_ex_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000545 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000546{
Antoine Pitroud1188562010-06-09 16:38:55 +0000547 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000548 const char *errors = NULL;
549 int byteorder = 0;
550 PyObject *unicode, *tuple;
551 int final = 0;
552 Py_ssize_t consumed;
553
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000554 if (!PyArg_ParseTuple(args, "y*|zii:utf_32_ex_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000555 &pbuf, &errors, &byteorder, &final))
556 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000557 consumed = pbuf.len; /* This is overwritten unless final is true. */
558 unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000559 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000560 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000561 if (unicode == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000562 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000563 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
564 Py_DECREF(unicode);
565 return tuple;
566}
567
568static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000569unicode_escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000570 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000571{
Antoine Pitroud1188562010-06-09 16:38:55 +0000572 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000573 const char *errors = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000574 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000575
Martin v. Löwis423be952008-08-13 15:53:07 +0000576 if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000577 &pbuf, &errors))
578 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000579
Antoine Pitroud1188562010-06-09 16:38:55 +0000580 unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
581 PyBuffer_Release(&pbuf);
582 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000583}
584
585static PyObject *
586raw_unicode_escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000587 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000588{
Antoine Pitroud1188562010-06-09 16:38:55 +0000589 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000590 const char *errors = NULL;
Antoine Pitroud1188562010-06-09 16:38:55 +0000591 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000592
Martin v. Löwis423be952008-08-13 15:53:07 +0000593 if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000594 &pbuf, &errors))
595 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000596
Antoine Pitroud1188562010-06-09 16:38:55 +0000597 unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
598 PyBuffer_Release(&pbuf);
599 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000600}
601
602static PyObject *
603latin_1_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000604 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000605{
Antoine Pitroud1188562010-06-09 16:38:55 +0000606 Py_buffer pbuf;
607 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000608 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000609
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000610 if (!PyArg_ParseTuple(args, "y*|z:latin_1_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000611 &pbuf, &errors))
612 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000613
Antoine Pitroud1188562010-06-09 16:38:55 +0000614 unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
615 PyBuffer_Release(&pbuf);
616 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000617}
618
619static PyObject *
620ascii_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000621 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000622{
Antoine Pitroud1188562010-06-09 16:38:55 +0000623 Py_buffer pbuf;
624 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000625 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000626
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000627 if (!PyArg_ParseTuple(args, "y*|z:ascii_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000628 &pbuf, &errors))
629 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000630
Antoine Pitroud1188562010-06-09 16:38:55 +0000631 unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
632 PyBuffer_Release(&pbuf);
633 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000634}
635
636static PyObject *
637charmap_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000638 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000639{
Antoine Pitroud1188562010-06-09 16:38:55 +0000640 Py_buffer pbuf;
641 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000642 const char *errors = NULL;
643 PyObject *mapping = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000644
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000645 if (!PyArg_ParseTuple(args, "y*|zO:charmap_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000646 &pbuf, &errors, &mapping))
647 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000648 if (mapping == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000649 mapping = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000650
Antoine Pitroud1188562010-06-09 16:38:55 +0000651 unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
652 PyBuffer_Release(&pbuf);
653 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000654}
655
Victor Stinner99b95382011-07-04 14:23:54 +0200656#ifdef HAVE_MBCS
Guido van Rossum24bdb042000-03-28 20:29:59 +0000657
658static PyObject *
659mbcs_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000660 PyObject *args)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000661{
Antoine Pitroud1188562010-06-09 16:38:55 +0000662 Py_buffer pbuf;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000663 const char *errors = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000664 int final = 0;
Martin v. Löwis423be952008-08-13 15:53:07 +0000665 Py_ssize_t consumed;
666 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000667
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000668 if (!PyArg_ParseTuple(args, "y*|zi:mbcs_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000669 &pbuf, &errors, &final))
670 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000671 consumed = pbuf.len;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000672
Martin v. Löwis423be952008-08-13 15:53:07 +0000673 decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000674 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000675 PyBuffer_Release(&pbuf);
Martin v. Löwis423be952008-08-13 15:53:07 +0000676 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000677 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000678 return codec_tuple(decoded, consumed);
Guido van Rossum24bdb042000-03-28 20:29:59 +0000679}
680
Victor Stinner3a50e702011-10-18 21:21:00 +0200681static PyObject *
682code_page_decode(PyObject *self,
683 PyObject *args)
684{
685 Py_buffer pbuf;
686 const char *errors = NULL;
687 int final = 0;
688 Py_ssize_t consumed;
689 PyObject *decoded = NULL;
690 int code_page;
691
692 if (!PyArg_ParseTuple(args, "iy*|zi:code_page_decode",
693 &code_page, &pbuf, &errors, &final))
694 return NULL;
695 consumed = pbuf.len;
696
697 decoded = PyUnicode_DecodeCodePageStateful(code_page,
698 pbuf.buf, pbuf.len, errors,
699 final ? NULL : &consumed);
700 PyBuffer_Release(&pbuf);
701 if (decoded == NULL)
702 return NULL;
703 return codec_tuple(decoded, consumed);
704}
705
Victor Stinner99b95382011-07-04 14:23:54 +0200706#endif /* HAVE_MBCS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000707
Guido van Rossume2d67f92000-03-10 23:09:23 +0000708/* --- Encoder ------------------------------------------------------------ */
709
710static PyObject *
711readbuffer_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000712 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000713{
Martin v. Löwis423be952008-08-13 15:53:07 +0000714 Py_buffer pdata;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000715 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000716 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000717 const char *errors = NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000718 PyObject *result;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000719
Martin v. Löwis423be952008-08-13 15:53:07 +0000720 if (!PyArg_ParseTuple(args, "s*|z:readbuffer_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000721 &pdata, &errors))
722 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000723 data = pdata.buf;
724 size = pdata.len;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000725
Martin v. Löwis423be952008-08-13 15:53:07 +0000726 result = PyBytes_FromStringAndSize(data, size);
727 PyBuffer_Release(&pdata);
728 return codec_tuple(result, size);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000729}
730
731static PyObject *
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000732unicode_internal_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000733 PyObject *args)
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000734{
735 PyObject *obj;
736 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000737
Ezio Melottiadc417c2011-11-17 12:23:34 +0200738 if (PyErr_WarnEx(PyExc_DeprecationWarning,
739 "unicode_internal codec has been deprecated",
740 1))
741 return NULL;
742
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000743 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000744 &obj, &errors))
745 return NULL;
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000746
747 if (PyUnicode_Check(obj)) {
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100748 Py_UNICODE *u;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200749 Py_ssize_t len, size;
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100750
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100751 if (PyUnicode_READY(obj) < 0)
752 return NULL;
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100753
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100754 u = PyUnicode_AsUnicodeAndSize(obj, &len);
755 if (u == NULL)
756 return NULL;
Victor Stinner049e5092014-08-17 22:20:00 +0200757 if ((size_t)len > (size_t)PY_SSIZE_T_MAX / sizeof(Py_UNICODE))
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100758 return PyErr_NoMemory();
759 size = len * sizeof(Py_UNICODE);
760 return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size),
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100761 PyUnicode_GET_LENGTH(obj));
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000762 }
763 else {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200764 Py_buffer view;
765 PyObject *result;
766 if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000767 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200768 result = codec_tuple(PyBytes_FromStringAndSize(view.buf, view.len), view.len);
769 PyBuffer_Release(&view);
770 return result;
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000771 }
772}
773
774static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000775utf_7_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000776 PyObject *args)
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000777{
778 PyObject *str, *v;
779 const char *errors = NULL;
780
781 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000782 &str, &errors))
783 return NULL;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000784
785 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100786 if (str == NULL || PyUnicode_READY(str) < 0) {
787 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000788 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100789 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100790 v = codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
791 PyUnicode_GET_LENGTH(str));
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000792 Py_DECREF(str);
793 return v;
794}
795
796static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000797utf_8_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000798 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000799{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000800 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000801 const char *errors = NULL;
802
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000803 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000804 &str, &errors))
805 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000806
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000807 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100808 if (str == NULL || PyUnicode_READY(str) < 0) {
809 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000810 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100811 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200812 v = codec_tuple(PyUnicode_AsEncodedString(str, "utf-8", errors),
813 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000814 Py_DECREF(str);
815 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000816}
817
818/* This version provides access to the byteorder parameter of the
819 builtin UTF-16 codecs as optional third argument. It defaults to 0
820 which means: use the native byte order and prepend the data with a
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000821 BOM mark.
Guido van Rossume2d67f92000-03-10 23:09:23 +0000822
823*/
824
825static PyObject *
826utf_16_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000827 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000828{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000829 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000830 const char *errors = NULL;
831 int byteorder = 0;
832
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000833 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000834 &str, &errors, &byteorder))
835 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000836
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000837 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100838 if (str == NULL || PyUnicode_READY(str) < 0) {
839 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000840 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100841 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100842 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
843 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000844 Py_DECREF(str);
845 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000846}
847
848static PyObject *
849utf_16_le_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000850 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000851{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000852 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000853 const char *errors = NULL;
854
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000855 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000856 &str, &errors))
857 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000858
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000859 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100860 if (str == NULL || PyUnicode_READY(str) < 0) {
861 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000862 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100863 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100864 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
865 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000866 Py_DECREF(str);
867 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000868}
869
870static PyObject *
871utf_16_be_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000872 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000873{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000874 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000875 const char *errors = NULL;
876
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000877 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000878 &str, &errors))
879 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000880
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000881 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100882 if (str == NULL || PyUnicode_READY(str) < 0) {
883 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000884 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100885 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100886 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
887 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000888 Py_DECREF(str);
889 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000890}
891
Walter Dörwald41980ca2007-08-16 21:55:45 +0000892/* This version provides access to the byteorder parameter of the
893 builtin UTF-32 codecs as optional third argument. It defaults to 0
894 which means: use the native byte order and prepend the data with a
895 BOM mark.
896
897*/
898
899static PyObject *
900utf_32_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000901 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000902{
903 PyObject *str, *v;
904 const char *errors = NULL;
905 int byteorder = 0;
906
907 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000908 &str, &errors, &byteorder))
909 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000910
911 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100912 if (str == NULL || PyUnicode_READY(str) < 0) {
913 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000914 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100915 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100916 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
917 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000918 Py_DECREF(str);
919 return v;
920}
921
922static PyObject *
923utf_32_le_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000924 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000925{
926 PyObject *str, *v;
927 const char *errors = NULL;
928
929 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000930 &str, &errors))
931 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000932
933 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100934 if (str == NULL || PyUnicode_READY(str) < 0) {
935 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000936 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100937 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100938 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
939 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000940 Py_DECREF(str);
941 return v;
942}
943
944static PyObject *
945utf_32_be_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000946 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000947{
948 PyObject *str, *v;
949 const char *errors = NULL;
950
951 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000952 &str, &errors))
953 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000954
955 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100956 if (str == NULL || PyUnicode_READY(str) < 0) {
957 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000958 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100959 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100960 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
961 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000962 Py_DECREF(str);
963 return v;
964}
965
Guido van Rossume2d67f92000-03-10 23:09:23 +0000966static PyObject *
967unicode_escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000968 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000969{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000970 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000971 const char *errors = NULL;
972
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000973 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000974 &str, &errors))
975 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000976
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000977 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100978 if (str == NULL || PyUnicode_READY(str) < 0) {
979 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000980 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100981 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100982 v = codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
983 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000984 Py_DECREF(str);
985 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000986}
987
988static PyObject *
989raw_unicode_escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000990 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000991{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000992 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000993 const char *errors = NULL;
994
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000995 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000996 &str, &errors))
997 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000998
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000999 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001000 if (str == NULL || PyUnicode_READY(str) < 0) {
1001 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001002 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001003 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001004 v = codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
1005 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001006 Py_DECREF(str);
1007 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001008}
1009
1010static PyObject *
1011latin_1_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001012 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001013{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001014 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001015 const char *errors = NULL;
1016
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001017 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001018 &str, &errors))
1019 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001020
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001021 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001022 if (str == NULL || PyUnicode_READY(str) < 0) {
1023 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001024 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001025 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001026 v = codec_tuple(_PyUnicode_AsLatin1String(str, errors),
1027 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001028 Py_DECREF(str);
1029 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001030}
1031
1032static PyObject *
1033ascii_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001034 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001035{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001036 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001037 const char *errors = NULL;
1038
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001039 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001040 &str, &errors))
1041 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001042
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001043 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001044 if (str == NULL || PyUnicode_READY(str) < 0) {
1045 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001046 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001047 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001048 v = codec_tuple(_PyUnicode_AsASCIIString(str, errors),
1049 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001050 Py_DECREF(str);
1051 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001052}
1053
1054static PyObject *
1055charmap_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001056 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001057{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001058 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001059 const char *errors = NULL;
1060 PyObject *mapping = NULL;
1061
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001062 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001063 &str, &errors, &mapping))
1064 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001065 if (mapping == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001066 mapping = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001067
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001068 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001069 if (str == NULL || PyUnicode_READY(str) < 0) {
1070 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001071 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001072 }
Martin v. Löwis23e275b2011-11-02 18:02:51 +01001073 v = codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001074 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001075 Py_DECREF(str);
1076 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001077}
1078
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001079static PyObject*
1080charmap_build(PyObject *self, PyObject *args)
1081{
1082 PyObject *map;
1083 if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
1084 return NULL;
1085 return PyUnicode_BuildEncodingMap(map);
1086}
1087
Victor Stinner99b95382011-07-04 14:23:54 +02001088#ifdef HAVE_MBCS
Guido van Rossum24bdb042000-03-28 20:29:59 +00001089
1090static PyObject *
1091mbcs_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 PyObject *args)
Guido van Rossum24bdb042000-03-28 20:29:59 +00001093{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001094 PyObject *str, *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001095 const char *errors = NULL;
1096
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001097 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001098 &str, &errors))
1099 return NULL;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001100
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001101 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001102 if (str == NULL || PyUnicode_READY(str) < 0) {
1103 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001104 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001105 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001106 v = codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
1107 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001108 Py_DECREF(str);
1109 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001110}
1111
Victor Stinner3a50e702011-10-18 21:21:00 +02001112static PyObject *
1113code_page_encode(PyObject *self,
1114 PyObject *args)
1115{
1116 PyObject *str, *v;
1117 const char *errors = NULL;
1118 int code_page;
1119
1120 if (!PyArg_ParseTuple(args, "iO|z:code_page_encode",
1121 &code_page, &str, &errors))
1122 return NULL;
1123
1124 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001125 if (str == NULL || PyUnicode_READY(str) < 0) {
1126 Py_XDECREF(str);
Victor Stinner3a50e702011-10-18 21:21:00 +02001127 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001128 }
Victor Stinner3a50e702011-10-18 21:21:00 +02001129 v = codec_tuple(PyUnicode_EncodeCodePage(code_page,
1130 str,
1131 errors),
1132 PyUnicode_GET_LENGTH(str));
1133 Py_DECREF(str);
1134 return v;
1135}
1136
Victor Stinner99b95382011-07-04 14:23:54 +02001137#endif /* HAVE_MBCS */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001138
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001139/* --- Error handler registry --------------------------------------------- */
1140
Walter Dörwald0ae29812002-10-31 13:36:29 +00001141PyDoc_STRVAR(register_error__doc__,
1142"register_error(errors, handler)\n\
1143\n\
1144Register the specified error handler under the name\n\
1145errors. handler must be a callable object, that\n\
1146will be called with an exception instance containing\n\
1147information about the location of the encoding/decoding\n\
1148error and must return a (replacement, new position) tuple.");
1149
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001150static PyObject *register_error(PyObject *self, PyObject *args)
1151{
1152 const char *name;
1153 PyObject *handler;
1154
1155 if (!PyArg_ParseTuple(args, "sO:register_error",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001156 &name, &handler))
1157 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001158 if (PyCodec_RegisterError(name, handler))
1159 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001160 Py_RETURN_NONE;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001161}
1162
Walter Dörwald0ae29812002-10-31 13:36:29 +00001163PyDoc_STRVAR(lookup_error__doc__,
1164"lookup_error(errors) -> handler\n\
1165\n\
1166Return the error handler for the specified error handling name\n\
1167or raise a LookupError, if no handler exists under this name.");
1168
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001169static PyObject *lookup_error(PyObject *self, PyObject *args)
1170{
1171 const char *name;
1172
1173 if (!PyArg_ParseTuple(args, "s:lookup_error",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001174 &name))
1175 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001176 return PyCodec_LookupError(name);
1177}
1178
Guido van Rossume2d67f92000-03-10 23:09:23 +00001179/* --- Module API --------------------------------------------------------- */
1180
1181static PyMethodDef _codecs_functions[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001182 {"register", codec_register, METH_O,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001183 register__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001184 {"lookup", codec_lookup, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001185 lookup__doc__},
Victor Stinnera57dfd02014-05-14 17:13:14 +02001186 {"encode", (PyCFunction)codec_encode, METH_VARARGS|METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001187 encode__doc__},
Victor Stinnera57dfd02014-05-14 17:13:14 +02001188 {"decode", (PyCFunction)codec_decode, METH_VARARGS|METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001189 decode__doc__},
1190 {"escape_encode", escape_encode, METH_VARARGS},
1191 {"escape_decode", escape_decode, METH_VARARGS},
1192 {"utf_8_encode", utf_8_encode, METH_VARARGS},
1193 {"utf_8_decode", utf_8_decode, METH_VARARGS},
1194 {"utf_7_encode", utf_7_encode, METH_VARARGS},
1195 {"utf_7_decode", utf_7_decode, METH_VARARGS},
1196 {"utf_16_encode", utf_16_encode, METH_VARARGS},
1197 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1198 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1199 {"utf_16_decode", utf_16_decode, METH_VARARGS},
1200 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1201 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1202 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
1203 {"utf_32_encode", utf_32_encode, METH_VARARGS},
1204 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1205 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1206 {"utf_32_decode", utf_32_decode, METH_VARARGS},
1207 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1208 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1209 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
1210 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1211 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1212 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1213 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1214 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1215 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1216 {"latin_1_encode", latin_1_encode, METH_VARARGS},
1217 {"latin_1_decode", latin_1_decode, METH_VARARGS},
1218 {"ascii_encode", ascii_encode, METH_VARARGS},
1219 {"ascii_decode", ascii_decode, METH_VARARGS},
1220 {"charmap_encode", charmap_encode, METH_VARARGS},
1221 {"charmap_decode", charmap_decode, METH_VARARGS},
1222 {"charmap_build", charmap_build, METH_VARARGS},
1223 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
Victor Stinner99b95382011-07-04 14:23:54 +02001224#ifdef HAVE_MBCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001225 {"mbcs_encode", mbcs_encode, METH_VARARGS},
1226 {"mbcs_decode", mbcs_decode, METH_VARARGS},
Victor Stinner3a50e702011-10-18 21:21:00 +02001227 {"code_page_encode", code_page_encode, METH_VARARGS},
1228 {"code_page_decode", code_page_decode, METH_VARARGS},
Guido van Rossum24bdb042000-03-28 20:29:59 +00001229#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001230 {"register_error", register_error, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001231 register_error__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001232 {"lookup_error", lookup_error, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001233 lookup_error__doc__},
Nick Coghlan8fad1672014-09-15 23:50:44 +12001234 _CODECS__FORGET_CODEC_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001235 {NULL, NULL} /* sentinel */
Guido van Rossume2d67f92000-03-10 23:09:23 +00001236};
1237
Martin v. Löwis1a214512008-06-11 05:26:20 +00001238static struct PyModuleDef codecsmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001239 PyModuleDef_HEAD_INIT,
1240 "_codecs",
1241 NULL,
1242 -1,
1243 _codecs_functions,
1244 NULL,
1245 NULL,
1246 NULL,
1247 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001248};
1249
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001250PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001251PyInit__codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001252{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001253 return PyModule_Create(&codecsmodule);
Guido van Rossume2d67f92000-03-10 23:09:23 +00001254}