blob: 83aaa647f869cf983588b0d5d7b8be6fe311ba0a [file] [log] [blame]
/* ------------------------------------------------------------------------

   _codecs -- Provides access to the codec registry and the builtin
              codecs.

   This module should never be imported directly. The standard library
   module "codecs" wraps this builtin module for use within Python.

   The codec registry is accessible via:

     register(search_function) -> None

     lookup(encoding) -> CodecInfo object

   The builtin Unicode codecs use the following interface:

     <encoding>_encode(Unicode_object[,errors='strict']) ->
        (string object, bytes consumed)

     <encoding>_decode(char_buffer_obj[,errors='strict']) ->
        (Unicode object, bytes consumed)

   <encoding>_encode() interfaces also accept non-Unicode objects as
   input. The objects are then converted to Unicode using
   PyUnicode_FromObject() prior to applying the conversion.

   These <encoding>s are available: utf_8, unicode_escape,
   raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
   mbcs (on win32).


Written by Marc-Andre Lemburg (mal@lemburg.com).

Copyright (c) Corporation for National Research Initiatives.

   ------------------------------------------------------------------------ */
37
Martin v. Löwis18e16552006-02-15 17:27:45 +000038#define PY_SSIZE_T_CLEAN
Guido van Rossume2d67f92000-03-10 23:09:23 +000039#include "Python.h"
40
Victor Stinner87a7c822011-11-10 20:05:55 +010041#ifdef MS_WINDOWS
42#include <windows.h>
43#endif
44
Nick Coghlan8fad1672014-09-15 23:50:44 +120045/*[clinic input]
46module _codecs
47[clinic start generated code]*/
48/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
49
50
Guido van Rossume2d67f92000-03-10 23:09:23 +000051/* --- Registry ----------------------------------------------------------- */
52
Walter Dörwald0ae29812002-10-31 13:36:29 +000053PyDoc_STRVAR(register__doc__,
54"register(search_function)\n\
55\n\
56Register a codec search function. Search functions are expected to take\n\
57one argument, the encoding name in all lower case letters, and return\n\
Guido van Rossum36e0a922007-07-20 04:05:57 +000058a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
59(or a CodecInfo object).");
Walter Dörwald0ae29812002-10-31 13:36:29 +000060
Guido van Rossume2d67f92000-03-10 23:09:23 +000061static
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000062PyObject *codec_register(PyObject *self, PyObject *search_function)
Guido van Rossume2d67f92000-03-10 23:09:23 +000063{
Guido van Rossume2d67f92000-03-10 23:09:23 +000064 if (PyCodec_Register(search_function))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000065 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000066
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000067 Py_RETURN_NONE;
Guido van Rossume2d67f92000-03-10 23:09:23 +000068}
69
Walter Dörwald0ae29812002-10-31 13:36:29 +000070PyDoc_STRVAR(lookup__doc__,
Guido van Rossum36e0a922007-07-20 04:05:57 +000071"lookup(encoding) -> CodecInfo\n\
Walter Dörwald0ae29812002-10-31 13:36:29 +000072\n\
73Looks up a codec tuple in the Python codec registry and returns\n\
Benjamin Petersonf07d0022009-03-21 17:31:58 +000074a CodecInfo object.");
Walter Dörwald0ae29812002-10-31 13:36:29 +000075
Guido van Rossume2d67f92000-03-10 23:09:23 +000076static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000077PyObject *codec_lookup(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000078{
79 char *encoding;
80
81 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000082 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +000083
84 return _PyCodec_Lookup(encoding);
Guido van Rossume2d67f92000-03-10 23:09:23 +000085}
86
Marc-André Lemburg3f419742004-07-10 12:06:10 +000087PyDoc_STRVAR(encode__doc__,
88"encode(obj, [encoding[,errors]]) -> object\n\
89\n\
90Encodes obj using the codec registered for encoding. encoding defaults\n\
91to the default encoding. errors may be given to set a different error\n\
92handling scheme. Default is 'strict' meaning that encoding errors raise\n\
93a ValueError. Other possible values are 'ignore', 'replace' and\n\
94'xmlcharrefreplace' as well as any other name registered with\n\
95codecs.register_error that can handle ValueErrors.");
96
97static PyObject *
Victor Stinnera57dfd02014-05-14 17:13:14 +020098codec_encode(PyObject *self, PyObject *args, PyObject *kwargs)
Marc-André Lemburg3f419742004-07-10 12:06:10 +000099{
Victor Stinnera57dfd02014-05-14 17:13:14 +0200100 static char *kwlist[] = {"obj", "encoding", "errors", NULL};
Brett Cannon3e377de2004-07-10 21:41:14 +0000101 const char *encoding = NULL;
102 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000103 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000104
Victor Stinnera57dfd02014-05-14 17:13:14 +0200105 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|ss:encode", kwlist,
106 &v, &encoding, &errors))
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000107 return NULL;
108
109 if (encoding == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000110 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000111
112 /* Encode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000113 return PyCodec_Encode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000114}
115
116PyDoc_STRVAR(decode__doc__,
117"decode(obj, [encoding[,errors]]) -> object\n\
118\n\
119Decodes obj using the codec registered for encoding. encoding defaults\n\
120to the default encoding. errors may be given to set a different error\n\
121handling scheme. Default is 'strict' meaning that encoding errors raise\n\
122a ValueError. Other possible values are 'ignore' and 'replace'\n\
Benjamin Petersonf10a79a2008-10-11 00:49:57 +0000123as well as any other name registered with codecs.register_error that is\n\
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000124able to handle ValueErrors.");
125
126static PyObject *
Victor Stinnera57dfd02014-05-14 17:13:14 +0200127codec_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000128{
Victor Stinnera57dfd02014-05-14 17:13:14 +0200129 static char *kwlist[] = {"obj", "encoding", "errors", NULL};
Brett Cannon3e377de2004-07-10 21:41:14 +0000130 const char *encoding = NULL;
131 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000132 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000133
Victor Stinnera57dfd02014-05-14 17:13:14 +0200134 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|ss:decode", kwlist,
135 &v, &encoding, &errors))
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000136 return NULL;
137
138 if (encoding == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000140
141 /* Decode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000142 return PyCodec_Decode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000143}
144
Guido van Rossume2d67f92000-03-10 23:09:23 +0000145/* --- Helpers ------------------------------------------------------------ */
146
Nick Coghlan8fad1672014-09-15 23:50:44 +1200147/*[clinic input]
148_codecs._forget_codec
149
150 encoding: str
151 /
152
153Purge the named codec from the internal codec lookup cache
154[clinic start generated code]*/
155
156PyDoc_STRVAR(_codecs__forget_codec__doc__,
157"_forget_codec($module, encoding, /)\n"
158"--\n"
159"\n"
160"Purge the named codec from the internal codec lookup cache");
161
162#define _CODECS__FORGET_CODEC_METHODDEF \
163 {"_forget_codec", (PyCFunction)_codecs__forget_codec, METH_VARARGS, _codecs__forget_codec__doc__},
164
165static PyObject *
166_codecs__forget_codec_impl(PyModuleDef *module, const char *encoding);
167
168static PyObject *
169_codecs__forget_codec(PyModuleDef *module, PyObject *args)
170{
171 PyObject *return_value = NULL;
172 const char *encoding;
173
174 if (!PyArg_ParseTuple(args,
175 "s:_forget_codec",
176 &encoding))
177 goto exit;
178 return_value = _codecs__forget_codec_impl(module, encoding);
179
180exit:
181 return return_value;
182}
183
184static PyObject *
185_codecs__forget_codec_impl(PyModuleDef *module, const char *encoding)
186/*[clinic end generated code: output=a75e631591702a5c input=18d5d92d0e386c38]*/
187{
188 if (_PyCodec_Forget(encoding) < 0) {
189 return NULL;
190 };
191 Py_RETURN_NONE;
192}
193
Guido van Rossume2d67f92000-03-10 23:09:23 +0000194static
195PyObject *codec_tuple(PyObject *unicode,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000196 Py_ssize_t len)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000197{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000198 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000199 if (unicode == NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000200 return NULL;
201 v = Py_BuildValue("On", unicode, len);
202 Py_DECREF(unicode);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000203 return v;
204}
205
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000206/* --- String codecs ------------------------------------------------------ */
207static PyObject *
208escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000209 PyObject *args)
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000210{
211 const char *errors = NULL;
212 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000213 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000214
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000215 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000216 &data, &size, &errors))
217 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +0000218 return codec_tuple(PyBytes_DecodeEscape(data, size, errors, 0, NULL),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000219 size);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000220}
221
222static PyObject *
223escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000224 PyObject *args)
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000225{
Antoine Pitroud1188562010-06-09 16:38:55 +0000226 PyObject *str;
227 Py_ssize_t size;
228 Py_ssize_t newsize;
229 const char *errors = NULL;
230 PyObject *v;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000231
Antoine Pitroud1188562010-06-09 16:38:55 +0000232 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
233 &PyBytes_Type, &str, &errors))
234 return NULL;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000235
Antoine Pitroud1188562010-06-09 16:38:55 +0000236 size = PyBytes_GET_SIZE(str);
Mark Dickinsonc04ddff2012-10-06 18:04:49 +0100237 if (size > PY_SSIZE_T_MAX / 4) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000238 PyErr_SetString(PyExc_OverflowError,
239 "string is too large to encode");
240 return NULL;
241 }
Mark Dickinsonc04ddff2012-10-06 18:04:49 +0100242 newsize = 4*size;
Antoine Pitroud1188562010-06-09 16:38:55 +0000243 v = PyBytes_FromStringAndSize(NULL, newsize);
244
245 if (v == NULL) {
246 return NULL;
247 }
248 else {
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200249 Py_ssize_t i;
250 char c;
251 char *p = PyBytes_AS_STRING(v);
Antoine Pitroud1188562010-06-09 16:38:55 +0000252
253 for (i = 0; i < size; i++) {
254 /* There's at least enough room for a hex escape */
255 assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
256 c = PyBytes_AS_STRING(str)[i];
257 if (c == '\'' || c == '\\')
258 *p++ = '\\', *p++ = c;
259 else if (c == '\t')
260 *p++ = '\\', *p++ = 't';
261 else if (c == '\n')
262 *p++ = '\\', *p++ = 'n';
263 else if (c == '\r')
264 *p++ = '\\', *p++ = 'r';
265 else if (c < ' ' || c >= 0x7f) {
266 *p++ = '\\';
267 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200268 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
269 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitroud1188562010-06-09 16:38:55 +0000270 }
271 else
272 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 }
Antoine Pitroud1188562010-06-09 16:38:55 +0000274 *p = '\0';
275 if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
276 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000277 }
Antoine Pitroud1188562010-06-09 16:38:55 +0000278 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000279
Antoine Pitroud1188562010-06-09 16:38:55 +0000280 return codec_tuple(v, size);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000281}
282
Guido van Rossume2d67f92000-03-10 23:09:23 +0000283/* --- Decoder ------------------------------------------------------------ */
284
285static PyObject *
286unicode_internal_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000287 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000288{
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000289 PyObject *obj;
290 const char *errors = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000291 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000292 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000293
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000294 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000295 &obj, &errors))
296 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000297
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000298 if (PyUnicode_Check(obj)) {
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100299 if (PyUnicode_READY(obj) < 0)
300 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000301 Py_INCREF(obj);
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100302 return codec_tuple(obj, PyUnicode_GET_LENGTH(obj));
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000303 }
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000304 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000305 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
306 return NULL;
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000307
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000308 return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
309 size);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000310 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000311}
312
313static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000314utf_7_decode(PyObject *self,
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000315 PyObject *args)
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000316{
Antoine Pitroud1188562010-06-09 16:38:55 +0000317 Py_buffer pbuf;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000318 const char *errors = NULL;
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000319 int final = 0;
320 Py_ssize_t consumed;
321 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000322
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000323 if (!PyArg_ParseTuple(args, "y*|zi:utf_7_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000324 &pbuf, &errors, &final))
325 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000326 consumed = pbuf.len;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000327
Martin v. Löwis423be952008-08-13 15:53:07 +0000328 decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000329 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000330 PyBuffer_Release(&pbuf);
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000331 if (decoded == NULL)
332 return NULL;
333 return codec_tuple(decoded, consumed);
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000334}
335
336static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000337utf_8_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000338 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000339{
Antoine Pitroud1188562010-06-09 16:38:55 +0000340 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000341 const char *errors = NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000342 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000343 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000344 PyObject *decoded = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000345
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000346 if (!PyArg_ParseTuple(args, "y*|zi:utf_8_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000347 &pbuf, &errors, &final))
348 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000349 consumed = pbuf.len;
350
351 decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000352 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000353 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000354 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000355 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000356 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000357}
358
359static PyObject *
360utf_16_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000361 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000362{
Antoine Pitroud1188562010-06-09 16:38:55 +0000363 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000364 const char *errors = NULL;
365 int byteorder = 0;
Walter Dörwald69652032004-09-07 20:24:22 +0000366 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000367 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000368 PyObject *decoded;
369
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000370 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000371 &pbuf, &errors, &final))
372 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000373 consumed = pbuf.len; /* This is overwritten unless final is true. */
374 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000375 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000376 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000377 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000378 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000379 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000380}
381
382static PyObject *
383utf_16_le_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000384 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000385{
Antoine Pitroud1188562010-06-09 16:38:55 +0000386 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000387 const char *errors = NULL;
388 int byteorder = -1;
Walter Dörwald69652032004-09-07 20:24:22 +0000389 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000390 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000391 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000392
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000393 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_le_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000394 &pbuf, &errors, &final))
395 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000396
Martin v. Löwis423be952008-08-13 15:53:07 +0000397 consumed = pbuf.len; /* This is overwritten unless final is true. */
398 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000399 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000400 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000401 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000402 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000403 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000404}
405
406static PyObject *
407utf_16_be_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000408 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000409{
Antoine Pitroud1188562010-06-09 16:38:55 +0000410 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000411 const char *errors = NULL;
412 int byteorder = 1;
Walter Dörwald69652032004-09-07 20:24:22 +0000413 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000414 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000415 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000416
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000417 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_be_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000418 &pbuf, &errors, &final))
419 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000420
421 consumed = pbuf.len; /* This is overwritten unless final is true. */
422 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000423 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000424 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000425 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000426 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000427 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000428}
429
430/* This non-standard version also provides access to the byteorder
431 parameter of the builtin UTF-16 codec.
432
433 It returns a tuple (unicode, bytesread, byteorder) with byteorder
434 being the value in effect at the end of data.
435
436*/
437
438static PyObject *
439utf_16_ex_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000440 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000441{
Antoine Pitroud1188562010-06-09 16:38:55 +0000442 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000443 const char *errors = NULL;
444 int byteorder = 0;
445 PyObject *unicode, *tuple;
Walter Dörwald69652032004-09-07 20:24:22 +0000446 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000447 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000448
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000449 if (!PyArg_ParseTuple(args, "y*|zii:utf_16_ex_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 &pbuf, &errors, &byteorder, &final))
451 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000452 consumed = pbuf.len; /* This is overwritten unless final is true. */
453 unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000454 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000455 PyBuffer_Release(&pbuf);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000456 if (unicode == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000457 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000458 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000459 Py_DECREF(unicode);
460 return tuple;
461}
462
463static PyObject *
Walter Dörwald41980ca2007-08-16 21:55:45 +0000464utf_32_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000465 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000466{
Antoine Pitroud1188562010-06-09 16:38:55 +0000467 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000468 const char *errors = NULL;
469 int byteorder = 0;
470 int final = 0;
471 Py_ssize_t consumed;
472 PyObject *decoded;
473
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000474 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000475 &pbuf, &errors, &final))
476 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000477 consumed = pbuf.len; /* This is overwritten unless final is true. */
478 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000479 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000480 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000481 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000482 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000483 return codec_tuple(decoded, consumed);
484}
485
486static PyObject *
487utf_32_le_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000488 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000489{
Antoine Pitroud1188562010-06-09 16:38:55 +0000490 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000491 const char *errors = NULL;
492 int byteorder = -1;
493 int final = 0;
494 Py_ssize_t consumed;
Martin v. Löwis423be952008-08-13 15:53:07 +0000495 PyObject *decoded;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000496
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000497 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_le_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000498 &pbuf, &errors, &final))
499 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000500 consumed = pbuf.len; /* This is overwritten unless final is true. */
501 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000502 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000503 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000504 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000505 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000506 return codec_tuple(decoded, consumed);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000507}
508
509static PyObject *
510utf_32_be_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000511 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000512{
Antoine Pitroud1188562010-06-09 16:38:55 +0000513 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000514 const char *errors = NULL;
515 int byteorder = 1;
516 int final = 0;
517 Py_ssize_t consumed;
Martin v. Löwis423be952008-08-13 15:53:07 +0000518 PyObject *decoded;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000519
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000520 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_be_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000521 &pbuf, &errors, &final))
522 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000523 consumed = pbuf.len; /* This is overwritten unless final is true. */
524 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000525 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000526 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000527 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000528 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000529 return codec_tuple(decoded, consumed);
530}
531
532/* This non-standard version also provides access to the byteorder
533 parameter of the builtin UTF-32 codec.
534
535 It returns a tuple (unicode, bytesread, byteorder) with byteorder
536 being the value in effect at the end of data.
537
538*/
539
540static PyObject *
541utf_32_ex_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000542 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000543{
Antoine Pitroud1188562010-06-09 16:38:55 +0000544 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000545 const char *errors = NULL;
546 int byteorder = 0;
547 PyObject *unicode, *tuple;
548 int final = 0;
549 Py_ssize_t consumed;
550
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000551 if (!PyArg_ParseTuple(args, "y*|zii:utf_32_ex_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000552 &pbuf, &errors, &byteorder, &final))
553 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000554 consumed = pbuf.len; /* This is overwritten unless final is true. */
555 unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000556 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000557 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000558 if (unicode == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000559 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000560 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
561 Py_DECREF(unicode);
562 return tuple;
563}
564
565static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000566unicode_escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000567 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000568{
Antoine Pitroud1188562010-06-09 16:38:55 +0000569 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000570 const char *errors = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000571 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000572
Martin v. Löwis423be952008-08-13 15:53:07 +0000573 if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000574 &pbuf, &errors))
575 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000576
Antoine Pitroud1188562010-06-09 16:38:55 +0000577 unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
578 PyBuffer_Release(&pbuf);
579 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000580}
581
582static PyObject *
583raw_unicode_escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000584 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000585{
Antoine Pitroud1188562010-06-09 16:38:55 +0000586 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000587 const char *errors = NULL;
Antoine Pitroud1188562010-06-09 16:38:55 +0000588 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000589
Martin v. Löwis423be952008-08-13 15:53:07 +0000590 if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000591 &pbuf, &errors))
592 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000593
Antoine Pitroud1188562010-06-09 16:38:55 +0000594 unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
595 PyBuffer_Release(&pbuf);
596 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000597}
598
599static PyObject *
600latin_1_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000601 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000602{
Antoine Pitroud1188562010-06-09 16:38:55 +0000603 Py_buffer pbuf;
604 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000605 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000606
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000607 if (!PyArg_ParseTuple(args, "y*|z:latin_1_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000608 &pbuf, &errors))
609 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000610
Antoine Pitroud1188562010-06-09 16:38:55 +0000611 unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
612 PyBuffer_Release(&pbuf);
613 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000614}
615
616static PyObject *
617ascii_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000618 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000619{
Antoine Pitroud1188562010-06-09 16:38:55 +0000620 Py_buffer pbuf;
621 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000622 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000623
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000624 if (!PyArg_ParseTuple(args, "y*|z:ascii_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000625 &pbuf, &errors))
626 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000627
Antoine Pitroud1188562010-06-09 16:38:55 +0000628 unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
629 PyBuffer_Release(&pbuf);
630 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000631}
632
633static PyObject *
634charmap_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000635 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000636{
Antoine Pitroud1188562010-06-09 16:38:55 +0000637 Py_buffer pbuf;
638 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000639 const char *errors = NULL;
640 PyObject *mapping = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000641
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000642 if (!PyArg_ParseTuple(args, "y*|zO:charmap_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000643 &pbuf, &errors, &mapping))
644 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000645 if (mapping == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000646 mapping = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000647
Antoine Pitroud1188562010-06-09 16:38:55 +0000648 unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
649 PyBuffer_Release(&pbuf);
650 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000651}
652
Victor Stinner99b95382011-07-04 14:23:54 +0200653#ifdef HAVE_MBCS
Guido van Rossum24bdb042000-03-28 20:29:59 +0000654
655static PyObject *
656mbcs_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000657 PyObject *args)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000658{
Antoine Pitroud1188562010-06-09 16:38:55 +0000659 Py_buffer pbuf;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000660 const char *errors = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000661 int final = 0;
Martin v. Löwis423be952008-08-13 15:53:07 +0000662 Py_ssize_t consumed;
663 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000664
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000665 if (!PyArg_ParseTuple(args, "y*|zi:mbcs_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000666 &pbuf, &errors, &final))
667 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000668 consumed = pbuf.len;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000669
Martin v. Löwis423be952008-08-13 15:53:07 +0000670 decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000671 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000672 PyBuffer_Release(&pbuf);
Martin v. Löwis423be952008-08-13 15:53:07 +0000673 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000674 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000675 return codec_tuple(decoded, consumed);
Guido van Rossum24bdb042000-03-28 20:29:59 +0000676}
677
Victor Stinner3a50e702011-10-18 21:21:00 +0200678static PyObject *
679code_page_decode(PyObject *self,
680 PyObject *args)
681{
682 Py_buffer pbuf;
683 const char *errors = NULL;
684 int final = 0;
685 Py_ssize_t consumed;
686 PyObject *decoded = NULL;
687 int code_page;
688
689 if (!PyArg_ParseTuple(args, "iy*|zi:code_page_decode",
690 &code_page, &pbuf, &errors, &final))
691 return NULL;
692 consumed = pbuf.len;
693
694 decoded = PyUnicode_DecodeCodePageStateful(code_page,
695 pbuf.buf, pbuf.len, errors,
696 final ? NULL : &consumed);
697 PyBuffer_Release(&pbuf);
698 if (decoded == NULL)
699 return NULL;
700 return codec_tuple(decoded, consumed);
701}
702
Victor Stinner99b95382011-07-04 14:23:54 +0200703#endif /* HAVE_MBCS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000704
Guido van Rossume2d67f92000-03-10 23:09:23 +0000705/* --- Encoder ------------------------------------------------------------ */
706
707static PyObject *
708readbuffer_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000709 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000710{
Martin v. Löwis423be952008-08-13 15:53:07 +0000711 Py_buffer pdata;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000712 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000713 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000714 const char *errors = NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000715 PyObject *result;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000716
Martin v. Löwis423be952008-08-13 15:53:07 +0000717 if (!PyArg_ParseTuple(args, "s*|z:readbuffer_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000718 &pdata, &errors))
719 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000720 data = pdata.buf;
721 size = pdata.len;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000722
Martin v. Löwis423be952008-08-13 15:53:07 +0000723 result = PyBytes_FromStringAndSize(data, size);
724 PyBuffer_Release(&pdata);
725 return codec_tuple(result, size);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000726}
727
728static PyObject *
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000729unicode_internal_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000730 PyObject *args)
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000731{
732 PyObject *obj;
733 const char *errors = NULL;
734 const char *data;
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100735 Py_ssize_t len, size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000736
Ezio Melottiadc417c2011-11-17 12:23:34 +0200737 if (PyErr_WarnEx(PyExc_DeprecationWarning,
738 "unicode_internal codec has been deprecated",
739 1))
740 return NULL;
741
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000742 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000743 &obj, &errors))
744 return NULL;
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000745
746 if (PyUnicode_Check(obj)) {
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100747 Py_UNICODE *u;
748
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100749 if (PyUnicode_READY(obj) < 0)
750 return NULL;
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100751
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100752 u = PyUnicode_AsUnicodeAndSize(obj, &len);
753 if (u == NULL)
754 return NULL;
Victor Stinner049e5092014-08-17 22:20:00 +0200755 if ((size_t)len > (size_t)PY_SSIZE_T_MAX / sizeof(Py_UNICODE))
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100756 return PyErr_NoMemory();
757 size = len * sizeof(Py_UNICODE);
758 return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size),
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100759 PyUnicode_GET_LENGTH(obj));
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000760 }
761 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000762 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
763 return NULL;
764 return codec_tuple(PyBytes_FromStringAndSize(data, size), size);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000765 }
766}
767
768static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000769utf_7_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000770 PyObject *args)
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000771{
772 PyObject *str, *v;
773 const char *errors = NULL;
774
775 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000776 &str, &errors))
777 return NULL;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000778
779 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100780 if (str == NULL || PyUnicode_READY(str) < 0) {
781 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000782 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100783 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100784 v = codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
785 PyUnicode_GET_LENGTH(str));
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000786 Py_DECREF(str);
787 return v;
788}
789
790static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000791utf_8_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000792 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000793{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000794 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000795 const char *errors = NULL;
796
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000797 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000798 &str, &errors))
799 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000800
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000801 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100802 if (str == NULL || PyUnicode_READY(str) < 0) {
803 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000804 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100805 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200806 v = codec_tuple(PyUnicode_AsEncodedString(str, "utf-8", errors),
807 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000808 Py_DECREF(str);
809 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000810}
811
812/* This version provides access to the byteorder parameter of the
813 builtin UTF-16 codecs as optional third argument. It defaults to 0
814 which means: use the native byte order and prepend the data with a
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000815 BOM mark.
Guido van Rossume2d67f92000-03-10 23:09:23 +0000816
817*/
818
819static PyObject *
820utf_16_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000821 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000822{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000823 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000824 const char *errors = NULL;
825 int byteorder = 0;
826
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000827 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000828 &str, &errors, &byteorder))
829 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000830
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000831 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100832 if (str == NULL || PyUnicode_READY(str) < 0) {
833 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000834 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100835 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100836 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
837 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000838 Py_DECREF(str);
839 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000840}
841
842static PyObject *
843utf_16_le_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000844 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000845{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000846 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000847 const char *errors = NULL;
848
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000849 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000850 &str, &errors))
851 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000852
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000853 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100854 if (str == NULL || PyUnicode_READY(str) < 0) {
855 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000856 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100857 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100858 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
859 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000860 Py_DECREF(str);
861 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000862}
863
864static PyObject *
865utf_16_be_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000866 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000867{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000868 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000869 const char *errors = NULL;
870
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000871 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000872 &str, &errors))
873 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000874
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000875 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100876 if (str == NULL || PyUnicode_READY(str) < 0) {
877 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000878 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100879 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100880 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
881 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000882 Py_DECREF(str);
883 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000884}
885
Walter Dörwald41980ca2007-08-16 21:55:45 +0000886/* This version provides access to the byteorder parameter of the
887 builtin UTF-32 codecs as optional third argument. It defaults to 0
888 which means: use the native byte order and prepend the data with a
889 BOM mark.
890
891*/
892
893static PyObject *
894utf_32_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000895 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000896{
897 PyObject *str, *v;
898 const char *errors = NULL;
899 int byteorder = 0;
900
901 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000902 &str, &errors, &byteorder))
903 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000904
905 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100906 if (str == NULL || PyUnicode_READY(str) < 0) {
907 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000908 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100909 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100910 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
911 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000912 Py_DECREF(str);
913 return v;
914}
915
916static PyObject *
917utf_32_le_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000918 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000919{
920 PyObject *str, *v;
921 const char *errors = NULL;
922
923 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000924 &str, &errors))
925 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000926
927 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100928 if (str == NULL || PyUnicode_READY(str) < 0) {
929 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000930 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100931 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100932 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
933 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000934 Py_DECREF(str);
935 return v;
936}
937
938static PyObject *
939utf_32_be_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000940 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000941{
942 PyObject *str, *v;
943 const char *errors = NULL;
944
945 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000946 &str, &errors))
947 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000948
949 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100950 if (str == NULL || PyUnicode_READY(str) < 0) {
951 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000952 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100953 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100954 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
955 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000956 Py_DECREF(str);
957 return v;
958}
959
Guido van Rossume2d67f92000-03-10 23:09:23 +0000960static PyObject *
961unicode_escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000962 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000963{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000964 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000965 const char *errors = NULL;
966
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000967 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000968 &str, &errors))
969 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000970
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000971 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100972 if (str == NULL || PyUnicode_READY(str) < 0) {
973 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000974 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100975 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100976 v = codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
977 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000978 Py_DECREF(str);
979 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000980}
981
982static PyObject *
983raw_unicode_escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000984 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000985{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000986 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000987 const char *errors = NULL;
988
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000989 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000990 &str, &errors))
991 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000992
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000993 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100994 if (str == NULL || PyUnicode_READY(str) < 0) {
995 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000996 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100997 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100998 v = codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
999 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001000 Py_DECREF(str);
1001 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001002}
1003
1004static PyObject *
1005latin_1_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001006 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001007{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001008 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001009 const char *errors = NULL;
1010
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001011 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001012 &str, &errors))
1013 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001014
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001015 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001016 if (str == NULL || PyUnicode_READY(str) < 0) {
1017 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001018 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001019 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001020 v = codec_tuple(_PyUnicode_AsLatin1String(str, errors),
1021 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001022 Py_DECREF(str);
1023 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001024}
1025
1026static PyObject *
1027ascii_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001028 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001029{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001030 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001031 const char *errors = NULL;
1032
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001033 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001034 &str, &errors))
1035 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001036
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001037 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001038 if (str == NULL || PyUnicode_READY(str) < 0) {
1039 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001040 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001041 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001042 v = codec_tuple(_PyUnicode_AsASCIIString(str, errors),
1043 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001044 Py_DECREF(str);
1045 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001046}
1047
1048static PyObject *
1049charmap_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001050 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001051{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001052 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001053 const char *errors = NULL;
1054 PyObject *mapping = NULL;
1055
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001056 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001057 &str, &errors, &mapping))
1058 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001059 if (mapping == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001060 mapping = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001061
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001062 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001063 if (str == NULL || PyUnicode_READY(str) < 0) {
1064 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001065 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001066 }
Martin v. Löwis23e275b2011-11-02 18:02:51 +01001067 v = codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001068 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001069 Py_DECREF(str);
1070 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001071}
1072
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001073static PyObject*
1074charmap_build(PyObject *self, PyObject *args)
1075{
1076 PyObject *map;
1077 if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
1078 return NULL;
1079 return PyUnicode_BuildEncodingMap(map);
1080}
1081
Victor Stinner99b95382011-07-04 14:23:54 +02001082#ifdef HAVE_MBCS
Guido van Rossum24bdb042000-03-28 20:29:59 +00001083
1084static PyObject *
1085mbcs_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 PyObject *args)
Guido van Rossum24bdb042000-03-28 20:29:59 +00001087{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001088 PyObject *str, *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001089 const char *errors = NULL;
1090
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001091 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 &str, &errors))
1093 return NULL;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001094
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001095 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001096 if (str == NULL || PyUnicode_READY(str) < 0) {
1097 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001098 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001099 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001100 v = codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
1101 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001102 Py_DECREF(str);
1103 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001104}
1105
Victor Stinner3a50e702011-10-18 21:21:00 +02001106static PyObject *
1107code_page_encode(PyObject *self,
1108 PyObject *args)
1109{
1110 PyObject *str, *v;
1111 const char *errors = NULL;
1112 int code_page;
1113
1114 if (!PyArg_ParseTuple(args, "iO|z:code_page_encode",
1115 &code_page, &str, &errors))
1116 return NULL;
1117
1118 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001119 if (str == NULL || PyUnicode_READY(str) < 0) {
1120 Py_XDECREF(str);
Victor Stinner3a50e702011-10-18 21:21:00 +02001121 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001122 }
Victor Stinner3a50e702011-10-18 21:21:00 +02001123 v = codec_tuple(PyUnicode_EncodeCodePage(code_page,
1124 str,
1125 errors),
1126 PyUnicode_GET_LENGTH(str));
1127 Py_DECREF(str);
1128 return v;
1129}
1130
Victor Stinner99b95382011-07-04 14:23:54 +02001131#endif /* HAVE_MBCS */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001132
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001133/* --- Error handler registry --------------------------------------------- */
1134
Walter Dörwald0ae29812002-10-31 13:36:29 +00001135PyDoc_STRVAR(register_error__doc__,
1136"register_error(errors, handler)\n\
1137\n\
1138Register the specified error handler under the name\n\
1139errors. handler must be a callable object, that\n\
1140will be called with an exception instance containing\n\
1141information about the location of the encoding/decoding\n\
1142error and must return a (replacement, new position) tuple.");
1143
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001144static PyObject *register_error(PyObject *self, PyObject *args)
1145{
1146 const char *name;
1147 PyObject *handler;
1148
1149 if (!PyArg_ParseTuple(args, "sO:register_error",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001150 &name, &handler))
1151 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001152 if (PyCodec_RegisterError(name, handler))
1153 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001154 Py_RETURN_NONE;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001155}
1156
Walter Dörwald0ae29812002-10-31 13:36:29 +00001157PyDoc_STRVAR(lookup_error__doc__,
1158"lookup_error(errors) -> handler\n\
1159\n\
1160Return the error handler for the specified error handling name\n\
1161or raise a LookupError, if no handler exists under this name.");
1162
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001163static PyObject *lookup_error(PyObject *self, PyObject *args)
1164{
1165 const char *name;
1166
1167 if (!PyArg_ParseTuple(args, "s:lookup_error",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001168 &name))
1169 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001170 return PyCodec_LookupError(name);
1171}
1172
Guido van Rossume2d67f92000-03-10 23:09:23 +00001173/* --- Module API --------------------------------------------------------- */
1174
1175static PyMethodDef _codecs_functions[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001176 {"register", codec_register, METH_O,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001177 register__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001178 {"lookup", codec_lookup, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001179 lookup__doc__},
Victor Stinnera57dfd02014-05-14 17:13:14 +02001180 {"encode", (PyCFunction)codec_encode, METH_VARARGS|METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001181 encode__doc__},
Victor Stinnera57dfd02014-05-14 17:13:14 +02001182 {"decode", (PyCFunction)codec_decode, METH_VARARGS|METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001183 decode__doc__},
1184 {"escape_encode", escape_encode, METH_VARARGS},
1185 {"escape_decode", escape_decode, METH_VARARGS},
1186 {"utf_8_encode", utf_8_encode, METH_VARARGS},
1187 {"utf_8_decode", utf_8_decode, METH_VARARGS},
1188 {"utf_7_encode", utf_7_encode, METH_VARARGS},
1189 {"utf_7_decode", utf_7_decode, METH_VARARGS},
1190 {"utf_16_encode", utf_16_encode, METH_VARARGS},
1191 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1192 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1193 {"utf_16_decode", utf_16_decode, METH_VARARGS},
1194 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1195 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1196 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
1197 {"utf_32_encode", utf_32_encode, METH_VARARGS},
1198 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1199 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1200 {"utf_32_decode", utf_32_decode, METH_VARARGS},
1201 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1202 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1203 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
1204 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1205 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1206 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1207 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1208 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1209 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1210 {"latin_1_encode", latin_1_encode, METH_VARARGS},
1211 {"latin_1_decode", latin_1_decode, METH_VARARGS},
1212 {"ascii_encode", ascii_encode, METH_VARARGS},
1213 {"ascii_decode", ascii_decode, METH_VARARGS},
1214 {"charmap_encode", charmap_encode, METH_VARARGS},
1215 {"charmap_decode", charmap_decode, METH_VARARGS},
1216 {"charmap_build", charmap_build, METH_VARARGS},
1217 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
Victor Stinner99b95382011-07-04 14:23:54 +02001218#ifdef HAVE_MBCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001219 {"mbcs_encode", mbcs_encode, METH_VARARGS},
1220 {"mbcs_decode", mbcs_decode, METH_VARARGS},
Victor Stinner3a50e702011-10-18 21:21:00 +02001221 {"code_page_encode", code_page_encode, METH_VARARGS},
1222 {"code_page_decode", code_page_decode, METH_VARARGS},
Guido van Rossum24bdb042000-03-28 20:29:59 +00001223#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001224 {"register_error", register_error, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001225 register_error__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001226 {"lookup_error", lookup_error, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001227 lookup_error__doc__},
Nick Coghlan8fad1672014-09-15 23:50:44 +12001228 _CODECS__FORGET_CODEC_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001229 {NULL, NULL} /* sentinel */
Guido van Rossume2d67f92000-03-10 23:09:23 +00001230};
1231
Martin v. Löwis1a214512008-06-11 05:26:20 +00001232static struct PyModuleDef codecsmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001233 PyModuleDef_HEAD_INIT,
1234 "_codecs",
1235 NULL,
1236 -1,
1237 _codecs_functions,
1238 NULL,
1239 NULL,
1240 NULL,
1241 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001242};
1243
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001244PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001245PyInit__codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001246{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001247 return PyModule_Create(&codecsmodule);
Guido van Rossume2d67f92000-03-10 23:09:23 +00001248}