blob: 2bd751a8150fb56574c47ccbf93599e8f7b37913 [file] [log] [blame]
Guido van Rossume2d67f92000-03-10 23:09:23 +00001/* ------------------------------------------------------------------------
2
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
5
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
8
9 The codec registry is accessible via:
10
11 register(search_function) -> None
12
Guido van Rossum36e0a922007-07-20 04:05:57 +000013 lookup(encoding) -> CodecInfo object
Guido van Rossume2d67f92000-03-10 23:09:23 +000014
15 The builtin Unicode codecs use the following interface:
16
Walter Dörwald9fd115c2005-11-02 08:30:08 +000017 <encoding>_encode(Unicode_object[,errors='strict']) ->
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000018 (string object, bytes consumed)
Guido van Rossume2d67f92000-03-10 23:09:23 +000019
Walter Dörwald9fd115c2005-11-02 08:30:08 +000020 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
Guido van Rossume2d67f92000-03-10 23:09:23 +000021 (Unicode object, bytes consumed)
22
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +000023 <encoding>_encode() interfaces also accept non-Unicode object as
24 input. The objects are then converted to Unicode using
25 PyUnicode_FromObject() prior to applying the conversion.
26
Guido van Rossume2d67f92000-03-10 23:09:23 +000027 These <encoding>s are available: utf_8, unicode_escape,
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +000028 raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
29 mbcs (on win32).
30
Guido van Rossume2d67f92000-03-10 23:09:23 +000031
32Written by Marc-Andre Lemburg (mal@lemburg.com).
33
Guido van Rossum16b1ad92000-08-03 16:24:25 +000034Copyright (c) Corporation for National Research Initiatives.
Guido van Rossume2d67f92000-03-10 23:09:23 +000035
36 ------------------------------------------------------------------------ */
37
Martin v. Löwis18e16552006-02-15 17:27:45 +000038#define PY_SSIZE_T_CLEAN
Guido van Rossume2d67f92000-03-10 23:09:23 +000039#include "Python.h"
40
Victor Stinner87a7c822011-11-10 20:05:55 +010041#ifdef MS_WINDOWS
42#include <windows.h>
43#endif
44
Nick Coghlan8fad1672014-09-15 23:50:44 +120045/*[clinic input]
46module _codecs
47[clinic start generated code]*/
48/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
49
50
Guido van Rossume2d67f92000-03-10 23:09:23 +000051/* --- Registry ----------------------------------------------------------- */
52
Walter Dörwald0ae29812002-10-31 13:36:29 +000053PyDoc_STRVAR(register__doc__,
54"register(search_function)\n\
55\n\
56Register a codec search function. Search functions are expected to take\n\
Nick Coghlanb9fdb7a2015-01-07 00:22:00 +100057one argument, the encoding name in all lower case letters, and either\n\
58return None, or a tuple of functions (encoder, decoder, stream_reader,\n\
59stream_writer) (or a CodecInfo object).");
Walter Dörwald0ae29812002-10-31 13:36:29 +000060
Guido van Rossume2d67f92000-03-10 23:09:23 +000061static
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000062PyObject *codec_register(PyObject *self, PyObject *search_function)
Guido van Rossume2d67f92000-03-10 23:09:23 +000063{
Guido van Rossume2d67f92000-03-10 23:09:23 +000064 if (PyCodec_Register(search_function))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000065 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000066
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000067 Py_RETURN_NONE;
Guido van Rossume2d67f92000-03-10 23:09:23 +000068}
69
Walter Dörwald0ae29812002-10-31 13:36:29 +000070PyDoc_STRVAR(lookup__doc__,
Guido van Rossum36e0a922007-07-20 04:05:57 +000071"lookup(encoding) -> CodecInfo\n\
Walter Dörwald0ae29812002-10-31 13:36:29 +000072\n\
73Looks up a codec tuple in the Python codec registry and returns\n\
Benjamin Petersonf07d0022009-03-21 17:31:58 +000074a CodecInfo object.");
Walter Dörwald0ae29812002-10-31 13:36:29 +000075
Guido van Rossume2d67f92000-03-10 23:09:23 +000076static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000077PyObject *codec_lookup(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000078{
79 char *encoding;
80
81 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000082 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +000083
84 return _PyCodec_Lookup(encoding);
Guido van Rossume2d67f92000-03-10 23:09:23 +000085}
86
Marc-André Lemburg3f419742004-07-10 12:06:10 +000087PyDoc_STRVAR(encode__doc__,
88"encode(obj, [encoding[,errors]]) -> object\n\
89\n\
90Encodes obj using the codec registered for encoding. encoding defaults\n\
91to the default encoding. errors may be given to set a different error\n\
92handling scheme. Default is 'strict' meaning that encoding errors raise\n\
93a ValueError. Other possible values are 'ignore', 'replace' and\n\
94'xmlcharrefreplace' as well as any other name registered with\n\
95codecs.register_error that can handle ValueErrors.");
96
97static PyObject *
98codec_encode(PyObject *self, PyObject *args)
99{
Brett Cannon3e377de2004-07-10 21:41:14 +0000100 const char *encoding = NULL;
101 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000102 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000103
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000104 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
105 return NULL;
106
107 if (encoding == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000108 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000109
110 /* Encode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000111 return PyCodec_Encode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000112}
113
114PyDoc_STRVAR(decode__doc__,
115"decode(obj, [encoding[,errors]]) -> object\n\
116\n\
117Decodes obj using the codec registered for encoding. encoding defaults\n\
118to the default encoding. errors may be given to set a different error\n\
119handling scheme. Default is 'strict' meaning that encoding errors raise\n\
120a ValueError. Other possible values are 'ignore' and 'replace'\n\
Benjamin Petersonf10a79a2008-10-11 00:49:57 +0000121as well as any other name registered with codecs.register_error that is\n\
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000122able to handle ValueErrors.");
123
124static PyObject *
125codec_decode(PyObject *self, PyObject *args)
126{
Brett Cannon3e377de2004-07-10 21:41:14 +0000127 const char *encoding = NULL;
128 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000129 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000130
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000131 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
132 return NULL;
133
134 if (encoding == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000135 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000136
137 /* Decode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000138 return PyCodec_Decode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000139}
140
Guido van Rossume2d67f92000-03-10 23:09:23 +0000141/* --- Helpers ------------------------------------------------------------ */
142
Nick Coghlan8fad1672014-09-15 23:50:44 +1200143/*[clinic input]
144_codecs._forget_codec
145
146 encoding: str
147 /
148
149Purge the named codec from the internal codec lookup cache
150[clinic start generated code]*/
151
152PyDoc_STRVAR(_codecs__forget_codec__doc__,
153"_forget_codec($module, encoding, /)\n"
154"--\n"
155"\n"
156"Purge the named codec from the internal codec lookup cache");
157
158#define _CODECS__FORGET_CODEC_METHODDEF \
159 {"_forget_codec", (PyCFunction)_codecs__forget_codec, METH_VARARGS, _codecs__forget_codec__doc__},
160
161static PyObject *
162_codecs__forget_codec_impl(PyModuleDef *module, const char *encoding);
163
164static PyObject *
165_codecs__forget_codec(PyModuleDef *module, PyObject *args)
166{
167 PyObject *return_value = NULL;
168 const char *encoding;
169
170 if (!PyArg_ParseTuple(args,
171 "s:_forget_codec",
172 &encoding))
173 goto exit;
174 return_value = _codecs__forget_codec_impl(module, encoding);
175
176exit:
177 return return_value;
178}
179
180static PyObject *
181_codecs__forget_codec_impl(PyModuleDef *module, const char *encoding)
182/*[clinic end generated code: output=a75e631591702a5c input=18d5d92d0e386c38]*/
183{
184 if (_PyCodec_Forget(encoding) < 0) {
185 return NULL;
186 };
187 Py_RETURN_NONE;
188}
189
Guido van Rossume2d67f92000-03-10 23:09:23 +0000190static
191PyObject *codec_tuple(PyObject *unicode,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000192 Py_ssize_t len)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000193{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000194 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000195 if (unicode == NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000196 return NULL;
197 v = Py_BuildValue("On", unicode, len);
198 Py_DECREF(unicode);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000199 return v;
200}
201
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000202/* --- String codecs ------------------------------------------------------ */
203static PyObject *
204escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000205 PyObject *args)
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000206{
207 const char *errors = NULL;
208 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000209 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000210
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000211 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000212 &data, &size, &errors))
213 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +0000214 return codec_tuple(PyBytes_DecodeEscape(data, size, errors, 0, NULL),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000215 size);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000216}
217
218static PyObject *
219escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000220 PyObject *args)
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000221{
Antoine Pitroud1188562010-06-09 16:38:55 +0000222 PyObject *str;
223 Py_ssize_t size;
224 Py_ssize_t newsize;
225 const char *errors = NULL;
226 PyObject *v;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000227
Antoine Pitroud1188562010-06-09 16:38:55 +0000228 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
229 &PyBytes_Type, &str, &errors))
230 return NULL;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000231
Antoine Pitroud1188562010-06-09 16:38:55 +0000232 size = PyBytes_GET_SIZE(str);
Mark Dickinsonc04ddff2012-10-06 18:04:49 +0100233 if (size > PY_SSIZE_T_MAX / 4) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000234 PyErr_SetString(PyExc_OverflowError,
235 "string is too large to encode");
236 return NULL;
237 }
Mark Dickinsonc04ddff2012-10-06 18:04:49 +0100238 newsize = 4*size;
Antoine Pitroud1188562010-06-09 16:38:55 +0000239 v = PyBytes_FromStringAndSize(NULL, newsize);
240
241 if (v == NULL) {
242 return NULL;
243 }
244 else {
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200245 Py_ssize_t i;
246 char c;
247 char *p = PyBytes_AS_STRING(v);
Antoine Pitroud1188562010-06-09 16:38:55 +0000248
249 for (i = 0; i < size; i++) {
250 /* There's at least enough room for a hex escape */
251 assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
252 c = PyBytes_AS_STRING(str)[i];
253 if (c == '\'' || c == '\\')
254 *p++ = '\\', *p++ = c;
255 else if (c == '\t')
256 *p++ = '\\', *p++ = 't';
257 else if (c == '\n')
258 *p++ = '\\', *p++ = 'n';
259 else if (c == '\r')
260 *p++ = '\\', *p++ = 'r';
261 else if (c < ' ' || c >= 0x7f) {
262 *p++ = '\\';
263 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200264 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
265 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitroud1188562010-06-09 16:38:55 +0000266 }
267 else
268 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000269 }
Antoine Pitroud1188562010-06-09 16:38:55 +0000270 *p = '\0';
271 if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
272 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 }
Antoine Pitroud1188562010-06-09 16:38:55 +0000274 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000275
Antoine Pitroud1188562010-06-09 16:38:55 +0000276 return codec_tuple(v, size);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000277}
278
Guido van Rossume2d67f92000-03-10 23:09:23 +0000279/* --- Decoder ------------------------------------------------------------ */
280
281static PyObject *
282unicode_internal_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000283 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000284{
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000285 PyObject *obj;
286 const char *errors = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000287 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000288 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000289
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000290 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000291 &obj, &errors))
292 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000293
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000294 if (PyUnicode_Check(obj)) {
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100295 if (PyUnicode_READY(obj) < 0)
296 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000297 Py_INCREF(obj);
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100298 return codec_tuple(obj, PyUnicode_GET_LENGTH(obj));
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000299 }
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000300 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000301 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
302 return NULL;
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000303
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000304 return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
305 size);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000306 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000307}
308
309static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000310utf_7_decode(PyObject *self,
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000311 PyObject *args)
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000312{
Antoine Pitroud1188562010-06-09 16:38:55 +0000313 Py_buffer pbuf;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000314 const char *errors = NULL;
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000315 int final = 0;
316 Py_ssize_t consumed;
317 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000318
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000319 if (!PyArg_ParseTuple(args, "y*|zi:utf_7_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000320 &pbuf, &errors, &final))
321 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000322 consumed = pbuf.len;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000323
Martin v. Löwis423be952008-08-13 15:53:07 +0000324 decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000325 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000326 PyBuffer_Release(&pbuf);
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000327 if (decoded == NULL)
328 return NULL;
329 return codec_tuple(decoded, consumed);
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000330}
331
332static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000333utf_8_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000334 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000335{
Antoine Pitroud1188562010-06-09 16:38:55 +0000336 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000337 const char *errors = NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000338 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000339 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000340 PyObject *decoded = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000341
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000342 if (!PyArg_ParseTuple(args, "y*|zi:utf_8_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000343 &pbuf, &errors, &final))
344 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000345 consumed = pbuf.len;
346
347 decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000348 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000349 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000350 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000351 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000352 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000353}
354
355static PyObject *
356utf_16_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000357 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000358{
Antoine Pitroud1188562010-06-09 16:38:55 +0000359 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000360 const char *errors = NULL;
361 int byteorder = 0;
Walter Dörwald69652032004-09-07 20:24:22 +0000362 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000363 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000364 PyObject *decoded;
365
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000366 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 &pbuf, &errors, &final))
368 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000369 consumed = pbuf.len; /* This is overwritten unless final is true. */
370 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000371 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000372 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000373 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000374 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000375 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000376}
377
378static PyObject *
379utf_16_le_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000380 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000381{
Antoine Pitroud1188562010-06-09 16:38:55 +0000382 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000383 const char *errors = NULL;
384 int byteorder = -1;
Walter Dörwald69652032004-09-07 20:24:22 +0000385 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000386 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000387 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000388
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000389 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_le_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000390 &pbuf, &errors, &final))
391 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000392
Martin v. Löwis423be952008-08-13 15:53:07 +0000393 consumed = pbuf.len; /* This is overwritten unless final is true. */
394 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000395 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000396 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000397 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000398 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000399 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000400}
401
402static PyObject *
403utf_16_be_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000404 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000405{
Antoine Pitroud1188562010-06-09 16:38:55 +0000406 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000407 const char *errors = NULL;
408 int byteorder = 1;
Walter Dörwald69652032004-09-07 20:24:22 +0000409 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000410 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000411 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000412
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000413 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_be_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000414 &pbuf, &errors, &final))
415 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000416
417 consumed = pbuf.len; /* This is overwritten unless final is true. */
418 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000419 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000420 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000421 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000422 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000423 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000424}
425
426/* This non-standard version also provides access to the byteorder
427 parameter of the builtin UTF-16 codec.
428
429 It returns a tuple (unicode, bytesread, byteorder) with byteorder
430 being the value in effect at the end of data.
431
432*/
433
434static PyObject *
435utf_16_ex_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000436 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000437{
Antoine Pitroud1188562010-06-09 16:38:55 +0000438 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000439 const char *errors = NULL;
440 int byteorder = 0;
441 PyObject *unicode, *tuple;
Walter Dörwald69652032004-09-07 20:24:22 +0000442 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000443 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000444
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000445 if (!PyArg_ParseTuple(args, "y*|zii:utf_16_ex_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000446 &pbuf, &errors, &byteorder, &final))
447 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000448 consumed = pbuf.len; /* This is overwritten unless final is true. */
449 unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000451 PyBuffer_Release(&pbuf);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000452 if (unicode == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000453 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000454 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000455 Py_DECREF(unicode);
456 return tuple;
457}
458
459static PyObject *
Walter Dörwald41980ca2007-08-16 21:55:45 +0000460utf_32_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000461 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000462{
Antoine Pitroud1188562010-06-09 16:38:55 +0000463 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000464 const char *errors = NULL;
465 int byteorder = 0;
466 int final = 0;
467 Py_ssize_t consumed;
468 PyObject *decoded;
469
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000470 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000471 &pbuf, &errors, &final))
472 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000473 consumed = pbuf.len; /* This is overwritten unless final is true. */
474 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000475 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000476 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000477 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000478 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000479 return codec_tuple(decoded, consumed);
480}
481
482static PyObject *
483utf_32_le_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000484 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000485{
Antoine Pitroud1188562010-06-09 16:38:55 +0000486 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000487 const char *errors = NULL;
488 int byteorder = -1;
489 int final = 0;
490 Py_ssize_t consumed;
Martin v. Löwis423be952008-08-13 15:53:07 +0000491 PyObject *decoded;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000492
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000493 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_le_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000494 &pbuf, &errors, &final))
495 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000496 consumed = pbuf.len; /* This is overwritten unless final is true. */
497 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000498 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000499 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000500 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000501 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000502 return codec_tuple(decoded, consumed);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000503}
504
505static PyObject *
506utf_32_be_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000507 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000508{
Antoine Pitroud1188562010-06-09 16:38:55 +0000509 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000510 const char *errors = NULL;
511 int byteorder = 1;
512 int final = 0;
513 Py_ssize_t consumed;
Martin v. Löwis423be952008-08-13 15:53:07 +0000514 PyObject *decoded;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000515
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000516 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_be_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000517 &pbuf, &errors, &final))
518 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000519 consumed = pbuf.len; /* This is overwritten unless final is true. */
520 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000521 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000522 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000523 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000524 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000525 return codec_tuple(decoded, consumed);
526}
527
528/* This non-standard version also provides access to the byteorder
529 parameter of the builtin UTF-32 codec.
530
531 It returns a tuple (unicode, bytesread, byteorder) with byteorder
532 being the value in effect at the end of data.
533
534*/
535
536static PyObject *
537utf_32_ex_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000538 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000539{
Antoine Pitroud1188562010-06-09 16:38:55 +0000540 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000541 const char *errors = NULL;
542 int byteorder = 0;
543 PyObject *unicode, *tuple;
544 int final = 0;
545 Py_ssize_t consumed;
546
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000547 if (!PyArg_ParseTuple(args, "y*|zii:utf_32_ex_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000548 &pbuf, &errors, &byteorder, &final))
549 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000550 consumed = pbuf.len; /* This is overwritten unless final is true. */
551 unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000552 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000553 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000554 if (unicode == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000555 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000556 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
557 Py_DECREF(unicode);
558 return tuple;
559}
560
561static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000562unicode_escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000563 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000564{
Antoine Pitroud1188562010-06-09 16:38:55 +0000565 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000566 const char *errors = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000567 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000568
Martin v. Löwis423be952008-08-13 15:53:07 +0000569 if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000570 &pbuf, &errors))
571 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000572
Antoine Pitroud1188562010-06-09 16:38:55 +0000573 unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
574 PyBuffer_Release(&pbuf);
575 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000576}
577
578static PyObject *
579raw_unicode_escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000580 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000581{
Antoine Pitroud1188562010-06-09 16:38:55 +0000582 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000583 const char *errors = NULL;
Antoine Pitroud1188562010-06-09 16:38:55 +0000584 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000585
Martin v. Löwis423be952008-08-13 15:53:07 +0000586 if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000587 &pbuf, &errors))
588 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000589
Antoine Pitroud1188562010-06-09 16:38:55 +0000590 unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
591 PyBuffer_Release(&pbuf);
592 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000593}
594
595static PyObject *
596latin_1_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000597 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000598{
Antoine Pitroud1188562010-06-09 16:38:55 +0000599 Py_buffer pbuf;
600 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000601 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000602
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000603 if (!PyArg_ParseTuple(args, "y*|z:latin_1_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000604 &pbuf, &errors))
605 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000606
Antoine Pitroud1188562010-06-09 16:38:55 +0000607 unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
608 PyBuffer_Release(&pbuf);
609 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000610}
611
612static PyObject *
613ascii_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000614 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000615{
Antoine Pitroud1188562010-06-09 16:38:55 +0000616 Py_buffer pbuf;
617 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000618 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000619
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000620 if (!PyArg_ParseTuple(args, "y*|z:ascii_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000621 &pbuf, &errors))
622 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000623
Antoine Pitroud1188562010-06-09 16:38:55 +0000624 unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
625 PyBuffer_Release(&pbuf);
626 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000627}
628
629static PyObject *
630charmap_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000631 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000632{
Antoine Pitroud1188562010-06-09 16:38:55 +0000633 Py_buffer pbuf;
634 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000635 const char *errors = NULL;
636 PyObject *mapping = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000637
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000638 if (!PyArg_ParseTuple(args, "y*|zO:charmap_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000639 &pbuf, &errors, &mapping))
640 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000641 if (mapping == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000642 mapping = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000643
Antoine Pitroud1188562010-06-09 16:38:55 +0000644 unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
645 PyBuffer_Release(&pbuf);
646 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000647}
648
Victor Stinner99b95382011-07-04 14:23:54 +0200649#ifdef HAVE_MBCS
Guido van Rossum24bdb042000-03-28 20:29:59 +0000650
651static PyObject *
652mbcs_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000653 PyObject *args)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000654{
Antoine Pitroud1188562010-06-09 16:38:55 +0000655 Py_buffer pbuf;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000656 const char *errors = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000657 int final = 0;
Martin v. Löwis423be952008-08-13 15:53:07 +0000658 Py_ssize_t consumed;
659 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000660
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000661 if (!PyArg_ParseTuple(args, "y*|zi:mbcs_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000662 &pbuf, &errors, &final))
663 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000664 consumed = pbuf.len;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000665
Martin v. Löwis423be952008-08-13 15:53:07 +0000666 decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000667 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000668 PyBuffer_Release(&pbuf);
Martin v. Löwis423be952008-08-13 15:53:07 +0000669 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000670 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000671 return codec_tuple(decoded, consumed);
Guido van Rossum24bdb042000-03-28 20:29:59 +0000672}
673
Victor Stinner3a50e702011-10-18 21:21:00 +0200674static PyObject *
675code_page_decode(PyObject *self,
676 PyObject *args)
677{
678 Py_buffer pbuf;
679 const char *errors = NULL;
680 int final = 0;
681 Py_ssize_t consumed;
682 PyObject *decoded = NULL;
683 int code_page;
684
685 if (!PyArg_ParseTuple(args, "iy*|zi:code_page_decode",
686 &code_page, &pbuf, &errors, &final))
687 return NULL;
688 consumed = pbuf.len;
689
690 decoded = PyUnicode_DecodeCodePageStateful(code_page,
691 pbuf.buf, pbuf.len, errors,
692 final ? NULL : &consumed);
693 PyBuffer_Release(&pbuf);
694 if (decoded == NULL)
695 return NULL;
696 return codec_tuple(decoded, consumed);
697}
698
Victor Stinner99b95382011-07-04 14:23:54 +0200699#endif /* HAVE_MBCS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000700
/* --- Encoder ------------------------------------------------------------ */
702
703static PyObject *
704readbuffer_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000705 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000706{
Martin v. Löwis423be952008-08-13 15:53:07 +0000707 Py_buffer pdata;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000708 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000709 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000710 const char *errors = NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000711 PyObject *result;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000712
Martin v. Löwis423be952008-08-13 15:53:07 +0000713 if (!PyArg_ParseTuple(args, "s*|z:readbuffer_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000714 &pdata, &errors))
715 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000716 data = pdata.buf;
717 size = pdata.len;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000718
Martin v. Löwis423be952008-08-13 15:53:07 +0000719 result = PyBytes_FromStringAndSize(data, size);
720 PyBuffer_Release(&pdata);
721 return codec_tuple(result, size);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000722}
723
724static PyObject *
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000725unicode_internal_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000726 PyObject *args)
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000727{
728 PyObject *obj;
729 const char *errors = NULL;
730 const char *data;
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100731 Py_ssize_t len, size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000732
Ezio Melottiadc417c2011-11-17 12:23:34 +0200733 if (PyErr_WarnEx(PyExc_DeprecationWarning,
734 "unicode_internal codec has been deprecated",
735 1))
736 return NULL;
737
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000738 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000739 &obj, &errors))
740 return NULL;
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000741
742 if (PyUnicode_Check(obj)) {
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100743 Py_UNICODE *u;
744
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100745 if (PyUnicode_READY(obj) < 0)
746 return NULL;
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100747
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100748 u = PyUnicode_AsUnicodeAndSize(obj, &len);
749 if (u == NULL)
750 return NULL;
751 if (len > PY_SSIZE_T_MAX / sizeof(Py_UNICODE))
752 return PyErr_NoMemory();
753 size = len * sizeof(Py_UNICODE);
754 return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size),
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100755 PyUnicode_GET_LENGTH(obj));
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000756 }
757 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000758 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
759 return NULL;
760 return codec_tuple(PyBytes_FromStringAndSize(data, size), size);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000761 }
762}
763
764static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000765utf_7_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000766 PyObject *args)
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000767{
768 PyObject *str, *v;
769 const char *errors = NULL;
770
771 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000772 &str, &errors))
773 return NULL;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000774
775 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100776 if (str == NULL || PyUnicode_READY(str) < 0) {
777 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000778 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100779 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100780 v = codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
781 PyUnicode_GET_LENGTH(str));
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000782 Py_DECREF(str);
783 return v;
784}
785
786static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000787utf_8_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000788 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000789{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000790 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000791 const char *errors = NULL;
792
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000793 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000794 &str, &errors))
795 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000796
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000797 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100798 if (str == NULL || PyUnicode_READY(str) < 0) {
799 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000800 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100801 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200802 v = codec_tuple(PyUnicode_AsEncodedString(str, "utf-8", errors),
803 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000804 Py_DECREF(str);
805 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000806}
807
808/* This version provides access to the byteorder parameter of the
809 builtin UTF-16 codecs as optional third argument. It defaults to 0
810 which means: use the native byte order and prepend the data with a
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000811 BOM mark.
Guido van Rossume2d67f92000-03-10 23:09:23 +0000812
813*/
814
815static PyObject *
816utf_16_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000817 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000818{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000819 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000820 const char *errors = NULL;
821 int byteorder = 0;
822
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000823 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000824 &str, &errors, &byteorder))
825 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000826
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000827 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100828 if (str == NULL || PyUnicode_READY(str) < 0) {
829 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000830 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100831 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100832 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
833 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000834 Py_DECREF(str);
835 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000836}
837
838static PyObject *
839utf_16_le_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000840 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000841{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000842 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000843 const char *errors = NULL;
844
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000845 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000846 &str, &errors))
847 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000848
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000849 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100850 if (str == NULL || PyUnicode_READY(str) < 0) {
851 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000852 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100853 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100854 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
855 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000856 Py_DECREF(str);
857 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000858}
859
860static PyObject *
861utf_16_be_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000862 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000863{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000864 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000865 const char *errors = NULL;
866
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000867 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000868 &str, &errors))
869 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000870
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000871 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100872 if (str == NULL || PyUnicode_READY(str) < 0) {
873 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000874 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100875 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100876 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
877 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000878 Py_DECREF(str);
879 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000880}
881
Walter Dörwald41980ca2007-08-16 21:55:45 +0000882/* This version provides access to the byteorder parameter of the
883 builtin UTF-32 codecs as optional third argument. It defaults to 0
884 which means: use the native byte order and prepend the data with a
885 BOM mark.
886
887*/
888
889static PyObject *
890utf_32_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000891 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000892{
893 PyObject *str, *v;
894 const char *errors = NULL;
895 int byteorder = 0;
896
897 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000898 &str, &errors, &byteorder))
899 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000900
901 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100902 if (str == NULL || PyUnicode_READY(str) < 0) {
903 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000904 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100905 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100906 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
907 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000908 Py_DECREF(str);
909 return v;
910}
911
912static PyObject *
913utf_32_le_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000914 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000915{
916 PyObject *str, *v;
917 const char *errors = NULL;
918
919 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000920 &str, &errors))
921 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000922
923 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100924 if (str == NULL || PyUnicode_READY(str) < 0) {
925 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000926 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100927 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100928 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
929 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000930 Py_DECREF(str);
931 return v;
932}
933
934static PyObject *
935utf_32_be_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000936 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000937{
938 PyObject *str, *v;
939 const char *errors = NULL;
940
941 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000942 &str, &errors))
943 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000944
945 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100946 if (str == NULL || PyUnicode_READY(str) < 0) {
947 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000948 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100949 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100950 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
951 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000952 Py_DECREF(str);
953 return v;
954}
955
Guido van Rossume2d67f92000-03-10 23:09:23 +0000956static PyObject *
957unicode_escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000958 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000959{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000960 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000961 const char *errors = NULL;
962
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000963 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000964 &str, &errors))
965 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000966
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000967 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100968 if (str == NULL || PyUnicode_READY(str) < 0) {
969 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000970 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100971 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100972 v = codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
973 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000974 Py_DECREF(str);
975 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000976}
977
978static PyObject *
979raw_unicode_escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000980 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000981{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000982 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000983 const char *errors = NULL;
984
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000985 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000986 &str, &errors))
987 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000988
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000989 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100990 if (str == NULL || PyUnicode_READY(str) < 0) {
991 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000992 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100993 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100994 v = codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
995 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000996 Py_DECREF(str);
997 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000998}
999
1000static PyObject *
1001latin_1_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001002 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001003{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001004 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001005 const char *errors = NULL;
1006
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001007 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001008 &str, &errors))
1009 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001010
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001011 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001012 if (str == NULL || PyUnicode_READY(str) < 0) {
1013 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001014 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001015 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001016 v = codec_tuple(_PyUnicode_AsLatin1String(str, errors),
1017 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001018 Py_DECREF(str);
1019 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001020}
1021
1022static PyObject *
1023ascii_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001024 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001025{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001026 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001027 const char *errors = NULL;
1028
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001029 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001030 &str, &errors))
1031 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001032
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001033 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001034 if (str == NULL || PyUnicode_READY(str) < 0) {
1035 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001036 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001037 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001038 v = codec_tuple(_PyUnicode_AsASCIIString(str, errors),
1039 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001040 Py_DECREF(str);
1041 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001042}
1043
1044static PyObject *
1045charmap_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001046 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001047{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001048 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001049 const char *errors = NULL;
1050 PyObject *mapping = NULL;
1051
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001052 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001053 &str, &errors, &mapping))
1054 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001055 if (mapping == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001056 mapping = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001057
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001058 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001059 if (str == NULL || PyUnicode_READY(str) < 0) {
1060 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001061 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001062 }
Martin v. Löwis23e275b2011-11-02 18:02:51 +01001063 v = codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001064 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001065 Py_DECREF(str);
1066 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001067}
1068
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001069static PyObject*
1070charmap_build(PyObject *self, PyObject *args)
1071{
1072 PyObject *map;
1073 if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
1074 return NULL;
1075 return PyUnicode_BuildEncodingMap(map);
1076}
1077
Victor Stinner99b95382011-07-04 14:23:54 +02001078#ifdef HAVE_MBCS
Guido van Rossum24bdb042000-03-28 20:29:59 +00001079
1080static PyObject *
1081mbcs_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001082 PyObject *args)
Guido van Rossum24bdb042000-03-28 20:29:59 +00001083{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001084 PyObject *str, *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001085 const char *errors = NULL;
1086
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001087 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001088 &str, &errors))
1089 return NULL;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001090
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001091 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001092 if (str == NULL || PyUnicode_READY(str) < 0) {
1093 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001094 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001095 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001096 v = codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
1097 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001098 Py_DECREF(str);
1099 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001100}
1101
Victor Stinner3a50e702011-10-18 21:21:00 +02001102static PyObject *
1103code_page_encode(PyObject *self,
1104 PyObject *args)
1105{
1106 PyObject *str, *v;
1107 const char *errors = NULL;
1108 int code_page;
1109
1110 if (!PyArg_ParseTuple(args, "iO|z:code_page_encode",
1111 &code_page, &str, &errors))
1112 return NULL;
1113
1114 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001115 if (str == NULL || PyUnicode_READY(str) < 0) {
1116 Py_XDECREF(str);
Victor Stinner3a50e702011-10-18 21:21:00 +02001117 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001118 }
Victor Stinner3a50e702011-10-18 21:21:00 +02001119 v = codec_tuple(PyUnicode_EncodeCodePage(code_page,
1120 str,
1121 errors),
1122 PyUnicode_GET_LENGTH(str));
1123 Py_DECREF(str);
1124 return v;
1125}
1126
Victor Stinner99b95382011-07-04 14:23:54 +02001127#endif /* HAVE_MBCS */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001128
/* --- Error handler registry --------------------------------------------- */
1130
Walter Dörwald0ae29812002-10-31 13:36:29 +00001131PyDoc_STRVAR(register_error__doc__,
1132"register_error(errors, handler)\n\
1133\n\
1134Register the specified error handler under the name\n\
1135errors. handler must be a callable object, that\n\
1136will be called with an exception instance containing\n\
1137information about the location of the encoding/decoding\n\
1138error and must return a (replacement, new position) tuple.");
1139
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001140static PyObject *register_error(PyObject *self, PyObject *args)
1141{
1142 const char *name;
1143 PyObject *handler;
1144
1145 if (!PyArg_ParseTuple(args, "sO:register_error",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001146 &name, &handler))
1147 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001148 if (PyCodec_RegisterError(name, handler))
1149 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001150 Py_RETURN_NONE;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001151}
1152
Walter Dörwald0ae29812002-10-31 13:36:29 +00001153PyDoc_STRVAR(lookup_error__doc__,
1154"lookup_error(errors) -> handler\n\
1155\n\
1156Return the error handler for the specified error handling name\n\
1157or raise a LookupError, if no handler exists under this name.");
1158
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001159static PyObject *lookup_error(PyObject *self, PyObject *args)
1160{
1161 const char *name;
1162
1163 if (!PyArg_ParseTuple(args, "s:lookup_error",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001164 &name))
1165 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001166 return PyCodec_LookupError(name);
1167}
1168
/* --- Module API --------------------------------------------------------- */
1170
1171static PyMethodDef _codecs_functions[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001172 {"register", codec_register, METH_O,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001173 register__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001174 {"lookup", codec_lookup, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001175 lookup__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001176 {"encode", codec_encode, METH_VARARGS,
1177 encode__doc__},
1178 {"decode", codec_decode, METH_VARARGS,
1179 decode__doc__},
1180 {"escape_encode", escape_encode, METH_VARARGS},
1181 {"escape_decode", escape_decode, METH_VARARGS},
1182 {"utf_8_encode", utf_8_encode, METH_VARARGS},
1183 {"utf_8_decode", utf_8_decode, METH_VARARGS},
1184 {"utf_7_encode", utf_7_encode, METH_VARARGS},
1185 {"utf_7_decode", utf_7_decode, METH_VARARGS},
1186 {"utf_16_encode", utf_16_encode, METH_VARARGS},
1187 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1188 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1189 {"utf_16_decode", utf_16_decode, METH_VARARGS},
1190 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1191 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1192 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
1193 {"utf_32_encode", utf_32_encode, METH_VARARGS},
1194 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1195 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1196 {"utf_32_decode", utf_32_decode, METH_VARARGS},
1197 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1198 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1199 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
1200 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1201 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1202 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1203 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1204 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1205 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1206 {"latin_1_encode", latin_1_encode, METH_VARARGS},
1207 {"latin_1_decode", latin_1_decode, METH_VARARGS},
1208 {"ascii_encode", ascii_encode, METH_VARARGS},
1209 {"ascii_decode", ascii_decode, METH_VARARGS},
1210 {"charmap_encode", charmap_encode, METH_VARARGS},
1211 {"charmap_decode", charmap_decode, METH_VARARGS},
1212 {"charmap_build", charmap_build, METH_VARARGS},
1213 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
Victor Stinner99b95382011-07-04 14:23:54 +02001214#ifdef HAVE_MBCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001215 {"mbcs_encode", mbcs_encode, METH_VARARGS},
1216 {"mbcs_decode", mbcs_decode, METH_VARARGS},
Victor Stinner3a50e702011-10-18 21:21:00 +02001217 {"code_page_encode", code_page_encode, METH_VARARGS},
1218 {"code_page_decode", code_page_decode, METH_VARARGS},
Guido van Rossum24bdb042000-03-28 20:29:59 +00001219#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001220 {"register_error", register_error, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001221 register_error__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001222 {"lookup_error", lookup_error, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001223 lookup_error__doc__},
Nick Coghlan8fad1672014-09-15 23:50:44 +12001224 _CODECS__FORGET_CODEC_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001225 {NULL, NULL} /* sentinel */
Guido van Rossume2d67f92000-03-10 23:09:23 +00001226};
1227
Martin v. Löwis1a214512008-06-11 05:26:20 +00001228static struct PyModuleDef codecsmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001229 PyModuleDef_HEAD_INIT,
1230 "_codecs",
1231 NULL,
1232 -1,
1233 _codecs_functions,
1234 NULL,
1235 NULL,
1236 NULL,
1237 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001238};
1239
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001240PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001241PyInit__codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001242{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001243 return PyModule_Create(&codecsmodule);
Guido van Rossume2d67f92000-03-10 23:09:23 +00001244}