blob: 40037b1dc675e2d53968a50893429da4833e9fdd [file] [log] [blame]
Guido van Rossume2d67f92000-03-10 23:09:23 +00001/* ------------------------------------------------------------------------
2
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
5
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
8
9 The codec registry is accessible via:
10
11 register(search_function) -> None
12
Guido van Rossum36e0a922007-07-20 04:05:57 +000013 lookup(encoding) -> CodecInfo object
Guido van Rossume2d67f92000-03-10 23:09:23 +000014
15 The builtin Unicode codecs use the following interface:
16
Walter Dörwald9fd115c2005-11-02 08:30:08 +000017 <encoding>_encode(Unicode_object[,errors='strict']) ->
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000018 (string object, bytes consumed)
Guido van Rossume2d67f92000-03-10 23:09:23 +000019
Walter Dörwald9fd115c2005-11-02 08:30:08 +000020 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
Guido van Rossume2d67f92000-03-10 23:09:23 +000021 (Unicode object, bytes consumed)
22
  <encoding>_encode() interfaces also accept non-Unicode objects as
  input. The objects are then converted to Unicode using
  PyUnicode_FromObject() prior to applying the conversion.
26
Guido van Rossume2d67f92000-03-10 23:09:23 +000027 These <encoding>s are available: utf_8, unicode_escape,
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +000028 raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
29 mbcs (on win32).
30
Guido van Rossume2d67f92000-03-10 23:09:23 +000031
32Written by Marc-Andre Lemburg (mal@lemburg.com).
33
Guido van Rossum16b1ad92000-08-03 16:24:25 +000034Copyright (c) Corporation for National Research Initiatives.
Guido van Rossume2d67f92000-03-10 23:09:23 +000035
36 ------------------------------------------------------------------------ */
37
Martin v. Löwis18e16552006-02-15 17:27:45 +000038#define PY_SSIZE_T_CLEAN
Guido van Rossume2d67f92000-03-10 23:09:23 +000039#include "Python.h"
40
Victor Stinner87a7c822011-11-10 20:05:55 +010041#ifdef MS_WINDOWS
42#include <windows.h>
43#endif
44
Guido van Rossume2d67f92000-03-10 23:09:23 +000045/* --- Registry ----------------------------------------------------------- */
46
Walter Dörwald0ae29812002-10-31 13:36:29 +000047PyDoc_STRVAR(register__doc__,
48"register(search_function)\n\
49\n\
50Register a codec search function. Search functions are expected to take\n\
51one argument, the encoding name in all lower case letters, and return\n\
Guido van Rossum36e0a922007-07-20 04:05:57 +000052a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
53(or a CodecInfo object).");
Walter Dörwald0ae29812002-10-31 13:36:29 +000054
Guido van Rossume2d67f92000-03-10 23:09:23 +000055static
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000056PyObject *codec_register(PyObject *self, PyObject *search_function)
Guido van Rossume2d67f92000-03-10 23:09:23 +000057{
Guido van Rossume2d67f92000-03-10 23:09:23 +000058 if (PyCodec_Register(search_function))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000059 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000060
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000061 Py_RETURN_NONE;
Guido van Rossume2d67f92000-03-10 23:09:23 +000062}
63
Walter Dörwald0ae29812002-10-31 13:36:29 +000064PyDoc_STRVAR(lookup__doc__,
Guido van Rossum36e0a922007-07-20 04:05:57 +000065"lookup(encoding) -> CodecInfo\n\
Walter Dörwald0ae29812002-10-31 13:36:29 +000066\n\
67Looks up a codec tuple in the Python codec registry and returns\n\
Benjamin Petersonf07d0022009-03-21 17:31:58 +000068a CodecInfo object.");
Walter Dörwald0ae29812002-10-31 13:36:29 +000069
Guido van Rossume2d67f92000-03-10 23:09:23 +000070static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000071PyObject *codec_lookup(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000072{
73 char *encoding;
74
75 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000076 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +000077
78 return _PyCodec_Lookup(encoding);
Guido van Rossume2d67f92000-03-10 23:09:23 +000079}
80
Marc-André Lemburg3f419742004-07-10 12:06:10 +000081PyDoc_STRVAR(encode__doc__,
82"encode(obj, [encoding[,errors]]) -> object\n\
83\n\
84Encodes obj using the codec registered for encoding. encoding defaults\n\
85to the default encoding. errors may be given to set a different error\n\
86handling scheme. Default is 'strict' meaning that encoding errors raise\n\
87a ValueError. Other possible values are 'ignore', 'replace' and\n\
88'xmlcharrefreplace' as well as any other name registered with\n\
89codecs.register_error that can handle ValueErrors.");
90
91static PyObject *
92codec_encode(PyObject *self, PyObject *args)
93{
Brett Cannon3e377de2004-07-10 21:41:14 +000094 const char *encoding = NULL;
95 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +000096 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000097
Marc-André Lemburg3f419742004-07-10 12:06:10 +000098 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
99 return NULL;
100
101 if (encoding == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000102 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000103
104 /* Encode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000105 return PyCodec_Encode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000106}
107
108PyDoc_STRVAR(decode__doc__,
109"decode(obj, [encoding[,errors]]) -> object\n\
110\n\
111Decodes obj using the codec registered for encoding. encoding defaults\n\
112to the default encoding. errors may be given to set a different error\n\
113handling scheme. Default is 'strict' meaning that encoding errors raise\n\
114a ValueError. Other possible values are 'ignore' and 'replace'\n\
Benjamin Petersonf10a79a2008-10-11 00:49:57 +0000115as well as any other name registered with codecs.register_error that is\n\
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000116able to handle ValueErrors.");
117
118static PyObject *
119codec_decode(PyObject *self, PyObject *args)
120{
Brett Cannon3e377de2004-07-10 21:41:14 +0000121 const char *encoding = NULL;
122 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000123 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000124
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000125 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
126 return NULL;
127
128 if (encoding == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000130
131 /* Decode via the codec registry */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000132 return PyCodec_Decode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000133}
134
Guido van Rossume2d67f92000-03-10 23:09:23 +0000135/* --- Helpers ------------------------------------------------------------ */
136
137static
138PyObject *codec_tuple(PyObject *unicode,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 Py_ssize_t len)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000140{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000141 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000142 if (unicode == NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000143 return NULL;
144 v = Py_BuildValue("On", unicode, len);
145 Py_DECREF(unicode);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000146 return v;
147}
148
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000149/* --- String codecs ------------------------------------------------------ */
150static PyObject *
151escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 PyObject *args)
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000153{
154 const char *errors = NULL;
155 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000156 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000157
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000158 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000159 &data, &size, &errors))
160 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +0000161 return codec_tuple(PyBytes_DecodeEscape(data, size, errors, 0, NULL),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000162 size);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000163}
164
165static PyObject *
166escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000167 PyObject *args)
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000168{
Antoine Pitroud1188562010-06-09 16:38:55 +0000169 PyObject *str;
170 Py_ssize_t size;
171 Py_ssize_t newsize;
172 const char *errors = NULL;
173 PyObject *v;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000174
Antoine Pitroud1188562010-06-09 16:38:55 +0000175 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
176 &PyBytes_Type, &str, &errors))
177 return NULL;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000178
Antoine Pitroud1188562010-06-09 16:38:55 +0000179 size = PyBytes_GET_SIZE(str);
Mark Dickinsonc04ddff2012-10-06 18:04:49 +0100180 if (size > PY_SSIZE_T_MAX / 4) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000181 PyErr_SetString(PyExc_OverflowError,
182 "string is too large to encode");
183 return NULL;
184 }
Mark Dickinsonc04ddff2012-10-06 18:04:49 +0100185 newsize = 4*size;
Antoine Pitroud1188562010-06-09 16:38:55 +0000186 v = PyBytes_FromStringAndSize(NULL, newsize);
187
188 if (v == NULL) {
189 return NULL;
190 }
191 else {
192 register Py_ssize_t i;
193 register char c;
194 register char *p = PyBytes_AS_STRING(v);
195
196 for (i = 0; i < size; i++) {
197 /* There's at least enough room for a hex escape */
198 assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
199 c = PyBytes_AS_STRING(str)[i];
200 if (c == '\'' || c == '\\')
201 *p++ = '\\', *p++ = c;
202 else if (c == '\t')
203 *p++ = '\\', *p++ = 't';
204 else if (c == '\n')
205 *p++ = '\\', *p++ = 'n';
206 else if (c == '\r')
207 *p++ = '\\', *p++ = 'r';
208 else if (c < ' ' || c >= 0x7f) {
209 *p++ = '\\';
210 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200211 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
212 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitroud1188562010-06-09 16:38:55 +0000213 }
214 else
215 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000216 }
Antoine Pitroud1188562010-06-09 16:38:55 +0000217 *p = '\0';
218 if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
219 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000220 }
Antoine Pitroud1188562010-06-09 16:38:55 +0000221 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000222
Antoine Pitroud1188562010-06-09 16:38:55 +0000223 return codec_tuple(v, size);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000224}
225
Guido van Rossume2d67f92000-03-10 23:09:23 +0000226/* --- Decoder ------------------------------------------------------------ */
227
228static PyObject *
229unicode_internal_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000230 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000231{
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000232 PyObject *obj;
233 const char *errors = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000234 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000235 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000236
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000237 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000238 &obj, &errors))
239 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000240
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000241 if (PyUnicode_Check(obj)) {
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100242 if (PyUnicode_READY(obj) < 0)
243 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000244 Py_INCREF(obj);
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100245 return codec_tuple(obj, PyUnicode_GET_LENGTH(obj));
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000246 }
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000247 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
249 return NULL;
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000250
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000251 return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
252 size);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000253 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000254}
255
256static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000257utf_7_decode(PyObject *self,
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000258 PyObject *args)
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000259{
Antoine Pitroud1188562010-06-09 16:38:55 +0000260 Py_buffer pbuf;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000261 const char *errors = NULL;
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000262 int final = 0;
263 Py_ssize_t consumed;
264 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000265
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000266 if (!PyArg_ParseTuple(args, "y*|zi:utf_7_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000267 &pbuf, &errors, &final))
268 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000269 consumed = pbuf.len;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000270
Martin v. Löwis423be952008-08-13 15:53:07 +0000271 decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000273 PyBuffer_Release(&pbuf);
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000274 if (decoded == NULL)
275 return NULL;
276 return codec_tuple(decoded, consumed);
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000277}
278
279static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000280utf_8_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000281 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000282{
Antoine Pitroud1188562010-06-09 16:38:55 +0000283 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000284 const char *errors = NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000285 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000286 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000287 PyObject *decoded = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000288
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000289 if (!PyArg_ParseTuple(args, "y*|zi:utf_8_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000290 &pbuf, &errors, &final))
291 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000292 consumed = pbuf.len;
293
294 decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000295 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000296 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000297 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000298 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000299 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000300}
301
302static PyObject *
303utf_16_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000304 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000305{
Antoine Pitroud1188562010-06-09 16:38:55 +0000306 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000307 const char *errors = NULL;
308 int byteorder = 0;
Walter Dörwald69652032004-09-07 20:24:22 +0000309 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000310 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000311 PyObject *decoded;
312
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000313 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000314 &pbuf, &errors, &final))
315 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000316 consumed = pbuf.len; /* This is overwritten unless final is true. */
317 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000318 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000319 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000320 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000321 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000322 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000323}
324
325static PyObject *
326utf_16_le_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000327 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000328{
Antoine Pitroud1188562010-06-09 16:38:55 +0000329 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000330 const char *errors = NULL;
331 int byteorder = -1;
Walter Dörwald69652032004-09-07 20:24:22 +0000332 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000333 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000334 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000335
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000336 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_le_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000337 &pbuf, &errors, &final))
338 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000339
Martin v. Löwis423be952008-08-13 15:53:07 +0000340 consumed = pbuf.len; /* This is overwritten unless final is true. */
341 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000342 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000343 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000344 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000345 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000346 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000347}
348
349static PyObject *
350utf_16_be_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000351 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000352{
Antoine Pitroud1188562010-06-09 16:38:55 +0000353 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000354 const char *errors = NULL;
355 int byteorder = 1;
Walter Dörwald69652032004-09-07 20:24:22 +0000356 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000357 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000358 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000359
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000360 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_be_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000361 &pbuf, &errors, &final))
362 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000363
364 consumed = pbuf.len; /* This is overwritten unless final is true. */
365 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000367 PyBuffer_Release(&pbuf);
Walter Dörwald69652032004-09-07 20:24:22 +0000368 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000369 return NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000370 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000371}
372
373/* This non-standard version also provides access to the byteorder
374 parameter of the builtin UTF-16 codec.
375
376 It returns a tuple (unicode, bytesread, byteorder) with byteorder
377 being the value in effect at the end of data.
378
379*/
380
381static PyObject *
382utf_16_ex_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000383 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000384{
Antoine Pitroud1188562010-06-09 16:38:55 +0000385 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000386 const char *errors = NULL;
387 int byteorder = 0;
388 PyObject *unicode, *tuple;
Walter Dörwald69652032004-09-07 20:24:22 +0000389 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000390 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000391
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000392 if (!PyArg_ParseTuple(args, "y*|zii:utf_16_ex_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000393 &pbuf, &errors, &byteorder, &final))
394 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000395 consumed = pbuf.len; /* This is overwritten unless final is true. */
396 unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000397 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000398 PyBuffer_Release(&pbuf);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000399 if (unicode == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000400 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000401 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000402 Py_DECREF(unicode);
403 return tuple;
404}
405
406static PyObject *
Walter Dörwald41980ca2007-08-16 21:55:45 +0000407utf_32_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000408 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000409{
Antoine Pitroud1188562010-06-09 16:38:55 +0000410 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000411 const char *errors = NULL;
412 int byteorder = 0;
413 int final = 0;
414 Py_ssize_t consumed;
415 PyObject *decoded;
416
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000417 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000418 &pbuf, &errors, &final))
419 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000420 consumed = pbuf.len; /* This is overwritten unless final is true. */
421 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000422 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000423 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000424 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000425 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000426 return codec_tuple(decoded, consumed);
427}
428
429static PyObject *
430utf_32_le_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000431 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000432{
Antoine Pitroud1188562010-06-09 16:38:55 +0000433 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000434 const char *errors = NULL;
435 int byteorder = -1;
436 int final = 0;
437 Py_ssize_t consumed;
Martin v. Löwis423be952008-08-13 15:53:07 +0000438 PyObject *decoded;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000439
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000440 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_le_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000441 &pbuf, &errors, &final))
442 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000443 consumed = pbuf.len; /* This is overwritten unless final is true. */
444 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000445 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000446 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000447 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000448 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000449 return codec_tuple(decoded, consumed);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000450}
451
452static PyObject *
453utf_32_be_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000454 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000455{
Antoine Pitroud1188562010-06-09 16:38:55 +0000456 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000457 const char *errors = NULL;
458 int byteorder = 1;
459 int final = 0;
460 Py_ssize_t consumed;
Martin v. Löwis423be952008-08-13 15:53:07 +0000461 PyObject *decoded;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000462
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000463 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_be_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000464 &pbuf, &errors, &final))
465 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000466 consumed = pbuf.len; /* This is overwritten unless final is true. */
467 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000468 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000469 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000470 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000471 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000472 return codec_tuple(decoded, consumed);
473}
474
475/* This non-standard version also provides access to the byteorder
476 parameter of the builtin UTF-32 codec.
477
478 It returns a tuple (unicode, bytesread, byteorder) with byteorder
479 being the value in effect at the end of data.
480
481*/
482
483static PyObject *
484utf_32_ex_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000485 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000486{
Antoine Pitroud1188562010-06-09 16:38:55 +0000487 Py_buffer pbuf;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000488 const char *errors = NULL;
489 int byteorder = 0;
490 PyObject *unicode, *tuple;
491 int final = 0;
492 Py_ssize_t consumed;
493
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000494 if (!PyArg_ParseTuple(args, "y*|zii:utf_32_ex_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000495 &pbuf, &errors, &byteorder, &final))
496 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000497 consumed = pbuf.len; /* This is overwritten unless final is true. */
498 unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000499 &byteorder, final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000500 PyBuffer_Release(&pbuf);
Walter Dörwald41980ca2007-08-16 21:55:45 +0000501 if (unicode == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000502 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000503 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
504 Py_DECREF(unicode);
505 return tuple;
506}
507
508static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000509unicode_escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000510 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000511{
Antoine Pitroud1188562010-06-09 16:38:55 +0000512 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000513 const char *errors = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000514 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000515
Martin v. Löwis423be952008-08-13 15:53:07 +0000516 if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000517 &pbuf, &errors))
518 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000519
Antoine Pitroud1188562010-06-09 16:38:55 +0000520 unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
521 PyBuffer_Release(&pbuf);
522 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000523}
524
525static PyObject *
526raw_unicode_escape_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000527 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000528{
Antoine Pitroud1188562010-06-09 16:38:55 +0000529 Py_buffer pbuf;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000530 const char *errors = NULL;
Antoine Pitroud1188562010-06-09 16:38:55 +0000531 PyObject *unicode;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000532
Martin v. Löwis423be952008-08-13 15:53:07 +0000533 if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000534 &pbuf, &errors))
535 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000536
Antoine Pitroud1188562010-06-09 16:38:55 +0000537 unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
538 PyBuffer_Release(&pbuf);
539 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000540}
541
542static PyObject *
543latin_1_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000544 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000545{
Antoine Pitroud1188562010-06-09 16:38:55 +0000546 Py_buffer pbuf;
547 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000548 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000549
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000550 if (!PyArg_ParseTuple(args, "y*|z:latin_1_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000551 &pbuf, &errors))
552 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000553
Antoine Pitroud1188562010-06-09 16:38:55 +0000554 unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
555 PyBuffer_Release(&pbuf);
556 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000557}
558
559static PyObject *
560ascii_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000561 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000562{
Antoine Pitroud1188562010-06-09 16:38:55 +0000563 Py_buffer pbuf;
564 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000565 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000566
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000567 if (!PyArg_ParseTuple(args, "y*|z:ascii_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000568 &pbuf, &errors))
569 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000570
Antoine Pitroud1188562010-06-09 16:38:55 +0000571 unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
572 PyBuffer_Release(&pbuf);
573 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000574}
575
576static PyObject *
577charmap_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000578 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000579{
Antoine Pitroud1188562010-06-09 16:38:55 +0000580 Py_buffer pbuf;
581 PyObject *unicode;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000582 const char *errors = NULL;
583 PyObject *mapping = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000584
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000585 if (!PyArg_ParseTuple(args, "y*|zO:charmap_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000586 &pbuf, &errors, &mapping))
587 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000588 if (mapping == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000589 mapping = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000590
Antoine Pitroud1188562010-06-09 16:38:55 +0000591 unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
592 PyBuffer_Release(&pbuf);
593 return codec_tuple(unicode, pbuf.len);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000594}
595
Victor Stinner99b95382011-07-04 14:23:54 +0200596#ifdef HAVE_MBCS
Guido van Rossum24bdb042000-03-28 20:29:59 +0000597
598static PyObject *
599mbcs_decode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000600 PyObject *args)
Guido van Rossum24bdb042000-03-28 20:29:59 +0000601{
Antoine Pitroud1188562010-06-09 16:38:55 +0000602 Py_buffer pbuf;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000603 const char *errors = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000604 int final = 0;
Martin v. Löwis423be952008-08-13 15:53:07 +0000605 Py_ssize_t consumed;
606 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000607
Antoine Pitrou81fabdb2009-01-22 10:11:36 +0000608 if (!PyArg_ParseTuple(args, "y*|zi:mbcs_decode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000609 &pbuf, &errors, &final))
610 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000611 consumed = pbuf.len;
Guido van Rossum24bdb042000-03-28 20:29:59 +0000612
Martin v. Löwis423be952008-08-13 15:53:07 +0000613 decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000614 final ? NULL : &consumed);
Antoine Pitroud1188562010-06-09 16:38:55 +0000615 PyBuffer_Release(&pbuf);
Martin v. Löwis423be952008-08-13 15:53:07 +0000616 if (decoded == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000617 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000618 return codec_tuple(decoded, consumed);
Guido van Rossum24bdb042000-03-28 20:29:59 +0000619}
620
Victor Stinner3a50e702011-10-18 21:21:00 +0200621static PyObject *
622code_page_decode(PyObject *self,
623 PyObject *args)
624{
625 Py_buffer pbuf;
626 const char *errors = NULL;
627 int final = 0;
628 Py_ssize_t consumed;
629 PyObject *decoded = NULL;
630 int code_page;
631
632 if (!PyArg_ParseTuple(args, "iy*|zi:code_page_decode",
633 &code_page, &pbuf, &errors, &final))
634 return NULL;
635 consumed = pbuf.len;
636
637 decoded = PyUnicode_DecodeCodePageStateful(code_page,
638 pbuf.buf, pbuf.len, errors,
639 final ? NULL : &consumed);
640 PyBuffer_Release(&pbuf);
641 if (decoded == NULL)
642 return NULL;
643 return codec_tuple(decoded, consumed);
644}
645
Victor Stinner99b95382011-07-04 14:23:54 +0200646#endif /* HAVE_MBCS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000647
Guido van Rossume2d67f92000-03-10 23:09:23 +0000648/* --- Encoder ------------------------------------------------------------ */
649
650static PyObject *
651readbuffer_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000652 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000653{
Martin v. Löwis423be952008-08-13 15:53:07 +0000654 Py_buffer pdata;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000655 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000656 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000657 const char *errors = NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000658 PyObject *result;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000659
Martin v. Löwis423be952008-08-13 15:53:07 +0000660 if (!PyArg_ParseTuple(args, "s*|z:readbuffer_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000661 &pdata, &errors))
662 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000663 data = pdata.buf;
664 size = pdata.len;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000665
Martin v. Löwis423be952008-08-13 15:53:07 +0000666 result = PyBytes_FromStringAndSize(data, size);
667 PyBuffer_Release(&pdata);
668 return codec_tuple(result, size);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000669}
670
671static PyObject *
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000672unicode_internal_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000673 PyObject *args)
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000674{
675 PyObject *obj;
676 const char *errors = NULL;
677 const char *data;
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100678 Py_ssize_t len, size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000679
Ezio Melottiadc417c2011-11-17 12:23:34 +0200680 if (PyErr_WarnEx(PyExc_DeprecationWarning,
681 "unicode_internal codec has been deprecated",
682 1))
683 return NULL;
684
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000685 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000686 &obj, &errors))
687 return NULL;
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000688
689 if (PyUnicode_Check(obj)) {
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100690 Py_UNICODE *u;
691
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100692 if (PyUnicode_READY(obj) < 0)
693 return NULL;
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100694
Victor Stinner9f4b1e92011-11-10 20:56:30 +0100695 u = PyUnicode_AsUnicodeAndSize(obj, &len);
696 if (u == NULL)
697 return NULL;
698 if (len > PY_SSIZE_T_MAX / sizeof(Py_UNICODE))
699 return PyErr_NoMemory();
700 size = len * sizeof(Py_UNICODE);
701 return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size),
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100702 PyUnicode_GET_LENGTH(obj));
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000703 }
704 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000705 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
706 return NULL;
707 return codec_tuple(PyBytes_FromStringAndSize(data, size), size);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000708 }
709}
710
711static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000712utf_7_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000713 PyObject *args)
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000714{
715 PyObject *str, *v;
716 const char *errors = NULL;
717
718 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000719 &str, &errors))
720 return NULL;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000721
722 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100723 if (str == NULL || PyUnicode_READY(str) < 0) {
724 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000725 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100726 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100727 v = codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
728 PyUnicode_GET_LENGTH(str));
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000729 Py_DECREF(str);
730 return v;
731}
732
733static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000734utf_8_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000735 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000736{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000737 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000738 const char *errors = NULL;
739
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000740 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000741 &str, &errors))
742 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000743
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000744 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100745 if (str == NULL || PyUnicode_READY(str) < 0) {
746 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000747 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100748 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200749 v = codec_tuple(PyUnicode_AsEncodedString(str, "utf-8", errors),
750 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000751 Py_DECREF(str);
752 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000753}
754
755/* This version provides access to the byteorder parameter of the
756 builtin UTF-16 codecs as optional third argument. It defaults to 0
757 which means: use the native byte order and prepend the data with a
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000758 BOM mark.
Guido van Rossume2d67f92000-03-10 23:09:23 +0000759
760*/
761
762static PyObject *
763utf_16_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000764 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000765{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000766 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000767 const char *errors = NULL;
768 int byteorder = 0;
769
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000770 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000771 &str, &errors, &byteorder))
772 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000773
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000774 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100775 if (str == NULL || PyUnicode_READY(str) < 0) {
776 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000777 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100778 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100779 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
780 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000781 Py_DECREF(str);
782 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000783}
784
785static PyObject *
786utf_16_le_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000787 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000788{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000789 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000790 const char *errors = NULL;
791
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000792 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000793 &str, &errors))
794 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000795
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000796 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100797 if (str == NULL || PyUnicode_READY(str) < 0) {
798 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000799 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100800 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100801 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
802 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000803 Py_DECREF(str);
804 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000805}
806
807static PyObject *
808utf_16_be_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000809 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000810{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000811 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000812 const char *errors = NULL;
813
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000814 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000815 &str, &errors))
816 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000817
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000818 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100819 if (str == NULL || PyUnicode_READY(str) < 0) {
820 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000821 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100822 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100823 v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
824 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000825 Py_DECREF(str);
826 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000827}
828
Walter Dörwald41980ca2007-08-16 21:55:45 +0000829/* This version provides access to the byteorder parameter of the
830 builtin UTF-32 codecs as optional third argument. It defaults to 0
831 which means: use the native byte order and prepend the data with a
832 BOM mark.
833
834*/
835
836static PyObject *
837utf_32_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000838 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000839{
840 PyObject *str, *v;
841 const char *errors = NULL;
842 int byteorder = 0;
843
844 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000845 &str, &errors, &byteorder))
846 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000847
848 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100849 if (str == NULL || PyUnicode_READY(str) < 0) {
850 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000851 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100852 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100853 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
854 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000855 Py_DECREF(str);
856 return v;
857}
858
859static PyObject *
860utf_32_le_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000861 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000862{
863 PyObject *str, *v;
864 const char *errors = NULL;
865
866 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000867 &str, &errors))
868 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000869
870 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100871 if (str == NULL || PyUnicode_READY(str) < 0) {
872 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000873 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100874 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100875 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
876 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000877 Py_DECREF(str);
878 return v;
879}
880
881static PyObject *
882utf_32_be_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000883 PyObject *args)
Walter Dörwald41980ca2007-08-16 21:55:45 +0000884{
885 PyObject *str, *v;
886 const char *errors = NULL;
887
888 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000889 &str, &errors))
890 return NULL;
Walter Dörwald41980ca2007-08-16 21:55:45 +0000891
892 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100893 if (str == NULL || PyUnicode_READY(str) < 0) {
894 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000895 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100896 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100897 v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
898 PyUnicode_GET_LENGTH(str));
Walter Dörwald41980ca2007-08-16 21:55:45 +0000899 Py_DECREF(str);
900 return v;
901}
902
Guido van Rossume2d67f92000-03-10 23:09:23 +0000903static PyObject *
904unicode_escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000905 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000906{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000907 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000908 const char *errors = NULL;
909
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000910 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000911 &str, &errors))
912 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000913
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000914 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100915 if (str == NULL || PyUnicode_READY(str) < 0) {
916 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000917 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100918 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100919 v = codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
920 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000921 Py_DECREF(str);
922 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000923}
924
925static PyObject *
926raw_unicode_escape_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000927 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000928{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000929 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000930 const char *errors = NULL;
931
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000932 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000933 &str, &errors))
934 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000935
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000936 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100937 if (str == NULL || PyUnicode_READY(str) < 0) {
938 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000939 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100940 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100941 v = codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
942 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000943 Py_DECREF(str);
944 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000945}
946
947static PyObject *
948latin_1_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000949 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000950{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000951 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000952 const char *errors = NULL;
953
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000954 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000955 &str, &errors))
956 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000957
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000958 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100959 if (str == NULL || PyUnicode_READY(str) < 0) {
960 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000961 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100962 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100963 v = codec_tuple(_PyUnicode_AsLatin1String(str, errors),
964 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000965 Py_DECREF(str);
966 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000967}
968
969static PyObject *
970ascii_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000971 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000972{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000973 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000974 const char *errors = NULL;
975
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000976 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000977 &str, &errors))
978 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000979
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000980 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100981 if (str == NULL || PyUnicode_READY(str) < 0) {
982 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000983 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +0100984 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +0100985 v = codec_tuple(_PyUnicode_AsASCIIString(str, errors),
986 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000987 Py_DECREF(str);
988 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000989}
990
991static PyObject *
992charmap_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000993 PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000994{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000995 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000996 const char *errors = NULL;
997 PyObject *mapping = NULL;
998
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000999 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001000 &str, &errors, &mapping))
1001 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001002 if (mapping == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001003 mapping = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001004
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001005 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001006 if (str == NULL || PyUnicode_READY(str) < 0) {
1007 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001008 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001009 }
Martin v. Löwis23e275b2011-11-02 18:02:51 +01001010 v = codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001011 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001012 Py_DECREF(str);
1013 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +00001014}
1015
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001016static PyObject*
1017charmap_build(PyObject *self, PyObject *args)
1018{
1019 PyObject *map;
1020 if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
1021 return NULL;
1022 return PyUnicode_BuildEncodingMap(map);
1023}
1024
Victor Stinner99b95382011-07-04 14:23:54 +02001025#ifdef HAVE_MBCS
Guido van Rossum24bdb042000-03-28 20:29:59 +00001026
1027static PyObject *
1028mbcs_encode(PyObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001029 PyObject *args)
Guido van Rossum24bdb042000-03-28 20:29:59 +00001030{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001031 PyObject *str, *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001032 const char *errors = NULL;
1033
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001034 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001035 &str, &errors))
1036 return NULL;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001037
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001038 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001039 if (str == NULL || PyUnicode_READY(str) < 0) {
1040 Py_XDECREF(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001041 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001042 }
Martin v. Löwis1db7c132011-11-10 18:24:32 +01001043 v = codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
1044 PyUnicode_GET_LENGTH(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001045 Py_DECREF(str);
1046 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001047}
1048
Victor Stinner3a50e702011-10-18 21:21:00 +02001049static PyObject *
1050code_page_encode(PyObject *self,
1051 PyObject *args)
1052{
1053 PyObject *str, *v;
1054 const char *errors = NULL;
1055 int code_page;
1056
1057 if (!PyArg_ParseTuple(args, "iO|z:code_page_encode",
1058 &code_page, &str, &errors))
1059 return NULL;
1060
1061 str = PyUnicode_FromObject(str);
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001062 if (str == NULL || PyUnicode_READY(str) < 0) {
1063 Py_XDECREF(str);
Victor Stinner3a50e702011-10-18 21:21:00 +02001064 return NULL;
Antoine Pitrouf72d4ef2011-11-12 18:05:15 +01001065 }
Victor Stinner3a50e702011-10-18 21:21:00 +02001066 v = codec_tuple(PyUnicode_EncodeCodePage(code_page,
1067 str,
1068 errors),
1069 PyUnicode_GET_LENGTH(str));
1070 Py_DECREF(str);
1071 return v;
1072}
1073
Victor Stinner99b95382011-07-04 14:23:54 +02001074#endif /* HAVE_MBCS */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001075
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001076/* --- Error handler registry --------------------------------------------- */
1077
Walter Dörwald0ae29812002-10-31 13:36:29 +00001078PyDoc_STRVAR(register_error__doc__,
1079"register_error(errors, handler)\n\
1080\n\
1081Register the specified error handler under the name\n\
1082errors. handler must be a callable object, that\n\
1083will be called with an exception instance containing\n\
1084information about the location of the encoding/decoding\n\
1085error and must return a (replacement, new position) tuple.");
1086
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001087static PyObject *register_error(PyObject *self, PyObject *args)
1088{
1089 const char *name;
1090 PyObject *handler;
1091
1092 if (!PyArg_ParseTuple(args, "sO:register_error",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001093 &name, &handler))
1094 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001095 if (PyCodec_RegisterError(name, handler))
1096 return NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001097 Py_RETURN_NONE;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001098}
1099
Walter Dörwald0ae29812002-10-31 13:36:29 +00001100PyDoc_STRVAR(lookup_error__doc__,
1101"lookup_error(errors) -> handler\n\
1102\n\
1103Return the error handler for the specified error handling name\n\
1104or raise a LookupError, if no handler exists under this name.");
1105
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001106static PyObject *lookup_error(PyObject *self, PyObject *args)
1107{
1108 const char *name;
1109
1110 if (!PyArg_ParseTuple(args, "s:lookup_error",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001111 &name))
1112 return NULL;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001113 return PyCodec_LookupError(name);
1114}
1115
Guido van Rossume2d67f92000-03-10 23:09:23 +00001116/* --- Module API --------------------------------------------------------- */
1117
1118static PyMethodDef _codecs_functions[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001119 {"register", codec_register, METH_O,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001120 register__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001121 {"lookup", codec_lookup, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001122 lookup__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001123 {"encode", codec_encode, METH_VARARGS,
1124 encode__doc__},
1125 {"decode", codec_decode, METH_VARARGS,
1126 decode__doc__},
1127 {"escape_encode", escape_encode, METH_VARARGS},
1128 {"escape_decode", escape_decode, METH_VARARGS},
1129 {"utf_8_encode", utf_8_encode, METH_VARARGS},
1130 {"utf_8_decode", utf_8_decode, METH_VARARGS},
1131 {"utf_7_encode", utf_7_encode, METH_VARARGS},
1132 {"utf_7_decode", utf_7_decode, METH_VARARGS},
1133 {"utf_16_encode", utf_16_encode, METH_VARARGS},
1134 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1135 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1136 {"utf_16_decode", utf_16_decode, METH_VARARGS},
1137 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1138 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1139 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
1140 {"utf_32_encode", utf_32_encode, METH_VARARGS},
1141 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1142 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1143 {"utf_32_decode", utf_32_decode, METH_VARARGS},
1144 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1145 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1146 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
1147 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1148 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1149 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1150 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1151 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1152 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1153 {"latin_1_encode", latin_1_encode, METH_VARARGS},
1154 {"latin_1_decode", latin_1_decode, METH_VARARGS},
1155 {"ascii_encode", ascii_encode, METH_VARARGS},
1156 {"ascii_decode", ascii_decode, METH_VARARGS},
1157 {"charmap_encode", charmap_encode, METH_VARARGS},
1158 {"charmap_decode", charmap_decode, METH_VARARGS},
1159 {"charmap_build", charmap_build, METH_VARARGS},
1160 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
Victor Stinner99b95382011-07-04 14:23:54 +02001161#ifdef HAVE_MBCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001162 {"mbcs_encode", mbcs_encode, METH_VARARGS},
1163 {"mbcs_decode", mbcs_decode, METH_VARARGS},
Victor Stinner3a50e702011-10-18 21:21:00 +02001164 {"code_page_encode", code_page_encode, METH_VARARGS},
1165 {"code_page_decode", code_page_decode, METH_VARARGS},
Guido van Rossum24bdb042000-03-28 20:29:59 +00001166#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001167 {"register_error", register_error, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001168 register_error__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001169 {"lookup_error", lookup_error, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001170 lookup_error__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001171 {NULL, NULL} /* sentinel */
Guido van Rossume2d67f92000-03-10 23:09:23 +00001172};
1173
Martin v. Löwis1a214512008-06-11 05:26:20 +00001174static struct PyModuleDef codecsmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001175 PyModuleDef_HEAD_INIT,
1176 "_codecs",
1177 NULL,
1178 -1,
1179 _codecs_functions,
1180 NULL,
1181 NULL,
1182 NULL,
1183 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001184};
1185
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001186PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001187PyInit__codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001188{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001189 return PyModule_Create(&codecsmodule);
Guido van Rossume2d67f92000-03-10 23:09:23 +00001190}