/* ------------------------------------------------------------------------

   _codecs -- Provides access to the codec registry and the builtin
              codecs.

   This module should never be imported directly. The standard library
   module "codecs" wraps this builtin module for use within Python.

   The codec registry is accessible via:

     register(search_function) -> None

     lookup(encoding) -> CodecInfo object

   The builtin Unicode codecs use the following interface:

     <encoding>_encode(Unicode_object[,errors='strict']) ->
        (string object, bytes consumed)

     <encoding>_decode(char_buffer_obj[,errors='strict']) ->
        (Unicode object, bytes consumed)

   <encoding>_encode() interfaces also accept non-Unicode objects as
   input. The objects are then converted to Unicode using
   PyUnicode_FromObject() prior to applying the conversion.

   These <encoding>s are available: utf_8, unicode_escape,
   raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
   mbcs (on win32).


Written by Marc-Andre Lemburg (mal@lemburg.com).

Copyright (c) Corporation for National Research Initiatives.

   ------------------------------------------------------------------------ */
37
Martin v. Löwis18e16552006-02-15 17:27:45 +000038#define PY_SSIZE_T_CLEAN
Guido van Rossume2d67f92000-03-10 23:09:23 +000039#include "Python.h"
40
41/* --- Registry ----------------------------------------------------------- */
42
Walter Dörwald0ae29812002-10-31 13:36:29 +000043PyDoc_STRVAR(register__doc__,
44"register(search_function)\n\
45\n\
46Register a codec search function. Search functions are expected to take\n\
47one argument, the encoding name in all lower case letters, and return\n\
Walter Dörwald219336a2007-07-19 13:04:38 +000048a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
49(or a CodecInfo object).");
Walter Dörwald0ae29812002-10-31 13:36:29 +000050
Guido van Rossume2d67f92000-03-10 23:09:23 +000051static
Georg Brandl96a8c392006-05-29 21:04:52 +000052PyObject *codec_register(PyObject *self, PyObject *search_function)
Guido van Rossume2d67f92000-03-10 23:09:23 +000053{
Guido van Rossume2d67f92000-03-10 23:09:23 +000054 if (PyCodec_Register(search_function))
Georg Brandl96a8c392006-05-29 21:04:52 +000055 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000056
Georg Brandl96a8c392006-05-29 21:04:52 +000057 Py_RETURN_NONE;
Guido van Rossume2d67f92000-03-10 23:09:23 +000058}
59
Walter Dörwald0ae29812002-10-31 13:36:29 +000060PyDoc_STRVAR(lookup__doc__,
Walter Dörwald219336a2007-07-19 13:04:38 +000061"lookup(encoding) -> CodecInfo\n\
Walter Dörwald0ae29812002-10-31 13:36:29 +000062\n\
63Looks up a codec tuple in the Python codec registry and returns\n\
Walter Dörwald219336a2007-07-19 13:04:38 +000064a tuple of function (or a CodecInfo object).");
Walter Dörwald0ae29812002-10-31 13:36:29 +000065
Guido van Rossume2d67f92000-03-10 23:09:23 +000066static
Marc-André Lemburg3f419742004-07-10 12:06:10 +000067PyObject *codec_lookup(PyObject *self, PyObject *args)
Guido van Rossume2d67f92000-03-10 23:09:23 +000068{
69 char *encoding;
70
71 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
Georg Brandl96a8c392006-05-29 21:04:52 +000072 return NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +000073
74 return _PyCodec_Lookup(encoding);
Guido van Rossume2d67f92000-03-10 23:09:23 +000075}
76
Marc-André Lemburg3f419742004-07-10 12:06:10 +000077PyDoc_STRVAR(encode__doc__,
78"encode(obj, [encoding[,errors]]) -> object\n\
79\n\
80Encodes obj using the codec registered for encoding. encoding defaults\n\
81to the default encoding. errors may be given to set a different error\n\
82handling scheme. Default is 'strict' meaning that encoding errors raise\n\
83a ValueError. Other possible values are 'ignore', 'replace' and\n\
84'xmlcharrefreplace' as well as any other name registered with\n\
85codecs.register_error that can handle ValueErrors.");
86
87static PyObject *
88codec_encode(PyObject *self, PyObject *args)
89{
Brett Cannon3e377de2004-07-10 21:41:14 +000090 const char *encoding = NULL;
91 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +000092 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +000093
Marc-André Lemburg3f419742004-07-10 12:06:10 +000094 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
95 return NULL;
96
Martin v. Löwise2713be2005-03-08 15:03:08 +000097#ifdef Py_USING_UNICODE
Marc-André Lemburg3f419742004-07-10 12:06:10 +000098 if (encoding == NULL)
99 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwise2713be2005-03-08 15:03:08 +0000100#else
101 if (encoding == NULL) {
102 PyErr_SetString(PyExc_ValueError, "no encoding specified");
103 return NULL;
104 }
105#endif
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000106
107 /* Encode via the codec registry */
Georg Brandl96a8c392006-05-29 21:04:52 +0000108 return PyCodec_Encode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000109}
110
111PyDoc_STRVAR(decode__doc__,
112"decode(obj, [encoding[,errors]]) -> object\n\
113\n\
114Decodes obj using the codec registered for encoding. encoding defaults\n\
115to the default encoding. errors may be given to set a different error\n\
116handling scheme. Default is 'strict' meaning that encoding errors raise\n\
117a ValueError. Other possible values are 'ignore' and 'replace'\n\
118as well as any other name registerd with codecs.register_error that is\n\
119able to handle ValueErrors.");
120
121static PyObject *
122codec_decode(PyObject *self, PyObject *args)
123{
Brett Cannon3e377de2004-07-10 21:41:14 +0000124 const char *encoding = NULL;
125 const char *errors = NULL;
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000126 PyObject *v;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000127
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000128 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
129 return NULL;
130
Martin v. Löwise2713be2005-03-08 15:03:08 +0000131#ifdef Py_USING_UNICODE
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000132 if (encoding == NULL)
133 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwise2713be2005-03-08 15:03:08 +0000134#else
135 if (encoding == NULL) {
136 PyErr_SetString(PyExc_ValueError, "no encoding specified");
137 return NULL;
138 }
139#endif
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000140
141 /* Decode via the codec registry */
Georg Brandl96a8c392006-05-29 21:04:52 +0000142 return PyCodec_Decode(v, encoding, errors);
Marc-André Lemburg3f419742004-07-10 12:06:10 +0000143}
144
Guido van Rossume2d67f92000-03-10 23:09:23 +0000145/* --- Helpers ------------------------------------------------------------ */
146
147static
148PyObject *codec_tuple(PyObject *unicode,
Martin v. Löwis66851282006-04-22 11:40:03 +0000149 Py_ssize_t len)
Guido van Rossume2d67f92000-03-10 23:09:23 +0000150{
Georg Brandl96a8c392006-05-29 21:04:52 +0000151 PyObject *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000152 if (unicode == NULL)
Georg Brandl96a8c392006-05-29 21:04:52 +0000153 return NULL;
154 v = Py_BuildValue("On", unicode, len);
155 Py_DECREF(unicode);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000156 return v;
157}
158
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000159/* --- String codecs ------------------------------------------------------ */
160static PyObject *
161escape_decode(PyObject *self,
162 PyObject *args)
163{
164 const char *errors = NULL;
165 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000166 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000167
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000168 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
169 &data, &size, &errors))
170 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000171 return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000172 size);
173}
174
175static PyObject *
176escape_encode(PyObject *self,
177 PyObject *args)
178{
179 PyObject *str;
180 const char *errors = NULL;
181 char *buf;
Martin v. Löwis66851282006-04-22 11:40:03 +0000182 Py_ssize_t len;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000183
184 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
185 &PyString_Type, &str, &errors))
186 return NULL;
187
188 str = PyString_Repr(str, 0);
189 if (!str)
190 return NULL;
191
192 /* The string will be quoted. Unquote, similar to unicode-escape. */
193 buf = PyString_AS_STRING (str);
194 len = PyString_GET_SIZE (str);
195 memmove(buf, buf+1, len-2);
Neal Norwitz6f5ff3f2006-08-12 01:43:40 +0000196 if (_PyString_Resize(&str, len-2) < 0)
197 return NULL;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000198
199 return codec_tuple(str, PyString_Size(str));
200}
201
202#ifdef Py_USING_UNICODE
Guido van Rossume2d67f92000-03-10 23:09:23 +0000203/* --- Decoder ------------------------------------------------------------ */
204
205static PyObject *
206unicode_internal_decode(PyObject *self,
207 PyObject *args)
208{
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000209 PyObject *obj;
210 const char *errors = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000211 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000212 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000213
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000214 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
215 &obj, &errors))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000216 return NULL;
217
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000218 if (PyUnicode_Check(obj)) {
219 Py_INCREF(obj);
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000220 return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
Marc-André Lemburg29273c82003-02-04 19:35:03 +0000221 }
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000222 else {
223 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
224 return NULL;
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000225
226 return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000227 size);
228 }
Guido van Rossume2d67f92000-03-10 23:09:23 +0000229}
230
231static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000232utf_7_decode(PyObject *self,
233 PyObject *args)
234{
235 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000236 Py_ssize_t size;
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000237 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000238
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000239 if (!PyArg_ParseTuple(args, "t#|z:utf_7_decode",
240 &data, &size, &errors))
241 return NULL;
242
243 return codec_tuple(PyUnicode_DecodeUTF7(data, size, errors),
244 size);
245}
246
247static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000248utf_8_decode(PyObject *self,
249 PyObject *args)
250{
251 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000252 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000253 const char *errors = NULL;
Walter Dörwald69652032004-09-07 20:24:22 +0000254 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000255 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000256 PyObject *decoded = NULL;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000257
Walter Dörwald69652032004-09-07 20:24:22 +0000258 if (!PyArg_ParseTuple(args, "t#|zi:utf_8_decode",
259 &data, &size, &errors, &final))
260 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000261 if (size < 0) {
262 PyErr_SetString(PyExc_ValueError, "negative argument");
263 return 0;
264 }
Walter Dörwald69652032004-09-07 20:24:22 +0000265 consumed = size;
266
267 decoded = PyUnicode_DecodeUTF8Stateful(data, size, errors,
268 final ? NULL : &consumed);
269 if (decoded == NULL)
270 return NULL;
271 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000272}
273
274static PyObject *
275utf_16_decode(PyObject *self,
276 PyObject *args)
277{
278 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000279 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000280 const char *errors = NULL;
281 int byteorder = 0;
Walter Dörwald69652032004-09-07 20:24:22 +0000282 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000283 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000284 PyObject *decoded;
285
286 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_decode",
287 &data, &size, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000288 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000289 if (size < 0) {
290 PyErr_SetString(PyExc_ValueError, "negative argument");
291 return 0;
292 }
Martin v. Löwisd532ba02006-05-27 08:54:29 +0000293 consumed = size; /* This is overwritten unless final is true. */
Walter Dörwald69652032004-09-07 20:24:22 +0000294 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
295 final ? NULL : &consumed);
296 if (decoded == NULL)
297 return NULL;
298 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000299}
300
301static PyObject *
302utf_16_le_decode(PyObject *self,
303 PyObject *args)
304{
305 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000306 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000307 const char *errors = NULL;
308 int byteorder = -1;
Walter Dörwald69652032004-09-07 20:24:22 +0000309 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000310 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000311 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000312
Walter Dörwald69652032004-09-07 20:24:22 +0000313 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_le_decode",
314 &data, &size, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000315 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000316
Martin v. Löwis18e16552006-02-15 17:27:45 +0000317 if (size < 0) {
318 PyErr_SetString(PyExc_ValueError, "negative argument");
319 return 0;
320 }
Martin v. Löwisd532ba02006-05-27 08:54:29 +0000321 consumed = size; /* This is overwritten unless final is true. */
Walter Dörwald69652032004-09-07 20:24:22 +0000322 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
323 &byteorder, final ? NULL : &consumed);
324 if (decoded == NULL)
325 return NULL;
326 return codec_tuple(decoded, consumed);
327
Guido van Rossume2d67f92000-03-10 23:09:23 +0000328}
329
330static PyObject *
331utf_16_be_decode(PyObject *self,
332 PyObject *args)
333{
334 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000335 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000336 const char *errors = NULL;
337 int byteorder = 1;
Walter Dörwald69652032004-09-07 20:24:22 +0000338 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000339 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000340 PyObject *decoded = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000341
Walter Dörwald69652032004-09-07 20:24:22 +0000342 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_be_decode",
343 &data, &size, &errors, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000344 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000345 if (size < 0) {
346 PyErr_SetString(PyExc_ValueError, "negative argument");
347 return 0;
348 }
Martin v. Löwisd532ba02006-05-27 08:54:29 +0000349 consumed = size; /* This is overwritten unless final is true. */
Walter Dörwald69652032004-09-07 20:24:22 +0000350 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
351 &byteorder, final ? NULL : &consumed);
352 if (decoded == NULL)
353 return NULL;
354 return codec_tuple(decoded, consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000355}
356
/* This non-standard version also provides access to the byteorder
   parameter of the builtin UTF-16 codec.

   It returns a tuple (unicode, bytesread, byteorder) with byteorder
   being the value in effect at the end of data.

*/
364
365static PyObject *
366utf_16_ex_decode(PyObject *self,
367 PyObject *args)
368{
369 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000370 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000371 const char *errors = NULL;
372 int byteorder = 0;
373 PyObject *unicode, *tuple;
Walter Dörwald69652032004-09-07 20:24:22 +0000374 int final = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000375 Py_ssize_t consumed;
Walter Dörwald69652032004-09-07 20:24:22 +0000376
377 if (!PyArg_ParseTuple(args, "t#|zii:utf_16_ex_decode",
378 &data, &size, &errors, &byteorder, &final))
Guido van Rossume2d67f92000-03-10 23:09:23 +0000379 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000380 if (size < 0) {
381 PyErr_SetString(PyExc_ValueError, "negative argument");
382 return 0;
383 }
Martin v. Löwisd532ba02006-05-27 08:54:29 +0000384 consumed = size; /* This is overwritten unless final is true. */
Walter Dörwald69652032004-09-07 20:24:22 +0000385 unicode = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
386 final ? NULL : &consumed);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000387 if (unicode == NULL)
388 return NULL;
Georg Brandl96a8c392006-05-29 21:04:52 +0000389 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
Guido van Rossume2d67f92000-03-10 23:09:23 +0000390 Py_DECREF(unicode);
391 return tuple;
392}
393
394static PyObject *
Walter Dörwald6e390802007-08-17 16:41:28 +0000395utf_32_decode(PyObject *self,
396 PyObject *args)
397{
398 const char *data;
399 Py_ssize_t size;
400 const char *errors = NULL;
401 int byteorder = 0;
402 int final = 0;
403 Py_ssize_t consumed;
404 PyObject *decoded;
405
406 if (!PyArg_ParseTuple(args, "t#|zi:utf_32_decode",
407 &data, &size, &errors, &final))
408 return NULL;
409 if (size < 0) {
410 PyErr_SetString(PyExc_ValueError, "negative argument");
411 return 0;
412 }
413 consumed = size; /* This is overwritten unless final is true. */
414 decoded = PyUnicode_DecodeUTF32Stateful(data, size, errors, &byteorder,
415 final ? NULL : &consumed);
416 if (decoded == NULL)
417 return NULL;
418 return codec_tuple(decoded, consumed);
419}
420
421static PyObject *
422utf_32_le_decode(PyObject *self,
423 PyObject *args)
424{
425 const char *data;
426 Py_ssize_t size;
427 const char *errors = NULL;
428 int byteorder = -1;
429 int final = 0;
430 Py_ssize_t consumed;
431 PyObject *decoded = NULL;
432
433 if (!PyArg_ParseTuple(args, "t#|zi:utf_32_le_decode",
434 &data, &size, &errors, &final))
435 return NULL;
436
437 if (size < 0) {
438 PyErr_SetString(PyExc_ValueError, "negative argument");
439 return 0;
440 }
441 consumed = size; /* This is overwritten unless final is true. */
442 decoded = PyUnicode_DecodeUTF32Stateful(data, size, errors,
443 &byteorder, final ? NULL : &consumed);
444 if (decoded == NULL)
445 return NULL;
446 return codec_tuple(decoded, consumed);
447
448}
449
450static PyObject *
451utf_32_be_decode(PyObject *self,
452 PyObject *args)
453{
454 const char *data;
455 Py_ssize_t size;
456 const char *errors = NULL;
457 int byteorder = 1;
458 int final = 0;
459 Py_ssize_t consumed;
460 PyObject *decoded = NULL;
461
462 if (!PyArg_ParseTuple(args, "t#|zi:utf_32_be_decode",
463 &data, &size, &errors, &final))
464 return NULL;
465 if (size < 0) {
466 PyErr_SetString(PyExc_ValueError, "negative argument");
467 return 0;
468 }
469 consumed = size; /* This is overwritten unless final is true. */
470 decoded = PyUnicode_DecodeUTF32Stateful(data, size, errors,
471 &byteorder, final ? NULL : &consumed);
472 if (decoded == NULL)
473 return NULL;
474 return codec_tuple(decoded, consumed);
475}
476
/* This non-standard version also provides access to the byteorder
   parameter of the builtin UTF-32 codec.

   It returns a tuple (unicode, bytesread, byteorder) with byteorder
   being the value in effect at the end of data.

*/
484
485static PyObject *
486utf_32_ex_decode(PyObject *self,
487 PyObject *args)
488{
489 const char *data;
490 Py_ssize_t size;
491 const char *errors = NULL;
492 int byteorder = 0;
493 PyObject *unicode, *tuple;
494 int final = 0;
495 Py_ssize_t consumed;
496
497 if (!PyArg_ParseTuple(args, "t#|zii:utf_32_ex_decode",
498 &data, &size, &errors, &byteorder, &final))
499 return NULL;
500 if (size < 0) {
501 PyErr_SetString(PyExc_ValueError, "negative argument");
502 return 0;
503 }
504 consumed = size; /* This is overwritten unless final is true. */
505 unicode = PyUnicode_DecodeUTF32Stateful(data, size, errors, &byteorder,
506 final ? NULL : &consumed);
507 if (unicode == NULL)
508 return NULL;
509 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
510 Py_DECREF(unicode);
511 return tuple;
512}
513
514static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000515unicode_escape_decode(PyObject *self,
516 PyObject *args)
517{
518 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000519 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000520 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000521
Guido van Rossume2d67f92000-03-10 23:09:23 +0000522 if (!PyArg_ParseTuple(args, "t#|z:unicode_escape_decode",
523 &data, &size, &errors))
524 return NULL;
525
526 return codec_tuple(PyUnicode_DecodeUnicodeEscape(data, size, errors),
527 size);
528}
529
530static PyObject *
531raw_unicode_escape_decode(PyObject *self,
532 PyObject *args)
533{
534 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000535 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000536 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000537
Guido van Rossume2d67f92000-03-10 23:09:23 +0000538 if (!PyArg_ParseTuple(args, "t#|z:raw_unicode_escape_decode",
539 &data, &size, &errors))
540 return NULL;
541
542 return codec_tuple(PyUnicode_DecodeRawUnicodeEscape(data, size, errors),
543 size);
544}
545
546static PyObject *
547latin_1_decode(PyObject *self,
548 PyObject *args)
549{
550 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000551 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000552 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000553
Guido van Rossume2d67f92000-03-10 23:09:23 +0000554 if (!PyArg_ParseTuple(args, "t#|z:latin_1_decode",
555 &data, &size, &errors))
556 return NULL;
557
558 return codec_tuple(PyUnicode_DecodeLatin1(data, size, errors),
559 size);
560}
561
562static PyObject *
563ascii_decode(PyObject *self,
564 PyObject *args)
565{
566 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000567 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000568 const char *errors = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000569
Guido van Rossume2d67f92000-03-10 23:09:23 +0000570 if (!PyArg_ParseTuple(args, "t#|z:ascii_decode",
571 &data, &size, &errors))
572 return NULL;
573
574 return codec_tuple(PyUnicode_DecodeASCII(data, size, errors),
575 size);
576}
577
578static PyObject *
579charmap_decode(PyObject *self,
580 PyObject *args)
581{
582 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000583 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000584 const char *errors = NULL;
585 PyObject *mapping = NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000586
Guido van Rossume2d67f92000-03-10 23:09:23 +0000587 if (!PyArg_ParseTuple(args, "t#|zO:charmap_decode",
588 &data, &size, &errors, &mapping))
589 return NULL;
590 if (mapping == Py_None)
591 mapping = NULL;
592
593 return codec_tuple(PyUnicode_DecodeCharmap(data, size, mapping, errors),
594 size);
595}
596
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)

/* mbcs_decode(data[, errors[, final]]) -> (unicode, bytes consumed)

   Decodes with PyUnicode_DecodeMBCSStateful().  When `final` is true
   the decoder is given no "consumed" slot and the full input length is
   reported; otherwise the decoder's own count is reported. */
static PyObject *
mbcs_decode(PyObject *self, PyObject *args)
{
    const char *input;
    const char *errors = NULL;
    Py_ssize_t input_len;
    Py_ssize_t used;
    int final = 0;
    PyObject *text;

    if (!PyArg_ParseTuple(args, "t#|zi:mbcs_decode",
                          &input, &input_len, &errors, &final)) {
        return NULL;
    }

    if (final) {
        text = PyUnicode_DecodeMBCSStateful(input, input_len, errors, NULL);
        used = input_len;
    }
    else {
        text = PyUnicode_DecodeMBCSStateful(input, input_len, errors, &used);
    }

    if (text == NULL) {
        return NULL;
    }
    return codec_tuple(text, used);
}

#endif /* MS_WINDOWS */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000621
Guido van Rossume2d67f92000-03-10 23:09:23 +0000622/* --- Encoder ------------------------------------------------------------ */
623
624static PyObject *
625readbuffer_encode(PyObject *self,
626 PyObject *args)
627{
628 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000629 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000630 const char *errors = NULL;
631
632 if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
633 &data, &size, &errors))
634 return NULL;
635
636 return codec_tuple(PyString_FromStringAndSize(data, size),
637 size);
638}
639
640static PyObject *
641charbuffer_encode(PyObject *self,
642 PyObject *args)
643{
644 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000645 Py_ssize_t size;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000646 const char *errors = NULL;
647
648 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
649 &data, &size, &errors))
650 return NULL;
651
652 return codec_tuple(PyString_FromStringAndSize(data, size),
653 size);
654}
655
656static PyObject *
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000657unicode_internal_encode(PyObject *self,
658 PyObject *args)
659{
660 PyObject *obj;
661 const char *errors = NULL;
662 const char *data;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000663 Py_ssize_t size;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000664
Marc-André Lemburgb425f5e2000-09-21 21:09:45 +0000665 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
666 &obj, &errors))
667 return NULL;
668
669 if (PyUnicode_Check(obj)) {
670 data = PyUnicode_AS_DATA(obj);
671 size = PyUnicode_GET_DATA_SIZE(obj);
672 return codec_tuple(PyString_FromStringAndSize(data, size),
673 size);
674 }
675 else {
676 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
677 return NULL;
678 return codec_tuple(PyString_FromStringAndSize(data, size),
679 size);
680 }
681}
682
683static PyObject *
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000684utf_7_encode(PyObject *self,
685 PyObject *args)
686{
687 PyObject *str, *v;
688 const char *errors = NULL;
689
690 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
691 &str, &errors))
692 return NULL;
693
694 str = PyUnicode_FromObject(str);
695 if (str == NULL)
696 return NULL;
697 v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
698 PyUnicode_GET_SIZE(str),
Georg Brandl96a8c392006-05-29 21:04:52 +0000699 0,
700 0,
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000701 errors),
702 PyUnicode_GET_SIZE(str));
703 Py_DECREF(str);
704 return v;
705}
706
707static PyObject *
Guido van Rossume2d67f92000-03-10 23:09:23 +0000708utf_8_encode(PyObject *self,
709 PyObject *args)
710{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000711 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000712 const char *errors = NULL;
713
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000714 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000715 &str, &errors))
716 return NULL;
717
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000718 str = PyUnicode_FromObject(str);
719 if (str == NULL)
720 return NULL;
721 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
722 PyUnicode_GET_SIZE(str),
723 errors),
724 PyUnicode_GET_SIZE(str));
725 Py_DECREF(str);
726 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000727}
728
/* This version provides access to the byteorder parameter of the
   builtin UTF-16 codecs as optional third argument. It defaults to 0
   which means: use the native byte order and prepend the data with a
   BOM.

*/
735
736static PyObject *
737utf_16_encode(PyObject *self,
738 PyObject *args)
739{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000740 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000741 const char *errors = NULL;
742 int byteorder = 0;
743
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000744 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000745 &str, &errors, &byteorder))
746 return NULL;
747
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000748 str = PyUnicode_FromObject(str);
749 if (str == NULL)
750 return NULL;
751 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
752 PyUnicode_GET_SIZE(str),
753 errors,
754 byteorder),
755 PyUnicode_GET_SIZE(str));
756 Py_DECREF(str);
757 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000758}
759
760static PyObject *
761utf_16_le_encode(PyObject *self,
762 PyObject *args)
763{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000764 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000765 const char *errors = NULL;
766
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000767 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000768 &str, &errors))
769 return NULL;
770
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000771 str = PyUnicode_FromObject(str);
772 if (str == NULL)
773 return NULL;
774 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000775 PyUnicode_GET_SIZE(str),
776 errors,
777 -1),
778 PyUnicode_GET_SIZE(str));
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000779 Py_DECREF(str);
780 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000781}
782
783static PyObject *
784utf_16_be_encode(PyObject *self,
785 PyObject *args)
786{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000787 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000788 const char *errors = NULL;
789
Marc-André Lemburg4157dd52001-06-17 18:32:36 +0000790 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000791 &str, &errors))
792 return NULL;
793
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000794 str = PyUnicode_FromObject(str);
795 if (str == NULL)
796 return NULL;
797 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
798 PyUnicode_GET_SIZE(str),
799 errors,
800 +1),
801 PyUnicode_GET_SIZE(str));
802 Py_DECREF(str);
803 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000804}
805
/* This version provides access to the byteorder parameter of the
   builtin UTF-32 codecs as optional third argument. It defaults to 0
   which means: use the native byte order and prepend the data with a
   BOM.

*/
812
813static PyObject *
814utf_32_encode(PyObject *self,
815 PyObject *args)
816{
817 PyObject *str, *v;
818 const char *errors = NULL;
819 int byteorder = 0;
820
821 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
822 &str, &errors, &byteorder))
823 return NULL;
824
825 str = PyUnicode_FromObject(str);
826 if (str == NULL)
827 return NULL;
828 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
829 PyUnicode_GET_SIZE(str),
830 errors,
831 byteorder),
832 PyUnicode_GET_SIZE(str));
833 Py_DECREF(str);
834 return v;
835}
836
837static PyObject *
838utf_32_le_encode(PyObject *self,
839 PyObject *args)
840{
841 PyObject *str, *v;
842 const char *errors = NULL;
843
844 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
845 &str, &errors))
846 return NULL;
847
848 str = PyUnicode_FromObject(str);
849 if (str == NULL)
850 return NULL;
851 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
852 PyUnicode_GET_SIZE(str),
853 errors,
854 -1),
855 PyUnicode_GET_SIZE(str));
856 Py_DECREF(str);
857 return v;
858}
859
860static PyObject *
861utf_32_be_encode(PyObject *self,
862 PyObject *args)
863{
864 PyObject *str, *v;
865 const char *errors = NULL;
866
867 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
868 &str, &errors))
869 return NULL;
870
871 str = PyUnicode_FromObject(str);
872 if (str == NULL)
873 return NULL;
874 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
875 PyUnicode_GET_SIZE(str),
876 errors,
877 +1),
878 PyUnicode_GET_SIZE(str));
879 Py_DECREF(str);
880 return v;
881}
882
Guido van Rossume2d67f92000-03-10 23:09:23 +0000883static PyObject *
884unicode_escape_encode(PyObject *self,
885 PyObject *args)
886{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000887 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000888 const char *errors = NULL;
889
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000890 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000891 &str, &errors))
892 return NULL;
893
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000894 str = PyUnicode_FromObject(str);
895 if (str == NULL)
896 return NULL;
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000897 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000898 PyUnicode_GET_SIZE(str)),
899 PyUnicode_GET_SIZE(str));
900 Py_DECREF(str);
901 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000902}
903
904static PyObject *
905raw_unicode_escape_encode(PyObject *self,
906 PyObject *args)
907{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000908 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000909 const char *errors = NULL;
910
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000911 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000912 &str, &errors))
913 return NULL;
914
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000915 str = PyUnicode_FromObject(str);
916 if (str == NULL)
917 return NULL;
918 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000919 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000920 PyUnicode_GET_SIZE(str)),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000921 PyUnicode_GET_SIZE(str));
922 Py_DECREF(str);
923 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000924}
925
926static PyObject *
927latin_1_encode(PyObject *self,
928 PyObject *args)
929{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000930 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000931 const char *errors = NULL;
932
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000933 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000934 &str, &errors))
935 return NULL;
936
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000937 str = PyUnicode_FromObject(str);
938 if (str == NULL)
939 return NULL;
940 v = codec_tuple(PyUnicode_EncodeLatin1(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000941 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000942 PyUnicode_GET_SIZE(str),
943 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000944 PyUnicode_GET_SIZE(str));
945 Py_DECREF(str);
946 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000947}
948
949static PyObject *
950ascii_encode(PyObject *self,
951 PyObject *args)
952{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000953 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000954 const char *errors = NULL;
955
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000956 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000957 &str, &errors))
958 return NULL;
959
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000960 str = PyUnicode_FromObject(str);
961 if (str == NULL)
962 return NULL;
963 v = codec_tuple(PyUnicode_EncodeASCII(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000964 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000965 PyUnicode_GET_SIZE(str),
966 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000967 PyUnicode_GET_SIZE(str));
968 Py_DECREF(str);
969 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000970}
971
972static PyObject *
973charmap_encode(PyObject *self,
974 PyObject *args)
975{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000976 PyObject *str, *v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000977 const char *errors = NULL;
978 PyObject *mapping = NULL;
979
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000980 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
Guido van Rossume2d67f92000-03-10 23:09:23 +0000981 &str, &errors, &mapping))
982 return NULL;
983 if (mapping == Py_None)
984 mapping = NULL;
985
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000986 str = PyUnicode_FromObject(str);
987 if (str == NULL)
988 return NULL;
989 v = codec_tuple(PyUnicode_EncodeCharmap(
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000990 PyUnicode_AS_UNICODE(str),
Guido van Rossume2d67f92000-03-10 23:09:23 +0000991 PyUnicode_GET_SIZE(str),
Walter Dörwald9fd115c2005-11-02 08:30:08 +0000992 mapping,
Guido van Rossume2d67f92000-03-10 23:09:23 +0000993 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +0000994 PyUnicode_GET_SIZE(str));
995 Py_DECREF(str);
996 return v;
Guido van Rossume2d67f92000-03-10 23:09:23 +0000997}
998
Martin v. Löwis3f767792006-06-04 19:36:28 +0000999static PyObject*
1000charmap_build(PyObject *self, PyObject *args)
1001{
1002 PyObject *map;
1003 if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
1004 return NULL;
1005 return PyUnicode_BuildEncodingMap(map);
1006}
1007
Martin v. Löwis6238d2b2002-06-30 15:26:10 +00001008#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Guido van Rossum24bdb042000-03-28 20:29:59 +00001009
1010static PyObject *
1011mbcs_encode(PyObject *self,
1012 PyObject *args)
1013{
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001014 PyObject *str, *v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001015 const char *errors = NULL;
1016
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001017 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
Guido van Rossum24bdb042000-03-28 20:29:59 +00001018 &str, &errors))
1019 return NULL;
1020
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001021 str = PyUnicode_FromObject(str);
1022 if (str == NULL)
1023 return NULL;
1024 v = codec_tuple(PyUnicode_EncodeMBCS(
Walter Dörwald9fd115c2005-11-02 08:30:08 +00001025 PyUnicode_AS_UNICODE(str),
Guido van Rossum24bdb042000-03-28 20:29:59 +00001026 PyUnicode_GET_SIZE(str),
1027 errors),
Marc-André Lemburg5f0e29e2000-07-05 11:24:13 +00001028 PyUnicode_GET_SIZE(str));
1029 Py_DECREF(str);
1030 return v;
Guido van Rossum24bdb042000-03-28 20:29:59 +00001031}
1032
Martin v. Löwis6238d2b2002-06-30 15:26:10 +00001033#endif /* MS_WINDOWS */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001034#endif /* Py_USING_UNICODE */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001035
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001036/* --- Error handler registry --------------------------------------------- */
1037
Walter Dörwald0ae29812002-10-31 13:36:29 +00001038PyDoc_STRVAR(register_error__doc__,
1039"register_error(errors, handler)\n\
1040\n\
1041Register the specified error handler under the name\n\
1042errors. handler must be a callable object, that\n\
1043will be called with an exception instance containing\n\
1044information about the location of the encoding/decoding\n\
1045error and must return a (replacement, new position) tuple.");
1046
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001047static PyObject *register_error(PyObject *self, PyObject *args)
1048{
1049 const char *name;
1050 PyObject *handler;
1051
1052 if (!PyArg_ParseTuple(args, "sO:register_error",
1053 &name, &handler))
1054 return NULL;
1055 if (PyCodec_RegisterError(name, handler))
1056 return NULL;
Georg Brandl96a8c392006-05-29 21:04:52 +00001057 Py_RETURN_NONE;
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001058}
1059
Walter Dörwald0ae29812002-10-31 13:36:29 +00001060PyDoc_STRVAR(lookup_error__doc__,
1061"lookup_error(errors) -> handler\n\
1062\n\
1063Return the error handler for the specified error handling name\n\
1064or raise a LookupError, if no handler exists under this name.");
1065
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001066static PyObject *lookup_error(PyObject *self, PyObject *args)
1067{
1068 const char *name;
1069
1070 if (!PyArg_ParseTuple(args, "s:lookup_error",
1071 &name))
1072 return NULL;
1073 return PyCodec_LookupError(name);
1074}
1075
Guido van Rossume2d67f92000-03-10 23:09:23 +00001076/* --- Module API --------------------------------------------------------- */
1077
1078static PyMethodDef _codecs_functions[] = {
Georg Brandl96a8c392006-05-29 21:04:52 +00001079 {"register", codec_register, METH_O,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001080 register__doc__},
Marc-André Lemburg3f419742004-07-10 12:06:10 +00001081 {"lookup", codec_lookup, METH_VARARGS,
Walter Dörwald0ae29812002-10-31 13:36:29 +00001082 lookup__doc__},
Brett Cannon3e377de2004-07-10 21:41:14 +00001083 {"encode", codec_encode, METH_VARARGS,
1084 encode__doc__},
1085 {"decode", codec_decode, METH_VARARGS,
1086 decode__doc__},
Martin v. Löwis8a8da792002-08-14 07:46:28 +00001087 {"escape_encode", escape_encode, METH_VARARGS},
1088 {"escape_decode", escape_decode, METH_VARARGS},
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001089#ifdef Py_USING_UNICODE
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001090 {"utf_8_encode", utf_8_encode, METH_VARARGS},
1091 {"utf_8_decode", utf_8_decode, METH_VARARGS},
1092 {"utf_7_encode", utf_7_encode, METH_VARARGS},
1093 {"utf_7_decode", utf_7_decode, METH_VARARGS},
1094 {"utf_16_encode", utf_16_encode, METH_VARARGS},
1095 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1096 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1097 {"utf_16_decode", utf_16_decode, METH_VARARGS},
1098 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1099 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1100 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
Walter Dörwald6e390802007-08-17 16:41:28 +00001101 {"utf_32_encode", utf_32_encode, METH_VARARGS},
1102 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1103 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1104 {"utf_32_decode", utf_32_decode, METH_VARARGS},
1105 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1106 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1107 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001108 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1109 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1110 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1111 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1112 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1113 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1114 {"latin_1_encode", latin_1_encode, METH_VARARGS},
1115 {"latin_1_decode", latin_1_decode, METH_VARARGS},
1116 {"ascii_encode", ascii_encode, METH_VARARGS},
1117 {"ascii_decode", ascii_decode, METH_VARARGS},
1118 {"charmap_encode", charmap_encode, METH_VARARGS},
1119 {"charmap_decode", charmap_decode, METH_VARARGS},
Martin v. Löwis3f767792006-06-04 19:36:28 +00001120 {"charmap_build", charmap_build, METH_VARARGS},
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001121 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
1122 {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
Martin v. Löwis6238d2b2002-06-30 15:26:10 +00001123#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
Martin v. Löwis43b936d2002-01-17 23:15:58 +00001124 {"mbcs_encode", mbcs_encode, METH_VARARGS},
1125 {"mbcs_decode", mbcs_decode, METH_VARARGS},
Guido van Rossum24bdb042000-03-28 20:29:59 +00001126#endif
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001127#endif /* Py_USING_UNICODE */
Walter Dörwald0ae29812002-10-31 13:36:29 +00001128 {"register_error", register_error, METH_VARARGS,
1129 register_error__doc__},
1130 {"lookup_error", lookup_error, METH_VARARGS,
1131 lookup_error__doc__},
Guido van Rossume2d67f92000-03-10 23:09:23 +00001132 {NULL, NULL} /* sentinel */
1133};
1134
Mark Hammondfe51c6d2002-08-02 02:27:13 +00001135PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00001136init_codecs(void)
Guido van Rossume2d67f92000-03-10 23:09:23 +00001137{
1138 Py_InitModule("_codecs", _codecs_functions);
1139}