Modules/_codecsmodule.c - platform/external/python/cpython3 - Gitiles

 /* ------------------------------------------------------------------------

    _codecs -- Provides access to the codec registry and the builtin
               codecs.

    This module should never be imported directly. The standard library
    module "codecs" wraps this builtin module for use within Python.

    The codec registry is accessible via:

      register(search_function) -> None

      lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)

    The builtin Unicode codecs use the following interface:

      <encoding>_encode(Unicode_object[,errors='strict']) ->
      	(string object, bytes consumed)

      <encoding>_decode(char_buffer_obj[,errors='strict']) ->
         (Unicode object, bytes consumed)

    <encoding>_encode() interfaces also accept non-Unicode object as
    input. The objects are then converted to Unicode using
    PyUnicode_FromObject() prior to applying the conversion.

    These <encoding>s are available: utf_8, unicode_escape,
    raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
    mbcs (on win32).


 Written by Marc-Andre Lemburg (mal@lemburg.com).

 Copyright (c) Corporation for National Research Initiatives.

    ------------------------------------------------------------------------ */

 #include "Python.h"

 /* --- Registry ----------------------------------------------------------- */

 static
 PyObject *codecregister(PyObject *self, PyObject *args)
 {
     PyObject *search_function;

     if (!PyArg_ParseTuple(args, "O:register", &search_function))
         goto onError;

     if (PyCodec_Register(search_function))
 	goto onError;

     Py_INCREF(Py_None);
     return Py_None;

  onError:
     return NULL;
 }

 static
 PyObject *codeclookup(PyObject *self, PyObject *args)
 {
     char *encoding;

     if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
         goto onError;

     return _PyCodec_Lookup(encoding);

  onError:
     return NULL;
 }

 /* --- Helpers ------------------------------------------------------------ */

 static
 PyObject *codec_tuple(PyObject *unicode,
 		      int len)
 {
     PyObject *v,*w;

     if (unicode == NULL)
 	return NULL;
     v = PyTuple_New(2);
     if (v == NULL) {
 	Py_DECREF(unicode);
 	return NULL;
     }
     PyTuple_SET_ITEM(v,0,unicode);
     w = PyInt_FromLong(len);
     if (w == NULL) {
 	Py_DECREF(v);
 	return NULL;
     }
     PyTuple_SET_ITEM(v,1,w);
     return v;
 }

 /* --- String codecs ------------------------------------------------------ */
 static PyObject *
 escape_decode(PyObject *self,
 	      PyObject *args)
 {
     const char *errors = NULL;
     const char *data;
     int size;

     if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
 			  &data, &size, &errors))
 	return NULL;
     return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
 		       size);
 }

 static PyObject *
 escape_encode(PyObject *self,
 	      PyObject *args)
 {
 	PyObject *str;
 	const char *errors = NULL;
 	char *buf;
 	int len;

 	if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
 			      &PyString_Type, &str, &errors))
 		return NULL;

 	str = PyString_Repr(str, 0);
 	if (!str)
 		return NULL;

 	/* The string will be quoted. Unquote, similar to unicode-escape. */
 	buf = PyString_AS_STRING (str);
 	len = PyString_GET_SIZE (str);
 	memmove(buf, buf+1, len-2);
 	_PyString_Resize(&str, len-2);

 	return codec_tuple(str, PyString_Size(str));
 }

 #ifdef Py_USING_UNICODE
 /* --- Decoder ------------------------------------------------------------ */

 static PyObject *
 unicode_internal_decode(PyObject *self,
 			PyObject *args)
 {
     PyObject *obj;
     const char *errors = NULL;
     const char *data;
     int size;

     if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
 			  &obj, &errors))
 	return NULL;

     if (PyUnicode_Check(obj))
 	return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
     else {
 	if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
 	    return NULL;
 	return codec_tuple(PyUnicode_FromUnicode((Py_UNICODE *)data,
 						 size / sizeof(Py_UNICODE)),
 			   size);
     }
 }

 static PyObject *
 utf_7_decode(PyObject *self,
 	    PyObject *args)
 {
     const char *data;
     int size;
     const char *errors = NULL;

     if (!PyArg_ParseTuple(args, "t#|z:utf_7_decode",
 			  &data, &size, &errors))
 	return NULL;

     return codec_tuple(PyUnicode_DecodeUTF7(data, size, errors),
 		       size);
 }

 static PyObject *
 utf_8_decode(PyObject *self,
 	    PyObject *args)
 {
     const char *data;
     int size;
     const char *errors = NULL;

     if (!PyArg_ParseTuple(args, "t#|z:utf_8_decode",
 			  &data, &size, &errors))
 	return NULL;

     return codec_tuple(PyUnicode_DecodeUTF8(data, size, errors),
 		       size);
 }

 static PyObject *
 utf_16_decode(PyObject *self,
 	    PyObject *args)
 {
     const char *data;
     int size;
     const char *errors = NULL;
     int byteorder = 0;

     if (!PyArg_ParseTuple(args, "t#|z:utf_16_decode",
 			  &data, &size, &errors))
 	return NULL;
     return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
 		       size);
 }

 static PyObject *
 utf_16_le_decode(PyObject *self,
 		 PyObject *args)
 {
     const char *data;
     int size;
     const char *errors = NULL;
     int byteorder = -1;

     if (!PyArg_ParseTuple(args, "t#|z:utf_16_le_decode",
 			  &data, &size, &errors))
 	return NULL;
     return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
 		       size);
 }

 static PyObject *
 utf_16_be_decode(PyObject *self,
 		 PyObject *args)
 {
     const char *data;
     int size;
     const char *errors = NULL;
     int byteorder = 1;

     if (!PyArg_ParseTuple(args, "t#|z:utf_16_be_decode",
 			  &data, &size, &errors))
 	return NULL;
     return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
 		       size);
 }

 /* This non-standard version also provides access to the byteorder
    parameter of the builtin UTF-16 codec.

    It returns a tuple (unicode, bytesread, byteorder) with byteorder
    being the value in effect at the end of data.

 */

 static PyObject *
 utf_16_ex_decode(PyObject *self,
 		 PyObject *args)
 {
     const char *data;
     int size;
     const char *errors = NULL;
     int byteorder = 0;
     PyObject *unicode, *tuple;

     if (!PyArg_ParseTuple(args, "t#|zi:utf_16_ex_decode",
 			  &data, &size, &errors, &byteorder))
 	return NULL;

     unicode = PyUnicode_DecodeUTF16(data, size, errors, &byteorder);
     if (unicode == NULL)
 	return NULL;
     tuple = Py_BuildValue("Oii", unicode, size, byteorder);
     Py_DECREF(unicode);
     return tuple;
 }

 static PyObject *
 unicode_escape_decode(PyObject *self,
 		     PyObject *args)
 {
     const char *data;
     int size;
     const char *errors = NULL;

     if (!PyArg_ParseTuple(args, "t#|z:unicode_escape_decode",
 			  &data, &size, &errors))
 	return NULL;

     return codec_tuple(PyUnicode_DecodeUnicodeEscape(data, size, errors),
 		       size);
 }

 static PyObject *
 raw_unicode_escape_decode(PyObject *self,
 			PyObject *args)
 {
     const char *data;
     int size;
     const char *errors = NULL;

     if (!PyArg_ParseTuple(args, "t#|z:raw_unicode_escape_decode",
 			  &data, &size, &errors))
 	return NULL;

     return codec_tuple(PyUnicode_DecodeRawUnicodeEscape(data, size, errors),
 		       size);
 }

 static PyObject *
 latin_1_decode(PyObject *self,
 	       PyObject *args)
 {
     const char *data;
     int size;
     const char *errors = NULL;

     if (!PyArg_ParseTuple(args, "t#|z:latin_1_decode",
 			  &data, &size, &errors))
 	return NULL;

     return codec_tuple(PyUnicode_DecodeLatin1(data, size, errors),
 		       size);
 }

 static PyObject *
 ascii_decode(PyObject *self,
 	     PyObject *args)
 {
     const char *data;
     int size;
     const char *errors = NULL;

     if (!PyArg_ParseTuple(args, "t#|z:ascii_decode",
 			  &data, &size, &errors))
 	return NULL;

     return codec_tuple(PyUnicode_DecodeASCII(data, size, errors),
 		       size);
 }

 static PyObject *
 charmap_decode(PyObject *self,
 	       PyObject *args)
 {
     const char *data;
     int size;
     const char *errors = NULL;
     PyObject *mapping = NULL;

     if (!PyArg_ParseTuple(args, "t#|zO:charmap_decode",
 			  &data, &size, &errors, &mapping))
 	return NULL;
     if (mapping == Py_None)
 	mapping = NULL;

     return codec_tuple(PyUnicode_DecodeCharmap(data, size, mapping, errors),
 		       size);
 }

 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)

 static PyObject *
 mbcs_decode(PyObject *self,
 	    PyObject *args)
 {
     const char *data;
     int size;
     const char *errors = NULL;

     if (!PyArg_ParseTuple(args, "t#|z:mbcs_decode",
 			  &data, &size, &errors))
 	return NULL;

     return codec_tuple(PyUnicode_DecodeMBCS(data, size, errors),
 		       size);
 }

 #endif /* MS_WINDOWS */

 /* --- Encoder ------------------------------------------------------------ */

 static PyObject *
 readbuffer_encode(PyObject *self,
 		  PyObject *args)
 {
     const char *data;
     int size;
     const char *errors = NULL;

     if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
 			  &data, &size, &errors))
 	return NULL;

     return codec_tuple(PyString_FromStringAndSize(data, size),
 		       size);
 }

 static PyObject *
 charbuffer_encode(PyObject *self,
 		  PyObject *args)
 {
     const char *data;
     int size;
     const char *errors = NULL;

     if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
 			  &data, &size, &errors))
 	return NULL;

     return codec_tuple(PyString_FromStringAndSize(data, size),
 		       size);
 }

 static PyObject *
 unicode_internal_encode(PyObject *self,
 			PyObject *args)
 {
     PyObject *obj;
     const char *errors = NULL;
     const char *data;
     int size;

     if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
 			  &obj, &errors))
 	return NULL;

     if (PyUnicode_Check(obj)) {
 	data = PyUnicode_AS_DATA(obj);
 	size = PyUnicode_GET_DATA_SIZE(obj);
 	return codec_tuple(PyString_FromStringAndSize(data, size),
 			   size);
     }
     else {
 	if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
 	    return NULL;
 	return codec_tuple(PyString_FromStringAndSize(data, size),
 			   size);
     }
 }

 static PyObject *
 utf_7_encode(PyObject *self,
 	    PyObject *args)
 {
     PyObject *str, *v;
     const char *errors = NULL;

     if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
 			  &str, &errors))
 	return NULL;

     str = PyUnicode_FromObject(str);
     if (str == NULL)
 	return NULL;
     v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
 					 PyUnicode_GET_SIZE(str),
                      0,
                      0,
 					 errors),
 		    PyUnicode_GET_SIZE(str));
     Py_DECREF(str);
     return v;
 }

 static PyObject *
 utf_8_encode(PyObject *self,
 	    PyObject *args)
 {
     PyObject *str, *v;
     const char *errors = NULL;

     if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
 			  &str, &errors))
 	return NULL;

     str = PyUnicode_FromObject(str);
     if (str == NULL)
 	return NULL;
     v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
 					 PyUnicode_GET_SIZE(str),
 					 errors),
 		    PyUnicode_GET_SIZE(str));
     Py_DECREF(str);
     return v;
 }

 /* This version provides access to the byteorder parameter of the
    builtin UTF-16 codecs as optional third argument. It defaults to 0
    which means: use the native byte order and prepend the data with a
    BOM mark.

 */

 static PyObject *
 utf_16_encode(PyObject *self,
 	    PyObject *args)
 {
     PyObject *str, *v;
     const char *errors = NULL;
     int byteorder = 0;

     if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
 			  &str, &errors, &byteorder))
 	return NULL;

     str = PyUnicode_FromObject(str);
     if (str == NULL)
 	return NULL;
     v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
 					  PyUnicode_GET_SIZE(str),
 					  errors,
 					  byteorder),
 		    PyUnicode_GET_SIZE(str));
     Py_DECREF(str);
     return v;
 }

 static PyObject *
 utf_16_le_encode(PyObject *self,
 		 PyObject *args)
 {
     PyObject *str, *v;
     const char *errors = NULL;

     if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
 			  &str, &errors))
 	return NULL;

     str = PyUnicode_FromObject(str);
     if (str == NULL)
 	return NULL;
     v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
 					     PyUnicode_GET_SIZE(str),
 					     errors,
 					     -1),
 		       PyUnicode_GET_SIZE(str));
     Py_DECREF(str);
     return v;
 }

 static PyObject *
 utf_16_be_encode(PyObject *self,
 		 PyObject *args)
 {
     PyObject *str, *v;
     const char *errors = NULL;

     if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
 			  &str, &errors))
 	return NULL;

     str = PyUnicode_FromObject(str);
     if (str == NULL)
 	return NULL;
     v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
 					  PyUnicode_GET_SIZE(str),
 					  errors,
 					  +1),
 		    PyUnicode_GET_SIZE(str));
     Py_DECREF(str);
     return v;
 }

 static PyObject *
 unicode_escape_encode(PyObject *self,
 		     PyObject *args)
 {
     PyObject *str, *v;
     const char *errors = NULL;

     if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
 			  &str, &errors))
 	return NULL;

     str = PyUnicode_FromObject(str);
     if (str == NULL)
 	return NULL;
     v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
 						  PyUnicode_GET_SIZE(str)),
 		    PyUnicode_GET_SIZE(str));
     Py_DECREF(str);
     return v;
 }

 static PyObject *
 raw_unicode_escape_encode(PyObject *self,
 			PyObject *args)
 {
     PyObject *str, *v;
     const char *errors = NULL;

     if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
 			  &str, &errors))
 	return NULL;

     str = PyUnicode_FromObject(str);
     if (str == NULL)
 	return NULL;
     v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
 			       PyUnicode_AS_UNICODE(str),
 			       PyUnicode_GET_SIZE(str)),
 		    PyUnicode_GET_SIZE(str));
     Py_DECREF(str);
     return v;
 }

 static PyObject *
 latin_1_encode(PyObject *self,
 	       PyObject *args)
 {
     PyObject *str, *v;
     const char *errors = NULL;

     if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
 			  &str, &errors))
 	return NULL;

     str = PyUnicode_FromObject(str);
     if (str == NULL)
 	return NULL;
     v = codec_tuple(PyUnicode_EncodeLatin1(
 			       PyUnicode_AS_UNICODE(str),
 			       PyUnicode_GET_SIZE(str),
 			       errors),
 		    PyUnicode_GET_SIZE(str));
     Py_DECREF(str);
     return v;
 }

 static PyObject *
 ascii_encode(PyObject *self,
 	     PyObject *args)
 {
     PyObject *str, *v;
     const char *errors = NULL;

     if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
 			  &str, &errors))
 	return NULL;

     str = PyUnicode_FromObject(str);
     if (str == NULL)
 	return NULL;
     v = codec_tuple(PyUnicode_EncodeASCII(
 			       PyUnicode_AS_UNICODE(str),
 			       PyUnicode_GET_SIZE(str),
 			       errors),
 		    PyUnicode_GET_SIZE(str));
     Py_DECREF(str);
     return v;
 }

 static PyObject *
 charmap_encode(PyObject *self,
 	     PyObject *args)
 {
     PyObject *str, *v;
     const char *errors = NULL;
     PyObject *mapping = NULL;

     if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
 			  &str, &errors, &mapping))
 	return NULL;
     if (mapping == Py_None)
 	mapping = NULL;

     str = PyUnicode_FromObject(str);
     if (str == NULL)
 	return NULL;
     v = codec_tuple(PyUnicode_EncodeCharmap(
 			       PyUnicode_AS_UNICODE(str),
 			       PyUnicode_GET_SIZE(str),
 			       mapping,
 			       errors),
 		    PyUnicode_GET_SIZE(str));
     Py_DECREF(str);
     return v;
 }

 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)

 static PyObject *
 mbcs_encode(PyObject *self,
 	    PyObject *args)
 {
     PyObject *str, *v;
     const char *errors = NULL;

     if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
 			  &str, &errors))
 	return NULL;

     str = PyUnicode_FromObject(str);
     if (str == NULL)
 	return NULL;
     v = codec_tuple(PyUnicode_EncodeMBCS(
 			       PyUnicode_AS_UNICODE(str),
 			       PyUnicode_GET_SIZE(str),
 			       errors),
 		    PyUnicode_GET_SIZE(str));
     Py_DECREF(str);
     return v;
 }

 #endif /* MS_WINDOWS */
 #endif /* Py_USING_UNICODE */

 /* --- Module API --------------------------------------------------------- */

 static PyMethodDef _codecs_functions[] = {
     {"register",		codecregister,			METH_VARARGS},
     {"lookup",			codeclookup, 			METH_VARARGS},
     {"escape_encode",		escape_encode,			METH_VARARGS},
     {"escape_decode",		escape_decode,			METH_VARARGS},
 #ifdef Py_USING_UNICODE
     {"utf_8_encode",		utf_8_encode,			METH_VARARGS},
     {"utf_8_decode",		utf_8_decode,			METH_VARARGS},
     {"utf_7_encode",		utf_7_encode,			METH_VARARGS},
     {"utf_7_decode",		utf_7_decode,			METH_VARARGS},
     {"utf_16_encode",		utf_16_encode,			METH_VARARGS},
     {"utf_16_le_encode",	utf_16_le_encode,		METH_VARARGS},
     {"utf_16_be_encode",	utf_16_be_encode,		METH_VARARGS},
     {"utf_16_decode",		utf_16_decode,			METH_VARARGS},
     {"utf_16_le_decode",	utf_16_le_decode,		METH_VARARGS},
     {"utf_16_be_decode",	utf_16_be_decode,		METH_VARARGS},
     {"utf_16_ex_decode",	utf_16_ex_decode,		METH_VARARGS},
     {"unicode_escape_encode",	unicode_escape_encode,		METH_VARARGS},
     {"unicode_escape_decode",	unicode_escape_decode,		METH_VARARGS},
     {"unicode_internal_encode",	unicode_internal_encode,	METH_VARARGS},
     {"unicode_internal_decode",	unicode_internal_decode,	METH_VARARGS},
     {"raw_unicode_escape_encode", raw_unicode_escape_encode,	METH_VARARGS},
     {"raw_unicode_escape_decode", raw_unicode_escape_decode,	METH_VARARGS},
     {"latin_1_encode", 		latin_1_encode,			METH_VARARGS},
     {"latin_1_decode", 		latin_1_decode,			METH_VARARGS},
     {"ascii_encode", 		ascii_encode,			METH_VARARGS},
     {"ascii_decode", 		ascii_decode,			METH_VARARGS},
     {"charmap_encode", 		charmap_encode,			METH_VARARGS},
     {"charmap_decode", 		charmap_decode,			METH_VARARGS},
     {"readbuffer_encode",	readbuffer_encode,		METH_VARARGS},
     {"charbuffer_encode",	charbuffer_encode,		METH_VARARGS},
 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
     {"mbcs_encode", 		mbcs_encode,			METH_VARARGS},
     {"mbcs_decode", 		mbcs_decode,			METH_VARARGS},
 #endif
 #endif /* Py_USING_UNICODE */
     {NULL, NULL}		/* sentinel */
 };

 PyMODINIT_FUNC
 init_codecs(void)
 {
     Py_InitModule("_codecs", _codecs_functions);
 }
	/* ------------------------------------------------------------------------

	_codecs -- Provides access to the codec registry and the builtin
	codecs.

	This module should never be imported directly. The standard library
	module "codecs" wraps this builtin module for use within Python.

	The codec registry is accessible via:

	register(search_function) -> None

	lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)

	The builtin Unicode codecs use the following interface:

	<encoding>_encode(Unicode_object[,errors='strict']) ->
	(string object, bytes consumed)

	<encoding>_decode(char_buffer_obj[,errors='strict']) ->
	(Unicode object, bytes consumed)

	<encoding>_encode() interfaces also accept non-Unicode object as
	input. The objects are then converted to Unicode using
	PyUnicode_FromObject() prior to applying the conversion.

	These <encoding>s are available: utf_8, unicode_escape,
	raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
	mbcs (on win32).


	Written by Marc-Andre Lemburg (mal@lemburg.com).

	Copyright (c) Corporation for National Research Initiatives.

	------------------------------------------------------------------------ */

	#include "Python.h"

	/* --- Registry ----------------------------------------------------------- */

	static
	PyObject codecregister(PyObject self, PyObject *args)
	{
	PyObject *search_function;

	if (!PyArg_ParseTuple(args, "O:register", &search_function))
	goto onError;

	if (PyCodec_Register(search_function))
	goto onError;

	Py_INCREF(Py_None);
	return Py_None;

	onError:
	return NULL;
	}

	static
	PyObject codeclookup(PyObject self, PyObject *args)
	{
	char *encoding;

	if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
	goto onError;

	return _PyCodec_Lookup(encoding);

	onError:
	return NULL;
	}

	/* --- Helpers ------------------------------------------------------------ */

	static
	PyObject codec_tuple(PyObject unicode,
	int len)
	{
	PyObject v,w;

	if (unicode == NULL)
	return NULL;
	v = PyTuple_New(2);
	if (v == NULL) {
	Py_DECREF(unicode);
	return NULL;
	}
	PyTuple_SET_ITEM(v,0,unicode);
	w = PyInt_FromLong(len);
	if (w == NULL) {
	Py_DECREF(v);
	return NULL;
	}
	PyTuple_SET_ITEM(v,1,w);
	return v;
	}

	/* --- String codecs ------------------------------------------------------ */
	static PyObject *
	escape_decode(PyObject *self,
	PyObject *args)
	{
	const char *errors = NULL;
	const char *data;
	int size;

	if (!PyArg_ParseTuple(args, "s#\|z:escape_decode",
	&data, &size, &errors))
	return NULL;
	return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
	size);
	}

	static PyObject *
	escape_encode(PyObject *self,
	PyObject *args)
	{
	PyObject *str;
	const char *errors = NULL;
	char *buf;
	int len;

	if (!PyArg_ParseTuple(args, "O!\|z:escape_encode",
	&PyString_Type, &str, &errors))
	return NULL;

	str = PyString_Repr(str, 0);
	if (!str)
	return NULL;

	/* The string will be quoted. Unquote, similar to unicode-escape. */
	buf = PyString_AS_STRING (str);
	len = PyString_GET_SIZE (str);
	memmove(buf, buf+1, len-2);
	_PyString_Resize(&str, len-2);

	return codec_tuple(str, PyString_Size(str));
	}

	#ifdef Py_USING_UNICODE
	/* --- Decoder ------------------------------------------------------------ */

	static PyObject *
	unicode_internal_decode(PyObject *self,
	PyObject *args)
	{
	PyObject *obj;
	const char *errors = NULL;
	const char *data;
	int size;

	if (!PyArg_ParseTuple(args, "O\|z:unicode_internal_decode",
	&obj, &errors))
	return NULL;

	if (PyUnicode_Check(obj))
	return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
	else {
	if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
	return NULL;
	return codec_tuple(PyUnicode_FromUnicode((Py_UNICODE *)data,
	size / sizeof(Py_UNICODE)),
	size);
	}
	}

	static PyObject *
	utf_7_decode(PyObject *self,
	PyObject *args)
	{
	const char *data;
	int size;
	const char *errors = NULL;

	if (!PyArg_ParseTuple(args, "t#\|z:utf_7_decode",
	&data, &size, &errors))
	return NULL;

	return codec_tuple(PyUnicode_DecodeUTF7(data, size, errors),
	size);
	}

	static PyObject *
	utf_8_decode(PyObject *self,
	PyObject *args)
	{
	const char *data;
	int size;
	const char *errors = NULL;

	if (!PyArg_ParseTuple(args, "t#\|z:utf_8_decode",
	&data, &size, &errors))
	return NULL;

	return codec_tuple(PyUnicode_DecodeUTF8(data, size, errors),
	size);
	}

	static PyObject *
	utf_16_decode(PyObject *self,
	PyObject *args)
	{
	const char *data;
	int size;
	const char *errors = NULL;
	int byteorder = 0;

	if (!PyArg_ParseTuple(args, "t#\|z:utf_16_decode",
	&data, &size, &errors))
	return NULL;
	return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
	size);
	}

	static PyObject *
	utf_16_le_decode(PyObject *self,
	PyObject *args)
	{
	const char *data;
	int size;
	const char *errors = NULL;
	int byteorder = -1;

	if (!PyArg_ParseTuple(args, "t#\|z:utf_16_le_decode",
	&data, &size, &errors))
	return NULL;
	return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
	size);
	}

	static PyObject *
	utf_16_be_decode(PyObject *self,
	PyObject *args)
	{
	const char *data;
	int size;
	const char *errors = NULL;
	int byteorder = 1;

	if (!PyArg_ParseTuple(args, "t#\|z:utf_16_be_decode",
	&data, &size, &errors))
	return NULL;
	return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
	size);
	}

	/* This non-standard version also provides access to the byteorder
	parameter of the builtin UTF-16 codec.

	It returns a tuple (unicode, bytesread, byteorder) with byteorder
	being the value in effect at the end of data.

	*/

	static PyObject *
	utf_16_ex_decode(PyObject *self,
	PyObject *args)
	{
	const char *data;
	int size;
	const char *errors = NULL;
	int byteorder = 0;
	PyObject unicode, tuple;

	if (!PyArg_ParseTuple(args, "t#\|zi:utf_16_ex_decode",
	&data, &size, &errors, &byteorder))
	return NULL;

	unicode = PyUnicode_DecodeUTF16(data, size, errors, &byteorder);
	if (unicode == NULL)
	return NULL;
	tuple = Py_BuildValue("Oii", unicode, size, byteorder);
	Py_DECREF(unicode);
	return tuple;
	}

	static PyObject *
	unicode_escape_decode(PyObject *self,
	PyObject *args)
	{
	const char *data;
	int size;
	const char *errors = NULL;

	if (!PyArg_ParseTuple(args, "t#\|z:unicode_escape_decode",
	&data, &size, &errors))
	return NULL;

	return codec_tuple(PyUnicode_DecodeUnicodeEscape(data, size, errors),
	size);
	}

	static PyObject *
	raw_unicode_escape_decode(PyObject *self,
	PyObject *args)
	{
	const char *data;
	int size;
	const char *errors = NULL;

	if (!PyArg_ParseTuple(args, "t#\|z:raw_unicode_escape_decode",
	&data, &size, &errors))
	return NULL;

	return codec_tuple(PyUnicode_DecodeRawUnicodeEscape(data, size, errors),
	size);
	}

	static PyObject *
	latin_1_decode(PyObject *self,
	PyObject *args)
	{
	const char *data;
	int size;
	const char *errors = NULL;

	if (!PyArg_ParseTuple(args, "t#\|z:latin_1_decode",
	&data, &size, &errors))
	return NULL;

	return codec_tuple(PyUnicode_DecodeLatin1(data, size, errors),
	size);
	}

	static PyObject *
	ascii_decode(PyObject *self,
	PyObject *args)
	{
	const char *data;
	int size;
	const char *errors = NULL;

	if (!PyArg_ParseTuple(args, "t#\|z:ascii_decode",
	&data, &size, &errors))
	return NULL;

	return codec_tuple(PyUnicode_DecodeASCII(data, size, errors),
	size);
	}

	static PyObject *
	charmap_decode(PyObject *self,
	PyObject *args)
	{
	const char *data;
	int size;
	const char *errors = NULL;
	PyObject *mapping = NULL;

	if (!PyArg_ParseTuple(args, "t#\|zO:charmap_decode",
	&data, &size, &errors, &mapping))
	return NULL;
	if (mapping == Py_None)
	mapping = NULL;

	return codec_tuple(PyUnicode_DecodeCharmap(data, size, mapping, errors),
	size);
	}

	#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)

	static PyObject *
	mbcs_decode(PyObject *self,
	PyObject *args)
	{
	const char *data;
	int size;
	const char *errors = NULL;

	if (!PyArg_ParseTuple(args, "t#\|z:mbcs_decode",
	&data, &size, &errors))
	return NULL;

	return codec_tuple(PyUnicode_DecodeMBCS(data, size, errors),
	size);
	}

	#endif /* MS_WINDOWS */

	/* --- Encoder ------------------------------------------------------------ */

	static PyObject *
	readbuffer_encode(PyObject *self,
	PyObject *args)
	{
	const char *data;
	int size;
	const char *errors = NULL;

	if (!PyArg_ParseTuple(args, "s#\|z:readbuffer_encode",
	&data, &size, &errors))
	return NULL;

	return codec_tuple(PyString_FromStringAndSize(data, size),
	size);
	}

	static PyObject *
	charbuffer_encode(PyObject *self,
	PyObject *args)
	{
	const char *data;
	int size;
	const char *errors = NULL;

	if (!PyArg_ParseTuple(args, "t#\|z:charbuffer_encode",
	&data, &size, &errors))
	return NULL;

	return codec_tuple(PyString_FromStringAndSize(data, size),
	size);
	}

	static PyObject *
	unicode_internal_encode(PyObject *self,
	PyObject *args)
	{
	PyObject *obj;
	const char *errors = NULL;
	const char *data;
	int size;

	if (!PyArg_ParseTuple(args, "O\|z:unicode_internal_encode",
	&obj, &errors))
	return NULL;

	if (PyUnicode_Check(obj)) {
	data = PyUnicode_AS_DATA(obj);
	size = PyUnicode_GET_DATA_SIZE(obj);
	return codec_tuple(PyString_FromStringAndSize(data, size),
	size);
	}
	else {
	if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
	return NULL;
	return codec_tuple(PyString_FromStringAndSize(data, size),
	size);
	}
	}

	static PyObject *
	utf_7_encode(PyObject *self,
	PyObject *args)
	{
	PyObject str, v;
	const char *errors = NULL;

	if (!PyArg_ParseTuple(args, "O\|z:utf_7_encode",
	&str, &errors))
	return NULL;

	str = PyUnicode_FromObject(str);
	if (str == NULL)
	return NULL;
	v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
	PyUnicode_GET_SIZE(str),
	0,
	0,
	errors),
	PyUnicode_GET_SIZE(str));
	Py_DECREF(str);
	return v;
	}

	static PyObject *
	utf_8_encode(PyObject *self,
	PyObject *args)
	{
	PyObject str, v;
	const char *errors = NULL;

	if (!PyArg_ParseTuple(args, "O\|z:utf_8_encode",
	&str, &errors))
	return NULL;

	str = PyUnicode_FromObject(str);
	if (str == NULL)
	return NULL;
	v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
	PyUnicode_GET_SIZE(str),
	errors),
	PyUnicode_GET_SIZE(str));
	Py_DECREF(str);
	return v;
	}

	/* This version provides access to the byteorder parameter of the
	builtin UTF-16 codecs as optional third argument. It defaults to 0
	which means: use the native byte order and prepend the data with a
	BOM mark.

	*/

	static PyObject *
	utf_16_encode(PyObject *self,
	PyObject *args)
	{
	PyObject str, v;
	const char *errors = NULL;
	int byteorder = 0;

	if (!PyArg_ParseTuple(args, "O\|zi:utf_16_encode",
	&str, &errors, &byteorder))
	return NULL;

	str = PyUnicode_FromObject(str);
	if (str == NULL)
	return NULL;
	v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
	PyUnicode_GET_SIZE(str),
	errors,
	byteorder),
	PyUnicode_GET_SIZE(str));
	Py_DECREF(str);
	return v;
	}

	static PyObject *
	utf_16_le_encode(PyObject *self,
	PyObject *args)
	{
	PyObject str, v;
	const char *errors = NULL;

	if (!PyArg_ParseTuple(args, "O\|z:utf_16_le_encode",
	&str, &errors))
	return NULL;

	str = PyUnicode_FromObject(str);
	if (str == NULL)
	return NULL;
	v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
	PyUnicode_GET_SIZE(str),
	errors,
	-1),
	PyUnicode_GET_SIZE(str));
	Py_DECREF(str);
	return v;
	}

	static PyObject *
	utf_16_be_encode(PyObject *self,
	PyObject *args)
	{
	PyObject str, v;
	const char *errors = NULL;

	if (!PyArg_ParseTuple(args, "O\|z:utf_16_be_encode",
	&str, &errors))
	return NULL;

	str = PyUnicode_FromObject(str);
	if (str == NULL)
	return NULL;
	v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
	PyUnicode_GET_SIZE(str),
	errors,
	+1),
	PyUnicode_GET_SIZE(str));
	Py_DECREF(str);
	return v;
	}

	static PyObject *
	unicode_escape_encode(PyObject *self,
	PyObject *args)
	{
	PyObject str, v;
	const char *errors = NULL;

	if (!PyArg_ParseTuple(args, "O\|z:unicode_escape_encode",
	&str, &errors))
	return NULL;

	str = PyUnicode_FromObject(str);
	if (str == NULL)
	return NULL;
	v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
	PyUnicode_GET_SIZE(str)),
	PyUnicode_GET_SIZE(str));
	Py_DECREF(str);
	return v;
	}

	static PyObject *
	raw_unicode_escape_encode(PyObject *self,
	PyObject *args)
	{
	PyObject str, v;
	const char *errors = NULL;

	if (!PyArg_ParseTuple(args, "O\|z:raw_unicode_escape_encode",
	&str, &errors))
	return NULL;

	str = PyUnicode_FromObject(str);
	if (str == NULL)
	return NULL;
	v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
	PyUnicode_AS_UNICODE(str),
	PyUnicode_GET_SIZE(str)),
	PyUnicode_GET_SIZE(str));
	Py_DECREF(str);
	return v;
	}

	static PyObject *
	latin_1_encode(PyObject *self,
	PyObject *args)
	{
	PyObject str, v;
	const char *errors = NULL;

	if (!PyArg_ParseTuple(args, "O\|z:latin_1_encode",
	&str, &errors))
	return NULL;

	str = PyUnicode_FromObject(str);
	if (str == NULL)
	return NULL;
	v = codec_tuple(PyUnicode_EncodeLatin1(
	PyUnicode_AS_UNICODE(str),
	PyUnicode_GET_SIZE(str),
	errors),
	PyUnicode_GET_SIZE(str));
	Py_DECREF(str);
	return v;
	}

	static PyObject *
	ascii_encode(PyObject *self,
	PyObject *args)
	{
	PyObject str, v;
	const char *errors = NULL;

	if (!PyArg_ParseTuple(args, "O\|z:ascii_encode",
	&str, &errors))
	return NULL;

	str = PyUnicode_FromObject(str);
	if (str == NULL)
	return NULL;
	v = codec_tuple(PyUnicode_EncodeASCII(
	PyUnicode_AS_UNICODE(str),
	PyUnicode_GET_SIZE(str),
	errors),
	PyUnicode_GET_SIZE(str));
	Py_DECREF(str);
	return v;
	}

	static PyObject *
	charmap_encode(PyObject *self,
	PyObject *args)
	{
	PyObject str, v;
	const char *errors = NULL;
	PyObject *mapping = NULL;

	if (!PyArg_ParseTuple(args, "O\|zO:charmap_encode",
	&str, &errors, &mapping))
	return NULL;
	if (mapping == Py_None)
	mapping = NULL;

	str = PyUnicode_FromObject(str);
	if (str == NULL)
	return NULL;
	v = codec_tuple(PyUnicode_EncodeCharmap(
	PyUnicode_AS_UNICODE(str),
	PyUnicode_GET_SIZE(str),
	mapping,
	errors),
	PyUnicode_GET_SIZE(str));
	Py_DECREF(str);
	return v;
	}

	#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)

	static PyObject *
	mbcs_encode(PyObject *self,
	PyObject *args)
	{
	PyObject str, v;
	const char *errors = NULL;

	if (!PyArg_ParseTuple(args, "O\|z:mbcs_encode",
	&str, &errors))
	return NULL;

	str = PyUnicode_FromObject(str);
	if (str == NULL)
	return NULL;
	v = codec_tuple(PyUnicode_EncodeMBCS(
	PyUnicode_AS_UNICODE(str),
	PyUnicode_GET_SIZE(str),
	errors),
	PyUnicode_GET_SIZE(str));
	Py_DECREF(str);
	return v;
	}

	#endif /* MS_WINDOWS */
	#endif /* Py_USING_UNICODE */

	/* --- Module API --------------------------------------------------------- */

	static PyMethodDef _codecs_functions[] = {
	{"register", codecregister, METH_VARARGS},
	{"lookup", codeclookup, METH_VARARGS},
	{"escape_encode", escape_encode, METH_VARARGS},
	{"escape_decode", escape_decode, METH_VARARGS},
	#ifdef Py_USING_UNICODE
	{"utf_8_encode", utf_8_encode, METH_VARARGS},
	{"utf_8_decode", utf_8_decode, METH_VARARGS},
	{"utf_7_encode", utf_7_encode, METH_VARARGS},
	{"utf_7_decode", utf_7_decode, METH_VARARGS},
	{"utf_16_encode", utf_16_encode, METH_VARARGS},
	{"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
	{"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
	{"utf_16_decode", utf_16_decode, METH_VARARGS},
	{"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
	{"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
	{"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
	{"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
	{"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
	{"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
	{"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
	{"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
	{"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
	{"latin_1_encode", latin_1_encode, METH_VARARGS},
	{"latin_1_decode", latin_1_decode, METH_VARARGS},
	{"ascii_encode", ascii_encode, METH_VARARGS},
	{"ascii_decode", ascii_decode, METH_VARARGS},
	{"charmap_encode", charmap_encode, METH_VARARGS},
	{"charmap_decode", charmap_decode, METH_VARARGS},
	{"readbuffer_encode", readbuffer_encode, METH_VARARGS},
	{"charbuffer_encode", charbuffer_encode, METH_VARARGS},
	#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
	{"mbcs_encode", mbcs_encode, METH_VARARGS},
	{"mbcs_decode", mbcs_decode, METH_VARARGS},
	#endif
	#endif /* Py_USING_UNICODE */
	{NULL, NULL} /* sentinel */
	};

	PyMODINIT_FUNC
	init_codecs(void)
	{
	Py_InitModule("_codecs", _codecs_functions);
	}