Fix and deprecate the unicode_internal codec
The unicode_internal codec uses Py_UNICODE instead of the real internal
representation (PEP 393: Py_UCS1, Py_UCS2 or Py_UCS4) for backward
compatibility.
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
index 727cf5e..93cb1b7 100644
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -675,18 +675,30 @@
PyObject *obj;
const char *errors = NULL;
const char *data;
- Py_ssize_t size;
+ Py_ssize_t len, size;
if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
&obj, &errors))
return NULL;
if (PyUnicode_Check(obj)) {
+ Py_UNICODE *u;
+
if (PyUnicode_READY(obj) < 0)
return NULL;
- data = PyUnicode_AS_DATA(obj);
- size = PyUnicode_GET_DATA_SIZE(obj);
- return codec_tuple(PyBytes_FromStringAndSize(data, size),
+
+ if (PyErr_WarnEx(PyExc_DeprecationWarning,
+ "unicode_internal codecs has been deprecated",
+ 1))
+ return NULL;
+
+ u = PyUnicode_AsUnicodeAndSize(obj, &len);
+ if (u == NULL)
+ return NULL;
+ if (len > PY_SSIZE_T_MAX / sizeof(Py_UNICODE))
+ return PyErr_NoMemory();
+ size = len * sizeof(Py_UNICODE);
+ return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size),
PyUnicode_GET_LENGTH(obj));
}
else {