Issue #7649: Fix u'%c' % char when char is a byte string in range 0x80..0xFF:
it now raises a UnicodeDecodeError. Patch written by Ezio Melotti.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 130ca48..d80ff71 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -8170,6 +8170,7 @@
size_t buflen,
PyObject *v)
{
+ PyObject *s;
/* presume that the buffer is at least 2 characters long */
if (PyUnicode_Check(v)) {
if (PyUnicode_GET_SIZE(v) != 1)
@@ -8180,7 +8181,14 @@
else if (PyString_Check(v)) {
if (PyString_GET_SIZE(v) != 1)
goto onError;
- buf[0] = (Py_UNICODE)PyString_AS_STRING(v)[0];
+ /* #7649: if the char is a non-ascii (i.e. in range(0x80,0x100)) byte
+ string, "u'%c' % char" should fail with a UnicodeDecodeError */
+ s = PyUnicode_FromStringAndSize(PyString_AS_STRING(v), 1);
+ /* if the char is not decodable return -1 */
+ if (s == NULL)
+ return -1;
+ buf[0] = PyUnicode_AS_UNICODE(s)[0];
+ Py_DECREF(s);
}
else {