Complain if the codec named in a source coding cookie doesn't return unicode
diff --git a/Lib/test/bad_coding3.py b/Lib/test/bad_coding3.py
new file mode 100644
index 0000000..77836d9
--- /dev/null
+++ b/Lib/test/bad_coding3.py
@@ -0,0 +1,2 @@
+# coding: string-escape
+\x70\x72\x69\x6e\x74\x20\x32\x2b\x32\x0a
diff --git a/Lib/test/test_pep263.py b/Lib/test/test_pep263.py
index 4b60624..a3abc3c 100644
--- a/Lib/test/test_pep263.py
+++ b/Lib/test/test_pep263.py
@@ -58,6 +58,11 @@
with self.assertRaisesRegexp(SyntaxError, 'BOM'):
compile('\xef\xbb\xbf# -*- coding: fake -*-\n', 'dummy', 'exec')
+ def test_non_unicode_codec(self):
+ with self.assertRaisesRegexp(SyntaxError,
+ 'codec did not return a unicode'):
+ from test import bad_coding3
+
def test_main():
test_support.run_unittest(PEP263Test)
diff --git a/Misc/NEWS b/Misc/NEWS
index fe793e8..7fff780 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -9,6 +9,9 @@
Core and Builtins
-----------------
+- Raise a clear SyntaxError when the codec named in a file's coding cookie
+ does not return unicode.
+
- Issue #17976: Fixed potential problem with file.write() not detecting IO error
by inspecting the return value of fwrite(). Based on patches by Jaakko Moisio
and Victor Stinner.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 46cf9b2..249bb96 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -400,6 +400,12 @@
buf = PyObject_CallObject(tok->decoding_readline, NULL);
if (buf == NULL)
return error_ret(tok);
+ if (!PyUnicode_Check(buf)) {
+ Py_DECREF(buf);
+ PyErr_SetString(PyExc_SyntaxError,
+ "codec did not return a unicode object");
+ return error_ret(tok);
+ }
} else {
tok->decoding_buffer = NULL;
if (PyString_CheckExact(buf))