- Fix segfault with invalid coding.
- SF bug #772896: an unknown encoding resulted in a MemoryError, which is not helpful; a SyntaxError ("unknown encoding: ...") is now raised instead.
I will only backport the segfault fix. I'll let Anthony decide if he wants
the other changes backported. I will do the backport if asked.
diff --git a/Lib/test/bad_coding.py b/Lib/test/bad_coding.py
new file mode 100644
index 0000000..971b0a8
--- /dev/null
+++ b/Lib/test/bad_coding.py
@@ -0,0 +1 @@
+# -*- coding: uft-8 -*-
diff --git a/Lib/test/test_coding.py b/Lib/test/test_coding.py
new file mode 100644
index 0000000..aa7241d
--- /dev/null
+++ b/Lib/test/test_coding.py
@@ -0,0 +1,21 @@
+
+import test.test_support, unittest
+import os
+
+class CodingTest(unittest.TestCase):
+ def test_bad_coding(self):
+ module_name = 'bad_coding'
+ self.assertRaises(SyntaxError, __import__, 'test.' + module_name)
+
+ path = os.path.dirname(__file__)
+ filename = os.path.join(path, module_name + '.py')
+ fp = open(filename)
+ text = fp.read()
+ fp.close()
+ self.assertRaises(SyntaxError, compile, text, filename, 'exec')
+
+def test_main():
+ test.test_support.run_unittest(CodingTest)
+
+if __name__ == "__main__":
+ test_main()
diff --git a/Misc/NEWS b/Misc/NEWS
index 2c58dae..bf40e9d 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,10 @@
Core and builtins
-----------------
+- Fix segfault with invalid coding.
+
+- SF bug #772896: unknown encoding results in MemoryError.
+
- All iterators now have a Boolean value of true. Formerly, some iterators
supported a __len__() method which evaluated to False when the iterator
was empty.
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index 1fa9739..1d25437 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -42,7 +42,7 @@
initerr(err_ret, filename);
if ((tok = PyTokenizer_FromString(s)) == NULL) {
- err_ret->error = E_NOMEM;
+ err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
return NULL;
}
diff --git a/Parser/pgenmain.c b/Parser/pgenmain.c
index 64485eb..695e2b7 100644
--- a/Parser/pgenmain.c
+++ b/Parser/pgenmain.c
@@ -116,6 +116,13 @@
return g;
}
+/* Can't happen in pgen */
+PyObject*
+PyErr_Occurred()
+{
+ return 0;
+}
+
void
Py_FatalError(const char *msg)
{
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 6957cc9..ce61322 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -603,8 +603,11 @@
if (tok->enc != NULL) {
assert(utf8 == NULL);
utf8 = translate_into_utf8(str, tok->enc);
- if (utf8 == NULL)
+ if (utf8 == NULL) {
+ PyErr_Format(PyExc_SyntaxError,
+ "unknown encoding: %s", tok->enc);
return NULL;
+ }
str = PyString_AsString(utf8);
}
#endif
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index 68948fc..e007f98 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -1487,7 +1487,7 @@
msg = "unknown decode error";
Py_DECREF(type);
Py_DECREF(value);
- Py_DECREF(tb);
+ Py_XDECREF(tb);
break;
}
case E_LINECONT: