Issue #18038: SyntaxError raised during compilation of sources with illegal
encoding now always contains an encoding name.
diff --git a/Lib/test/test_pep263.py b/Lib/test/test_pep263.py
index 9286467..4b60624 100644
--- a/Lib/test/test_pep263.py
+++ b/Lib/test/test_pep263.py
@@ -41,6 +41,24 @@
# two bytes in common with the UTF-8 BOM
self.assertRaises(SyntaxError, eval, '\xef\xbb\x20')
+ def test_error_message(self):
+ compile('# -*- coding: iso-8859-15 -*-\n', 'dummy', 'exec')
+ compile('\xef\xbb\xbf\n', 'dummy', 'exec')
+ compile('\xef\xbb\xbf# -*- coding: utf-8 -*-\n', 'dummy', 'exec')
+ with self.assertRaisesRegexp(SyntaxError, 'fake'):
+ compile('# -*- coding: fake -*-\n', 'dummy', 'exec')
+ with self.assertRaisesRegexp(SyntaxError, 'iso-8859-15'):
+ compile('\xef\xbb\xbf# -*- coding: iso-8859-15 -*-\n',
+ 'dummy', 'exec')
+ with self.assertRaisesRegexp(SyntaxError, 'BOM'):
+ compile('\xef\xbb\xbf# -*- coding: iso-8859-15 -*-\n',
+ 'dummy', 'exec')
+ with self.assertRaisesRegexp(SyntaxError, 'fake'):
+ compile('\xef\xbb\xbf# -*- coding: fake -*-\n', 'dummy', 'exec')
+ with self.assertRaisesRegexp(SyntaxError, 'BOM'):
+ compile('\xef\xbb\xbf# -*- coding: fake -*-\n', 'dummy', 'exec')
+
+
def test_main():
test_support.run_unittest(PEP263Test)
diff --git a/Misc/NEWS b/Misc/NEWS
index 93d2c09..784011a 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -9,6 +9,9 @@
Core and Builtins
-----------------
+- Issue #18038: SyntaxError raised during compilation of sources with illegal
+ encoding now always contains an encoding name.
+
- Issue #18019: Fix crash in the repr of dictionaries containing their own
views.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index ee6313b..46cf9b2 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -277,8 +277,11 @@
tok->encoding = cs;
tok->decoding_state = -1;
}
- else
+ else {
+ PyErr_Format(PyExc_SyntaxError,
+ "encoding problem: %s", cs);
PyMem_FREE(cs);
+ }
#else
/* Without Unicode support, we cannot
process the coding spec. Since there
@@ -289,15 +292,12 @@
}
} else { /* then, compare cs with BOM */
r = (strcmp(tok->encoding, cs) == 0);
+ if (!r)
+ PyErr_Format(PyExc_SyntaxError,
+ "encoding problem: %s with BOM", cs);
PyMem_FREE(cs);
}
}
- if (!r) {
- cs = tok->encoding;
- if (!cs)
- cs = "with BOM";
- PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs);
- }
return r;
}