Issue #22221: Backported fixes from Python 3 (issue #18960).
* The source encoding declaration on the second line is now ignored if
the first line contains anything except a comment. This also affects
compile(), eval() and exec().
* IDLE now ignores the source encoding declaration on the second line if the
first line contains anything except a comment (a blank or whitespace-only
first line is still allowed).
* 2to3 and the findnocoding.py script now ignore the source encoding
declaration on the second line if the first line contains anything except
a comment (a blank or whitespace-only first line is still allowed).
diff --git a/Lib/idlelib/IOBinding.py b/Lib/idlelib/IOBinding.py
index aedd372..e3affa8 100644
--- a/Lib/idlelib/IOBinding.py
+++ b/Lib/idlelib/IOBinding.py
@@ -72,6 +72,7 @@
encoding = encoding.lower()
coding_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)')
+blank_re = re.compile(r'^[ \t\f]*(?:[#\r\n]|$)')
class EncodingMessage(SimpleDialog):
"Inform user that an encoding declaration is needed."
@@ -130,6 +131,8 @@
match = coding_re.match(line)
if match is not None:
break
+ if not blank_re.match(line):
+ return None
else:
return None
name = match.group(1)
diff --git a/Lib/lib2to3/pgen2/tokenize.py b/Lib/lib2to3/pgen2/tokenize.py
index 4cb2a41..d64a3e6 100644
--- a/Lib/lib2to3/pgen2/tokenize.py
+++ b/Lib/lib2to3/pgen2/tokenize.py
@@ -237,6 +237,7 @@
toks_append(tokval)
cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)')
+blank_re = re.compile(r'^[ \t\f]*(?:[#\r\n]|$)')
def _get_normal_name(orig_enc):
"""Imitates get_normal_name in tokenizer.c."""
@@ -309,6 +310,8 @@
encoding = find_cookie(first)
if encoding:
return encoding, [first]
+ if not blank_re.match(first):
+ return default, [first]
second = read_or_stop()
if not second:
diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py
index ca396a9..cfc6389 100644
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -412,9 +412,24 @@
l = lambda: "foo"
self.assertIsNone(l.__doc__)
- def test_unicode_encoding(self):
+ @test_support.requires_unicode
+ def test_encoding(self):
+ code = b'# -*- coding: badencoding -*-\npass\n'
+ self.assertRaises(SyntaxError, compile, code, 'tmp', 'exec')
code = u"# -*- coding: utf-8 -*-\npass\n"
self.assertRaises(SyntaxError, compile, code, "tmp", "exec")
+ code = 'u"\xc2\xa4"\n'
+ self.assertEqual(eval(code), u'\xc2\xa4')
+ code = u'u"\xc2\xa4"\n'
+ self.assertEqual(eval(code), u'\xc2\xa4')
+ code = '# -*- coding: latin1 -*-\nu"\xc2\xa4"\n'
+ self.assertEqual(eval(code), u'\xc2\xa4')
+ code = '# -*- coding: utf-8 -*-\nu"\xc2\xa4"\n'
+ self.assertEqual(eval(code), u'\xa4')
+ code = '# -*- coding: iso8859-15 -*-\nu"\xc2\xa4"\n'
+ self.assertEqual(eval(code), test_support.u(r'\xc2\u20ac'))
+ code = 'u"""\\\n# -*- coding: utf-8 -*-\n\xc2\xa4"""\n'
+ self.assertEqual(eval(code), u'# -*- coding: utf-8 -*-\n\xc2\xa4')
def test_subscripts(self):
# SF bug 1448804