Issue #14629: Raise SyntaxError in tokenize.detect_encoding
if the first two lines have non-UTF-8 characters without an encoding declaration.
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index f575e9b..f283c6d 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -292,9 +292,12 @@
def find_cookie(line):
try:
- line_string = line.decode('ascii')
+ # Decode as UTF-8. Either the line is an encoding declaration,
+ # in which case it should be pure ASCII, or it must be UTF-8
+ # per default encoding.
+ line_string = line.decode('utf-8')
except UnicodeDecodeError:
- return None
+ raise SyntaxError("invalid or missing encoding declaration")
matches = cookie_re.findall(line_string)
if not matches: