Make test_tokenize pass again: Add code to test_roundtrip() that figures out the encoding from the first two lines of the file. (We need to refactor this again to make it available to all places that need this, e.g. linecache.py.)

commit: cfbbf48e3e60438036f18f0376d7deb226873a8f [log] [tgz]
author: Guido van Rossum <guido@python.org> Sat Aug 04 17:43:15 2007 +0000
committer: Guido van Rossum <guido@python.org> Sat Aug 04 17:43:15 2007 +0000
tree: 623e8dc4d8f1b2bb1daec0df42bd115058e1ae62
parent: 1e2b760475e20c2c293c0a78a608012d7d70e3b4 [diff] [blame]
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 022b658..788a04b 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py

@@ -80,7 +80,10 @@
 
 """
 
+# ' Emacs hint
+
 import os, glob, random, time, sys
+import re
 from io import StringIO
 from test.test_support import (verbose, findfile, is_resource_enabled,
                                TestFailed)
@@ -96,7 +99,17 @@
 # tokenization doesn't match the first.
 def test_roundtrip(f):
     ## print 'Testing:', f
-    fobj = open(f)
+    # Get the encoding first
+    fobj = open(f, encoding="latin-1")
+    first2lines = fobj.readline() + fobj.readline()
+    fobj.close()
+    m = re.search(r"coding:\s*(\S+)", first2lines)
+    if m:
+        encoding = m.group(1)
+        print("    coding:", encoding)
+    else:
+        encoding = "utf-8"
+    fobj = open(f, encoding=encoding)
     try:
         fulltok = list(generate_tokens(fobj.readline))
     finally:
@@ -185,8 +198,6 @@
 
     testdir = os.path.dirname(f) or os.curdir
     testfiles = glob.glob(testdir + os.sep + 'test*.py')
-    # Exclude test_pep263 which is encoded in KOI8-R
-    testfiles = [t for t in testfiles if not t.endswith("pep263.py")]
     if not is_resource_enabled('compiler'):
         testfiles = random.sample(testfiles, 10)
commit	cfbbf48e3e60438036f18f0376d7deb226873a8f	[log] [tgz]
author	Guido van Rossum <guido@python.org>	Sat Aug 04 17:43:15 2007 +0000
committer	Guido van Rossum <guido@python.org>	Sat Aug 04 17:43:15 2007 +0000
tree	623e8dc4d8f1b2bb1daec0df42bd115058e1ae62
parent	1e2b760475e20c2c293c0a78a608012d7d70e3b4 [diff] [blame]