from test import support
from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                     STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
                     open as tokenize_open, Untokenizer, generate_tokens,
                     NEWLINE)
from io import BytesIO
import unittest
from unittest import TestCase, mock
from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
                               INVALID_UNDERSCORE_LITERALS)
import os
import token


# Converts a source string into a list of textual representations
# of the tokens such as:
# `    NAME       'if'          (1, 0) (1, 2)`
# to make writing tests easier.
def stringify_tokens_from_source(token_generator, source_string):
    result = []
    num_lines = len(source_string.splitlines())
    missing_trailing_nl = source_string[-1] not in '\r\n'

    for type, token, start, end, line in token_generator:
        if type == ENDMARKER:
            break
        # Ignore the newline on the last line if the input lacks one
        if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:
            continue
        type = tok_name[type]
        result.append(f"    {type:10} {token!r:13} {start} {end}")

    return result

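# A minimal usage sketch of the helper above (it assumes nothing beyond this
# module's imports); the implicit trailing NEWLINE is dropped here because
# the source lacks a trailing newline:
#
#     f = BytesIO("1 + 1".encode('utf-8'))
#     stringify_tokens_from_source(tokenize(f.readline), "1 + 1")
#     # ["    ENCODING   'utf-8'       (0, 0) (0, 0)",
#     #  "    NUMBER     '1'           (1, 0) (1, 1)",
#     #  "    OP         '+'           (1, 2) (1, 3)",
#     #  "    NUMBER     '1'           (1, 4) (1, 5)"]
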
class TokenizeTest(TestCase):
    # Tests for the tokenize module.

    # The tests can be really simple. Given a small fragment of source
    # code, print out a table with tokens. The ENDMARKER, ENCODING and
    # final NEWLINE are omitted for brevity.

    def check_tokenize(self, s, expected):
        # Format the tokens in s as a table.
        # The ENDMARKER and final NEWLINE are omitted.
        f = BytesIO(s.encode('utf-8'))
        result = stringify_tokens_from_source(tokenize(f.readline), s)

        self.assertEqual(result,
                         ["    ENCODING   'utf-8'       (0, 0) (0, 0)"] +
                         expected.rstrip().splitlines())

    def test_implicit_newline(self):
        # Make sure that the tokenizer puts in an implicit NEWLINE
        # when the input lacks a trailing newline.
        f = BytesIO("x".encode('utf-8'))
        tokens = list(tokenize(f.readline))
        self.assertEqual(tokens[-2].type, NEWLINE)
        self.assertEqual(tokens[-1].type, ENDMARKER)

    def test_basic(self):
        self.check_tokenize("1 + 1", """\
    NUMBER     '1'           (1, 0) (1, 1)
    OP         '+'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    """)
        self.check_tokenize("if False:\n"
                            "    # NL\n"
                            "    \n"
                            "    True = False # NEWLINE\n", """\
    NAME       'if'          (1, 0) (1, 2)
    NAME       'False'       (1, 3) (1, 8)
    OP         ':'           (1, 8) (1, 9)
    NEWLINE    '\\n'          (1, 9) (1, 10)
    COMMENT    '# NL'        (2, 4) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    NL         '\\n'          (3, 4) (3, 5)
    INDENT     '    '        (4, 0) (4, 4)
    NAME       'True'        (4, 4) (4, 8)
    OP         '='           (4, 9) (4, 10)
    NAME       'False'       (4, 11) (4, 16)
    COMMENT    '# NEWLINE'   (4, 17) (4, 26)
    NEWLINE    '\\n'          (4, 26) (4, 27)
    DEDENT     ''            (5, 0) (5, 0)
    """)
        indent_error_file = b"""\
def k(x):
    x += 2
  x += 5
"""
        readline = BytesIO(indent_error_file).readline
        with self.assertRaisesRegex(IndentationError,
                                    "unindent does not match any "
                                    "outer indentation level"):
            for tok in tokenize(readline):
                pass

    def test_int(self):
        # Ordinary integers and binary operators
        self.check_tokenize("0xff <= 255", """\
    NUMBER     '0xff'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0b10 <= 255", """\
    NUMBER     '0b10'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0o123 <= 0O123", """\
    NUMBER     '0o123'       (1, 0) (1, 5)
    OP         '<='          (1, 6) (1, 8)
    NUMBER     '0O123'       (1, 9) (1, 14)
    """)
        self.check_tokenize("1234567 > ~0x15", """\
    NUMBER     '1234567'     (1, 0) (1, 7)
    OP         '>'           (1, 8) (1, 9)
    OP         '~'           (1, 10) (1, 11)
    NUMBER     '0x15'        (1, 11) (1, 15)
    """)
        self.check_tokenize("2134568 != 1231515", """\
    NUMBER     '2134568'     (1, 0) (1, 7)
    OP         '!='          (1, 8) (1, 10)
    NUMBER     '1231515'     (1, 11) (1, 18)
    """)
        self.check_tokenize("(-124561-1) & 200000000", """\
    OP         '('           (1, 0) (1, 1)
    OP         '-'           (1, 1) (1, 2)
    NUMBER     '124561'      (1, 2) (1, 8)
    OP         '-'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         ')'           (1, 10) (1, 11)
    OP         '&'           (1, 12) (1, 13)
    NUMBER     '200000000'   (1, 14) (1, 23)
    """)
        self.check_tokenize("0xdeadbeef != -1", """\
    NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
    OP         '!='          (1, 11) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    NUMBER     '1'           (1, 15) (1, 16)
    """)
        self.check_tokenize("0xdeadc0de & 12345", """\
    NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
    OP         '&'           (1, 11) (1, 12)
    NUMBER     '12345'       (1, 13) (1, 18)
    """)
        self.check_tokenize("0xFF & 0x15 | 1234", """\
    NUMBER     '0xFF'        (1, 0) (1, 4)
    OP         '&'           (1, 5) (1, 6)
    NUMBER     '0x15'        (1, 7) (1, 11)
    OP         '|'           (1, 12) (1, 13)
    NUMBER     '1234'        (1, 14) (1, 18)
    """)

    def test_long(self):
        # Long integers
        self.check_tokenize("x = 0", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0'           (1, 4) (1, 5)
    """)
        self.check_tokenize("x = 0xfffffffffff", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0xfffffffffff' (1, 4) (1, 17)
    """)
        self.check_tokenize("x = 123141242151251616110", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '123141242151251616110' (1, 4) (1, 25)
    """)
        self.check_tokenize("x = -15921590215012591", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '-'           (1, 4) (1, 5)
    NUMBER     '15921590215012591' (1, 5) (1, 22)
    """)

    def test_float(self):
        # Floating point numbers
        self.check_tokenize("x = 3.14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 314159.", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '314159.'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = .314159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '.314159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3e14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3e14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3E123", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3E123'       (1, 4) (1, 9)
    """)
        self.check_tokenize("x+y = 3e-1230", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '+'           (1, 1) (1, 2)
    NAME       'y'           (1, 2) (1, 3)
    OP         '='           (1, 4) (1, 5)
    NUMBER     '3e-1230'     (1, 6) (1, 13)
    """)
        self.check_tokenize("x = 3.14e159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14e159'    (1, 4) (1, 12)
    """)

    def test_underscore_literals(self):
        def number_token(s):
            f = BytesIO(s.encode('utf-8'))
            for toktype, token, start, end, line in tokenize(f.readline):
                if toktype == NUMBER:
                    return token
            return 'invalid token'
        for lit in VALID_UNDERSCORE_LITERALS:
            if '(' in lit:
                # this won't work with compound complex inputs
                continue
            self.assertEqual(number_token(lit), lit)
        for lit in INVALID_UNDERSCORE_LITERALS:
            self.assertNotEqual(number_token(lit), lit)
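        # Illustrative values only (the real lists come from test_grammar):
        # a valid literal such as '1_000' comes back from number_token()
        # unchanged, while for an invalid spelling such as '1__000' the
        # NUMBER token ends before the doubled underscore, so the round-trip
        # comparison fails as the assertion above expects.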

    def test_string(self):
        # String literals
        self.check_tokenize("x = ''; y = \"\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "''"          (1, 4) (1, 6)
    OP         ';'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '='           (1, 10) (1, 11)
    STRING     '""'          (1, 12) (1, 14)
    """)
        self.check_tokenize("x = '\"'; y = \"'\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '\\'"\\''       (1, 4) (1, 7)
    OP         ';'           (1, 7) (1, 8)
    NAME       'y'           (1, 9) (1, 10)
    OP         '='           (1, 11) (1, 12)
    STRING     '"\\'"'        (1, 13) (1, 16)
    """)
        self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"doesn\\'t "' (1, 4) (1, 14)
    NAME       'shrink'      (1, 14) (1, 20)
    STRING     '", does it"' (1, 20) (1, 31)
    """)
        self.check_tokenize("x = 'abc' + 'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "'abc'"       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     "'ABC'"       (1, 12) (1, 17)
    """)
        self.check_tokenize('y = "ABC" + "ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"ABC"'       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     '"ABC"'       (1, 12) (1, 17)
    """)
        self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "r'abc'"      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     "r'ABC'"      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     "R'ABC'"      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     "R'ABC'"      (1, 31) (1, 37)
    """)
        self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'r"abc"'      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     'r"ABC"'      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     'R"ABC"'      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     'R"ABC"'      (1, 31) (1, 37)
    """)

        self.check_tokenize("u'abc' + U'abc'", """\
    STRING     "u'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "U'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('u"abc" + U"abc"', """\
    STRING     'u"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'U"abc"'      (1, 9) (1, 15)
    """)

        self.check_tokenize("b'abc' + B'abc'", """\
    STRING     "b'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "B'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('b"abc" + B"abc"', """\
    STRING     'b"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'B"abc"'      (1, 9) (1, 15)
    """)
        self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
    STRING     "br'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "bR'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Br'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "BR'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
    STRING     'br"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'bR"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Br"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'BR"abc"'     (1, 30) (1, 37)
    """)
        self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\
    STRING     "rb'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "rB'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Rb'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "RB'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\
    STRING     'rb"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'rB"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Rb"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'RB"abc"'     (1, 30) (1, 37)
    """)
        # Check 0, 1, and 2 character string prefixes.
        self.check_tokenize(r'"a\
de\
fg"', """\
    STRING     '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)
    """)
        self.check_tokenize(r'u"a\
de"', """\
    STRING     'u"a\\\\\\nde"\'  (1, 0) (2, 3)
    """)
        self.check_tokenize(r'rb"a\
d"', """\
    STRING     'rb"a\\\\\\nd"\'  (1, 0) (2, 2)
    """)
        self.check_tokenize(r'"""a\
b"""', """\
    STRING     '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'u"""a\
b"""', """\
    STRING     'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'rb"""a\
b\
c"""', """\
    STRING     'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)
    """)
        self.check_tokenize('f"abc"', """\
    STRING     'f"abc"'      (1, 0) (1, 6)
    """)
        self.check_tokenize('fR"a{b}c"', """\
    STRING     'fR"a{b}c"'   (1, 0) (1, 9)
    """)
        self.check_tokenize('f"""abc"""', """\
    STRING     'f\"\"\"abc\"\"\"'  (1, 0) (1, 10)
    """)
        self.check_tokenize(r'f"abc\
def"', """\
    STRING     'f"abc\\\\\\ndef"' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'Rf"abc\
def"', """\
    STRING     'Rf"abc\\\\\\ndef"' (1, 0) (2, 4)
    """)

    def test_function(self):
        self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd22'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ','           (1, 9) (1, 10)
    NAME       'b'           (1, 11) (1, 12)
    OP         ','           (1, 12) (1, 13)
    NAME       'c'           (1, 14) (1, 15)
    OP         '='           (1, 15) (1, 16)
    NUMBER     '2'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    NAME       'd'           (1, 19) (1, 20)
    OP         '='           (1, 20) (1, 21)
    NUMBER     '2'           (1, 21) (1, 22)
    OP         ','           (1, 22) (1, 23)
    OP         '*'           (1, 24) (1, 25)
    NAME       'k'           (1, 25) (1, 26)
    OP         ')'           (1, 26) (1, 27)
    OP         ':'           (1, 27) (1, 28)
    NAME       'pass'        (1, 29) (1, 33)
    """)
        self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd01v_'       (1, 4) (1, 9)
    OP         '('           (1, 9) (1, 10)
    NAME       'a'           (1, 10) (1, 11)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         ','           (1, 13) (1, 14)
    OP         '*'           (1, 15) (1, 16)
    NAME       'k'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    OP         '**'          (1, 19) (1, 21)
    NAME       'w'           (1, 21) (1, 22)
    OP         ')'           (1, 22) (1, 23)
    OP         ':'           (1, 23) (1, 24)
    NAME       'pass'        (1, 25) (1, 29)
    """)
        self.check_tokenize("def d23(a: str, b: int=3) -> int: pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd23'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ':'           (1, 9) (1, 10)
    NAME       'str'         (1, 11) (1, 14)
    OP         ','           (1, 14) (1, 15)
    NAME       'b'           (1, 16) (1, 17)
    OP         ':'           (1, 17) (1, 18)
    NAME       'int'         (1, 19) (1, 22)
    OP         '='           (1, 22) (1, 23)
    NUMBER     '3'           (1, 23) (1, 24)
    OP         ')'           (1, 24) (1, 25)
    OP         '->'          (1, 26) (1, 28)
    NAME       'int'         (1, 29) (1, 32)
    OP         ':'           (1, 32) (1, 33)
    NAME       'pass'        (1, 34) (1, 38)
    """)

    def test_comparison(self):
        # Comparison
        self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
                            "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
    NAME       'if'          (1, 0) (1, 2)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '<'           (1, 5) (1, 6)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    NUMBER     '1'           (1, 11) (1, 12)
    OP         '=='          (1, 13) (1, 15)
    NUMBER     '1'           (1, 16) (1, 17)
    OP         '>='          (1, 18) (1, 20)
    NUMBER     '5'           (1, 21) (1, 22)
    OP         '<='          (1, 23) (1, 25)
    NUMBER     '0x15'        (1, 26) (1, 30)
    OP         '<='          (1, 31) (1, 33)
    NUMBER     '0x12'        (1, 34) (1, 38)
    OP         '!='          (1, 39) (1, 41)
    NUMBER     '1'           (1, 42) (1, 43)
    NAME       'and'         (1, 44) (1, 47)
    NUMBER     '5'           (1, 48) (1, 49)
    NAME       'in'          (1, 50) (1, 52)
    NUMBER     '1'           (1, 53) (1, 54)
    NAME       'not'         (1, 55) (1, 58)
    NAME       'in'          (1, 59) (1, 61)
    NUMBER     '1'           (1, 62) (1, 63)
    NAME       'is'          (1, 64) (1, 66)
    NUMBER     '1'           (1, 67) (1, 68)
    NAME       'or'          (1, 69) (1, 71)
    NUMBER     '5'           (1, 72) (1, 73)
    NAME       'is'          (1, 74) (1, 76)
    NAME       'not'         (1, 77) (1, 80)
    NUMBER     '1'           (1, 81) (1, 82)
    OP         ':'           (1, 82) (1, 83)
    NAME       'pass'        (1, 84) (1, 88)
    """)

    def test_shift(self):
        # Shift
        self.check_tokenize("x = 1 << 1 >> 5", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '<<'          (1, 6) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '>>'          (1, 11) (1, 13)
    NUMBER     '5'           (1, 14) (1, 15)
    """)

    def test_additive(self):
        # Additive
        self.check_tokenize("x = 1 - y + 15 - 1 + 0x124 + z + a[5]", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '-'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '15'          (1, 12) (1, 14)
    OP         '-'           (1, 15) (1, 16)
    NUMBER     '1'           (1, 17) (1, 18)
    OP         '+'           (1, 19) (1, 20)
    NUMBER     '0x124'       (1, 21) (1, 26)
    OP         '+'           (1, 27) (1, 28)
    NAME       'z'           (1, 29) (1, 30)
    OP         '+'           (1, 31) (1, 32)
    NAME       'a'           (1, 33) (1, 34)
    OP         '['           (1, 34) (1, 35)
    NUMBER     '5'           (1, 35) (1, 36)
    OP         ']'           (1, 36) (1, 37)
    """)

    def test_multiplicative(self):
        # Multiplicative
        self.check_tokenize("x = 1//1*1/5*12%0x12@42", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '//'          (1, 5) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '/'           (1, 10) (1, 11)
    NUMBER     '5'           (1, 11) (1, 12)
    OP         '*'           (1, 12) (1, 13)
    NUMBER     '12'          (1, 13) (1, 15)
    OP         '%'           (1, 15) (1, 16)
    NUMBER     '0x12'        (1, 16) (1, 20)
    OP         '@'           (1, 20) (1, 21)
    NUMBER     '42'          (1, 21) (1, 23)
    """)

    def test_unary(self):
        # Unary
        self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
    OP         '~'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '^'           (1, 3) (1, 4)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '&'           (1, 7) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '|'           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '^'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    NUMBER     '1'           (1, 17) (1, 18)
    """)
        self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
    OP         '-'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '*'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '/'           (1, 4) (1, 5)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '+'           (1, 6) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '//'          (1, 10) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    OP         '-'           (1, 17) (1, 18)
    OP         '-'           (1, 18) (1, 19)
    NUMBER     '1'           (1, 19) (1, 20)
    OP         '**'          (1, 20) (1, 22)
    NUMBER     '1'           (1, 22) (1, 23)
    """)

    def test_selector(self):
        # Selector
        self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\
    NAME       'import'      (1, 0) (1, 6)
    NAME       'sys'         (1, 7) (1, 10)
    OP         ','           (1, 10) (1, 11)
    NAME       'time'        (1, 12) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'x'           (2, 0) (2, 1)
    OP         '='           (2, 2) (2, 3)
    NAME       'sys'         (2, 4) (2, 7)
    OP         '.'           (2, 7) (2, 8)
    NAME       'modules'     (2, 8) (2, 15)
    OP         '['           (2, 15) (2, 16)
    STRING     "'time'"      (2, 16) (2, 22)
    OP         ']'           (2, 22) (2, 23)
    OP         '.'           (2, 23) (2, 24)
    NAME       'time'        (2, 24) (2, 28)
    OP         '('           (2, 28) (2, 29)
    OP         ')'           (2, 29) (2, 30)
    """)

    def test_method(self):
        # Methods
        self.check_tokenize("@staticmethod\ndef foo(x,y): pass", """\
    OP         '@'           (1, 0) (1, 1)
    NAME       'staticmethod' (1, 1) (1, 13)
    NEWLINE    '\\n'          (1, 13) (1, 14)
    NAME       'def'         (2, 0) (2, 3)
    NAME       'foo'         (2, 4) (2, 7)
    OP         '('           (2, 7) (2, 8)
    NAME       'x'           (2, 8) (2, 9)
    OP         ','           (2, 9) (2, 10)
    NAME       'y'           (2, 10) (2, 11)
    OP         ')'           (2, 11) (2, 12)
    OP         ':'           (2, 12) (2, 13)
    NAME       'pass'        (2, 14) (2, 18)
    """)

    def test_tabs(self):
        # Evil tabs
        self.check_tokenize("def f():\n"
                            "\tif x\n"
                            "        \tpass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    INDENT     '\\t'          (2, 0) (2, 1)
    NAME       'if'          (2, 1) (2, 3)
    NAME       'x'           (2, 4) (2, 5)
    NEWLINE    '\\n'          (2, 5) (2, 6)
    INDENT     '        \\t'  (3, 0) (3, 9)
    NAME       'pass'        (3, 9) (3, 13)
    DEDENT     ''            (4, 0) (4, 0)
    DEDENT     ''            (4, 0) (4, 0)
    """)

    def test_non_ascii_identifiers(self):
        # Non-ascii identifiers
        self.check_tokenize("Örter = 'places'\ngrün = 'green'", """\
    NAME       'Örter'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    STRING     "'places'"    (1, 8) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'grün'        (2, 0) (2, 4)
    OP         '='           (2, 5) (2, 6)
    STRING     "'green'"     (2, 7) (2, 14)
    """)

    def test_unicode(self):
        # Legacy unicode literals:
        self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\
    NAME       'Örter'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    STRING     "u'places'"   (1, 8) (1, 17)
    NEWLINE    '\\n'          (1, 17) (1, 18)
    NAME       'grün'        (2, 0) (2, 4)
    OP         '='           (2, 5) (2, 6)
    STRING     "U'green'"    (2, 7) (2, 15)
    """)

    def test_async(self):
        # Async/await extension:
        self.check_tokenize("async = 1", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    NUMBER     '1'           (1, 8) (1, 9)
    """)

        self.check_tokenize("a = (async = 1)", """\
    NAME       'a'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '('           (1, 4) (1, 5)
    NAME       'async'       (1, 5) (1, 10)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    """)

        self.check_tokenize("async()", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    """)

        self.check_tokenize("class async(Bar):pass", """\
    NAME       'class'       (1, 0) (1, 5)
    NAME       'async'       (1, 6) (1, 11)
    OP         '('           (1, 11) (1, 12)
    NAME       'Bar'         (1, 12) (1, 15)
    OP         ')'           (1, 15) (1, 16)
    OP         ':'           (1, 16) (1, 17)
    NAME       'pass'        (1, 17) (1, 21)
    """)

        self.check_tokenize("class async:pass", """\
    NAME       'class'       (1, 0) (1, 5)
    NAME       'async'       (1, 6) (1, 11)
    OP         ':'           (1, 11) (1, 12)
    NAME       'pass'        (1, 12) (1, 16)
    """)

        self.check_tokenize("await = 1", """\
    NAME       'await'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    NUMBER     '1'           (1, 8) (1, 9)
    """)

        self.check_tokenize("foo.async", """\
    NAME       'foo'         (1, 0) (1, 3)
    OP         '.'           (1, 3) (1, 4)
    NAME       'async'       (1, 4) (1, 9)
    """)

        self.check_tokenize("async for a in b: pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'for'         (1, 6) (1, 9)
    NAME       'a'           (1, 10) (1, 11)
    NAME       'in'          (1, 12) (1, 14)
    NAME       'b'           (1, 15) (1, 16)
    OP         ':'           (1, 16) (1, 17)
    NAME       'pass'        (1, 18) (1, 22)
    """)

        self.check_tokenize("async with a as b: pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'with'        (1, 6) (1, 10)
    NAME       'a'           (1, 11) (1, 12)
    NAME       'as'          (1, 13) (1, 15)
    NAME       'b'           (1, 16) (1, 17)
    OP         ':'           (1, 17) (1, 18)
    NAME       'pass'        (1, 19) (1, 23)
    """)

        self.check_tokenize("async.foo", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '.'           (1, 5) (1, 6)
    NAME       'foo'         (1, 6) (1, 9)
    """)

        self.check_tokenize("async", """\
    NAME       'async'       (1, 0) (1, 5)
    """)

        self.check_tokenize("async\n#comment\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    COMMENT    '#comment'    (2, 0) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    NAME       'await'       (3, 0) (3, 5)
    """)

        self.check_tokenize("async\n...\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    OP         '...'         (2, 0) (2, 3)
    NEWLINE    '\\n'          (2, 3) (2, 4)
    NAME       'await'       (3, 0) (3, 5)
    """)

        self.check_tokenize("async\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    NAME       'await'       (2, 0) (2, 5)
    """)

        self.check_tokenize("foo.async + 1", """\
    NAME       'foo'         (1, 0) (1, 3)
    OP         '.'           (1, 3) (1, 4)
    NAME       'async'       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '1'           (1, 12) (1, 13)
    """)

        self.check_tokenize("async def foo(): pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NAME       'pass'        (1, 17) (1, 21)
    """)

        self.check_tokenize('''\
async def foo():
  def foo(await):
    await = 1
  if 1:
    await
async += 1
''', """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    INDENT     '  '          (2, 0) (2, 2)
    NAME       'def'         (2, 2) (2, 5)
    NAME       'foo'         (2, 6) (2, 9)
    OP         '('           (2, 9) (2, 10)
    NAME       'await'       (2, 10) (2, 15)
    OP         ')'           (2, 15) (2, 16)
    OP         ':'           (2, 16) (2, 17)
    NEWLINE    '\\n'          (2, 17) (2, 18)
    INDENT     '    '        (3, 0) (3, 4)
    NAME       'await'       (3, 4) (3, 9)
    OP         '='           (3, 10) (3, 11)
    NUMBER     '1'           (3, 12) (3, 13)
    NEWLINE    '\\n'          (3, 13) (3, 14)
    DEDENT     ''            (4, 2) (4, 2)
    NAME       'if'          (4, 2) (4, 4)
    NUMBER     '1'           (4, 5) (4, 6)
    OP         ':'           (4, 6) (4, 7)
    NEWLINE    '\\n'          (4, 7) (4, 8)
    INDENT     '    '        (5, 0) (5, 4)
    NAME       'await'       (5, 4) (5, 9)
    NEWLINE    '\\n'          (5, 9) (5, 10)
    DEDENT     ''            (6, 0) (6, 0)
    DEDENT     ''            (6, 0) (6, 0)
    NAME       'async'       (6, 0) (6, 5)
    OP         '+='          (6, 6) (6, 8)
    NUMBER     '1'           (6, 9) (6, 10)
    NEWLINE    '\\n'          (6, 10) (6, 11)
    """)

        self.check_tokenize('''\
async def foo():
  async for i in 1: pass''', """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    INDENT     '  '          (2, 0) (2, 2)
    NAME       'async'       (2, 2) (2, 7)
    NAME       'for'         (2, 8) (2, 11)
    NAME       'i'           (2, 12) (2, 13)
    NAME       'in'          (2, 14) (2, 16)
    NUMBER     '1'           (2, 17) (2, 18)
    OP         ':'           (2, 18) (2, 19)
    NAME       'pass'        (2, 20) (2, 24)
    DEDENT     ''            (3, 0) (3, 0)
    """)

        self.check_tokenize('''async def foo(async): await''', """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    NAME       'async'       (1, 14) (1, 19)
    OP         ')'           (1, 19) (1, 20)
    OP         ':'           (1, 20) (1, 21)
    NAME       'await'       (1, 22) (1, 27)
    """)

        self.check_tokenize('''\
def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    NL         '\\n'          (2, 0) (2, 1)
    INDENT     '  '          (3, 0) (3, 2)
    NAME       'def'         (3, 2) (3, 5)
    NAME       'baz'         (3, 6) (3, 9)
    OP         '('           (3, 9) (3, 10)
    OP         ')'           (3, 10) (3, 11)
    OP         ':'           (3, 11) (3, 12)
    NAME       'pass'        (3, 13) (3, 17)
    NEWLINE    '\\n'          (3, 17) (3, 18)
    NAME       'async'       (4, 2) (4, 7)
    NAME       'def'         (4, 8) (4, 11)
    NAME       'bar'         (4, 12) (4, 15)
    OP         '('           (4, 15) (4, 16)
    OP         ')'           (4, 16) (4, 17)
    OP         ':'           (4, 17) (4, 18)
    NAME       'pass'        (4, 19) (4, 23)
    NEWLINE    '\\n'          (4, 23) (4, 24)
    NL         '\\n'          (5, 0) (5, 1)
    NAME       'await'       (6, 2) (6, 7)
    OP         '='           (6, 8) (6, 9)
    NUMBER     '2'           (6, 10) (6, 11)
    DEDENT     ''            (7, 0) (7, 0)
    """)

        self.check_tokenize('''\
async def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'f'           (1, 10) (1, 11)
    OP         '('           (1, 11) (1, 12)
    OP         ')'           (1, 12) (1, 13)
    OP         ':'           (1, 13) (1, 14)
    NEWLINE    '\\n'          (1, 14) (1, 15)
    NL         '\\n'          (2, 0) (2, 1)
    INDENT     '  '          (3, 0) (3, 2)
    NAME       'def'         (3, 2) (3, 5)
    NAME       'baz'         (3, 6) (3, 9)
    OP         '('           (3, 9) (3, 10)
    OP         ')'           (3, 10) (3, 11)
    OP         ':'           (3, 11) (3, 12)
    NAME       'pass'        (3, 13) (3, 17)
    NEWLINE    '\\n'          (3, 17) (3, 18)
    NAME       'async'       (4, 2) (4, 7)
    NAME       'def'         (4, 8) (4, 11)
    NAME       'bar'         (4, 12) (4, 15)
    OP         '('           (4, 15) (4, 16)
    OP         ')'           (4, 16) (4, 17)
    OP         ':'           (4, 17) (4, 18)
    NAME       'pass'        (4, 19) (4, 23)
    NEWLINE    '\\n'          (4, 23) (4, 24)
    NL         '\\n'          (5, 0) (5, 1)
    NAME       'await'       (6, 2) (6, 7)
    OP         '='           (6, 8) (6, 9)
    NUMBER     '2'           (6, 10) (6, 11)
    DEDENT     ''            (7, 0) (7, 0)
    """)


def decistmt(s):
    result = []
    g = tokenize(BytesIO(s.encode('utf-8')).readline)  # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result).decode('utf-8')

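# A usage sketch of decistmt() (the same example test_decistmt checks below):
# float literals are wrapped in Decimal constructor calls while every other
# token passes through untouched, e.g.
#
#     decistmt('+21.3e-5*-.1234/81.7')
#     # "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')"
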
class TestMisc(TestCase):

    def test_decistmt(self):
        # Substitute Decimals for floats in a string of statements.
        # This is an example from the docs.

        from decimal import Decimal
        s = '+21.3e-5*-.1234/81.7'
        self.assertEqual(decistmt(s),
                         "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")

        # The format of the exponent is inherited from the platform C library.
        # Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
        # we're only showing 11 digits, and the 12th isn't close to 5, the
        # rest of the output should be platform-independent.
        self.assertRegex(repr(eval(s)), '-3.2171603427[0-9]*e-0+7')

        # Output from calculations with Decimal should be identical across all
        # platforms.
        self.assertEqual(eval(decistmt(s)),
                         Decimal('-3.217160342717258261933904529E-7'))


class TestTokenizerAdheresToPep0263(TestCase):
    """
    Test that the tokenizer adheres to the coding behaviour stipulated in
    PEP 0263.
    """

    def _testFile(self, filename):
        path = os.path.join(os.path.dirname(__file__), filename)
        TestRoundtrip.check_roundtrip(self, open(path, 'rb'))

    def test_utf8_coding_cookie_and_no_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
        self._testFile(f)

    def test_latin1_coding_cookie_and_utf8_bom(self):
        """
        As per PEP 0263, if a file starts with a utf-8 BOM signature, the only
        allowed encoding for the comment is 'utf-8'. The text file used in
        this test starts with a BOM signature, but specifies latin1 as the
        coding, so verify that a SyntaxError is raised, which matches the
        behaviour of the interpreter when it encounters a similar condition.
        """
        f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'
        self.assertRaises(SyntaxError, self._testFile, f)

    def test_no_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'
        self._testFile(f)

    def test_utf8_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'
        self._testFile(f)

    def test_bad_coding_cookie(self):
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')


class Test_Tokenize(TestCase):

    def test__tokenize_decodes_with_specified_encoding(self):
        literal = '"ЉЊЈЁЂ"'
        line = literal.encode('utf-8')
        first = False
        def readline():
            nonlocal first
            if not first:
                first = True
                return line
            else:
                return b''

        # skip the initial encoding token and the end tokens
        tokens = list(_tokenize(readline, encoding='utf-8'))[1:-2]
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEqual(tokens, expected_tokens,
                         "bytes not decoded with encoding")

    def test__tokenize_does_not_decode_with_encoding_none(self):
        literal = '"ЉЊЈЁЂ"'
        first = False
        def readline():
            nonlocal first
            if not first:
                first = True
                return literal
            else:
                return b''

        # skip the end tokens
        tokens = list(_tokenize(readline, encoding=None))[:-2]
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEqual(tokens, expected_tokens,
                         "string not tokenized when encoding is None")


1061class TestDetectEncoding(TestCase):
1062
1063 def get_readline(self, lines):
1064 index = 0
1065 def readline():
1066 nonlocal index
1067 if index == len(lines):
1068 raise StopIteration
1069 line = lines[index]
1070 index += 1
1071 return line
1072 return readline
1073
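    # A sketch of the detect_encoding() contract the tests below exercise
    # (using only names already imported in this module): it returns the
    # detected encoding plus the raw lines it had to consume to decide, e.g.
    #
    #     readline = self.get_readline((b'# coding: latin-1\n', b'x = 1\n'))
    #     detect_encoding(readline)
    #     # ('iso-8859-1', [b'# coding: latin-1\n'])
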
    def test_no_bom_no_encoding_cookie(self):
        lines = (
            b'# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, list(lines[:2]))

    def test_bom_no_cookie(self):
        lines = (
            b'\xef\xbb\xbf# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'# something\n', b'print(something)\n'])

    def test_cookie_first_line_no_bom(self):
        lines = (
            b'# -*- coding: latin-1 -*-\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso-8859-1')
        self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])

    def test_matched_bom_and_cookie_first_line(self):
        lines = (
            b'\xef\xbb\xbf# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_no_bom(self):
        lines = (
            b'#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'ascii')
        expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
        self.assertEqual(consumed_lines, expected)

    def test_matched_bom_and_cookie_second_line(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'f# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'#! something\n', b'f# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_noncommented_first_line(self):
        lines = (
            b"print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        expected = [b"print('\xc2\xa3')\n"]
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_commented_first_line(self):
        lines = (
            b"#print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_empty_first_line(self):
        lines = (
            b'\n',
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_latin1_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
                     "iso-8859-1-unix", "iso-latin-1-mac")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"print(things)\n",
                         b"do_something += 4\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "iso-8859-1")

    def test_syntaxerror_latin1(self):
        # Issue 14629: need to raise SyntaxError if the first
        # line(s) have non-UTF-8 characters
        lines = (
            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_utf8_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"1 + 3\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "utf-8")

    def test_short_files(self):
        readline = self.get_readline((b'print(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        encoding, consumed_lines = detect_encoding(self.get_readline(()))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        readline = self.get_readline((b'\xef\xbb\xbf',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'# coding: bad\n',))
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_false_encoding(self):
        # Issue 18873: "Encoding" detected in non-comment lines
        readline = self.get_readline((b'print("#coding=fake")',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print("#coding=fake")'])

    def test_open(self):
        filename = support.TESTFN + '.py'
        self.addCleanup(support.unlink, filename)

        # test coding cookie
        for encoding in ('iso-8859-15', 'utf-8'):
            with open(filename, 'w', encoding=encoding) as fp:
                print("# coding: %s" % encoding, file=fp)
                print("print('euro:\u20ac')", file=fp)
            with tokenize_open(filename) as fp:
                self.assertEqual(fp.encoding, encoding)
                self.assertEqual(fp.mode, 'r')

        # test BOM (no coding cookie)
        with open(filename, 'w', encoding='utf-8-sig') as fp:
            print("print('euro:\u20ac')", file=fp)
        with tokenize_open(filename) as fp:
            self.assertEqual(fp.encoding, 'utf-8-sig')
            self.assertEqual(fp.mode, 'r')

    def test_filename_in_exception(self):
        # When possible, include the file name in the exception.
        path = 'some_file_path'
        lines = (
            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
        )
        class Bunk:
            def __init__(self, lines, path):
                self.name = path
                self._lines = lines
                self._index = 0

            def readline(self):
                if self._index == len(lines):
                    raise StopIteration
                line = lines[self._index]
                self._index += 1
                return line

        with self.assertRaises(SyntaxError):
            ins = Bunk(lines, path)
            # Make sure lacking a name isn't an issue.
            del ins.name
            detect_encoding(ins.readline)
        with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
            ins = Bunk(lines, path)
            detect_encoding(ins.readline)

    def test_open_error(self):
        # Issue #23840: open() must close the binary file on error
        m = BytesIO(b'#coding:xxx')
        with mock.patch('tokenize._builtin_open', return_value=m):
            self.assertRaises(SyntaxError, tokenize_open, 'foobar')
        self.assertTrue(m.closed)


class TestTokenize(TestCase):

    def test_tokenize(self):
        import tokenize as tokenize_module
        encoding = object()
        encoding_used = None
        def mock_detect_encoding(readline):
            return encoding, [b'first', b'second']

        def mock__tokenize(readline, encoding):
            nonlocal encoding_used
            encoding_used = encoding
            out = []
            while True:
                next_line = readline()
                if next_line:
                    out.append(next_line)
                    continue
                return out

        counter = 0
        def mock_readline():
            nonlocal counter
            counter += 1
            if counter == 5:
                return b''
            return str(counter).encode()

        orig_detect_encoding = tokenize_module.detect_encoding
        orig__tokenize = tokenize_module._tokenize
        tokenize_module.detect_encoding = mock_detect_encoding
        tokenize_module._tokenize = mock__tokenize
        try:
            results = tokenize(mock_readline)
            self.assertEqual(list(results),
                             [b'first', b'second', b'1', b'2', b'3', b'4'])
        finally:
            tokenize_module.detect_encoding = orig_detect_encoding
            tokenize_module._tokenize = orig__tokenize

        self.assertEqual(encoding_used, encoding)

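    # Sketch (not part of the original suite): generate_tokens() is the
    # str-based counterpart of tokenize(); it takes a readline returning
    # strings, skips encoding detection, and emits no ENCODING token, so
    # the first token here is the NAME itself.
    def test_generate_tokens_example(self):
        toks = list(generate_tokens(iter(['x = 1\n']).__next__))
        self.assertEqual(toks[0].type, NAME)
        self.assertEqual(toks[0].string, 'x')
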
    def test_oneline_defs(self):
        buf = []
        for i in range(500):
            buf.append('def i{i}(): return {i}'.format(i=i))
        buf.append('OK')
        buf = '\n'.join(buf)

        # Test that 500 consecutive one-line defs are OK
        toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
        self.assertEqual(toks[-3].string, 'OK')  # [-1] is always ENDMARKER
                                                 # [-2] is always NEWLINE

    def assertExactTypeEqual(self, opstr, *optypes):
        tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
        num_optypes = len(optypes)
        self.assertEqual(len(tokens), 3 + num_optypes)
        self.assertEqual(tok_name[tokens[0].exact_type],
                         tok_name[ENCODING])
        for i in range(num_optypes):
            self.assertEqual(tok_name[tokens[i + 1].exact_type],
                             tok_name[optypes[i]])
        self.assertEqual(tok_name[tokens[1 + num_optypes].exact_type],
                         tok_name[token.NEWLINE])
        self.assertEqual(tok_name[tokens[2 + num_optypes].exact_type],
                         tok_name[token.ENDMARKER])

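    # For context (sketch, not in the original suite): operators come back
    # from tokenize() as generic OP tokens; the specific operator is only
    # visible through the .exact_type attribute checked by the helper above.
    def test_exact_type_vs_type_example(self):
        toks = list(tokenize(BytesIO(b'+').readline))
        self.assertEqual(toks[1].type, token.OP)
        self.assertEqual(toks[1].exact_type, token.PLUS)
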
    def test_exact_type(self):
        self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
        self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)
        self.assertExactTypeEqual(':', token.COLON)
        self.assertExactTypeEqual(',', token.COMMA)
        self.assertExactTypeEqual(';', token.SEMI)
        self.assertExactTypeEqual('+', token.PLUS)
        self.assertExactTypeEqual('-', token.MINUS)
        self.assertExactTypeEqual('*', token.STAR)
        self.assertExactTypeEqual('/', token.SLASH)
        self.assertExactTypeEqual('|', token.VBAR)
        self.assertExactTypeEqual('&', token.AMPER)
        self.assertExactTypeEqual('<', token.LESS)
        self.assertExactTypeEqual('>', token.GREATER)
        self.assertExactTypeEqual('=', token.EQUAL)
        self.assertExactTypeEqual('.', token.DOT)
        self.assertExactTypeEqual('%', token.PERCENT)
        self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)
        self.assertExactTypeEqual('==', token.EQEQUAL)
        self.assertExactTypeEqual('!=', token.NOTEQUAL)
        self.assertExactTypeEqual('<=', token.LESSEQUAL)
        self.assertExactTypeEqual('>=', token.GREATEREQUAL)
        self.assertExactTypeEqual('~', token.TILDE)
        self.assertExactTypeEqual('^', token.CIRCUMFLEX)
        self.assertExactTypeEqual('<<', token.LEFTSHIFT)
        self.assertExactTypeEqual('>>', token.RIGHTSHIFT)
        self.assertExactTypeEqual('**', token.DOUBLESTAR)
        self.assertExactTypeEqual('+=', token.PLUSEQUAL)
        self.assertExactTypeEqual('-=', token.MINEQUAL)
        self.assertExactTypeEqual('*=', token.STAREQUAL)
        self.assertExactTypeEqual('/=', token.SLASHEQUAL)
        self.assertExactTypeEqual('%=', token.PERCENTEQUAL)
        self.assertExactTypeEqual('&=', token.AMPEREQUAL)
        self.assertExactTypeEqual('|=', token.VBAREQUAL)
        self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
        self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)
        self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)
        self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)
        self.assertExactTypeEqual('//', token.DOUBLESLASH)
        self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)
        self.assertExactTypeEqual('...', token.ELLIPSIS)
        self.assertExactTypeEqual('->', token.RARROW)
        self.assertExactTypeEqual('@', token.AT)
        self.assertExactTypeEqual('@=', token.ATEQUAL)

        self.assertExactTypeEqual('a**2+b**2==c**2',
                                  NAME, token.DOUBLESTAR, NUMBER,
                                  token.PLUS,
                                  NAME, token.DOUBLESTAR, NUMBER,
                                  token.EQEQUAL,
                                  NAME, token.DOUBLESTAR, NUMBER)
        self.assertExactTypeEqual('{1, 2, 3}',
                                  token.LBRACE,
                                  token.NUMBER, token.COMMA,
                                  token.NUMBER, token.COMMA,
                                  token.NUMBER,
                                  token.RBRACE)
        self.assertExactTypeEqual('^(x & 0x1)',
                                  token.CIRCUMFLEX,
                                  token.LPAR,
                                  token.NAME, token.AMPER, token.NUMBER,
                                  token.RPAR)

    def test_pathological_trailing_whitespace(self):
        # See http://bugs.python.org/issue16152
        self.assertExactTypeEqual('@ ', token.AT)


class UntokenizeTest(TestCase):

    def test_bad_input_order(self):
        # raise if start is on an earlier row than the previous end
        u = Untokenizer()
        u.prev_row = 2
        u.prev_col = 2
        with self.assertRaises(ValueError) as cm:
            u.add_whitespace((1,3))
        self.assertEqual(cm.exception.args[0],
                'start (1,3) precedes previous end (2,2)')
        # raise if start is on the same row but an earlier column
        self.assertRaises(ValueError, u.add_whitespace, (2,1))

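    # Sketch (not part of the original suite): with a valid start position,
    # add_whitespace() pads from (prev_row, prev_col) with spaces within
    # the current row.
    def test_add_whitespace_padding_example(self):
        u = Untokenizer()
        u.prev_row = 1
        u.prev_col = 1
        u.tokens = []
        u.add_whitespace((1, 4))
        self.assertEqual(u.tokens, ['   '])  # three spaces: columns 1..3
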
    def test_backslash_continuation(self):
        # The problem is that <whitespace>\<newline> leaves no token
        u = Untokenizer()
        u.prev_row = 1
        u.prev_col = 1
        u.tokens = []
        u.add_whitespace((2, 0))
        self.assertEqual(u.tokens, ['\\\n'])
        u.prev_row = 2
        u.add_whitespace((4, 4))
        # Two continuation lines, then four columns of padding.
        self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])
        TestRoundtrip.check_roundtrip(self, 'a\n  b\n    c\n  \\\n  c\n')

    def test_iter_compat(self):
        u = Untokenizer()
        token = (NAME, 'Hello')
        tokens = [(ENCODING, 'utf-8'), token]
        u.compat(token, iter([]))
        self.assertEqual(u.tokens, ["Hello "])
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter([token])), 'Hello ')
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
        self.assertEqual(u.encoding, 'utf-8')
        self.assertEqual(untokenize(iter(tokens)), b'Hello ')


class TestRoundtrip(TestCase):

    def check_roundtrip(self, f):
        """
        Test roundtrip for `untokenize`.  `f` is an open file or a string.
        The source code in f is tokenized to both 5- and 2-tuples.
        Both sequences are converted back to source code via
        tokenize.untokenize(), and each result is tokenized again to
        2-tuples.  The test fails if either re-tokenization does not match
        the original 2-tuple sequence.

        When untokenize bugs are fixed, untokenize with 5-tuples should
        reproduce code that does not contain a backslash continuation
        following spaces.  A proper test should test this.
        """
        # Get source code and original tokenizations
        if isinstance(f, str):
            code = f.encode('utf-8')
        else:
            code = f.read()
            f.close()
        readline = iter(code.splitlines(keepends=True)).__next__
        tokens5 = list(tokenize(readline))
        tokens2 = [tok[:2] for tok in tokens5]
        # Reproduce tokens2 from pairs
        bytes_from2 = untokenize(tokens2)
        readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
        tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
        self.assertEqual(tokens2_from2, tokens2)
        # Reproduce tokens2 from 5-tuples
        bytes_from5 = untokenize(tokens5)
        readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
        tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
        self.assertEqual(tokens2_from5, tokens2)

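    # Minimal illustration (sketch, not part of the original suite) of the
    # two untokenize() input modes exercised above: full 5-tuples carry
    # positions and can reproduce simple sources exactly, while 2-tuples
    # only guarantee a token-equivalent result.
    def test_untokenize_modes_example(self):
        source = b'x = 1\n'
        tokens5 = list(tokenize(BytesIO(source).readline))
        self.assertEqual(untokenize(tokens5), source)
        tokens2 = [tok[:2] for tok in tokens5]
        result = untokenize(tokens2)
        self.assertEqual(
            [tok[:2] for tok in tokenize(BytesIO(result).readline)], tokens2)
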
    def test_roundtrip(self):
        # There are some standard formatting practices that are easy to get right.

        self.check_roundtrip("if x == 1:\n"
                             "    print(x)\n")
        self.check_roundtrip("# This is a comment\n"
                             "# This also\n")

        # Some people use different formatting conventions, which makes
        # untokenize a little trickier.  Note that this test involves trailing
        # whitespace after the colon.
        self.check_roundtrip("if x == 1 : \n"
                             "  print(x)\n")

        fn = support.findfile("tokenize_tests.txt")
        with open(fn, 'rb') as f:
            self.check_roundtrip(f)
        self.check_roundtrip("if x == 1:\n"
                             "    # A comment by itself.\n"
                             "    print(x)  # Comment here, too.\n"
                             "    # Another comment.\n"
                             "after_if = True\n")
        self.check_roundtrip("if (x  # The comments need to go in the right place\n"
                             "    == 1):\n"
                             "    print('x==1')\n")
        self.check_roundtrip("class Test: # A comment here\n"
                             "  # A comment with weird indent\n"
                             "  after_com = 5\n"
                             "  def x(m): return m*5  # a one liner\n"
                             "  def  y(m):  # A whitespace after the colon\n"
                             "     return y*4  # 3-space indent\n")

        # Some error-handling code
        self.check_roundtrip("try: import somemodule\n"
                             "except ImportError: # comment\n"
                             "    print('Can not import' # comment2\n)"
                             "else:   print('Loaded')\n")

    def test_continuation(self):
        # Balancing continuation
        self.check_roundtrip("a = (3,4, \n"
                             "5,6)\n"
                             "y = [3, 4,\n"
                             "5]\n"
                             "z = {'a': 5,\n"
                             "'b':15, 'c':True}\n"
                             "x = len(y) + 5 - a[\n"
                             "3] - a[2]\n"
                             "+ len(z) - z[\n"
                             "'b']\n")

    def test_backslash_continuation(self):
        # Backslash means line continuation, except for comments
        self.check_roundtrip("x=1+\\\n"
                             "1\n"
                             "# This is a comment\\\n"
                             "# This also\n")
        self.check_roundtrip("# Comment \\\n"
                             "x = 0")

    def test_string_concatenation(self):
        # Two string literals on the same line
        self.check_roundtrip("'' ''")

    def test_random_files(self):
        # Test roundtrip on random python modules.
        # pass the '-ucpu' option to process the full directory.

        import glob, random
        fn = support.findfile("tokenize_tests.txt")
        tempdir = os.path.dirname(fn) or os.curdir
        testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

        # Tokenize is broken on test_pep3131.py because regular expressions are
        # broken on the obscure unicode identifiers in it. *sigh*
        # With roundtrip extended to test the 5-tuple mode of untokenize,
        # 7 more testfiles fail.  Remove them also until the failure is diagnosed.

        testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py"))
        for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
            testfiles.remove(os.path.join(tempdir, "test_%s.py") % f)

        if not support.is_resource_enabled("cpu"):
            testfiles = random.sample(testfiles, 10)

        for testfile in testfiles:
            with open(testfile, 'rb') as f:
                with self.subTest(file=testfile):
                    self.check_roundtrip(f)


    def roundtrip(self, code):
        if isinstance(code, str):
            code = code.encode('utf-8')
        return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')

    def test_indentation_semantics_retained(self):
        """
        Ensure that although whitespace might be mutated in a roundtrip,
        the semantic meaning of the indentation remains consistent.
        """
        code = "if False:\n\tx=3\n\tx=3\n"
        codelines = self.roundtrip(code).split('\n')
        self.assertEqual(codelines[1], codelines[2])
        self.check_roundtrip(code)


if __name__ == "__main__":
    unittest.main()