from test import support
from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                      STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
                      open as tokenize_open, Untokenizer, generate_tokens)
from io import BytesIO, StringIO
import unittest
from unittest import TestCase, mock
from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
                               INVALID_UNDERSCORE_LITERALS)
import os
import token


class TokenizeTest(TestCase):
    # Tests for the tokenize module.

    # The tests can be really simple. Given a small fragment of source
    # code, print out a table with tokens. The ENDMARKER is omitted for
    # brevity.

    def check_tokenize(self, s, expected):
        # Format the tokens in s in a table format.
        # The ENDMARKER is omitted.
        result = []
        f = BytesIO(s.encode('utf-8'))
        for type, token, start, end, line in tokenize(f.readline):
            if type == ENDMARKER:
                break
            type = tok_name[type]
            result.append(f"    {type:10} {token!r:13} {start} {end}")
        self.assertEqual(result,
                         ["    ENCODING   'utf-8'       (0, 0) (0, 0)"] +
                         expected.rstrip().splitlines())

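    # An illustrative sketch (not itself a test): tokenize() yields named
    # 5-tuples of (type, string, start, end, line), starting with an ENCODING
    # token, which is why check_tokenize() prepends the ENCODING row above.
    # For example:
    #
    #     for tok in tokenize(BytesIO(b"1 + 1").readline):
    #         print(tok_name[tok.type], tok.string, tok.start, tok.end)
    #
    # prints the ENCODING row followed by the NUMBER/OP/NUMBER rows checked
    # in test_basic below, plus trailing bookkeeping tokens.
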
    def test_basic(self):
        self.check_tokenize("1 + 1", """\
    NUMBER     '1'           (1, 0) (1, 1)
    OP         '+'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    """)
        self.check_tokenize("if False:\n"
                            "    # NL\n"
                            "    \n"
                            "    True = False # NEWLINE\n", """\
    NAME       'if'          (1, 0) (1, 2)
    NAME       'False'       (1, 3) (1, 8)
    OP         ':'           (1, 8) (1, 9)
    NEWLINE    '\\n'          (1, 9) (1, 10)
    COMMENT    '# NL'        (2, 4) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    NL         '\\n'          (3, 4) (3, 5)
    INDENT     '    '        (4, 0) (4, 4)
    NAME       'True'        (4, 4) (4, 8)
    OP         '='           (4, 9) (4, 10)
    NAME       'False'       (4, 11) (4, 16)
    COMMENT    '# NEWLINE'   (4, 17) (4, 26)
    NEWLINE    '\\n'          (4, 26) (4, 27)
    DEDENT     ''            (5, 0) (5, 0)
    """)
        indent_error_file = b"""\
def k(x):
    x += 2
  x += 5
"""
        readline = BytesIO(indent_error_file).readline
        with self.assertRaisesRegex(IndentationError,
                                    "unindent does not match any "
                                    "outer indentation level"):
            for tok in tokenize(readline):
                pass

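    # tokenize distinguishes NEWLINE, which ends a logical line of code, from
    # NL, which marks a non-logical break such as a blank line or a line that
    # holds only a comment; the "if False:" case above exercises both.
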
    def test_int(self):
        # Ordinary integers and binary operators
        self.check_tokenize("0xff <= 255", """\
    NUMBER     '0xff'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0b10 <= 255", """\
    NUMBER     '0b10'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0o123 <= 0O123", """\
    NUMBER     '0o123'       (1, 0) (1, 5)
    OP         '<='          (1, 6) (1, 8)
    NUMBER     '0O123'       (1, 9) (1, 14)
    """)
        self.check_tokenize("1234567 > ~0x15", """\
    NUMBER     '1234567'     (1, 0) (1, 7)
    OP         '>'           (1, 8) (1, 9)
    OP         '~'           (1, 10) (1, 11)
    NUMBER     '0x15'        (1, 11) (1, 15)
    """)
        self.check_tokenize("2134568 != 1231515", """\
    NUMBER     '2134568'     (1, 0) (1, 7)
    OP         '!='          (1, 8) (1, 10)
    NUMBER     '1231515'     (1, 11) (1, 18)
    """)
        self.check_tokenize("(-124561-1) & 200000000", """\
    OP         '('           (1, 0) (1, 1)
    OP         '-'           (1, 1) (1, 2)
    NUMBER     '124561'      (1, 2) (1, 8)
    OP         '-'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         ')'           (1, 10) (1, 11)
    OP         '&'           (1, 12) (1, 13)
    NUMBER     '200000000'   (1, 14) (1, 23)
    """)
        self.check_tokenize("0xdeadbeef != -1", """\
    NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
    OP         '!='          (1, 11) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    NUMBER     '1'           (1, 15) (1, 16)
    """)
        self.check_tokenize("0xdeadc0de & 12345", """\
    NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
    OP         '&'           (1, 11) (1, 12)
    NUMBER     '12345'       (1, 13) (1, 18)
    """)
        self.check_tokenize("0xFF & 0x15 | 1234", """\
    NUMBER     '0xFF'        (1, 0) (1, 4)
    OP         '&'           (1, 5) (1, 6)
    NUMBER     '0x15'        (1, 7) (1, 11)
    OP         '|'           (1, 12) (1, 13)
    NUMBER     '1234'        (1, 14) (1, 18)
    """)

    def test_long(self):
        # Long integers
        self.check_tokenize("x = 0", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0'           (1, 4) (1, 5)
    """)
        self.check_tokenize("x = 0xfffffffffff", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0xfffffffffff' (1, 4) (1, 17)
    """)
        self.check_tokenize("x = 123141242151251616110", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '123141242151251616110' (1, 4) (1, 25)
    """)
        self.check_tokenize("x = -15921590215012591", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '-'           (1, 4) (1, 5)
    NUMBER     '15921590215012591' (1, 5) (1, 22)
    """)

    def test_float(self):
        # Floating point numbers
        self.check_tokenize("x = 3.14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 314159.", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '314159.'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = .314159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '.314159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3e14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3e14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3E123", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3E123'       (1, 4) (1, 9)
    """)
        self.check_tokenize("x+y = 3e-1230", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '+'           (1, 1) (1, 2)
    NAME       'y'           (1, 2) (1, 3)
    OP         '='           (1, 4) (1, 5)
    NUMBER     '3e-1230'     (1, 6) (1, 13)
    """)
        self.check_tokenize("x = 3.14e159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14e159'    (1, 4) (1, 12)
    """)

    def test_underscore_literals(self):
        def number_token(s):
            f = BytesIO(s.encode('utf-8'))
            for toktype, token, start, end, line in tokenize(f.readline):
                if toktype == NUMBER:
                    return token
            return 'invalid token'
        for lit in VALID_UNDERSCORE_LITERALS:
            if '(' in lit:
                # this won't work with compound complex inputs
                continue
            self.assertEqual(number_token(lit), lit)
        for lit in INVALID_UNDERSCORE_LITERALS:
            self.assertNotEqual(number_token(lit), lit)

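    # For illustration (the concrete literals here are assumptions; the real
    # inputs come from test_grammar's lists): number_token('1_000') returns
    # '1_000' unchanged, while an invalid spelling such as '1_' is not
    # scanned as a single NUMBER token and therefore compares unequal above.
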
    def test_string(self):
        # String literals
        self.check_tokenize("x = ''; y = \"\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "''"          (1, 4) (1, 6)
    OP         ';'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '='           (1, 10) (1, 11)
    STRING     '""'          (1, 12) (1, 14)
    """)
        self.check_tokenize("x = '\"'; y = \"'\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '\\'"\\''       (1, 4) (1, 7)
    OP         ';'           (1, 7) (1, 8)
    NAME       'y'           (1, 9) (1, 10)
    OP         '='           (1, 11) (1, 12)
    STRING     '"\\'"'        (1, 13) (1, 16)
    """)
        self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"doesn\\'t "' (1, 4) (1, 14)
    NAME       'shrink'      (1, 14) (1, 20)
    STRING     '", does it"' (1, 20) (1, 31)
    """)
        self.check_tokenize("x = 'abc' + 'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "'abc'"       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     "'ABC'"       (1, 12) (1, 17)
    """)
        self.check_tokenize('y = "ABC" + "ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"ABC"'       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     '"ABC"'       (1, 12) (1, 17)
    """)
        self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "r'abc'"      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     "r'ABC'"      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     "R'ABC'"      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     "R'ABC'"      (1, 31) (1, 37)
    """)
        self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'r"abc"'      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     'r"ABC"'      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     'R"ABC"'      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     'R"ABC"'      (1, 31) (1, 37)
    """)

        self.check_tokenize("u'abc' + U'abc'", """\
    STRING     "u'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "U'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('u"abc" + U"abc"', """\
    STRING     'u"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'U"abc"'      (1, 9) (1, 15)
    """)

        self.check_tokenize("b'abc' + B'abc'", """\
    STRING     "b'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "B'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('b"abc" + B"abc"', """\
    STRING     'b"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'B"abc"'      (1, 9) (1, 15)
    """)
        self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
    STRING     "br'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "bR'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Br'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "BR'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
    STRING     'br"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'bR"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Br"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'BR"abc"'     (1, 30) (1, 37)
    """)
        self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\
    STRING     "rb'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "rB'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Rb'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "RB'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\
    STRING     'rb"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'rB"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Rb"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'RB"abc"'     (1, 30) (1, 37)
    """)
        # Check 0, 1, and 2 character string prefixes.
        self.check_tokenize(r'"a\
de\
fg"', """\
    STRING     '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)
    """)
        self.check_tokenize(r'u"a\
de"', """\
    STRING     'u"a\\\\\\nde"\'  (1, 0) (2, 3)
    """)
        self.check_tokenize(r'rb"a\
d"', """\
    STRING     'rb"a\\\\\\nd"\'  (1, 0) (2, 2)
    """)
        self.check_tokenize(r'"""a\
b"""', """\
    STRING     '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'u"""a\
b"""', """\
    STRING     'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'rb"""a\
b\
c"""', """\
    STRING     'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)
    """)
        self.check_tokenize('f"abc"', """\
    STRING     'f"abc"'      (1, 0) (1, 6)
    """)
        self.check_tokenize('fR"a{b}c"', """\
    STRING     'fR"a{b}c"'   (1, 0) (1, 9)
    """)
        self.check_tokenize('f"""abc"""', """\
    STRING     'f\"\"\"abc\"\"\"'  (1, 0) (1, 10)
    """)
        self.check_tokenize(r'f"abc\
def"', """\
    STRING     'f"abc\\\\\\ndef"' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'Rf"abc\
def"', """\
    STRING     'Rf"abc\\\\\\ndef"' (1, 0) (2, 4)
    """)

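    # Note: in this vintage of the module, f-string literals are emitted as
    # single STRING tokens, exactly like the other prefixed strings checked
    # in test_string above; they are not broken into their parts.
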
    def test_function(self):
        self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd22'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ','           (1, 9) (1, 10)
    NAME       'b'           (1, 11) (1, 12)
    OP         ','           (1, 12) (1, 13)
    NAME       'c'           (1, 14) (1, 15)
    OP         '='           (1, 15) (1, 16)
    NUMBER     '2'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    NAME       'd'           (1, 19) (1, 20)
    OP         '='           (1, 20) (1, 21)
    NUMBER     '2'           (1, 21) (1, 22)
    OP         ','           (1, 22) (1, 23)
    OP         '*'           (1, 24) (1, 25)
    NAME       'k'           (1, 25) (1, 26)
    OP         ')'           (1, 26) (1, 27)
    OP         ':'           (1, 27) (1, 28)
    NAME       'pass'        (1, 29) (1, 33)
    """)
        self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd01v_'       (1, 4) (1, 9)
    OP         '('           (1, 9) (1, 10)
    NAME       'a'           (1, 10) (1, 11)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         ','           (1, 13) (1, 14)
    OP         '*'           (1, 15) (1, 16)
    NAME       'k'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    OP         '**'          (1, 19) (1, 21)
    NAME       'w'           (1, 21) (1, 22)
    OP         ')'           (1, 22) (1, 23)
    OP         ':'           (1, 23) (1, 24)
    NAME       'pass'        (1, 25) (1, 29)
    """)
        self.check_tokenize("def d23(a: str, b: int=3) -> int: pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd23'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ':'           (1, 9) (1, 10)
    NAME       'str'         (1, 11) (1, 14)
    OP         ','           (1, 14) (1, 15)
    NAME       'b'           (1, 16) (1, 17)
    OP         ':'           (1, 17) (1, 18)
    NAME       'int'         (1, 19) (1, 22)
    OP         '='           (1, 22) (1, 23)
    NUMBER     '3'           (1, 23) (1, 24)
    OP         ')'           (1, 24) (1, 25)
    OP         '->'          (1, 26) (1, 28)
    NAME       'int'         (1, 29) (1, 32)
    OP         ':'           (1, 32) (1, 33)
    NAME       'pass'        (1, 34) (1, 38)
    """)

    def test_comparison(self):
        # Comparison
        self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
                            "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
    NAME       'if'          (1, 0) (1, 2)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '<'           (1, 5) (1, 6)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    NUMBER     '1'           (1, 11) (1, 12)
    OP         '=='          (1, 13) (1, 15)
    NUMBER     '1'           (1, 16) (1, 17)
    OP         '>='          (1, 18) (1, 20)
    NUMBER     '5'           (1, 21) (1, 22)
    OP         '<='          (1, 23) (1, 25)
    NUMBER     '0x15'        (1, 26) (1, 30)
    OP         '<='          (1, 31) (1, 33)
    NUMBER     '0x12'        (1, 34) (1, 38)
    OP         '!='          (1, 39) (1, 41)
    NUMBER     '1'           (1, 42) (1, 43)
    NAME       'and'         (1, 44) (1, 47)
    NUMBER     '5'           (1, 48) (1, 49)
    NAME       'in'          (1, 50) (1, 52)
    NUMBER     '1'           (1, 53) (1, 54)
    NAME       'not'         (1, 55) (1, 58)
    NAME       'in'          (1, 59) (1, 61)
    NUMBER     '1'           (1, 62) (1, 63)
    NAME       'is'          (1, 64) (1, 66)
    NUMBER     '1'           (1, 67) (1, 68)
    NAME       'or'          (1, 69) (1, 71)
    NUMBER     '5'           (1, 72) (1, 73)
    NAME       'is'          (1, 74) (1, 76)
    NAME       'not'         (1, 77) (1, 80)
    NUMBER     '1'           (1, 81) (1, 82)
    OP         ':'           (1, 82) (1, 83)
    NAME       'pass'        (1, 84) (1, 88)
    """)

    def test_shift(self):
        # Shift
        self.check_tokenize("x = 1 << 1 >> 5", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '<<'          (1, 6) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '>>'          (1, 11) (1, 13)
    NUMBER     '5'           (1, 14) (1, 15)
    """)

    def test_additive(self):
        # Additive
        self.check_tokenize("x = 1 - y + 15 - 1 + 0x124 + z + a[5]", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '-'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '15'          (1, 12) (1, 14)
    OP         '-'           (1, 15) (1, 16)
    NUMBER     '1'           (1, 17) (1, 18)
    OP         '+'           (1, 19) (1, 20)
    NUMBER     '0x124'       (1, 21) (1, 26)
    OP         '+'           (1, 27) (1, 28)
    NAME       'z'           (1, 29) (1, 30)
    OP         '+'           (1, 31) (1, 32)
    NAME       'a'           (1, 33) (1, 34)
    OP         '['           (1, 34) (1, 35)
    NUMBER     '5'           (1, 35) (1, 36)
    OP         ']'           (1, 36) (1, 37)
    """)

    def test_multiplicative(self):
        # Multiplicative
        self.check_tokenize("x = 1//1*1/5*12%0x12@42", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '//'          (1, 5) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '/'           (1, 10) (1, 11)
    NUMBER     '5'           (1, 11) (1, 12)
    OP         '*'           (1, 12) (1, 13)
    NUMBER     '12'          (1, 13) (1, 15)
    OP         '%'           (1, 15) (1, 16)
    NUMBER     '0x12'        (1, 16) (1, 20)
    OP         '@'           (1, 20) (1, 21)
    NUMBER     '42'          (1, 21) (1, 23)
    """)

    def test_unary(self):
        # Unary
        self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
    OP         '~'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '^'           (1, 3) (1, 4)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '&'           (1, 7) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '|'           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '^'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    NUMBER     '1'           (1, 17) (1, 18)
    """)
        self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
    OP         '-'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '*'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '/'           (1, 4) (1, 5)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '+'           (1, 6) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '//'          (1, 10) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    OP         '-'           (1, 17) (1, 18)
    OP         '-'           (1, 18) (1, 19)
    NUMBER     '1'           (1, 19) (1, 20)
    OP         '**'          (1, 20) (1, 22)
    NUMBER     '1'           (1, 22) (1, 23)
    """)

    def test_selector(self):
        # Selector
        self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\
    NAME       'import'      (1, 0) (1, 6)
    NAME       'sys'         (1, 7) (1, 10)
    OP         ','           (1, 10) (1, 11)
    NAME       'time'        (1, 12) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'x'           (2, 0) (2, 1)
    OP         '='           (2, 2) (2, 3)
    NAME       'sys'         (2, 4) (2, 7)
    OP         '.'           (2, 7) (2, 8)
    NAME       'modules'     (2, 8) (2, 15)
    OP         '['           (2, 15) (2, 16)
    STRING     "'time'"      (2, 16) (2, 22)
    OP         ']'           (2, 22) (2, 23)
    OP         '.'           (2, 23) (2, 24)
    NAME       'time'        (2, 24) (2, 28)
    OP         '('           (2, 28) (2, 29)
    OP         ')'           (2, 29) (2, 30)
    """)

    def test_method(self):
        # Methods
        self.check_tokenize("@staticmethod\ndef foo(x,y): pass", """\
    OP         '@'           (1, 0) (1, 1)
    NAME       'staticmethod' (1, 1) (1, 13)
    NEWLINE    '\\n'          (1, 13) (1, 14)
    NAME       'def'         (2, 0) (2, 3)
    NAME       'foo'         (2, 4) (2, 7)
    OP         '('           (2, 7) (2, 8)
    NAME       'x'           (2, 8) (2, 9)
    OP         ','           (2, 9) (2, 10)
    NAME       'y'           (2, 10) (2, 11)
    OP         ')'           (2, 11) (2, 12)
    OP         ':'           (2, 12) (2, 13)
    NAME       'pass'        (2, 14) (2, 18)
    """)

    def test_tabs(self):
        # Evil tabs
        self.check_tokenize("def f():\n"
                            "\tif x\n"
                            "        \tpass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    INDENT     '\\t'          (2, 0) (2, 1)
    NAME       'if'          (2, 1) (2, 3)
    NAME       'x'           (2, 4) (2, 5)
    NEWLINE    '\\n'          (2, 5) (2, 6)
    INDENT     '        \\t'  (3, 0) (3, 9)
    NAME       'pass'        (3, 9) (3, 13)
    DEDENT     ''            (4, 0) (4, 0)
    DEDENT     ''            (4, 0) (4, 0)
    """)

    def test_non_ascii_identifiers(self):
        # Non-ascii identifiers
        self.check_tokenize("Örter = 'places'\ngrün = 'green'", """\
    NAME       'Örter'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    STRING     "'places'"    (1, 8) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'grün'        (2, 0) (2, 4)
    OP         '='           (2, 5) (2, 6)
    STRING     "'green'"     (2, 7) (2, 14)
    """)

    def test_unicode(self):
        # Legacy unicode literals:
        self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\
    NAME       'Örter'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    STRING     "u'places'"   (1, 8) (1, 17)
    NEWLINE    '\\n'          (1, 17) (1, 18)
    NAME       'grün'        (2, 0) (2, 4)
    OP         '='           (2, 5) (2, 6)
    STRING     "U'green'"    (2, 7) (2, 15)
    """)

    def test_async(self):
        # Async/await extension:
        self.check_tokenize("async = 1", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    NUMBER     '1'           (1, 8) (1, 9)
    """)

        self.check_tokenize("a = (async = 1)", """\
    NAME       'a'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '('           (1, 4) (1, 5)
    NAME       'async'       (1, 5) (1, 10)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    """)

        self.check_tokenize("async()", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    """)

        self.check_tokenize("class async(Bar):pass", """\
    NAME       'class'       (1, 0) (1, 5)
    NAME       'async'       (1, 6) (1, 11)
    OP         '('           (1, 11) (1, 12)
    NAME       'Bar'         (1, 12) (1, 15)
    OP         ')'           (1, 15) (1, 16)
    OP         ':'           (1, 16) (1, 17)
    NAME       'pass'        (1, 17) (1, 21)
    """)

        self.check_tokenize("class async:pass", """\
    NAME       'class'       (1, 0) (1, 5)
    NAME       'async'       (1, 6) (1, 11)
    OP         ':'           (1, 11) (1, 12)
    NAME       'pass'        (1, 12) (1, 16)
    """)

        self.check_tokenize("await = 1", """\
    NAME       'await'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    NUMBER     '1'           (1, 8) (1, 9)
    """)

        self.check_tokenize("foo.async", """\
    NAME       'foo'         (1, 0) (1, 3)
    OP         '.'           (1, 3) (1, 4)
    NAME       'async'       (1, 4) (1, 9)
    """)

        self.check_tokenize("async for a in b: pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'for'         (1, 6) (1, 9)
    NAME       'a'           (1, 10) (1, 11)
    NAME       'in'          (1, 12) (1, 14)
    NAME       'b'           (1, 15) (1, 16)
    OP         ':'           (1, 16) (1, 17)
    NAME       'pass'        (1, 18) (1, 22)
    """)

        self.check_tokenize("async with a as b: pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'with'        (1, 6) (1, 10)
    NAME       'a'           (1, 11) (1, 12)
    NAME       'as'          (1, 13) (1, 15)
    NAME       'b'           (1, 16) (1, 17)
    OP         ':'           (1, 17) (1, 18)
    NAME       'pass'        (1, 19) (1, 23)
    """)

        self.check_tokenize("async.foo", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '.'           (1, 5) (1, 6)
    NAME       'foo'         (1, 6) (1, 9)
    """)

        self.check_tokenize("async", """\
    NAME       'async'       (1, 0) (1, 5)
    """)

        self.check_tokenize("async\n#comment\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    COMMENT    '#comment'    (2, 0) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    NAME       'await'       (3, 0) (3, 5)
    """)

        self.check_tokenize("async\n...\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    OP         '...'         (2, 0) (2, 3)
    NEWLINE    '\\n'          (2, 3) (2, 4)
    NAME       'await'       (3, 0) (3, 5)
    """)

        self.check_tokenize("async\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    NAME       'await'       (2, 0) (2, 5)
    """)

        self.check_tokenize("foo.async + 1", """\
    NAME       'foo'         (1, 0) (1, 3)
    OP         '.'           (1, 3) (1, 4)
    NAME       'async'       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '1'           (1, 12) (1, 13)
    """)

        self.check_tokenize("async def foo(): pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NAME       'pass'        (1, 17) (1, 21)
    """)

        self.check_tokenize('''\
async def foo():
  def foo(await):
    await = 1
  if 1:
    await
async += 1
''', """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    INDENT     '  '          (2, 0) (2, 2)
    NAME       'def'         (2, 2) (2, 5)
    NAME       'foo'         (2, 6) (2, 9)
    OP         '('           (2, 9) (2, 10)
    NAME       'await'       (2, 10) (2, 15)
    OP         ')'           (2, 15) (2, 16)
    OP         ':'           (2, 16) (2, 17)
    NEWLINE    '\\n'          (2, 17) (2, 18)
    INDENT     '    '        (3, 0) (3, 4)
    NAME       'await'       (3, 4) (3, 9)
    OP         '='           (3, 10) (3, 11)
    NUMBER     '1'           (3, 12) (3, 13)
    NEWLINE    '\\n'          (3, 13) (3, 14)
    DEDENT     ''            (4, 2) (4, 2)
    NAME       'if'          (4, 2) (4, 4)
    NUMBER     '1'           (4, 5) (4, 6)
    OP         ':'           (4, 6) (4, 7)
    NEWLINE    '\\n'          (4, 7) (4, 8)
    INDENT     '    '        (5, 0) (5, 4)
    NAME       'await'       (5, 4) (5, 9)
    NEWLINE    '\\n'          (5, 9) (5, 10)
    DEDENT     ''            (6, 0) (6, 0)
    DEDENT     ''            (6, 0) (6, 0)
    NAME       'async'       (6, 0) (6, 5)
    OP         '+='          (6, 6) (6, 8)
    NUMBER     '1'           (6, 9) (6, 10)
    NEWLINE    '\\n'          (6, 10) (6, 11)
    """)

        self.check_tokenize('''\
async def foo():
  async for i in 1: pass''', """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    INDENT     '  '          (2, 0) (2, 2)
    NAME       'async'       (2, 2) (2, 7)
    NAME       'for'         (2, 8) (2, 11)
    NAME       'i'           (2, 12) (2, 13)
    NAME       'in'          (2, 14) (2, 16)
    NUMBER     '1'           (2, 17) (2, 18)
    OP         ':'           (2, 18) (2, 19)
    NAME       'pass'        (2, 20) (2, 24)
    DEDENT     ''            (3, 0) (3, 0)
    """)

        self.check_tokenize('''async def foo(async): await''', """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    NAME       'async'       (1, 14) (1, 19)
    OP         ')'           (1, 19) (1, 20)
    OP         ':'           (1, 20) (1, 21)
    NAME       'await'       (1, 22) (1, 27)
    """)

        self.check_tokenize('''\
def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    NL         '\\n'          (2, 0) (2, 1)
    INDENT     '  '          (3, 0) (3, 2)
    NAME       'def'         (3, 2) (3, 5)
    NAME       'baz'         (3, 6) (3, 9)
    OP         '('           (3, 9) (3, 10)
    OP         ')'           (3, 10) (3, 11)
    OP         ':'           (3, 11) (3, 12)
    NAME       'pass'        (3, 13) (3, 17)
    NEWLINE    '\\n'          (3, 17) (3, 18)
    NAME       'async'       (4, 2) (4, 7)
    NAME       'def'         (4, 8) (4, 11)
    NAME       'bar'         (4, 12) (4, 15)
    OP         '('           (4, 15) (4, 16)
    OP         ')'           (4, 16) (4, 17)
    OP         ':'           (4, 17) (4, 18)
    NAME       'pass'        (4, 19) (4, 23)
    NEWLINE    '\\n'          (4, 23) (4, 24)
    NL         '\\n'          (5, 0) (5, 1)
    NAME       'await'       (6, 2) (6, 7)
    OP         '='           (6, 8) (6, 9)
    NUMBER     '2'           (6, 10) (6, 11)
    DEDENT     ''            (7, 0) (7, 0)
    """)

        self.check_tokenize('''\
async def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'f'           (1, 10) (1, 11)
    OP         '('           (1, 11) (1, 12)
    OP         ')'           (1, 12) (1, 13)
    OP         ':'           (1, 13) (1, 14)
    NEWLINE    '\\n'          (1, 14) (1, 15)
    NL         '\\n'          (2, 0) (2, 1)
    INDENT     '  '          (3, 0) (3, 2)
    NAME       'def'         (3, 2) (3, 5)
    NAME       'baz'         (3, 6) (3, 9)
    OP         '('           (3, 9) (3, 10)
    OP         ')'           (3, 10) (3, 11)
    OP         ':'           (3, 11) (3, 12)
    NAME       'pass'        (3, 13) (3, 17)
    NEWLINE    '\\n'          (3, 17) (3, 18)
    NAME       'async'       (4, 2) (4, 7)
    NAME       'def'         (4, 8) (4, 11)
    NAME       'bar'         (4, 12) (4, 15)
    OP         '('           (4, 15) (4, 16)
    OP         ')'           (4, 16) (4, 17)
    OP         ':'           (4, 17) (4, 18)
    NAME       'pass'        (4, 19) (4, 23)
    NEWLINE    '\\n'          (4, 23) (4, 24)
    NL         '\\n'          (5, 0) (5, 1)
    NAME       'await'       (6, 2) (6, 7)
    OP         '='           (6, 8) (6, 9)
    NUMBER     '2'           (6, 10) (6, 11)
    DEDENT     ''            (7, 0) (7, 0)
    """)

class GenerateTokensTest(TokenizeTest):
    def check_tokenize(self, s, expected):
        # Format the tokens in s in a table format.
        # The ENDMARKER is omitted.
        result = []
        f = StringIO(s)
        for type, token, start, end, line in generate_tokens(f.readline):
            if type == ENDMARKER:
                break
            type = tok_name[type]
            result.append(f"    {type:10} {token!r:13} {start} {end}")
        self.assertEqual(result, expected.rstrip().splitlines())

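# GenerateTokensTest reruns every TokenizeTest case through generate_tokens(),
# which consumes str lines instead of bytes and emits no ENCODING token --
# hence the expected tables are compared without the ENCODING row that the
# bytes-based check_tokenize() prepends.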

def decistmt(s):
    result = []
    g = tokenize(BytesIO(s.encode('utf-8')).readline)  # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result).decode('utf-8')

952
953 def test_decistmt(self):
954 # Substitute Decimals for floats in a string of statements.
955 # This is an example from the docs.
956
957 from decimal import Decimal
958 s = '+21.3e-5*-.1234/81.7'
959 self.assertEqual(decistmt(s),
960 "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")
961
962 # The format of the exponent is inherited from the platform C library.
963 # Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
964 # we're only showing 11 digits, and the 12th isn't close to 5, the
965 # rest of the output should be platform-independent.
966 self.assertRegex(repr(eval(s)), '-3.2171603427[0-9]*e-0+7')
967
968 # Output from calculations with Decimal should be identical across all
969 # platforms.
970 self.assertEqual(eval(decistmt(s)),
971 Decimal('-3.217160342717258261933904529E-7'))
972
Trent Nelson428de652008-03-18 22:41:35 +0000973
974class TestTokenizerAdheresToPep0263(TestCase):
975 """
976 Test that tokenizer adheres to the coding behaviour stipulated in PEP 0263.
977 """
978
979 def _testFile(self, filename):
980 path = os.path.join(os.path.dirname(__file__), filename)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300981 TestRoundtrip.check_roundtrip(self, open(path, 'rb'))
Trent Nelson428de652008-03-18 22:41:35 +0000982
983 def test_utf8_coding_cookie_and_no_utf8_bom(self):
Ned Deily2ea6fcc2011-07-19 16:15:27 -0700984 f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300985 self._testFile(f)
Trent Nelson428de652008-03-18 22:41:35 +0000986
987 def test_latin1_coding_cookie_and_utf8_bom(self):
988 """
989 As per PEP 0263, if a file starts with a utf-8 BOM signature, the only
990 allowed encoding for the comment is 'utf-8'. The text file used in
991 this test starts with a BOM signature, but specifies latin1 as the
992 coding, so verify that a SyntaxError is raised, which matches the
993 behaviour of the interpreter when it encounters a similar condition.
994 """
995 f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000996 self.assertRaises(SyntaxError, self._testFile, f)
Trent Nelson428de652008-03-18 22:41:35 +0000997
998 def test_no_coding_cookie_and_utf8_bom(self):
999 f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +03001000 self._testFile(f)
Trent Nelson428de652008-03-18 22:41:35 +00001001
1002 def test_utf8_coding_cookie_and_utf8_bom(self):
1003 f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +03001004 self._testFile(f)
Trent Nelson428de652008-03-18 22:41:35 +00001005
Florent Xicluna11f0b412012-07-07 12:13:35 +02001006 def test_bad_coding_cookie(self):
1007 self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')
1008 self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')
1009
Trent Nelson428de652008-03-18 22:41:35 +00001010
1011class Test_Tokenize(TestCase):
1012
1013 def test__tokenize_decodes_with_specified_encoding(self):
1014 literal = '"ЉЊЈЁЂ"'
1015 line = literal.encode('utf-8')
1016 first = False
1017 def readline():
1018 nonlocal first
1019 if not first:
1020 first = True
1021 return line
1022 else:
1023 return b''
1024
1025 # skip the initial encoding token and the end token
1026 tokens = list(_tokenize(readline, encoding='utf-8'))[1:-1]
1027 expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
Ezio Melottib3aedd42010-11-20 19:04:17 +00001028 self.assertEqual(tokens, expected_tokens,
1029 "bytes not decoded with encoding")
Trent Nelson428de652008-03-18 22:41:35 +00001030
1031 def test__tokenize_does_not_decode_with_encoding_none(self):
1032 literal = '"ЉЊЈЁЂ"'
1033 first = False
1034 def readline():
1035 nonlocal first
1036 if not first:
1037 first = True
1038 return literal
1039 else:
1040 return b''
1041
1042 # skip the end token
1043 tokens = list(_tokenize(readline, encoding=None))[:-1]
1044 expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
Ezio Melottib3aedd42010-11-20 19:04:17 +00001045 self.assertEqual(tokens, expected_tokens,
1046 "string not tokenized when encoding is None")


class TestDetectEncoding(TestCase):

    def get_readline(self, lines):
        index = 0
        def readline():
            nonlocal index
            if index == len(lines):
                raise StopIteration
            line = lines[index]
            index += 1
            return line
        return readline

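    # Each test feeds detect_encoding() a readline over the raw source lines;
    # it returns (encoding, consumed_lines), where consumed_lines holds the
    # raw lines (at most two) that had to be read to make the decision.
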
    def test_no_bom_no_encoding_cookie(self):
        lines = (
            b'# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, list(lines[:2]))

    def test_bom_no_cookie(self):
        lines = (
            b'\xef\xbb\xbf# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'# something\n', b'print(something)\n'])

    def test_cookie_first_line_no_bom(self):
        lines = (
            b'# -*- coding: latin-1 -*-\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso-8859-1')
        self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])

    def test_matched_bom_and_cookie_first_line(self):
        lines = (
            b'\xef\xbb\xbf# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_no_bom(self):
        lines = (
            b'#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'ascii')
        expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
        self.assertEqual(consumed_lines, expected)

    def test_matched_bom_and_cookie_second_line(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'f# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'#! something\n', b'f# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_noncommented_first_line(self):
        lines = (
            b"print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        expected = [b"print('\xc2\xa3')\n"]
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_commented_first_line(self):
        lines = (
            b"#print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_empty_first_line(self):
        lines = (
            b'\n',
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_latin1_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
                     "iso-8859-1-unix", "iso-latin-1-mac")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"print(things)\n",
                         b"do_something += 4\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "iso-8859-1")

    def test_syntaxerror_latin1(self):
        # Issue 14629: need to raise SyntaxError if the first
        # line(s) have non-UTF-8 characters
        lines = (
            b'print("\xdf")',  # Latin-1: LATIN SMALL LETTER SHARP S
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)


    def test_utf8_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"1 + 3\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "utf-8")

    def test_short_files(self):
        readline = self.get_readline((b'print(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        encoding, consumed_lines = detect_encoding(self.get_readline(()))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        readline = self.get_readline((b'\xef\xbb\xbf',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'# coding: bad\n',))
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_false_encoding(self):
        # Issue 18873: "Encoding" detected in non-comment lines
        readline = self.get_readline((b'print("#coding=fake")',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print("#coding=fake")'])

    def test_open(self):
        filename = support.TESTFN + '.py'
        self.addCleanup(support.unlink, filename)

        # test coding cookie
        for encoding in ('iso-8859-15', 'utf-8'):
            with open(filename, 'w', encoding=encoding) as fp:
                print("# coding: %s" % encoding, file=fp)
                print("print('euro:\u20ac')", file=fp)
            with tokenize_open(filename) as fp:
                self.assertEqual(fp.encoding, encoding)
                self.assertEqual(fp.mode, 'r')

        # test BOM (no coding cookie)
        with open(filename, 'w', encoding='utf-8-sig') as fp:
            print("print('euro:\u20ac')", file=fp)
        with tokenize_open(filename) as fp:
            self.assertEqual(fp.encoding, 'utf-8-sig')
            self.assertEqual(fp.mode, 'r')

    def test_filename_in_exception(self):
        # When possible, include the file name in the exception.
        path = 'some_file_path'
        lines = (
            b'print("\xdf")',  # Latin-1: LATIN SMALL LETTER SHARP S
        )
        class Bunk:
            def __init__(self, lines, path):
                self.name = path
                self._lines = lines
                self._index = 0

            def readline(self):
                if self._index == len(lines):
                    raise StopIteration
                line = lines[self._index]
                self._index += 1
                return line

        with self.assertRaises(SyntaxError):
            ins = Bunk(lines, path)
            # Make sure lacking a name isn't an issue.
            del ins.name
            detect_encoding(ins.readline)
        with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
            ins = Bunk(lines, path)
            detect_encoding(ins.readline)

    def test_open_error(self):
        # Issue #23840: open() must close the binary file on error
        m = BytesIO(b'#coding:xxx')
        with mock.patch('tokenize._builtin_open', return_value=m):
            self.assertRaises(SyntaxError, tokenize_open, 'foobar')
        self.assertTrue(m.closed)


class TestTokenize(TestCase):

    def test_tokenize(self):
        import tokenize as tokenize_module
        encoding = object()
        encoding_used = None
        def mock_detect_encoding(readline):
            return encoding, [b'first', b'second']

        def mock__tokenize(readline, encoding):
            nonlocal encoding_used
            encoding_used = encoding
            out = []
            while True:
                next_line = readline()
                if next_line:
                    out.append(next_line)
                    continue
                return out

        counter = 0
        def mock_readline():
            nonlocal counter
            counter += 1
            if counter == 5:
                return b''
            return str(counter).encode()

        orig_detect_encoding = tokenize_module.detect_encoding
        orig__tokenize = tokenize_module._tokenize
        tokenize_module.detect_encoding = mock_detect_encoding
        tokenize_module._tokenize = mock__tokenize
        try:
            results = tokenize(mock_readline)
            self.assertEqual(list(results),
                             [b'first', b'second', b'1', b'2', b'3', b'4'])
        finally:
            tokenize_module.detect_encoding = orig_detect_encoding
            tokenize_module._tokenize = orig__tokenize

        # assertEqual rather than assertTrue(x, msg): the intent is to check
        # that the mocked _tokenize() received the detected encoding.
        self.assertEqual(encoding_used, encoding)

    def test_oneline_defs(self):
        buf = []
        for i in range(500):
            buf.append('def i{i}(): return {i}'.format(i=i))
        buf.append('OK')
        buf = '\n'.join(buf)

        # Test that 500 consecutive, one-line defs are OK
        toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
        self.assertEqual(toks[-2].string, 'OK')  # [-1] is always ENDMARKER

Meador Inge00c7f852012-01-19 00:44:45 -06001356 def assertExactTypeEqual(self, opstr, *optypes):
1357 tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
1358 num_optypes = len(optypes)
1359 self.assertEqual(len(tokens), 2 + num_optypes)
Albert-Jan Nijburgfc354f02017-05-31 15:00:21 +01001360 self.assertEqual(tok_name[tokens[0].exact_type],
1361 tok_name[ENCODING])
Meador Inge00c7f852012-01-19 00:44:45 -06001362 for i in range(num_optypes):
Albert-Jan Nijburgfc354f02017-05-31 15:00:21 +01001363 self.assertEqual(tok_name[tokens[i + 1].exact_type],
1364 tok_name[optypes[i]])
1365 self.assertEqual(tok_name[tokens[1 + num_optypes].exact_type],
1366 tok_name[token.ENDMARKER])
Meador Inge00c7f852012-01-19 00:44:45 -06001367
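    # Operators are reported with the generic type OP; TokenInfo.exact_type
    # resolves each one to its specific token constant (LPAR, PLUS, ...).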
    def test_exact_type(self):
        self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
        self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)
        self.assertExactTypeEqual(':', token.COLON)
        self.assertExactTypeEqual(',', token.COMMA)
        self.assertExactTypeEqual(';', token.SEMI)
        self.assertExactTypeEqual('+', token.PLUS)
        self.assertExactTypeEqual('-', token.MINUS)
        self.assertExactTypeEqual('*', token.STAR)
        self.assertExactTypeEqual('/', token.SLASH)
        self.assertExactTypeEqual('|', token.VBAR)
        self.assertExactTypeEqual('&', token.AMPER)
        self.assertExactTypeEqual('<', token.LESS)
        self.assertExactTypeEqual('>', token.GREATER)
        self.assertExactTypeEqual('=', token.EQUAL)
        self.assertExactTypeEqual('.', token.DOT)
        self.assertExactTypeEqual('%', token.PERCENT)
        self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)
        self.assertExactTypeEqual('==', token.EQEQUAL)
        self.assertExactTypeEqual('!=', token.NOTEQUAL)
        self.assertExactTypeEqual('<=', token.LESSEQUAL)
        self.assertExactTypeEqual('>=', token.GREATEREQUAL)
        self.assertExactTypeEqual('~', token.TILDE)
        self.assertExactTypeEqual('^', token.CIRCUMFLEX)
        self.assertExactTypeEqual('<<', token.LEFTSHIFT)
        self.assertExactTypeEqual('>>', token.RIGHTSHIFT)
        self.assertExactTypeEqual('**', token.DOUBLESTAR)
        self.assertExactTypeEqual('+=', token.PLUSEQUAL)
        self.assertExactTypeEqual('-=', token.MINEQUAL)
        self.assertExactTypeEqual('*=', token.STAREQUAL)
        self.assertExactTypeEqual('/=', token.SLASHEQUAL)
        self.assertExactTypeEqual('%=', token.PERCENTEQUAL)
        self.assertExactTypeEqual('&=', token.AMPEREQUAL)
        self.assertExactTypeEqual('|=', token.VBAREQUAL)
        self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
        self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)
        self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)
        self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)
        self.assertExactTypeEqual('//', token.DOUBLESLASH)
        self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)
        self.assertExactTypeEqual('...', token.ELLIPSIS)
        self.assertExactTypeEqual('->', token.RARROW)
        self.assertExactTypeEqual('@', token.AT)
        self.assertExactTypeEqual('@=', token.ATEQUAL)

        self.assertExactTypeEqual('a**2+b**2==c**2',
                                  NAME, token.DOUBLESTAR, NUMBER,
                                  token.PLUS,
                                  NAME, token.DOUBLESTAR, NUMBER,
                                  token.EQEQUAL,
                                  NAME, token.DOUBLESTAR, NUMBER)
        self.assertExactTypeEqual('{1, 2, 3}',
                                  token.LBRACE,
                                  token.NUMBER, token.COMMA,
                                  token.NUMBER, token.COMMA,
                                  token.NUMBER,
                                  token.RBRACE)
        self.assertExactTypeEqual('^(x & 0x1)',
                                  token.CIRCUMFLEX,
                                  token.LPAR,
                                  token.NAME, token.AMPER, token.NUMBER,
                                  token.RPAR)

    def test_pathological_trailing_whitespace(self):
        # See http://bugs.python.org/issue16152
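        # Trailing whitespace after an operator must not leak into the token,
        # so '@ ' must still have exact type AT.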
        self.assertExactTypeEqual('@ ', token.AT)


class UntokenizeTest(TestCase):

    def test_bad_input_order(self):
        # raise if the start row precedes the previous row
        u = Untokenizer()
        u.prev_row = 2
        u.prev_col = 2
        with self.assertRaises(ValueError) as cm:
            u.add_whitespace((1,3))
        self.assertEqual(cm.exception.args[0],
                         'start (1,3) precedes previous end (2,2)')
        # raise if the start column precedes the previous column in the same row
        self.assertRaises(ValueError, u.add_whitespace, (2,1))

    def test_backslash_continuation(self):
        # The problem is that <whitespace>\<newline> leaves no token
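        # add_whitespace() must synthesize one '\\\n' per skipped row so the
        # regenerated source keeps the original line numbering.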
        u = Untokenizer()
        u.prev_row = 1
        u.prev_col = 1
        u.tokens = []
        u.add_whitespace((2, 0))
        self.assertEqual(u.tokens, ['\\\n'])
        u.prev_row = 2
        u.add_whitespace((4, 4))
        self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])
        TestRoundtrip.check_roundtrip(self, 'a\n b\n c\n \\\n c\n')

    def test_iter_compat(self):
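        # compat() is the fallback for plain (type, string) 2-tuples, used
        # when tokens carry no position information.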
        u = Untokenizer()
        token = (NAME, 'Hello')
        tokens = [(ENCODING, 'utf-8'), token]
        u.compat(token, iter([]))
        self.assertEqual(u.tokens, ["Hello "])
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter([token])), 'Hello ')
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
        self.assertEqual(u.encoding, 'utf-8')
        self.assertEqual(untokenize(iter(tokens)), b'Hello ')


class TestRoundtrip(TestCase):

    def check_roundtrip(self, f):
        """
        Test roundtrip for `untokenize`. `f` is an open file or a string.
        The source code in f is tokenized to both 5- and 2-tuples.
        Both sequences are converted back to source code via
        tokenize.untokenize(), and the latter tokenized again to 2-tuples.
        The test fails if the three 2-tuple tokenizations do not match.

        When untokenize bugs are fixed, untokenize with 5-tuples should
        reproduce code that does not contain a backslash continuation
        following spaces. A proper test should test this.
        """
        # Get source code and original tokenizations
        if isinstance(f, str):
            code = f.encode('utf-8')
        else:
            code = f.read()
            f.close()
        readline = iter(code.splitlines(keepends=True)).__next__
        tokens5 = list(tokenize(readline))
        tokens2 = [tok[:2] for tok in tokens5]
        # Reproduce tokens2 from pairs
        bytes_from2 = untokenize(tokens2)
        readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
        tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
        self.assertEqual(tokens2_from2, tokens2)
        # Reproduce tokens2 from 5-tuples
        bytes_from5 = untokenize(tokens5)
        readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
        tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
        self.assertEqual(tokens2_from5, tokens2)

    def test_roundtrip(self):
        # There are some standard formatting practices that are easy to get right.

        self.check_roundtrip("if x == 1:\n"
                             "    print(x)\n")
        self.check_roundtrip("# This is a comment\n"
                             "# This also")

        # Some people use different formatting conventions, which makes
        # untokenize a little trickier. Note that this test involves trailing
        # whitespace after the colon: there are two trailing blanks before
        # the newline.

        self.check_roundtrip("if x == 1 :  \n"
                             "  print(x)\n")
        fn = support.findfile("tokenize_tests.txt")
        with open(fn, 'rb') as f:
            self.check_roundtrip(f)
        self.check_roundtrip("if x == 1:\n"
                             "    # A comment by itself.\n"
                             "    print(x) # Comment here, too.\n"
                             "    # Another comment.\n"
                             "after_if = True\n")
        self.check_roundtrip("if (x # The comments need to go in the right place\n"
                             "    == 1):\n"
                             "    print('x==1')\n")
        self.check_roundtrip("class Test: # A comment here\n"
                             "  # A comment with weird indent\n"
                             "  after_com = 5\n"
                             "  def x(m): return m*5 # a one liner\n"
                             "  def y(m): # A whitespace after the colon\n"
                             "     return y*4 # 3-space indent\n")

        # Some error-handling code
        self.check_roundtrip("try: import somemodule\n"
                             "except ImportError: # comment\n"
                             "    print('Can not import' # comment2\n)"
                             "else: print('Loaded')\n")

    def test_continuation(self):
        # Implicit line continuation inside balanced brackets
        self.check_roundtrip("a = (3,4, \n"
                             "5,6)\n"
                             "y = [3, 4,\n"
                             "5]\n"
                             "z = {'a': 5,\n"
                             "'b':15, 'c':True}\n"
                             "x = len(y) + 5 - a[\n"
                             "3] - a[2]\n"
                             "+ len(z) - z[\n"
                             "'b']\n")

    def test_backslash_continuation(self):
        # Backslash means line continuation, except for comments
        self.check_roundtrip("x=1+\\\n"
                             "1\n"
                             "# This is a comment\\\n"
                             "# This also\n")
        self.check_roundtrip("# Comment \\\n"
                             "x = 0")

    def test_string_concatenation(self):
        # Two string literals on the same line
        self.check_roundtrip("'' ''")

    def test_random_files(self):
        # Test roundtrip on random python modules.
        # Pass the '-ucpu' option to regrtest to process the full directory.

        import glob, random
        fn = support.findfile("tokenize_tests.txt")
        tempdir = os.path.dirname(fn) or os.curdir
        testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

        # Tokenize is broken on test_pep3131.py because regular expressions are
        # broken on the obscure unicode identifiers in it. *sigh*
        # With roundtrip extended to test the 5-tuple mode of untokenize,
        # 7 more testfiles fail. Remove them also until the failure is diagnosed.

        testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py"))
        for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
            testfiles.remove(os.path.join(tempdir, "test_%s.py" % f))

        if not support.is_resource_enabled("cpu"):
            testfiles = random.sample(testfiles, 10)

        for testfile in testfiles:
            with open(testfile, 'rb') as f:
                with self.subTest(file=testfile):
                    self.check_roundtrip(f)


    def roundtrip(self, code):
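        # Tokenize `code` and immediately untokenize it, returning the
        # regenerated source as a string.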
        if isinstance(code, str):
            code = code.encode('utf-8')
        return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')

    def test_indentation_semantics_retained(self):
        """
        Ensure that although whitespace might be mutated in a roundtrip,
        the semantic meaning of the indentation remains consistent.
        """
        code = "if False:\n\tx=3\n\tx=3\n"
        codelines = self.roundtrip(code).split('\n')
        self.assertEqual(codelines[1], codelines[2])
        self.check_roundtrip(code)


if __name__ == "__main__":
    unittest.main()