from test import support
from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                      STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
                      open as tokenize_open, Untokenizer, generate_tokens,
                      NEWLINE)
from io import BytesIO, StringIO
import unittest
from unittest import TestCase, mock
from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
                               INVALID_UNDERSCORE_LITERALS)
import os
import token


# Converts a source string into a list of textual representations of
# the tokens, such as:
# `    NAME       'if'          (1, 0) (1, 2)`
# to make writing tests easier.
def stringify_tokens_from_source(token_generator, source_string):
    result = []
    num_lines = len(source_string.splitlines())
    missing_trailing_nl = source_string[-1] not in '\r\n'

    for type, token, start, end, line in token_generator:
        if type == ENDMARKER:
            break
        # Ignore the NEWLINE on the last line if the input lacks a
        # trailing newline.
        if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:
            continue
        type = tok_name[type]
        result.append(f"    {type:10} {token!r:13} {start} {end}")

    return result

class TokenizeTest(TestCase):
    # Tests for the tokenize module.

    # The tests can be really simple. Given a small fragment of source
    # code, print out a table with tokens. The ENDMARKER, ENCODING and
    # final NEWLINE are omitted for brevity.

    def check_tokenize(self, s, expected):
        # Format the tokens from s into a table.
        # The ENDMARKER and final NEWLINE are omitted.
        f = BytesIO(s.encode('utf-8'))
        result = stringify_tokens_from_source(tokenize(f.readline), s)

        self.assertEqual(result,
                         ["    ENCODING   'utf-8'       (0, 0) (0, 0)"] +
                         expected.rstrip().splitlines())

    def test_implicit_newline(self):
        # Make sure that the tokenizer puts in an implicit NEWLINE
        # when the input lacks a trailing newline.
        f = BytesIO("x".encode('utf-8'))
        tokens = list(tokenize(f.readline))
        self.assertEqual(tokens[-2].type, NEWLINE)
        self.assertEqual(tokens[-1].type, ENDMARKER)

    def test_basic(self):
        self.check_tokenize("1 + 1", """\
    NUMBER     '1'           (1, 0) (1, 1)
    OP         '+'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    """)
        self.check_tokenize("if False:\n"
                            "    # NL\n"
                            "    \n"
                            "    True = False # NEWLINE\n", """\
    NAME       'if'          (1, 0) (1, 2)
    NAME       'False'       (1, 3) (1, 8)
    OP         ':'           (1, 8) (1, 9)
    NEWLINE    '\\n'          (1, 9) (1, 10)
    COMMENT    '# NL'        (2, 4) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    NL         '\\n'          (3, 4) (3, 5)
    INDENT     '    '        (4, 0) (4, 4)
    NAME       'True'        (4, 4) (4, 8)
    OP         '='           (4, 9) (4, 10)
    NAME       'False'       (4, 11) (4, 16)
    COMMENT    '# NEWLINE'   (4, 17) (4, 26)
    NEWLINE    '\\n'          (4, 26) (4, 27)
    DEDENT     ''            (5, 0) (5, 0)
    """)
        indent_error_file = b"""\
def k(x):
    x += 2
  x += 5
"""
        readline = BytesIO(indent_error_file).readline
        with self.assertRaisesRegex(IndentationError,
                                    "unindent does not match any "
                                    "outer indentation level"):
            for tok in tokenize(readline):
                pass

    def test_int(self):
        # Ordinary integers and binary operators
        self.check_tokenize("0xff <= 255", """\
    NUMBER     '0xff'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0b10 <= 255", """\
    NUMBER     '0b10'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0o123 <= 0O123", """\
    NUMBER     '0o123'       (1, 0) (1, 5)
    OP         '<='          (1, 6) (1, 8)
    NUMBER     '0O123'       (1, 9) (1, 14)
    """)
        self.check_tokenize("1234567 > ~0x15", """\
    NUMBER     '1234567'     (1, 0) (1, 7)
    OP         '>'           (1, 8) (1, 9)
    OP         '~'           (1, 10) (1, 11)
    NUMBER     '0x15'        (1, 11) (1, 15)
    """)
        self.check_tokenize("2134568 != 1231515", """\
    NUMBER     '2134568'     (1, 0) (1, 7)
    OP         '!='          (1, 8) (1, 10)
    NUMBER     '1231515'     (1, 11) (1, 18)
    """)
        self.check_tokenize("(-124561-1) & 200000000", """\
    OP         '('           (1, 0) (1, 1)
    OP         '-'           (1, 1) (1, 2)
    NUMBER     '124561'      (1, 2) (1, 8)
    OP         '-'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         ')'           (1, 10) (1, 11)
    OP         '&'           (1, 12) (1, 13)
    NUMBER     '200000000'   (1, 14) (1, 23)
    """)
        self.check_tokenize("0xdeadbeef != -1", """\
    NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
    OP         '!='          (1, 11) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    NUMBER     '1'           (1, 15) (1, 16)
    """)
        self.check_tokenize("0xdeadc0de & 12345", """\
    NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
    OP         '&'           (1, 11) (1, 12)
    NUMBER     '12345'       (1, 13) (1, 18)
    """)
        self.check_tokenize("0xFF & 0x15 | 1234", """\
    NUMBER     '0xFF'        (1, 0) (1, 4)
    OP         '&'           (1, 5) (1, 6)
    NUMBER     '0x15'        (1, 7) (1, 11)
    OP         '|'           (1, 12) (1, 13)
    NUMBER     '1234'        (1, 14) (1, 18)
    """)

    def test_long(self):
        # Long integers
        self.check_tokenize("x = 0", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0'           (1, 4) (1, 5)
    """)
        self.check_tokenize("x = 0xfffffffffff", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0xfffffffffff' (1, 4) (1, 17)
    """)
        self.check_tokenize("x = 123141242151251616110", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '123141242151251616110' (1, 4) (1, 25)
    """)
        self.check_tokenize("x = -15921590215012591", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '-'           (1, 4) (1, 5)
    NUMBER     '15921590215012591' (1, 5) (1, 22)
    """)

    def test_float(self):
        # Floating point numbers
        self.check_tokenize("x = 3.14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 314159.", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '314159.'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = .314159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '.314159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3e14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3e14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3E123", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3E123'       (1, 4) (1, 9)
    """)
        self.check_tokenize("x+y = 3e-1230", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '+'           (1, 1) (1, 2)
    NAME       'y'           (1, 2) (1, 3)
    OP         '='           (1, 4) (1, 5)
    NUMBER     '3e-1230'     (1, 6) (1, 13)
    """)
        self.check_tokenize("x = 3.14e159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14e159'    (1, 4) (1, 12)
    """)

    def test_underscore_literals(self):
        def number_token(s):
            f = BytesIO(s.encode('utf-8'))
            for toktype, token, start, end, line in tokenize(f.readline):
                if toktype == NUMBER:
                    return token
            return 'invalid token'
        for lit in VALID_UNDERSCORE_LITERALS:
            if '(' in lit:
                # this won't work with compound complex inputs
                continue
            self.assertEqual(number_token(lit), lit)
        for lit in INVALID_UNDERSCORE_LITERALS:
            self.assertNotEqual(number_token(lit), lit)

    def test_string(self):
        # String literals
        self.check_tokenize("x = ''; y = \"\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "''"          (1, 4) (1, 6)
    OP         ';'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '='           (1, 10) (1, 11)
    STRING     '""'          (1, 12) (1, 14)
    """)
        self.check_tokenize("x = '\"'; y = \"'\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '\\'"\\''       (1, 4) (1, 7)
    OP         ';'           (1, 7) (1, 8)
    NAME       'y'           (1, 9) (1, 10)
    OP         '='           (1, 11) (1, 12)
    STRING     '"\\'"'        (1, 13) (1, 16)
    """)
        self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"doesn\\'t "' (1, 4) (1, 14)
    NAME       'shrink'      (1, 14) (1, 20)
    STRING     '", does it"' (1, 20) (1, 31)
    """)
        self.check_tokenize("x = 'abc' + 'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "'abc'"       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     "'ABC'"       (1, 12) (1, 17)
    """)
        self.check_tokenize('y = "ABC" + "ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"ABC"'       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     '"ABC"'       (1, 12) (1, 17)
    """)
        self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "r'abc'"      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     "r'ABC'"      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     "R'ABC'"      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     "R'ABC'"      (1, 31) (1, 37)
    """)
        self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'r"abc"'      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     'r"ABC"'      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     'R"ABC"'      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     'R"ABC"'      (1, 31) (1, 37)
    """)

        self.check_tokenize("u'abc' + U'abc'", """\
    STRING     "u'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "U'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('u"abc" + U"abc"', """\
    STRING     'u"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'U"abc"'      (1, 9) (1, 15)
    """)

        self.check_tokenize("b'abc' + B'abc'", """\
    STRING     "b'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "B'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('b"abc" + B"abc"', """\
    STRING     'b"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'B"abc"'      (1, 9) (1, 15)
    """)
        self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
    STRING     "br'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "bR'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Br'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "BR'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
    STRING     'br"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'bR"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Br"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'BR"abc"'     (1, 30) (1, 37)
    """)
        self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\
    STRING     "rb'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "rB'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Rb'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "RB'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\
    STRING     'rb"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'rB"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Rb"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'RB"abc"'     (1, 30) (1, 37)
    """)
        # Check 0, 1, and 2 character string prefixes.
        self.check_tokenize(r'"a\
de\
fg"', """\
    STRING     '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)
    """)
        self.check_tokenize(r'u"a\
de"', """\
    STRING     'u"a\\\\\\nde"\'  (1, 0) (2, 3)
    """)
        self.check_tokenize(r'rb"a\
d"', """\
    STRING     'rb"a\\\\\\nd"\'  (1, 0) (2, 2)
    """)
        self.check_tokenize(r'"""a\
b"""', """\
    STRING     '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'u"""a\
b"""', """\
    STRING     'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'rb"""a\
b\
c"""', """\
    STRING     'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)
    """)
        self.check_tokenize('f"abc"', """\
    STRING     'f"abc"'      (1, 0) (1, 6)
    """)
        self.check_tokenize('fR"a{b}c"', """\
    STRING     'fR"a{b}c"'   (1, 0) (1, 9)
    """)
        self.check_tokenize('f"""abc"""', """\
    STRING     'f\"\"\"abc\"\"\"'  (1, 0) (1, 10)
    """)
        self.check_tokenize(r'f"abc\
def"', """\
    STRING     'f"abc\\\\\\ndef"' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'Rf"abc\
def"', """\
    STRING     'Rf"abc\\\\\\ndef"' (1, 0) (2, 4)
    """)

    def test_function(self):
        self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd22'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ','           (1, 9) (1, 10)
    NAME       'b'           (1, 11) (1, 12)
    OP         ','           (1, 12) (1, 13)
    NAME       'c'           (1, 14) (1, 15)
    OP         '='           (1, 15) (1, 16)
    NUMBER     '2'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    NAME       'd'           (1, 19) (1, 20)
    OP         '='           (1, 20) (1, 21)
    NUMBER     '2'           (1, 21) (1, 22)
    OP         ','           (1, 22) (1, 23)
    OP         '*'           (1, 24) (1, 25)
    NAME       'k'           (1, 25) (1, 26)
    OP         ')'           (1, 26) (1, 27)
    OP         ':'           (1, 27) (1, 28)
    NAME       'pass'        (1, 29) (1, 33)
    """)
        self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd01v_'       (1, 4) (1, 9)
    OP         '('           (1, 9) (1, 10)
    NAME       'a'           (1, 10) (1, 11)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         ','           (1, 13) (1, 14)
    OP         '*'           (1, 15) (1, 16)
    NAME       'k'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    OP         '**'          (1, 19) (1, 21)
    NAME       'w'           (1, 21) (1, 22)
    OP         ')'           (1, 22) (1, 23)
    OP         ':'           (1, 23) (1, 24)
    NAME       'pass'        (1, 25) (1, 29)
    """)
        self.check_tokenize("def d23(a: str, b: int=3) -> int: pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd23'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ':'           (1, 9) (1, 10)
    NAME       'str'         (1, 11) (1, 14)
    OP         ','           (1, 14) (1, 15)
    NAME       'b'           (1, 16) (1, 17)
    OP         ':'           (1, 17) (1, 18)
    NAME       'int'         (1, 19) (1, 22)
    OP         '='           (1, 22) (1, 23)
    NUMBER     '3'           (1, 23) (1, 24)
    OP         ')'           (1, 24) (1, 25)
    OP         '->'          (1, 26) (1, 28)
    NAME       'int'         (1, 29) (1, 32)
    OP         ':'           (1, 32) (1, 33)
    NAME       'pass'        (1, 34) (1, 38)
    """)

    def test_comparison(self):
        # Comparison
        self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
                            "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
    NAME       'if'          (1, 0) (1, 2)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '<'           (1, 5) (1, 6)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    NUMBER     '1'           (1, 11) (1, 12)
    OP         '=='          (1, 13) (1, 15)
    NUMBER     '1'           (1, 16) (1, 17)
    OP         '>='          (1, 18) (1, 20)
    NUMBER     '5'           (1, 21) (1, 22)
    OP         '<='          (1, 23) (1, 25)
    NUMBER     '0x15'        (1, 26) (1, 30)
    OP         '<='          (1, 31) (1, 33)
    NUMBER     '0x12'        (1, 34) (1, 38)
    OP         '!='          (1, 39) (1, 41)
    NUMBER     '1'           (1, 42) (1, 43)
    NAME       'and'         (1, 44) (1, 47)
    NUMBER     '5'           (1, 48) (1, 49)
    NAME       'in'          (1, 50) (1, 52)
    NUMBER     '1'           (1, 53) (1, 54)
    NAME       'not'         (1, 55) (1, 58)
    NAME       'in'          (1, 59) (1, 61)
    NUMBER     '1'           (1, 62) (1, 63)
    NAME       'is'          (1, 64) (1, 66)
    NUMBER     '1'           (1, 67) (1, 68)
    NAME       'or'          (1, 69) (1, 71)
    NUMBER     '5'           (1, 72) (1, 73)
    NAME       'is'          (1, 74) (1, 76)
    NAME       'not'         (1, 77) (1, 80)
    NUMBER     '1'           (1, 81) (1, 82)
    OP         ':'           (1, 82) (1, 83)
    NAME       'pass'        (1, 84) (1, 88)
    """)

    def test_shift(self):
        # Shift
        self.check_tokenize("x = 1 << 1 >> 5", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '<<'          (1, 6) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '>>'          (1, 11) (1, 13)
    NUMBER     '5'           (1, 14) (1, 15)
    """)

    def test_additive(self):
        # Additive
        self.check_tokenize("x = 1 - y + 15 - 1 + 0x124 + z + a[5]", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '-'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '15'          (1, 12) (1, 14)
    OP         '-'           (1, 15) (1, 16)
    NUMBER     '1'           (1, 17) (1, 18)
    OP         '+'           (1, 19) (1, 20)
    NUMBER     '0x124'       (1, 21) (1, 26)
    OP         '+'           (1, 27) (1, 28)
    NAME       'z'           (1, 29) (1, 30)
    OP         '+'           (1, 31) (1, 32)
    NAME       'a'           (1, 33) (1, 34)
    OP         '['           (1, 34) (1, 35)
    NUMBER     '5'           (1, 35) (1, 36)
    OP         ']'           (1, 36) (1, 37)
    """)

    def test_multiplicative(self):
        # Multiplicative
        self.check_tokenize("x = 1//1*1/5*12%0x12@42", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '//'          (1, 5) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '/'           (1, 10) (1, 11)
    NUMBER     '5'           (1, 11) (1, 12)
    OP         '*'           (1, 12) (1, 13)
    NUMBER     '12'          (1, 13) (1, 15)
    OP         '%'           (1, 15) (1, 16)
    NUMBER     '0x12'        (1, 16) (1, 20)
    OP         '@'           (1, 20) (1, 21)
    NUMBER     '42'          (1, 21) (1, 23)
    """)

    def test_unary(self):
        # Unary
        self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
    OP         '~'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '^'           (1, 3) (1, 4)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '&'           (1, 7) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '|'           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '^'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    NUMBER     '1'           (1, 17) (1, 18)
    """)
        self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
    OP         '-'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '*'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '/'           (1, 4) (1, 5)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '+'           (1, 6) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '//'          (1, 10) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    OP         '-'           (1, 17) (1, 18)
    OP         '-'           (1, 18) (1, 19)
    NUMBER     '1'           (1, 19) (1, 20)
    OP         '**'          (1, 20) (1, 22)
    NUMBER     '1'           (1, 22) (1, 23)
    """)

    def test_selector(self):
        # Selector
        self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\
    NAME       'import'      (1, 0) (1, 6)
    NAME       'sys'         (1, 7) (1, 10)
    OP         ','           (1, 10) (1, 11)
    NAME       'time'        (1, 12) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'x'           (2, 0) (2, 1)
    OP         '='           (2, 2) (2, 3)
    NAME       'sys'         (2, 4) (2, 7)
    OP         '.'           (2, 7) (2, 8)
    NAME       'modules'     (2, 8) (2, 15)
    OP         '['           (2, 15) (2, 16)
    STRING     "'time'"      (2, 16) (2, 22)
    OP         ']'           (2, 22) (2, 23)
    OP         '.'           (2, 23) (2, 24)
    NAME       'time'        (2, 24) (2, 28)
    OP         '('           (2, 28) (2, 29)
    OP         ')'           (2, 29) (2, 30)
    """)

    def test_method(self):
        # Methods
        self.check_tokenize("@staticmethod\ndef foo(x,y): pass", """\
    OP         '@'           (1, 0) (1, 1)
    NAME       'staticmethod' (1, 1) (1, 13)
    NEWLINE    '\\n'          (1, 13) (1, 14)
    NAME       'def'         (2, 0) (2, 3)
    NAME       'foo'         (2, 4) (2, 7)
    OP         '('           (2, 7) (2, 8)
    NAME       'x'           (2, 8) (2, 9)
    OP         ','           (2, 9) (2, 10)
    NAME       'y'           (2, 10) (2, 11)
    OP         ')'           (2, 11) (2, 12)
    OP         ':'           (2, 12) (2, 13)
    NAME       'pass'        (2, 14) (2, 18)
    """)

    def test_tabs(self):
        # Evil tabs
        self.check_tokenize("def f():\n"
                            "\tif x\n"
                            "        \tpass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    INDENT     '\\t'          (2, 0) (2, 1)
    NAME       'if'          (2, 1) (2, 3)
    NAME       'x'           (2, 4) (2, 5)
    NEWLINE    '\\n'          (2, 5) (2, 6)
    INDENT     '        \\t'  (3, 0) (3, 9)
    NAME       'pass'        (3, 9) (3, 13)
    DEDENT     ''            (4, 0) (4, 0)
    DEDENT     ''            (4, 0) (4, 0)
    """)

    def test_non_ascii_identifiers(self):
        # Non-ascii identifiers
        self.check_tokenize("Örter = 'places'\ngrün = 'green'", """\
    NAME       'Örter'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    STRING     "'places'"    (1, 8) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'grün'        (2, 0) (2, 4)
    OP         '='           (2, 5) (2, 6)
    STRING     "'green'"     (2, 7) (2, 14)
    """)

    def test_unicode(self):
        # Legacy unicode literals:
        self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\
    NAME       'Örter'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    STRING     "u'places'"   (1, 8) (1, 17)
    NEWLINE    '\\n'          (1, 17) (1, 18)
    NAME       'grün'        (2, 0) (2, 4)
    OP         '='           (2, 5) (2, 6)
    STRING     "U'green'"    (2, 7) (2, 15)
    """)

    def test_async(self):
        # Async/await extension:
        self.check_tokenize("async = 1", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    NUMBER     '1'           (1, 8) (1, 9)
    """)

        self.check_tokenize("a = (async = 1)", """\
    NAME       'a'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '('           (1, 4) (1, 5)
    NAME       'async'       (1, 5) (1, 10)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    """)

        self.check_tokenize("async()", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    """)

        self.check_tokenize("class async(Bar):pass", """\
    NAME       'class'       (1, 0) (1, 5)
    NAME       'async'       (1, 6) (1, 11)
    OP         '('           (1, 11) (1, 12)
    NAME       'Bar'         (1, 12) (1, 15)
    OP         ')'           (1, 15) (1, 16)
    OP         ':'           (1, 16) (1, 17)
    NAME       'pass'        (1, 17) (1, 21)
    """)

        self.check_tokenize("class async:pass", """\
    NAME       'class'       (1, 0) (1, 5)
    NAME       'async'       (1, 6) (1, 11)
    OP         ':'           (1, 11) (1, 12)
    NAME       'pass'        (1, 12) (1, 16)
    """)

        self.check_tokenize("await = 1", """\
    NAME       'await'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    NUMBER     '1'           (1, 8) (1, 9)
    """)

        self.check_tokenize("foo.async", """\
    NAME       'foo'         (1, 0) (1, 3)
    OP         '.'           (1, 3) (1, 4)
    NAME       'async'       (1, 4) (1, 9)
    """)

        self.check_tokenize("async for a in b: pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'for'         (1, 6) (1, 9)
    NAME       'a'           (1, 10) (1, 11)
    NAME       'in'          (1, 12) (1, 14)
    NAME       'b'           (1, 15) (1, 16)
    OP         ':'           (1, 16) (1, 17)
    NAME       'pass'        (1, 18) (1, 22)
    """)

        self.check_tokenize("async with a as b: pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'with'        (1, 6) (1, 10)
    NAME       'a'           (1, 11) (1, 12)
    NAME       'as'          (1, 13) (1, 15)
    NAME       'b'           (1, 16) (1, 17)
    OP         ':'           (1, 17) (1, 18)
    NAME       'pass'        (1, 19) (1, 23)
    """)

        self.check_tokenize("async.foo", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '.'           (1, 5) (1, 6)
    NAME       'foo'         (1, 6) (1, 9)
    """)

        self.check_tokenize("async", """\
    NAME       'async'       (1, 0) (1, 5)
    """)

        self.check_tokenize("async\n#comment\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    COMMENT    '#comment'    (2, 0) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    NAME       'await'       (3, 0) (3, 5)
    """)

        self.check_tokenize("async\n...\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    OP         '...'         (2, 0) (2, 3)
    NEWLINE    '\\n'          (2, 3) (2, 4)
    NAME       'await'       (3, 0) (3, 5)
    """)

        self.check_tokenize("async\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    NAME       'await'       (2, 0) (2, 5)
    """)

        self.check_tokenize("foo.async + 1", """\
    NAME       'foo'         (1, 0) (1, 3)
    OP         '.'           (1, 3) (1, 4)
    NAME       'async'       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '1'           (1, 12) (1, 13)
    """)

        self.check_tokenize("async def foo(): pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NAME       'pass'        (1, 17) (1, 21)
    """)

        self.check_tokenize('''\
async def foo():
  def foo(await):
    await = 1
  if 1:
    await
async += 1
''', """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    INDENT     '  '          (2, 0) (2, 2)
    NAME       'def'         (2, 2) (2, 5)
    NAME       'foo'         (2, 6) (2, 9)
    OP         '('           (2, 9) (2, 10)
    NAME       'await'       (2, 10) (2, 15)
    OP         ')'           (2, 15) (2, 16)
    OP         ':'           (2, 16) (2, 17)
    NEWLINE    '\\n'          (2, 17) (2, 18)
    INDENT     '    '        (3, 0) (3, 4)
    NAME       'await'       (3, 4) (3, 9)
    OP         '='           (3, 10) (3, 11)
    NUMBER     '1'           (3, 12) (3, 13)
    NEWLINE    '\\n'          (3, 13) (3, 14)
    DEDENT     ''            (4, 2) (4, 2)
    NAME       'if'          (4, 2) (4, 4)
    NUMBER     '1'           (4, 5) (4, 6)
    OP         ':'           (4, 6) (4, 7)
    NEWLINE    '\\n'          (4, 7) (4, 8)
    INDENT     '    '        (5, 0) (5, 4)
    NAME       'await'       (5, 4) (5, 9)
    NEWLINE    '\\n'          (5, 9) (5, 10)
    DEDENT     ''            (6, 0) (6, 0)
    DEDENT     ''            (6, 0) (6, 0)
    NAME       'async'       (6, 0) (6, 5)
    OP         '+='          (6, 6) (6, 8)
    NUMBER     '1'           (6, 9) (6, 10)
    NEWLINE    '\\n'          (6, 10) (6, 11)
    """)

        self.check_tokenize('''\
async def foo():
  async for i in 1: pass''', """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    INDENT     '  '          (2, 0) (2, 2)
    NAME       'async'       (2, 2) (2, 7)
    NAME       'for'         (2, 8) (2, 11)
    NAME       'i'           (2, 12) (2, 13)
    NAME       'in'          (2, 14) (2, 16)
    NUMBER     '1'           (2, 17) (2, 18)
    OP         ':'           (2, 18) (2, 19)
    NAME       'pass'        (2, 20) (2, 24)
    DEDENT     ''            (3, 0) (3, 0)
    """)

        self.check_tokenize('''async def foo(async): await''', """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    NAME       'async'       (1, 14) (1, 19)
    OP         ')'           (1, 19) (1, 20)
    OP         ':'           (1, 20) (1, 21)
    NAME       'await'       (1, 22) (1, 27)
    """)

        self.check_tokenize('''\
def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    NL         '\\n'          (2, 0) (2, 1)
    INDENT     '  '          (3, 0) (3, 2)
    NAME       'def'         (3, 2) (3, 5)
    NAME       'baz'         (3, 6) (3, 9)
    OP         '('           (3, 9) (3, 10)
    OP         ')'           (3, 10) (3, 11)
    OP         ':'           (3, 11) (3, 12)
    NAME       'pass'        (3, 13) (3, 17)
    NEWLINE    '\\n'          (3, 17) (3, 18)
    NAME       'async'       (4, 2) (4, 7)
    NAME       'def'         (4, 8) (4, 11)
    NAME       'bar'         (4, 12) (4, 15)
    OP         '('           (4, 15) (4, 16)
    OP         ')'           (4, 16) (4, 17)
    OP         ':'           (4, 17) (4, 18)
    NAME       'pass'        (4, 19) (4, 23)
    NEWLINE    '\\n'          (4, 23) (4, 24)
    NL         '\\n'          (5, 0) (5, 1)
    NAME       'await'       (6, 2) (6, 7)
    OP         '='           (6, 8) (6, 9)
    NUMBER     '2'           (6, 10) (6, 11)
    DEDENT     ''            (7, 0) (7, 0)
    """)

        self.check_tokenize('''\
async def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'f'           (1, 10) (1, 11)
    OP         '('           (1, 11) (1, 12)
    OP         ')'           (1, 12) (1, 13)
    OP         ':'           (1, 13) (1, 14)
    NEWLINE    '\\n'          (1, 14) (1, 15)
    NL         '\\n'          (2, 0) (2, 1)
    INDENT     '  '          (3, 0) (3, 2)
    NAME       'def'         (3, 2) (3, 5)
    NAME       'baz'         (3, 6) (3, 9)
    OP         '('           (3, 9) (3, 10)
    OP         ')'           (3, 10) (3, 11)
    OP         ':'           (3, 11) (3, 12)
    NAME       'pass'        (3, 13) (3, 17)
    NEWLINE    '\\n'          (3, 17) (3, 18)
    NAME       'async'       (4, 2) (4, 7)
    NAME       'def'         (4, 8) (4, 11)
    NAME       'bar'         (4, 12) (4, 15)
    OP         '('           (4, 15) (4, 16)
    OP         ')'           (4, 16) (4, 17)
    OP         ':'           (4, 17) (4, 18)
    NAME       'pass'        (4, 19) (4, 23)
    NEWLINE    '\\n'          (4, 23) (4, 24)
    NL         '\\n'          (5, 0) (5, 1)
    NAME       'await'       (6, 2) (6, 7)
    OP         '='           (6, 8) (6, 9)
    NUMBER     '2'           (6, 10) (6, 11)
    DEDENT     ''            (7, 0) (7, 0)
    """)

class GenerateTokensTest(TokenizeTest):
    def check_tokenize(self, s, expected):
        # Format the tokens from s into a table.
        # The ENDMARKER and final NEWLINE are omitted.
        f = StringIO(s)
        result = stringify_tokens_from_source(generate_tokens(f.readline), s)
        self.assertEqual(result, expected.rstrip().splitlines())


def decistmt(s):
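    """Substitute Decimal('x') for each float literal x in the statements s.

    Tokenizes s, replaces every NUMBER token that contains a '.' with the
    token sequence for Decimal('...'), and untokenizes the result.
    """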
    result = []
    g = tokenize(BytesIO(s.encode('utf-8')).readline)  # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result).decode('utf-8')

class TestMisc(TestCase):

    def test_decistmt(self):
        # Substitute Decimals for floats in a string of statements.
        # This is an example from the docs.

        from decimal import Decimal
        s = '+21.3e-5*-.1234/81.7'
        self.assertEqual(decistmt(s),
                         "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")

        # The format of the exponent is inherited from the platform C library.
        # Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
        # we're only showing 11 digits, and the 12th isn't close to 5, the
        # rest of the output should be platform-independent.
        self.assertRegex(repr(eval(s)), '-3.2171603427[0-9]*e-0+7')

        # Output from calculations with Decimal should be identical across all
        # platforms.
        self.assertEqual(eval(decistmt(s)),
                         Decimal('-3.217160342717258261933904529E-7'))


class TestTokenizerAdheresToPep0263(TestCase):
    """
    Test that the tokenizer adheres to the coding behaviour stipulated in PEP 0263.
    """

    def _testFile(self, filename):
        path = os.path.join(os.path.dirname(__file__), filename)
        TestRoundtrip.check_roundtrip(self, open(path, 'rb'))

    def test_utf8_coding_cookie_and_no_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
        self._testFile(f)

    def test_latin1_coding_cookie_and_utf8_bom(self):
        """
        As per PEP 0263, if a file starts with a utf-8 BOM signature, the only
        allowed encoding for the comment is 'utf-8'.  The text file used in
        this test starts with a BOM signature, but specifies latin1 as the
        coding, so verify that a SyntaxError is raised, which matches the
        behaviour of the interpreter when it encounters a similar condition.
        """
        f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'
        self.assertRaises(SyntaxError, self._testFile, f)

    def test_no_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'
        self._testFile(f)

    def test_utf8_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'
        self._testFile(f)

    def test_bad_coding_cookie(self):
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')


class Test_Tokenize(TestCase):

    def test__tokenize_decodes_with_specified_encoding(self):
        literal = '"ЉЊЈЁЂ"'
        line = literal.encode('utf-8')
        first = False
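        # readline() emulates a one-line file: the first call returns the
        # encoded line, and every call after that returns b'' to signal EOF.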
        def readline():
            nonlocal first
            if not first:
                first = True
                return line
            else:
                return b''

        # skip the initial encoding token and the end tokens
        tokens = list(_tokenize(readline, encoding='utf-8'))[1:-2]
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEqual(tokens, expected_tokens,
                         "bytes not decoded with encoding")

    def test__tokenize_does_not_decode_with_encoding_none(self):
        literal = '"ЉЊЈЁЂ"'
        first = False
        def readline():
            nonlocal first
            if not first:
                first = True
                return literal
            else:
                return b''

        # skip the end tokens
        tokens = list(_tokenize(readline, encoding=None))[:-2]
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEqual(tokens, expected_tokens,
                         "string not tokenized when encoding is None")


class TestDetectEncoding(TestCase):

    def get_readline(self, lines):
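        # Returns a readline() that serves the given lines one at a time;
        # once they are exhausted it raises StopIteration, which
        # detect_encoding() treats the same as end of input.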
        index = 0
        def readline():
            nonlocal index
            if index == len(lines):
                raise StopIteration
            line = lines[index]
            index += 1
            return line
        return readline

    def test_no_bom_no_encoding_cookie(self):
        lines = (
            b'# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, list(lines[:2]))

    def test_bom_no_cookie(self):
        lines = (
            b'\xef\xbb\xbf# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'# something\n', b'print(something)\n'])

    def test_cookie_first_line_no_bom(self):
        lines = (
            b'# -*- coding: latin-1 -*-\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso-8859-1')
        self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])

    def test_matched_bom_and_cookie_first_line(self):
        lines = (
            b'\xef\xbb\xbf# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_no_bom(self):
        lines = (
            b'#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'ascii')
        expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
        self.assertEqual(consumed_lines, expected)

    def test_matched_bom_and_cookie_second_line(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'f# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'#! something\n', b'f# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_noncommented_first_line(self):
        lines = (
            b"print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        expected = [b"print('\xc2\xa3')\n"]
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_commented_first_line(self):
        lines = (
            b"#print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_empty_first_line(self):
        lines = (
            b'\n',
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_latin1_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
                     "iso-8859-1-unix", "iso-latin-1-mac")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"print(things)\n",
                         b"do_something += 4\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "iso-8859-1")

    def test_syntaxerror_latin1(self):
        # Issue 14629: need to raise SyntaxError if the first
        # line(s) have non-UTF-8 characters
        lines = (
            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)


    def test_utf8_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"1 + 3\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "utf-8")

    def test_short_files(self):
        readline = self.get_readline((b'print(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        encoding, consumed_lines = detect_encoding(self.get_readline(()))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        readline = self.get_readline((b'\xef\xbb\xbf',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'# coding: bad\n',))
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_false_encoding(self):
        # Issue 18873: "Encoding" detected in non-comment lines
        readline = self.get_readline((b'print("#coding=fake")',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print("#coding=fake")'])

    def test_open(self):
        filename = support.TESTFN + '.py'
        self.addCleanup(support.unlink, filename)

        # test coding cookie
        for encoding in ('iso-8859-15', 'utf-8'):
            with open(filename, 'w', encoding=encoding) as fp:
                print("# coding: %s" % encoding, file=fp)
                print("print('euro:\u20ac')", file=fp)
            with tokenize_open(filename) as fp:
                self.assertEqual(fp.encoding, encoding)
                self.assertEqual(fp.mode, 'r')

        # test BOM (no coding cookie)
        with open(filename, 'w', encoding='utf-8-sig') as fp:
            print("print('euro:\u20ac')", file=fp)
        with tokenize_open(filename) as fp:
            self.assertEqual(fp.encoding, 'utf-8-sig')
            self.assertEqual(fp.mode, 'r')

    def test_filename_in_exception(self):
        # When possible, include the file name in the exception.
        path = 'some_file_path'
        lines = (
            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
        )
        class Bunk:
            def __init__(self, lines, path):
                self.name = path
                self._lines = lines
                self._index = 0

            def readline(self):
                if self._index == len(lines):
                    raise StopIteration
                line = lines[self._index]
                self._index += 1
                return line

        with self.assertRaises(SyntaxError):
            ins = Bunk(lines, path)
            # Make sure lacking a name isn't an issue.
            del ins.name
            detect_encoding(ins.readline)
        with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
            ins = Bunk(lines, path)
            detect_encoding(ins.readline)

    def test_open_error(self):
        # Issue #23840: open() must close the binary file on error
        m = BytesIO(b'#coding:xxx')
        with mock.patch('tokenize._builtin_open', return_value=m):
            self.assertRaises(SyntaxError, tokenize_open, 'foobar')
        self.assertTrue(m.closed)


class TestTokenize(TestCase):

    def test_tokenize(self):
        import tokenize as tokenize_module
        encoding = object()
        encoding_used = None
1329 def mock_detect_encoding(readline):
Serhiy Storchaka74a49ac2015-03-20 16:46:19 +02001330 return encoding, [b'first', b'second']
Trent Nelson428de652008-03-18 22:41:35 +00001331
1332 def mock__tokenize(readline, encoding):
1333 nonlocal encoding_used
1334 encoding_used = encoding
1335 out = []
1336 while True:
1337 next_line = readline()
1338 if next_line:
1339 out.append(next_line)
1340 continue
1341 return out
1342
1343 counter = 0
1344 def mock_readline():
1345 nonlocal counter
1346 counter += 1
1347 if counter == 5:
1348 return b''
Serhiy Storchaka74a49ac2015-03-20 16:46:19 +02001349 return str(counter).encode()
Trent Nelson428de652008-03-18 22:41:35 +00001350
1351 orig_detect_encoding = tokenize_module.detect_encoding
1352 orig__tokenize = tokenize_module._tokenize
1353 tokenize_module.detect_encoding = mock_detect_encoding
1354 tokenize_module._tokenize = mock__tokenize
1355 try:
1356 results = tokenize(mock_readline)
Serhiy Storchaka74a49ac2015-03-20 16:46:19 +02001357 self.assertEqual(list(results),
1358 [b'first', b'second', b'1', b'2', b'3', b'4'])
Trent Nelson428de652008-03-18 22:41:35 +00001359 finally:
1360 tokenize_module.detect_encoding = orig_detect_encoding
1361 tokenize_module._tokenize = orig__tokenize
1362
        # Check that the mocked encoding was threaded through to _tokenize
        # (assertTrue would pass vacuously here, since the object is truthy).
        self.assertEqual(encoding_used, encoding)

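    # For orientation, a hedged sketch (an addition) of real, unmocked
    # tokenize() output: the first item is always an ENCODING token, and
    # each TokenInfo is a namedtuple with type, string, start, end and
    # line fields.
    def test_tokenize_first_token_sketch(self):
        first = next(tokenize(BytesIO(b'pass\n').readline))
        self.assertEqual(first.type, ENCODING)
        self.assertEqual(first.string, 'utf-8')
        self.assertEqual(first.start, (0, 0))
        self.assertEqual(first.end, (0, 0))
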
    def test_oneline_defs(self):
        buf = []
        for i in range(500):
            buf.append('def i{i}(): return {i}'.format(i=i))
        buf.append('OK')
        buf = '\n'.join(buf)

        # Test that 500 consecutive one-line defs tokenize without error
        toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
        self.assertEqual(toks[-3].string, 'OK') # [-1] is always ENDMARKER
                                                # [-2] is always NEWLINE

    def assertExactTypeEqual(self, opstr, *optypes):
        tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
        num_optypes = len(optypes)
        self.assertEqual(len(tokens), 3 + num_optypes)
        self.assertEqual(tok_name[tokens[0].exact_type],
                         tok_name[ENCODING])
        for i in range(num_optypes):
            self.assertEqual(tok_name[tokens[i + 1].exact_type],
                             tok_name[optypes[i]])
        self.assertEqual(tok_name[tokens[1 + num_optypes].exact_type],
                         tok_name[token.NEWLINE])
        self.assertEqual(tok_name[tokens[2 + num_optypes].exact_type],
                         tok_name[token.ENDMARKER])

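    # A short direct sketch (an addition) of the exact_type property the
    # helper above relies on: operators tokenize with the generic OP type,
    # while exact_type still reports the precise operator.
    def test_exact_type_property_sketch(self):
        toks = list(tokenize(BytesIO(b'1 + 2\n').readline))
        plus = toks[2]              # [ENCODING, NUMBER, OP, NUMBER, ...]
        self.assertEqual(plus.type, OP)
        self.assertEqual(plus.exact_type, token.PLUS)
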
    def test_exact_type(self):
        self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
        self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)
        self.assertExactTypeEqual(':', token.COLON)
        self.assertExactTypeEqual(',', token.COMMA)
        self.assertExactTypeEqual(';', token.SEMI)
        self.assertExactTypeEqual('+', token.PLUS)
        self.assertExactTypeEqual('-', token.MINUS)
        self.assertExactTypeEqual('*', token.STAR)
        self.assertExactTypeEqual('/', token.SLASH)
        self.assertExactTypeEqual('|', token.VBAR)
        self.assertExactTypeEqual('&', token.AMPER)
        self.assertExactTypeEqual('<', token.LESS)
        self.assertExactTypeEqual('>', token.GREATER)
        self.assertExactTypeEqual('=', token.EQUAL)
        self.assertExactTypeEqual('.', token.DOT)
        self.assertExactTypeEqual('%', token.PERCENT)
        self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)
        self.assertExactTypeEqual('==', token.EQEQUAL)
        self.assertExactTypeEqual('!=', token.NOTEQUAL)
        self.assertExactTypeEqual('<=', token.LESSEQUAL)
        self.assertExactTypeEqual('>=', token.GREATEREQUAL)
        self.assertExactTypeEqual('~', token.TILDE)
        self.assertExactTypeEqual('^', token.CIRCUMFLEX)
        self.assertExactTypeEqual('<<', token.LEFTSHIFT)
        self.assertExactTypeEqual('>>', token.RIGHTSHIFT)
        self.assertExactTypeEqual('**', token.DOUBLESTAR)
        self.assertExactTypeEqual('+=', token.PLUSEQUAL)
        self.assertExactTypeEqual('-=', token.MINEQUAL)
        self.assertExactTypeEqual('*=', token.STAREQUAL)
        self.assertExactTypeEqual('/=', token.SLASHEQUAL)
        self.assertExactTypeEqual('%=', token.PERCENTEQUAL)
        self.assertExactTypeEqual('&=', token.AMPEREQUAL)
        self.assertExactTypeEqual('|=', token.VBAREQUAL)
        self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
        self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)
        self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)
        self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)
        self.assertExactTypeEqual('//', token.DOUBLESLASH)
        self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)
        self.assertExactTypeEqual('...', token.ELLIPSIS)
        self.assertExactTypeEqual('->', token.RARROW)
        self.assertExactTypeEqual('@', token.AT)
        self.assertExactTypeEqual('@=', token.ATEQUAL)

        self.assertExactTypeEqual('a**2+b**2==c**2',
                                  NAME, token.DOUBLESTAR, NUMBER,
                                  token.PLUS,
                                  NAME, token.DOUBLESTAR, NUMBER,
                                  token.EQEQUAL,
                                  NAME, token.DOUBLESTAR, NUMBER)
        self.assertExactTypeEqual('{1, 2, 3}',
                                  token.LBRACE,
                                  token.NUMBER, token.COMMA,
                                  token.NUMBER, token.COMMA,
                                  token.NUMBER,
                                  token.RBRACE)
        self.assertExactTypeEqual('^(x & 0x1)',
                                  token.CIRCUMFLEX,
                                  token.LPAR,
                                  token.NAME, token.AMPER, token.NUMBER,
                                  token.RPAR)

    def test_pathological_trailing_whitespace(self):
        # See http://bugs.python.org/issue16152
        self.assertExactTypeEqual('@ ', token.AT)


class UntokenizeTest(TestCase):

    def test_bad_input_order(self):
        # raise if the start row precedes the previous end row
        u = Untokenizer()
        u.prev_row = 2
        u.prev_col = 2
        with self.assertRaises(ValueError) as cm:
            u.add_whitespace((1,3))
        self.assertEqual(cm.exception.args[0],
                         'start (1,3) precedes previous end (2,2)')
        # raise if the start column precedes the previous end column
        # within the same row
        self.assertRaises(ValueError, u.add_whitespace, (2,1))

    def test_backslash_continuation(self):
        # The problem is that <whitespace>\<newline> leaves no token
        u = Untokenizer()
        u.prev_row = 1
        u.prev_col = 1
        u.tokens = []
        u.add_whitespace((2, 0))
        self.assertEqual(u.tokens, ['\\\n'])
        u.prev_row = 2
        u.add_whitespace((4, 4))
        self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])
        TestRoundtrip.check_roundtrip(self, 'a\n  b\n    c\n  \\\n  c\n')

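    # A hedged end-to-end sketch (added here) of the same issue: a
    # backslash continuation preceded by whitespace survives a full
    # untokenize round trip even though no token covers the backslash
    # and newline themselves.
    def test_backslash_continuation_roundtrip_sketch(self):
        TestRoundtrip.check_roundtrip(self, 'x = 1 + \\\n    2\n')
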
    def test_iter_compat(self):
        u = Untokenizer()
        token = (NAME, 'Hello')
        tokens = [(ENCODING, 'utf-8'), token]
        u.compat(token, iter([]))
        self.assertEqual(u.tokens, ["Hello "])
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter([token])), 'Hello ')
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
        self.assertEqual(u.encoding, 'utf-8')
        self.assertEqual(untokenize(iter(tokens)), b'Hello ')

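    # A brief supplementary sketch (an assumption, not from the original
    # suite): in compat (2-tuple) mode, untokenize only guarantees a
    # token-level match, so spacing may differ from the original source.
    def test_compat_mode_spacing_sketch(self):
        self.assertEqual(untokenize([(NAME, 'x'), (OP, '='), (NUMBER, '1')]),
                         'x =1 ')
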

class TestRoundtrip(TestCase):

    def check_roundtrip(self, f):
        """
        Test roundtrip for `untokenize`. `f` is an open file or a string.
        The source code in f is tokenized to both 5- and 2-tuples.
        Both sequences are converted back to source code via
        tokenize.untokenize(), and each result is tokenized again to
        2-tuples. The test fails if any of the three 2-tuple
        tokenizations differ.

        When untokenize bugs are fixed, untokenize with 5-tuples should
        reproduce code that does not contain a backslash continuation
        following spaces.  A proper test should test this.
        """
        # Get source code and original tokenizations
        if isinstance(f, str):
            code = f.encode('utf-8')
        else:
            code = f.read()
            f.close()
        readline = iter(code.splitlines(keepends=True)).__next__
        tokens5 = list(tokenize(readline))
        tokens2 = [tok[:2] for tok in tokens5]
        # Reproduce tokens2 from pairs
        bytes_from2 = untokenize(tokens2)
        readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
        tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
        self.assertEqual(tokens2_from2, tokens2)
        # Reproduce tokens2 from 5-tuples
        bytes_from5 = untokenize(tokens5)
        readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
        tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
        self.assertEqual(tokens2_from5, tokens2)

    def test_roundtrip(self):
        # There are some standard formatting practices that are easy to get right.

        self.check_roundtrip("if x == 1:\n"
                             "    print(x)\n")
        self.check_roundtrip("# This is a comment\n"
                             "# This also\n")

        # Some people use different formatting conventions, which makes
        # untokenize a little trickier. Note that this test involves trailing
        # whitespace after the colon: there are two trailing blanks, which
        # the roundtrip has to carry through.

        self.check_roundtrip("if x == 1 :  \n"
                             "  print(x)\n")
        fn = support.findfile("tokenize_tests.txt")
        with open(fn, 'rb') as f:
            self.check_roundtrip(f)
        self.check_roundtrip("if x == 1:\n"
                             "    # A comment by itself.\n"
                             "    print(x) # Comment here, too.\n"
                             "    # Another comment.\n"
                             "after_if = True\n")
        self.check_roundtrip("if (x # The comments need to go in the right place\n"
                             "    == 1):\n"
                             "    print('x==1')\n")
        self.check_roundtrip("class Test: # A comment here\n"
                             "  # A comment with weird indent\n"
                             "  after_com = 5\n"
                             "  def x(m): return m*5 # a one liner\n"
                             "  def y(m): # A whitespace after the colon\n"
                             "     return y*4 # 3-space indent\n")

        # Some error-handling code
        self.check_roundtrip("try: import somemodule\n"
                             "except ImportError: # comment\n"
                             "    print('Can not import' # comment2\n)"
                             "else:   print('Loaded')\n")

    def test_continuation(self):
        # Balancing continuation
        self.check_roundtrip("a = (3,4, \n"
                             "5,6)\n"
                             "y = [3, 4,\n"
                             "5]\n"
                             "z = {'a': 5,\n"
                             "'b':15, 'c':True}\n"
                             "x = len(y) + 5 - a[\n"
                             "3] - a[2]\n"
                             "+ len(z) - z[\n"
                             "'b']\n")

    def test_backslash_continuation(self):
        # Backslash means line continuation, except for comments
        self.check_roundtrip("x=1+\\\n"
                             "1\n"
                             "# This is a comment\\\n"
                             "# This also\n")
        self.check_roundtrip("# Comment \\\n"
                             "x = 0")

    def test_string_concatenation(self):
        # Two string literals on the same line
        self.check_roundtrip("'' ''")

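    # A hedged companion sketch (new here): compat-mode untokenize keeps
    # adjacent string literals apart by inserting a space, so the two
    # literals above cannot merge into a single token.
    def test_string_concatenation_compat_sketch(self):
        self.assertEqual(untokenize([(STRING, "''"), (STRING, "''")]),
                         "'' ''")
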
    def test_random_files(self):
        # Test roundtrip on random python modules.
        # Pass the '-ucpu' option to process the full directory.

        import glob, random
        fn = support.findfile("tokenize_tests.txt")
        tempdir = os.path.dirname(fn) or os.curdir
        testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

        # Tokenize is broken on test_unicode_identifiers.py because regular
        # expressions are broken on the obscure unicode identifiers in it. *sigh*
        # With roundtrip extended to test the 5-tuple mode of untokenize,
        # 7 more testfiles fail.  Remove them also until the failure is diagnosed.

        testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py"))
        for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
            testfiles.remove(os.path.join(tempdir, "test_%s.py" % f))

        if not support.is_resource_enabled("cpu"):
            testfiles = random.sample(testfiles, 10)

        for testfile in testfiles:
            with open(testfile, 'rb') as f:
                with self.subTest(file=testfile):
                    self.check_roundtrip(f)


    def roundtrip(self, code):
        if isinstance(code, str):
            code = code.encode('utf-8')
        return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')

    def test_indentation_semantics_retained(self):
        """
        Ensure that although whitespace might be mutated in a roundtrip,
        the semantic meaning of the indentation remains consistent.
        """
        code = "if False:\n\tx=3\n\tx=3\n"
        codelines = self.roundtrip(code).split('\n')
        self.assertEqual(codelines[1], codelines[2])
        self.check_roundtrip(code)


if __name__ == "__main__":
    unittest.main()