blob: 3520a67bd42b111fc3f42ca50399e90106617567 [file] [log] [blame]
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +03001from test import support
2from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
3 STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
4 open as tokenize_open, Untokenizer)
5from io import BytesIO
Stéphane Wirtel90addd62017-07-25 15:33:53 +02006import unittest
Brett Cannona721aba2016-09-09 14:57:09 -07007from unittest import TestCase, mock
8from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
9 INVALID_UNDERSCORE_LITERALS)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +030010import os
11import token
Thomas Wouters89f507f2006-12-13 04:49:30 +000012
Thomas Wouters89f507f2006-12-13 04:49:30 +000013
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +030014class TokenizeTest(TestCase):
15 # Tests for the tokenize module.
Jason R. Coombs7cf36382015-06-20 19:13:50 -040016
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +030017 # The tests can be really simple. Given a small fragment of source
18 # code, print out a table with tokens. The ENDMARKER is omitted for
19 # brevity.
20
21 def check_tokenize(self, s, expected):
22 # Format the tokens in s in a table format.
23 # The ENDMARKER is omitted.
24 result = []
25 f = BytesIO(s.encode('utf-8'))
26 for type, token, start, end, line in tokenize(f.readline):
27 if type == ENDMARKER:
28 break
29 type = tok_name[type]
Eric V. Smith67317742015-10-16 20:45:53 -040030 result.append(f" {type:10} {token!r:13} {start} {end}")
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +030031 self.assertEqual(result,
32 [" ENCODING 'utf-8' (0, 0) (0, 0)"] +
33 expected.rstrip().splitlines())
34
35 def test_basic(self):
36 self.check_tokenize("1 + 1", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +000037 NUMBER '1' (1, 0) (1, 1)
38 OP '+' (1, 2) (1, 3)
39 NUMBER '1' (1, 4) (1, 5)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +030040 """)
41 self.check_tokenize("if False:\n"
42 " # NL\n"
Albert-Jan Nijburgc471ca42017-05-24 12:31:57 +010043 " \n"
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +030044 " True = False # NEWLINE\n", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +000045 NAME 'if' (1, 0) (1, 2)
46 NAME 'False' (1, 3) (1, 8)
47 OP ':' (1, 8) (1, 9)
48 NEWLINE '\\n' (1, 9) (1, 10)
49 COMMENT '# NL' (2, 4) (2, 8)
50 NL '\\n' (2, 8) (2, 9)
Albert-Jan Nijburgc471ca42017-05-24 12:31:57 +010051 NL '\\n' (3, 4) (3, 5)
52 INDENT ' ' (4, 0) (4, 4)
53 NAME 'True' (4, 4) (4, 8)
54 OP '=' (4, 9) (4, 10)
55 NAME 'False' (4, 11) (4, 16)
56 COMMENT '# NEWLINE' (4, 17) (4, 26)
57 NEWLINE '\\n' (4, 26) (4, 27)
58 DEDENT '' (5, 0) (5, 0)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +030059 """)
60 indent_error_file = b"""\
61def k(x):
62 x += 2
63 x += 5
64"""
65 readline = BytesIO(indent_error_file).readline
66 with self.assertRaisesRegex(IndentationError,
67 "unindent does not match any "
68 "outer indentation level"):
69 for tok in tokenize(readline):
70 pass
Thomas Wouters89f507f2006-12-13 04:49:30 +000071
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +030072 def test_int(self):
73 # Ordinary integers and binary operators
74 self.check_tokenize("0xff <= 255", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +000075 NUMBER '0xff' (1, 0) (1, 4)
76 OP '<=' (1, 5) (1, 7)
77 NUMBER '255' (1, 8) (1, 11)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +030078 """)
79 self.check_tokenize("0b10 <= 255", """\
Eric Smith74ca5572008-03-17 19:49:19 +000080 NUMBER '0b10' (1, 0) (1, 4)
81 OP '<=' (1, 5) (1, 7)
82 NUMBER '255' (1, 8) (1, 11)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +030083 """)
84 self.check_tokenize("0o123 <= 0O123", """\
Eric Smith74ca5572008-03-17 19:49:19 +000085 NUMBER '0o123' (1, 0) (1, 5)
86 OP '<=' (1, 6) (1, 8)
87 NUMBER '0O123' (1, 9) (1, 14)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +030088 """)
89 self.check_tokenize("1234567 > ~0x15", """\
Mark Dickinson0c1f7c02008-03-16 05:05:12 +000090 NUMBER '1234567' (1, 0) (1, 7)
91 OP '>' (1, 8) (1, 9)
92 OP '~' (1, 10) (1, 11)
93 NUMBER '0x15' (1, 11) (1, 15)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +030094 """)
95 self.check_tokenize("2134568 != 1231515", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +000096 NUMBER '2134568' (1, 0) (1, 7)
97 OP '!=' (1, 8) (1, 10)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +000098 NUMBER '1231515' (1, 11) (1, 18)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +030099 """)
100 self.check_tokenize("(-124561-1) & 200000000", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000101 OP '(' (1, 0) (1, 1)
102 OP '-' (1, 1) (1, 2)
103 NUMBER '124561' (1, 2) (1, 8)
104 OP '-' (1, 8) (1, 9)
105 NUMBER '1' (1, 9) (1, 10)
106 OP ')' (1, 10) (1, 11)
107 OP '&' (1, 12) (1, 13)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000108 NUMBER '200000000' (1, 14) (1, 23)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300109 """)
110 self.check_tokenize("0xdeadbeef != -1", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000111 NUMBER '0xdeadbeef' (1, 0) (1, 10)
112 OP '!=' (1, 11) (1, 13)
113 OP '-' (1, 14) (1, 15)
114 NUMBER '1' (1, 15) (1, 16)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300115 """)
116 self.check_tokenize("0xdeadc0de & 12345", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000117 NUMBER '0xdeadc0de' (1, 0) (1, 10)
118 OP '&' (1, 11) (1, 12)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000119 NUMBER '12345' (1, 13) (1, 18)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300120 """)
121 self.check_tokenize("0xFF & 0x15 | 1234", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000122 NUMBER '0xFF' (1, 0) (1, 4)
123 OP '&' (1, 5) (1, 6)
124 NUMBER '0x15' (1, 7) (1, 11)
125 OP '|' (1, 12) (1, 13)
126 NUMBER '1234' (1, 14) (1, 18)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300127 """)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000128
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300129 def test_long(self):
130 # Long integers
131 self.check_tokenize("x = 0", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000132 NAME 'x' (1, 0) (1, 1)
133 OP '=' (1, 2) (1, 3)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000134 NUMBER '0' (1, 4) (1, 5)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300135 """)
136 self.check_tokenize("x = 0xfffffffffff", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000137 NAME 'x' (1, 0) (1, 1)
138 OP '=' (1, 2) (1, 3)
Eric V. Smith67317742015-10-16 20:45:53 -0400139 NUMBER '0xfffffffffff' (1, 4) (1, 17)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300140 """)
141 self.check_tokenize("x = 123141242151251616110", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000142 NAME 'x' (1, 0) (1, 1)
143 OP '=' (1, 2) (1, 3)
Eric V. Smith67317742015-10-16 20:45:53 -0400144 NUMBER '123141242151251616110' (1, 4) (1, 25)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300145 """)
146 self.check_tokenize("x = -15921590215012591", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000147 NAME 'x' (1, 0) (1, 1)
148 OP '=' (1, 2) (1, 3)
149 OP '-' (1, 4) (1, 5)
Eric V. Smith67317742015-10-16 20:45:53 -0400150 NUMBER '15921590215012591' (1, 5) (1, 22)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300151 """)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000152
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300153 def test_float(self):
154 # Floating point numbers
155 self.check_tokenize("x = 3.14159", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000156 NAME 'x' (1, 0) (1, 1)
157 OP '=' (1, 2) (1, 3)
158 NUMBER '3.14159' (1, 4) (1, 11)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300159 """)
160 self.check_tokenize("x = 314159.", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000161 NAME 'x' (1, 0) (1, 1)
162 OP '=' (1, 2) (1, 3)
163 NUMBER '314159.' (1, 4) (1, 11)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300164 """)
165 self.check_tokenize("x = .314159", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000166 NAME 'x' (1, 0) (1, 1)
167 OP '=' (1, 2) (1, 3)
168 NUMBER '.314159' (1, 4) (1, 11)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300169 """)
170 self.check_tokenize("x = 3e14159", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000171 NAME 'x' (1, 0) (1, 1)
172 OP '=' (1, 2) (1, 3)
173 NUMBER '3e14159' (1, 4) (1, 11)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300174 """)
175 self.check_tokenize("x = 3E123", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000176 NAME 'x' (1, 0) (1, 1)
177 OP '=' (1, 2) (1, 3)
178 NUMBER '3E123' (1, 4) (1, 9)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300179 """)
180 self.check_tokenize("x+y = 3e-1230", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000181 NAME 'x' (1, 0) (1, 1)
182 OP '+' (1, 1) (1, 2)
183 NAME 'y' (1, 2) (1, 3)
184 OP '=' (1, 4) (1, 5)
185 NUMBER '3e-1230' (1, 6) (1, 13)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300186 """)
187 self.check_tokenize("x = 3.14e159", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000188 NAME 'x' (1, 0) (1, 1)
189 OP '=' (1, 2) (1, 3)
190 NUMBER '3.14e159' (1, 4) (1, 12)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300191 """)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000192
Brett Cannona721aba2016-09-09 14:57:09 -0700193 def test_underscore_literals(self):
194 def number_token(s):
195 f = BytesIO(s.encode('utf-8'))
196 for toktype, token, start, end, line in tokenize(f.readline):
197 if toktype == NUMBER:
198 return token
199 return 'invalid token'
200 for lit in VALID_UNDERSCORE_LITERALS:
201 if '(' in lit:
202 # this won't work with compound complex inputs
203 continue
204 self.assertEqual(number_token(lit), lit)
205 for lit in INVALID_UNDERSCORE_LITERALS:
206 self.assertNotEqual(number_token(lit), lit)
207
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300208 def test_string(self):
209 # String literals
210 self.check_tokenize("x = ''; y = \"\"", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000211 NAME 'x' (1, 0) (1, 1)
212 OP '=' (1, 2) (1, 3)
213 STRING "''" (1, 4) (1, 6)
214 OP ';' (1, 6) (1, 7)
215 NAME 'y' (1, 8) (1, 9)
216 OP '=' (1, 10) (1, 11)
217 STRING '""' (1, 12) (1, 14)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300218 """)
219 self.check_tokenize("x = '\"'; y = \"'\"", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000220 NAME 'x' (1, 0) (1, 1)
221 OP '=' (1, 2) (1, 3)
222 STRING '\\'"\\'' (1, 4) (1, 7)
223 OP ';' (1, 7) (1, 8)
224 NAME 'y' (1, 9) (1, 10)
225 OP '=' (1, 11) (1, 12)
226 STRING '"\\'"' (1, 13) (1, 16)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300227 """)
228 self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000229 NAME 'x' (1, 0) (1, 1)
230 OP '=' (1, 2) (1, 3)
231 STRING '"doesn\\'t "' (1, 4) (1, 14)
232 NAME 'shrink' (1, 14) (1, 20)
233 STRING '", does it"' (1, 20) (1, 31)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300234 """)
235 self.check_tokenize("x = 'abc' + 'ABC'", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000236 NAME 'x' (1, 0) (1, 1)
237 OP '=' (1, 2) (1, 3)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000238 STRING "'abc'" (1, 4) (1, 9)
239 OP '+' (1, 10) (1, 11)
240 STRING "'ABC'" (1, 12) (1, 17)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300241 """)
242 self.check_tokenize('y = "ABC" + "ABC"', """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000243 NAME 'y' (1, 0) (1, 1)
244 OP '=' (1, 2) (1, 3)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000245 STRING '"ABC"' (1, 4) (1, 9)
246 OP '+' (1, 10) (1, 11)
247 STRING '"ABC"' (1, 12) (1, 17)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300248 """)
249 self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000250 NAME 'x' (1, 0) (1, 1)
251 OP '=' (1, 2) (1, 3)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000252 STRING "r'abc'" (1, 4) (1, 10)
253 OP '+' (1, 11) (1, 12)
254 STRING "r'ABC'" (1, 13) (1, 19)
255 OP '+' (1, 20) (1, 21)
256 STRING "R'ABC'" (1, 22) (1, 28)
257 OP '+' (1, 29) (1, 30)
258 STRING "R'ABC'" (1, 31) (1, 37)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300259 """)
260 self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000261 NAME 'y' (1, 0) (1, 1)
262 OP '=' (1, 2) (1, 3)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000263 STRING 'r"abc"' (1, 4) (1, 10)
264 OP '+' (1, 11) (1, 12)
265 STRING 'r"ABC"' (1, 13) (1, 19)
266 OP '+' (1, 20) (1, 21)
267 STRING 'R"ABC"' (1, 22) (1, 28)
268 OP '+' (1, 29) (1, 30)
269 STRING 'R"ABC"' (1, 31) (1, 37)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300270 """)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000271
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300272 self.check_tokenize("u'abc' + U'abc'", """\
Meador Inge8d5c0b82012-06-16 21:49:08 -0500273 STRING "u'abc'" (1, 0) (1, 6)
274 OP '+' (1, 7) (1, 8)
275 STRING "U'abc'" (1, 9) (1, 15)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300276 """)
277 self.check_tokenize('u"abc" + U"abc"', """\
Meador Inge8d5c0b82012-06-16 21:49:08 -0500278 STRING 'u"abc"' (1, 0) (1, 6)
279 OP '+' (1, 7) (1, 8)
280 STRING 'U"abc"' (1, 9) (1, 15)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300281 """)
Meador Inge8d5c0b82012-06-16 21:49:08 -0500282
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300283 self.check_tokenize("b'abc' + B'abc'", """\
Meador Inge8d5c0b82012-06-16 21:49:08 -0500284 STRING "b'abc'" (1, 0) (1, 6)
285 OP '+' (1, 7) (1, 8)
286 STRING "B'abc'" (1, 9) (1, 15)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300287 """)
288 self.check_tokenize('b"abc" + B"abc"', """\
Meador Inge8d5c0b82012-06-16 21:49:08 -0500289 STRING 'b"abc"' (1, 0) (1, 6)
290 OP '+' (1, 7) (1, 8)
291 STRING 'B"abc"' (1, 9) (1, 15)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300292 """)
293 self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
Meador Inge8d5c0b82012-06-16 21:49:08 -0500294 STRING "br'abc'" (1, 0) (1, 7)
295 OP '+' (1, 8) (1, 9)
296 STRING "bR'abc'" (1, 10) (1, 17)
297 OP '+' (1, 18) (1, 19)
298 STRING "Br'abc'" (1, 20) (1, 27)
299 OP '+' (1, 28) (1, 29)
300 STRING "BR'abc'" (1, 30) (1, 37)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300301 """)
302 self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
Meador Inge8d5c0b82012-06-16 21:49:08 -0500303 STRING 'br"abc"' (1, 0) (1, 7)
304 OP '+' (1, 8) (1, 9)
305 STRING 'bR"abc"' (1, 10) (1, 17)
306 OP '+' (1, 18) (1, 19)
307 STRING 'Br"abc"' (1, 20) (1, 27)
308 OP '+' (1, 28) (1, 29)
309 STRING 'BR"abc"' (1, 30) (1, 37)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300310 """)
311 self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\
Meador Inge8d5c0b82012-06-16 21:49:08 -0500312 STRING "rb'abc'" (1, 0) (1, 7)
313 OP '+' (1, 8) (1, 9)
314 STRING "rB'abc'" (1, 10) (1, 17)
315 OP '+' (1, 18) (1, 19)
316 STRING "Rb'abc'" (1, 20) (1, 27)
317 OP '+' (1, 28) (1, 29)
318 STRING "RB'abc'" (1, 30) (1, 37)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300319 """)
320 self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\
Meador Inge8d5c0b82012-06-16 21:49:08 -0500321 STRING 'rb"abc"' (1, 0) (1, 7)
322 OP '+' (1, 8) (1, 9)
323 STRING 'rB"abc"' (1, 10) (1, 17)
324 OP '+' (1, 18) (1, 19)
325 STRING 'Rb"abc"' (1, 20) (1, 27)
326 OP '+' (1, 28) (1, 29)
327 STRING 'RB"abc"' (1, 30) (1, 37)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300328 """)
Eric V. Smith67317742015-10-16 20:45:53 -0400329 # Check 0, 1, and 2 character string prefixes.
330 self.check_tokenize(r'"a\
331de\
332fg"', """\
333 STRING '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)
334 """)
335 self.check_tokenize(r'u"a\
336de"', """\
337 STRING 'u"a\\\\\\nde"\' (1, 0) (2, 3)
338 """)
339 self.check_tokenize(r'rb"a\
340d"', """\
341 STRING 'rb"a\\\\\\nd"\' (1, 0) (2, 2)
342 """)
343 self.check_tokenize(r'"""a\
344b"""', """\
345 STRING '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
346 """)
347 self.check_tokenize(r'u"""a\
348b"""', """\
349 STRING 'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
350 """)
351 self.check_tokenize(r'rb"""a\
352b\
353c"""', """\
354 STRING 'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)
355 """)
Eric V. Smith1c8222c2015-10-26 04:37:55 -0400356 self.check_tokenize('f"abc"', """\
357 STRING 'f"abc"' (1, 0) (1, 6)
358 """)
359 self.check_tokenize('fR"a{b}c"', """\
360 STRING 'fR"a{b}c"' (1, 0) (1, 9)
361 """)
362 self.check_tokenize('f"""abc"""', """\
363 STRING 'f\"\"\"abc\"\"\"' (1, 0) (1, 10)
364 """)
365 self.check_tokenize(r'f"abc\
366def"', """\
367 STRING 'f"abc\\\\\\ndef"' (1, 0) (2, 4)
368 """)
369 self.check_tokenize(r'Rf"abc\
370def"', """\
371 STRING 'Rf"abc\\\\\\ndef"' (1, 0) (2, 4)
372 """)
Meador Inge8d5c0b82012-06-16 21:49:08 -0500373
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300374 def test_function(self):
375 self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000376 NAME 'def' (1, 0) (1, 3)
377 NAME 'd22' (1, 4) (1, 7)
378 OP '(' (1, 7) (1, 8)
379 NAME 'a' (1, 8) (1, 9)
380 OP ',' (1, 9) (1, 10)
381 NAME 'b' (1, 11) (1, 12)
382 OP ',' (1, 12) (1, 13)
383 NAME 'c' (1, 14) (1, 15)
384 OP '=' (1, 15) (1, 16)
385 NUMBER '2' (1, 16) (1, 17)
386 OP ',' (1, 17) (1, 18)
387 NAME 'd' (1, 19) (1, 20)
388 OP '=' (1, 20) (1, 21)
389 NUMBER '2' (1, 21) (1, 22)
390 OP ',' (1, 22) (1, 23)
391 OP '*' (1, 24) (1, 25)
392 NAME 'k' (1, 25) (1, 26)
393 OP ')' (1, 26) (1, 27)
394 OP ':' (1, 27) (1, 28)
395 NAME 'pass' (1, 29) (1, 33)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300396 """)
397 self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000398 NAME 'def' (1, 0) (1, 3)
399 NAME 'd01v_' (1, 4) (1, 9)
400 OP '(' (1, 9) (1, 10)
401 NAME 'a' (1, 10) (1, 11)
402 OP '=' (1, 11) (1, 12)
403 NUMBER '1' (1, 12) (1, 13)
404 OP ',' (1, 13) (1, 14)
405 OP '*' (1, 15) (1, 16)
406 NAME 'k' (1, 16) (1, 17)
407 OP ',' (1, 17) (1, 18)
408 OP '**' (1, 19) (1, 21)
409 NAME 'w' (1, 21) (1, 22)
410 OP ')' (1, 22) (1, 23)
411 OP ':' (1, 23) (1, 24)
412 NAME 'pass' (1, 25) (1, 29)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300413 """)
Jim Fasarakis-Hilliardd4914e92017-03-14 22:16:15 +0200414 self.check_tokenize("def d23(a: str, b: int=3) -> int: pass", """\
415 NAME 'def' (1, 0) (1, 3)
416 NAME 'd23' (1, 4) (1, 7)
417 OP '(' (1, 7) (1, 8)
418 NAME 'a' (1, 8) (1, 9)
419 OP ':' (1, 9) (1, 10)
420 NAME 'str' (1, 11) (1, 14)
421 OP ',' (1, 14) (1, 15)
422 NAME 'b' (1, 16) (1, 17)
423 OP ':' (1, 17) (1, 18)
424 NAME 'int' (1, 19) (1, 22)
425 OP '=' (1, 22) (1, 23)
426 NUMBER '3' (1, 23) (1, 24)
427 OP ')' (1, 24) (1, 25)
428 OP '->' (1, 26) (1, 28)
429 NAME 'int' (1, 29) (1, 32)
430 OP ':' (1, 32) (1, 33)
431 NAME 'pass' (1, 34) (1, 38)
432 """)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000433
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300434 def test_comparison(self):
435 # Comparison
436 self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
437 "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000438 NAME 'if' (1, 0) (1, 2)
439 NUMBER '1' (1, 3) (1, 4)
440 OP '<' (1, 5) (1, 6)
441 NUMBER '1' (1, 7) (1, 8)
442 OP '>' (1, 9) (1, 10)
443 NUMBER '1' (1, 11) (1, 12)
444 OP '==' (1, 13) (1, 15)
445 NUMBER '1' (1, 16) (1, 17)
446 OP '>=' (1, 18) (1, 20)
447 NUMBER '5' (1, 21) (1, 22)
448 OP '<=' (1, 23) (1, 25)
449 NUMBER '0x15' (1, 26) (1, 30)
450 OP '<=' (1, 31) (1, 33)
451 NUMBER '0x12' (1, 34) (1, 38)
452 OP '!=' (1, 39) (1, 41)
453 NUMBER '1' (1, 42) (1, 43)
454 NAME 'and' (1, 44) (1, 47)
455 NUMBER '5' (1, 48) (1, 49)
456 NAME 'in' (1, 50) (1, 52)
457 NUMBER '1' (1, 53) (1, 54)
458 NAME 'not' (1, 55) (1, 58)
459 NAME 'in' (1, 59) (1, 61)
460 NUMBER '1' (1, 62) (1, 63)
461 NAME 'is' (1, 64) (1, 66)
462 NUMBER '1' (1, 67) (1, 68)
463 NAME 'or' (1, 69) (1, 71)
464 NUMBER '5' (1, 72) (1, 73)
465 NAME 'is' (1, 74) (1, 76)
466 NAME 'not' (1, 77) (1, 80)
467 NUMBER '1' (1, 81) (1, 82)
468 OP ':' (1, 82) (1, 83)
469 NAME 'pass' (1, 84) (1, 88)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300470 """)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000471
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300472 def test_shift(self):
473 # Shift
474 self.check_tokenize("x = 1 << 1 >> 5", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000475 NAME 'x' (1, 0) (1, 1)
476 OP '=' (1, 2) (1, 3)
477 NUMBER '1' (1, 4) (1, 5)
478 OP '<<' (1, 6) (1, 8)
479 NUMBER '1' (1, 9) (1, 10)
480 OP '>>' (1, 11) (1, 13)
481 NUMBER '5' (1, 14) (1, 15)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300482 """)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000483
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300484 def test_additive(self):
485 # Additive
486 self.check_tokenize("x = 1 - y + 15 - 1 + 0x124 + z + a[5]", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000487 NAME 'x' (1, 0) (1, 1)
488 OP '=' (1, 2) (1, 3)
489 NUMBER '1' (1, 4) (1, 5)
490 OP '-' (1, 6) (1, 7)
491 NAME 'y' (1, 8) (1, 9)
492 OP '+' (1, 10) (1, 11)
493 NUMBER '15' (1, 12) (1, 14)
494 OP '-' (1, 15) (1, 16)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000495 NUMBER '1' (1, 17) (1, 18)
496 OP '+' (1, 19) (1, 20)
497 NUMBER '0x124' (1, 21) (1, 26)
498 OP '+' (1, 27) (1, 28)
499 NAME 'z' (1, 29) (1, 30)
500 OP '+' (1, 31) (1, 32)
501 NAME 'a' (1, 33) (1, 34)
502 OP '[' (1, 34) (1, 35)
503 NUMBER '5' (1, 35) (1, 36)
504 OP ']' (1, 36) (1, 37)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300505 """)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000506
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300507 def test_multiplicative(self):
508 # Multiplicative
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300509 self.check_tokenize("x = 1//1*1/5*12%0x12@42", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000510 NAME 'x' (1, 0) (1, 1)
511 OP '=' (1, 2) (1, 3)
512 NUMBER '1' (1, 4) (1, 5)
513 OP '//' (1, 5) (1, 7)
514 NUMBER '1' (1, 7) (1, 8)
515 OP '*' (1, 8) (1, 9)
516 NUMBER '1' (1, 9) (1, 10)
517 OP '/' (1, 10) (1, 11)
518 NUMBER '5' (1, 11) (1, 12)
519 OP '*' (1, 12) (1, 13)
520 NUMBER '12' (1, 13) (1, 15)
521 OP '%' (1, 15) (1, 16)
522 NUMBER '0x12' (1, 16) (1, 20)
Benjamin Petersond51374e2014-04-09 23:55:56 -0400523 OP '@' (1, 20) (1, 21)
524 NUMBER '42' (1, 21) (1, 23)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300525 """)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000526
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300527 def test_unary(self):
528 # Unary
529 self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000530 OP '~' (1, 0) (1, 1)
531 NUMBER '1' (1, 1) (1, 2)
532 OP '^' (1, 3) (1, 4)
533 NUMBER '1' (1, 5) (1, 6)
534 OP '&' (1, 7) (1, 8)
535 NUMBER '1' (1, 9) (1, 10)
536 OP '|' (1, 11) (1, 12)
537 NUMBER '1' (1, 12) (1, 13)
538 OP '^' (1, 14) (1, 15)
539 OP '-' (1, 16) (1, 17)
540 NUMBER '1' (1, 17) (1, 18)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300541 """)
542 self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000543 OP '-' (1, 0) (1, 1)
544 NUMBER '1' (1, 1) (1, 2)
545 OP '*' (1, 2) (1, 3)
546 NUMBER '1' (1, 3) (1, 4)
547 OP '/' (1, 4) (1, 5)
548 NUMBER '1' (1, 5) (1, 6)
549 OP '+' (1, 6) (1, 7)
550 NUMBER '1' (1, 7) (1, 8)
551 OP '*' (1, 8) (1, 9)
552 NUMBER '1' (1, 9) (1, 10)
553 OP '//' (1, 10) (1, 12)
554 NUMBER '1' (1, 12) (1, 13)
555 OP '-' (1, 14) (1, 15)
556 OP '-' (1, 16) (1, 17)
557 OP '-' (1, 17) (1, 18)
558 OP '-' (1, 18) (1, 19)
559 NUMBER '1' (1, 19) (1, 20)
560 OP '**' (1, 20) (1, 22)
561 NUMBER '1' (1, 22) (1, 23)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300562 """)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000563
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300564 def test_selector(self):
565 # Selector
566 self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000567 NAME 'import' (1, 0) (1, 6)
568 NAME 'sys' (1, 7) (1, 10)
569 OP ',' (1, 10) (1, 11)
570 NAME 'time' (1, 12) (1, 16)
571 NEWLINE '\\n' (1, 16) (1, 17)
572 NAME 'x' (2, 0) (2, 1)
573 OP '=' (2, 2) (2, 3)
574 NAME 'sys' (2, 4) (2, 7)
575 OP '.' (2, 7) (2, 8)
576 NAME 'modules' (2, 8) (2, 15)
577 OP '[' (2, 15) (2, 16)
578 STRING "'time'" (2, 16) (2, 22)
579 OP ']' (2, 22) (2, 23)
580 OP '.' (2, 23) (2, 24)
581 NAME 'time' (2, 24) (2, 28)
582 OP '(' (2, 28) (2, 29)
583 OP ')' (2, 29) (2, 30)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300584 """)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000585
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300586 def test_method(self):
587 # Methods
588 self.check_tokenize("@staticmethod\ndef foo(x,y): pass", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000589 OP '@' (1, 0) (1, 1)
Eric V. Smith67317742015-10-16 20:45:53 -0400590 NAME 'staticmethod' (1, 1) (1, 13)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000591 NEWLINE '\\n' (1, 13) (1, 14)
592 NAME 'def' (2, 0) (2, 3)
593 NAME 'foo' (2, 4) (2, 7)
594 OP '(' (2, 7) (2, 8)
595 NAME 'x' (2, 8) (2, 9)
596 OP ',' (2, 9) (2, 10)
597 NAME 'y' (2, 10) (2, 11)
598 OP ')' (2, 11) (2, 12)
599 OP ':' (2, 12) (2, 13)
600 NAME 'pass' (2, 14) (2, 18)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300601 """)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000602
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300603 def test_tabs(self):
604 # Evil tabs
605 self.check_tokenize("def f():\n"
606 "\tif x\n"
607 " \tpass", """\
Benjamin Petersona0dfa822009-11-13 02:25:08 +0000608 NAME 'def' (1, 0) (1, 3)
609 NAME 'f' (1, 4) (1, 5)
610 OP '(' (1, 5) (1, 6)
611 OP ')' (1, 6) (1, 7)
612 OP ':' (1, 7) (1, 8)
613 NEWLINE '\\n' (1, 8) (1, 9)
614 INDENT '\\t' (2, 0) (2, 1)
615 NAME 'if' (2, 1) (2, 3)
616 NAME 'x' (2, 4) (2, 5)
617 NEWLINE '\\n' (2, 5) (2, 6)
618 INDENT ' \\t' (3, 0) (3, 9)
619 NAME 'pass' (3, 9) (3, 13)
620 DEDENT '' (4, 0) (4, 0)
621 DEDENT '' (4, 0) (4, 0)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300622 """)
Benjamin Peterson33856de2010-08-30 14:41:20 +0000623
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300624 def test_non_ascii_identifiers(self):
625 # Non-ascii identifiers
626 self.check_tokenize("Örter = 'places'\ngrün = 'green'", """\
Benjamin Peterson33856de2010-08-30 14:41:20 +0000627 NAME 'Örter' (1, 0) (1, 5)
628 OP '=' (1, 6) (1, 7)
629 STRING "'places'" (1, 8) (1, 16)
630 NEWLINE '\\n' (1, 16) (1, 17)
631 NAME 'grün' (2, 0) (2, 4)
632 OP '=' (2, 5) (2, 6)
633 STRING "'green'" (2, 7) (2, 14)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300634 """)
Armin Ronacherc0eaeca2012-03-04 13:07:57 +0000635
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300636 def test_unicode(self):
637 # Legacy unicode literals:
638 self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\
Armin Ronacherc0eaeca2012-03-04 13:07:57 +0000639 NAME 'Örter' (1, 0) (1, 5)
640 OP '=' (1, 6) (1, 7)
641 STRING "u'places'" (1, 8) (1, 17)
642 NEWLINE '\\n' (1, 17) (1, 18)
643 NAME 'grün' (2, 0) (2, 4)
644 OP '=' (2, 5) (2, 6)
Christian Heimes0b3847d2012-06-20 11:17:58 +0200645 STRING "U'green'" (2, 7) (2, 15)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300646 """)
Yury Selivanov75445082015-05-11 22:57:16 -0400647
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300648 def test_async(self):
649 # Async/await extension:
650 self.check_tokenize("async = 1", """\
Yury Selivanov75445082015-05-11 22:57:16 -0400651 NAME 'async' (1, 0) (1, 5)
652 OP '=' (1, 6) (1, 7)
653 NUMBER '1' (1, 8) (1, 9)
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300654 """)
Yury Selivanov75445082015-05-11 22:57:16 -0400655
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300656 self.check_tokenize("a = (async = 1)", """\
Yury Selivanov75445082015-05-11 22:57:16 -0400657 NAME 'a' (1, 0) (1, 1)
658 OP '=' (1, 2) (1, 3)
659 OP '(' (1, 4) (1, 5)
660 NAME 'async' (1, 5) (1, 10)
661 OP '=' (1, 11) (1, 12)
662 NUMBER '1' (1, 13) (1, 14)
663 OP ')' (1, 14) (1, 15)
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300664 """)
Yury Selivanov75445082015-05-11 22:57:16 -0400665
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300666 self.check_tokenize("async()", """\
Yury Selivanov75445082015-05-11 22:57:16 -0400667 NAME 'async' (1, 0) (1, 5)
668 OP '(' (1, 5) (1, 6)
669 OP ')' (1, 6) (1, 7)
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300670 """)
Yury Selivanov75445082015-05-11 22:57:16 -0400671
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300672 self.check_tokenize("class async(Bar):pass", """\
Yury Selivanov75445082015-05-11 22:57:16 -0400673 NAME 'class' (1, 0) (1, 5)
674 NAME 'async' (1, 6) (1, 11)
675 OP '(' (1, 11) (1, 12)
676 NAME 'Bar' (1, 12) (1, 15)
677 OP ')' (1, 15) (1, 16)
678 OP ':' (1, 16) (1, 17)
679 NAME 'pass' (1, 17) (1, 21)
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300680 """)
Yury Selivanov75445082015-05-11 22:57:16 -0400681
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300682 self.check_tokenize("class async:pass", """\
Yury Selivanov75445082015-05-11 22:57:16 -0400683 NAME 'class' (1, 0) (1, 5)
684 NAME 'async' (1, 6) (1, 11)
685 OP ':' (1, 11) (1, 12)
686 NAME 'pass' (1, 12) (1, 16)
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300687 """)
Yury Selivanov75445082015-05-11 22:57:16 -0400688
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300689 self.check_tokenize("await = 1", """\
Yury Selivanov75445082015-05-11 22:57:16 -0400690 NAME 'await' (1, 0) (1, 5)
691 OP '=' (1, 6) (1, 7)
692 NUMBER '1' (1, 8) (1, 9)
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300693 """)
Yury Selivanov75445082015-05-11 22:57:16 -0400694
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300695 self.check_tokenize("foo.async", """\
Yury Selivanov75445082015-05-11 22:57:16 -0400696 NAME 'foo' (1, 0) (1, 3)
697 OP '.' (1, 3) (1, 4)
698 NAME 'async' (1, 4) (1, 9)
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300699 """)
Yury Selivanov75445082015-05-11 22:57:16 -0400700
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300701 self.check_tokenize("async for a in b: pass", """\
Yury Selivanov75445082015-05-11 22:57:16 -0400702 NAME 'async' (1, 0) (1, 5)
703 NAME 'for' (1, 6) (1, 9)
704 NAME 'a' (1, 10) (1, 11)
705 NAME 'in' (1, 12) (1, 14)
706 NAME 'b' (1, 15) (1, 16)
707 OP ':' (1, 16) (1, 17)
708 NAME 'pass' (1, 18) (1, 22)
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300709 """)
Yury Selivanov75445082015-05-11 22:57:16 -0400710
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300711 self.check_tokenize("async with a as b: pass", """\
Yury Selivanov75445082015-05-11 22:57:16 -0400712 NAME 'async' (1, 0) (1, 5)
713 NAME 'with' (1, 6) (1, 10)
714 NAME 'a' (1, 11) (1, 12)
715 NAME 'as' (1, 13) (1, 15)
716 NAME 'b' (1, 16) (1, 17)
717 OP ':' (1, 17) (1, 18)
718 NAME 'pass' (1, 19) (1, 23)
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300719 """)
Yury Selivanov75445082015-05-11 22:57:16 -0400720
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300721 self.check_tokenize("async.foo", """\
Yury Selivanov75445082015-05-11 22:57:16 -0400722 NAME 'async' (1, 0) (1, 5)
723 OP '.' (1, 5) (1, 6)
724 NAME 'foo' (1, 6) (1, 9)
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300725 """)
Yury Selivanov75445082015-05-11 22:57:16 -0400726
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300727 self.check_tokenize("async", """\
Yury Selivanov75445082015-05-11 22:57:16 -0400728 NAME 'async' (1, 0) (1, 5)
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300729 """)
Yury Selivanov75445082015-05-11 22:57:16 -0400730
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300731 self.check_tokenize("async\n#comment\nawait", """\
Yury Selivanov75445082015-05-11 22:57:16 -0400732 NAME 'async' (1, 0) (1, 5)
733 NEWLINE '\\n' (1, 5) (1, 6)
734 COMMENT '#comment' (2, 0) (2, 8)
735 NL '\\n' (2, 8) (2, 9)
736 NAME 'await' (3, 0) (3, 5)
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300737 """)
Yury Selivanov75445082015-05-11 22:57:16 -0400738
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300739 self.check_tokenize("async\n...\nawait", """\
Yury Selivanov75445082015-05-11 22:57:16 -0400740 NAME 'async' (1, 0) (1, 5)
741 NEWLINE '\\n' (1, 5) (1, 6)
742 OP '...' (2, 0) (2, 3)
743 NEWLINE '\\n' (2, 3) (2, 4)
744 NAME 'await' (3, 0) (3, 5)
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300745 """)
Yury Selivanov75445082015-05-11 22:57:16 -0400746
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300747 self.check_tokenize("async\nawait", """\
Yury Selivanov75445082015-05-11 22:57:16 -0400748 NAME 'async' (1, 0) (1, 5)
749 NEWLINE '\\n' (1, 5) (1, 6)
750 NAME 'await' (2, 0) (2, 5)
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300751 """)
Yury Selivanov75445082015-05-11 22:57:16 -0400752
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300753 self.check_tokenize("foo.async + 1", """\
Yury Selivanov75445082015-05-11 22:57:16 -0400754 NAME 'foo' (1, 0) (1, 3)
755 OP '.' (1, 3) (1, 4)
756 NAME 'async' (1, 4) (1, 9)
757 OP '+' (1, 10) (1, 11)
758 NUMBER '1' (1, 12) (1, 13)
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300759 """)
Yury Selivanov75445082015-05-11 22:57:16 -0400760
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300761 self.check_tokenize("async def foo(): pass", """\
Jelle Zijlstraac317702017-10-05 20:24:46 -0700762 NAME 'async' (1, 0) (1, 5)
Yury Selivanov75445082015-05-11 22:57:16 -0400763 NAME 'def' (1, 6) (1, 9)
764 NAME 'foo' (1, 10) (1, 13)
765 OP '(' (1, 13) (1, 14)
766 OP ')' (1, 14) (1, 15)
767 OP ':' (1, 15) (1, 16)
768 NAME 'pass' (1, 17) (1, 21)
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300769 """)
Yury Selivanov75445082015-05-11 22:57:16 -0400770
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300771 self.check_tokenize('''\
772async def foo():
773 def foo(await):
774 await = 1
775 if 1:
776 await
777async += 1
778''', """\
Jelle Zijlstraac317702017-10-05 20:24:46 -0700779 NAME 'async' (1, 0) (1, 5)
Yury Selivanov75445082015-05-11 22:57:16 -0400780 NAME 'def' (1, 6) (1, 9)
781 NAME 'foo' (1, 10) (1, 13)
782 OP '(' (1, 13) (1, 14)
783 OP ')' (1, 14) (1, 15)
784 OP ':' (1, 15) (1, 16)
785 NEWLINE '\\n' (1, 16) (1, 17)
786 INDENT ' ' (2, 0) (2, 2)
787 NAME 'def' (2, 2) (2, 5)
788 NAME 'foo' (2, 6) (2, 9)
789 OP '(' (2, 9) (2, 10)
Jelle Zijlstraac317702017-10-05 20:24:46 -0700790 NAME 'await' (2, 10) (2, 15)
Yury Selivanov75445082015-05-11 22:57:16 -0400791 OP ')' (2, 15) (2, 16)
792 OP ':' (2, 16) (2, 17)
793 NEWLINE '\\n' (2, 17) (2, 18)
794 INDENT ' ' (3, 0) (3, 4)
Jelle Zijlstraac317702017-10-05 20:24:46 -0700795 NAME 'await' (3, 4) (3, 9)
Yury Selivanov75445082015-05-11 22:57:16 -0400796 OP '=' (3, 10) (3, 11)
797 NUMBER '1' (3, 12) (3, 13)
798 NEWLINE '\\n' (3, 13) (3, 14)
799 DEDENT '' (4, 2) (4, 2)
800 NAME 'if' (4, 2) (4, 4)
801 NUMBER '1' (4, 5) (4, 6)
802 OP ':' (4, 6) (4, 7)
803 NEWLINE '\\n' (4, 7) (4, 8)
804 INDENT ' ' (5, 0) (5, 4)
Jelle Zijlstraac317702017-10-05 20:24:46 -0700805 NAME 'await' (5, 4) (5, 9)
Yury Selivanov75445082015-05-11 22:57:16 -0400806 NEWLINE '\\n' (5, 9) (5, 10)
807 DEDENT '' (6, 0) (6, 0)
808 DEDENT '' (6, 0) (6, 0)
809 NAME 'async' (6, 0) (6, 5)
810 OP '+=' (6, 6) (6, 8)
811 NUMBER '1' (6, 9) (6, 10)
812 NEWLINE '\\n' (6, 10) (6, 11)
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300813 """)
Yury Selivanov75445082015-05-11 22:57:16 -0400814
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300815 self.check_tokenize('''\
816async def foo():
817 async for i in 1: pass''', """\
Jelle Zijlstraac317702017-10-05 20:24:46 -0700818 NAME 'async' (1, 0) (1, 5)
Yury Selivanov75445082015-05-11 22:57:16 -0400819 NAME 'def' (1, 6) (1, 9)
820 NAME 'foo' (1, 10) (1, 13)
821 OP '(' (1, 13) (1, 14)
822 OP ')' (1, 14) (1, 15)
823 OP ':' (1, 15) (1, 16)
824 NEWLINE '\\n' (1, 16) (1, 17)
825 INDENT ' ' (2, 0) (2, 2)
Jelle Zijlstraac317702017-10-05 20:24:46 -0700826 NAME 'async' (2, 2) (2, 7)
Yury Selivanov75445082015-05-11 22:57:16 -0400827 NAME 'for' (2, 8) (2, 11)
828 NAME 'i' (2, 12) (2, 13)
829 NAME 'in' (2, 14) (2, 16)
830 NUMBER '1' (2, 17) (2, 18)
831 OP ':' (2, 18) (2, 19)
832 NAME 'pass' (2, 20) (2, 24)
833 DEDENT '' (3, 0) (3, 0)
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300834 """)
Yury Selivanov8fb307c2015-07-22 13:33:45 +0300835
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300836 self.check_tokenize('''async def foo(async): await''', """\
Jelle Zijlstraac317702017-10-05 20:24:46 -0700837 NAME 'async' (1, 0) (1, 5)
Yury Selivanov8fb307c2015-07-22 13:33:45 +0300838 NAME 'def' (1, 6) (1, 9)
839 NAME 'foo' (1, 10) (1, 13)
840 OP '(' (1, 13) (1, 14)
Jelle Zijlstraac317702017-10-05 20:24:46 -0700841 NAME 'async' (1, 14) (1, 19)
Yury Selivanov8fb307c2015-07-22 13:33:45 +0300842 OP ')' (1, 19) (1, 20)
843 OP ':' (1, 20) (1, 21)
Jelle Zijlstraac317702017-10-05 20:24:46 -0700844 NAME 'await' (1, 22) (1, 27)
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300845 """)
Yury Selivanov96ec9342015-07-23 15:01:58 +0300846
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300847 self.check_tokenize('''\
848def f():
849
850 def baz(): pass
851 async def bar(): pass
852
853 await = 2''', """\
Yury Selivanov96ec9342015-07-23 15:01:58 +0300854 NAME 'def' (1, 0) (1, 3)
855 NAME 'f' (1, 4) (1, 5)
856 OP '(' (1, 5) (1, 6)
857 OP ')' (1, 6) (1, 7)
858 OP ':' (1, 7) (1, 8)
859 NEWLINE '\\n' (1, 8) (1, 9)
860 NL '\\n' (2, 0) (2, 1)
861 INDENT ' ' (3, 0) (3, 2)
862 NAME 'def' (3, 2) (3, 5)
863 NAME 'baz' (3, 6) (3, 9)
864 OP '(' (3, 9) (3, 10)
865 OP ')' (3, 10) (3, 11)
866 OP ':' (3, 11) (3, 12)
867 NAME 'pass' (3, 13) (3, 17)
868 NEWLINE '\\n' (3, 17) (3, 18)
Jelle Zijlstraac317702017-10-05 20:24:46 -0700869 NAME 'async' (4, 2) (4, 7)
Yury Selivanov96ec9342015-07-23 15:01:58 +0300870 NAME 'def' (4, 8) (4, 11)
871 NAME 'bar' (4, 12) (4, 15)
872 OP '(' (4, 15) (4, 16)
873 OP ')' (4, 16) (4, 17)
874 OP ':' (4, 17) (4, 18)
875 NAME 'pass' (4, 19) (4, 23)
876 NEWLINE '\\n' (4, 23) (4, 24)
877 NL '\\n' (5, 0) (5, 1)
878 NAME 'await' (6, 2) (6, 7)
879 OP '=' (6, 8) (6, 9)
880 NUMBER '2' (6, 10) (6, 11)
881 DEDENT '' (7, 0) (7, 0)
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300882 """)
Yury Selivanov96ec9342015-07-23 15:01:58 +0300883
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300884 self.check_tokenize('''\
885async def f():
886
887 def baz(): pass
888 async def bar(): pass
889
890 await = 2''', """\
Jelle Zijlstraac317702017-10-05 20:24:46 -0700891 NAME 'async' (1, 0) (1, 5)
Yury Selivanov96ec9342015-07-23 15:01:58 +0300892 NAME 'def' (1, 6) (1, 9)
893 NAME 'f' (1, 10) (1, 11)
894 OP '(' (1, 11) (1, 12)
895 OP ')' (1, 12) (1, 13)
896 OP ':' (1, 13) (1, 14)
897 NEWLINE '\\n' (1, 14) (1, 15)
898 NL '\\n' (2, 0) (2, 1)
899 INDENT ' ' (3, 0) (3, 2)
900 NAME 'def' (3, 2) (3, 5)
901 NAME 'baz' (3, 6) (3, 9)
902 OP '(' (3, 9) (3, 10)
903 OP ')' (3, 10) (3, 11)
904 OP ':' (3, 11) (3, 12)
905 NAME 'pass' (3, 13) (3, 17)
906 NEWLINE '\\n' (3, 17) (3, 18)
Jelle Zijlstraac317702017-10-05 20:24:46 -0700907 NAME 'async' (4, 2) (4, 7)
Yury Selivanov96ec9342015-07-23 15:01:58 +0300908 NAME 'def' (4, 8) (4, 11)
909 NAME 'bar' (4, 12) (4, 15)
910 OP '(' (4, 15) (4, 16)
911 OP ')' (4, 16) (4, 17)
912 OP ':' (4, 17) (4, 18)
913 NAME 'pass' (4, 19) (4, 23)
914 NEWLINE '\\n' (4, 23) (4, 24)
915 NL '\\n' (5, 0) (5, 1)
Jelle Zijlstraac317702017-10-05 20:24:46 -0700916 NAME 'await' (6, 2) (6, 7)
Yury Selivanov96ec9342015-07-23 15:01:58 +0300917 OP '=' (6, 8) (6, 9)
918 NUMBER '2' (6, 10) (6, 11)
919 DEDENT '' (7, 0) (7, 0)
Serhiy Storchaka6f5175d2015-10-06 18:23:12 +0300920 """)
Thomas Wouters89f507f2006-12-13 04:49:30 +0000921
Raymond Hettinger68c04532005-06-10 11:05:19 +0000922
Raymond Hettinger68c04532005-06-10 11:05:19 +0000923def decistmt(s):
Raymond Hettinger68c04532005-06-10 11:05:19 +0000924 result = []
Trent Nelson428de652008-03-18 22:41:35 +0000925 g = tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string
Raymond Hettinger68c04532005-06-10 11:05:19 +0000926 for toknum, tokval, _, _, _ in g:
927 if toknum == NUMBER and '.' in tokval: # replace NUMBER tokens
928 result.extend([
929 (NAME, 'Decimal'),
930 (OP, '('),
931 (STRING, repr(tokval)),
932 (OP, ')')
933 ])
934 else:
935 result.append((toknum, tokval))
Trent Nelson428de652008-03-18 22:41:35 +0000936 return untokenize(result).decode('utf-8')
937
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300938class TestMisc(TestCase):
939
940 def test_decistmt(self):
941 # Substitute Decimals for floats in a string of statements.
942 # This is an example from the docs.
943
944 from decimal import Decimal
945 s = '+21.3e-5*-.1234/81.7'
946 self.assertEqual(decistmt(s),
947 "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")
948
949 # The format of the exponent is inherited from the platform C library.
950 # Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
951 # we're only showing 11 digits, and the 12th isn't close to 5, the
952 # rest of the output should be platform-independent.
953 self.assertRegex(repr(eval(s)), '-3.2171603427[0-9]*e-0+7')
954
955 # Output from calculations with Decimal should be identical across all
956 # platforms.
957 self.assertEqual(eval(decistmt(s)),
958 Decimal('-3.217160342717258261933904529E-7'))
959
Trent Nelson428de652008-03-18 22:41:35 +0000960
961class TestTokenizerAdheresToPep0263(TestCase):
962 """
963 Test that tokenizer adheres to the coding behaviour stipulated in PEP 0263.
964 """
965
966 def _testFile(self, filename):
967 path = os.path.join(os.path.dirname(__file__), filename)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300968 TestRoundtrip.check_roundtrip(self, open(path, 'rb'))
Trent Nelson428de652008-03-18 22:41:35 +0000969
970 def test_utf8_coding_cookie_and_no_utf8_bom(self):
Ned Deily2ea6fcc2011-07-19 16:15:27 -0700971 f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300972 self._testFile(f)
Trent Nelson428de652008-03-18 22:41:35 +0000973
974 def test_latin1_coding_cookie_and_utf8_bom(self):
975 """
976 As per PEP 0263, if a file starts with a utf-8 BOM signature, the only
977 allowed encoding for the comment is 'utf-8'. The text file used in
978 this test starts with a BOM signature, but specifies latin1 as the
979 coding, so verify that a SyntaxError is raised, which matches the
980 behaviour of the interpreter when it encounters a similar condition.
981 """
982 f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000983 self.assertRaises(SyntaxError, self._testFile, f)
Trent Nelson428de652008-03-18 22:41:35 +0000984
985 def test_no_coding_cookie_and_utf8_bom(self):
986 f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300987 self._testFile(f)
Trent Nelson428de652008-03-18 22:41:35 +0000988
989 def test_utf8_coding_cookie_and_utf8_bom(self):
990 f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300991 self._testFile(f)
Trent Nelson428de652008-03-18 22:41:35 +0000992
Florent Xicluna11f0b412012-07-07 12:13:35 +0200993 def test_bad_coding_cookie(self):
994 self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')
995 self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')
996
Trent Nelson428de652008-03-18 22:41:35 +0000997
998class Test_Tokenize(TestCase):
999
1000 def test__tokenize_decodes_with_specified_encoding(self):
1001 literal = '"ЉЊЈЁЂ"'
1002 line = literal.encode('utf-8')
1003 first = False
1004 def readline():
1005 nonlocal first
1006 if not first:
1007 first = True
1008 return line
1009 else:
1010 return b''
1011
1012 # skip the initial encoding token and the end token
1013 tokens = list(_tokenize(readline, encoding='utf-8'))[1:-1]
1014 expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
Ezio Melottib3aedd42010-11-20 19:04:17 +00001015 self.assertEqual(tokens, expected_tokens,
1016 "bytes not decoded with encoding")
Trent Nelson428de652008-03-18 22:41:35 +00001017
1018 def test__tokenize_does_not_decode_with_encoding_none(self):
1019 literal = '"ЉЊЈЁЂ"'
1020 first = False
1021 def readline():
1022 nonlocal first
1023 if not first:
1024 first = True
1025 return literal
1026 else:
1027 return b''
1028
1029 # skip the end token
1030 tokens = list(_tokenize(readline, encoding=None))[:-1]
1031 expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
Ezio Melottib3aedd42010-11-20 19:04:17 +00001032 self.assertEqual(tokens, expected_tokens,
1033 "string not tokenized when encoding is None")
Trent Nelson428de652008-03-18 22:41:35 +00001034
1035
1036class TestDetectEncoding(TestCase):
1037
1038 def get_readline(self, lines):
1039 index = 0
1040 def readline():
1041 nonlocal index
1042 if index == len(lines):
1043 raise StopIteration
1044 line = lines[index]
1045 index += 1
1046 return line
1047 return readline
1048
1049 def test_no_bom_no_encoding_cookie(self):
1050 lines = (
1051 b'# something\n',
1052 b'print(something)\n',
1053 b'do_something(else)\n'
1054 )
1055 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
Ezio Melottib3aedd42010-11-20 19:04:17 +00001056 self.assertEqual(encoding, 'utf-8')
1057 self.assertEqual(consumed_lines, list(lines[:2]))
Trent Nelson428de652008-03-18 22:41:35 +00001058
1059 def test_bom_no_cookie(self):
1060 lines = (
1061 b'\xef\xbb\xbf# something\n',
1062 b'print(something)\n',
1063 b'do_something(else)\n'
1064 )
1065 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
Ezio Melottib3aedd42010-11-20 19:04:17 +00001066 self.assertEqual(encoding, 'utf-8-sig')
1067 self.assertEqual(consumed_lines,
1068 [b'# something\n', b'print(something)\n'])
Trent Nelson428de652008-03-18 22:41:35 +00001069
1070 def test_cookie_first_line_no_bom(self):
1071 lines = (
1072 b'# -*- coding: latin-1 -*-\n',
1073 b'print(something)\n',
1074 b'do_something(else)\n'
1075 )
1076 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
Ezio Melottib3aedd42010-11-20 19:04:17 +00001077 self.assertEqual(encoding, 'iso-8859-1')
1078 self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])
Trent Nelson428de652008-03-18 22:41:35 +00001079
1080 def test_matched_bom_and_cookie_first_line(self):
1081 lines = (
1082 b'\xef\xbb\xbf# coding=utf-8\n',
1083 b'print(something)\n',
1084 b'do_something(else)\n'
1085 )
1086 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
Ezio Melottib3aedd42010-11-20 19:04:17 +00001087 self.assertEqual(encoding, 'utf-8-sig')
1088 self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])
Trent Nelson428de652008-03-18 22:41:35 +00001089
1090 def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
1091 lines = (
1092 b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',
1093 b'print(something)\n',
1094 b'do_something(else)\n'
1095 )
1096 readline = self.get_readline(lines)
1097 self.assertRaises(SyntaxError, detect_encoding, readline)
1098
1099 def test_cookie_second_line_no_bom(self):
1100 lines = (
1101 b'#! something\n',
1102 b'# vim: set fileencoding=ascii :\n',
1103 b'print(something)\n',
1104 b'do_something(else)\n'
1105 )
1106 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
Ezio Melottib3aedd42010-11-20 19:04:17 +00001107 self.assertEqual(encoding, 'ascii')
Trent Nelson428de652008-03-18 22:41:35 +00001108 expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
Ezio Melottib3aedd42010-11-20 19:04:17 +00001109 self.assertEqual(consumed_lines, expected)
Trent Nelson428de652008-03-18 22:41:35 +00001110
1111 def test_matched_bom_and_cookie_second_line(self):
1112 lines = (
1113 b'\xef\xbb\xbf#! something\n',
1114 b'f# coding=utf-8\n',
1115 b'print(something)\n',
1116 b'do_something(else)\n'
1117 )
1118 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
Ezio Melottib3aedd42010-11-20 19:04:17 +00001119 self.assertEqual(encoding, 'utf-8-sig')
1120 self.assertEqual(consumed_lines,
1121 [b'#! something\n', b'f# coding=utf-8\n'])
Trent Nelson428de652008-03-18 22:41:35 +00001122
1123 def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
1124 lines = (
1125 b'\xef\xbb\xbf#! something\n',
1126 b'# vim: set fileencoding=ascii :\n',
1127 b'print(something)\n',
1128 b'do_something(else)\n'
1129 )
1130 readline = self.get_readline(lines)
1131 self.assertRaises(SyntaxError, detect_encoding, readline)
1132
Serhiy Storchaka768c16c2014-01-09 18:36:09 +02001133 def test_cookie_second_line_noncommented_first_line(self):
1134 lines = (
1135 b"print('\xc2\xa3')\n",
1136 b'# vim: set fileencoding=iso8859-15 :\n',
1137 b"print('\xe2\x82\xac')\n"
1138 )
1139 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1140 self.assertEqual(encoding, 'utf-8')
1141 expected = [b"print('\xc2\xa3')\n"]
1142 self.assertEqual(consumed_lines, expected)
1143
1144 def test_cookie_second_line_commented_first_line(self):
1145 lines = (
1146 b"#print('\xc2\xa3')\n",
1147 b'# vim: set fileencoding=iso8859-15 :\n',
1148 b"print('\xe2\x82\xac')\n"
1149 )
1150 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1151 self.assertEqual(encoding, 'iso8859-15')
1152 expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
1153 self.assertEqual(consumed_lines, expected)
1154
1155 def test_cookie_second_line_empty_first_line(self):
1156 lines = (
1157 b'\n',
1158 b'# vim: set fileencoding=iso8859-15 :\n',
1159 b"print('\xe2\x82\xac')\n"
1160 )
1161 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1162 self.assertEqual(encoding, 'iso8859-15')
1163 expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
1164 self.assertEqual(consumed_lines, expected)
1165
Benjamin Petersond3afada2009-10-09 21:43:09 +00001166 def test_latin1_normalization(self):
1167 # See get_normal_name() in tokenizer.c.
1168 encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
1169 "iso-8859-1-unix", "iso-latin-1-mac")
1170 for encoding in encodings:
1171 for rep in ("-", "_"):
1172 enc = encoding.replace("-", rep)
1173 lines = (b"#!/usr/bin/python\n",
1174 b"# coding: " + enc.encode("ascii") + b"\n",
1175 b"print(things)\n",
1176 b"do_something += 4\n")
1177 rl = self.get_readline(lines)
1178 found, consumed_lines = detect_encoding(rl)
Ezio Melottib3aedd42010-11-20 19:04:17 +00001179 self.assertEqual(found, "iso-8859-1")
Benjamin Petersond3afada2009-10-09 21:43:09 +00001180
Martin v. Löwis63674f42012-04-20 14:36:47 +02001181 def test_syntaxerror_latin1(self):
1182 # Issue 14629: need to raise SyntaxError if the first
1183 # line(s) have non-UTF-8 characters
1184 lines = (
1185 b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
1186 )
1187 readline = self.get_readline(lines)
1188 self.assertRaises(SyntaxError, detect_encoding, readline)
1189
1190
Benjamin Petersond3afada2009-10-09 21:43:09 +00001191 def test_utf8_normalization(self):
1192 # See get_normal_name() in tokenizer.c.
1193 encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
1194 for encoding in encodings:
1195 for rep in ("-", "_"):
1196 enc = encoding.replace("-", rep)
1197 lines = (b"#!/usr/bin/python\n",
1198 b"# coding: " + enc.encode("ascii") + b"\n",
1199 b"1 + 3\n")
1200 rl = self.get_readline(lines)
1201 found, consumed_lines = detect_encoding(rl)
Ezio Melottib3aedd42010-11-20 19:04:17 +00001202 self.assertEqual(found, "utf-8")
Benjamin Petersond3afada2009-10-09 21:43:09 +00001203
Trent Nelson428de652008-03-18 22:41:35 +00001204 def test_short_files(self):
1205 readline = self.get_readline((b'print(something)\n',))
1206 encoding, consumed_lines = detect_encoding(readline)
Ezio Melottib3aedd42010-11-20 19:04:17 +00001207 self.assertEqual(encoding, 'utf-8')
1208 self.assertEqual(consumed_lines, [b'print(something)\n'])
Trent Nelson428de652008-03-18 22:41:35 +00001209
1210 encoding, consumed_lines = detect_encoding(self.get_readline(()))
Ezio Melottib3aedd42010-11-20 19:04:17 +00001211 self.assertEqual(encoding, 'utf-8')
1212 self.assertEqual(consumed_lines, [])
Trent Nelson428de652008-03-18 22:41:35 +00001213
1214 readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
1215 encoding, consumed_lines = detect_encoding(readline)
Ezio Melottib3aedd42010-11-20 19:04:17 +00001216 self.assertEqual(encoding, 'utf-8-sig')
1217 self.assertEqual(consumed_lines, [b'print(something)\n'])
Trent Nelson428de652008-03-18 22:41:35 +00001218
1219 readline = self.get_readline((b'\xef\xbb\xbf',))
1220 encoding, consumed_lines = detect_encoding(readline)
Ezio Melottib3aedd42010-11-20 19:04:17 +00001221 self.assertEqual(encoding, 'utf-8-sig')
1222 self.assertEqual(consumed_lines, [])
Trent Nelson428de652008-03-18 22:41:35 +00001223
Benjamin Peterson433f32c2008-12-12 01:25:05 +00001224 readline = self.get_readline((b'# coding: bad\n',))
1225 self.assertRaises(SyntaxError, detect_encoding, readline)
Trent Nelson428de652008-03-18 22:41:35 +00001226
Serhiy Storchakadafea852013-09-16 23:51:56 +03001227 def test_false_encoding(self):
1228 # Issue 18873: "Encoding" detected in non-comment lines
1229 readline = self.get_readline((b'print("#coding=fake")',))
1230 encoding, consumed_lines = detect_encoding(readline)
1231 self.assertEqual(encoding, 'utf-8')
1232 self.assertEqual(consumed_lines, [b'print("#coding=fake")'])
1233
Victor Stinner58c07522010-11-09 01:08:59 +00001234 def test_open(self):
1235 filename = support.TESTFN + '.py'
1236 self.addCleanup(support.unlink, filename)
1237
1238 # test coding cookie
1239 for encoding in ('iso-8859-15', 'utf-8'):
1240 with open(filename, 'w', encoding=encoding) as fp:
1241 print("# coding: %s" % encoding, file=fp)
1242 print("print('euro:\u20ac')", file=fp)
1243 with tokenize_open(filename) as fp:
Victor Stinner92665ab2010-11-09 01:11:31 +00001244 self.assertEqual(fp.encoding, encoding)
1245 self.assertEqual(fp.mode, 'r')
Victor Stinner58c07522010-11-09 01:08:59 +00001246
1247 # test BOM (no coding cookie)
1248 with open(filename, 'w', encoding='utf-8-sig') as fp:
1249 print("print('euro:\u20ac')", file=fp)
1250 with tokenize_open(filename) as fp:
Victor Stinner92665ab2010-11-09 01:11:31 +00001251 self.assertEqual(fp.encoding, 'utf-8-sig')
1252 self.assertEqual(fp.mode, 'r')
Victor Stinner58c07522010-11-09 01:08:59 +00001253
Brett Cannonc33f3f22012-04-20 13:23:54 -04001254 def test_filename_in_exception(self):
1255 # When possible, include the file name in the exception.
1256 path = 'some_file_path'
1257 lines = (
1258 b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
1259 )
1260 class Bunk:
1261 def __init__(self, lines, path):
1262 self.name = path
1263 self._lines = lines
1264 self._index = 0
1265
1266 def readline(self):
1267 if self._index == len(lines):
1268 raise StopIteration
1269 line = lines[self._index]
1270 self._index += 1
1271 return line
1272
1273 with self.assertRaises(SyntaxError):
1274 ins = Bunk(lines, path)
1275 # Make sure lacking a name isn't an issue.
1276 del ins.name
1277 detect_encoding(ins.readline)
1278 with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
1279 ins = Bunk(lines, path)
1280 detect_encoding(ins.readline)
1281
Victor Stinner387729e2015-05-26 00:43:58 +02001282 def test_open_error(self):
1283 # Issue #23840: open() must close the binary file on error
1284 m = BytesIO(b'#coding:xxx')
1285 with mock.patch('tokenize._builtin_open', return_value=m):
1286 self.assertRaises(SyntaxError, tokenize_open, 'foobar')
1287 self.assertTrue(m.closed)
1288
1289
Trent Nelson428de652008-03-18 22:41:35 +00001290class TestTokenize(TestCase):
1291
1292 def test_tokenize(self):
1293 import tokenize as tokenize_module
1294 encoding = object()
1295 encoding_used = None
1296 def mock_detect_encoding(readline):
Serhiy Storchaka74a49ac2015-03-20 16:46:19 +02001297 return encoding, [b'first', b'second']
Trent Nelson428de652008-03-18 22:41:35 +00001298
1299 def mock__tokenize(readline, encoding):
1300 nonlocal encoding_used
1301 encoding_used = encoding
1302 out = []
1303 while True:
1304 next_line = readline()
1305 if next_line:
1306 out.append(next_line)
1307 continue
1308 return out
1309
1310 counter = 0
1311 def mock_readline():
1312 nonlocal counter
1313 counter += 1
1314 if counter == 5:
1315 return b''
Serhiy Storchaka74a49ac2015-03-20 16:46:19 +02001316 return str(counter).encode()
Trent Nelson428de652008-03-18 22:41:35 +00001317
1318 orig_detect_encoding = tokenize_module.detect_encoding
1319 orig__tokenize = tokenize_module._tokenize
1320 tokenize_module.detect_encoding = mock_detect_encoding
1321 tokenize_module._tokenize = mock__tokenize
1322 try:
1323 results = tokenize(mock_readline)
Serhiy Storchaka74a49ac2015-03-20 16:46:19 +02001324 self.assertEqual(list(results),
1325 [b'first', b'second', b'1', b'2', b'3', b'4'])
Trent Nelson428de652008-03-18 22:41:35 +00001326 finally:
1327 tokenize_module.detect_encoding = orig_detect_encoding
1328 tokenize_module._tokenize = orig__tokenize
1329
1330 self.assertTrue(encoding_used, encoding)
Raymond Hettinger68c04532005-06-10 11:05:19 +00001331
Yury Selivanov8085b802015-05-18 12:50:52 -04001332 def test_oneline_defs(self):
1333 buf = []
1334 for i in range(500):
1335 buf.append('def i{i}(): return {i}'.format(i=i))
1336 buf.append('OK')
1337 buf = '\n'.join(buf)
1338
1339 # Test that 500 consequent, one-line defs is OK
1340 toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
1341 self.assertEqual(toks[-2].string, 'OK') # [-1] is always ENDMARKER
1342
Meador Inge00c7f852012-01-19 00:44:45 -06001343 def assertExactTypeEqual(self, opstr, *optypes):
1344 tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
1345 num_optypes = len(optypes)
1346 self.assertEqual(len(tokens), 2 + num_optypes)
Albert-Jan Nijburgfc354f02017-05-31 15:00:21 +01001347 self.assertEqual(tok_name[tokens[0].exact_type],
1348 tok_name[ENCODING])
Meador Inge00c7f852012-01-19 00:44:45 -06001349 for i in range(num_optypes):
Albert-Jan Nijburgfc354f02017-05-31 15:00:21 +01001350 self.assertEqual(tok_name[tokens[i + 1].exact_type],
1351 tok_name[optypes[i]])
1352 self.assertEqual(tok_name[tokens[1 + num_optypes].exact_type],
1353 tok_name[token.ENDMARKER])
Meador Inge00c7f852012-01-19 00:44:45 -06001354
1355 def test_exact_type(self):
1356 self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
1357 self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)
1358 self.assertExactTypeEqual(':', token.COLON)
1359 self.assertExactTypeEqual(',', token.COMMA)
1360 self.assertExactTypeEqual(';', token.SEMI)
1361 self.assertExactTypeEqual('+', token.PLUS)
1362 self.assertExactTypeEqual('-', token.MINUS)
1363 self.assertExactTypeEqual('*', token.STAR)
1364 self.assertExactTypeEqual('/', token.SLASH)
1365 self.assertExactTypeEqual('|', token.VBAR)
1366 self.assertExactTypeEqual('&', token.AMPER)
1367 self.assertExactTypeEqual('<', token.LESS)
1368 self.assertExactTypeEqual('>', token.GREATER)
1369 self.assertExactTypeEqual('=', token.EQUAL)
1370 self.assertExactTypeEqual('.', token.DOT)
1371 self.assertExactTypeEqual('%', token.PERCENT)
1372 self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)
1373 self.assertExactTypeEqual('==', token.EQEQUAL)
1374 self.assertExactTypeEqual('!=', token.NOTEQUAL)
1375 self.assertExactTypeEqual('<=', token.LESSEQUAL)
1376 self.assertExactTypeEqual('>=', token.GREATEREQUAL)
1377 self.assertExactTypeEqual('~', token.TILDE)
1378 self.assertExactTypeEqual('^', token.CIRCUMFLEX)
1379 self.assertExactTypeEqual('<<', token.LEFTSHIFT)
1380 self.assertExactTypeEqual('>>', token.RIGHTSHIFT)
1381 self.assertExactTypeEqual('**', token.DOUBLESTAR)
1382 self.assertExactTypeEqual('+=', token.PLUSEQUAL)
1383 self.assertExactTypeEqual('-=', token.MINEQUAL)
1384 self.assertExactTypeEqual('*=', token.STAREQUAL)
1385 self.assertExactTypeEqual('/=', token.SLASHEQUAL)
1386 self.assertExactTypeEqual('%=', token.PERCENTEQUAL)
1387 self.assertExactTypeEqual('&=', token.AMPEREQUAL)
1388 self.assertExactTypeEqual('|=', token.VBAREQUAL)
1389 self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
1390 self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
1391 self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)
1392 self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)
1393 self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)
1394 self.assertExactTypeEqual('//', token.DOUBLESLASH)
1395 self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)
Jim Fasarakis-Hilliardd4914e92017-03-14 22:16:15 +02001396 self.assertExactTypeEqual('...', token.ELLIPSIS)
1397 self.assertExactTypeEqual('->', token.RARROW)
Meador Inge00c7f852012-01-19 00:44:45 -06001398 self.assertExactTypeEqual('@', token.AT)
Benjamin Petersond51374e2014-04-09 23:55:56 -04001399 self.assertExactTypeEqual('@=', token.ATEQUAL)
Meador Inge00c7f852012-01-19 00:44:45 -06001400
1401 self.assertExactTypeEqual('a**2+b**2==c**2',
1402 NAME, token.DOUBLESTAR, NUMBER,
1403 token.PLUS,
1404 NAME, token.DOUBLESTAR, NUMBER,
1405 token.EQEQUAL,
1406 NAME, token.DOUBLESTAR, NUMBER)
1407 self.assertExactTypeEqual('{1, 2, 3}',
1408 token.LBRACE,
1409 token.NUMBER, token.COMMA,
1410 token.NUMBER, token.COMMA,
1411 token.NUMBER,
1412 token.RBRACE)
1413 self.assertExactTypeEqual('^(x & 0x1)',
1414 token.CIRCUMFLEX,
1415 token.LPAR,
1416 token.NAME, token.AMPER, token.NUMBER,
1417 token.RPAR)
Christian Heimesdd15f6c2008-03-16 00:07:10 +00001418
Ezio Melottifafa8b72012-11-03 17:46:51 +02001419 def test_pathological_trailing_whitespace(self):
1420 # See http://bugs.python.org/issue16152
1421 self.assertExactTypeEqual('@ ', token.AT)
Christian Heimesdd15f6c2008-03-16 00:07:10 +00001422
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +03001423
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001424class UntokenizeTest(TestCase):
Terry Jan Reedy58edfd92014-02-17 16:49:06 -05001425
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001426 def test_bad_input_order(self):
Terry Jan Reedy9dc3a362014-02-23 23:33:08 -05001427 # raise if previous row
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001428 u = Untokenizer()
1429 u.prev_row = 2
1430 u.prev_col = 2
1431 with self.assertRaises(ValueError) as cm:
1432 u.add_whitespace((1,3))
Terry Jan Reedy58edfd92014-02-17 16:49:06 -05001433 self.assertEqual(cm.exception.args[0],
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001434 'start (1,3) precedes previous end (2,2)')
Terry Jan Reedy9dc3a362014-02-23 23:33:08 -05001435 # raise if previous column in row
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001436 self.assertRaises(ValueError, u.add_whitespace, (2,1))
1437
Terry Jan Reedy9dc3a362014-02-23 23:33:08 -05001438 def test_backslash_continuation(self):
1439 # The problem is that <whitespace>\<newline> leaves no token
1440 u = Untokenizer()
1441 u.prev_row = 1
1442 u.prev_col = 1
1443 u.tokens = []
1444 u.add_whitespace((2, 0))
1445 self.assertEqual(u.tokens, ['\\\n'])
1446 u.prev_row = 2
1447 u.add_whitespace((4, 4))
1448 self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', ' '])
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +03001449 TestRoundtrip.check_roundtrip(self, 'a\n b\n c\n \\\n c\n')
Terry Jan Reedy9dc3a362014-02-23 23:33:08 -05001450
Terry Jan Reedy5b8d2c32014-02-17 23:12:16 -05001451 def test_iter_compat(self):
1452 u = Untokenizer()
1453 token = (NAME, 'Hello')
1454 tokens = [(ENCODING, 'utf-8'), token]
1455 u.compat(token, iter([]))
1456 self.assertEqual(u.tokens, ["Hello "])
1457 u = Untokenizer()
1458 self.assertEqual(u.untokenize(iter([token])), 'Hello ')
1459 u = Untokenizer()
1460 self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
1461 self.assertEqual(u.encoding, 'utf-8')
1462 self.assertEqual(untokenize(iter(tokens)), b'Hello ')
1463
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001464
Jason R. Coombs5713b3c2015-06-20 19:52:22 -04001465class TestRoundtrip(TestCase):
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +03001466
1467 def check_roundtrip(self, f):
1468 """
1469 Test roundtrip for `untokenize`. `f` is an open file or a string.
1470 The source code in f is tokenized to both 5- and 2-tuples.
1471 Both sequences are converted back to source code via
1472 tokenize.untokenize(), and the latter tokenized again to 2-tuples.
1473 The test fails if the 3 pair tokenizations do not match.
1474
1475 When untokenize bugs are fixed, untokenize with 5-tuples should
1476 reproduce code that does not contain a backslash continuation
1477 following spaces. A proper test should test this.
1478 """
1479 # Get source code and original tokenizations
1480 if isinstance(f, str):
1481 code = f.encode('utf-8')
1482 else:
1483 code = f.read()
1484 f.close()
1485 readline = iter(code.splitlines(keepends=True)).__next__
1486 tokens5 = list(tokenize(readline))
1487 tokens2 = [tok[:2] for tok in tokens5]
1488 # Reproduce tokens2 from pairs
1489 bytes_from2 = untokenize(tokens2)
1490 readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
1491 tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
1492 self.assertEqual(tokens2_from2, tokens2)
1493 # Reproduce tokens2 from 5-tuples
1494 bytes_from5 = untokenize(tokens5)
1495 readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
1496 tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
1497 self.assertEqual(tokens2_from5, tokens2)
1498
1499 def test_roundtrip(self):
1500 # There are some standard formatting practices that are easy to get right.
1501
1502 self.check_roundtrip("if x == 1:\n"
1503 " print(x)\n")
1504 self.check_roundtrip("# This is a comment\n"
1505 "# This also")
1506
1507 # Some people use different formatting conventions, which makes
1508 # untokenize a little trickier. Note that this test involves trailing
1509 # whitespace after the colon. Note that we use hex escapes to make the
1510 # two trailing blanks apparent in the expected output.
1511
1512 self.check_roundtrip("if x == 1 : \n"
1513 " print(x)\n")
1514 fn = support.findfile("tokenize_tests.txt")
1515 with open(fn, 'rb') as f:
1516 self.check_roundtrip(f)
1517 self.check_roundtrip("if x == 1:\n"
1518 " # A comment by itself.\n"
1519 " print(x) # Comment here, too.\n"
1520 " # Another comment.\n"
1521 "after_if = True\n")
1522 self.check_roundtrip("if (x # The comments need to go in the right place\n"
1523 " == 1):\n"
1524 " print('x==1')\n")
1525 self.check_roundtrip("class Test: # A comment here\n"
1526 " # A comment with weird indent\n"
1527 " after_com = 5\n"
1528 " def x(m): return m*5 # a one liner\n"
1529 " def y(m): # A whitespace after the colon\n"
1530 " return y*4 # 3-space indent\n")
1531
1532 # Some error-handling code
1533 self.check_roundtrip("try: import somemodule\n"
1534 "except ImportError: # comment\n"
1535 " print('Can not import' # comment2\n)"
1536 "else: print('Loaded')\n")
1537
1538 def test_continuation(self):
1539 # Balancing continuation
1540 self.check_roundtrip("a = (3,4, \n"
1541 "5,6)\n"
1542 "y = [3, 4,\n"
1543 "5]\n"
1544 "z = {'a': 5,\n"
1545 "'b':15, 'c':True}\n"
1546 "x = len(y) + 5 - a[\n"
1547 "3] - a[2]\n"
1548 "+ len(z) - z[\n"
1549 "'b']\n")
1550
1551 def test_backslash_continuation(self):
1552 # Backslash means line continuation, except for comments
1553 self.check_roundtrip("x=1+\\\n"
1554 "1\n"
1555 "# This is a comment\\\n"
1556 "# This also\n")
1557 self.check_roundtrip("# Comment \\\n"
1558 "x = 0")
1559
1560 def test_string_concatenation(self):
1561 # Two string literals on the same line
1562 self.check_roundtrip("'' ''")
1563
1564 def test_random_files(self):
1565 # Test roundtrip on random python modules.
1566 # pass the '-ucpu' option to process the full directory.
1567
1568 import glob, random
1569 fn = support.findfile("tokenize_tests.txt")
1570 tempdir = os.path.dirname(fn) or os.curdir
1571 testfiles = glob.glob(os.path.join(tempdir, "test*.py"))
1572
Brett Cannona721aba2016-09-09 14:57:09 -07001573 # Tokenize is broken on test_pep3131.py because regular expressions are
1574 # broken on the obscure unicode identifiers in it. *sigh*
1575 # With roundtrip extended to test the 5-tuple mode of untokenize,
1576 # 7 more testfiles fail. Remove them also until the failure is diagnosed.
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +03001577
Zachary Ware724f6a62016-09-09 12:55:37 -07001578 testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py"))
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +03001579 for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
1580 testfiles.remove(os.path.join(tempdir, "test_%s.py") % f)
1581
1582 if not support.is_resource_enabled("cpu"):
1583 testfiles = random.sample(testfiles, 10)
1584
1585 for testfile in testfiles:
1586 with open(testfile, 'rb') as f:
1587 with self.subTest(file=testfile):
1588 self.check_roundtrip(f)
1589
1590
Jason R. Coombs5713b3c2015-06-20 19:52:22 -04001591 def roundtrip(self, code):
1592 if isinstance(code, str):
1593 code = code.encode('utf-8')
Jason R. Coombsb6d1cdd2015-06-25 22:42:24 -04001594 return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')
Jason R. Coombs5713b3c2015-06-20 19:52:22 -04001595
1596 def test_indentation_semantics_retained(self):
1597 """
1598 Ensure that although whitespace might be mutated in a roundtrip,
1599 the semantic meaning of the indentation remains consistent.
1600 """
1601 code = "if False:\n\tx=3\n\tx=3\n"
Jason R. Coombsb6d1cdd2015-06-25 22:42:24 -04001602 codelines = self.roundtrip(code).split('\n')
Jason R. Coombs5713b3c2015-06-20 19:52:22 -04001603 self.assertEqual(codelines[1], codelines[2])
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +03001604 self.check_roundtrip(code)
Jason R. Coombs5713b3c2015-06-20 19:52:22 -04001605
1606
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001607if __name__ == "__main__":
Brett Cannona721aba2016-09-09 14:57:09 -07001608 unittest.main()