from test import test_support
from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP, NEWLINE,
                      STRING, ENDMARKER, tok_name, Untokenizer, tokenize)
from StringIO import StringIO
import os
from unittest import TestCase


# Converts a source string into a list of textual representations of
# the tokens such as:
# `    NAME       'if'          (1, 0) (1, 2)`
# to make writing tests easier.
def stringify_tokens_from_source(token_generator, source_string):
    result = []
    num_lines = len(source_string.splitlines())
    missing_trailing_nl = source_string[-1] not in '\r\n'

    for type, token, start, end, line in token_generator:
        if type == ENDMARKER:
            break
        # Ignore the new line on the last line if the input lacks one
        if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:
            continue
        type = tok_name[type]
        result.append("    %(type)-10.10s %(token)-13.13r %(start)s %(end)s" %
                      locals())

    return result

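# For example, the source string "1 + 1" is converted to rows like these
# (exercised in TokenizeTest.test_basic below):
#
#     NUMBER     '1'           (1, 0) (1, 1)
#     OP         '+'           (1, 2) (1, 3)
#     NUMBER     '1'           (1, 4) (1, 5)
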
class TokenizeTest(TestCase):
    # Tests for the tokenize module.

    # The tests can be really simple. Given a small fragment of source
    # code, print out a table with tokens. The ENDMARKER, ENCODING and
    # final NEWLINE are omitted for brevity.

    def check_tokenize(self, s, expected):
        # Format the tokens from s as a table and compare with expected.
        f = StringIO(s)
        result = stringify_tokens_from_source(generate_tokens(f.readline), s)

        self.assertEqual(result,
                         expected.rstrip().splitlines())

    def test_implicit_newline(self):
        # Make sure that the tokenizer puts in an implicit NEWLINE
        # when the input lacks a trailing new line.
        f = StringIO("x")
        tokens = list(generate_tokens(f.readline))
        self.assertEqual(tokens[-2][0], NEWLINE)
        self.assertEqual(tokens[-1][0], ENDMARKER)

    def test_basic(self):
        self.check_tokenize("1 + 1", """\
    NUMBER     '1'           (1, 0) (1, 1)
    OP         '+'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    """)
        self.check_tokenize("if False:\n"
                            "    # NL\n"
                            "    True = False # NEWLINE\n", """\
    NAME       'if'          (1, 0) (1, 2)
    NAME       'False'       (1, 3) (1, 8)
    OP         ':'           (1, 8) (1, 9)
    NEWLINE    '\\n'          (1, 9) (1, 10)
    COMMENT    '# NL'        (2, 4) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    INDENT     '    '        (3, 0) (3, 4)
    NAME       'True'        (3, 4) (3, 8)
    OP         '='           (3, 9) (3, 10)
    NAME       'False'       (3, 11) (3, 16)
    COMMENT    '# NEWLINE'   (3, 17) (3, 26)
    NEWLINE    '\\n'          (3, 26) (3, 27)
    DEDENT     ''            (4, 0) (4, 0)
    """)

        indent_error_file = """\
def k(x):
    x += 2
  x += 5
"""
        with self.assertRaisesRegexp(IndentationError,
                                     "unindent does not match any "
                                     "outer indentation level"):
            for tok in generate_tokens(StringIO(indent_error_file).readline):
                pass

    def test_int(self):
        # Ordinary integers and binary operators
        self.check_tokenize("0xff <= 255", """\
    NUMBER     '0xff'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0b10 <= 255", """\
    NUMBER     '0b10'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0o123 <= 0123", """\
    NUMBER     '0o123'       (1, 0) (1, 5)
    OP         '<='          (1, 6) (1, 8)
    NUMBER     '0123'        (1, 9) (1, 13)
    """)
        self.check_tokenize("01234567 > ~0x15", """\
    NUMBER     '01234567'    (1, 0) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    OP         '~'           (1, 11) (1, 12)
    NUMBER     '0x15'        (1, 12) (1, 16)
    """)
        self.check_tokenize("2134568 != 01231515", """\
    NUMBER     '2134568'     (1, 0) (1, 7)
    OP         '!='          (1, 8) (1, 10)
    NUMBER     '01231515'    (1, 11) (1, 19)
    """)
        self.check_tokenize("(-124561-1) & 0200000000", """\
    OP         '('           (1, 0) (1, 1)
    OP         '-'           (1, 1) (1, 2)
    NUMBER     '124561'      (1, 2) (1, 8)
    OP         '-'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         ')'           (1, 10) (1, 11)
    OP         '&'           (1, 12) (1, 13)
    NUMBER     '0200000000'  (1, 14) (1, 24)
    """)
        self.check_tokenize("0xdeadbeef != -1", """\
    NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
    OP         '!='          (1, 11) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    NUMBER     '1'           (1, 15) (1, 16)
    """)
        self.check_tokenize("0xdeadc0de & 012345", """\
    NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
    OP         '&'           (1, 11) (1, 12)
    NUMBER     '012345'      (1, 13) (1, 19)
    """)
        self.check_tokenize("0xFF & 0x15 | 1234", """\
    NUMBER     '0xFF'        (1, 0) (1, 4)
    OP         '&'           (1, 5) (1, 6)
    NUMBER     '0x15'        (1, 7) (1, 11)
    OP         '|'           (1, 12) (1, 13)
    NUMBER     '1234'        (1, 14) (1, 18)
    """)

    def test_long(self):
        # Long integers
        self.check_tokenize("x = 0L", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0L'          (1, 4) (1, 6)
    """)
        self.check_tokenize("x = 0xfffffffffff", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0xffffffffff (1, 4) (1, 17)
    """)
        self.check_tokenize("x = 123141242151251616110l", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '123141242151 (1, 4) (1, 26)
    """)
        self.check_tokenize("x = -15921590215012591L", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '-'           (1, 4) (1, 5)
    NUMBER     '159215902150 (1, 5) (1, 23)
    """)

    def test_float(self):
        # Floating point numbers
        self.check_tokenize("x = 3.14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 314159.", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '314159.'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = .314159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '.314159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3e14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3e14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3E123", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3E123'       (1, 4) (1, 9)
    """)
        self.check_tokenize("x+y = 3e-1230", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '+'           (1, 1) (1, 2)
    NAME       'y'           (1, 2) (1, 3)
    OP         '='           (1, 4) (1, 5)
    NUMBER     '3e-1230'     (1, 6) (1, 13)
    """)
        self.check_tokenize("x = 3.14e159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14e159'    (1, 4) (1, 12)
    """)

    def test_string(self):
        # String literals
        self.check_tokenize("x = ''; y = \"\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "''"          (1, 4) (1, 6)
    OP         ';'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '='           (1, 10) (1, 11)
    STRING     '""'          (1, 12) (1, 14)
    """)
        self.check_tokenize("x = '\"'; y = \"'\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '\\'"\\''       (1, 4) (1, 7)
    OP         ';'           (1, 7) (1, 8)
    NAME       'y'           (1, 9) (1, 10)
    OP         '='           (1, 11) (1, 12)
    STRING     '"\\'"'        (1, 13) (1, 16)
    """)
        self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"doesn\\'t "' (1, 4) (1, 14)
    NAME       'shrink'      (1, 14) (1, 20)
    STRING     '", does it"' (1, 20) (1, 31)
    """)
        self.check_tokenize("x = u'abc' + U'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "u'abc'"      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     "U'ABC'"      (1, 13) (1, 19)
    """)
        self.check_tokenize('y = u"ABC" + U"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'u"ABC"'      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     'U"ABC"'      (1, 13) (1, 19)
    """)
        self.check_tokenize("x = ur'abc' + Ur'ABC' + uR'ABC' + UR'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "ur'abc'"     (1, 4) (1, 11)
    OP         '+'           (1, 12) (1, 13)
    STRING     "Ur'ABC'"     (1, 14) (1, 21)
    OP         '+'           (1, 22) (1, 23)
    STRING     "uR'ABC'"     (1, 24) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    STRING     "UR'ABC'"     (1, 34) (1, 41)
    """)
        self.check_tokenize('y = ur"abc" + Ur"ABC" + uR"ABC" + UR"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'ur"abc"'     (1, 4) (1, 11)
    OP         '+'           (1, 12) (1, 13)
    STRING     'Ur"ABC"'     (1, 14) (1, 21)
    OP         '+'           (1, 22) (1, 23)
    STRING     'uR"ABC"'     (1, 24) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    STRING     'UR"ABC"'     (1, 34) (1, 41)

    """)
        self.check_tokenize("b'abc' + B'abc'", """\
    STRING     "b'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "B'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('b"abc" + B"abc"', """\
    STRING     'b"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'B"abc"'      (1, 9) (1, 15)
    """)
        self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
    STRING     "br'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "bR'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Br'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "BR'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
    STRING     'br"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'bR"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Br"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'BR"abc"'     (1, 30) (1, 37)
    """)

    def test_function(self):
        self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd22'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ','           (1, 9) (1, 10)
    NAME       'b'           (1, 11) (1, 12)
    OP         ','           (1, 12) (1, 13)
    NAME       'c'           (1, 14) (1, 15)
    OP         '='           (1, 15) (1, 16)
    NUMBER     '2'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    NAME       'd'           (1, 19) (1, 20)
    OP         '='           (1, 20) (1, 21)
    NUMBER     '2'           (1, 21) (1, 22)
    OP         ','           (1, 22) (1, 23)
    OP         '*'           (1, 24) (1, 25)
    NAME       'k'           (1, 25) (1, 26)
    OP         ')'           (1, 26) (1, 27)
    OP         ':'           (1, 27) (1, 28)
    NAME       'pass'        (1, 29) (1, 33)
    """)
        self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd01v_'       (1, 4) (1, 9)
    OP         '('           (1, 9) (1, 10)
    NAME       'a'           (1, 10) (1, 11)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         ','           (1, 13) (1, 14)
    OP         '*'           (1, 15) (1, 16)
    NAME       'k'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    OP         '**'          (1, 19) (1, 21)
    NAME       'w'           (1, 21) (1, 22)
    OP         ')'           (1, 22) (1, 23)
    OP         ':'           (1, 23) (1, 24)
    NAME       'pass'        (1, 25) (1, 29)
    """)

    def test_comparison(self):
        # Comparison
        self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " +
                            "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
    NAME       'if'          (1, 0) (1, 2)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '<'           (1, 5) (1, 6)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    NUMBER     '1'           (1, 11) (1, 12)
    OP         '=='          (1, 13) (1, 15)
    NUMBER     '1'           (1, 16) (1, 17)
    OP         '>='          (1, 18) (1, 20)
    NUMBER     '5'           (1, 21) (1, 22)
    OP         '<='          (1, 23) (1, 25)
    NUMBER     '0x15'        (1, 26) (1, 30)
    OP         '<='          (1, 31) (1, 33)
    NUMBER     '0x12'        (1, 34) (1, 38)
    OP         '!='          (1, 39) (1, 41)
    NUMBER     '1'           (1, 42) (1, 43)
    NAME       'and'         (1, 44) (1, 47)
    NUMBER     '5'           (1, 48) (1, 49)
    NAME       'in'          (1, 50) (1, 52)
    NUMBER     '1'           (1, 53) (1, 54)
    NAME       'not'         (1, 55) (1, 58)
    NAME       'in'          (1, 59) (1, 61)
    NUMBER     '1'           (1, 62) (1, 63)
    NAME       'is'          (1, 64) (1, 66)
    NUMBER     '1'           (1, 67) (1, 68)
    NAME       'or'          (1, 69) (1, 71)
    NUMBER     '5'           (1, 72) (1, 73)
    NAME       'is'          (1, 74) (1, 76)
    NAME       'not'         (1, 77) (1, 80)
    NUMBER     '1'           (1, 81) (1, 82)
    OP         ':'           (1, 82) (1, 83)
    NAME       'pass'        (1, 84) (1, 88)
    """)

    def test_shift(self):
        # Shift
        self.check_tokenize("x = 1 << 1 >> 5", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '<<'          (1, 6) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '>>'          (1, 11) (1, 13)
    NUMBER     '5'           (1, 14) (1, 15)
    """)

    def test_additive(self):
        # Additive
        self.check_tokenize("x = 1 - y + 15 - 01 + 0x124 + z + a[5]", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '-'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '15'          (1, 12) (1, 14)
    OP         '-'           (1, 15) (1, 16)
    NUMBER     '01'          (1, 17) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    NUMBER     '0x124'       (1, 22) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    NAME       'z'           (1, 30) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    NAME       'a'           (1, 34) (1, 35)
    OP         '['           (1, 35) (1, 36)
    NUMBER     '5'           (1, 36) (1, 37)
    OP         ']'           (1, 37) (1, 38)
    """)

    def test_multiplicative(self):
        # Multiplicative
        self.check_tokenize("x = 1//1*1/5*12%0x12", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '//'          (1, 5) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '/'           (1, 10) (1, 11)
    NUMBER     '5'           (1, 11) (1, 12)
    OP         '*'           (1, 12) (1, 13)
    NUMBER     '12'          (1, 13) (1, 15)
    OP         '%'           (1, 15) (1, 16)
    NUMBER     '0x12'        (1, 16) (1, 20)
    """)

    def test_unary(self):
        # Unary
        self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
    OP         '~'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '^'           (1, 3) (1, 4)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '&'           (1, 7) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '|'           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '^'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    NUMBER     '1'           (1, 17) (1, 18)
    """)
        self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
    OP         '-'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '*'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '/'           (1, 4) (1, 5)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '+'           (1, 6) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '//'          (1, 10) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    OP         '-'           (1, 17) (1, 18)
    OP         '-'           (1, 18) (1, 19)
    NUMBER     '1'           (1, 19) (1, 20)
    OP         '**'          (1, 20) (1, 22)
    NUMBER     '1'           (1, 22) (1, 23)
    """)

    def test_selector(self):
        # Selector
        self.check_tokenize("import sys, time\n"
                            "x = sys.modules['time'].time()", """\
    NAME       'import'      (1, 0) (1, 6)
    NAME       'sys'         (1, 7) (1, 10)
    OP         ','           (1, 10) (1, 11)
    NAME       'time'        (1, 12) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'x'           (2, 0) (2, 1)
    OP         '='           (2, 2) (2, 3)
    NAME       'sys'         (2, 4) (2, 7)
    OP         '.'           (2, 7) (2, 8)
    NAME       'modules'     (2, 8) (2, 15)
    OP         '['           (2, 15) (2, 16)
    STRING     "'time'"      (2, 16) (2, 22)
    OP         ']'           (2, 22) (2, 23)
    OP         '.'           (2, 23) (2, 24)
    NAME       'time'        (2, 24) (2, 28)
    OP         '('           (2, 28) (2, 29)
    OP         ')'           (2, 29) (2, 30)
    """)

    def test_method(self):
        # Methods
        self.check_tokenize("@staticmethod\n"
                            "def foo(x,y): pass", """\
    OP         '@'           (1, 0) (1, 1)
    NAME       'staticmethod (1, 1) (1, 13)
    NEWLINE    '\\n'          (1, 13) (1, 14)
    NAME       'def'         (2, 0) (2, 3)
    NAME       'foo'         (2, 4) (2, 7)
    OP         '('           (2, 7) (2, 8)
    NAME       'x'           (2, 8) (2, 9)
    OP         ','           (2, 9) (2, 10)
    NAME       'y'           (2, 10) (2, 11)
    OP         ')'           (2, 11) (2, 12)
    OP         ':'           (2, 12) (2, 13)
    NAME       'pass'        (2, 14) (2, 18)
    """)

    def test_tabs(self):
        # Evil tabs
        self.check_tokenize("def f():\n"
                            "\tif x\n"
                            "        \tpass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    INDENT     '\\t'          (2, 0) (2, 1)
    NAME       'if'          (2, 1) (2, 3)
    NAME       'x'           (2, 4) (2, 5)
    NEWLINE    '\\n'          (2, 5) (2, 6)
    INDENT     '        \\t'  (3, 0) (3, 9)
    NAME       'pass'        (3, 9) (3, 13)
    DEDENT     ''            (4, 0) (4, 0)
    DEDENT     ''            (4, 0) (4, 0)
    """)

    def test_pathological_trailing_whitespace(self):
        # Pathological whitespace (http://bugs.python.org/issue16152)
        self.check_tokenize("@ ", """\
    OP         '@'           (1, 0) (1, 1)
    """)


def decistmt(s):
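    """Substitute Decimal('...') calls for the float literals in a string
    of statements (the example from the tokenize docs; the expected
    output below is taken from TestMisc.test_decistmt further down):

    >>> decistmt('+21.3e-5*-.1234/81.7')
    "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')"

    Note the space before each '(': untokenize() pads NAME tokens.
    """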
    result = []
    g = generate_tokens(StringIO(s).readline)   # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result)

class TestMisc(TestCase):

    def test_decistmt(self):
        # Substitute Decimals for floats in a string of statements.
        # This is an example from the docs.

        from decimal import Decimal
        s = '+21.3e-5*-.1234/81.7'
        self.assertEqual(decistmt(s),
                         "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")

        # The format of the exponent is inherited from the platform C library.
        # Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
        # we're only showing 12 digits, and the 13th isn't close to 5, the
        # rest of the output should be platform-independent.
        self.assertRegexpMatches(str(eval(s)), '-3.21716034272e-0+7')

        # Output from calculations with Decimal should be identical across all
        # platforms.
        self.assertEqual(eval(decistmt(s)), Decimal('-3.217160342717258261933904529E-7'))


class UntokenizeTest(TestCase):

    def test_bad_input_order(self):
        # raise if previous row
        u = Untokenizer()
        u.prev_row = 2
        u.prev_col = 2
        with self.assertRaises(ValueError) as cm:
            u.add_whitespace((1,3))
        self.assertEqual(cm.exception.args[0],
                         'start (1,3) precedes previous end (2,2)')
        # raise if previous column in row
        self.assertRaises(ValueError, u.add_whitespace, (2,1))

    def test_backslash_continuation(self):
        # The problem is that <whitespace>\<newline> leaves no token
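        # add_whitespace() therefore has to emit an explicit "\\\n"
        # continuation for every row it skips, which is what the
        # assertions below check.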
        u = Untokenizer()
        u.prev_row = 1
        u.prev_col = 1
        u.tokens = []
        u.add_whitespace((2, 0))
        self.assertEqual(u.tokens, ['\\\n'])
        u.prev_row = 2
        u.add_whitespace((4, 4))
        self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])

    def test_iter_compat(self):
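        # In compatibility (iterator) mode the untokenizer appends a space
        # after each NAME token, hence the trailing blank in "Hello ".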
        u = Untokenizer()
        token = (NAME, 'Hello')
        u.compat(token, iter([]))
        self.assertEqual(u.tokens, ["Hello "])
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter([token])), 'Hello ')


class TestRoundtrip(TestCase):

    def check_roundtrip(self, f):
        """
        Test roundtrip for `untokenize`. `f` is an open file or a string.
        The source code in f is tokenized, converted back to source code
        via tokenize.untokenize(), and tokenized again from the latter.
        The test fails if the second tokenization doesn't match the first.
        """
        if isinstance(f, str): f = StringIO(f)
        token_list = list(generate_tokens(f.readline))
        f.close()
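        # Only the (type, string) pairs are compared below; untokenize()
        # does not guarantee that positions or whitespace survive the
        # roundtrip (see test_indentation_semantics_retained).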
        tokens1 = [tok[:2] for tok in token_list]
        new_text = untokenize(tokens1)
        readline = iter(new_text.splitlines(1)).next
        tokens2 = [tok[:2] for tok in generate_tokens(readline)]
        self.assertEqual(tokens2, tokens1)

    def test_roundtrip(self):
        # There are some standard formatting practices that are easy to get right.

        self.check_roundtrip("if x == 1:\n"
                             "    print(x)\n")
        self.check_roundtrip("if x == 1:\n"
                             "    print x\n")
        self.check_roundtrip("# This is a comment\n"
                             "# This also\n")

        # Some people use different formatting conventions, which makes
        # untokenize a little trickier. Note that this test involves trailing
        # whitespace after the colon.

        self.check_roundtrip("if x == 1 : \n"
                             "  print x\n")
        fn = test_support.findfile("tokenize_tests" + os.extsep + "txt")
        with open(fn) as f:
            self.check_roundtrip(f)
        self.check_roundtrip("if x == 1:\n"
                             "    # A comment by itself.\n"
                             "    print x # Comment here, too.\n"
                             "    # Another comment.\n"
                             "after_if = True\n")
        self.check_roundtrip("if (x # The comments need to go in the right place\n"
                             "    == 1):\n"
                             "    print 'x==1'\n")
        self.check_roundtrip("class Test: # A comment here\n"
                             "  # A comment with weird indent\n"
                             "  after_com = 5\n"
                             "  def x(m): return m*5 # a one liner\n"
                             "  def y(m): # A whitespace after the colon\n"
                             "     return y*4 # 3-space indent\n")

        # Some error-handling code

        self.check_roundtrip("try: import somemodule\n"
                             "except ImportError: # comment\n"
                             "    print 'Can not import' # comment2\n"
                             "else: print 'Loaded'\n")

    def test_continuation(self):
        # Balancing continuation
        self.check_roundtrip("a = (3,4, \n"
                             "5,6)\n"
                             "y = [3, 4,\n"
                             "5]\n"
                             "z = {'a': 5,\n"
                             "'b':15, 'c':True}\n"
                             "x = len(y) + 5 - a[\n"
                             "3] - a[2]\n"
                             "+ len(z) - z[\n"
                             "'b']\n")

    def test_backslash_continuation(self):
        # Backslash means line continuation, except for comments
        self.check_roundtrip("x=1+\\\n"
                             "1\n"
                             "# This is a comment\\\n"
                             "# This also\n")
        self.check_roundtrip("# Comment \\\n"
                             "x = 0")

    def test_string_concatenation(self):
        # Two string literals on the same line
        self.check_roundtrip("'' ''")

    def test_random_files(self):
        # Test roundtrip on random python modules.
        # pass the '-ucpu' option to process the full directory.

        import glob, random
        fn = test_support.findfile("tokenize_tests" + os.extsep + "txt")
        tempdir = os.path.dirname(fn) or os.curdir
        testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

        if not test_support.is_resource_enabled("cpu"):
            testfiles = random.sample(testfiles, 10)

        for testfile in testfiles:
            try:
                with open(testfile, 'rb') as f:
                    self.check_roundtrip(f)
            except:
                print "Roundtrip failed for file %s" % testfile
                raise


    def roundtrip(self, code):
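        # Helper: tokenize `code` and immediately untokenize it, returning
        # the regenerated source as unicode.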
        if isinstance(code, str):
            code = code.encode('utf-8')
        tokens = generate_tokens(StringIO(code).readline)
        return untokenize(tokens).decode('utf-8')

    def test_indentation_semantics_retained(self):
        """
        Ensure that although whitespace might be mutated in a roundtrip,
        the semantic meaning of the indentation remains consistent.
        """
        code = "if False:\n\tx=3\n\tx=3\n"
        codelines = self.roundtrip(code).split('\n')
        self.assertEqual(codelines[1], codelines[2])


def test_main():
    test_support.run_unittest(TokenizeTest)
    test_support.run_unittest(UntokenizeTest)
    test_support.run_unittest(TestRoundtrip)
    test_support.run_unittest(TestMisc)

if __name__ == "__main__":
    test_main()