from test import test_support
from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP,
                      STRING, ENDMARKER, tok_name, Untokenizer, tokenize)
from StringIO import StringIO
import os
from unittest import TestCase


class TokenizeTest(TestCase):
    # Tests for the tokenize module.

    # The tests can be really simple.  Given a small fragment of source
    # code, print out a table with tokens.  The ENDMARKER is omitted for
    # brevity.

    def check_tokenize(self, s, expected):
        # Format the tokens in s in a table format.
        # The ENDMARKER is omitted.
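        # Note: the %(token)-13.13r conversion below pads each token repr to
        # 13 characters and truncates anything longer, which is why very long
        # tokens appear cut short in the expected tables.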
        result = []
        f = StringIO(s)
        for type, token, start, end, line in generate_tokens(f.readline):
            if type == ENDMARKER:
                break
            type = tok_name[type]
            result.append("    %(type)-10.10s %(token)-13.13r %(start)s %(end)s" %
                          locals())
        self.assertEqual(result,
                         expected.rstrip().splitlines())


    def test_basic(self):
        self.check_tokenize("1 + 1", """\
    NUMBER     '1'           (1, 0) (1, 1)
    OP         '+'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    """)
        self.check_tokenize("if False:\n"
                            "    # NL\n"
                            "    True = False # NEWLINE\n", """\
    NAME       'if'          (1, 0) (1, 2)
    NAME       'False'       (1, 3) (1, 8)
    OP         ':'           (1, 8) (1, 9)
    NEWLINE    '\\n'          (1, 9) (1, 10)
    COMMENT    '# NL'        (2, 4) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    INDENT     '    '        (3, 0) (3, 4)
    NAME       'True'        (3, 4) (3, 8)
    OP         '='           (3, 9) (3, 10)
    NAME       'False'       (3, 11) (3, 16)
    COMMENT    '# NEWLINE'   (3, 17) (3, 26)
    NEWLINE    '\\n'          (3, 26) (3, 27)
    DEDENT     ''            (4, 0) (4, 0)
    """)

        indent_error_file = """\
def k(x):
    x += 2
  x += 5
"""
        with self.assertRaisesRegexp(IndentationError,
                                     "unindent does not match any "
                                     "outer indentation level"):
            for tok in generate_tokens(StringIO(indent_error_file).readline):
                pass

    def test_int(self):
        # Ordinary integers and binary operators
        self.check_tokenize("0xff <= 255", """\
    NUMBER     '0xff'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0b10 <= 255", """\
    NUMBER     '0b10'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0o123 <= 0123", """\
    NUMBER     '0o123'       (1, 0) (1, 5)
    OP         '<='          (1, 6) (1, 8)
    NUMBER     '0123'        (1, 9) (1, 13)
    """)
        self.check_tokenize("01234567 > ~0x15", """\
    NUMBER     '01234567'    (1, 0) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    OP         '~'           (1, 11) (1, 12)
    NUMBER     '0x15'        (1, 12) (1, 16)
    """)
        self.check_tokenize("2134568 != 01231515", """\
    NUMBER     '2134568'     (1, 0) (1, 7)
    OP         '!='          (1, 8) (1, 10)
    NUMBER     '01231515'    (1, 11) (1, 19)
    """)
        self.check_tokenize("(-124561-1) & 0200000000", """\
    OP         '('           (1, 0) (1, 1)
    OP         '-'           (1, 1) (1, 2)
    NUMBER     '124561'      (1, 2) (1, 8)
    OP         '-'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         ')'           (1, 10) (1, 11)
    OP         '&'           (1, 12) (1, 13)
    NUMBER     '0200000000'  (1, 14) (1, 24)
    """)
        self.check_tokenize("0xdeadbeef != -1", """\
    NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
    OP         '!='          (1, 11) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    NUMBER     '1'           (1, 15) (1, 16)
    """)
        self.check_tokenize("0xdeadc0de & 012345", """\
    NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
    OP         '&'           (1, 11) (1, 12)
    NUMBER     '012345'      (1, 13) (1, 19)
    """)
        self.check_tokenize("0xFF & 0x15 | 1234", """\
    NUMBER     '0xFF'        (1, 0) (1, 4)
    OP         '&'           (1, 5) (1, 6)
    NUMBER     '0x15'        (1, 7) (1, 11)
    OP         '|'           (1, 12) (1, 13)
    NUMBER     '1234'        (1, 14) (1, 18)
    """)

    def test_long(self):
        # Long integers
        self.check_tokenize("x = 0L", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0L'          (1, 4) (1, 6)
    """)
        self.check_tokenize("x = 0xfffffffffff", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0xffffffffff (1, 4) (1, 17)
    """)
        self.check_tokenize("x = 123141242151251616110l", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '123141242151 (1, 4) (1, 26)
    """)
        self.check_tokenize("x = -15921590215012591L", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '-'           (1, 4) (1, 5)
    NUMBER     '159215902150 (1, 5) (1, 23)
    """)

    def test_float(self):
        # Floating point numbers
        self.check_tokenize("x = 3.14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 314159.", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '314159.'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = .314159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '.314159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3e14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3e14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3E123", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3E123'       (1, 4) (1, 9)
    """)
        self.check_tokenize("x+y = 3e-1230", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '+'           (1, 1) (1, 2)
    NAME       'y'           (1, 2) (1, 3)
    OP         '='           (1, 4) (1, 5)
    NUMBER     '3e-1230'     (1, 6) (1, 13)
    """)
        self.check_tokenize("x = 3.14e159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14e159'    (1, 4) (1, 12)
    """)

    def test_string(self):
        # String literals
        self.check_tokenize("x = ''; y = \"\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "''"          (1, 4) (1, 6)
    OP         ';'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '='           (1, 10) (1, 11)
    STRING     '""'          (1, 12) (1, 14)
    """)
        self.check_tokenize("x = '\"'; y = \"'\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '\\'"\\''       (1, 4) (1, 7)
    OP         ';'           (1, 7) (1, 8)
    NAME       'y'           (1, 9) (1, 10)
    OP         '='           (1, 11) (1, 12)
    STRING     '"\\'"'        (1, 13) (1, 16)
    """)
        self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"doesn\\'t "' (1, 4) (1, 14)
    NAME       'shrink'      (1, 14) (1, 20)
    STRING     '", does it"' (1, 20) (1, 31)
    """)
        self.check_tokenize("x = u'abc' + U'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "u'abc'"      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     "U'ABC'"      (1, 13) (1, 19)
    """)
        self.check_tokenize('y = u"ABC" + U"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'u"ABC"'      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     'U"ABC"'      (1, 13) (1, 19)
    """)
        self.check_tokenize("x = ur'abc' + Ur'ABC' + uR'ABC' + UR'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "ur'abc'"     (1, 4) (1, 11)
    OP         '+'           (1, 12) (1, 13)
    STRING     "Ur'ABC'"     (1, 14) (1, 21)
    OP         '+'           (1, 22) (1, 23)
    STRING     "uR'ABC'"     (1, 24) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    STRING     "UR'ABC'"     (1, 34) (1, 41)
    """)
        self.check_tokenize('y = ur"abc" + Ur"ABC" + uR"ABC" + UR"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'ur"abc"'     (1, 4) (1, 11)
    OP         '+'           (1, 12) (1, 13)
    STRING     'Ur"ABC"'     (1, 14) (1, 21)
    OP         '+'           (1, 22) (1, 23)
    STRING     'uR"ABC"'     (1, 24) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    STRING     'UR"ABC"'     (1, 34) (1, 41)

    """)
        self.check_tokenize("b'abc' + B'abc'", """\
    STRING     "b'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "B'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('b"abc" + B"abc"', """\
    STRING     'b"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'B"abc"'      (1, 9) (1, 15)
    """)
        self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
    STRING     "br'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "bR'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Br'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "BR'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
    STRING     'br"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'bR"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Br"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'BR"abc"'     (1, 30) (1, 37)
    """)

    def test_function(self):
281 self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
Brett Cannonb8d37352008-03-13 20:33:10 +0000282 NAME 'def' (1, 0) (1, 3)
283 NAME 'd22' (1, 4) (1, 7)
284 OP '(' (1, 7) (1, 8)
285 NAME 'a' (1, 8) (1, 9)
286 OP ',' (1, 9) (1, 10)
287 NAME 'b' (1, 11) (1, 12)
288 OP ',' (1, 12) (1, 13)
289 NAME 'c' (1, 14) (1, 15)
290 OP '=' (1, 15) (1, 16)
291 NUMBER '2' (1, 16) (1, 17)
292 OP ',' (1, 17) (1, 18)
293 NAME 'd' (1, 19) (1, 20)
294 OP '=' (1, 20) (1, 21)
295 NUMBER '2' (1, 21) (1, 22)
296 OP ',' (1, 22) (1, 23)
297 OP '*' (1, 24) (1, 25)
298 NAME 'k' (1, 25) (1, 26)
299 OP ')' (1, 26) (1, 27)
300 OP ':' (1, 27) (1, 28)
301 NAME 'pass' (1, 29) (1, 33)
Serhiy Storchaka0a8845e2015-10-06 18:13:38 +0300302 """)
303 self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
Brett Cannonb8d37352008-03-13 20:33:10 +0000304 NAME 'def' (1, 0) (1, 3)
305 NAME 'd01v_' (1, 4) (1, 9)
306 OP '(' (1, 9) (1, 10)
307 NAME 'a' (1, 10) (1, 11)
308 OP '=' (1, 11) (1, 12)
309 NUMBER '1' (1, 12) (1, 13)
310 OP ',' (1, 13) (1, 14)
311 OP '*' (1, 15) (1, 16)
312 NAME 'k' (1, 16) (1, 17)
313 OP ',' (1, 17) (1, 18)
314 OP '**' (1, 19) (1, 21)
315 NAME 'w' (1, 21) (1, 22)
316 OP ')' (1, 22) (1, 23)
317 OP ':' (1, 23) (1, 24)
318 NAME 'pass' (1, 25) (1, 29)
Serhiy Storchaka0a8845e2015-10-06 18:13:38 +0300319 """)
Brett Cannonb8d37352008-03-13 20:33:10 +0000320
Serhiy Storchaka0a8845e2015-10-06 18:13:38 +0300321 def test_comparison(self):
        # Comparison
        self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " +
                            "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
    NAME       'if'          (1, 0) (1, 2)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '<'           (1, 5) (1, 6)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    NUMBER     '1'           (1, 11) (1, 12)
    OP         '=='          (1, 13) (1, 15)
    NUMBER     '1'           (1, 16) (1, 17)
    OP         '>='          (1, 18) (1, 20)
    NUMBER     '5'           (1, 21) (1, 22)
    OP         '<='          (1, 23) (1, 25)
    NUMBER     '0x15'        (1, 26) (1, 30)
    OP         '<='          (1, 31) (1, 33)
    NUMBER     '0x12'        (1, 34) (1, 38)
    OP         '!='          (1, 39) (1, 41)
    NUMBER     '1'           (1, 42) (1, 43)
    NAME       'and'         (1, 44) (1, 47)
    NUMBER     '5'           (1, 48) (1, 49)
    NAME       'in'          (1, 50) (1, 52)
    NUMBER     '1'           (1, 53) (1, 54)
    NAME       'not'         (1, 55) (1, 58)
    NAME       'in'          (1, 59) (1, 61)
    NUMBER     '1'           (1, 62) (1, 63)
    NAME       'is'          (1, 64) (1, 66)
    NUMBER     '1'           (1, 67) (1, 68)
    NAME       'or'          (1, 69) (1, 71)
    NUMBER     '5'           (1, 72) (1, 73)
    NAME       'is'          (1, 74) (1, 76)
    NAME       'not'         (1, 77) (1, 80)
    NUMBER     '1'           (1, 81) (1, 82)
    OP         ':'           (1, 82) (1, 83)
    NAME       'pass'        (1, 84) (1, 88)
    """)

    def test_shift(self):
        # Shift
        self.check_tokenize("x = 1 << 1 >> 5", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '<<'          (1, 6) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '>>'          (1, 11) (1, 13)
    NUMBER     '5'           (1, 14) (1, 15)
    """)

    def test_additive(self):
        # Additive
        self.check_tokenize("x = 1 - y + 15 - 01 + 0x124 + z + a[5]", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '-'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '15'          (1, 12) (1, 14)
    OP         '-'           (1, 15) (1, 16)
    NUMBER     '01'          (1, 17) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    NUMBER     '0x124'       (1, 22) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    NAME       'z'           (1, 30) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    NAME       'a'           (1, 34) (1, 35)
    OP         '['           (1, 35) (1, 36)
    NUMBER     '5'           (1, 36) (1, 37)
    OP         ']'           (1, 37) (1, 38)
    """)

    def test_multiplicative(self):
        # Multiplicative
        self.check_tokenize("x = 1//1*1/5*12%0x12", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '//'          (1, 5) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '/'           (1, 10) (1, 11)
    NUMBER     '5'           (1, 11) (1, 12)
    OP         '*'           (1, 12) (1, 13)
    NUMBER     '12'          (1, 13) (1, 15)
    OP         '%'           (1, 15) (1, 16)
    NUMBER     '0x12'        (1, 16) (1, 20)
    """)

    def test_unary(self):
        # Unary
        self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
    OP         '~'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '^'           (1, 3) (1, 4)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '&'           (1, 7) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '|'           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '^'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    NUMBER     '1'           (1, 17) (1, 18)
    """)
        self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
    OP         '-'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '*'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '/'           (1, 4) (1, 5)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '+'           (1, 6) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '//'          (1, 10) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    OP         '-'           (1, 17) (1, 18)
    OP         '-'           (1, 18) (1, 19)
    NUMBER     '1'           (1, 19) (1, 20)
    OP         '**'          (1, 20) (1, 22)
    NUMBER     '1'           (1, 22) (1, 23)
    """)

    def test_selector(self):
        # Selector
        self.check_tokenize("import sys, time\n"
                            "x = sys.modules['time'].time()", """\
    NAME       'import'      (1, 0) (1, 6)
    NAME       'sys'         (1, 7) (1, 10)
    OP         ','           (1, 10) (1, 11)
    NAME       'time'        (1, 12) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'x'           (2, 0) (2, 1)
    OP         '='           (2, 2) (2, 3)
    NAME       'sys'         (2, 4) (2, 7)
    OP         '.'           (2, 7) (2, 8)
    NAME       'modules'     (2, 8) (2, 15)
    OP         '['           (2, 15) (2, 16)
    STRING     "'time'"      (2, 16) (2, 22)
    OP         ']'           (2, 22) (2, 23)
    OP         '.'           (2, 23) (2, 24)
    NAME       'time'        (2, 24) (2, 28)
    OP         '('           (2, 28) (2, 29)
    OP         ')'           (2, 29) (2, 30)
    """)

    def test_method(self):
        # Methods
        self.check_tokenize("@staticmethod\n"
                            "def foo(x,y): pass", """\
    OP         '@'           (1, 0) (1, 1)
    NAME       'staticmethod (1, 1) (1, 13)
    NEWLINE    '\\n'          (1, 13) (1, 14)
    NAME       'def'         (2, 0) (2, 3)
    NAME       'foo'         (2, 4) (2, 7)
    OP         '('           (2, 7) (2, 8)
    NAME       'x'           (2, 8) (2, 9)
    OP         ','           (2, 9) (2, 10)
    NAME       'y'           (2, 10) (2, 11)
    OP         ')'           (2, 11) (2, 12)
    OP         ':'           (2, 12) (2, 13)
    NAME       'pass'        (2, 14) (2, 18)
    """)

    def test_tabs(self):
        # Evil tabs
        self.check_tokenize("def f():\n"
                            "\tif x\n"
                            "        \tpass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    INDENT     '\\t'          (2, 0) (2, 1)
    NAME       'if'          (2, 1) (2, 3)
    NAME       'x'           (2, 4) (2, 5)
    NEWLINE    '\\n'          (2, 5) (2, 6)
    INDENT     '        \\t'  (3, 0) (3, 9)
    NAME       'pass'        (3, 9) (3, 13)
    DEDENT     ''            (4, 0) (4, 0)
    DEDENT     ''            (4, 0) (4, 0)
    """)

    def test_pathological_trailing_whitespace(self):
        # Pathological whitespace (http://bugs.python.org/issue16152)
        self.check_tokenize("@ ", """\
    OP         '@'           (1, 0) (1, 1)
    """)


def decistmt(s):
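    """Substitute Decimals for floats in a string of statements.

    This is the example from the tokenize documentation; it is exercised
    by TestMisc.test_decistmt below.
    """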
    result = []
    g = generate_tokens(StringIO(s).readline)   # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result)


class TestMisc(TestCase):

    def test_decistmt(self):
        # Substitute Decimals for floats in a string of statements.
        # This is an example from the docs.

        from decimal import Decimal
        s = '+21.3e-5*-.1234/81.7'
        self.assertEqual(decistmt(s),
                         "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")

        # The format of the exponent is inherited from the platform C library.
        # Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
        # we're only showing 12 digits, and the 13th isn't close to 5, the
        # rest of the output should be platform-independent.
        self.assertRegexpMatches(str(eval(s)), '-3.21716034272e-0+7')

        # Output from calculations with Decimal should be identical across all
        # platforms.
        self.assertEqual(eval(decistmt(s)),
                         Decimal('-3.217160342717258261933904529E-7'))


class UntokenizeTest(TestCase):

    def test_bad_input_order(self):
        # raise if previous row
        u = Untokenizer()
        u.prev_row = 2
        u.prev_col = 2
        with self.assertRaises(ValueError) as cm:
            u.add_whitespace((1,3))
        self.assertEqual(cm.exception.args[0],
                         'start (1,3) precedes previous end (2,2)')
        # raise if previous column in row
        self.assertRaises(ValueError, u.add_whitespace, (2,1))

    def test_backslash_continuation(self):
        # The problem is that <whitespace>\<newline> leaves no token
        u = Untokenizer()
        u.prev_row = 1
        u.prev_col = 1
        u.tokens = []
        u.add_whitespace((2, 0))
        self.assertEqual(u.tokens, ['\\\n'])
        u.prev_row = 2
        u.add_whitespace((4, 4))
        self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])

    def test_iter_compat(self):
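        # untokenize() and compat() should accept an iterator of tokens,
        # not just a sequence.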
        u = Untokenizer()
        token = (NAME, 'Hello')
        u.compat(token, iter([]))
        self.assertEqual(u.tokens, ["Hello "])
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter([token])), 'Hello ')


class TestRoundtrip(TestCase):

    def check_roundtrip(self, f):
        """
        Test roundtrip for `untokenize`. `f` is an open file or a string.
        The source code in f is tokenized, converted back to source code
        via tokenize.untokenize(), and tokenized again from the latter.
        The test fails if the second tokenization doesn't match the first.
        """
        if isinstance(f, str):
            f = StringIO(f)
        token_list = list(generate_tokens(f.readline))
        f.close()
        tokens1 = [tok[:2] for tok in token_list]
        new_text = untokenize(tokens1)
        readline = iter(new_text.splitlines(1)).next
        tokens2 = [tok[:2] for tok in generate_tokens(readline)]
        self.assertEqual(tokens2, tokens1)

    def test_roundtrip(self):
        # There are some standard formatting practices that are easy to get right.

        self.check_roundtrip("if x == 1:\n"
                             "    print(x)\n")
        self.check_roundtrip("if x == 1:\n"
                             "    print x\n")
        self.check_roundtrip("# This is a comment\n"
                             "# This also")

        # Some people use different formatting conventions, which makes
        # untokenize a little trickier.  Note that this test involves trailing
        # whitespace after the colon.  Note that we use hex escapes to make the
        # two trailing blanks apparent in the expected output.

        self.check_roundtrip("if x == 1 : \n"
                             "  print x\n")
        fn = test_support.findfile("tokenize_tests" + os.extsep + "txt")
        with open(fn) as f:
            self.check_roundtrip(f)
        self.check_roundtrip("if x == 1:\n"
                             "    # A comment by itself.\n"
                             "    print x # Comment here, too.\n"
                             "    # Another comment.\n"
                             "after_if = True\n")
        self.check_roundtrip("if (x # The comments need to go in the right place\n"
                             "    == 1):\n"
                             "    print 'x==1'\n")
        self.check_roundtrip("class Test: # A comment here\n"
                             "  # A comment with weird indent\n"
                             "  after_com = 5\n"
                             "  def x(m): return m*5 # a one liner\n"
                             "  def y(m): # A whitespace after the colon\n"
                             "     return y*4 # 3-space indent\n")

        # Some error-handling code

        self.check_roundtrip("try: import somemodule\n"
                             "except ImportError: # comment\n"
                             "    print 'Can not import' # comment2\n"
                             "else: print 'Loaded'\n")

    def test_continuation(self):
        # Balancing continuation
        self.check_roundtrip("a = (3,4, \n"
                             "5,6)\n"
                             "y = [3, 4,\n"
                             "5]\n"
                             "z = {'a': 5,\n"
                             "'b':15, 'c':True}\n"
                             "x = len(y) + 5 - a[\n"
                             "3] - a[2]\n"
                             "+ len(z) - z[\n"
                             "'b']\n")

    def test_backslash_continuation(self):
        # Backslash means line continuation, except for comments
        self.check_roundtrip("x=1+\\\n"
                             "1\n"
                             "# This is a comment\\\n"
                             "# This also\n")
        self.check_roundtrip("# Comment \\\n"
                             "x = 0")

    def test_string_concatenation(self):
        # Two string literals on the same line
        self.check_roundtrip("'' ''")

    def test_random_files(self):
        # Test roundtrip on random python modules.
        # pass the '-ucpu' option to process the full directory.

        import glob, random
        fn = test_support.findfile("tokenize_tests" + os.extsep + "txt")
        tempdir = os.path.dirname(fn) or os.curdir
        testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

        if not test_support.is_resource_enabled("cpu"):
            testfiles = random.sample(testfiles, 10)

        for testfile in testfiles:
            try:
                with open(testfile, 'rb') as f:
                    self.check_roundtrip(f)
            except:
                print "Roundtrip failed for file %s" % testfile
                raise


    def roundtrip(self, code):
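        # Tokenize `code` and immediately untokenize it, returning the
        # regenerated source text.  Used by test_indentation_semantics_retained.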
        if isinstance(code, str):
            code = code.encode('utf-8')
        tokens = generate_tokens(StringIO(code).readline)
        return untokenize(tokens).decode('utf-8')

    def test_indentation_semantics_retained(self):
        """
        Ensure that although whitespace might be mutated in a roundtrip,
        the semantic meaning of the indentation remains consistent.
        """
        code = "if False:\n\tx=3\n\tx=3\n"
        codelines = self.roundtrip(code).split('\n')
        self.assertEqual(codelines[1], codelines[2])


def test_main():
    test_support.run_unittest(TokenizeTest)
    test_support.run_unittest(UntokenizeTest)
    test_support.run_unittest(TestRoundtrip)
    test_support.run_unittest(TestMisc)

if __name__ == "__main__":
    test_main()