blob: a51e7818670d7ac2acab3fd4d0fe98df0dbee69b [file] [log] [blame]
Brett Cannonb8d37352008-03-13 20:33:10 +00001doctests = """
2Tests for the tokenize module.
Jeremy Hylton29bef0b2006-08-23 18:37:43 +00003
Brett Cannonb8d37352008-03-13 20:33:10 +00004 >>> import glob, random, sys
Jeremy Hylton29bef0b2006-08-23 18:37:43 +00005
The tests can be really simple. Given a small fragment of source
code, print out a table with tokens. The ENDMARKER is omitted for
brevity.
9
Brett Cannonb8d37352008-03-13 20:33:10 +000010 >>> dump_tokens("1 + 1")
11 NUMBER '1' (1, 0) (1, 1)
12 OP '+' (1, 2) (1, 3)
13 NUMBER '1' (1, 4) (1, 5)
Jeremy Hylton76467ba2006-08-23 21:14:03 +000014
Brett Cannonb8d37352008-03-13 20:33:10 +000015 >>> dump_tokens("if False:\\n"
16 ... " # NL\\n"
17 ... " True = False # NEWLINE\\n")
18 NAME 'if' (1, 0) (1, 2)
19 NAME 'False' (1, 3) (1, 8)
20 OP ':' (1, 8) (1, 9)
21 NEWLINE '\\n' (1, 9) (1, 10)
22 COMMENT '# NL' (2, 4) (2, 8)
23 NL '\\n' (2, 8) (2, 9)
24 INDENT ' ' (3, 0) (3, 4)
25 NAME 'True' (3, 4) (3, 8)
26 OP '=' (3, 9) (3, 10)
27 NAME 'False' (3, 11) (3, 16)
28 COMMENT '# NEWLINE' (3, 17) (3, 26)
29 NEWLINE '\\n' (3, 26) (3, 27)
30 DEDENT '' (4, 0) (4, 0)
Jeremy Hylton76467ba2006-08-23 21:14:03 +000031
Brett Cannonb8d37352008-03-13 20:33:10 +000032 >>> indent_error_file = \"""
33 ... def k(x):
34 ... x += 2
35 ... x += 5
36 ... \"""
Tim Peters147f9ae2006-08-25 22:05:39 +000037
Brett Cannonb8d37352008-03-13 20:33:10 +000038 >>> for tok in generate_tokens(StringIO(indent_error_file).readline): pass
39 Traceback (most recent call last):
40 ...
41 IndentationError: unindent does not match any outer indentation level
Jeremy Hylton29bef0b2006-08-23 18:37:43 +000042
Brett Cannonb8d37352008-03-13 20:33:10 +000043Test roundtrip for `untokenize`. `f` is an open file or a string. The source
44code in f is tokenized, converted back to source code via tokenize.untokenize(),
45and tokenized again from the latter. The test fails if the second tokenization
46doesn't match the first.
Jeremy Hylton29bef0b2006-08-23 18:37:43 +000047
Brett Cannonb8d37352008-03-13 20:33:10 +000048 >>> def roundtrip(f):
49 ... if isinstance(f, str): f = StringIO(f)
50 ... token_list = list(generate_tokens(f.readline))
51 ... f.close()
52 ... tokens1 = [tok[:2] for tok in token_list]
53 ... new_text = untokenize(tokens1)
54 ... readline = iter(new_text.splitlines(1)).next
55 ... tokens2 = [tok[:2] for tok in generate_tokens(readline)]
56 ... return tokens1 == tokens2
57 ...
Jeremy Hylton76467ba2006-08-23 21:14:03 +000058
Mark Dickinson85862492010-06-29 07:37:25 +000059There are some standard formatting practices that are easy to get right.
Jeremy Hylton29bef0b2006-08-23 18:37:43 +000060
Brett Cannonb8d37352008-03-13 20:33:10 +000061 >>> roundtrip("if x == 1:\\n"
62 ... " print x\\n")
63 True
64
65 >>> roundtrip("# This is a comment\\n# This also")
66 True
Jeremy Hylton76467ba2006-08-23 21:14:03 +000067
Some people use different formatting conventions, which makes
untokenize a little trickier. Note that this test involves trailing
whitespace after the colon. Note that we use hex escapes to make the
two trailing blanks apparent in the expected output.
Jeremy Hylton76467ba2006-08-23 21:14:03 +000072
Brett Cannonb8d37352008-03-13 20:33:10 +000073 >>> roundtrip("if x == 1 : \\n"
74 ... " print x\\n")
75 True
Jeremy Hylton76467ba2006-08-23 21:14:03 +000076
Brett Cannonb8d37352008-03-13 20:33:10 +000077 >>> f = test_support.findfile("tokenize_tests" + os.extsep + "txt")
78 >>> roundtrip(open(f))
79 True
Jeremy Hylton76467ba2006-08-23 21:14:03 +000080
Brett Cannonb8d37352008-03-13 20:33:10 +000081 >>> roundtrip("if x == 1:\\n"
82 ... " # A comment by itself.\\n"
83 ... " print x # Comment here, too.\\n"
84 ... " # Another comment.\\n"
85 ... "after_if = True\\n")
86 True
Jeremy Hylton76467ba2006-08-23 21:14:03 +000087
Brett Cannonb8d37352008-03-13 20:33:10 +000088 >>> roundtrip("if (x # The comments need to go in the right place\\n"
89 ... " == 1):\\n"
90 ... " print 'x==1'\\n")
91 True
Jeremy Hylton76467ba2006-08-23 21:14:03 +000092
Brett Cannonb8d37352008-03-13 20:33:10 +000093 >>> roundtrip("class Test: # A comment here\\n"
94 ... " # A comment with weird indent\\n"
95 ... " after_com = 5\\n"
96 ... " def x(m): return m*5 # a one liner\\n"
97 ... " def y(m): # A whitespace after the colon\\n"
98 ... " return y*4 # 3-space indent\\n")
99 True
100
101Some error-handling code
102
103 >>> roundtrip("try: import somemodule\\n"
104 ... "except ImportError: # comment\\n"
105 ... " print 'Can not import' # comment2\\n"
106 ... "else: print 'Loaded'\\n")
107 True
108
Eric Smith0aed07a2008-03-17 19:43:40 +0000109Balancing continuation
Brett Cannonb8d37352008-03-13 20:33:10 +0000110
111 >>> roundtrip("a = (3,4, \\n"
112 ... "5,6)\\n"
113 ... "y = [3, 4,\\n"
114 ... "5]\\n"
115 ... "z = {'a': 5,\\n"
116 ... "'b':15, 'c':True}\\n"
117 ... "x = len(y) + 5 - a[\\n"
118 ... "3] - a[2]\\n"
119 ... "+ len(z) - z[\\n"
120 ... "'b']\\n")
121 True
122
123Ordinary integers and binary operators
124
125 >>> dump_tokens("0xff <= 255")
126 NUMBER '0xff' (1, 0) (1, 4)
127 OP '<=' (1, 5) (1, 7)
128 NUMBER '255' (1, 8) (1, 11)
Eric Smith0aed07a2008-03-17 19:43:40 +0000129 >>> dump_tokens("0b10 <= 255")
130 NUMBER '0b10' (1, 0) (1, 4)
131 OP '<=' (1, 5) (1, 7)
132 NUMBER '255' (1, 8) (1, 11)
133 >>> dump_tokens("0o123 <= 0123")
134 NUMBER '0o123' (1, 0) (1, 5)
135 OP '<=' (1, 6) (1, 8)
136 NUMBER '0123' (1, 9) (1, 13)
Brett Cannonb8d37352008-03-13 20:33:10 +0000137 >>> dump_tokens("01234567 > ~0x15")
138 NUMBER '01234567' (1, 0) (1, 8)
139 OP '>' (1, 9) (1, 10)
140 OP '~' (1, 11) (1, 12)
141 NUMBER '0x15' (1, 12) (1, 16)
142 >>> dump_tokens("2134568 != 01231515")
143 NUMBER '2134568' (1, 0) (1, 7)
144 OP '!=' (1, 8) (1, 10)
145 NUMBER '01231515' (1, 11) (1, 19)
146 >>> dump_tokens("(-124561-1) & 0200000000")
147 OP '(' (1, 0) (1, 1)
148 OP '-' (1, 1) (1, 2)
149 NUMBER '124561' (1, 2) (1, 8)
150 OP '-' (1, 8) (1, 9)
151 NUMBER '1' (1, 9) (1, 10)
152 OP ')' (1, 10) (1, 11)
153 OP '&' (1, 12) (1, 13)
154 NUMBER '0200000000' (1, 14) (1, 24)
155 >>> dump_tokens("0xdeadbeef != -1")
156 NUMBER '0xdeadbeef' (1, 0) (1, 10)
157 OP '!=' (1, 11) (1, 13)
158 OP '-' (1, 14) (1, 15)
159 NUMBER '1' (1, 15) (1, 16)
160 >>> dump_tokens("0xdeadc0de & 012345")
161 NUMBER '0xdeadc0de' (1, 0) (1, 10)
162 OP '&' (1, 11) (1, 12)
163 NUMBER '012345' (1, 13) (1, 19)
164 >>> dump_tokens("0xFF & 0x15 | 1234")
165 NUMBER '0xFF' (1, 0) (1, 4)
166 OP '&' (1, 5) (1, 6)
167 NUMBER '0x15' (1, 7) (1, 11)
168 OP '|' (1, 12) (1, 13)
169 NUMBER '1234' (1, 14) (1, 18)
170
171Long integers
172
173 >>> dump_tokens("x = 0L")
174 NAME 'x' (1, 0) (1, 1)
175 OP '=' (1, 2) (1, 3)
176 NUMBER '0L' (1, 4) (1, 6)
177 >>> dump_tokens("x = 0xfffffffffff")
178 NAME 'x' (1, 0) (1, 1)
179 OP '=' (1, 2) (1, 3)
180 NUMBER '0xffffffffff (1, 4) (1, 17)
181 >>> dump_tokens("x = 123141242151251616110l")
182 NAME 'x' (1, 0) (1, 1)
183 OP '=' (1, 2) (1, 3)
184 NUMBER '123141242151 (1, 4) (1, 26)
185 >>> dump_tokens("x = -15921590215012591L")
186 NAME 'x' (1, 0) (1, 1)
187 OP '=' (1, 2) (1, 3)
188 OP '-' (1, 4) (1, 5)
189 NUMBER '159215902150 (1, 5) (1, 23)
190
191Floating point numbers
192
193 >>> dump_tokens("x = 3.14159")
194 NAME 'x' (1, 0) (1, 1)
195 OP '=' (1, 2) (1, 3)
196 NUMBER '3.14159' (1, 4) (1, 11)
197 >>> dump_tokens("x = 314159.")
198 NAME 'x' (1, 0) (1, 1)
199 OP '=' (1, 2) (1, 3)
200 NUMBER '314159.' (1, 4) (1, 11)
201 >>> dump_tokens("x = .314159")
202 NAME 'x' (1, 0) (1, 1)
203 OP '=' (1, 2) (1, 3)
204 NUMBER '.314159' (1, 4) (1, 11)
205 >>> dump_tokens("x = 3e14159")
206 NAME 'x' (1, 0) (1, 1)
207 OP '=' (1, 2) (1, 3)
208 NUMBER '3e14159' (1, 4) (1, 11)
209 >>> dump_tokens("x = 3E123")
210 NAME 'x' (1, 0) (1, 1)
211 OP '=' (1, 2) (1, 3)
212 NUMBER '3E123' (1, 4) (1, 9)
213 >>> dump_tokens("x+y = 3e-1230")
214 NAME 'x' (1, 0) (1, 1)
215 OP '+' (1, 1) (1, 2)
216 NAME 'y' (1, 2) (1, 3)
217 OP '=' (1, 4) (1, 5)
218 NUMBER '3e-1230' (1, 6) (1, 13)
219 >>> dump_tokens("x = 3.14e159")
220 NAME 'x' (1, 0) (1, 1)
221 OP '=' (1, 2) (1, 3)
222 NUMBER '3.14e159' (1, 4) (1, 12)
223
224String literals
225
226 >>> dump_tokens("x = ''; y = \\\"\\\"")
227 NAME 'x' (1, 0) (1, 1)
228 OP '=' (1, 2) (1, 3)
229 STRING "''" (1, 4) (1, 6)
230 OP ';' (1, 6) (1, 7)
231 NAME 'y' (1, 8) (1, 9)
232 OP '=' (1, 10) (1, 11)
233 STRING '""' (1, 12) (1, 14)
234 >>> dump_tokens("x = '\\\"'; y = \\\"'\\\"")
235 NAME 'x' (1, 0) (1, 1)
236 OP '=' (1, 2) (1, 3)
237 STRING '\\'"\\'' (1, 4) (1, 7)
238 OP ';' (1, 7) (1, 8)
239 NAME 'y' (1, 9) (1, 10)
240 OP '=' (1, 11) (1, 12)
241 STRING '"\\'"' (1, 13) (1, 16)
242 >>> dump_tokens("x = \\\"doesn't \\\"shrink\\\", does it\\\"")
243 NAME 'x' (1, 0) (1, 1)
244 OP '=' (1, 2) (1, 3)
245 STRING '"doesn\\'t "' (1, 4) (1, 14)
246 NAME 'shrink' (1, 14) (1, 20)
247 STRING '", does it"' (1, 20) (1, 31)
248 >>> dump_tokens("x = u'abc' + U'ABC'")
249 NAME 'x' (1, 0) (1, 1)
250 OP '=' (1, 2) (1, 3)
251 STRING "u'abc'" (1, 4) (1, 10)
252 OP '+' (1, 11) (1, 12)
253 STRING "U'ABC'" (1, 13) (1, 19)
254 >>> dump_tokens('y = u"ABC" + U"ABC"')
255 NAME 'y' (1, 0) (1, 1)
256 OP '=' (1, 2) (1, 3)
257 STRING 'u"ABC"' (1, 4) (1, 10)
258 OP '+' (1, 11) (1, 12)
259 STRING 'U"ABC"' (1, 13) (1, 19)
260 >>> dump_tokens("x = ur'abc' + Ur'ABC' + uR'ABC' + UR'ABC'")
261 NAME 'x' (1, 0) (1, 1)
262 OP '=' (1, 2) (1, 3)
263 STRING "ur'abc'" (1, 4) (1, 11)
264 OP '+' (1, 12) (1, 13)
265 STRING "Ur'ABC'" (1, 14) (1, 21)
266 OP '+' (1, 22) (1, 23)
267 STRING "uR'ABC'" (1, 24) (1, 31)
268 OP '+' (1, 32) (1, 33)
269 STRING "UR'ABC'" (1, 34) (1, 41)
270 >>> dump_tokens('y = ur"abc" + Ur"ABC" + uR"ABC" + UR"ABC"')
271 NAME 'y' (1, 0) (1, 1)
272 OP '=' (1, 2) (1, 3)
273 STRING 'ur"abc"' (1, 4) (1, 11)
274 OP '+' (1, 12) (1, 13)
275 STRING 'Ur"ABC"' (1, 14) (1, 21)
276 OP '+' (1, 22) (1, 23)
277 STRING 'uR"ABC"' (1, 24) (1, 31)
278 OP '+' (1, 32) (1, 33)
279 STRING 'UR"ABC"' (1, 34) (1, 41)
280
Meador Inge43f42fc2012-06-16 21:05:50 -0500281 >>> dump_tokens("b'abc' + B'abc'")
282 STRING "b'abc'" (1, 0) (1, 6)
283 OP '+' (1, 7) (1, 8)
284 STRING "B'abc'" (1, 9) (1, 15)
285 >>> dump_tokens('b"abc" + B"abc"')
286 STRING 'b"abc"' (1, 0) (1, 6)
287 OP '+' (1, 7) (1, 8)
288 STRING 'B"abc"' (1, 9) (1, 15)
289 >>> dump_tokens("br'abc' + bR'abc' + Br'abc' + BR'abc'")
290 STRING "br'abc'" (1, 0) (1, 7)
291 OP '+' (1, 8) (1, 9)
292 STRING "bR'abc'" (1, 10) (1, 17)
293 OP '+' (1, 18) (1, 19)
294 STRING "Br'abc'" (1, 20) (1, 27)
295 OP '+' (1, 28) (1, 29)
296 STRING "BR'abc'" (1, 30) (1, 37)
297 >>> dump_tokens('br"abc" + bR"abc" + Br"abc" + BR"abc"')
298 STRING 'br"abc"' (1, 0) (1, 7)
299 OP '+' (1, 8) (1, 9)
300 STRING 'bR"abc"' (1, 10) (1, 17)
301 OP '+' (1, 18) (1, 19)
302 STRING 'Br"abc"' (1, 20) (1, 27)
303 OP '+' (1, 28) (1, 29)
304 STRING 'BR"abc"' (1, 30) (1, 37)
305
Brett Cannonb8d37352008-03-13 20:33:10 +0000306Operators
307
308 >>> dump_tokens("def d22(a, b, c=2, d=2, *k): pass")
309 NAME 'def' (1, 0) (1, 3)
310 NAME 'd22' (1, 4) (1, 7)
311 OP '(' (1, 7) (1, 8)
312 NAME 'a' (1, 8) (1, 9)
313 OP ',' (1, 9) (1, 10)
314 NAME 'b' (1, 11) (1, 12)
315 OP ',' (1, 12) (1, 13)
316 NAME 'c' (1, 14) (1, 15)
317 OP '=' (1, 15) (1, 16)
318 NUMBER '2' (1, 16) (1, 17)
319 OP ',' (1, 17) (1, 18)
320 NAME 'd' (1, 19) (1, 20)
321 OP '=' (1, 20) (1, 21)
322 NUMBER '2' (1, 21) (1, 22)
323 OP ',' (1, 22) (1, 23)
324 OP '*' (1, 24) (1, 25)
325 NAME 'k' (1, 25) (1, 26)
326 OP ')' (1, 26) (1, 27)
327 OP ':' (1, 27) (1, 28)
328 NAME 'pass' (1, 29) (1, 33)
329 >>> dump_tokens("def d01v_(a=1, *k, **w): pass")
330 NAME 'def' (1, 0) (1, 3)
331 NAME 'd01v_' (1, 4) (1, 9)
332 OP '(' (1, 9) (1, 10)
333 NAME 'a' (1, 10) (1, 11)
334 OP '=' (1, 11) (1, 12)
335 NUMBER '1' (1, 12) (1, 13)
336 OP ',' (1, 13) (1, 14)
337 OP '*' (1, 15) (1, 16)
338 NAME 'k' (1, 16) (1, 17)
339 OP ',' (1, 17) (1, 18)
340 OP '**' (1, 19) (1, 21)
341 NAME 'w' (1, 21) (1, 22)
342 OP ')' (1, 22) (1, 23)
343 OP ':' (1, 23) (1, 24)
344 NAME 'pass' (1, 25) (1, 29)
345
346Comparison
347
348 >>> dump_tokens("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " +
349 ... "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass")
350 NAME 'if' (1, 0) (1, 2)
351 NUMBER '1' (1, 3) (1, 4)
352 OP '<' (1, 5) (1, 6)
353 NUMBER '1' (1, 7) (1, 8)
354 OP '>' (1, 9) (1, 10)
355 NUMBER '1' (1, 11) (1, 12)
356 OP '==' (1, 13) (1, 15)
357 NUMBER '1' (1, 16) (1, 17)
358 OP '>=' (1, 18) (1, 20)
359 NUMBER '5' (1, 21) (1, 22)
360 OP '<=' (1, 23) (1, 25)
361 NUMBER '0x15' (1, 26) (1, 30)
362 OP '<=' (1, 31) (1, 33)
363 NUMBER '0x12' (1, 34) (1, 38)
364 OP '!=' (1, 39) (1, 41)
365 NUMBER '1' (1, 42) (1, 43)
366 NAME 'and' (1, 44) (1, 47)
367 NUMBER '5' (1, 48) (1, 49)
368 NAME 'in' (1, 50) (1, 52)
369 NUMBER '1' (1, 53) (1, 54)
370 NAME 'not' (1, 55) (1, 58)
371 NAME 'in' (1, 59) (1, 61)
372 NUMBER '1' (1, 62) (1, 63)
373 NAME 'is' (1, 64) (1, 66)
374 NUMBER '1' (1, 67) (1, 68)
375 NAME 'or' (1, 69) (1, 71)
376 NUMBER '5' (1, 72) (1, 73)
377 NAME 'is' (1, 74) (1, 76)
378 NAME 'not' (1, 77) (1, 80)
379 NUMBER '1' (1, 81) (1, 82)
380 OP ':' (1, 82) (1, 83)
381 NAME 'pass' (1, 84) (1, 88)
382
383Shift
384
385 >>> dump_tokens("x = 1 << 1 >> 5")
386 NAME 'x' (1, 0) (1, 1)
387 OP '=' (1, 2) (1, 3)
388 NUMBER '1' (1, 4) (1, 5)
389 OP '<<' (1, 6) (1, 8)
390 NUMBER '1' (1, 9) (1, 10)
391 OP '>>' (1, 11) (1, 13)
392 NUMBER '5' (1, 14) (1, 15)
393
394Additive
395
396 >>> dump_tokens("x = 1 - y + 15 - 01 + 0x124 + z + a[5]")
397 NAME 'x' (1, 0) (1, 1)
398 OP '=' (1, 2) (1, 3)
399 NUMBER '1' (1, 4) (1, 5)
400 OP '-' (1, 6) (1, 7)
401 NAME 'y' (1, 8) (1, 9)
402 OP '+' (1, 10) (1, 11)
403 NUMBER '15' (1, 12) (1, 14)
404 OP '-' (1, 15) (1, 16)
405 NUMBER '01' (1, 17) (1, 19)
406 OP '+' (1, 20) (1, 21)
407 NUMBER '0x124' (1, 22) (1, 27)
408 OP '+' (1, 28) (1, 29)
409 NAME 'z' (1, 30) (1, 31)
410 OP '+' (1, 32) (1, 33)
411 NAME 'a' (1, 34) (1, 35)
412 OP '[' (1, 35) (1, 36)
413 NUMBER '5' (1, 36) (1, 37)
414 OP ']' (1, 37) (1, 38)
415
416Multiplicative
417
418 >>> dump_tokens("x = 1//1*1/5*12%0x12")
419 NAME 'x' (1, 0) (1, 1)
420 OP '=' (1, 2) (1, 3)
421 NUMBER '1' (1, 4) (1, 5)
422 OP '//' (1, 5) (1, 7)
423 NUMBER '1' (1, 7) (1, 8)
424 OP '*' (1, 8) (1, 9)
425 NUMBER '1' (1, 9) (1, 10)
426 OP '/' (1, 10) (1, 11)
427 NUMBER '5' (1, 11) (1, 12)
428 OP '*' (1, 12) (1, 13)
429 NUMBER '12' (1, 13) (1, 15)
430 OP '%' (1, 15) (1, 16)
431 NUMBER '0x12' (1, 16) (1, 20)
432
433Unary
434
435 >>> dump_tokens("~1 ^ 1 & 1 |1 ^ -1")
436 OP '~' (1, 0) (1, 1)
437 NUMBER '1' (1, 1) (1, 2)
438 OP '^' (1, 3) (1, 4)
439 NUMBER '1' (1, 5) (1, 6)
440 OP '&' (1, 7) (1, 8)
441 NUMBER '1' (1, 9) (1, 10)
442 OP '|' (1, 11) (1, 12)
443 NUMBER '1' (1, 12) (1, 13)
444 OP '^' (1, 14) (1, 15)
445 OP '-' (1, 16) (1, 17)
446 NUMBER '1' (1, 17) (1, 18)
447 >>> dump_tokens("-1*1/1+1*1//1 - ---1**1")
448 OP '-' (1, 0) (1, 1)
449 NUMBER '1' (1, 1) (1, 2)
450 OP '*' (1, 2) (1, 3)
451 NUMBER '1' (1, 3) (1, 4)
452 OP '/' (1, 4) (1, 5)
453 NUMBER '1' (1, 5) (1, 6)
454 OP '+' (1, 6) (1, 7)
455 NUMBER '1' (1, 7) (1, 8)
456 OP '*' (1, 8) (1, 9)
457 NUMBER '1' (1, 9) (1, 10)
458 OP '//' (1, 10) (1, 12)
459 NUMBER '1' (1, 12) (1, 13)
460 OP '-' (1, 14) (1, 15)
461 OP '-' (1, 16) (1, 17)
462 OP '-' (1, 17) (1, 18)
463 OP '-' (1, 18) (1, 19)
464 NUMBER '1' (1, 19) (1, 20)
465 OP '**' (1, 20) (1, 22)
466 NUMBER '1' (1, 22) (1, 23)
467
468Selector
469
470 >>> dump_tokens("import sys, time\\nx = sys.modules['time'].time()")
471 NAME 'import' (1, 0) (1, 6)
472 NAME 'sys' (1, 7) (1, 10)
473 OP ',' (1, 10) (1, 11)
474 NAME 'time' (1, 12) (1, 16)
475 NEWLINE '\\n' (1, 16) (1, 17)
476 NAME 'x' (2, 0) (2, 1)
477 OP '=' (2, 2) (2, 3)
478 NAME 'sys' (2, 4) (2, 7)
479 OP '.' (2, 7) (2, 8)
480 NAME 'modules' (2, 8) (2, 15)
481 OP '[' (2, 15) (2, 16)
482 STRING "'time'" (2, 16) (2, 22)
483 OP ']' (2, 22) (2, 23)
484 OP '.' (2, 23) (2, 24)
485 NAME 'time' (2, 24) (2, 28)
486 OP '(' (2, 28) (2, 29)
487 OP ')' (2, 29) (2, 30)
488
489Methods
490
491 >>> dump_tokens("@staticmethod\\ndef foo(x,y): pass")
492 OP '@' (1, 0) (1, 1)
493 NAME 'staticmethod (1, 1) (1, 13)
494 NEWLINE '\\n' (1, 13) (1, 14)
495 NAME 'def' (2, 0) (2, 3)
496 NAME 'foo' (2, 4) (2, 7)
497 OP '(' (2, 7) (2, 8)
498 NAME 'x' (2, 8) (2, 9)
499 OP ',' (2, 9) (2, 10)
500 NAME 'y' (2, 10) (2, 11)
501 OP ')' (2, 11) (2, 12)
502 OP ':' (2, 12) (2, 13)
503 NAME 'pass' (2, 14) (2, 18)
504
505Backslash means line continuation, except for comments
506
507 >>> roundtrip("x=1+\\\\n"
508 ... "1\\n"
509 ... "# This is a comment\\\\n"
510 ... "# This also\\n")
511 True
512 >>> roundtrip("# Comment \\\\nx = 0")
513 True
514
Amaury Forgeot d'Arcda0c0252008-03-27 23:23:54 +0000515Two string literals on the same line
516
517 >>> roundtrip("'' ''")
518 True
519
Test roundtrip on random Python modules.
Pass the '-ucpu' option to process the full directory.
Amaury Forgeot d'Arcda0c0252008-03-27 23:23:54 +0000522
Brett Cannonb8d37352008-03-13 20:33:10 +0000523 >>>
524 >>> tempdir = os.path.dirname(f) or os.curdir
525 >>> testfiles = glob.glob(os.path.join(tempdir, "test*.py"))
Christian Heimes6c052fd2008-03-27 11:46:37 +0000526
Antoine Pitroud989f822010-10-14 15:43:25 +0000527 >>> if not test_support.is_resource_enabled("cpu"):
Brett Cannonb8d37352008-03-13 20:33:10 +0000528 ... testfiles = random.sample(testfiles, 10)
529 ...
530 >>> for testfile in testfiles:
Christian Heimes6c052fd2008-03-27 11:46:37 +0000531 ... if not roundtrip(open(testfile)):
532 ... print "Roundtrip failed for file %s" % testfile
533 ... break
Brett Cannonb8d37352008-03-13 20:33:10 +0000534 ... else: True
535 True
Benjamin Peterson447dc152009-10-15 01:49:37 +0000536
537Evil tabs
Benjamin Petersone5265722009-10-15 01:56:25 +0000538 >>> dump_tokens("def f():\\n\\tif x\\n \\tpass")
Benjamin Peterson447dc152009-10-15 01:49:37 +0000539 NAME 'def' (1, 0) (1, 3)
540 NAME 'f' (1, 4) (1, 5)
541 OP '(' (1, 5) (1, 6)
542 OP ')' (1, 6) (1, 7)
543 OP ':' (1, 7) (1, 8)
544 NEWLINE '\\n' (1, 8) (1, 9)
545 INDENT '\\t' (2, 0) (2, 1)
546 NAME 'if' (2, 1) (2, 3)
547 NAME 'x' (2, 4) (2, 5)
548 NEWLINE '\\n' (2, 5) (2, 6)
Benjamin Petersone5265722009-10-15 01:56:25 +0000549 INDENT ' \\t' (3, 0) (3, 9)
Benjamin Peterson447dc152009-10-15 01:49:37 +0000550 NAME 'pass' (3, 9) (3, 13)
551 DEDENT '' (4, 0) (4, 0)
552 DEDENT '' (4, 0) (4, 0)
Jeremy Hylton29bef0b2006-08-23 18:37:43 +0000553"""
554
Guido van Rossum0874f7f1997-10-27 22:15:06 +0000555
Brett Cannonb8d37352008-03-13 20:33:10 +0000556from test import test_support
Georg Brandla4f46e12010-02-07 17:03:15 +0000557from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP,
Brett Cannonb8d37352008-03-13 20:33:10 +0000558 STRING, ENDMARKER, tok_name)
559from StringIO import StringIO
560import os
Raymond Hettinger68c04532005-06-10 11:05:19 +0000561
def dump_tokens(s):
    """Print the tokens of the source string ``s`` as an aligned table.

    Each row shows the token type name (padded/truncated to 10 characters),
    the ``repr`` of the token text (padded/truncated to 13 characters), and
    the token's start and end positions as reported by ``generate_tokens``.
    Printing stops at the first ENDMARKER token, which is itself omitted.
    """
    readline = StringIO(s).readline
    for tok_type, token, start, end, _line in generate_tokens(readline):
        if tok_type == ENDMARKER:
            break
        # Explicit arguments instead of ``% locals()``: the row no longer
        # depends on local variable names, and the ``type`` builtin is not
        # shadowed by the loop variable.
        print("%-10.10s %-13.13r %s %s"
              % (tok_name[tok_type], token, start, end))
Jeremy Hylton29bef0b2006-08-23 18:37:43 +0000573
# This is an example from the docs, set up as a doctest.
def decistmt(s):
    """Substitute Decimals for floats in a string of statements.

    Every NUMBER token whose text contains a ``.`` is replaced by the four
    tokens of a ``Decimal('...')`` call; all other tokens are passed through
    unchanged before the stream is handed to ``untokenize``.

    >>> from decimal import Decimal
    >>> s = 'print +21.3e-5*-.1234/81.7'
    >>> decistmt(s)
    "print +Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')"

    The format of the exponent is inherited from the platform C library.
    Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
    we're only showing 12 digits, and the 13th isn't close to 5, the
    rest of the output should be platform-independent.

    >>> exec(s) #doctest: +ELLIPSIS
    -3.21716034272e-0...7

    Output from calculations with Decimal should be identical across all
    platforms.

    >>> exec(decistmt(s))
    -3.217160342717258261933904529E-7
    """
    rewritten = []
    for entry in generate_tokens(StringIO(s).readline):
        kind, text = entry[0], entry[1]
        if kind != NUMBER or '.' not in text:
            # Not a dotted number literal: keep the token as it is.
            rewritten.append((kind, text))
            continue
        # Wrap the literal's source text in a Decimal(...) constructor call.
        rewritten += [
            (NAME, 'Decimal'),
            (OP, '('),
            (STRING, repr(text)),
            (OP, ')'),
        ]
    return untokenize(rewritten)
611
Brett Cannonb8d37352008-03-13 20:33:10 +0000612
# Extra doctest sources: the module-level ``doctests`` string and the
# ``decistmt`` function (whose docstring carries its own examples).
__test__ = {
    "doctests": doctests,
    'decistmt': decistmt,
}
614
615
def test_main():
    """Run the doctests of ``test.test_tokenize`` via ``test_support``."""
    import test.test_tokenize as tokenize_tests
    test_support.run_doctest(tokenize_tests, True)


if __name__ == "__main__":
    test_main()