blob: 2938520e27293af4eea18ec855f45f24b52a2da6 [file] [log] [blame]
Christian Heimesdd15f6c2008-03-16 00:07:10 +00001doctests = """
2Tests for the tokenize module.
Thomas Wouters89f507f2006-12-13 04:49:30 +00003
Christian Heimesdd15f6c2008-03-16 00:07:10 +00004 >>> import glob, random, sys
Thomas Wouters89f507f2006-12-13 04:49:30 +00005
The tests can be really simple. Given a small fragment of source
code, print out a table with tokens. The ENDMARKER is omitted for
brevity.
9
Christian Heimesdd15f6c2008-03-16 00:07:10 +000010 >>> dump_tokens("1 + 1")
11 NUMBER '1' (1, 0) (1, 1)
12 OP '+' (1, 2) (1, 3)
13 NUMBER '1' (1, 4) (1, 5)
Thomas Wouters89f507f2006-12-13 04:49:30 +000014
Christian Heimesdd15f6c2008-03-16 00:07:10 +000015 >>> dump_tokens("if False:\\n"
16 ... " # NL\\n"
17 ... " True = False # NEWLINE\\n")
18 NAME 'if' (1, 0) (1, 2)
19 NAME 'False' (1, 3) (1, 8)
20 OP ':' (1, 8) (1, 9)
21 NEWLINE '\\n' (1, 9) (1, 10)
22 COMMENT '# NL' (2, 4) (2, 8)
23 NL '\\n' (2, 8) (2, 9)
24 INDENT ' ' (3, 0) (3, 4)
25 NAME 'True' (3, 4) (3, 8)
26 OP '=' (3, 9) (3, 10)
27 NAME 'False' (3, 11) (3, 16)
28 COMMENT '# NEWLINE' (3, 17) (3, 26)
29 NEWLINE '\\n' (3, 26) (3, 27)
30 DEDENT '' (4, 0) (4, 0)
Thomas Wouters89f507f2006-12-13 04:49:30 +000031
Christian Heimesdd15f6c2008-03-16 00:07:10 +000032 >>> indent_error_file = \"""
33 ... def k(x):
34 ... x += 2
35 ... x += 5
36 ... \"""
37 >>> for tok in generate_tokens(StringIO(indent_error_file).readline): pass
38 Traceback (most recent call last):
39 ...
40 IndentationError: unindent does not match any outer indentation level
Thomas Wouters89f507f2006-12-13 04:49:30 +000041
Christian Heimesdd15f6c2008-03-16 00:07:10 +000042Test roundtrip for `untokenize`. `f` is an open file or a string. The source
43code in f is tokenized, converted back to source code via tokenize.untokenize(),
44and tokenized again from the latter. The test fails if the second tokenization
45doesn't match the first.
Thomas Wouters89f507f2006-12-13 04:49:30 +000046
Christian Heimesdd15f6c2008-03-16 00:07:10 +000047 >>> def roundtrip(f):
48 ... if isinstance(f, str): f = StringIO(f)
49 ... token_list = list(generate_tokens(f.readline))
50 ... f.close()
51 ... tokens1 = [tok[:2] for tok in token_list]
52 ... new_text = untokenize(tokens1)
53 ... readline = iter(new_text.splitlines(1)).__next__
54 ... tokens2 = [tok[:2] for tok in generate_tokens(readline)]
55 ... return tokens1 == tokens2
56 ...
Thomas Wouters89f507f2006-12-13 04:49:30 +000057
There are some standard formatting practices that are easy to get right.
Thomas Wouters89f507f2006-12-13 04:49:30 +000059
Christian Heimesdd15f6c2008-03-16 00:07:10 +000060 >>> roundtrip("if x == 1:\\n"
61 ... " print(x)\\n")
62 True
Thomas Wouters89f507f2006-12-13 04:49:30 +000063
Christian Heimesdd15f6c2008-03-16 00:07:10 +000064 >>> roundtrip("# This is a comment\\n# This also")
65 True
Thomas Wouters89f507f2006-12-13 04:49:30 +000066
Some people use different formatting conventions, which makes
untokenize a little trickier. Note that this test involves trailing
whitespace after the colon. Note that we use hex escapes to make the
two trailing blanks apparent in the expected output.
Thomas Wouters89f507f2006-12-13 04:49:30 +000071
Christian Heimesdd15f6c2008-03-16 00:07:10 +000072 >>> roundtrip("if x == 1 : \\n"
73 ... " print(x)\\n")
74 True
Thomas Wouters89f507f2006-12-13 04:49:30 +000075
Christian Heimesdd15f6c2008-03-16 00:07:10 +000076 >>> f = test_support.findfile("tokenize_tests.txt")
77 >>> roundtrip(open(f))
78 True
Thomas Wouters89f507f2006-12-13 04:49:30 +000079
Christian Heimesdd15f6c2008-03-16 00:07:10 +000080 >>> roundtrip("if x == 1:\\n"
81 ... " # A comment by itself.\\n"
82 ... " print(x) # Comment here, too.\\n"
83 ... " # Another comment.\\n"
84 ... "after_if = True\\n")
85 True
Thomas Wouters89f507f2006-12-13 04:49:30 +000086
Christian Heimesdd15f6c2008-03-16 00:07:10 +000087 >>> roundtrip("if (x # The comments need to go in the right place\\n"
88 ... " == 1):\\n"
89 ... " print('x==1')\\n")
90 True
Thomas Wouters89f507f2006-12-13 04:49:30 +000091
Christian Heimesdd15f6c2008-03-16 00:07:10 +000092 >>> roundtrip("class Test: # A comment here\\n"
93 ... " # A comment with weird indent\\n"
94 ... " after_com = 5\\n"
95 ... " def x(m): return m*5 # a one liner\\n"
96 ... " def y(m): # A whitespace after the colon\\n"
97 ... " return y*4 # 3-space indent\\n")
98 True
99
100Some error-handling code
101
102 >>> roundtrip("try: import somemodule\\n"
103 ... "except ImportError: # comment\\n"
104 ... " print 'Can not import' # comment2\\n"
105 ... "else: print 'Loaded'\\n")
106 True
107
Eric Smith74ca5572008-03-17 19:49:19 +0000108Balancing continuation
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000109
110 >>> roundtrip("a = (3,4, \\n"
111 ... "5,6)\\n"
112 ... "y = [3, 4,\\n"
113 ... "5]\\n"
114 ... "z = {'a': 5,\\n"
115 ... "'b':15, 'c':True}\\n"
116 ... "x = len(y) + 5 - a[\\n"
117 ... "3] - a[2]\\n"
118 ... "+ len(z) - z[\\n"
119 ... "'b']\\n")
120 True
121
122Ordinary integers and binary operators
123
124 >>> dump_tokens("0xff <= 255")
125 NUMBER '0xff' (1, 0) (1, 4)
126 OP '<=' (1, 5) (1, 7)
127 NUMBER '255' (1, 8) (1, 11)
Eric Smith74ca5572008-03-17 19:49:19 +0000128 >>> dump_tokens("0b10 <= 255")
129 NUMBER '0b10' (1, 0) (1, 4)
130 OP '<=' (1, 5) (1, 7)
131 NUMBER '255' (1, 8) (1, 11)
132 >>> dump_tokens("0o123 <= 0O123")
133 NUMBER '0o123' (1, 0) (1, 5)
134 OP '<=' (1, 6) (1, 8)
135 NUMBER '0O123' (1, 9) (1, 14)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000136 >>> dump_tokens("1234567 > ~0x15")
137 NUMBER '1234567' (1, 0) (1, 7)
138 OP '>' (1, 8) (1, 9)
139 OP '~' (1, 10) (1, 11)
140 NUMBER '0x15' (1, 11) (1, 15)
141 >>> dump_tokens("2134568 != 1231515")
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000142 NUMBER '2134568' (1, 0) (1, 7)
143 OP '!=' (1, 8) (1, 10)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000144 NUMBER '1231515' (1, 11) (1, 18)
145 >>> dump_tokens("(-124561-1) & 200000000")
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000146 OP '(' (1, 0) (1, 1)
147 OP '-' (1, 1) (1, 2)
148 NUMBER '124561' (1, 2) (1, 8)
149 OP '-' (1, 8) (1, 9)
150 NUMBER '1' (1, 9) (1, 10)
151 OP ')' (1, 10) (1, 11)
152 OP '&' (1, 12) (1, 13)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000153 NUMBER '200000000' (1, 14) (1, 23)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000154 >>> dump_tokens("0xdeadbeef != -1")
155 NUMBER '0xdeadbeef' (1, 0) (1, 10)
156 OP '!=' (1, 11) (1, 13)
157 OP '-' (1, 14) (1, 15)
158 NUMBER '1' (1, 15) (1, 16)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000159 >>> dump_tokens("0xdeadc0de & 12345")
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000160 NUMBER '0xdeadc0de' (1, 0) (1, 10)
161 OP '&' (1, 11) (1, 12)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000162 NUMBER '12345' (1, 13) (1, 18)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000163 >>> dump_tokens("0xFF & 0x15 | 1234")
164 NUMBER '0xFF' (1, 0) (1, 4)
165 OP '&' (1, 5) (1, 6)
166 NUMBER '0x15' (1, 7) (1, 11)
167 OP '|' (1, 12) (1, 13)
168 NUMBER '1234' (1, 14) (1, 18)
169
170Long integers
171
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000172 >>> dump_tokens("x = 0")
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000173 NAME 'x' (1, 0) (1, 1)
174 OP '=' (1, 2) (1, 3)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000175 NUMBER '0' (1, 4) (1, 5)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000176 >>> dump_tokens("x = 0xfffffffffff")
177 NAME 'x' (1, 0) (1, 1)
178 OP '=' (1, 2) (1, 3)
179 NUMBER '0xffffffffff (1, 4) (1, 17)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000180 >>> dump_tokens("x = 123141242151251616110")
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000181 NAME 'x' (1, 0) (1, 1)
182 OP '=' (1, 2) (1, 3)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000183 NUMBER '123141242151 (1, 4) (1, 25)
184 >>> dump_tokens("x = -15921590215012591")
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000185 NAME 'x' (1, 0) (1, 1)
186 OP '=' (1, 2) (1, 3)
187 OP '-' (1, 4) (1, 5)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000188 NUMBER '159215902150 (1, 5) (1, 22)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000189
190Floating point numbers
191
192 >>> dump_tokens("x = 3.14159")
193 NAME 'x' (1, 0) (1, 1)
194 OP '=' (1, 2) (1, 3)
195 NUMBER '3.14159' (1, 4) (1, 11)
196 >>> dump_tokens("x = 314159.")
197 NAME 'x' (1, 0) (1, 1)
198 OP '=' (1, 2) (1, 3)
199 NUMBER '314159.' (1, 4) (1, 11)
200 >>> dump_tokens("x = .314159")
201 NAME 'x' (1, 0) (1, 1)
202 OP '=' (1, 2) (1, 3)
203 NUMBER '.314159' (1, 4) (1, 11)
204 >>> dump_tokens("x = 3e14159")
205 NAME 'x' (1, 0) (1, 1)
206 OP '=' (1, 2) (1, 3)
207 NUMBER '3e14159' (1, 4) (1, 11)
208 >>> dump_tokens("x = 3E123")
209 NAME 'x' (1, 0) (1, 1)
210 OP '=' (1, 2) (1, 3)
211 NUMBER '3E123' (1, 4) (1, 9)
212 >>> dump_tokens("x+y = 3e-1230")
213 NAME 'x' (1, 0) (1, 1)
214 OP '+' (1, 1) (1, 2)
215 NAME 'y' (1, 2) (1, 3)
216 OP '=' (1, 4) (1, 5)
217 NUMBER '3e-1230' (1, 6) (1, 13)
218 >>> dump_tokens("x = 3.14e159")
219 NAME 'x' (1, 0) (1, 1)
220 OP '=' (1, 2) (1, 3)
221 NUMBER '3.14e159' (1, 4) (1, 12)
222
223String literals
224
225 >>> dump_tokens("x = ''; y = \\\"\\\"")
226 NAME 'x' (1, 0) (1, 1)
227 OP '=' (1, 2) (1, 3)
228 STRING "''" (1, 4) (1, 6)
229 OP ';' (1, 6) (1, 7)
230 NAME 'y' (1, 8) (1, 9)
231 OP '=' (1, 10) (1, 11)
232 STRING '""' (1, 12) (1, 14)
233 >>> dump_tokens("x = '\\\"'; y = \\\"'\\\"")
234 NAME 'x' (1, 0) (1, 1)
235 OP '=' (1, 2) (1, 3)
236 STRING '\\'"\\'' (1, 4) (1, 7)
237 OP ';' (1, 7) (1, 8)
238 NAME 'y' (1, 9) (1, 10)
239 OP '=' (1, 11) (1, 12)
240 STRING '"\\'"' (1, 13) (1, 16)
241 >>> dump_tokens("x = \\\"doesn't \\\"shrink\\\", does it\\\"")
242 NAME 'x' (1, 0) (1, 1)
243 OP '=' (1, 2) (1, 3)
244 STRING '"doesn\\'t "' (1, 4) (1, 14)
245 NAME 'shrink' (1, 14) (1, 20)
246 STRING '", does it"' (1, 20) (1, 31)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000247 >>> dump_tokens("x = 'abc' + 'ABC'")
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000248 NAME 'x' (1, 0) (1, 1)
249 OP '=' (1, 2) (1, 3)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000250 STRING "'abc'" (1, 4) (1, 9)
251 OP '+' (1, 10) (1, 11)
252 STRING "'ABC'" (1, 12) (1, 17)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000253 >>> dump_tokens('y = "ABC" + "ABC"')
254 NAME 'y' (1, 0) (1, 1)
255 OP '=' (1, 2) (1, 3)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000256 STRING '"ABC"' (1, 4) (1, 9)
257 OP '+' (1, 10) (1, 11)
258 STRING '"ABC"' (1, 12) (1, 17)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000259 >>> dump_tokens("x = r'abc' + r'ABC' + R'ABC' + R'ABC'")
260 NAME 'x' (1, 0) (1, 1)
261 OP '=' (1, 2) (1, 3)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000262 STRING "r'abc'" (1, 4) (1, 10)
263 OP '+' (1, 11) (1, 12)
264 STRING "r'ABC'" (1, 13) (1, 19)
265 OP '+' (1, 20) (1, 21)
266 STRING "R'ABC'" (1, 22) (1, 28)
267 OP '+' (1, 29) (1, 30)
268 STRING "R'ABC'" (1, 31) (1, 37)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000269 >>> dump_tokens('y = r"abc" + r"ABC" + R"ABC" + R"ABC"')
270 NAME 'y' (1, 0) (1, 1)
271 OP '=' (1, 2) (1, 3)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000272 STRING 'r"abc"' (1, 4) (1, 10)
273 OP '+' (1, 11) (1, 12)
274 STRING 'r"ABC"' (1, 13) (1, 19)
275 OP '+' (1, 20) (1, 21)
276 STRING 'R"ABC"' (1, 22) (1, 28)
277 OP '+' (1, 29) (1, 30)
278 STRING 'R"ABC"' (1, 31) (1, 37)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000279
280Operators
281
282 >>> dump_tokens("def d22(a, b, c=2, d=2, *k): pass")
283 NAME 'def' (1, 0) (1, 3)
284 NAME 'd22' (1, 4) (1, 7)
285 OP '(' (1, 7) (1, 8)
286 NAME 'a' (1, 8) (1, 9)
287 OP ',' (1, 9) (1, 10)
288 NAME 'b' (1, 11) (1, 12)
289 OP ',' (1, 12) (1, 13)
290 NAME 'c' (1, 14) (1, 15)
291 OP '=' (1, 15) (1, 16)
292 NUMBER '2' (1, 16) (1, 17)
293 OP ',' (1, 17) (1, 18)
294 NAME 'd' (1, 19) (1, 20)
295 OP '=' (1, 20) (1, 21)
296 NUMBER '2' (1, 21) (1, 22)
297 OP ',' (1, 22) (1, 23)
298 OP '*' (1, 24) (1, 25)
299 NAME 'k' (1, 25) (1, 26)
300 OP ')' (1, 26) (1, 27)
301 OP ':' (1, 27) (1, 28)
302 NAME 'pass' (1, 29) (1, 33)
303 >>> dump_tokens("def d01v_(a=1, *k, **w): pass")
304 NAME 'def' (1, 0) (1, 3)
305 NAME 'd01v_' (1, 4) (1, 9)
306 OP '(' (1, 9) (1, 10)
307 NAME 'a' (1, 10) (1, 11)
308 OP '=' (1, 11) (1, 12)
309 NUMBER '1' (1, 12) (1, 13)
310 OP ',' (1, 13) (1, 14)
311 OP '*' (1, 15) (1, 16)
312 NAME 'k' (1, 16) (1, 17)
313 OP ',' (1, 17) (1, 18)
314 OP '**' (1, 19) (1, 21)
315 NAME 'w' (1, 21) (1, 22)
316 OP ')' (1, 22) (1, 23)
317 OP ':' (1, 23) (1, 24)
318 NAME 'pass' (1, 25) (1, 29)
319
320Comparison
321
322 >>> dump_tokens("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " +
323 ... "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass")
324 NAME 'if' (1, 0) (1, 2)
325 NUMBER '1' (1, 3) (1, 4)
326 OP '<' (1, 5) (1, 6)
327 NUMBER '1' (1, 7) (1, 8)
328 OP '>' (1, 9) (1, 10)
329 NUMBER '1' (1, 11) (1, 12)
330 OP '==' (1, 13) (1, 15)
331 NUMBER '1' (1, 16) (1, 17)
332 OP '>=' (1, 18) (1, 20)
333 NUMBER '5' (1, 21) (1, 22)
334 OP '<=' (1, 23) (1, 25)
335 NUMBER '0x15' (1, 26) (1, 30)
336 OP '<=' (1, 31) (1, 33)
337 NUMBER '0x12' (1, 34) (1, 38)
338 OP '!=' (1, 39) (1, 41)
339 NUMBER '1' (1, 42) (1, 43)
340 NAME 'and' (1, 44) (1, 47)
341 NUMBER '5' (1, 48) (1, 49)
342 NAME 'in' (1, 50) (1, 52)
343 NUMBER '1' (1, 53) (1, 54)
344 NAME 'not' (1, 55) (1, 58)
345 NAME 'in' (1, 59) (1, 61)
346 NUMBER '1' (1, 62) (1, 63)
347 NAME 'is' (1, 64) (1, 66)
348 NUMBER '1' (1, 67) (1, 68)
349 NAME 'or' (1, 69) (1, 71)
350 NUMBER '5' (1, 72) (1, 73)
351 NAME 'is' (1, 74) (1, 76)
352 NAME 'not' (1, 77) (1, 80)
353 NUMBER '1' (1, 81) (1, 82)
354 OP ':' (1, 82) (1, 83)
355 NAME 'pass' (1, 84) (1, 88)
356
357Shift
358
359 >>> dump_tokens("x = 1 << 1 >> 5")
360 NAME 'x' (1, 0) (1, 1)
361 OP '=' (1, 2) (1, 3)
362 NUMBER '1' (1, 4) (1, 5)
363 OP '<<' (1, 6) (1, 8)
364 NUMBER '1' (1, 9) (1, 10)
365 OP '>>' (1, 11) (1, 13)
366 NUMBER '5' (1, 14) (1, 15)
367
368Additive
369
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000370 >>> dump_tokens("x = 1 - y + 15 - 1 + 0x124 + z + a[5]")
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000371 NAME 'x' (1, 0) (1, 1)
372 OP '=' (1, 2) (1, 3)
373 NUMBER '1' (1, 4) (1, 5)
374 OP '-' (1, 6) (1, 7)
375 NAME 'y' (1, 8) (1, 9)
376 OP '+' (1, 10) (1, 11)
377 NUMBER '15' (1, 12) (1, 14)
378 OP '-' (1, 15) (1, 16)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000379 NUMBER '1' (1, 17) (1, 18)
380 OP '+' (1, 19) (1, 20)
381 NUMBER '0x124' (1, 21) (1, 26)
382 OP '+' (1, 27) (1, 28)
383 NAME 'z' (1, 29) (1, 30)
384 OP '+' (1, 31) (1, 32)
385 NAME 'a' (1, 33) (1, 34)
386 OP '[' (1, 34) (1, 35)
387 NUMBER '5' (1, 35) (1, 36)
388 OP ']' (1, 36) (1, 37)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000389
390Multiplicative
391
392 >>> dump_tokens("x = 1//1*1/5*12%0x12")
393 NAME 'x' (1, 0) (1, 1)
394 OP '=' (1, 2) (1, 3)
395 NUMBER '1' (1, 4) (1, 5)
396 OP '//' (1, 5) (1, 7)
397 NUMBER '1' (1, 7) (1, 8)
398 OP '*' (1, 8) (1, 9)
399 NUMBER '1' (1, 9) (1, 10)
400 OP '/' (1, 10) (1, 11)
401 NUMBER '5' (1, 11) (1, 12)
402 OP '*' (1, 12) (1, 13)
403 NUMBER '12' (1, 13) (1, 15)
404 OP '%' (1, 15) (1, 16)
405 NUMBER '0x12' (1, 16) (1, 20)
406
407Unary
408
409 >>> dump_tokens("~1 ^ 1 & 1 |1 ^ -1")
410 OP '~' (1, 0) (1, 1)
411 NUMBER '1' (1, 1) (1, 2)
412 OP '^' (1, 3) (1, 4)
413 NUMBER '1' (1, 5) (1, 6)
414 OP '&' (1, 7) (1, 8)
415 NUMBER '1' (1, 9) (1, 10)
416 OP '|' (1, 11) (1, 12)
417 NUMBER '1' (1, 12) (1, 13)
418 OP '^' (1, 14) (1, 15)
419 OP '-' (1, 16) (1, 17)
420 NUMBER '1' (1, 17) (1, 18)
421 >>> dump_tokens("-1*1/1+1*1//1 - ---1**1")
422 OP '-' (1, 0) (1, 1)
423 NUMBER '1' (1, 1) (1, 2)
424 OP '*' (1, 2) (1, 3)
425 NUMBER '1' (1, 3) (1, 4)
426 OP '/' (1, 4) (1, 5)
427 NUMBER '1' (1, 5) (1, 6)
428 OP '+' (1, 6) (1, 7)
429 NUMBER '1' (1, 7) (1, 8)
430 OP '*' (1, 8) (1, 9)
431 NUMBER '1' (1, 9) (1, 10)
432 OP '//' (1, 10) (1, 12)
433 NUMBER '1' (1, 12) (1, 13)
434 OP '-' (1, 14) (1, 15)
435 OP '-' (1, 16) (1, 17)
436 OP '-' (1, 17) (1, 18)
437 OP '-' (1, 18) (1, 19)
438 NUMBER '1' (1, 19) (1, 20)
439 OP '**' (1, 20) (1, 22)
440 NUMBER '1' (1, 22) (1, 23)
441
442Selector
443
444 >>> dump_tokens("import sys, time\\nx = sys.modules['time'].time()")
445 NAME 'import' (1, 0) (1, 6)
446 NAME 'sys' (1, 7) (1, 10)
447 OP ',' (1, 10) (1, 11)
448 NAME 'time' (1, 12) (1, 16)
449 NEWLINE '\\n' (1, 16) (1, 17)
450 NAME 'x' (2, 0) (2, 1)
451 OP '=' (2, 2) (2, 3)
452 NAME 'sys' (2, 4) (2, 7)
453 OP '.' (2, 7) (2, 8)
454 NAME 'modules' (2, 8) (2, 15)
455 OP '[' (2, 15) (2, 16)
456 STRING "'time'" (2, 16) (2, 22)
457 OP ']' (2, 22) (2, 23)
458 OP '.' (2, 23) (2, 24)
459 NAME 'time' (2, 24) (2, 28)
460 OP '(' (2, 28) (2, 29)
461 OP ')' (2, 29) (2, 30)
462
463Methods
464
465 >>> dump_tokens("@staticmethod\\ndef foo(x,y): pass")
466 OP '@' (1, 0) (1, 1)
467 NAME 'staticmethod (1, 1) (1, 13)
468 NEWLINE '\\n' (1, 13) (1, 14)
469 NAME 'def' (2, 0) (2, 3)
470 NAME 'foo' (2, 4) (2, 7)
471 OP '(' (2, 7) (2, 8)
472 NAME 'x' (2, 8) (2, 9)
473 OP ',' (2, 9) (2, 10)
474 NAME 'y' (2, 10) (2, 11)
475 OP ')' (2, 11) (2, 12)
476 OP ':' (2, 12) (2, 13)
477 NAME 'pass' (2, 14) (2, 18)
478
479Backslash means line continuation, except for comments
480
481 >>> roundtrip("x=1+\\\\n"
482 ... "1\\n"
483 ... "# This is a comment\\\\n"
484 ... "# This also\\n")
485 True
486 >>> roundtrip("# Comment \\\\nx = 0")
487 True
488
489 >>>
490 >>> tempdir = os.path.dirname(f) or os.curdir
491 >>> testfiles = glob.glob(os.path.join(tempdir, "test*.py"))
492 >>> if not test_support.is_resource_enabled("compiler"):
493 ... testfiles = random.sample(testfiles, 10)
494 ...
495 >>> for testfile in testfiles:
496 ... if not roundtrip(open(testfile)): break
497 ... else: True
498 True
Thomas Wouters89f507f2006-12-13 04:49:30 +0000499"""
500
Guido van Rossumcfbbf482007-08-04 17:43:15 +0000501
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000502from test import test_support
503from tokenize import (tokenize, untokenize, generate_tokens, NUMBER, NAME, OP,
504 STRING, ENDMARKER, tok_name)
Guido van Rossumc43e79f2007-06-18 18:26:36 +0000505from io import StringIO
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000506import os
Raymond Hettinger68c04532005-06-10 11:05:19 +0000507
def dump_tokens(s):
    """Print out the tokens in s in a table format.

    One line is printed per token: the token type name (left-justified
    and cut to 10 characters), the repr of the token string
    (left-justified and cut to 13 characters), then the start and end
    positions as reported by generate_tokens().

    The ENDMARKER is omitted.
    """
    readline = StringIO(s).readline
    for toknum, token, start, end, _line in generate_tokens(readline):
        if toknum == ENDMARKER:
            break
        # Hand the fields to the format string explicitly rather than via
        # locals(), and keep the builtin name ``type`` unshadowed.
        fields = {
            "type": tok_name[toknum],
            "token": token,
            "start": start,
            "end": end,
        }
        print("%(type)-10.10s %(token)-13.13r %(start)s %(end)s" % fields)
Thomas Wouters89f507f2006-12-13 04:49:30 +0000519
def roundtrip(s):
    """Tokenize the source string s, rebuild it, and print the result.

    The full token stream produced by generate_tokens() is passed straight
    to untokenize(); the reconstructed text is printed without an extra
    trailing newline.
    """
    token_stream = generate_tokens(StringIO(s).readline)
    rebuilt = untokenize(token_stream)
    print(rebuilt, end="")
Thomas Wouters89f507f2006-12-13 04:49:30 +0000524
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000525# This is an example from the docs, set up as a doctest.
def decistmt(s):
    """Substitute Decimals for floats in a string of statements.

    >>> from decimal import Decimal
    >>> s = 'print(+21.3e-5*-.1234/81.7)'
    >>> decistmt(s)
    "print (+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7'))"

    The format of the exponent is inherited from the platform C library.
    Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
    we're only showing 11 digits, and the 12th isn't close to 5, the
    rest of the output should be platform-independent.

    >>> exec(s) #doctest: +ELLIPSIS
    -3.2171603427...e-0...7

    Output from calculations with Decimal should be identical across all
    platforms.

    >>> exec(decistmt(s))
    -3.217160342717258261933904529E-7
    """
    result = []
    tokens = generate_tokens(StringIO(s).readline)  # tokenize the string
    for toknum, tokval, _start, _end, _line in tokens:
        # NOTE: only NUMBER tokens whose text contains a '.' are rewritten;
        # a literal such as 1e5 is passed through unchanged.
        if toknum == NUMBER and '.' in tokval:
            # Replace the literal with the call Decimal('<literal>').
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')'),
            ])
        else:
            result.append((toknum, tokval))
    # Only (type, string) pairs are supplied, so untokenize() chooses the
    # spacing of the regenerated source itself.
    return untokenize(result)
562
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000563
# Extra doctest sources keyed by name: the module-level ``doctests`` string
# and the decistmt() function (whose docstring holds examples).
__test__ = dict(doctests=doctests, decistmt=decistmt)
565
def test_main():
    """Run this module's doctests through test_support.run_doctest()."""
    # Import under the package name so that run_doctest() is handed the
    # test.test_tokenize module object.
    from test import test_tokenize as this_module
    verbose = True
    test_support.run_doctest(this_module, verbose)


if __name__ == "__main__":
    test_main()