# -*- coding: utf-8 -*-

doctests = """
Tests for the tokenize module.

The tests can be really simple. Given a small fragment of source
code, print out a table with tokens. The ENDMARKER is omitted for
brevity.

    >>> dump_tokens("1 + 1")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '1'           (1, 0) (1, 1)
    OP         '+'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)

    >>> dump_tokens("if False:\\n"
    ...             "    # NL\\n"
    ...             "    True = False # NEWLINE\\n")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'if'          (1, 0) (1, 2)
    NAME       'False'       (1, 3) (1, 8)
    OP         ':'           (1, 8) (1, 9)
    NEWLINE    '\\n'          (1, 9) (1, 10)
    COMMENT    '# NL'        (2, 4) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    INDENT     '    '        (3, 0) (3, 4)
    NAME       'True'        (3, 4) (3, 8)
    OP         '='           (3, 9) (3, 10)
    NAME       'False'       (3, 11) (3, 16)
    COMMENT    '# NEWLINE'   (3, 17) (3, 26)
    NEWLINE    '\\n'          (3, 26) (3, 27)
    DEDENT     ''            (4, 0) (4, 0)

    >>> indent_error_file = \"""
    ... def k(x):
    ...     x += 2
    ...   x += 5
    ... \"""
    >>> readline = BytesIO(indent_error_file.encode('utf-8')).readline
    >>> for tok in tokenize(readline): pass
    Traceback (most recent call last):
       ...
    IndentationError: unindent does not match any outer indentation level

There are some standard formatting practices that are easy to get right.

    >>> roundtrip("if x == 1:\\n"
    ...           "    print(x)\\n")
    True

    >>> roundtrip("# This is a comment\\n# This also")
    True

Some people use different formatting conventions, which makes
untokenize a little trickier. Note that this test involves trailing
whitespace after the colon.

    >>> roundtrip("if x == 1 : \\n"
    ...           "  print(x)\\n")
    True

Thomas Wouters89f507f2006-12-13 04:49:30 +000062
Benjamin Petersonee8712c2008-05-20 21:35:26 +000063 >>> f = support.findfile("tokenize_tests.txt")
Trent Nelson428de652008-03-18 22:41:35 +000064 >>> roundtrip(open(f, 'rb'))
Christian Heimesdd15f6c2008-03-16 00:07:10 +000065 True
Thomas Wouters89f507f2006-12-13 04:49:30 +000066
Christian Heimesdd15f6c2008-03-16 00:07:10 +000067 >>> roundtrip("if x == 1:\\n"
68 ... " # A comment by itself.\\n"
69 ... " print(x) # Comment here, too.\\n"
70 ... " # Another comment.\\n"
71 ... "after_if = True\\n")
72 True
Thomas Wouters89f507f2006-12-13 04:49:30 +000073
Christian Heimesdd15f6c2008-03-16 00:07:10 +000074 >>> roundtrip("if (x # The comments need to go in the right place\\n"
75 ... " == 1):\\n"
76 ... " print('x==1')\\n")
77 True
Thomas Wouters89f507f2006-12-13 04:49:30 +000078
Christian Heimesdd15f6c2008-03-16 00:07:10 +000079 >>> roundtrip("class Test: # A comment here\\n"
80 ... " # A comment with weird indent\\n"
81 ... " after_com = 5\\n"
82 ... " def x(m): return m*5 # a one liner\\n"
83 ... " def y(m): # A whitespace after the colon\\n"
84 ... " return y*4 # 3-space indent\\n")
85 True
86
87Some error-handling code
88
89 >>> roundtrip("try: import somemodule\\n"
90 ... "except ImportError: # comment\\n"
Christian Heimesba4af492008-03-28 00:55:15 +000091 ... " print('Can not import' # comment2\\n)"
Neal Norwitz752abd02008-05-13 04:55:24 +000092 ... "else: print('Loaded')\\n")
Christian Heimesdd15f6c2008-03-16 00:07:10 +000093 True
94
Eric Smith74ca5572008-03-17 19:49:19 +000095Balancing continuation
Christian Heimesdd15f6c2008-03-16 00:07:10 +000096
97 >>> roundtrip("a = (3,4, \\n"
98 ... "5,6)\\n"
99 ... "y = [3, 4,\\n"
100 ... "5]\\n"
101 ... "z = {'a': 5,\\n"
102 ... "'b':15, 'c':True}\\n"
103 ... "x = len(y) + 5 - a[\\n"
104 ... "3] - a[2]\\n"
105 ... "+ len(z) - z[\\n"
106 ... "'b']\\n")
107 True
108
109Ordinary integers and binary operators
110
111 >>> dump_tokens("0xff <= 255")
Trent Nelson428de652008-03-18 22:41:35 +0000112 ENCODING 'utf-8' (0, 0) (0, 0)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000113 NUMBER '0xff' (1, 0) (1, 4)
114 OP '<=' (1, 5) (1, 7)
115 NUMBER '255' (1, 8) (1, 11)
Eric Smith74ca5572008-03-17 19:49:19 +0000116 >>> dump_tokens("0b10 <= 255")
Trent Nelson428de652008-03-18 22:41:35 +0000117 ENCODING 'utf-8' (0, 0) (0, 0)
Eric Smith74ca5572008-03-17 19:49:19 +0000118 NUMBER '0b10' (1, 0) (1, 4)
119 OP '<=' (1, 5) (1, 7)
120 NUMBER '255' (1, 8) (1, 11)
121 >>> dump_tokens("0o123 <= 0O123")
Trent Nelson428de652008-03-18 22:41:35 +0000122 ENCODING 'utf-8' (0, 0) (0, 0)
Eric Smith74ca5572008-03-17 19:49:19 +0000123 NUMBER '0o123' (1, 0) (1, 5)
124 OP '<=' (1, 6) (1, 8)
125 NUMBER '0O123' (1, 9) (1, 14)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000126 >>> dump_tokens("1234567 > ~0x15")
Trent Nelson428de652008-03-18 22:41:35 +0000127 ENCODING 'utf-8' (0, 0) (0, 0)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000128 NUMBER '1234567' (1, 0) (1, 7)
129 OP '>' (1, 8) (1, 9)
130 OP '~' (1, 10) (1, 11)
131 NUMBER '0x15' (1, 11) (1, 15)
132 >>> dump_tokens("2134568 != 1231515")
Trent Nelson428de652008-03-18 22:41:35 +0000133 ENCODING 'utf-8' (0, 0) (0, 0)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000134 NUMBER '2134568' (1, 0) (1, 7)
135 OP '!=' (1, 8) (1, 10)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000136 NUMBER '1231515' (1, 11) (1, 18)
137 >>> dump_tokens("(-124561-1) & 200000000")
Trent Nelson428de652008-03-18 22:41:35 +0000138 ENCODING 'utf-8' (0, 0) (0, 0)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000139 OP '(' (1, 0) (1, 1)
140 OP '-' (1, 1) (1, 2)
141 NUMBER '124561' (1, 2) (1, 8)
142 OP '-' (1, 8) (1, 9)
143 NUMBER '1' (1, 9) (1, 10)
144 OP ')' (1, 10) (1, 11)
145 OP '&' (1, 12) (1, 13)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000146 NUMBER '200000000' (1, 14) (1, 23)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000147 >>> dump_tokens("0xdeadbeef != -1")
Trent Nelson428de652008-03-18 22:41:35 +0000148 ENCODING 'utf-8' (0, 0) (0, 0)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000149 NUMBER '0xdeadbeef' (1, 0) (1, 10)
150 OP '!=' (1, 11) (1, 13)
151 OP '-' (1, 14) (1, 15)
152 NUMBER '1' (1, 15) (1, 16)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000153 >>> dump_tokens("0xdeadc0de & 12345")
Trent Nelson428de652008-03-18 22:41:35 +0000154 ENCODING 'utf-8' (0, 0) (0, 0)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000155 NUMBER '0xdeadc0de' (1, 0) (1, 10)
156 OP '&' (1, 11) (1, 12)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000157 NUMBER '12345' (1, 13) (1, 18)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000158 >>> dump_tokens("0xFF & 0x15 | 1234")
Trent Nelson428de652008-03-18 22:41:35 +0000159 ENCODING 'utf-8' (0, 0) (0, 0)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000160 NUMBER '0xFF' (1, 0) (1, 4)
161 OP '&' (1, 5) (1, 6)
162 NUMBER '0x15' (1, 7) (1, 11)
163 OP '|' (1, 12) (1, 13)
164 NUMBER '1234' (1, 14) (1, 18)
165
Long integers

    >>> dump_tokens("x = 0")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0'           (1, 4) (1, 5)
    >>> dump_tokens("x = 0xfffffffffff")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0xffffffffff (1, 4) (1, 17)
    >>> dump_tokens("x = 123141242151251616110")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '123141242151 (1, 4) (1, 25)
    >>> dump_tokens("x = -15921590215012591")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '-'           (1, 4) (1, 5)
    NUMBER     '159215902150 (1, 5) (1, 22)

Floating point numbers

    >>> dump_tokens("x = 3.14159")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14159'     (1, 4) (1, 11)
    >>> dump_tokens("x = 314159.")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '314159.'     (1, 4) (1, 11)
    >>> dump_tokens("x = .314159")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '.314159'     (1, 4) (1, 11)
    >>> dump_tokens("x = 3e14159")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3e14159'     (1, 4) (1, 11)
    >>> dump_tokens("x = 3E123")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3E123'       (1, 4) (1, 9)
    >>> dump_tokens("x+y = 3e-1230")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '+'           (1, 1) (1, 2)
    NAME       'y'           (1, 2) (1, 3)
    OP         '='           (1, 4) (1, 5)
    NUMBER     '3e-1230'     (1, 6) (1, 13)
    >>> dump_tokens("x = 3.14e159")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14e159'    (1, 4) (1, 12)

String literals

    >>> dump_tokens("x = ''; y = \\\"\\\"")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "''"          (1, 4) (1, 6)
    OP         ';'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '='           (1, 10) (1, 11)
    STRING     '""'          (1, 12) (1, 14)
    >>> dump_tokens("x = '\\\"'; y = \\\"'\\\"")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '\\'"\\''       (1, 4) (1, 7)
    OP         ';'           (1, 7) (1, 8)
    NAME       'y'           (1, 9) (1, 10)
    OP         '='           (1, 11) (1, 12)
    STRING     '"\\'"'        (1, 13) (1, 16)
    >>> dump_tokens("x = \\\"doesn't \\\"shrink\\\", does it\\\"")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"doesn\\'t "' (1, 4) (1, 14)
    NAME       'shrink'      (1, 14) (1, 20)
    STRING     '", does it"' (1, 20) (1, 31)
    >>> dump_tokens("x = 'abc' + 'ABC'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "'abc'"       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     "'ABC'"       (1, 12) (1, 17)
    >>> dump_tokens('y = "ABC" + "ABC"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"ABC"'       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     '"ABC"'       (1, 12) (1, 17)
    >>> dump_tokens("x = r'abc' + r'ABC' + R'ABC' + R'ABC'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "r'abc'"      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     "r'ABC'"      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     "R'ABC'"      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     "R'ABC'"      (1, 31) (1, 37)
    >>> dump_tokens('y = r"abc" + r"ABC" + R"ABC" + R"ABC"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'r"abc"'      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     'r"ABC"'      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     'R"ABC"'      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     'R"ABC"'      (1, 31) (1, 37)

Operators

    >>> dump_tokens("def d22(a, b, c=2, d=2, *k): pass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd22'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ','           (1, 9) (1, 10)
    NAME       'b'           (1, 11) (1, 12)
    OP         ','           (1, 12) (1, 13)
    NAME       'c'           (1, 14) (1, 15)
    OP         '='           (1, 15) (1, 16)
    NUMBER     '2'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    NAME       'd'           (1, 19) (1, 20)
    OP         '='           (1, 20) (1, 21)
    NUMBER     '2'           (1, 21) (1, 22)
    OP         ','           (1, 22) (1, 23)
    OP         '*'           (1, 24) (1, 25)
    NAME       'k'           (1, 25) (1, 26)
    OP         ')'           (1, 26) (1, 27)
    OP         ':'           (1, 27) (1, 28)
    NAME       'pass'        (1, 29) (1, 33)
    >>> dump_tokens("def d01v_(a=1, *k, **w): pass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd01v_'       (1, 4) (1, 9)
    OP         '('           (1, 9) (1, 10)
    NAME       'a'           (1, 10) (1, 11)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         ','           (1, 13) (1, 14)
    OP         '*'           (1, 15) (1, 16)
    NAME       'k'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    OP         '**'          (1, 19) (1, 21)
    NAME       'w'           (1, 21) (1, 22)
    OP         ')'           (1, 22) (1, 23)
    OP         ':'           (1, 23) (1, 24)
    NAME       'pass'        (1, 25) (1, 29)

Comparison

    >>> dump_tokens("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " +
    ...             "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'if'          (1, 0) (1, 2)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '<'           (1, 5) (1, 6)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    NUMBER     '1'           (1, 11) (1, 12)
    OP         '=='          (1, 13) (1, 15)
    NUMBER     '1'           (1, 16) (1, 17)
    OP         '>='          (1, 18) (1, 20)
    NUMBER     '5'           (1, 21) (1, 22)
    OP         '<='          (1, 23) (1, 25)
    NUMBER     '0x15'        (1, 26) (1, 30)
    OP         '<='          (1, 31) (1, 33)
    NUMBER     '0x12'        (1, 34) (1, 38)
    OP         '!='          (1, 39) (1, 41)
    NUMBER     '1'           (1, 42) (1, 43)
    NAME       'and'         (1, 44) (1, 47)
    NUMBER     '5'           (1, 48) (1, 49)
    NAME       'in'          (1, 50) (1, 52)
    NUMBER     '1'           (1, 53) (1, 54)
    NAME       'not'         (1, 55) (1, 58)
    NAME       'in'          (1, 59) (1, 61)
    NUMBER     '1'           (1, 62) (1, 63)
    NAME       'is'          (1, 64) (1, 66)
    NUMBER     '1'           (1, 67) (1, 68)
    NAME       'or'          (1, 69) (1, 71)
    NUMBER     '5'           (1, 72) (1, 73)
    NAME       'is'          (1, 74) (1, 76)
    NAME       'not'         (1, 77) (1, 80)
    NUMBER     '1'           (1, 81) (1, 82)
    OP         ':'           (1, 82) (1, 83)
    NAME       'pass'        (1, 84) (1, 88)

Shift

    >>> dump_tokens("x = 1 << 1 >> 5")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '<<'          (1, 6) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '>>'          (1, 11) (1, 13)
    NUMBER     '5'           (1, 14) (1, 15)

Additive

    >>> dump_tokens("x = 1 - y + 15 - 1 + 0x124 + z + a[5]")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '-'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '15'          (1, 12) (1, 14)
    OP         '-'           (1, 15) (1, 16)
    NUMBER     '1'           (1, 17) (1, 18)
    OP         '+'           (1, 19) (1, 20)
    NUMBER     '0x124'       (1, 21) (1, 26)
    OP         '+'           (1, 27) (1, 28)
    NAME       'z'           (1, 29) (1, 30)
    OP         '+'           (1, 31) (1, 32)
    NAME       'a'           (1, 33) (1, 34)
    OP         '['           (1, 34) (1, 35)
    NUMBER     '5'           (1, 35) (1, 36)
    OP         ']'           (1, 36) (1, 37)

Multiplicative

    >>> dump_tokens("x = 1//1*1/5*12%0x12")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '//'          (1, 5) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '/'           (1, 10) (1, 11)
    NUMBER     '5'           (1, 11) (1, 12)
    OP         '*'           (1, 12) (1, 13)
    NUMBER     '12'          (1, 13) (1, 15)
    OP         '%'           (1, 15) (1, 16)
    NUMBER     '0x12'        (1, 16) (1, 20)

Unary

    >>> dump_tokens("~1 ^ 1 & 1 |1 ^ -1")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    OP         '~'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '^'           (1, 3) (1, 4)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '&'           (1, 7) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '|'           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '^'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    NUMBER     '1'           (1, 17) (1, 18)
    >>> dump_tokens("-1*1/1+1*1//1 - ---1**1")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    OP         '-'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '*'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '/'           (1, 4) (1, 5)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '+'           (1, 6) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '//'          (1, 10) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    OP         '-'           (1, 17) (1, 18)
    OP         '-'           (1, 18) (1, 19)
    NUMBER     '1'           (1, 19) (1, 20)
    OP         '**'          (1, 20) (1, 22)
    NUMBER     '1'           (1, 22) (1, 23)

Selector

    >>> dump_tokens("import sys, time\\nx = sys.modules['time'].time()")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'import'      (1, 0) (1, 6)
    NAME       'sys'         (1, 7) (1, 10)
    OP         ','           (1, 10) (1, 11)
    NAME       'time'        (1, 12) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'x'           (2, 0) (2, 1)
    OP         '='           (2, 2) (2, 3)
    NAME       'sys'         (2, 4) (2, 7)
    OP         '.'           (2, 7) (2, 8)
    NAME       'modules'     (2, 8) (2, 15)
    OP         '['           (2, 15) (2, 16)
    STRING     "'time'"      (2, 16) (2, 22)
    OP         ']'           (2, 22) (2, 23)
    OP         '.'           (2, 23) (2, 24)
    NAME       'time'        (2, 24) (2, 28)
    OP         '('           (2, 28) (2, 29)
    OP         ')'           (2, 29) (2, 30)

Methods

    >>> dump_tokens("@staticmethod\\ndef foo(x,y): pass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    OP         '@'           (1, 0) (1, 1)
    NAME       'staticmethod (1, 1) (1, 13)
    NEWLINE    '\\n'          (1, 13) (1, 14)
    NAME       'def'         (2, 0) (2, 3)
    NAME       'foo'         (2, 4) (2, 7)
    OP         '('           (2, 7) (2, 8)
    NAME       'x'           (2, 8) (2, 9)
    OP         ','           (2, 9) (2, 10)
    NAME       'y'           (2, 10) (2, 11)
    OP         ')'           (2, 11) (2, 12)
    OP         ':'           (2, 12) (2, 13)
    NAME       'pass'        (2, 14) (2, 18)

Backslash means line continuation, except for comments

    >>> roundtrip("x=1+\\\\n"
    ...           "1\\n"
    ...           "# This is a comment\\\\n"
    ...           "# This also\\n")
    True
    >>> roundtrip("# Comment \\\\nx = 0")
    True

Two string literals on the same line

    >>> roundtrip("'' ''")
    True

Test roundtrip on random python modules.
Pass the '-ucompiler' option to process the full directory.

    >>> import random
    >>> tempdir = os.path.dirname(f) or os.curdir
    >>> testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

    >>> if not support.is_resource_enabled("compiler"):
    ...     testfiles = random.sample(testfiles, 10)
    ...
    >>> for testfile in testfiles:
    ...     if not roundtrip(open(testfile, 'rb')):
    ...         print("Roundtrip failed for file %s" % testfile)
    ...         break
    ...     else: True
    True
"""

from test import support
from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                      STRING, ENDMARKER, tok_name, detect_encoding)
from io import BytesIO
from unittest import TestCase
import os, sys, glob

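# _tokenize is tokenize's private worker function; it is imported above so
# the Test_Tokenize cases below can drive it with an explicit encoding
# (or None) directly, without going through encoding detection.
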
def dump_tokens(s):
    """Print out the tokens in s in a table format.

    The ENDMARKER is omitted.
    """
    f = BytesIO(s.encode('utf-8'))
    for type, token, start, end, line in tokenize(f.readline):
        if type == ENDMARKER:
            break
        type = tok_name[type]
        print("%(type)-10.10s %(token)-13.13r %(start)s %(end)s" % locals())
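    # Note: the "-13.13r" format above truncates each token repr to 13
    # characters, which is why a few long tokens in the doctests appear
    # cut off (e.g. the NUMBER line ending in '0xffffffffff with no
    # closing quote).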

def roundtrip(f):
    """
    Test roundtrip for `untokenize`. `f` is an open file or a string.
    The source code in f is tokenized, converted back to source code via
    tokenize.untokenize(), and tokenized again from the latter. The test
    fails if the second tokenization doesn't match the first.
    """
    if isinstance(f, str):
        f = BytesIO(f.encode('utf-8'))
    token_list = list(tokenize(f.readline))
    f.close()
    tokens1 = [tok[:2] for tok in token_list]
    new_bytes = untokenize(tokens1)
    readline = (line for line in new_bytes.splitlines(1)).__next__
    tokens2 = [tok[:2] for tok in tokenize(readline)]
    return tokens1 == tokens2
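
# A minimal sketch of the contract roundtrip() checks, for reference only
# (illustrative names, not executed as part of the test suite):
#
#     toks = [t[:2] for t in tokenize(BytesIO(b"1 + 1\n").readline)]
#     new_source = untokenize(toks)   # bytes of equivalent source code
#     assert [t[:2] for t in tokenize(BytesIO(new_source).readline)] == toks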

# This is an example from the docs, set up as a doctest.
def decistmt(s):
    """Substitute Decimals for floats in a string of statements.

    >>> from decimal import Decimal
    >>> s = 'print(+21.3e-5*-.1234/81.7)'
    >>> decistmt(s)
    "print (+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7'))"

    The format of the exponent is inherited from the platform C library.
    Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
    we're only showing 12 digits, and the 13th isn't close to 5, the
    rest of the output should be platform-independent.

    >>> exec(s) #doctest: +ELLIPSIS
    -3.21716034272e-0...7

    Output from calculations with Decimal should be identical across all
    platforms.

    >>> exec(decistmt(s))
    -3.217160342717258261933904529E-7
    """
    result = []
    g = tokenize(BytesIO(s.encode('utf-8')).readline)  # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result).decode('utf-8')
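
# Because decistmt() passes untokenize() bare (type, string) pairs with no
# position information, untokenize() falls back to its space-separating
# compatibility mode; that is why the doctest above expects "print (" and
# "Decimal (" with a space before each opening parenthesis.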


class TestTokenizerAdheresToPep0263(TestCase):
    """
    Test that tokenizer adheres to the coding behaviour stipulated in PEP 0263.
    """

    def _testFile(self, filename):
        path = os.path.join(os.path.dirname(__file__), filename)
        return roundtrip(open(path, 'rb'))

    def test_utf8_coding_cookie_and_no_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
        self.assertTrue(self._testFile(f))

    def test_latin1_coding_cookie_and_utf8_bom(self):
        """
        As per PEP 0263, if a file starts with a utf-8 BOM signature, the only
        allowed encoding for the comment is 'utf-8'. The text file used in
        this test starts with a BOM signature, but specifies latin1 as the
        coding, so verify that a SyntaxError is raised, which matches the
        behaviour of the interpreter when it encounters a similar condition.
        """
        f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'
        self.assertRaises(SyntaxError, self._testFile, f)

    def test_no_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'
        self.assertTrue(self._testFile(f))

    def test_utf8_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'
        self.assertTrue(self._testFile(f))


class Test_Tokenize(TestCase):

    def test__tokenize_decodes_with_specified_encoding(self):
        literal = '"ЉЊЈЁЂ"'
        line = literal.encode('utf-8')
        first = False
        def readline():
            nonlocal first
            if not first:
                first = True
                return line
            else:
                return b''

        # skip the initial encoding token and the end token
        tokens = list(_tokenize(readline, encoding='utf-8'))[1:-1]
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEquals(tokens, expected_tokens,
                          "bytes not decoded with encoding")

    def test__tokenize_does_not_decode_with_encoding_none(self):
        literal = '"ЉЊЈЁЂ"'
        first = False
        def readline():
            nonlocal first
            if not first:
                first = True
                return literal
            else:
                return b''

        # skip the end token
        tokens = list(_tokenize(readline, encoding=None))[:-1]
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEquals(tokens, expected_tokens,
                          "string not tokenized when encoding is None")


class TestDetectEncoding(TestCase):

    def get_readline(self, lines):
        index = 0
        def readline():
            nonlocal index
            if index == len(lines):
                raise StopIteration
            line = lines[index]
            index += 1
            return line
        return readline
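        # detect_encoding() requests at most the first two lines and treats
        # StopIteration from readline as end of input, so the short tuples
        # of lines used below are sufficient.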

    def test_no_bom_no_encoding_cookie(self):
        lines = (
            b'# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEquals(encoding, 'utf-8')
        self.assertEquals(consumed_lines, list(lines[:2]))

    def test_bom_no_cookie(self):
        lines = (
            b'\xef\xbb\xbf# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEquals(encoding, 'utf-8')
        self.assertEquals(consumed_lines,
                          [b'# something\n', b'print(something)\n'])

    def test_cookie_first_line_no_bom(self):
        lines = (
            b'# -*- coding: latin-1 -*-\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEquals(encoding, 'iso-8859-1')
        self.assertEquals(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])

    def test_matched_bom_and_cookie_first_line(self):
        lines = (
            b'\xef\xbb\xbf# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEquals(encoding, 'utf-8')
        self.assertEquals(consumed_lines, [b'# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_no_bom(self):
        lines = (
            b'#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEquals(encoding, 'ascii')
        expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
        self.assertEquals(consumed_lines, expected)

    def test_matched_bom_and_cookie_second_line(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'f# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEquals(encoding, 'utf-8')
        self.assertEquals(consumed_lines,
                          [b'#! something\n', b'f# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_latin1_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
                     "iso-8859-1-unix", "iso-latin-1-mac")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"print(things)\n",
                         b"do_something += 4\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEquals(found, "iso-8859-1")

    def test_utf8_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"1 + 3\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEquals(found, "utf-8")

    def test_short_files(self):
        readline = self.get_readline((b'print(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEquals(encoding, 'utf-8')
        self.assertEquals(consumed_lines, [b'print(something)\n'])

        encoding, consumed_lines = detect_encoding(self.get_readline(()))
        self.assertEquals(encoding, 'utf-8')
        self.assertEquals(consumed_lines, [])

        readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEquals(encoding, 'utf-8')
        self.assertEquals(consumed_lines, [b'print(something)\n'])

        readline = self.get_readline((b'\xef\xbb\xbf',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEquals(encoding, 'utf-8')
        self.assertEquals(consumed_lines, [])

        readline = self.get_readline((b'# coding: bad\n',))
        self.assertRaises(SyntaxError, detect_encoding, readline)


class TestTokenize(TestCase):

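    # tokenize() chains detect_encoding() and the private _tokenize() helper;
    # the mocks below stand in for both, checking that the header lines
    # consumed by detect_encoding() are re-emitted first and that the
    # detected encoding is passed through to _tokenize().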
    def test_tokenize(self):
        import tokenize as tokenize_module
        encoding = object()
        encoding_used = None
        def mock_detect_encoding(readline):
            return encoding, ['first', 'second']

        def mock__tokenize(readline, encoding):
            nonlocal encoding_used
            encoding_used = encoding
            out = []
            while True:
                next_line = readline()
                if next_line:
                    out.append(next_line)
                    continue
                return out

        counter = 0
        def mock_readline():
            nonlocal counter
            counter += 1
            if counter == 5:
                return b''
            return counter

        orig_detect_encoding = tokenize_module.detect_encoding
        orig__tokenize = tokenize_module._tokenize
        tokenize_module.detect_encoding = mock_detect_encoding
        tokenize_module._tokenize = mock__tokenize
        try:
            results = tokenize(mock_readline)
            self.assertEquals(list(results), ['first', 'second', 1, 2, 3, 4])
        finally:
            tokenize_module.detect_encoding = orig_detect_encoding
            tokenize_module._tokenize = orig__tokenize

        self.assertEquals(encoding_used, encoding)


__test__ = {"doctests" : doctests, 'decistmt': decistmt}

def test_main():
    from test import test_tokenize
    support.run_doctest(test_tokenize, True)
    support.run_unittest(TestTokenizerAdheresToPep0263)
    support.run_unittest(Test_Tokenize)
    support.run_unittest(TestDetectEncoding)
    support.run_unittest(TestTokenize)

if __name__ == "__main__":
    test_main()