doctests = """
Tests for the tokenize module.

The tests can be really simple. Given a small fragment of source
code, print out a table with tokens. The ENDMARKER is omitted for
brevity.

    >>> dump_tokens("1 + 1")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '1'           (1, 0) (1, 1)
    OP         '+'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)

    >>> dump_tokens("if False:\\n"
    ...             "    # NL\\n"
    ...             "    True = False # NEWLINE\\n")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'if'          (1, 0) (1, 2)
    NAME       'False'       (1, 3) (1, 8)
    OP         ':'           (1, 8) (1, 9)
    NEWLINE    '\\n'          (1, 9) (1, 10)
    COMMENT    '# NL'        (2, 4) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    INDENT     '    '        (3, 0) (3, 4)
    NAME       'True'        (3, 4) (3, 8)
    OP         '='           (3, 9) (3, 10)
    NAME       'False'       (3, 11) (3, 16)
    COMMENT    '# NEWLINE'   (3, 17) (3, 26)
    NEWLINE    '\\n'          (3, 26) (3, 27)
    DEDENT     ''            (4, 0) (4, 0)

    >>> indent_error_file = \"""
    ... def k(x):
    ...     x += 2
    ...   x += 5
    ... \"""
    >>> readline = BytesIO(indent_error_file.encode('utf-8')).readline
    >>> for tok in tokenize(readline): pass
    Traceback (most recent call last):
    ...
    IndentationError: unindent does not match any outer indentation level

There are some standard formatting practices that are easy to get right.

    >>> roundtrip("if x == 1:\\n"
    ...           "    print(x)\\n")
    True

    >>> roundtrip("# This is a comment\\n# This also")
    True
Some people use different formatting conventions, which makes
untokenize a little trickier. Note that this test involves trailing
whitespace after the colon; we use hex escapes to make the two
trailing blanks apparent in the expected output.

    >>> roundtrip("if x == 1 : \\n"
    ...           "  print(x)\\n")
    True

    >>> f = support.findfile("tokenize_tests.txt")
    >>> roundtrip(open(f, 'rb'))
    True

    >>> roundtrip("if x == 1:\\n"
    ...           "    # A comment by itself.\\n"
    ...           "    print(x) # Comment here, too.\\n"
    ...           "    # Another comment.\\n"
    ...           "after_if = True\\n")
    True

    >>> roundtrip("if (x # The comments need to go in the right place\\n"
    ...           "    == 1):\\n"
    ...           "    print('x==1')\\n")
    True

    >>> roundtrip("class Test: # A comment here\\n"
    ...           "  # A comment with weird indent\\n"
    ...           "  after_com = 5\\n"
    ...           "  def x(m): return m*5 # a one liner\\n"
    ...           "  def y(m): # A whitespace after the colon\\n"
    ...           "     return y*4 # 3-space indent\\n")
    True

Some error-handling code

    >>> roundtrip("try: import somemodule\\n"
    ...           "except ImportError: # comment\\n"
    ...           "    print('Can not import' # comment2\\n)"
    ...           "else: print('Loaded')\\n")
    True

Balancing continuation

    >>> roundtrip("a = (3,4, \\n"
    ...           "5,6)\\n"
    ...           "y = [3, 4,\\n"
    ...           "5]\\n"
    ...           "z = {'a': 5,\\n"
    ...           "'b':15, 'c':True}\\n"
    ...           "x = len(y) + 5 - a[\\n"
    ...           "3] - a[2]\\n"
    ...           "+ len(z) - z[\\n"
    ...           "'b']\\n")
    True

Ordinary integers and binary operators

    >>> dump_tokens("0xff <= 255")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '0xff'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    >>> dump_tokens("0b10 <= 255")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '0b10'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    >>> dump_tokens("0o123 <= 0O123")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '0o123'       (1, 0) (1, 5)
    OP         '<='          (1, 6) (1, 8)
    NUMBER     '0O123'       (1, 9) (1, 14)
    >>> dump_tokens("1234567 > ~0x15")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '1234567'     (1, 0) (1, 7)
    OP         '>'           (1, 8) (1, 9)
    OP         '~'           (1, 10) (1, 11)
    NUMBER     '0x15'        (1, 11) (1, 15)
    >>> dump_tokens("2134568 != 1231515")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '2134568'     (1, 0) (1, 7)
    OP         '!='          (1, 8) (1, 10)
    NUMBER     '1231515'     (1, 11) (1, 18)
    >>> dump_tokens("(-124561-1) & 200000000")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    OP         '('           (1, 0) (1, 1)
    OP         '-'           (1, 1) (1, 2)
    NUMBER     '124561'      (1, 2) (1, 8)
    OP         '-'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         ')'           (1, 10) (1, 11)
    OP         '&'           (1, 12) (1, 13)
    NUMBER     '200000000'   (1, 14) (1, 23)
    >>> dump_tokens("0xdeadbeef != -1")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
    OP         '!='          (1, 11) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    NUMBER     '1'           (1, 15) (1, 16)
    >>> dump_tokens("0xdeadc0de & 12345")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
    OP         '&'           (1, 11) (1, 12)
    NUMBER     '12345'       (1, 13) (1, 18)
    >>> dump_tokens("0xFF & 0x15 | 1234")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '0xFF'        (1, 0) (1, 4)
    OP         '&'           (1, 5) (1, 6)
    NUMBER     '0x15'        (1, 7) (1, 11)
    OP         '|'           (1, 12) (1, 13)
    NUMBER     '1234'        (1, 14) (1, 18)

Long integers

    >>> dump_tokens("x = 0")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0'           (1, 4) (1, 5)
    >>> dump_tokens("x = 0xfffffffffff")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0xffffffffff (1, 4) (1, 17)
    >>> dump_tokens("x = 123141242151251616110")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '123141242151 (1, 4) (1, 25)
    >>> dump_tokens("x = -15921590215012591")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '-'           (1, 4) (1, 5)
    NUMBER     '159215902150 (1, 5) (1, 22)

Floating point numbers

    >>> dump_tokens("x = 3.14159")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14159'     (1, 4) (1, 11)
    >>> dump_tokens("x = 314159.")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '314159.'     (1, 4) (1, 11)
    >>> dump_tokens("x = .314159")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '.314159'     (1, 4) (1, 11)
    >>> dump_tokens("x = 3e14159")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3e14159'     (1, 4) (1, 11)
    >>> dump_tokens("x = 3E123")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3E123'       (1, 4) (1, 9)
    >>> dump_tokens("x+y = 3e-1230")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '+'           (1, 1) (1, 2)
    NAME       'y'           (1, 2) (1, 3)
    OP         '='           (1, 4) (1, 5)
    NUMBER     '3e-1230'     (1, 6) (1, 13)
    >>> dump_tokens("x = 3.14e159")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14e159'    (1, 4) (1, 12)

String literals

    >>> dump_tokens("x = ''; y = \\\"\\\"")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "''"          (1, 4) (1, 6)
    OP         ';'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '='           (1, 10) (1, 11)
    STRING     '""'          (1, 12) (1, 14)
    >>> dump_tokens("x = '\\\"'; y = \\\"'\\\"")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '\\'"\\''       (1, 4) (1, 7)
    OP         ';'           (1, 7) (1, 8)
    NAME       'y'           (1, 9) (1, 10)
    OP         '='           (1, 11) (1, 12)
    STRING     '"\\'"'        (1, 13) (1, 16)
    >>> dump_tokens("x = \\\"doesn't \\\"shrink\\\", does it\\\"")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"doesn\\'t "' (1, 4) (1, 14)
    NAME       'shrink'      (1, 14) (1, 20)
    STRING     '", does it"' (1, 20) (1, 31)
    >>> dump_tokens("x = 'abc' + 'ABC'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "'abc'"       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     "'ABC'"       (1, 12) (1, 17)
    >>> dump_tokens('y = "ABC" + "ABC"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"ABC"'       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     '"ABC"'       (1, 12) (1, 17)
    >>> dump_tokens("x = r'abc' + r'ABC' + R'ABC' + R'ABC'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "r'abc'"      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     "r'ABC'"      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     "R'ABC'"      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     "R'ABC'"      (1, 31) (1, 37)
    >>> dump_tokens('y = r"abc" + r"ABC" + R"ABC" + R"ABC"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'r"abc"'      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     'r"ABC"'      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     'R"ABC"'      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     'R"ABC"'      (1, 31) (1, 37)

    >>> dump_tokens("u'abc' + U'abc'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     "u'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "U'abc'"      (1, 9) (1, 15)
    >>> dump_tokens('u"abc" + U"abc"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     'u"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'U"abc"'      (1, 9) (1, 15)

    >>> dump_tokens("b'abc' + B'abc'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     "b'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "B'abc'"      (1, 9) (1, 15)
    >>> dump_tokens('b"abc" + B"abc"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     'b"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'B"abc"'      (1, 9) (1, 15)
    >>> dump_tokens("br'abc' + bR'abc' + Br'abc' + BR'abc'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     "br'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "bR'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Br'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "BR'abc'"     (1, 30) (1, 37)
    >>> dump_tokens('br"abc" + bR"abc" + Br"abc" + BR"abc"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     'br"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'bR"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Br"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'BR"abc"'     (1, 30) (1, 37)
    >>> dump_tokens("rb'abc' + rB'abc' + Rb'abc' + RB'abc'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     "rb'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "rB'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Rb'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "RB'abc'"     (1, 30) (1, 37)
    >>> dump_tokens('rb"abc" + rB"abc" + Rb"abc" + RB"abc"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     'rb"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'rB"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Rb"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'RB"abc"'     (1, 30) (1, 37)

Operators

    >>> dump_tokens("def d22(a, b, c=2, d=2, *k): pass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd22'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ','           (1, 9) (1, 10)
    NAME       'b'           (1, 11) (1, 12)
    OP         ','           (1, 12) (1, 13)
    NAME       'c'           (1, 14) (1, 15)
    OP         '='           (1, 15) (1, 16)
    NUMBER     '2'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    NAME       'd'           (1, 19) (1, 20)
    OP         '='           (1, 20) (1, 21)
    NUMBER     '2'           (1, 21) (1, 22)
    OP         ','           (1, 22) (1, 23)
    OP         '*'           (1, 24) (1, 25)
    NAME       'k'           (1, 25) (1, 26)
    OP         ')'           (1, 26) (1, 27)
    OP         ':'           (1, 27) (1, 28)
    NAME       'pass'        (1, 29) (1, 33)
    >>> dump_tokens("def d01v_(a=1, *k, **w): pass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd01v_'       (1, 4) (1, 9)
    OP         '('           (1, 9) (1, 10)
    NAME       'a'           (1, 10) (1, 11)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         ','           (1, 13) (1, 14)
    OP         '*'           (1, 15) (1, 16)
    NAME       'k'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    OP         '**'          (1, 19) (1, 21)
    NAME       'w'           (1, 21) (1, 22)
    OP         ')'           (1, 22) (1, 23)
    OP         ':'           (1, 23) (1, 24)
    NAME       'pass'        (1, 25) (1, 29)

Comparison

    >>> dump_tokens("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " +
    ...             "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'if'          (1, 0) (1, 2)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '<'           (1, 5) (1, 6)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    NUMBER     '1'           (1, 11) (1, 12)
    OP         '=='          (1, 13) (1, 15)
    NUMBER     '1'           (1, 16) (1, 17)
    OP         '>='          (1, 18) (1, 20)
    NUMBER     '5'           (1, 21) (1, 22)
    OP         '<='          (1, 23) (1, 25)
    NUMBER     '0x15'        (1, 26) (1, 30)
    OP         '<='          (1, 31) (1, 33)
    NUMBER     '0x12'        (1, 34) (1, 38)
    OP         '!='          (1, 39) (1, 41)
    NUMBER     '1'           (1, 42) (1, 43)
    NAME       'and'         (1, 44) (1, 47)
    NUMBER     '5'           (1, 48) (1, 49)
    NAME       'in'          (1, 50) (1, 52)
    NUMBER     '1'           (1, 53) (1, 54)
    NAME       'not'         (1, 55) (1, 58)
    NAME       'in'          (1, 59) (1, 61)
    NUMBER     '1'           (1, 62) (1, 63)
    NAME       'is'          (1, 64) (1, 66)
    NUMBER     '1'           (1, 67) (1, 68)
    NAME       'or'          (1, 69) (1, 71)
    NUMBER     '5'           (1, 72) (1, 73)
    NAME       'is'          (1, 74) (1, 76)
    NAME       'not'         (1, 77) (1, 80)
    NUMBER     '1'           (1, 81) (1, 82)
    OP         ':'           (1, 82) (1, 83)
    NAME       'pass'        (1, 84) (1, 88)

Shift

    >>> dump_tokens("x = 1 << 1 >> 5")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '<<'          (1, 6) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '>>'          (1, 11) (1, 13)
    NUMBER     '5'           (1, 14) (1, 15)

Additive

    >>> dump_tokens("x = 1 - y + 15 - 1 + 0x124 + z + a[5]")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '-'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '15'          (1, 12) (1, 14)
    OP         '-'           (1, 15) (1, 16)
    NUMBER     '1'           (1, 17) (1, 18)
    OP         '+'           (1, 19) (1, 20)
    NUMBER     '0x124'       (1, 21) (1, 26)
    OP         '+'           (1, 27) (1, 28)
    NAME       'z'           (1, 29) (1, 30)
    OP         '+'           (1, 31) (1, 32)
    NAME       'a'           (1, 33) (1, 34)
    OP         '['           (1, 34) (1, 35)
    NUMBER     '5'           (1, 35) (1, 36)
    OP         ']'           (1, 36) (1, 37)

Multiplicative

    >>> dump_tokens("x = 1//1*1/5*12%0x12")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '//'          (1, 5) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '/'           (1, 10) (1, 11)
    NUMBER     '5'           (1, 11) (1, 12)
    OP         '*'           (1, 12) (1, 13)
    NUMBER     '12'          (1, 13) (1, 15)
    OP         '%'           (1, 15) (1, 16)
    NUMBER     '0x12'        (1, 16) (1, 20)

Unary

    >>> dump_tokens("~1 ^ 1 & 1 |1 ^ -1")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    OP         '~'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '^'           (1, 3) (1, 4)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '&'           (1, 7) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '|'           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '^'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    NUMBER     '1'           (1, 17) (1, 18)
    >>> dump_tokens("-1*1/1+1*1//1 - ---1**1")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    OP         '-'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '*'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '/'           (1, 4) (1, 5)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '+'           (1, 6) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '//'          (1, 10) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    OP         '-'           (1, 17) (1, 18)
    OP         '-'           (1, 18) (1, 19)
    NUMBER     '1'           (1, 19) (1, 20)
    OP         '**'          (1, 20) (1, 22)
    NUMBER     '1'           (1, 22) (1, 23)

Selector

    >>> dump_tokens("import sys, time\\nx = sys.modules['time'].time()")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'import'      (1, 0) (1, 6)
    NAME       'sys'         (1, 7) (1, 10)
    OP         ','           (1, 10) (1, 11)
    NAME       'time'        (1, 12) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'x'           (2, 0) (2, 1)
    OP         '='           (2, 2) (2, 3)
    NAME       'sys'         (2, 4) (2, 7)
    OP         '.'           (2, 7) (2, 8)
    NAME       'modules'     (2, 8) (2, 15)
    OP         '['           (2, 15) (2, 16)
    STRING     "'time'"      (2, 16) (2, 22)
    OP         ']'           (2, 22) (2, 23)
    OP         '.'           (2, 23) (2, 24)
    NAME       'time'        (2, 24) (2, 28)
    OP         '('           (2, 28) (2, 29)
    OP         ')'           (2, 29) (2, 30)

Methods

    >>> dump_tokens("@staticmethod\\ndef foo(x,y): pass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    OP         '@'           (1, 0) (1, 1)
    NAME       'staticmethod (1, 1) (1, 13)
    NEWLINE    '\\n'          (1, 13) (1, 14)
    NAME       'def'         (2, 0) (2, 3)
    NAME       'foo'         (2, 4) (2, 7)
    OP         '('           (2, 7) (2, 8)
    NAME       'x'           (2, 8) (2, 9)
    OP         ','           (2, 9) (2, 10)
    NAME       'y'           (2, 10) (2, 11)
    OP         ')'           (2, 11) (2, 12)
    OP         ':'           (2, 12) (2, 13)
    NAME       'pass'        (2, 14) (2, 18)

Backslash means line continuation, except for comments

    >>> roundtrip("x=1+\\\\n"
    ...           "1\\n"
    ...           "# This is a comment\\\\n"
    ...           "# This also\\n")
    True
    >>> roundtrip("# Comment \\\\nx = 0")
    True

Two string literals on the same line

    >>> roundtrip("'' ''")
    True

Test roundtrip on random Python modules.
Pass the '-ucpu' option to process the full directory.

    >>> import random
    >>> tempdir = os.path.dirname(f) or os.curdir
    >>> testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

Tokenize is broken on test_pep3131.py because regular expressions are
broken on the obscure unicode identifiers in it. *sigh*
With roundtrip extended to test the 5-tuple mode of untokenize,
7 more testfiles fail. Remove them also until the failure is diagnosed.

    >>> testfiles.remove(os.path.join(tempdir, "test_pep3131.py"))
    >>> for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
    ...     testfiles.remove(os.path.join(tempdir, "test_%s.py") % f)
    ...
    >>> if not support.is_resource_enabled("cpu"):
    ...     testfiles = random.sample(testfiles, 10)
    ...
    >>> for testfile in testfiles:
    ...     if not roundtrip(open(testfile, 'rb')):
    ...         print("Roundtrip failed for file %s" % testfile)
    ...         break
    ... else: True
    True

Evil tabs

    >>> dump_tokens("def f():\\n\\tif x\\n        \\tpass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    INDENT     '\\t'          (2, 0) (2, 1)
    NAME       'if'          (2, 1) (2, 3)
    NAME       'x'           (2, 4) (2, 5)
    NEWLINE    '\\n'          (2, 5) (2, 6)
    INDENT     '        \\t'  (3, 0) (3, 9)
    NAME       'pass'        (3, 9) (3, 13)
    DEDENT     ''            (4, 0) (4, 0)
    DEDENT     ''            (4, 0) (4, 0)

Non-ascii identifiers

    >>> dump_tokens("Örter = 'places'\\ngrün = 'green'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'Örter'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    STRING     "'places'"    (1, 8) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'grün'        (2, 0) (2, 4)
    OP         '='           (2, 5) (2, 6)
    STRING     "'green'"     (2, 7) (2, 14)

Legacy unicode literals:

    >>> dump_tokens("Örter = u'places'\\ngrün = U'green'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'Örter'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    STRING     "u'places'"   (1, 8) (1, 17)
    NEWLINE    '\\n'          (1, 17) (1, 18)
    NAME       'grün'        (2, 0) (2, 4)
    OP         '='           (2, 5) (2, 6)
    STRING     "U'green'"    (2, 7) (2, 15)
"""

from test import support
from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                      STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
                      open as tokenize_open, Untokenizer)
from io import BytesIO
from unittest import TestCase
import os, sys, glob
import token

def dump_tokens(s):
    """Print out the tokens in s in a table format.

    The ENDMARKER is omitted.
    """
    f = BytesIO(s.encode('utf-8'))
    for type, token, start, end, line in tokenize(f.readline):
        if type == ENDMARKER:
            break
        type = tok_name[type]
        print("%(type)-10.10s %(token)-13.13r %(start)s %(end)s" % locals())

def roundtrip(f):
    """
    Test roundtrip for `untokenize`. `f` is an open file or a string.
    The source code in f is tokenized to both 5- and 2-tuples.
    Both sequences are converted back to source code via
    tokenize.untokenize(), and the latter tokenized again to 2-tuples.
    The test fails if the three 2-tuple sequences do not match.

    When untokenize bugs are fixed, untokenize with 5-tuples should
    reproduce code that does not contain a backslash continuation
    following spaces. A proper test should test this.

    This function would be more useful for correcting bugs if it reported
    the first point of failure, like assertEqual, rather than just
    returning False -- or if it were only used in unittests and not
    doctest and actually used assertEqual.
    """
    # Get source code and original tokenizations
    if isinstance(f, str):
        code = f.encode('utf-8')
    else:
        code = f.read()
        f.close()
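    # tokenize() expects a readline callable that returns bytes lines.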
    readline = iter(code.splitlines(keepends=True)).__next__
    tokens5 = list(tokenize(readline))
    tokens2 = [tok[:2] for tok in tokens5]
    # Reproduce tokens2 from pairs
    bytes_from2 = untokenize(tokens2)
    readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
    tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
    # Reproduce tokens2 from 5-tuples
    bytes_from5 = untokenize(tokens5)
    readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
    tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
    # Compare 3 versions
    return tokens2 == tokens2_from2 == tokens2_from5

# This is an example from the docs, set up as a doctest.
def decistmt(s):
    """Substitute Decimals for floats in a string of statements.

    >>> from decimal import Decimal
    >>> s = 'print(+21.3e-5*-.1234/81.7)'
    >>> decistmt(s)
    "print (+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7'))"

    The format of the exponent is inherited from the platform C library.
    Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
    we're only showing 11 digits, and the 12th isn't close to 5, the
    rest of the output should be platform-independent.

    >>> exec(s) #doctest: +ELLIPSIS
    -3.2171603427...e-0...7

    Output from calculations with Decimal should be identical across all
    platforms.

    >>> exec(decistmt(s))
    -3.217160342717258261933904529E-7
    """
    result = []
    g = tokenize(BytesIO(s.encode('utf-8')).readline)  # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result).decode('utf-8')


class TestTokenizerAdheresToPep0263(TestCase):
    """
    Test that tokenizer adheres to the coding behaviour stipulated in PEP 0263.
    """

    def _testFile(self, filename):
        path = os.path.join(os.path.dirname(__file__), filename)
        return roundtrip(open(path, 'rb'))

    def test_utf8_coding_cookie_and_no_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
        self.assertTrue(self._testFile(f))

    def test_latin1_coding_cookie_and_utf8_bom(self):
        """
        As per PEP 0263, if a file starts with a utf-8 BOM signature, the only
        allowed encoding for the comment is 'utf-8'. The text file used in
        this test starts with a BOM signature, but specifies latin1 as the
        coding, so verify that a SyntaxError is raised, which matches the
        behaviour of the interpreter when it encounters a similar condition.
        """
        f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'
        self.assertRaises(SyntaxError, self._testFile, f)

    def test_no_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'
        self.assertTrue(self._testFile(f))

    def test_utf8_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'
        self.assertTrue(self._testFile(f))

    def test_bad_coding_cookie(self):
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')


class Test_Tokenize(TestCase):

    def test__tokenize_decodes_with_specified_encoding(self):
        literal = '"ЉЊЈЁЂ"'
        line = literal.encode('utf-8')
        first = False
        def readline():
            nonlocal first
            if not first:
                first = True
                return line
            else:
                return b''

        # skip the initial encoding token and the end token
        tokens = list(_tokenize(readline, encoding='utf-8'))[1:-1]
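        # Each 5-tuple is (type, string, start, end, line); type 3 is token.STRING.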
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEqual(tokens, expected_tokens,
                         "bytes not decoded with encoding")

    def test__tokenize_does_not_decode_with_encoding_none(self):
        literal = '"ЉЊЈЁЂ"'
        first = False
        def readline():
            nonlocal first
            if not first:
                first = True
                return literal
            else:
                return b''

        # skip the end token
        tokens = list(_tokenize(readline, encoding=None))[:-1]
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEqual(tokens, expected_tokens,
                         "string not tokenized when encoding is None")


class TestDetectEncoding(TestCase):

    def get_readline(self, lines):
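        # Return a readline() that serves `lines` one at a time, then raises StopIteration.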
        index = 0
        def readline():
            nonlocal index
            if index == len(lines):
                raise StopIteration
            line = lines[index]
            index += 1
            return line
        return readline

    def test_no_bom_no_encoding_cookie(self):
        lines = (
            b'# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, list(lines[:2]))

    def test_bom_no_cookie(self):
        lines = (
            b'\xef\xbb\xbf# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'# something\n', b'print(something)\n'])

    def test_cookie_first_line_no_bom(self):
        lines = (
            b'# -*- coding: latin-1 -*-\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso-8859-1')
        self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])

    def test_matched_bom_and_cookie_first_line(self):
        lines = (
            b'\xef\xbb\xbf# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_no_bom(self):
        lines = (
            b'#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'ascii')
        expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
        self.assertEqual(consumed_lines, expected)

    def test_matched_bom_and_cookie_second_line(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'f# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'#! something\n', b'f# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_noncommented_first_line(self):
        lines = (
            b"print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        expected = [b"print('\xc2\xa3')\n"]
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_commented_first_line(self):
        lines = (
            b"#print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_empty_first_line(self):
        lines = (
            b'\n',
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_latin1_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
                     "iso-8859-1-unix", "iso-latin-1-mac")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"print(things)\n",
                         b"do_something += 4\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "iso-8859-1")

    def test_syntaxerror_latin1(self):
        # Issue 14629: need to raise SyntaxError if the first
        # line(s) have non-UTF-8 characters
        lines = (
            b'print("\xdf")',  # Latin-1: LATIN SMALL LETTER SHARP S
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)


    def test_utf8_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"1 + 3\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "utf-8")

    def test_short_files(self):
        readline = self.get_readline((b'print(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        encoding, consumed_lines = detect_encoding(self.get_readline(()))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        readline = self.get_readline((b'\xef\xbb\xbf',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'# coding: bad\n',))
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_false_encoding(self):
        # Issue 18873: "Encoding" detected in non-comment lines
        readline = self.get_readline((b'print("#coding=fake")',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print("#coding=fake")'])

    def test_open(self):
        filename = support.TESTFN + '.py'
        self.addCleanup(support.unlink, filename)

        # test coding cookie
        for encoding in ('iso-8859-15', 'utf-8'):
            with open(filename, 'w', encoding=encoding) as fp:
                print("# coding: %s" % encoding, file=fp)
                print("print('euro:\u20ac')", file=fp)
            with tokenize_open(filename) as fp:
                self.assertEqual(fp.encoding, encoding)
                self.assertEqual(fp.mode, 'r')

        # test BOM (no coding cookie)
        with open(filename, 'w', encoding='utf-8-sig') as fp:
            print("print('euro:\u20ac')", file=fp)
        with tokenize_open(filename) as fp:
            self.assertEqual(fp.encoding, 'utf-8-sig')
            self.assertEqual(fp.mode, 'r')

    def test_filename_in_exception(self):
        # When possible, include the file name in the exception.
        path = 'some_file_path'
        lines = (
            b'print("\xdf")',  # Latin-1: LATIN SMALL LETTER SHARP S
        )
        class Bunk:
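            # Minimal readline provider; .name (when present) gives detect_encoding a file name.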
            def __init__(self, lines, path):
                self.name = path
                self._lines = lines
                self._index = 0

            def readline(self):
                if self._index == len(lines):
                    raise StopIteration
                line = lines[self._index]
                self._index += 1
                return line

        with self.assertRaises(SyntaxError):
            ins = Bunk(lines, path)
            # Make sure lacking a name isn't an issue.
            del ins.name
            detect_encoding(ins.readline)
        with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
            ins = Bunk(lines, path)
            detect_encoding(ins.readline)


class TestTokenize(TestCase):

    def test_tokenize(self):
        import tokenize as tokenize_module
        encoding = object()
        encoding_used = None
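        # Stub out detect_encoding(): report the sentinel encoding and two consumed lines.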
        def mock_detect_encoding(readline):
            return encoding, ['first', 'second']

        def mock__tokenize(readline, encoding):
            nonlocal encoding_used
            encoding_used = encoding
            out = []
            while True:
                next_line = readline()
                if next_line:
                    out.append(next_line)
                    continue
                return out

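        # Fake readline: returns 1, 2, 3, 4, then b'' to signal EOF.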
        counter = 0
        def mock_readline():
            nonlocal counter
            counter += 1
            if counter == 5:
                return b''
            return counter

        orig_detect_encoding = tokenize_module.detect_encoding
        orig__tokenize = tokenize_module._tokenize
        tokenize_module.detect_encoding = mock_detect_encoding
        tokenize_module._tokenize = mock__tokenize
        try:
            results = tokenize(mock_readline)
            self.assertEqual(list(results), ['first', 'second', 1, 2, 3, 4])
        finally:
            tokenize_module.detect_encoding = orig_detect_encoding
            tokenize_module._tokenize = orig__tokenize

        self.assertEqual(encoding_used, encoding)

    def assertExactTypeEqual(self, opstr, *optypes):
        tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
        num_optypes = len(optypes)
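        # tokens[0] is the ENCODING token and tokens[-1] is ENDMARKER, hence 2 + num_optypes.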
        self.assertEqual(len(tokens), 2 + num_optypes)
        self.assertEqual(token.tok_name[tokens[0].exact_type],
                         token.tok_name[ENCODING])
        for i in range(num_optypes):
            self.assertEqual(token.tok_name[tokens[i + 1].exact_type],
                             token.tok_name[optypes[i]])
        self.assertEqual(token.tok_name[tokens[1 + num_optypes].exact_type],
                         token.tok_name[token.ENDMARKER])

    def test_exact_type(self):
        self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
        self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)
        self.assertExactTypeEqual(':', token.COLON)
        self.assertExactTypeEqual(',', token.COMMA)
        self.assertExactTypeEqual(';', token.SEMI)
        self.assertExactTypeEqual('+', token.PLUS)
        self.assertExactTypeEqual('-', token.MINUS)
        self.assertExactTypeEqual('*', token.STAR)
        self.assertExactTypeEqual('/', token.SLASH)
        self.assertExactTypeEqual('|', token.VBAR)
        self.assertExactTypeEqual('&', token.AMPER)
        self.assertExactTypeEqual('<', token.LESS)
        self.assertExactTypeEqual('>', token.GREATER)
        self.assertExactTypeEqual('=', token.EQUAL)
        self.assertExactTypeEqual('.', token.DOT)
        self.assertExactTypeEqual('%', token.PERCENT)
        self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)
        self.assertExactTypeEqual('==', token.EQEQUAL)
        self.assertExactTypeEqual('!=', token.NOTEQUAL)
        self.assertExactTypeEqual('<=', token.LESSEQUAL)
        self.assertExactTypeEqual('>=', token.GREATEREQUAL)
        self.assertExactTypeEqual('~', token.TILDE)
        self.assertExactTypeEqual('^', token.CIRCUMFLEX)
        self.assertExactTypeEqual('<<', token.LEFTSHIFT)
        self.assertExactTypeEqual('>>', token.RIGHTSHIFT)
        self.assertExactTypeEqual('**', token.DOUBLESTAR)
        self.assertExactTypeEqual('+=', token.PLUSEQUAL)
        self.assertExactTypeEqual('-=', token.MINEQUAL)
        self.assertExactTypeEqual('*=', token.STAREQUAL)
        self.assertExactTypeEqual('/=', token.SLASHEQUAL)
        self.assertExactTypeEqual('%=', token.PERCENTEQUAL)
        self.assertExactTypeEqual('&=', token.AMPEREQUAL)
        self.assertExactTypeEqual('|=', token.VBAREQUAL)
        self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
        self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
        self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)
        self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)
        self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)
        self.assertExactTypeEqual('//', token.DOUBLESLASH)
        self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)
        self.assertExactTypeEqual('@', token.AT)

        self.assertExactTypeEqual('a**2+b**2==c**2',
                                  NAME, token.DOUBLESTAR, NUMBER,
                                  token.PLUS,
                                  NAME, token.DOUBLESTAR, NUMBER,
                                  token.EQEQUAL,
                                  NAME, token.DOUBLESTAR, NUMBER)
        self.assertExactTypeEqual('{1, 2, 3}',
                                  token.LBRACE,
                                  token.NUMBER, token.COMMA,
                                  token.NUMBER, token.COMMA,
                                  token.NUMBER,
                                  token.RBRACE)
        self.assertExactTypeEqual('^(x & 0x1)',
                                  token.CIRCUMFLEX,
                                  token.LPAR,
                                  token.NAME, token.AMPER, token.NUMBER,
                                  token.RPAR)

    def test_pathological_trailing_whitespace(self):
        # See http://bugs.python.org/issue16152
        self.assertExactTypeEqual('@ ', token.AT)


class UntokenizeTest(TestCase):

    def test_bad_input_order(self):
        # raise if previous row
        u = Untokenizer()
        u.prev_row = 2
        u.prev_col = 2
        with self.assertRaises(ValueError) as cm:
            u.add_whitespace((1,3))
        self.assertEqual(cm.exception.args[0],
                         'start (1,3) precedes previous end (2,2)')
        # raise if previous column in row
        self.assertRaises(ValueError, u.add_whitespace, (2,1))

    def test_backslash_continuation(self):
        # The problem is that <whitespace>\<newline> leaves no token
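        # so add_whitespace() must emit the backslash continuations itself
        # whenever the target row is past the previous row.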
        u = Untokenizer()
        u.prev_row = 1
        u.prev_col = 1
        u.tokens = []
        u.add_whitespace((2, 0))
        self.assertEqual(u.tokens, ['\\\n'])
        u.prev_row = 2
        u.add_whitespace((4, 4))
        self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])
        self.assertTrue(roundtrip('a\n b\n c\n \\\n c\n'))

    def test_iter_compat(self):
        u = Untokenizer()
        token = (NAME, 'Hello')
        tokens = [(ENCODING, 'utf-8'), token]
        u.compat(token, iter([]))
        self.assertEqual(u.tokens, ["Hello "])
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter([token])), 'Hello ')
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
        self.assertEqual(u.encoding, 'utf-8')
        self.assertEqual(untokenize(iter(tokens)), b'Hello ')


__test__ = {"doctests" : doctests, 'decistmt': decistmt}

def test_main():
    from test import test_tokenize
    support.run_doctest(test_tokenize, True)
    support.run_unittest(TestTokenizerAdheresToPep0263)
    support.run_unittest(Test_Tokenize)
    support.run_unittest(TestDetectEncoding)
    support.run_unittest(TestTokenize)
    support.run_unittest(UntokenizeTest)

if __name__ == "__main__":
    test_main()