doctests = """
Tests for the tokenize module.

The tests can be really simple. Given a small fragment of source
code, print out a table with tokens. The ENDMARKER is omitted for
brevity.

    >>> dump_tokens("1 + 1")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '1'           (1, 0) (1, 1)
    OP         '+'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)

    >>> dump_tokens("if False:\\n"
    ...     "    # NL\\n"
    ...     "    True = False # NEWLINE\\n")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'if'          (1, 0) (1, 2)
    NAME       'False'       (1, 3) (1, 8)
    OP         ':'           (1, 8) (1, 9)
    NEWLINE    '\\n'          (1, 9) (1, 10)
    COMMENT    '# NL'        (2, 4) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    INDENT     '    '        (3, 0) (3, 4)
    NAME       'True'        (3, 4) (3, 8)
    OP         '='           (3, 9) (3, 10)
    NAME       'False'       (3, 11) (3, 16)
    COMMENT    '# NEWLINE'   (3, 17) (3, 26)
    NEWLINE    '\\n'          (3, 26) (3, 27)
    DEDENT     ''            (4, 0) (4, 0)
Thomas Wouters89f507f2006-12-13 04:49:30 +000031
Christian Heimesdd15f6c2008-03-16 00:07:10 +000032 >>> indent_error_file = \"""
33 ... def k(x):
34 ... x += 2
35 ... x += 5
36 ... \"""
Trent Nelson428de652008-03-18 22:41:35 +000037 >>> readline = BytesIO(indent_error_file.encode('utf-8')).readline
38 >>> for tok in tokenize(readline): pass
Christian Heimesdd15f6c2008-03-16 00:07:10 +000039 Traceback (most recent call last):
40 ...
41 IndentationError: unindent does not match any outer indentation level
Thomas Wouters89f507f2006-12-13 04:49:30 +000042
There are some standard formatting practices that are easy to get right.

    >>> roundtrip("if x == 1:\\n"
    ...           "    print(x)\\n")
    True

    >>> roundtrip("# This is a comment\\n# This also")
    True

Some people use different formatting conventions, which makes
untokenize a little trickier. Note that this test involves trailing
whitespace after the colon, which the roundtrip must preserve.

    >>> roundtrip("if x == 1 : \\n"
    ...           "  print(x)\\n")
    True

    >>> f = support.findfile("tokenize_tests.txt")
    >>> roundtrip(open(f, 'rb'))
    True

    >>> roundtrip("if x == 1:\\n"
    ...           "    # A comment by itself.\\n"
    ...           "    print(x) # Comment here, too.\\n"
    ...           "    # Another comment.\\n"
    ...           "after_if = True\\n")
    True

    >>> roundtrip("if (x # The comments need to go in the right place\\n"
    ...           "    == 1):\\n"
    ...           "    print('x==1')\\n")
    True

    >>> roundtrip("class Test: # A comment here\\n"
    ...           "  # A comment with weird indent\\n"
    ...           "  after_com = 5\\n"
    ...           "  def x(m): return m*5 # a one liner\\n"
    ...           "  def y(m): # A whitespace after the colon\\n"
    ...           "     return y*4 # 3-space indent\\n")
    True

Some error-handling code

    >>> roundtrip("try: import somemodule\\n"
    ...           "except ImportError: # comment\\n"
    ...           "    print('Can not import' # comment2\\n)"
    ...           "else:   print('Loaded')\\n")
    True

Balancing continuation

    >>> roundtrip("a = (3,4, \\n"
    ...           "5,6)\\n"
    ...           "y = [3, 4,\\n"
    ...           "5]\\n"
    ...           "z = {'a': 5,\\n"
    ...           "'b':15, 'c':True}\\n"
    ...           "x = len(y) + 5 - a[\\n"
    ...           "3] - a[2]\\n"
    ...           "+ len(z) - z[\\n"
    ...           "'b']\\n")
    True

Ordinary integers and binary operators

    >>> dump_tokens("0xff <= 255")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '0xff'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    >>> dump_tokens("0b10 <= 255")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '0b10'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    >>> dump_tokens("0o123 <= 0O123")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '0o123'       (1, 0) (1, 5)
    OP         '<='          (1, 6) (1, 8)
    NUMBER     '0O123'       (1, 9) (1, 14)
    >>> dump_tokens("1234567 > ~0x15")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '1234567'     (1, 0) (1, 7)
    OP         '>'           (1, 8) (1, 9)
    OP         '~'           (1, 10) (1, 11)
    NUMBER     '0x15'        (1, 11) (1, 15)
    >>> dump_tokens("2134568 != 1231515")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '2134568'     (1, 0) (1, 7)
    OP         '!='          (1, 8) (1, 10)
    NUMBER     '1231515'     (1, 11) (1, 18)
    >>> dump_tokens("(-124561-1) & 200000000")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    OP         '('           (1, 0) (1, 1)
    OP         '-'           (1, 1) (1, 2)
    NUMBER     '124561'      (1, 2) (1, 8)
    OP         '-'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         ')'           (1, 10) (1, 11)
    OP         '&'           (1, 12) (1, 13)
    NUMBER     '200000000'   (1, 14) (1, 23)
    >>> dump_tokens("0xdeadbeef != -1")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
    OP         '!='          (1, 11) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    NUMBER     '1'           (1, 15) (1, 16)
    >>> dump_tokens("0xdeadc0de & 12345")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
    OP         '&'           (1, 11) (1, 12)
    NUMBER     '12345'       (1, 13) (1, 18)
    >>> dump_tokens("0xFF & 0x15 | 1234")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NUMBER     '0xFF'        (1, 0) (1, 4)
    OP         '&'           (1, 5) (1, 6)
    NUMBER     '0x15'        (1, 7) (1, 11)
    OP         '|'           (1, 12) (1, 13)
    NUMBER     '1234'        (1, 14) (1, 18)

Long integers

    >>> dump_tokens("x = 0")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0'           (1, 4) (1, 5)
    >>> dump_tokens("x = 0xfffffffffff")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0xffffffffff (1, 4) (1, 17)
    >>> dump_tokens("x = 123141242151251616110")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '123141242151 (1, 4) (1, 25)
    >>> dump_tokens("x = -15921590215012591")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '-'           (1, 4) (1, 5)
    NUMBER     '159215902150 (1, 5) (1, 22)

Floating point numbers

    >>> dump_tokens("x = 3.14159")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14159'     (1, 4) (1, 11)
    >>> dump_tokens("x = 314159.")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '314159.'     (1, 4) (1, 11)
    >>> dump_tokens("x = .314159")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '.314159'     (1, 4) (1, 11)
    >>> dump_tokens("x = 3e14159")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3e14159'     (1, 4) (1, 11)
    >>> dump_tokens("x = 3E123")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3E123'       (1, 4) (1, 9)
    >>> dump_tokens("x+y = 3e-1230")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '+'           (1, 1) (1, 2)
    NAME       'y'           (1, 2) (1, 3)
    OP         '='           (1, 4) (1, 5)
    NUMBER     '3e-1230'     (1, 6) (1, 13)
    >>> dump_tokens("x = 3.14e159")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14e159'    (1, 4) (1, 12)

String literals

    >>> dump_tokens("x = ''; y = \\\"\\\"")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "''"          (1, 4) (1, 6)
    OP         ';'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '='           (1, 10) (1, 11)
    STRING     '""'          (1, 12) (1, 14)
    >>> dump_tokens("x = '\\\"'; y = \\\"'\\\"")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '\\'"\\''       (1, 4) (1, 7)
    OP         ';'           (1, 7) (1, 8)
    NAME       'y'           (1, 9) (1, 10)
    OP         '='           (1, 11) (1, 12)
    STRING     '"\\'"'        (1, 13) (1, 16)
    >>> dump_tokens("x = \\\"doesn't \\\"shrink\\\", does it\\\"")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"doesn\\'t "' (1, 4) (1, 14)
    NAME       'shrink'      (1, 14) (1, 20)
    STRING     '", does it"' (1, 20) (1, 31)
    >>> dump_tokens("x = 'abc' + 'ABC'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "'abc'"       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     "'ABC'"       (1, 12) (1, 17)
    >>> dump_tokens('y = "ABC" + "ABC"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"ABC"'       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     '"ABC"'       (1, 12) (1, 17)
    >>> dump_tokens("x = r'abc' + r'ABC' + R'ABC' + R'ABC'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "r'abc'"      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     "r'ABC'"      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     "R'ABC'"      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     "R'ABC'"      (1, 31) (1, 37)
    >>> dump_tokens('y = r"abc" + r"ABC" + R"ABC" + R"ABC"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'r"abc"'      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     'r"ABC"'      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     'R"ABC"'      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     'R"ABC"'      (1, 31) (1, 37)

    >>> dump_tokens("u'abc' + U'abc'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     "u'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "U'abc'"      (1, 9) (1, 15)
    >>> dump_tokens('u"abc" + U"abc"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     'u"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'U"abc"'      (1, 9) (1, 15)

    >>> dump_tokens("b'abc' + B'abc'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     "b'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "B'abc'"      (1, 9) (1, 15)
    >>> dump_tokens('b"abc" + B"abc"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     'b"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'B"abc"'      (1, 9) (1, 15)
    >>> dump_tokens("br'abc' + bR'abc' + Br'abc' + BR'abc'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     "br'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "bR'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Br'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "BR'abc'"     (1, 30) (1, 37)
    >>> dump_tokens('br"abc" + bR"abc" + Br"abc" + BR"abc"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     'br"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'bR"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Br"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'BR"abc"'     (1, 30) (1, 37)
    >>> dump_tokens("rb'abc' + rB'abc' + Rb'abc' + RB'abc'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     "rb'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "rB'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Rb'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "RB'abc'"     (1, 30) (1, 37)
    >>> dump_tokens('rb"abc" + rB"abc" + Rb"abc" + RB"abc"')
    ENCODING   'utf-8'       (0, 0) (0, 0)
    STRING     'rb"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'rB"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Rb"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'RB"abc"'     (1, 30) (1, 37)

Operators

    >>> dump_tokens("def d22(a, b, c=2, d=2, *k): pass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd22'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ','           (1, 9) (1, 10)
    NAME       'b'           (1, 11) (1, 12)
    OP         ','           (1, 12) (1, 13)
    NAME       'c'           (1, 14) (1, 15)
    OP         '='           (1, 15) (1, 16)
    NUMBER     '2'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    NAME       'd'           (1, 19) (1, 20)
    OP         '='           (1, 20) (1, 21)
    NUMBER     '2'           (1, 21) (1, 22)
    OP         ','           (1, 22) (1, 23)
    OP         '*'           (1, 24) (1, 25)
    NAME       'k'           (1, 25) (1, 26)
    OP         ')'           (1, 26) (1, 27)
    OP         ':'           (1, 27) (1, 28)
    NAME       'pass'        (1, 29) (1, 33)
    >>> dump_tokens("def d01v_(a=1, *k, **w): pass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd01v_'       (1, 4) (1, 9)
    OP         '('           (1, 9) (1, 10)
    NAME       'a'           (1, 10) (1, 11)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         ','           (1, 13) (1, 14)
    OP         '*'           (1, 15) (1, 16)
    NAME       'k'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    OP         '**'          (1, 19) (1, 21)
    NAME       'w'           (1, 21) (1, 22)
    OP         ')'           (1, 22) (1, 23)
    OP         ':'           (1, 23) (1, 24)
    NAME       'pass'        (1, 25) (1, 29)

Comparison

    >>> dump_tokens("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " +
    ...             "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'if'          (1, 0) (1, 2)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '<'           (1, 5) (1, 6)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    NUMBER     '1'           (1, 11) (1, 12)
    OP         '=='          (1, 13) (1, 15)
    NUMBER     '1'           (1, 16) (1, 17)
    OP         '>='          (1, 18) (1, 20)
    NUMBER     '5'           (1, 21) (1, 22)
    OP         '<='          (1, 23) (1, 25)
    NUMBER     '0x15'        (1, 26) (1, 30)
    OP         '<='          (1, 31) (1, 33)
    NUMBER     '0x12'        (1, 34) (1, 38)
    OP         '!='          (1, 39) (1, 41)
    NUMBER     '1'           (1, 42) (1, 43)
    NAME       'and'         (1, 44) (1, 47)
    NUMBER     '5'           (1, 48) (1, 49)
    NAME       'in'          (1, 50) (1, 52)
    NUMBER     '1'           (1, 53) (1, 54)
    NAME       'not'         (1, 55) (1, 58)
    NAME       'in'          (1, 59) (1, 61)
    NUMBER     '1'           (1, 62) (1, 63)
    NAME       'is'          (1, 64) (1, 66)
    NUMBER     '1'           (1, 67) (1, 68)
    NAME       'or'          (1, 69) (1, 71)
    NUMBER     '5'           (1, 72) (1, 73)
    NAME       'is'          (1, 74) (1, 76)
    NAME       'not'         (1, 77) (1, 80)
    NUMBER     '1'           (1, 81) (1, 82)
    OP         ':'           (1, 82) (1, 83)
    NAME       'pass'        (1, 84) (1, 88)

Shift

    >>> dump_tokens("x = 1 << 1 >> 5")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '<<'          (1, 6) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '>>'          (1, 11) (1, 13)
    NUMBER     '5'           (1, 14) (1, 15)

Additive

    >>> dump_tokens("x = 1 - y + 15 - 1 + 0x124 + z + a[5]")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '-'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '15'          (1, 12) (1, 14)
    OP         '-'           (1, 15) (1, 16)
    NUMBER     '1'           (1, 17) (1, 18)
    OP         '+'           (1, 19) (1, 20)
    NUMBER     '0x124'       (1, 21) (1, 26)
    OP         '+'           (1, 27) (1, 28)
    NAME       'z'           (1, 29) (1, 30)
    OP         '+'           (1, 31) (1, 32)
    NAME       'a'           (1, 33) (1, 34)
    OP         '['           (1, 34) (1, 35)
    NUMBER     '5'           (1, 35) (1, 36)
    OP         ']'           (1, 36) (1, 37)

Multiplicative

    >>> dump_tokens("x = 1//1*1/5*12%0x12@42")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '//'          (1, 5) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '/'           (1, 10) (1, 11)
    NUMBER     '5'           (1, 11) (1, 12)
    OP         '*'           (1, 12) (1, 13)
    NUMBER     '12'          (1, 13) (1, 15)
    OP         '%'           (1, 15) (1, 16)
    NUMBER     '0x12'        (1, 16) (1, 20)
    OP         '@'           (1, 20) (1, 21)
    NUMBER     '42'          (1, 21) (1, 23)

Unary

    >>> dump_tokens("~1 ^ 1 & 1 |1 ^ -1")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    OP         '~'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '^'           (1, 3) (1, 4)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '&'           (1, 7) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '|'           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '^'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    NUMBER     '1'           (1, 17) (1, 18)
    >>> dump_tokens("-1*1/1+1*1//1 - ---1**1")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    OP         '-'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '*'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '/'           (1, 4) (1, 5)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '+'           (1, 6) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '//'          (1, 10) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    OP         '-'           (1, 17) (1, 18)
    OP         '-'           (1, 18) (1, 19)
    NUMBER     '1'           (1, 19) (1, 20)
    OP         '**'          (1, 20) (1, 22)
    NUMBER     '1'           (1, 22) (1, 23)

Selector

    >>> dump_tokens("import sys, time\\nx = sys.modules['time'].time()")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'import'      (1, 0) (1, 6)
    NAME       'sys'         (1, 7) (1, 10)
    OP         ','           (1, 10) (1, 11)
    NAME       'time'        (1, 12) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'x'           (2, 0) (2, 1)
    OP         '='           (2, 2) (2, 3)
    NAME       'sys'         (2, 4) (2, 7)
    OP         '.'           (2, 7) (2, 8)
    NAME       'modules'     (2, 8) (2, 15)
    OP         '['           (2, 15) (2, 16)
    STRING     "'time'"      (2, 16) (2, 22)
    OP         ']'           (2, 22) (2, 23)
    OP         '.'           (2, 23) (2, 24)
    NAME       'time'        (2, 24) (2, 28)
    OP         '('           (2, 28) (2, 29)
    OP         ')'           (2, 29) (2, 30)

Methods

    >>> dump_tokens("@staticmethod\\ndef foo(x,y): pass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    OP         '@'           (1, 0) (1, 1)
    NAME       'staticmethod (1, 1) (1, 13)
    NEWLINE    '\\n'          (1, 13) (1, 14)
    NAME       'def'         (2, 0) (2, 3)
    NAME       'foo'         (2, 4) (2, 7)
    OP         '('           (2, 7) (2, 8)
    NAME       'x'           (2, 8) (2, 9)
    OP         ','           (2, 9) (2, 10)
    NAME       'y'           (2, 10) (2, 11)
    OP         ')'           (2, 11) (2, 12)
    OP         ':'           (2, 12) (2, 13)
    NAME       'pass'        (2, 14) (2, 18)

Backslash means line continuation, except for comments

    >>> roundtrip("x=1+\\\\n"
    ...           "1\\n"
    ...           "# This is a comment\\\\n"
    ...           "# This also\\n")
    True
    >>> roundtrip("# Comment \\\\nx = 0")
    True

Two string literals on the same line

    >>> roundtrip("'' ''")
    True

Test roundtrip on random Python modules.
Pass the '-ucpu' option to process the full directory.

    >>> import random
    >>> tempdir = os.path.dirname(f) or os.curdir
    >>> testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

Tokenize is broken on test_pep3131.py because regular expressions are
broken on the obscure unicode identifiers in it. *sigh*
With roundtrip extended to test the 5-tuple mode of untokenize,
7 more testfiles fail.  Remove them also until the failure is diagnosed.

    >>> testfiles.remove(os.path.join(tempdir, "test_pep3131.py"))
    >>> for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
    ...     testfiles.remove(os.path.join(tempdir, "test_%s.py") % f)
    ...
    >>> if not support.is_resource_enabled("cpu"):
    ...     testfiles = random.sample(testfiles, 10)
    ...
    >>> for testfile in testfiles:
    ...     if not roundtrip(open(testfile, 'rb')):
    ...         print("Roundtrip failed for file %s" % testfile)
    ...         break
    ... else: True
    True

Evil tabs

    >>> dump_tokens("def f():\\n\\tif x\\n        \\tpass")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    INDENT     '\\t'          (2, 0) (2, 1)
    NAME       'if'          (2, 1) (2, 3)
    NAME       'x'           (2, 4) (2, 5)
    NEWLINE    '\\n'          (2, 5) (2, 6)
    INDENT     '        \\t'  (3, 0) (3, 9)
    NAME       'pass'        (3, 9) (3, 13)
    DEDENT     ''            (4, 0) (4, 0)
    DEDENT     ''            (4, 0) (4, 0)

Non-ascii identifiers

    >>> dump_tokens("Örter = 'places'\\ngrün = 'green'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'Örter'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    STRING     "'places'"    (1, 8) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'grün'        (2, 0) (2, 4)
    OP         '='           (2, 5) (2, 6)
    STRING     "'green'"     (2, 7) (2, 14)

Legacy unicode literals:

    >>> dump_tokens("Örter = u'places'\\ngrün = U'green'")
    ENCODING   'utf-8'       (0, 0) (0, 0)
    NAME       'Örter'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    STRING     "u'places'"   (1, 8) (1, 17)
    NEWLINE    '\\n'          (1, 17) (1, 18)
    NAME       'grün'        (2, 0) (2, 4)
    OP         '='           (2, 5) (2, 6)
    STRING     "U'green'"    (2, 7) (2, 15)
"""

from test import support
from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                      STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
                      open as tokenize_open, Untokenizer)
from io import BytesIO
from unittest import TestCase
import os, sys, glob
import token

def dump_tokens(s):
    """Print out the tokens in s in a table format.

    The ENDMARKER is omitted.
    """
    f = BytesIO(s.encode('utf-8'))
    for type, token, start, end, line in tokenize(f.readline):
        if type == ENDMARKER:
            break
        type = tok_name[type]
        print("%(type)-10.10s %(token)-13.13r %(start)s %(end)s" % locals())
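    # Note: the "-10.10s" and "-13.13r" specifiers left-justify *and
    # truncate* the type and token fields, which is why long token reprs
    # in the doctests above appear clipped (e.g. 'staticmethod with no
    # closing quote).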

def roundtrip(f):
    """
    Test roundtrip for `untokenize`. `f` is an open file or a string.
    The source code in f is tokenized to both 5- and 2-tuples.
    Both sequences are converted back to source code via
    tokenize.untokenize(), and each result is tokenized again to 2-tuples.
    The test fails if the three 2-tuple sequences do not match.

    When untokenize bugs are fixed, untokenize with 5-tuples should
    reproduce code that does not contain a backslash continuation
    following spaces.  A proper test should test this.

    This function would be more useful for correcting bugs if it reported
    the first point of failure, like assertEqual, rather than just
    returning False -- or if it were only used in unittests and not
    doctest and actually used assertEqual.
    """
    # Get source code and original tokenizations
    if isinstance(f, str):
        code = f.encode('utf-8')
    else:
        code = f.read()
        f.close()
    readline = iter(code.splitlines(keepends=True)).__next__
    tokens5 = list(tokenize(readline))
    tokens2 = [tok[:2] for tok in tokens5]
    # Reproduce tokens2 from pairs
    bytes_from2 = untokenize(tokens2)
    readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
    tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
    # Reproduce tokens2 from 5-tuples
    bytes_from5 = untokenize(tokens5)
    readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
    tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
    # Compare 3 versions
    return tokens2 == tokens2_from2 == tokens2_from5
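
# Illustrative sketch (not executed by the tests): in 2-tuple "compat" mode
# untokenize() has no position information, so it normalizes spacing; e.g.
#     untokenize([(NAME, 'x'), (OP, '='), (NUMBER, '1')])
# should give 'x =1 '.  That is why roundtrip() compares token pairs rather
# than exact source text.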

# This is an example from the docs, set up as a doctest.
def decistmt(s):
    """Substitute Decimals for floats in a string of statements.

    >>> from decimal import Decimal
    >>> s = 'print(+21.3e-5*-.1234/81.7)'
    >>> decistmt(s)
    "print (+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7'))"

    The format of the exponent is inherited from the platform C library.
    Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
    we're only showing 11 digits, and the 12th isn't close to 5, the
    rest of the output should be platform-independent.

    >>> exec(s) #doctest: +ELLIPSIS
    -3.2171603427...e-0...7

    Output from calculations with Decimal should be identical across all
    platforms.

    >>> exec(decistmt(s))
    -3.217160342717258261933904529E-7
    """
    result = []
    g = tokenize(BytesIO(s.encode('utf-8')).readline)  # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result).decode('utf-8')


class TestTokenizerAdheresToPep0263(TestCase):
    """
    Test that tokenizer adheres to the coding behaviour stipulated in PEP 0263.
    """

    def _testFile(self, filename):
        path = os.path.join(os.path.dirname(__file__), filename)
        return roundtrip(open(path, 'rb'))

    def test_utf8_coding_cookie_and_no_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
        self.assertTrue(self._testFile(f))

    def test_latin1_coding_cookie_and_utf8_bom(self):
        """
        As per PEP 0263, if a file starts with a utf-8 BOM signature, the only
        allowed encoding for the comment is 'utf-8'.  The text file used in
        this test starts with a BOM signature, but specifies latin1 as the
        coding, so verify that a SyntaxError is raised, which matches the
        behaviour of the interpreter when it encounters a similar condition.
        """
        f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'
        self.assertRaises(SyntaxError, self._testFile, f)

    def test_no_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'
        self.assertTrue(self._testFile(f))

    def test_utf8_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'
        self.assertTrue(self._testFile(f))

    def test_bad_coding_cookie(self):
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')


class Test_Tokenize(TestCase):

    def test__tokenize_decodes_with_specified_encoding(self):
        literal = '"ЉЊЈЁЂ"'
        line = literal.encode('utf-8')
        first = False
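        # Serve the encoded line on the first call, then b'' to signal EOF,
        # matching the readline protocol that _tokenize() expects.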
        def readline():
            nonlocal first
            if not first:
                first = True
                return line
            else:
                return b''

        # skip the initial encoding token and the end token
        tokens = list(_tokenize(readline, encoding='utf-8'))[1:-1]
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEqual(tokens, expected_tokens,
                         "bytes not decoded with encoding")

    def test__tokenize_does_not_decode_with_encoding_none(self):
        literal = '"ЉЊЈЁЂ"'
        first = False
        def readline():
            nonlocal first
            if not first:
                first = True
                return literal
            else:
                return b''

        # skip the end token
        tokens = list(_tokenize(readline, encoding=None))[:-1]
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEqual(tokens, expected_tokens,
                         "string not tokenized when encoding is None")


class TestDetectEncoding(TestCase):

    def get_readline(self, lines):
        index = 0
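        # detect_encoding() treats StopIteration from readline() as end of
        # input, so running past the end of `lines` behaves like EOF.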
        def readline():
            nonlocal index
            if index == len(lines):
                raise StopIteration
            line = lines[index]
            index += 1
            return line
        return readline

    def test_no_bom_no_encoding_cookie(self):
        lines = (
            b'# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, list(lines[:2]))

    def test_bom_no_cookie(self):
        lines = (
            b'\xef\xbb\xbf# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'# something\n', b'print(something)\n'])

    def test_cookie_first_line_no_bom(self):
        lines = (
            b'# -*- coding: latin-1 -*-\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso-8859-1')
        self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])

    def test_matched_bom_and_cookie_first_line(self):
        lines = (
            b'\xef\xbb\xbf# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_no_bom(self):
        lines = (
            b'#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'ascii')
        expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
        self.assertEqual(consumed_lines, expected)

    def test_matched_bom_and_cookie_second_line(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'f# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'#! something\n', b'f# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_noncommented_first_line(self):
        lines = (
            b"print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        expected = [b"print('\xc2\xa3')\n"]
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_commented_first_line(self):
        lines = (
            b"#print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_empty_first_line(self):
        lines = (
            b'\n',
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_latin1_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
                     "iso-8859-1-unix", "iso-latin-1-mac")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"print(things)\n",
                         b"do_something += 4\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "iso-8859-1")

    def test_syntaxerror_latin1(self):
        # Issue 14629: need to raise SyntaxError if the first
        # line(s) have non-UTF-8 characters
        lines = (
            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)


    def test_utf8_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"1 + 3\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "utf-8")

    def test_short_files(self):
        readline = self.get_readline((b'print(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        encoding, consumed_lines = detect_encoding(self.get_readline(()))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        readline = self.get_readline((b'\xef\xbb\xbf',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'# coding: bad\n',))
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_false_encoding(self):
        # Issue 18873: "Encoding" detected in non-comment lines
        readline = self.get_readline((b'print("#coding=fake")',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print("#coding=fake")'])

    def test_open(self):
        filename = support.TESTFN + '.py'
        self.addCleanup(support.unlink, filename)

        # test coding cookie
        for encoding in ('iso-8859-15', 'utf-8'):
            with open(filename, 'w', encoding=encoding) as fp:
                print("# coding: %s" % encoding, file=fp)
                print("print('euro:\u20ac')", file=fp)
            with tokenize_open(filename) as fp:
                self.assertEqual(fp.encoding, encoding)
                self.assertEqual(fp.mode, 'r')

        # test BOM (no coding cookie)
        with open(filename, 'w', encoding='utf-8-sig') as fp:
            print("print('euro:\u20ac')", file=fp)
        with tokenize_open(filename) as fp:
            self.assertEqual(fp.encoding, 'utf-8-sig')
            self.assertEqual(fp.mode, 'r')

    def test_filename_in_exception(self):
        # When possible, include the file name in the exception.
        path = 'some_file_path'
        lines = (
            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
        )
        class Bunk:
            def __init__(self, lines, path):
                self.name = path
                self._lines = lines
                self._index = 0

            def readline(self):
                if self._index == len(lines):
                    raise StopIteration
                line = lines[self._index]
                self._index += 1
                return line

        with self.assertRaises(SyntaxError):
            ins = Bunk(lines, path)
            # Make sure lacking a name isn't an issue.
            del ins.name
            detect_encoding(ins.readline)
        with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
            ins = Bunk(lines, path)
            detect_encoding(ins.readline)


class TestTokenize(TestCase):

    def test_tokenize(self):
        import tokenize as tokenize_module
        encoding = object()
        encoding_used = None
        def mock_detect_encoding(readline):
            return encoding, [b'first', b'second']

        def mock__tokenize(readline, encoding):
            nonlocal encoding_used
            encoding_used = encoding
            out = []
            while True:
                next_line = readline()
                if next_line:
                    out.append(next_line)
                    continue
                return out

        counter = 0
        def mock_readline():
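            # Simulate four numbered source lines followed by EOF (b'').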
            nonlocal counter
            counter += 1
            if counter == 5:
                return b''
            return str(counter).encode()

        orig_detect_encoding = tokenize_module.detect_encoding
        orig__tokenize = tokenize_module._tokenize
        tokenize_module.detect_encoding = mock_detect_encoding
        tokenize_module._tokenize = mock__tokenize
        try:
            results = tokenize(mock_readline)
            self.assertEqual(list(results),
                             [b'first', b'second', b'1', b'2', b'3', b'4'])
        finally:
            tokenize_module.detect_encoding = orig_detect_encoding
            tokenize_module._tokenize = orig__tokenize

        self.assertEqual(encoding_used, encoding)

    def assertExactTypeEqual(self, opstr, *optypes):
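        # Tokenize `opstr` and check the exact_type of every token between
        # the leading ENCODING and trailing ENDMARKER against `optypes`,
        # comparing via tok_name so failures show readable token names.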
        tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
        num_optypes = len(optypes)
        self.assertEqual(len(tokens), 2 + num_optypes)
        self.assertEqual(token.tok_name[tokens[0].exact_type],
                         token.tok_name[ENCODING])
        for i in range(num_optypes):
            self.assertEqual(token.tok_name[tokens[i + 1].exact_type],
                             token.tok_name[optypes[i]])
        self.assertEqual(token.tok_name[tokens[1 + num_optypes].exact_type],
                         token.tok_name[token.ENDMARKER])

    def test_exact_type(self):
        self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
        self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)
        self.assertExactTypeEqual(':', token.COLON)
        self.assertExactTypeEqual(',', token.COMMA)
        self.assertExactTypeEqual(';', token.SEMI)
        self.assertExactTypeEqual('+', token.PLUS)
        self.assertExactTypeEqual('-', token.MINUS)
        self.assertExactTypeEqual('*', token.STAR)
        self.assertExactTypeEqual('/', token.SLASH)
        self.assertExactTypeEqual('|', token.VBAR)
        self.assertExactTypeEqual('&', token.AMPER)
        self.assertExactTypeEqual('<', token.LESS)
        self.assertExactTypeEqual('>', token.GREATER)
        self.assertExactTypeEqual('=', token.EQUAL)
        self.assertExactTypeEqual('.', token.DOT)
        self.assertExactTypeEqual('%', token.PERCENT)
        self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)
        self.assertExactTypeEqual('==', token.EQEQUAL)
        self.assertExactTypeEqual('!=', token.NOTEQUAL)
        self.assertExactTypeEqual('<=', token.LESSEQUAL)
        self.assertExactTypeEqual('>=', token.GREATEREQUAL)
        self.assertExactTypeEqual('~', token.TILDE)
        self.assertExactTypeEqual('^', token.CIRCUMFLEX)
        self.assertExactTypeEqual('<<', token.LEFTSHIFT)
        self.assertExactTypeEqual('>>', token.RIGHTSHIFT)
        self.assertExactTypeEqual('**', token.DOUBLESTAR)
        self.assertExactTypeEqual('+=', token.PLUSEQUAL)
        self.assertExactTypeEqual('-=', token.MINEQUAL)
        self.assertExactTypeEqual('*=', token.STAREQUAL)
        self.assertExactTypeEqual('/=', token.SLASHEQUAL)
        self.assertExactTypeEqual('%=', token.PERCENTEQUAL)
        self.assertExactTypeEqual('&=', token.AMPEREQUAL)
        self.assertExactTypeEqual('|=', token.VBAREQUAL)
        self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
        self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)
        self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)
        self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)
        self.assertExactTypeEqual('//', token.DOUBLESLASH)
        self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)
        self.assertExactTypeEqual('@', token.AT)
        self.assertExactTypeEqual('@=', token.ATEQUAL)

        self.assertExactTypeEqual('a**2+b**2==c**2',
                                  NAME, token.DOUBLESTAR, NUMBER,
                                  token.PLUS,
                                  NAME, token.DOUBLESTAR, NUMBER,
                                  token.EQEQUAL,
                                  NAME, token.DOUBLESTAR, NUMBER)
        self.assertExactTypeEqual('{1, 2, 3}',
                                  token.LBRACE,
                                  token.NUMBER, token.COMMA,
                                  token.NUMBER, token.COMMA,
                                  token.NUMBER,
                                  token.RBRACE)
        self.assertExactTypeEqual('^(x & 0x1)',
                                  token.CIRCUMFLEX,
                                  token.LPAR,
                                  token.NAME, token.AMPER, token.NUMBER,
                                  token.RPAR)

    def test_pathological_trailing_whitespace(self):
        # See http://bugs.python.org/issue16152
        self.assertExactTypeEqual('@ ', token.AT)

class UntokenizeTest(TestCase):

    def test_bad_input_order(self):
        # raise if previous row
        u = Untokenizer()
        u.prev_row = 2
        u.prev_col = 2
        with self.assertRaises(ValueError) as cm:
            u.add_whitespace((1,3))
        self.assertEqual(cm.exception.args[0],
                'start (1,3) precedes previous end (2,2)')
        # raise if previous column in row
        self.assertRaises(ValueError, u.add_whitespace, (2,1))

    def test_backslash_continuation(self):
        # The problem is that <whitespace>\<newline> leaves no token
        u = Untokenizer()
        u.prev_row = 1
        u.prev_col = 1
        u.tokens = []
        u.add_whitespace((2, 0))
        self.assertEqual(u.tokens, ['\\\n'])
        u.prev_row = 2
        u.add_whitespace((4, 4))
        self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])
        self.assertTrue(roundtrip('a\n  b\n    c\n  \\\n  c\n'))

    def test_iter_compat(self):
        u = Untokenizer()
        token = (NAME, 'Hello')
        tokens = [(ENCODING, 'utf-8'), token]
        u.compat(token, iter([]))
        self.assertEqual(u.tokens, ["Hello "])
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter([token])), 'Hello ')
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
        self.assertEqual(u.encoding, 'utf-8')
        self.assertEqual(untokenize(iter(tokens)), b'Hello ')


__test__ = {"doctests" : doctests, 'decistmt': decistmt}

def test_main():
    from test import test_tokenize
    support.run_doctest(test_tokenize, True)
    support.run_unittest(TestTokenizerAdheresToPep0263)
    support.run_unittest(Test_Tokenize)
    support.run_unittest(TestDetectEncoding)
    support.run_unittest(TestTokenize)
    support.run_unittest(UntokenizeTest)

if __name__ == "__main__":
    test_main()