# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation.
# All rights reserved.

"""Tokenization help for Python programs.

generate_tokens(readline) is a generator that breaks a stream of
text into Python tokens. It accepts a readline-like method which is called
repeatedly to get the next line of input (or "" for EOF). It generates
5-tuples with these members:

    the token type (see token.py)
    the token (a string)
    the starting (row, column) indices of the token (a 2-tuple of ints)
    the ending (row, column) indices of the token (a 2-tuple of ints)
    the original line (string)

It is designed to match the working of the Python tokenizer exactly, except
that it produces COMMENT tokens for comments and gives type OP for all
operators.

Older entry points
    tokenize_loop(readline, tokeneater)
    tokenize(readline, tokeneater=printtoken)
are the same, except instead of generating tokens, tokeneater is a callback
function to which the 5 fields described above are passed as 5 arguments,
each time a new token is found."""
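
# A minimal usage sketch (illustrative only; io.StringIO as the input
# source is an assumption of the demo, not a requirement of the API):
#
#     import io
#     for tok_type, tok_str, start, end, line in \
#             generate_tokens(io.StringIO("x = 1\n").readline):
#         print(tok_name[tok_type], repr(tok_str), start, end)
#
# prints NAME 'x' (1, 0) (1, 1), OP '=' (1, 2) (1, 3), NUMBER '1' (1, 4)
# (1, 5), and so on through NEWLINE and ENDMARKER.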

__author__ = 'Ka-Ping Yee <ping@lfw.org>'
__credits__ = \
    'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'

import string, re
from codecs import BOM_UTF8, lookup
from lib2to3.pgen2.token import *

from . import token
__all__ = [x for x in dir(token) if x[0] != '_'] + ["tokenize",
           "generate_tokens", "untokenize"]
del token

try:
    bytes
except NameError:
    # Support bytes type in Python <= 2.5, so 2to3 turns itself into
    # valid Python 3 code.
    bytes = str

def group(*choices): return '(' + '|'.join(choices) + ')'
def any(*choices): return group(*choices) + '*'
def maybe(*choices): return group(*choices) + '?'
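
# For example (illustrative): group('a', 'b') yields '(a|b)', so
# any('a', 'b') yields '(a|b)*' and maybe('a', 'b') yields '(a|b)?'.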

Whitespace = r'[ \f\t]*'
Comment = r'#[^\r\n]*'
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
Name = r'[a-zA-Z_]\w*'

Binnumber = r'0[bB][01]*'
Hexnumber = r'0[xX][\da-fA-F]*[lL]?'
Octnumber = r'0[oO]?[0-7]*[lL]?'
Decnumber = r'[1-9]\d*[lL]?'
Intnumber = group(Binnumber, Hexnumber, Octnumber, Decnumber)
Exponent = r'[eE][-+]?\d+'
Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)
Expfloat = r'\d+' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')
Number = group(Imagnumber, Floatnumber, Intnumber)
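
# Illustrative matches (not exhaustive): Number accepts '0x1f', '0b101',
# '42L', '3.14e-2', and '2j'.  Imagnumber is listed first so that '3.14j'
# is matched whole rather than stopping at the float '3.14'.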

# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
# Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
Triple = group("[ubUB]?[rR]?'''", '[ubUB]?[rR]?"""')
# Single-line ' or " string.
String = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
               r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')

# Because alternatives are tried from left to right (first match wins), be
# sure to put the longest operators first (e.g., if = came before ==, ==
# would get recognized as two instances of =).
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
                 r"//=?", r"->",
                 r"[+\-*/%&@|^=<>]=?",
                 r"~")

Bracket = '[][(){}]'
Special = group(r'\r?\n', r'[:;.,`@]')
Funny = group(Operator, Bracket, Special)

PlainToken = group(Number, Funny, String, Name)
Token = Ignore + PlainToken

# First (or only) line of ' or " string.
ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                group("'", r'\\\r?\n'),
                r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n', Comment, Triple)
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)

tokenprog, pseudoprog, single3prog, double3prog = list(map(
    re.compile, (Token, PseudoToken, Single3, Double3)))
endprogs = {"'": re.compile(Single), '"': re.compile(Double),
            "'''": single3prog, '"""': double3prog,
            "r'''": single3prog, 'r"""': double3prog,
            "u'''": single3prog, 'u"""': double3prog,
            "b'''": single3prog, 'b"""': double3prog,
            "ur'''": single3prog, 'ur"""': double3prog,
            "br'''": single3prog, 'br"""': double3prog,
            "R'''": single3prog, 'R"""': double3prog,
            "U'''": single3prog, 'U"""': double3prog,
            "B'''": single3prog, 'B"""': double3prog,
            "uR'''": single3prog, 'uR"""': double3prog,
            "Ur'''": single3prog, 'Ur"""': double3prog,
            "UR'''": single3prog, 'UR"""': double3prog,
            "bR'''": single3prog, 'bR"""': double3prog,
            "Br'''": single3prog, 'Br"""': double3prog,
            "BR'''": single3prog, 'BR"""': double3prog,
            'r': None, 'R': None,
            'u': None, 'U': None,
            'b': None, 'B': None}

triple_quoted = {}
for t in ("'''", '"""',
          "r'''", 'r"""', "R'''", 'R"""',
          "u'''", 'u"""', "U'''", 'U"""',
          "b'''", 'b"""', "B'''", 'B"""',
          "ur'''", 'ur"""', "Ur'''", 'Ur"""',
          "uR'''", 'uR"""', "UR'''", 'UR"""',
          "br'''", 'br"""', "Br'''", 'Br"""',
          "bR'''", 'bR"""', "BR'''", 'BR"""',):
    triple_quoted[t] = t
single_quoted = {}
for t in ("'", '"',
          "r'", 'r"', "R'", 'R"',
          "u'", 'u"', "U'", 'U"',
          "b'", 'b"', "B'", 'B"',
          "ur'", 'ur"', "Ur'", 'Ur"',
          "uR'", 'uR"', "UR'", 'UR"',
          "br'", 'br"', "Br'", 'Br"',
          "bR'", 'bR"', "BR'", 'BR"', ):
    single_quoted[t] = t

tabsize = 8

class TokenError(Exception): pass

class StopTokenizing(Exception): pass

def printtoken(type, token, start, end, line): # for testing
    (srow, scol) = start
    (erow, ecol) = end
    print("%d,%d-%d,%d:\t%s\t%s" %
          (srow, scol, erow, ecol, tok_name[type], repr(token)))

def tokenize(readline, tokeneater=printtoken):
    """
    The tokenize() function accepts two parameters: one representing the
    input stream, and one providing an output mechanism for tokenize().

    The first parameter, readline, must be a callable object which provides
    the same interface as the readline() method of built-in file objects.
    Each call to the function should return one line of input as a string.

    The second parameter, tokeneater, must also be a callable object. It is
    called once for each token, with five arguments, corresponding to the
    tuples generated by generate_tokens().
    """
    try:
        tokenize_loop(readline, tokeneater)
    except StopTokenizing:
        pass

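# Illustrative call (io.StringIO as the source is an assumption of the
# demo): dump every token of a small program via the default printtoken:
#
#     import io
#     tokenize(io.StringIO("if x:\n    y = 2\n").readline)
#
# prints one "srow,scol-erow,ecol:  TYPE  'token'" line per token.
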
# backwards compatible interface
def tokenize_loop(readline, tokeneater):
    for token_info in generate_tokens(readline):
        tokeneater(*token_info)

class Untokenizer:

    def __init__(self):
        self.tokens = []
        self.prev_row = 1
        self.prev_col = 0

    def add_whitespace(self, start):
        row, col = start
        assert row <= self.prev_row
        col_offset = col - self.prev_col
        if col_offset:
            self.tokens.append(" " * col_offset)

    def untokenize(self, iterable):
        for t in iterable:
            if len(t) == 2:
                self.compat(t, iterable)
                break
            tok_type, token, start, end, line = t
            self.add_whitespace(start)
            self.tokens.append(token)
            self.prev_row, self.prev_col = end
            if tok_type in (NEWLINE, NL):
                self.prev_row += 1
                self.prev_col = 0
        return "".join(self.tokens)

    def compat(self, token, iterable):
        startline = False
        indents = []
        toks_append = self.tokens.append
        toknum, tokval = token
        if toknum in (NAME, NUMBER):
            tokval += ' '
        if toknum in (NEWLINE, NL):
            startline = True
        for tok in iterable:
            toknum, tokval = tok[:2]

            if toknum in (NAME, NUMBER, ASYNC, AWAIT):
                tokval += ' '

            if toknum == INDENT:
                indents.append(tokval)
                continue
            elif toknum == DEDENT:
                indents.pop()
                continue
            elif toknum in (NEWLINE, NL):
                startline = True
            elif startline and indents:
                toks_append(indents[-1])
                startline = False
            toks_append(tokval)

cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
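
# For example (illustrative): cookie_re matches '# -*- coding: utf-8 -*-'
# and captures 'utf-8'; blank_re matches byte lines containing only
# whitespace, a comment, or a line ending.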

def _get_normal_name(orig_enc):
    """Imitates get_normal_name in tokenizer.c."""
    # Only care about the first 12 characters.
    enc = orig_enc[:12].lower().replace("_", "-")
    if enc == "utf-8" or enc.startswith("utf-8-"):
        return "utf-8"
    if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
       enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
        return "iso-8859-1"
    return orig_enc

def detect_encoding(readline):
    """
    The detect_encoding() function is used to detect the encoding that should
    be used to decode a Python source file. It requires one argument, readline,
    in the same way as the tokenize() generator.

    It will call readline a maximum of twice, and return the encoding used
    (as a string) and a list of any lines (left as bytes) it has read
    in.

    It detects the encoding from the presence of a utf-8 bom or an encoding
    cookie as specified in pep-0263. If both a bom and a cookie are present, but
    disagree, a SyntaxError will be raised. If the encoding cookie is an invalid
    charset, raise a SyntaxError. Note that if a utf-8 bom is found,
    'utf-8-sig' is returned.

    If no encoding is specified, then the default of 'utf-8' will be returned.
    """
    bom_found = False
    encoding = None
    default = 'utf-8'
    def read_or_stop():
        try:
            return readline()
        except StopIteration:
            return bytes()

    def find_cookie(line):
        try:
            line_string = line.decode('ascii')
        except UnicodeDecodeError:
            return None
        match = cookie_re.match(line_string)
        if not match:
            return None
        encoding = _get_normal_name(match.group(1))
        try:
            codec = lookup(encoding)
        except LookupError:
            # This behaviour mimics the Python interpreter
            raise SyntaxError("unknown encoding: " + encoding)

        if bom_found:
            if codec.name != 'utf-8':
                # This behaviour mimics the Python interpreter
                raise SyntaxError('encoding problem: utf-8')
            encoding += '-sig'
        return encoding

    first = read_or_stop()
    if first.startswith(BOM_UTF8):
        bom_found = True
        first = first[3:]
        default = 'utf-8-sig'
    if not first:
        return default, []

    encoding = find_cookie(first)
    if encoding:
        return encoding, [first]
    if not blank_re.match(first):
        return default, [first]

    second = read_or_stop()
    if not second:
        return default, [first]

    encoding = find_cookie(second)
    if encoding:
        return encoding, [first, second]

    return default, [first, second]

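# Illustrative use (io.BytesIO as the source is an assumption of the demo):
#
#     import io
#     buf = io.BytesIO(b'# -*- coding: iso-8859-1 -*-\nx = 1\n')
#     encoding, lines = detect_encoding(buf.readline)
#     # encoding == 'iso-8859-1'; lines holds the raw byte line(s) consumed.
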
def untokenize(iterable):
    """Transform tokens back into Python source code.

    Each element returned by the iterable must be a token sequence
    with at least two elements, a token number and token value. If
    only two tokens are passed, the resulting output is poor.

    Round-trip invariant for full input:
        Untokenized source will match input source exactly

    Round-trip invariant for limited input:
        # Output text will tokenize back to the input
        t1 = [tok[:2] for tok in generate_tokens(f.readline)]
        newcode = untokenize(t1)
        readline = iter(newcode.splitlines(1)).next
        t2 = [tok[:2] for tok in generate_tokens(readline)]
        assert t1 == t2
    """
    ut = Untokenizer()
    return ut.untokenize(iterable)

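# A concrete version of the limited-input invariant above (io.StringIO as
# the source is an assumption of the demo):
#
#     import io
#     source = "x = 1\n"
#     t1 = [tok[:2] for tok in generate_tokens(io.StringIO(source).readline)]
#     t2 = [tok[:2] for tok in
#           generate_tokens(io.StringIO(untokenize(t1)).readline)]
#     assert t1 == t2
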
def generate_tokens(readline):
    """
    The generate_tokens() generator requires one argument, readline, which
    must be a callable object which provides the same interface as the
    readline() method of built-in file objects. Each call to the function
    should return one line of input as a string. Alternately, readline
    can be a callable function terminating with StopIteration:
        readline = open(myfile).next    # Example of alternate readline

    The generator produces 5-tuples with these members: the token type; the
    token string; a 2-tuple (srow, scol) of ints specifying the row and
    column where the token begins in the source; a 2-tuple (erow, ecol) of
    ints specifying the row and column where the token ends in the source;
    and the line on which the token was found. The line passed is the
    logical line; continuation lines are included.
    """
    lnum = parenlev = continued = 0
    namechars, numchars = string.ascii_letters + '_', '0123456789'
    contstr, needcont = '', 0
    contline = None
    indents = [0]

    # 'stashed' and 'async_*' are used for async/await parsing
    stashed = None
    async_def = False
    async_def_indent = 0
    async_def_nl = False

    while 1:                                   # loop over lines in stream
        try:
            line = readline()
        except StopIteration:
            line = ''
        lnum = lnum + 1
        pos, max = 0, len(line)

        if contstr:                            # continued string
            if not line:
                raise TokenError("EOF in multi-line string", strstart)
            endmatch = endprog.match(line)
            if endmatch:
                pos = end = endmatch.end(0)
                yield (STRING, contstr + line[:end],
                       strstart, (lnum, end), contline + line)
                contstr, needcont = '', 0
                contline = None
            elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
                yield (ERRORTOKEN, contstr + line,
                       strstart, (lnum, len(line)), contline)
                contstr = ''
                contline = None
                continue
            else:
                contstr = contstr + line
                contline = contline + line
                continue

        elif parenlev == 0 and not continued:  # new statement
            if not line: break
            column = 0
            while pos < max:                   # measure leading whitespace
                if line[pos] == ' ': column = column + 1
                elif line[pos] == '\t': column = (column//tabsize + 1)*tabsize
                elif line[pos] == '\f': column = 0
                else: break
                pos = pos + 1
            if pos == max: break

            if stashed:
                yield stashed
                stashed = None

            if line[pos] in '#\r\n':           # skip comments or blank lines
                if line[pos] == '#':
                    comment_token = line[pos:].rstrip('\r\n')
                    nl_pos = pos + len(comment_token)
                    yield (COMMENT, comment_token,
                           (lnum, pos), (lnum, pos + len(comment_token)), line)
                    yield (NL, line[nl_pos:],
                           (lnum, nl_pos), (lnum, len(line)), line)
                else:
                    yield ((NL, COMMENT)[line[pos] == '#'], line[pos:],
                           (lnum, pos), (lnum, len(line)), line)
                continue

            if column > indents[-1]:           # count indents or dedents
                indents.append(column)
                yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
            while column < indents[-1]:
                if column not in indents:
                    raise IndentationError(
                        "unindent does not match any outer indentation level",
                        ("<tokenize>", lnum, pos, line))
                indents = indents[:-1]

                if async_def and async_def_indent >= indents[-1]:
                    async_def = False
                    async_def_nl = False
                    async_def_indent = 0

                yield (DEDENT, '', (lnum, pos), (lnum, pos), line)

            if async_def and async_def_nl and async_def_indent >= indents[-1]:
                async_def = False
                async_def_nl = False
                async_def_indent = 0

        else:                                  # continued statement
            if not line:
                raise TokenError("EOF in multi-line statement", (lnum, 0))
            continued = 0

        while pos < max:
            pseudomatch = pseudoprog.match(line, pos)
            if pseudomatch:                                # scan for tokens
                start, end = pseudomatch.span(1)
                spos, epos, pos = (lnum, start), (lnum, end), end
                token, initial = line[start:end], line[start]

                if initial in numchars or \
                   (initial == '.' and token != '.'):      # ordinary number
                    yield (NUMBER, token, spos, epos, line)
                elif initial in '\r\n':
                    newline = NEWLINE
                    if parenlev > 0:
                        newline = NL
                    elif async_def:
                        async_def_nl = True
                    if stashed:
                        yield stashed
                        stashed = None
                    yield (newline, token, spos, epos, line)

                elif initial == '#':
                    assert not token.endswith("\n")
                    if stashed:
                        yield stashed
                        stashed = None
                    yield (COMMENT, token, spos, epos, line)
                elif token in triple_quoted:
                    endprog = endprogs[token]
                    endmatch = endprog.match(line, pos)
                    if endmatch:                           # all on one line
                        pos = endmatch.end(0)
                        token = line[start:pos]
                        if stashed:
                            yield stashed
                            stashed = None
                        yield (STRING, token, spos, (lnum, pos), line)
                    else:
                        strstart = (lnum, start)           # multiple lines
                        contstr = line[start:]
                        contline = line
                        break
                elif initial in single_quoted or \
                    token[:2] in single_quoted or \
                    token[:3] in single_quoted:
                    if token[-1] == '\n':                  # continued string
                        strstart = (lnum, start)
                        endprog = (endprogs[initial] or endprogs[token[1]] or
                                   endprogs[token[2]])
                        contstr, needcont = line[start:], 1
                        contline = line
                        break
                    else:                                  # ordinary string
                        if stashed:
                            yield stashed
                            stashed = None
                        yield (STRING, token, spos, epos, line)
                elif initial in namechars:                 # ordinary name
                    if token in ('async', 'await'):
                        if async_def:
                            yield (ASYNC if token == 'async' else AWAIT,
                                   token, spos, epos, line)
                            continue

                    tok = (NAME, token, spos, epos, line)
                    if token == 'async' and not stashed:
                        stashed = tok
                        continue

                    if token == 'def':
                        if (stashed
                                and stashed[0] == NAME
                                and stashed[1] == 'async'):

                            async_def = True
                            async_def_indent = indents[-1]

                            yield (ASYNC, stashed[1],
                                   stashed[2], stashed[3],
                                   stashed[4])
                            stashed = None

                    if stashed:
                        yield stashed
                        stashed = None

                    yield tok
                elif initial == '\\':                      # continued stmt
                    # This yield is new; needed for better idempotency:
                    if stashed:
                        yield stashed
                        stashed = None
                    yield (NL, token, spos, (lnum, pos), line)
                    continued = 1
                else:
                    if initial in '([{': parenlev = parenlev + 1
                    elif initial in ')]}': parenlev = parenlev - 1
                    if stashed:
                        yield stashed
                        stashed = None
                    yield (OP, token, spos, epos, line)
            else:
                yield (ERRORTOKEN, line[pos],
                       (lnum, pos), (lnum, pos+1), line)
                pos = pos + 1

    if stashed:
        yield stashed
        stashed = None

    for indent in indents[1:]:                 # pop remaining indent levels
        yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
    yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')

if __name__ == '__main__':                     # testing
    import sys
    if len(sys.argv) > 1: tokenize(open(sys.argv[1]).readline)
    else: tokenize(sys.stdin.readline)