# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation.
# All rights reserved.

"""Tokenization help for Python programs.

generate_tokens(readline) is a generator that breaks a stream of
text into Python tokens. It accepts a readline-like method which is called
repeatedly to get the next line of input (or "" for EOF). It generates
5-tuples with these members:

    the token type (see token.py)
    the token (a string)
    the starting (row, column) indices of the token (a 2-tuple of ints)
    the ending (row, column) indices of the token (a 2-tuple of ints)
    the original line (string)

It is designed to match the working of the Python tokenizer exactly, except
that it produces COMMENT tokens for comments and gives type OP for all
operators.

Older entry points
    tokenize_loop(readline, tokeneater)
    tokenize(readline, tokeneater=printtoken)
are the same, except instead of generating tokens, tokeneater is a callback
function to which the 5 fields described above are passed as 5 arguments,
each time a new token is found."""
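
# A quick sketch of what generate_tokens() yields for the input "x = 1\n"
# (io here is only for the example; any readline-compatible callable works):
#
#     import io
#     for tok in generate_tokens(io.StringIO("x = 1\n").readline):
#         print(tok_name[tok[0]], repr(tok[1]), tok[2], tok[3])
#
# which prints:
#
#     NAME 'x' (1, 0) (1, 1)
#     OP '=' (1, 2) (1, 3)
#     NUMBER '1' (1, 4) (1, 5)
#     NEWLINE '\n' (1, 5) (1, 6)
#     ENDMARKER '' (2, 0) (2, 0)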

__author__ = 'Ka-Ping Yee <ping@lfw.org>'
__credits__ = \
    'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'

import string, re
from codecs import BOM_UTF8, lookup
from lib2to3.pgen2.token import *

from . import token
__all__ = [x for x in dir(token) if x[0] != '_'] + ["tokenize",
           "generate_tokens", "untokenize"]
del token

try:
    bytes
except NameError:
    # Support bytes type in Python <= 2.5, so 2to3 turns itself into
    # valid Python 3 code.
    bytes = str

def group(*choices): return '(' + '|'.join(choices) + ')'
def any(*choices): return group(*choices) + '*'
def maybe(*choices): return group(*choices) + '?'
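
# For example: group('a', 'b') -> '(a|b)', any(r'\d') -> r'(\d)*', and
# maybe('#') -> '(#)?'.
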
def _combinations(*l):
    return set(
        x + y for x in l for y in l + ("",) if x.casefold() != y.casefold()
    )
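
# _combinations() builds every one- and two-letter prefix whose characters
# differ when case is ignored, e.g. _combinations('r', 'R', 'b', 'B') ->
# {'r', 'R', 'b', 'B', 'rb', 'rB', 'Rb', 'RB', 'br', 'bR', 'Br', 'BR'}.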

Whitespace = r'[ \f\t]*'
Comment = r'#[^\r\n]*'
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
Name = r'\w+'

Binnumber = r'0[bB]_?[01]+(?:_[01]+)*'
Hexnumber = r'0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?'
Octnumber = r'0[oO]?_?[0-7]+(?:_[0-7]+)*[lL]?'
Decnumber = group(r'[1-9]\d*(?:_\d+)*[lL]?', '0[lL]?')
Intnumber = group(Binnumber, Hexnumber, Octnumber, Decnumber)
Exponent = r'[eE][-+]?\d+(?:_\d+)*'
Pointfloat = group(r'\d+(?:_\d+)*\.(?:\d+(?:_\d+)*)?', r'\.\d+(?:_\d+)*') + maybe(Exponent)
Expfloat = r'\d+(?:_\d+)*' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group(r'\d+(?:_\d+)*[jJ]', Floatnumber + r'[jJ]')
Number = group(Imagnumber, Floatnumber, Intnumber)
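
# Literals these patterns accept include underscore grouping (PEP 515) and
# the old Python 2 'L' suffix, e.g.: 0b1010, 0xDEAD_BEEF, 0o777, 1_000_000L,
# 3.14, .5e-3, 3j, 2.5J.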

# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
# Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
_litprefix = r"(?:[uUrRbBfF]|[rR][fFbB]|[fFbBuU][rR])?"
Triple = group(_litprefix + "'''", _litprefix + '"""')
# Single-line ' or " string.
String = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
               _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')

# Because of leftmost-then-longest match semantics, be sure to put the
# longest operators first (e.g., if = came before ==, == would get
# recognized as two instances of =).
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
                 r"//=?", r"->",
                 r"[+\-*/%&@|^=<>]=?",
                 r"~")

Bracket = '[][(){}]'
Special = group(r'\r?\n', r'[:;.,`@]')
Funny = group(Operator, Bracket, Special)

PlainToken = group(Number, Funny, String, Name)
Token = Ignore + PlainToken

# First (or only) line of ' or " string.
ContStr = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                group("'", r'\\\r?\n'),
                _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n', Comment, Triple)
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
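
# pseudoprog (compiled from PseudoToken below) is what generate_tokens()
# scans with: leading whitespace is consumed and group 1 spans the token
# itself, which is why the scanner reads pseudomatch.span(1).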

tokenprog, pseudoprog, single3prog, double3prog = map(
    re.compile, (Token, PseudoToken, Single3, Double3))

_strprefixes = (
    _combinations('r', 'R', 'f', 'F') |
    _combinations('r', 'R', 'b', 'B') |
    {'u', 'U', 'ur', 'uR', 'Ur', 'UR'}
)

endprogs = {"'": re.compile(Single), '"': re.compile(Double),
            "'''": single3prog, '"""': double3prog,
            **{f"{prefix}'''": single3prog for prefix in _strprefixes},
            **{f'{prefix}"""': double3prog for prefix in _strprefixes},
            **{prefix: None for prefix in _strprefixes}}

triple_quoted = (
    {"'''", '"""'} |
    {f"{prefix}'''" for prefix in _strprefixes} |
    {f'{prefix}"""' for prefix in _strprefixes}
)
single_quoted = (
    {"'", '"'} |
    {f"{prefix}'" for prefix in _strprefixes} |
    {f'{prefix}"' for prefix in _strprefixes}
)

tabsize = 8

class TokenError(Exception): pass

class StopTokenizing(Exception): pass

def printtoken(type, token, start, end, line): # for testing
    (srow, scol) = start
    (erow, ecol) = end
    print("%d,%d-%d,%d:\t%s\t%s" %
          (srow, scol, erow, ecol, tok_name[type], repr(token)))

def tokenize(readline, tokeneater=printtoken):
    """
    The tokenize() function accepts two parameters: one representing the
    input stream, and one providing an output mechanism for tokenize().

    The first parameter, readline, must be a callable object which provides
    the same interface as the readline() method of built-in file objects.
    Each call to the function should return one line of input as a string.

    The second parameter, tokeneater, must also be a callable object. It is
    called once for each token, with five arguments, corresponding to the
    tuples generated by generate_tokens().
    """
    try:
        tokenize_loop(readline, tokeneater)
    except StopTokenizing:
        pass

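# With the default tokeneater, tokenize() prints one line per token in the
# form "row,col-row,col:<TAB>type<TAB>token", e.g. (a sketch) for "x = 1\n":
#
#     1,0-1,1:    NAME    'x'
#     1,2-1,3:    OP      '='
#     ...
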
# backwards compatible interface
def tokenize_loop(readline, tokeneater):
    for token_info in generate_tokens(readline):
        tokeneater(*token_info)

class Untokenizer:

    def __init__(self):
        self.tokens = []
        self.prev_row = 1
        self.prev_col = 0

    def add_whitespace(self, start):
        row, col = start
        assert row <= self.prev_row
        col_offset = col - self.prev_col
        if col_offset:
            self.tokens.append(" " * col_offset)

    def untokenize(self, iterable):
        for t in iterable:
            if len(t) == 2:
                self.compat(t, iterable)
                break
            tok_type, token, start, end, line = t
            self.add_whitespace(start)
            self.tokens.append(token)
            self.prev_row, self.prev_col = end
            if tok_type in (NEWLINE, NL):
                self.prev_row += 1
                self.prev_col = 0
        return "".join(self.tokens)

    def compat(self, token, iterable):
        startline = False
        indents = []
        toks_append = self.tokens.append
        toknum, tokval = token
        if toknum in (NAME, NUMBER):
            tokval += ' '
        if toknum in (NEWLINE, NL):
            startline = True
        for tok in iterable:
            toknum, tokval = tok[:2]

            if toknum in (NAME, NUMBER, ASYNC, AWAIT):
                tokval += ' '

            if toknum == INDENT:
                indents.append(tokval)
                continue
            elif toknum == DEDENT:
                indents.pop()
                continue
            elif toknum in (NEWLINE, NL):
                startline = True
            elif startline and indents:
                toks_append(indents[-1])
                startline = False
            toks_append(tokval)

cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)

def _get_normal_name(orig_enc):
    """Imitates get_normal_name in tokenizer.c."""
    # Only care about the first 12 characters.
    enc = orig_enc[:12].lower().replace("_", "-")
    if enc == "utf-8" or enc.startswith("utf-8-"):
        return "utf-8"
    if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
       enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
        return "iso-8859-1"
    return orig_enc

def detect_encoding(readline):
    """
    The detect_encoding() function is used to detect the encoding that should
    be used to decode a Python source file. It requires one argument, readline,
    in the same way as the tokenize() generator.

    It will call readline a maximum of twice, and return the encoding used
    (as a string) and a list of any lines (left as bytes) it has read in.

    It detects the encoding from the presence of a UTF-8 BOM or an encoding
    cookie as specified in PEP 263. If both a BOM and a cookie are present,
    but disagree, a SyntaxError will be raised. If the encoding cookie is an
    invalid charset, raise a SyntaxError. Note that if a UTF-8 BOM is found,
    'utf-8-sig' is returned.

    If no encoding is specified, then the default of 'utf-8' will be returned.
    """
    bom_found = False
    encoding = None
    default = 'utf-8'
    def read_or_stop():
        try:
            return readline()
        except StopIteration:
            return bytes()

    def find_cookie(line):
        try:
            line_string = line.decode('ascii')
        except UnicodeDecodeError:
            return None
        match = cookie_re.match(line_string)
        if not match:
            return None
        encoding = _get_normal_name(match.group(1))
        try:
            codec = lookup(encoding)
        except LookupError:
            # This behaviour mimics the Python interpreter
            raise SyntaxError("unknown encoding: " + encoding)

        if bom_found:
            if codec.name != 'utf-8':
                # This behaviour mimics the Python interpreter
                raise SyntaxError('encoding problem: utf-8')
            encoding += '-sig'
        return encoding

    first = read_or_stop()
    if first.startswith(BOM_UTF8):
        bom_found = True
        first = first[3:]
        default = 'utf-8-sig'
    if not first:
        return default, []

    encoding = find_cookie(first)
    if encoding:
        return encoding, [first]
    if not blank_re.match(first):
        return default, [first]

    second = read_or_stop()
    if not second:
        return default, [first]

    encoding = find_cookie(second)
    if encoding:
        return encoding, [first, second]

    return default, [first, second]

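# A sketch of detect_encoding() on an in-memory source (io.BytesIO stands in
# for a file opened in binary mode):
#
#     import io
#     buf = io.BytesIO(b"# -*- coding: latin-1 -*-\nprint('hi')\n")
#     detect_encoding(buf.readline)
#     # -> ('iso-8859-1', [b'# -*- coding: latin-1 -*-\n'])
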
def untokenize(iterable):
    """Transform tokens back into Python source code.

    Each element returned by the iterable must be a token sequence
    with at least two elements, a token number and token value. If
    only two tokens are passed, the resulting output is poor.

    Round-trip invariant for full input:
        Untokenized source will match input source exactly

    Round-trip invariant for limited input:
        # Output text will tokenize back to the input
        t1 = [tok[:2] for tok in generate_tokens(f.readline)]
        newcode = untokenize(t1)
        readline = iter(newcode.splitlines(1)).__next__
        t2 = [tok[:2] for tok in generate_tokens(readline)]
        assert t1 == t2
    """
    ut = Untokenizer()
    return ut.untokenize(iterable)

def generate_tokens(readline):
    """
    The generate_tokens() generator requires one argument, readline, which
    must be a callable object which provides the same interface as the
    readline() method of built-in file objects. Each call to the function
    should return one line of input as a string. Alternately, readline
    can be a callable function terminating with StopIteration:
        readline = open(myfile).__next__    # Example of alternate readline

    The generator produces 5-tuples with these members: the token type; the
    token string; a 2-tuple (srow, scol) of ints specifying the row and
    column where the token begins in the source; a 2-tuple (erow, ecol) of
    ints specifying the row and column where the token ends in the source;
    and the line on which the token was found. The line passed is the
    logical line; continuation lines are included.
    """
    lnum = parenlev = continued = 0
    contstr, needcont = '', 0
    contline = None
    indents = [0]

    # 'stashed' and 'async_*' are used for async/await parsing
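    # A bare NAME 'async' is stashed rather than emitted immediately: if the
    # next token turns out to be 'def', the pair is reported as ASYNC + NAME
    # and the indented block is tracked via async_def_indent/async_def_nl;
    # otherwise the stashed token is flushed as a plain NAME.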
    stashed = None
    async_def = False
    async_def_indent = 0
    async_def_nl = False

    while 1:                                   # loop over lines in stream
        try:
            line = readline()
        except StopIteration:
            line = ''
        lnum = lnum + 1
        pos, max = 0, len(line)

        if contstr:                            # continued string
            if not line:
                raise TokenError("EOF in multi-line string", strstart)
            endmatch = endprog.match(line)
            if endmatch:
                pos = end = endmatch.end(0)
                yield (STRING, contstr + line[:end],
                       strstart, (lnum, end), contline + line)
                contstr, needcont = '', 0
                contline = None
            elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
                yield (ERRORTOKEN, contstr + line,
                       strstart, (lnum, len(line)), contline)
                contstr = ''
                contline = None
                continue
            else:
                contstr = contstr + line
                contline = contline + line
                continue

        elif parenlev == 0 and not continued:  # new statement
            if not line: break
            column = 0
            while pos < max:                   # measure leading whitespace
                if line[pos] == ' ': column = column + 1
                elif line[pos] == '\t': column = (column//tabsize + 1)*tabsize
                elif line[pos] == '\f': column = 0
                else: break
                pos = pos + 1
            if pos == max: break

            if stashed:
                yield stashed
                stashed = None

            if line[pos] in '#\r\n':           # skip comments or blank lines
                if line[pos] == '#':
                    comment_token = line[pos:].rstrip('\r\n')
                    nl_pos = pos + len(comment_token)
                    yield (COMMENT, comment_token,
                           (lnum, pos), (lnum, pos + len(comment_token)), line)
                    yield (NL, line[nl_pos:],
                           (lnum, nl_pos), (lnum, len(line)), line)
                else:
                    yield ((NL, COMMENT)[line[pos] == '#'], line[pos:],
                           (lnum, pos), (lnum, len(line)), line)
                continue

            if column > indents[-1]:           # count indents or dedents
                indents.append(column)
                yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
            while column < indents[-1]:
                if column not in indents:
                    raise IndentationError(
                        "unindent does not match any outer indentation level",
                        ("<tokenize>", lnum, pos, line))
                indents = indents[:-1]

                if async_def and async_def_indent >= indents[-1]:
                    async_def = False
                    async_def_nl = False
                    async_def_indent = 0

                yield (DEDENT, '', (lnum, pos), (lnum, pos), line)

            if async_def and async_def_nl and async_def_indent >= indents[-1]:
                async_def = False
                async_def_nl = False
                async_def_indent = 0

        else:                                  # continued statement
            if not line:
                raise TokenError("EOF in multi-line statement", (lnum, 0))
            continued = 0

        while pos < max:
            pseudomatch = pseudoprog.match(line, pos)
            if pseudomatch:                    # scan for tokens
                start, end = pseudomatch.span(1)
                spos, epos, pos = (lnum, start), (lnum, end), end
                token, initial = line[start:end], line[start]

                if initial in string.digits or \
                   (initial == '.' and token != '.'):  # ordinary number
                    yield (NUMBER, token, spos, epos, line)
                elif initial in '\r\n':
                    newline = NEWLINE
                    if parenlev > 0:
                        newline = NL
                    elif async_def:
                        async_def_nl = True
                    if stashed:
                        yield stashed
                        stashed = None
                    yield (newline, token, spos, epos, line)

                elif initial == '#':
                    assert not token.endswith("\n")
                    if stashed:
                        yield stashed
                        stashed = None
                    yield (COMMENT, token, spos, epos, line)
                elif token in triple_quoted:
                    endprog = endprogs[token]
                    endmatch = endprog.match(line, pos)
                    if endmatch:                       # all on one line
                        pos = endmatch.end(0)
                        token = line[start:pos]
                        if stashed:
                            yield stashed
                            stashed = None
                        yield (STRING, token, spos, (lnum, pos), line)
                    else:
                        strstart = (lnum, start)       # multiple lines
                        contstr = line[start:]
                        contline = line
                        break
                elif initial in single_quoted or \
                    token[:2] in single_quoted or \
                    token[:3] in single_quoted:
                    if token[-1] == '\n':              # continued string
                        strstart = (lnum, start)
                        endprog = (endprogs[initial] or endprogs[token[1]] or
                                   endprogs[token[2]])
                        contstr, needcont = line[start:], 1
                        contline = line
                        break
                    else:                              # ordinary string
                        if stashed:
                            yield stashed
                            stashed = None
                        yield (STRING, token, spos, epos, line)
                elif initial.isidentifier():           # ordinary name
                    if token in ('async', 'await'):
                        if async_def:
                            yield (ASYNC if token == 'async' else AWAIT,
                                   token, spos, epos, line)
                            continue

                    tok = (NAME, token, spos, epos, line)
                    if token == 'async' and not stashed:
                        stashed = tok
                        continue

                    if token == 'def':
                        if (stashed
                                and stashed[0] == NAME
                                and stashed[1] == 'async'):

                            async_def = True
                            async_def_indent = indents[-1]

                            yield (ASYNC, stashed[1],
                                   stashed[2], stashed[3],
                                   stashed[4])
                            stashed = None

                    if stashed:
                        yield stashed
                        stashed = None

                    yield tok
                elif initial == '\\':                  # continued stmt
                    # This yield is new; needed for better idempotency:
                    if stashed:
                        yield stashed
                        stashed = None
                    yield (NL, token, spos, (lnum, pos), line)
                    continued = 1
                else:
                    if initial in '([{': parenlev = parenlev + 1
                    elif initial in ')]}': parenlev = parenlev - 1
                    if stashed:
                        yield stashed
                        stashed = None
                    yield (OP, token, spos, epos, line)
            else:
                yield (ERRORTOKEN, line[pos],
                       (lnum, pos), (lnum, pos+1), line)
                pos = pos + 1

    if stashed:
        yield stashed
        stashed = None

    for indent in indents[1:]:                 # pop remaining indent levels
        yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
    yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')

if __name__ == '__main__':                     # testing
    import sys
    if len(sys.argv) > 1: tokenize(open(sys.argv[1]).readline)
    else: tokenize(sys.stdin.readline)