"""Tokenization help for Python programs.

tokenize(readline) is a generator that breaks a stream of bytes into
Python tokens.  It decodes the bytes according to PEP-0263 for
determining source file encoding.

It accepts a readline-like method which is called repeatedly to get the
next line of input (or b"" for EOF).  It generates 5-tuples with these
members:

    the token type (see token.py)
    the token (a string)
    the starting (row, column) indices of the token (a 2-tuple of ints)
    the ending (row, column) indices of the token (a 2-tuple of ints)
    the original line (string)

It is designed to match the working of the Python tokenizer exactly, except
that it produces COMMENT tokens for comments and gives type OP for all
operators.  Additionally, all token lists start with an ENCODING token
which tells you which encoding was used to decode the bytes stream.
"""

__author__ = 'Ka-Ping Yee <ping@lfw.org>'
__credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
               'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
               'Michael Foord')
import builtins
import re
import sys
from token import *
from codecs import lookup, BOM_UTF8
import collections
from io import TextIOWrapper
cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)

import token
__all__ = token.__all__ + ["COMMENT", "tokenize", "detect_encoding",
                           "NL", "untokenize", "ENCODING", "TokenInfo"]
del token

COMMENT = N_TOKENS
tok_name[COMMENT] = 'COMMENT'
NL = N_TOKENS + 1
tok_name[NL] = 'NL'
ENCODING = N_TOKENS + 2
tok_name[ENCODING] = 'ENCODING'
N_TOKENS += 3
EXACT_TOKEN_TYPES = {
    '(': LPAR,
    ')': RPAR,
    '[': LSQB,
    ']': RSQB,
    ':': COLON,
    ',': COMMA,
    ';': SEMI,
    '+': PLUS,
    '-': MINUS,
    '*': STAR,
    '/': SLASH,
    '|': VBAR,
    '&': AMPER,
    '<': LESS,
    '>': GREATER,
    '=': EQUAL,
    '.': DOT,
    '%': PERCENT,
    '{': LBRACE,
    '}': RBRACE,
    '==': EQEQUAL,
    '!=': NOTEQUAL,
    '<=': LESSEQUAL,
    '>=': GREATEREQUAL,
    '~': TILDE,
    '^': CIRCUMFLEX,
    '<<': LEFTSHIFT,
    '>>': RIGHTSHIFT,
    '**': DOUBLESTAR,
    '+=': PLUSEQUAL,
    '-=': MINEQUAL,
    '*=': STAREQUAL,
    '/=': SLASHEQUAL,
    '%=': PERCENTEQUAL,
    '&=': AMPEREQUAL,
    '|=': VBAREQUAL,
    '^=': CIRCUMFLEXEQUAL,
    '<<=': LEFTSHIFTEQUAL,
    '>>=': RIGHTSHIFTEQUAL,
    '**=': DOUBLESTAREQUAL,
    '//': DOUBLESLASH,
    '//=': DOUBLESLASHEQUAL,
    '@': AT
}

class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
    def __repr__(self):
        annotated_type = '%d (%s)' % (self.type, tok_name[self.type])
        return ('TokenInfo(type=%s, string=%r, start=%r, end=%r, line=%r)' %
                self._replace(type=annotated_type))

    @property
    def exact_type(self):
        if self.type == OP and self.string in EXACT_TOKEN_TYPES:
            return EXACT_TOKEN_TYPES[self.string]
        else:
            return self.type

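# For example (a sketch, not from the module's docs): an OP token for '+'
# reports the generic OP type, while exact_type resolves it to PLUS:
#
#     tok = TokenInfo(OP, '+', (1, 2), (1, 3), 'x + 1\n')
#     assert tok.type == OP and tok.exact_type == PLUS
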
def group(*choices): return '(' + '|'.join(choices) + ')'
def any(*choices): return group(*choices) + '*'
def maybe(*choices): return group(*choices) + '?'
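
# These helpers assemble the token regular expressions below.  For
# instance (illustrative only): group('a', 'b') == '(a|b)',
# any('a', 'b') == '(a|b)*', and maybe('a', 'b') == '(a|b)?'.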

# Note: we use unicode matching for names ("\w") but ascii matching for
# number literals.
Whitespace = r'[ \f\t]*'
Comment = r'#[^\r\n]*'
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
Name = r'\w+'

Hexnumber = r'0[xX][0-9a-fA-F]+'
Binnumber = r'0[bB][01]+'
Octnumber = r'0[oO][0-7]+'
Decnumber = r'(?:0+|[1-9][0-9]*)'
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
Exponent = r'[eE][-+]?[0-9]+'
Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
Expfloat = r'[0-9]+' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
Number = group(Imagnumber, Floatnumber, Intnumber)

StringPrefix = r'(?:[bB][rR]?|[rR][bB]?|[uU])?'

# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
# Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
Triple = group(StringPrefix + "'''", StringPrefix + '"""')
# Single-line ' or " string.
String = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
               StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')

# Because of leftmost-then-longest match semantics, be sure to put the
# longest operators first (e.g., if = came before ==, == would get
# recognized as two instances of =).
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
                 r"//=?", r"->",
                 r"[+\-*/%&|^=<>]=?",
                 r"~")
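
# For example (illustrative only): because r"\*\*=?" comes before the
# single-character alternatives, '**=' is matched as one operator rather
# than as '*', '*', '=':
#
#     assert re.match(Operator, '**=').group() == '**='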

Bracket = '[][(){}]'
Special = group(r'\r?\n', r'\.\.\.', r'[:;.,@]')
Funny = group(Operator, Bracket, Special)

PlainToken = group(Number, Funny, String, Name)
Token = Ignore + PlainToken

# First (or only) line of ' or " string.
ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                group("'", r'\\\r?\n'),
                StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)

def _compile(expr):
    return re.compile(expr, re.UNICODE)

endpats = {"'": Single, '"': Double,
           "'''": Single3, '"""': Double3,
           "r'''": Single3, 'r"""': Double3,
           "b'''": Single3, 'b"""': Double3,
           "R'''": Single3, 'R"""': Double3,
           "B'''": Single3, 'B"""': Double3,
           "br'''": Single3, 'br"""': Double3,
           "bR'''": Single3, 'bR"""': Double3,
           "Br'''": Single3, 'Br"""': Double3,
           "BR'''": Single3, 'BR"""': Double3,
           "rb'''": Single3, 'rb"""': Double3,
           "Rb'''": Single3, 'Rb"""': Double3,
           "rB'''": Single3, 'rB"""': Double3,
           "RB'''": Single3, 'RB"""': Double3,
           "u'''": Single3, 'u"""': Double3,
           "U'''": Single3, 'U"""': Double3,
           'r': None, 'R': None, 'b': None, 'B': None,
           'u': None, 'U': None}

triple_quoted = {}
for t in ("'''", '"""',
          "r'''", 'r"""', "R'''", 'R"""',
          "b'''", 'b"""', "B'''", 'B"""',
          "br'''", 'br"""', "Br'''", 'Br"""',
          "bR'''", 'bR"""', "BR'''", 'BR"""',
          "rb'''", 'rb"""', "rB'''", 'rB"""',
          "Rb'''", 'Rb"""', "RB'''", 'RB"""',
          "u'''", 'u"""', "U'''", 'U"""',
          ):
    triple_quoted[t] = t
single_quoted = {}
for t in ("'", '"',
          "r'", 'r"', "R'", 'R"',
          "b'", 'b"', "B'", 'B"',
          "br'", 'br"', "Br'", 'Br"',
          "bR'", 'bR"', "BR'", 'BR"',
          "rb'", 'rb"', "rB'", 'rB"',
          "Rb'", 'Rb"', "RB'", 'RB"',
          "u'", 'u"', "U'", 'U"',
          ):
    single_quoted[t] = t

tabsize = 8

class TokenError(Exception): pass

class StopTokenizing(Exception): pass


class Untokenizer:

    def __init__(self):
        self.tokens = []
        self.prev_row = 1
        self.prev_col = 0
        self.encoding = None

    def add_whitespace(self, start):
        row, col = start
        assert row <= self.prev_row
        col_offset = col - self.prev_col
        if col_offset:
            self.tokens.append(" " * col_offset)

    def untokenize(self, iterable):
        for t in iterable:
            if len(t) == 2:
                self.compat(t, iterable)
                break
            tok_type, token, start, end, line = t
            if tok_type == ENCODING:
                self.encoding = token
                continue
            self.add_whitespace(start)
            self.tokens.append(token)
            self.prev_row, self.prev_col = end
            if tok_type in (NEWLINE, NL):
                self.prev_row += 1
                self.prev_col = 0
        return "".join(self.tokens)

    def compat(self, token, iterable):
        startline = False
        indents = []
        toks_append = self.tokens.append
        toknum, tokval = token

        if toknum in (NAME, NUMBER):
            tokval += ' '
        if toknum in (NEWLINE, NL):
            startline = True
        prevstring = False
        for tok in iterable:
            toknum, tokval = tok[:2]
            if toknum == ENCODING:
                self.encoding = tokval
                continue

            if toknum in (NAME, NUMBER):
                tokval += ' '

            # Insert a space between two consecutive strings
            if toknum == STRING:
                if prevstring:
                    tokval = ' ' + tokval
                prevstring = True
            else:
                prevstring = False

            if toknum == INDENT:
                indents.append(tokval)
                continue
            elif toknum == DEDENT:
                indents.pop()
                continue
            elif toknum in (NEWLINE, NL):
                startline = True
            elif startline and indents:
                toks_append(indents[-1])
                startline = False
            toks_append(tokval)


def untokenize(iterable):
    """Transform tokens back into Python source code.
    It returns a bytes object, encoded using the ENCODING
    token, which is the first token sequence output by tokenize.

    Each element returned by the iterable must be a token sequence
    with at least two elements, a token number and token value.  If
    only two tokens are passed, the resulting output is poor.

    Round-trip invariant for full input:
        Untokenized source will match input source exactly

    Round-trip invariant for limited input:
        # Output bytes will tokenize back to the input
        t1 = [tok[:2] for tok in tokenize(f.readline)]
        newcode = untokenize(t1)
        readline = BytesIO(newcode).readline
        t2 = [tok[:2] for tok in tokenize(readline)]
        assert t1 == t2
    """
    ut = Untokenizer()
    out = ut.untokenize(iterable)
    if ut.encoding is not None:
        out = out.encode(ut.encoding)
    return out


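# A round-trip sketch (illustrative only, assuming an io.BytesIO import):
# full 5-tuples reproduce the source bytes exactly, while 2-tuples only
# guarantee an equivalent token stream.
#
#     from io import BytesIO
#     source = b"if x:\n    y = 1\n"
#     toks = list(tokenize(BytesIO(source).readline))
#     assert untokenize(toks) == source
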
def _get_normal_name(orig_enc):
    """Imitates get_normal_name in tokenizer.c."""
    # Only care about the first 12 characters.
    enc = orig_enc[:12].lower().replace("_", "-")
    if enc == "utf-8" or enc.startswith("utf-8-"):
        return "utf-8"
    if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
       enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
        return "iso-8859-1"
    return orig_enc

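# For example (illustrative only): _get_normal_name("UTF_8") returns
# "utf-8", _get_normal_name("Latin-1") returns "iso-8859-1", and an
# unrecognized name such as "cp1252" passes through unchanged.
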
def detect_encoding(readline):
    """
    The detect_encoding() function is used to detect the encoding that should
    be used to decode a Python source file.  It requires one argument, readline,
    in the same way as the tokenize() generator.

    It will call readline a maximum of twice, and return the encoding used
    (as a string) and a list of any lines (left as bytes) it has read in.

    It detects the encoding from the presence of a utf-8 bom or an encoding
    cookie as specified in pep-0263.  If both a bom and a cookie are present,
    but disagree, a SyntaxError will be raised.  If the encoding cookie is an
    invalid charset, raise a SyntaxError.  Note that if a utf-8 bom is found,
    'utf-8-sig' is returned.

    If no encoding is specified, then the default of 'utf-8' will be returned.
    """
    try:
        filename = readline.__self__.name
    except AttributeError:
        filename = None
    bom_found = False
    encoding = None
    default = 'utf-8'
    def read_or_stop():
        try:
            return readline()
        except StopIteration:
            return b''

    def find_cookie(line):
        try:
            # Decode as UTF-8. Either the line is an encoding declaration,
            # in which case it should be pure ASCII, or it must be UTF-8
            # per default encoding.
            line_string = line.decode('utf-8')
        except UnicodeDecodeError:
            msg = "invalid or missing encoding declaration"
            if filename is not None:
                msg = '{} for {!r}'.format(msg, filename)
            raise SyntaxError(msg)

        match = cookie_re.match(line_string)
        if not match:
            return None
        encoding = _get_normal_name(match.group(1))
        try:
            codec = lookup(encoding)
        except LookupError:
            # This behaviour mimics the Python interpreter
            if filename is None:
                msg = "unknown encoding: " + encoding
            else:
                msg = "unknown encoding for {!r}: {}".format(filename,
                                                             encoding)
            raise SyntaxError(msg)

        if bom_found:
            if encoding != 'utf-8':
                # This behaviour mimics the Python interpreter
                if filename is None:
                    msg = 'encoding problem: utf-8'
                else:
                    msg = 'encoding problem for {!r}: utf-8'.format(filename)
                raise SyntaxError(msg)
            encoding += '-sig'
        return encoding

    first = read_or_stop()
    if first.startswith(BOM_UTF8):
        bom_found = True
        first = first[3:]
        default = 'utf-8-sig'
    if not first:
        return default, []

    encoding = find_cookie(first)
    if encoding:
        return encoding, [first]

    second = read_or_stop()
    if not second:
        return default, [first]

    encoding = find_cookie(second)
    if encoding:
        return encoding, [first, second]

    return default, [first, second]


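# A sketch of detect_encoding() in use (illustrative only): a PEP 263
# coding cookie on the first line is honoured, and the consumed lines
# are returned so they can be re-fed to the tokenizer.
#
#     from io import BytesIO
#     buf = BytesIO(b"# -*- coding: iso-8859-1 -*-\nx = 1\n")
#     enc, consumed = detect_encoding(buf.readline)
#     assert enc == "iso-8859-1" and len(consumed) == 1
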
def open(filename):
    """Open a file in read only mode using the encoding detected by
    detect_encoding().
    """
    buffer = builtins.open(filename, 'rb')
    encoding, lines = detect_encoding(buffer.readline)
    buffer.seek(0)
    text = TextIOWrapper(buffer, encoding, line_buffering=True)
    text.mode = 'r'
    return text


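# Typical use from outside the module (a sketch; 'example.py' is a
# hypothetical file name):
#
#     import tokenize
#     with tokenize.open('example.py') as f:
#         source = f.read()
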
def tokenize(readline):
    """
    The tokenize() generator requires one argument, readline, which
    must be a callable object which provides the same interface as the
    readline() method of built-in file objects.  Each call to the function
    should return one line of input as bytes.  Alternately, readline
    can be a callable function terminating with StopIteration:
        readline = open(myfile, 'rb').__next__  # Example of alternate readline

    The generator produces 5-tuples with these members: the token type; the
    token string; a 2-tuple (srow, scol) of ints specifying the row and
    column where the token begins in the source; a 2-tuple (erow, ecol) of
    ints specifying the row and column where the token ends in the source;
    and the line on which the token was found.  The line passed is the
    logical line; continuation lines are included.

    The first token sequence will always be an ENCODING token
    which tells you which encoding was used to decode the bytes stream.
    """
    # This import is here to avoid problems when the itertools module is not
    # built yet and tokenize is imported.
    from itertools import chain, repeat
    encoding, consumed = detect_encoding(readline)
    rl_gen = iter(readline, b"")
    empty = repeat(b"")
    return _tokenize(chain(consumed, rl_gen, empty).__next__, encoding)


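# A sketch of tokenize() over a file on disk ('example.py' is a
# hypothetical file name); the file must be opened in binary mode so
# detect_encoding() sees the raw bytes:
#
#     with builtins.open('example.py', 'rb') as f:
#         for tok in tokenize(f.readline):
#             print(tok)
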
def _tokenize(readline, encoding):
    lnum = parenlev = continued = 0
    numchars = '0123456789'
    contstr, needcont = '', 0
    contline = None
    indents = [0]

    if encoding is not None:
        if encoding == "utf-8-sig":
            # BOM will already have been stripped.
            encoding = "utf-8"
        yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')
    while True:             # loop over lines in stream
        try:
            line = readline()
        except StopIteration:
            line = b''

        if encoding is not None:
            line = line.decode(encoding)
        lnum += 1
        pos, max = 0, len(line)

        if contstr:                            # continued string
            if not line:
                raise TokenError("EOF in multi-line string", strstart)
            endmatch = endprog.match(line)
            if endmatch:
                pos = end = endmatch.end(0)
                yield TokenInfo(STRING, contstr + line[:end],
                       strstart, (lnum, end), contline + line)
                contstr, needcont = '', 0
                contline = None
            elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
                yield TokenInfo(ERRORTOKEN, contstr + line,
                           strstart, (lnum, len(line)), contline)
                contstr = ''
                contline = None
                continue
            else:
                contstr = contstr + line
                contline = contline + line
                continue

        elif parenlev == 0 and not continued:  # new statement
            if not line: break
            column = 0
            while pos < max:                   # measure leading whitespace
                if line[pos] == ' ':
                    column += 1
                elif line[pos] == '\t':
                    column = (column//tabsize + 1)*tabsize
                elif line[pos] == '\f':
                    column = 0
                else:
                    break
                pos += 1
            if pos == max:
                break

            if line[pos] in '#\r\n':           # skip comments or blank lines
                if line[pos] == '#':
                    comment_token = line[pos:].rstrip('\r\n')
                    nl_pos = pos + len(comment_token)
                    yield TokenInfo(COMMENT, comment_token,
                           (lnum, pos), (lnum, pos + len(comment_token)), line)
                    yield TokenInfo(NL, line[nl_pos:],
                           (lnum, nl_pos), (lnum, len(line)), line)
                else:
                    yield TokenInfo((NL, COMMENT)[line[pos] == '#'], line[pos:],
                           (lnum, pos), (lnum, len(line)), line)
                continue

            if column > indents[-1]:           # count indents or dedents
                indents.append(column)
                yield TokenInfo(INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
            while column < indents[-1]:
                if column not in indents:
                    raise IndentationError(
                        "unindent does not match any outer indentation level",
                        ("<tokenize>", lnum, pos, line))
                indents = indents[:-1]
                yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)

        else:                                  # continued statement
            if not line:
                raise TokenError("EOF in multi-line statement", (lnum, 0))
            continued = 0

        while pos < max:
            pseudomatch = _compile(PseudoToken).match(line, pos)
            if pseudomatch:                                # scan for tokens
                start, end = pseudomatch.span(1)
                spos, epos, pos = (lnum, start), (lnum, end), end
                if start == end:
                    continue
                token, initial = line[start:end], line[start]

                if (initial in numchars or                 # ordinary number
                    (initial == '.' and token != '.' and token != '...')):
                    yield TokenInfo(NUMBER, token, spos, epos, line)
                elif initial in '\r\n':
                    yield TokenInfo(NL if parenlev > 0 else NEWLINE,
                           token, spos, epos, line)
                elif initial == '#':
                    assert not token.endswith("\n")
                    yield TokenInfo(COMMENT, token, spos, epos, line)
                elif token in triple_quoted:
                    endprog = _compile(endpats[token])
                    endmatch = endprog.match(line, pos)
                    if endmatch:                           # all on one line
                        pos = endmatch.end(0)
                        token = line[start:pos]
                        yield TokenInfo(STRING, token, spos, (lnum, pos), line)
                    else:
                        strstart = (lnum, start)           # multiple lines
                        contstr = line[start:]
                        contline = line
                        break
                elif initial in single_quoted or \
                    token[:2] in single_quoted or \
                    token[:3] in single_quoted:
                    if token[-1] == '\n':                  # continued string
                        strstart = (lnum, start)
                        endprog = _compile(endpats[initial] or
                                           endpats[token[1]] or
                                           endpats[token[2]])
                        contstr, needcont = line[start:], 1
                        contline = line
                        break
                    else:                                  # ordinary string
                        yield TokenInfo(STRING, token, spos, epos, line)
                elif initial.isidentifier():               # ordinary name
                    yield TokenInfo(NAME, token, spos, epos, line)
                elif initial == '\\':                      # continued stmt
                    continued = 1
                else:
                    if initial in '([{':
                        parenlev += 1
                    elif initial in ')]}':
                        parenlev -= 1
                    yield TokenInfo(OP, token, spos, epos, line)
            else:
                yield TokenInfo(ERRORTOKEN, line[pos],
                           (lnum, pos), (lnum, pos+1), line)
                pos += 1

    for indent in indents[1:]:                 # pop remaining indent levels
        yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
    yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')


# An undocumented, backwards-compatible API for all the places in the
# standard library that expect to be able to use tokenize with strings
def generate_tokens(readline):
    return _tokenize(readline, None)
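
# A sketch of the string-based compatibility API (illustrative only):
# generate_tokens() takes a readline that yields str instead of bytes,
# and it emits no ENCODING token:
#
#     import io
#     for tok in generate_tokens(io.StringIO("x = 1\n").readline):
#         print(tok)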

def main():
    import argparse

    # Helper error handling routines
    def perror(message):
        print(message, file=sys.stderr)

    def error(message, filename=None, location=None):
        if location:
            args = (filename,) + location + (message,)
            perror("%s:%d:%d: error: %s" % args)
        elif filename:
            perror("%s: error: %s" % (filename, message))
        else:
            perror("error: %s" % message)
        sys.exit(1)

    # Parse the arguments and options
    parser = argparse.ArgumentParser(prog='python -m tokenize')
    parser.add_argument(dest='filename', nargs='?',
                        metavar='filename.py',
                        help='the file to tokenize; defaults to stdin')
    parser.add_argument('-e', '--exact', dest='exact', action='store_true',
                        help='display token names using the exact type')
    args = parser.parse_args()

    try:
        # Tokenize the input
        if args.filename:
            filename = args.filename
            with builtins.open(filename, 'rb') as f:
                tokens = list(tokenize(f.readline))
        else:
            filename = "<stdin>"
            tokens = _tokenize(sys.stdin.readline, None)

        # Output the tokenization
        for token in tokens:
            token_type = token.type
            if args.exact:
                token_type = token.exact_type
            token_range = "%d,%d-%d,%d:" % (token.start + token.end)
            print("%-20s%-15s%-15r" %
                  (token_range, tok_name[token_type], token.string))
    except IndentationError as err:
        line, column = err.args[1][1:3]
        error(err.args[0], filename, (line, column))
    except TokenError as err:
        line, column = err.args[1]
        error(err.args[0], filename, (line, column))
    except SyntaxError as err:
        error(err, filename)
    except IOError as err:
        error(err)
    except KeyboardInterrupt:
        print("interrupted\n")
    except Exception as err:
        perror("unexpected error: %s" % err)
        raise

if __name__ == "__main__":
    main()