Armin Ronacher | 92f572f | 2007-02-26 22:17:32 +0100 | [diff] [blame] | 1 | # -*- coding: utf-8 -*- |
| 2 | """ |
Armin Ronacher | 07bc684 | 2008-03-31 14:18:49 +0200 | [diff] [blame] | 3 | jinja2.lexer |
| 4 | ~~~~~~~~~~~~ |
Armin Ronacher | 3b65b8a | 2007-02-27 20:21:45 +0100 | [diff] [blame] | 5 | |
Armin Ronacher | 5a8e497 | 2007-04-05 11:21:38 +0200 | [diff] [blame] | 6 | This module implements a Jinja / Python combination lexer. The |
| 7 | `Lexer` class provided by this module is used to do some preprocessing |
| 8 | for Jinja. |
| 9 | |
| 10 | On the one hand it filters out invalid operators like the bitshift |
| 11 | operators we don't allow in templates. On the other hand it separates |
| 12 | template code and python code in expressions. |
| 13 | |
Armin Ronacher | 1d51f63 | 2008-03-25 14:34:45 +0100 | [diff] [blame] | 14 | :copyright: 2007-2008 by Armin Ronacher. |
Armin Ronacher | 3b65b8a | 2007-02-27 20:21:45 +0100 | [diff] [blame] | 15 | :license: BSD, see LICENSE for more details. |
Armin Ronacher | 92f572f | 2007-02-26 22:17:32 +0100 | [diff] [blame] | 16 | """ |
| 17 | import re |
Armin Ronacher | 1cc232c | 2007-09-07 17:52:41 +0200 | [diff] [blame] | 18 | import unicodedata |
Armin Ronacher | 4325e37 | 2008-05-01 22:59:47 +0200 | [diff] [blame^] | 19 | from operator import itemgetter |
| 20 | from collections import deque |
Armin Ronacher | 82b3f3d | 2008-03-31 20:01:08 +0200 | [diff] [blame] | 21 | from jinja2.exceptions import TemplateSyntaxError |
Armin Ronacher | b5124e6 | 2008-04-25 00:36:14 +0200 | [diff] [blame] | 22 | from jinja2.utils import LRUCache |
Armin Ronacher | 92f572f | 2007-02-26 22:17:32 +0100 | [diff] [blame] | 23 | |
| 24 | |
# cache for the lexers.  Exists in order to be able to have multiple
# environments with the same lexer
# (LRUCache(50): presumably bounded to 50 entries — see jinja2.utils)
_lexer_cache = LRUCache(50)
Armin Ronacher | 2158091 | 2007-04-17 17:13:10 +0200 | [diff] [blame] | 28 | |
# static regular expressions.  The flags are passed to `re.compile`
# instead of being written as `(?um)` / `(?ms)` at the *end* of the
# pattern: inline flags that are not at the start of a pattern are
# rejected by newer Python versions, and the compile-time flags are
# exactly equivalent.
whitespace_re = re.compile(r'\s+', re.U | re.M)
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                       r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.M | re.S)
integer_re = re.compile(r'\d+')
name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
float_re = re.compile(r'\d+\.\d+')

# bind operators to token types
operators = {
    '+': 'add',
    '-': 'sub',
    '/': 'div',
    '//': 'floordiv',
    '*': 'mul',
    '%': 'mod',
    '**': 'pow',
    '~': 'tilde',
    '[': 'lbracket',
    ']': 'rbracket',
    '(': 'lparen',
    ')': 'rparen',
    '{': 'lbrace',
    '}': 'rbrace',
    '==': 'eq',
    '!=': 'ne',
    '>': 'gt',
    '>=': 'gteq',
    '<': 'lt',
    '<=': 'lteq',
    '=': 'assign',
    '.': 'dot',
    ':': 'colon',
    '|': 'pipe',
    ',': 'comma',
    ';': 'semicolon'
}

# reverse mapping: token type -> operator string.  `items()` instead of
# the Python 2 only `iteritems()` behaves identically here and keeps
# this module importable on Python 3 as well.
reverse_operators = dict([(v, k) for k, v in operators.items()])
assert len(operators) == len(reverse_operators), 'operators dropped'
# sort longest operators first so that e.g. '**' is matched before '*'
operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
                         sorted(operators, key=lambda x: -len(x))))

# single-character escape sequences usable in template string literals
simple_escapes = {
    'a': '\a',
    'n': '\n',
    'r': '\r',
    'f': '\f',
    't': '\t',
    'v': '\v',
    '\\': '\\',
    '"': '"',
    "'": "'",
    '0': '\x00'
}
# escape prefixes followed by a fixed number of hex digits
unicode_escapes = {
    'x': 2,
    'u': 4,
    'U': 8
}
| 89 | |
Armin Ronacher | 1cc232c | 2007-09-07 17:52:41 +0200 | [diff] [blame] | 90 | |
| 91 | def unescape_string(lineno, filename, s): |
Armin Ronacher | b5124e6 | 2008-04-25 00:36:14 +0200 | [diff] [blame] | 92 | r"""Unescape a string. Supported escapes: |
Armin Ronacher | 1cc232c | 2007-09-07 17:52:41 +0200 | [diff] [blame] | 93 | \a, \n, \r\, \f, \v, \\, \", \', \0 |
| 94 | |
| 95 | \x00, \u0000, \U00000000, \N{...} |
Armin Ronacher | 1cc232c | 2007-09-07 17:52:41 +0200 | [diff] [blame] | 96 | """ |
Armin Ronacher | 1cc232c | 2007-09-07 17:52:41 +0200 | [diff] [blame] | 97 | try: |
Armin Ronacher | b5124e6 | 2008-04-25 00:36:14 +0200 | [diff] [blame] | 98 | return s.encode('ascii', 'backslashreplace').decode('unicode-escape') |
| 99 | except UnicodeError, e: |
| 100 | msg = str(e).split(':')[-1].strip() |
| 101 | raise TemplateSyntaxError(msg, lineno, filename) |
Armin Ronacher | 2894f22 | 2007-03-19 22:39:55 +0100 | [diff] [blame] | 102 | |
Armin Ronacher | 92f572f | 2007-02-26 22:17:32 +0100 | [diff] [blame] | 103 | |
class Failure(object):
    """Callable error marker: invoking an instance raises the configured
    error class (a `TemplateSyntaxError` by default) with the stored
    message and the given position.  The lexer rule tables use it to
    attach known error conditions to patterns.
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        self.error_class = cls
        self.message = message

    def __call__(self, lineno, filename):
        # invoked by the lexer when the failure pattern matched
        raise self.error_class(self.message, lineno, filename)
Armin Ronacher | 92f572f | 2007-02-26 22:17:32 +0100 | [diff] [blame] | 115 | |
| 116 | |
class Token(tuple):
    """Token class: an immutable ``(lineno, type, value)`` triple.

    The type string is interned in `__new__` so consumers can use fast
    ``is`` comparisons against literal token type strings.
    """
    __slots__ = ()
    lineno, type, value = (property(itemgetter(x)) for x in range(3))

    def __new__(cls, lineno, type, value):
        return tuple.__new__(cls, (lineno, intern(str(type)), value))

    def __str__(self):
        """Return a short human readable form of the token for error
        messages: the operator text, the name value, or the type itself.
        """
        # Use the module level `reverse_operators` mapping directly.  The
        # previous code imported it (together with a nonexistent
        # `keywords` dict) from the misspelled module `jinja.lexer`,
        # which raised an ImportError at the first call.
        if self.type in reverse_operators:
            return reverse_operators[self.type]
        elif self.type is 'name':
            return self.value
        return self.type

    def test(self, expr):
        """Test a token against a token expression.  This can either be a
        token type or ``'token_type:token_value'``.  This can only test
        against string values!
        """
        # here we do a regular string equality check as test_many is usually
        # passed an iterable of not interned strings.
        if self.type == expr:
            return True
        elif ':' in expr:
            return expr.split(':', 1) == [self.type, self.value]
        return False

    def test_many(self, iterable):
        """Test against multiple token expressions; True on first match."""
        for expr in iterable:
            if self.test(expr):
                return True
        return False

    def __repr__(self):
        return 'Token(%r, %r, %r)' % (
            self.lineno,
            self.type,
            self.value
        )
| 161 | |
| 162 | |
| 163 | class TokenStreamIterator(object): |
| 164 | """The iterator for tokenstreams. Iterate over the stream |
| 165 | until the eof token is reached. |
| 166 | """ |
| 167 | |
| 168 | def __init__(self, stream): |
| 169 | self._stream = stream |
| 170 | |
| 171 | def __iter__(self): |
| 172 | return self |
| 173 | |
| 174 | def next(self): |
| 175 | token = self._stream.current |
| 176 | if token.type == 'eof': |
| 177 | self._stream.close() |
| 178 | raise StopIteration() |
| 179 | self._stream.next(False) |
| 180 | return token |
| 181 | |
| 182 | |
class TokenStream(object):
    """A token stream wraps a generator and supports pushing tokens back.
    It also provides some functions to expect tokens and similar stuff.

    Important note: never push more than one token back to the stream.
    Although the stream object won't stop you from doing so, the
    behavior is undefined.  Multiple pushed tokens are only used
    internally!
    """

    def __init__(self, generator, filename):
        # bound `next` of the underlying generator; set to None on close
        self._next = generator.next
        # tokens pushed back onto the stream, read before the generator
        self._pushed = deque()
        # `current` always holds the token under the cursor
        self.current = Token(1, 'initial', '')
        self.filename = filename
        # advance once so `current` holds the first real token
        self.next()

    def __iter__(self):
        return TokenStreamIterator(self)

    def __nonzero__(self):
        """Are we at the end of the tokenstream?"""
        return bool(self._pushed) or self.current.type != 'eof'

    # True when the stream is exhausted
    eos = property(lambda x: not x.__nonzero__(), doc=__nonzero__.__doc__)

    def push(self, token):
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self):
        """Return the next token without consuming it (one-token
        lookahead; the token is pushed back and `current` restored).
        """
        old_token = self.next()
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n):
        """Go n tokens ahead."""
        for x in xrange(n):
            self.next()

    def next(self, skip_eol=True):
        """Go one token ahead and return the old one.

        With `skip_eol` (the default) 'eol' tokens are skipped over.
        The `is` comparisons below rely on token types being interned
        by `Token.__new__`.
        """
        rv = self.current
        while 1:
            if self._pushed:
                self.current = self._pushed.popleft()
            elif self.current.type is not 'eof':
                try:
                    self.current = self._next()
                except StopIteration:
                    self.close()
            if not skip_eol or self.current.type is not 'eol':
                break
        return rv

    def close(self):
        """Close the stream: pin `current` to an eof token and drop the
        reference to the generator.
        """
        self.current = Token(self.current.lineno, 'eof', '')
        self._next = None

    def expect(self, expr):
        """Expect a given token expression (type or 'type:value'),
        return the matching token and advance; raise
        `TemplateSyntaxError` otherwise.
        """
        if not self.current.test(expr):
            if ':' in expr:
                expr = expr.split(':')[1]
            if self.current.type is 'eof':
                raise TemplateSyntaxError('unexpected end of template, '
                                          'expected %r.' % expr,
                                          self.current.lineno,
                                          self.filename)
            raise TemplateSyntaxError("expected token %r, got %r" %
                                      (expr, str(self.current)),
                                      self.current.lineno,
                                      self.filename)
        try:
            # the finally block advances the stream *after* the return
            # value has been evaluated
            return self.current
        finally:
            self.next()
| 264 | |
| 265 | |
class LexerMeta(type):
    """Metaclass that caches lexer instances: environments whose lexing
    configuration is identical share one `Lexer` object from the module
    level `_lexer_cache`.
    """

    def __call__(cls, environment):
        # every environment setting that influences the generated
        # regexes participates in the cache key
        key = tuple(getattr(environment, name) for name in (
            'block_start_string',
            'block_end_string',
            'variable_start_string',
            'variable_end_string',
            'comment_start_string',
            'comment_end_string',
            'line_statement_prefix',
            'trim_blocks'
        ))
        lexer = _lexer_cache.get(key)
        if lexer is not None:
            return lexer
        lexer = type.__call__(cls, environment)
        _lexer_cache[key] = lexer
        return lexer
| 285 | |
| 286 | |
class Lexer(object):
    """Class that implements a lexer for a given environment. Automatically
    created by the environment class, usually you don't have to do that.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    # instances are cached per delimiter configuration (see LexerMeta)
    __metaclass__ = LexerMeta

    def __init__(self, environment):
        """Build the lexing state machine (`self.rules`) from the
        delimiter configuration of `environment`.
        """
        # shortcuts: `c` compiles with MULTILINE|DOTALL, `e` escapes a
        # delimiter string for literal use inside a pattern
        c = lambda x: re.compile(x, re.M | re.S)
        e = re.escape

        # lexing rules for tags.  Order matters: whitespace is skipped
        # (token None) and floats come before integers so '4.2' is not
        # split at the dot
        tag_rules = [
            (whitespace_re, None, None),
            (float_re, 'float', None),
            (integer_re, 'integer', None),
            (name_re, 'name', None),
            (string_re, 'string', None),
            (operator_re, 'operator', None)
        ]

        # assemble the root lexing rule. because "|" is ungreedy
        # we have to sort by length so that the lexer continues working
        # as expected when we have parsing rules like <% for block and
        # <%= for variables. (if someone wants asp like syntax)
        # variables are just part of the rules if variable processing
        # is required.
        root_tag_rules = [
            ('comment', environment.comment_start_string),
            ('block', environment.block_start_string),
            ('variable', environment.variable_start_string)
        ]
        # longest start string first
        root_tag_rules.sort(key=lambda x: -len(x[1]))

        # now escape the rules. This is done here so that the escape
        # signs don't count for the lengths of the tags.
        root_tag_rules = [(a, e(b)) for a, b in root_tag_rules]

        # if we have a line statement prefix we need an extra rule for
        # that.  The rule is inserted at the *front* of the list so it
        # is tried before all the others.
        if environment.line_statement_prefix is not None:
            prefix = e(environment.line_statement_prefix)
            root_tag_rules.insert(0, ('linestatement', '^\s*' + prefix))

        # block suffix if trimming is enabled: also consume the newline
        # directly following a block tag
        block_suffix_re = environment.trim_blocks and '\\n?' or ''

        # global lexing rules: maps a state name to a list of
        # (compiled regex, token type(s), new state) tuples.  As new
        # state, '#pop' leaves the current state, '#bygroup' derives
        # the state from the named group that matched, and None keeps
        # the current state.
        self.rules = {
            'root': [
                # directives
                (c('(.*?)(?:%s)' % '|'.join(
                    ['(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*%s)' % (
                        e(environment.block_start_string),
                        e(environment.block_start_string),
                        e(environment.block_end_string)
                    )] + [
                        '(?P<%s_begin>\s*%s\-|%s)' % (n, r, r)
                        for n, r in root_tag_rules
                    ])), ('data', '#bygroup'), '#bygroup'),
                # data
                (c('.+'), 'data', None)
            ],
            # comments
            'comment_begin': [
                (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
                    e(environment.comment_end_string),
                    e(environment.comment_end_string),
                    block_suffix_re
                )), ('comment', 'comment_end'), '#pop'),
                (c('(.)'), (Failure('Missing end of comment tag'),), None)
            ],
            # blocks
            'block_begin': [
                (c('(?:\-%s\s*|%s)%s' % (
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), 'block_end', '#pop'),
            ] + tag_rules,
            # variables
            'variable_begin': [
                (c('\-%s\s*|%s' % (
                    e(environment.variable_end_string),
                    e(environment.variable_end_string)
                )), 'variable_end', '#pop')
            ] + tag_rules,
            # raw block
            'raw_begin': [
                (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
                    e(environment.block_start_string),
                    e(environment.block_start_string),
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), ('data', 'raw_end'), '#pop'),
                (c('(.)'), (Failure('Missing end of raw directive'),), None)
            ],
            # line statements
            'linestatement_begin': [
                (c(r'\s*(\n|$)'), 'linestatement_end', '#pop')
            ] + tag_rules
        }

    def tokenize(self, source, filename=None):
        """Works like `tokeniter` but returns a `TokenStream` of `Token`
        objects instead of a generator of token tuples.  Additionally all
        token values are already converted into proper types and
        postprocessed: comments and raw markers are removed, line
        statements are normalized to blocks, integers and floats are
        converted, strings are unescaped etc.
        """
        source = unicode(source)
        def generate():
            for lineno, token, value in self.tokeniter(source, filename):
                # comments never reach the parser
                if token in ('comment_begin', 'comment', 'comment_end'):
                    continue
                # line statements are presented to the parser as blocks
                elif token == 'linestatement_begin':
                    token = 'block_begin'
                elif token == 'linestatement_end':
                    token = 'block_end'
                # we are not interested in those tokens in the parser
                elif token in ('raw_begin', 'raw_end'):
                    continue
                elif token == 'data':
                    # keep data as a bytestring if it is plain ASCII
                    try:
                        value = str(value)
                    except UnicodeError:
                        pass
                elif token == 'keyword':
                    # NOTE(review): `tokeniter` in this module never
                    # yields 'keyword' tokens, so this branch looks
                    # unreachable — verify before relying on it
                    token = value
                elif token == 'name':
                    value = str(value)
                elif token == 'string':
                    # strip the quotes and resolve escape sequences
                    value = unescape_string(lineno, filename, value[1:-1])
                    try:
                        value = str(value)
                    except UnicodeError:
                        pass
                elif token == 'integer':
                    value = int(value)
                elif token == 'float':
                    value = float(value)
                elif token == 'operator':
                    # map the operator string to its token type
                    token = operators[value]
                yield Token(lineno, token, value)
        return TokenStream(generate(), filename)

    def tokeniter(self, source, filename=None):
        """This method tokenizes the text and returns the tokens in a
        generator of ``(lineno, token_type, value)`` tuples.  Use this
        method if you just want to tokenize a template.  The output you
        get is not compatible with the input the jinja parser wants.
        The parser uses the `tokenize` function which returns a
        `TokenStream` of postprocessed tokens.
        """
        # normalize all newline styles to '\n'
        source = '\n'.join(source.splitlines())
        pos = 0
        lineno = 1
        # state stack; the top entry selects the active rule set
        stack = ['root']
        statetokens = self.rules['root']
        source_length = len(source)

        # closing brackets we still expect; block/variable ends are
        # only honoured while this is empty
        balancing_stack = []

        while 1:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)
                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and \
                   tokens in ('variable_end', 'block_end',
                              'linestatement_end'):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    for idx, token in enumerate(tokens):
                        # hidden group: only update the line counter.
                        # NOTE(review): this uses m.group(idx) while the
                        # normal branch below uses idx + 1; no rule in
                        # this file puts None inside a tuple, so this
                        # path appears unexercised — confirm before use
                        if token is None:
                            g = m.group(idx)
                            if g:
                                lineno += g.count('\n')
                            continue
                        # failure group: raise the configured error
                        elif token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == '#bygroup':
                            for key, value in m.groupdict().iteritems():
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count('\n')
                                    break
                            else:
                                raise RuntimeError('%r wanted to resolve '
                                                   'the token dynamically'
                                                   ' but no group matched'
                                                   % regex)
                        # normal group (match groups are 1-indexed)
                        else:
                            data = m.group(idx + 1)
                            if data:
                                yield lineno, token, data
                            lineno += data.count('\n')

                # strings as token are yielded as-is
                else:
                    data = m.group()
                    # update brace/parentheses balance
                    if tokens == 'operator':
                        if data == '{':
                            balancing_stack.append('}')
                        elif data == '(':
                            balancing_stack.append(')')
                        elif data == '[':
                            balancing_stack.append(']')
                        elif data in ('}', ')', ']'):
                            if not balancing_stack:
                                raise TemplateSyntaxError('unexpected "%s"' %
                                                          data, lineno,
                                                          filename)
                            expected_op = balancing_stack.pop()
                            if expected_op != data:
                                raise TemplateSyntaxError('unexpected "%s", '
                                                          'expected "%s"' %
                                                          (data, expected_op),
                                                          lineno, filename)
                    # yield items
                    if tokens is not None:
                        yield lineno, tokens, data
                    lineno += data.count('\n')

                # fetch new position into new variable so that we can check
                # if there is an internal parsing error which would result
                # in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == '#pop':
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == '#bygroup':
                        for key, value in m.groupdict().iteritems():
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError('%r wanted to resolve the '
                                               'new state dynamically but'
                                               ' no group matched' %
                                               regex)
                    # direct state name given
                    else:
                        stack.append(new_state)
                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and no stack change.
                # this means a loop without break condition, avoid that and
                # raise error
                elif pos2 == pos:
                    raise RuntimeError('%r yielded empty string without '
                                       'stack change' % regex)
                # advance the position and restart the rule loop
                pos = pos2
                break
            # if the loop terminated without break we haven't found a
            # single match; either we are at the end of the file or we
            # have a problem
            else:
                # end of text
                if pos >= source_length:
                    return
                # something went wrong
                raise TemplateSyntaxError('unexpected char %r at %d' %
                                          (source[pos], pos), lineno,
                                          filename)