Blame - jinja2/lexer.py - platform/external/python/jinja

2007-02-26 22:17:32 +0100

[diff] [blame]

1

# -*- coding: utf-8 -*-

2

"""

Armin Ronacher

07bc684

2008-03-31 14:18:49 +0200

[diff] [blame]

3

jinja2.lexer

4

~~~~~~~~~~~~

Armin Ronacher

3b65b8a

2007-02-27 20:21:45 +0100

[diff] [blame]

5

Armin Ronacher

5a8e497

2007-04-05 11:21:38 +0200

[diff] [blame]

6

This module implements a Jinja / Python combination lexer. The

7

`Lexer` class provided by this module is used to do some preprocessing

8

for Jinja.

9

10

On the one hand it filters out invalid operators like the bitshift

11

operators we don't allow in templates. On the other hand it separates

12

template code and python code in expressions.

13

Armin Ronacher

1d51f63

2008-03-25 14:34:45 +0100

[diff] [blame]

14

:copyright: 2007-2008 by Armin Ronacher.

Armin Ronacher

3b65b8a

2007-02-27 20:21:45 +0100

[diff] [blame]

15

:license: BSD, see LICENSE for more details.

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

16

"""

17

import re

Armin Ronacher

2007-09-07 17:52:41 +0200

[diff] [blame]

18

import unicodedata

Armin Ronacher

2008-05-01 22:59:47 +0200

[diff] [blame]

19

from operator import itemgetter

20

from collections import deque

Armin Ronacher

82b3f3d

2008-03-31 20:01:08 +0200

[diff] [blame]

21

from jinja2.exceptions import TemplateSyntaxError

Armin Ronacher

2008-04-25 00:36:14 +0200

[diff] [blame]

22

from jinja2.utils import LRUCache

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

23

24

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

25

# Cache for the lexers.  It exists so that multiple environments with the
# same lexer configuration can share a single lexer instance; it is read
# and filled by `LexerMeta.__call__`.
# NOTE(review): 50 is presumably the maximum number of cached lexers --
# confirm against `jinja2.utils.LRUCache`.
_lexer_cache = LRUCache(50)

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

28

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

29

# static regular expressions
#
# The flags are passed as arguments instead of being embedded as inline
# groups at the *end* of the pattern: trailing global inline flags are
# deprecated and rejected by newer versions of the `re` module, while the
# flags argument means exactly the same thing everywhere.
whitespace_re = re.compile(r'\s+', re.U | re.M)
# single or double quoted string literal with backslash escapes; may span
# multiple lines.
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                       r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.M | re.S)
integer_re = re.compile(r'\d+')
# an identifier: a word character that is not a digit, followed by any
# number of word characters.
name_re = re.compile(r'\b[^\W\d]\w*\b', re.U)
float_re = re.compile(r'\d+\.\d+')

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

36

Armin Ronacher

2007-09-07 17:52:41 +0200

[diff] [blame]

37

# bind operators to token types
operators = {
    '+':            'add',
    '-':            'sub',
    '/':            'div',
    '//':           'floordiv',
    '*':            'mul',
    '%':            'mod',
    '**':           'pow',
    '~':            'tilde',
    '[':            'lbracket',
    ']':            'rbracket',
    '(':            'lparen',
    ')':            'rparen',
    '{':            'lbrace',
    '}':            'rbrace',
    '==':           'eq',
    '!=':           'ne',
    '>':            'gt',
    '>=':           'gteq',
    '<':            'lt',
    '<=':           'lteq',
    '=':            'assign',
    '.':            'dot',
    ':':            'colon',
    '|':            'pipe',
    ',':            'comma',
    ';':            'semicolon'
}

# token type -> operator symbol.  `items()` is used rather than
# `iteritems()` because it yields the same pairs and exists on every
# Python version.
reverse_operators = dict([(v, k) for k, v in operators.items()])
# two operators mapped to the same token type would silently vanish from
# the reverse mapping; catch that at import time.
assert len(operators) == len(reverse_operators), 'operators dropped'
# longest operators first so that e.g. '**' is matched before '*'.
operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
                         sorted(operators, key=lambda x: -len(x))))

Armin Ronacher

2007-09-07 17:52:41 +0200

[diff] [blame]

71

Armin Ronacher

1d51f63

2008-03-25 14:34:45 +0100

[diff] [blame]

# Single-character escape codes mapped to the character each one stands
# for.
# NOTE(review): not referenced in the visible part of this module --
# `unescape_string` decodes through the 'unicode-escape' codec instead.
# Confirm whether anything else still uses this table.
simple_escapes = {
    'a':    '\a',
    'n':    '\n',
    'r':    '\r',
    'f':    '\f',
    't':    '\t',
    'v':    '\v',
    '\\':   '\\',
    '"':    '"',
    "'":    "'",
    '0':    '\x00'
}
# Escape introducers mapped to a length; presumably the number of hex
# digits that follow them (\xHH, \uHHHH, \UHHHHHHHH) -- TODO confirm, the
# table is not used in the visible part of this module either.
unicode_escapes = {
    'x':    2,
    'u':    4,
    'U':    8
}

Armin Ronacher

2007-09-07 17:52:41 +0200

[diff] [blame]

90

Armin Ronacher

2008-05-11 00:18:35 +0200

[diff] [blame^]

def _trystr(s):

try:

return str(s)

except UnicodeError:

return s

Armin Ronacher

2007-09-07 17:52:41 +0200

[diff] [blame]

98

def unescape_string(lineno, filename, s):

Armin Ronacher

2008-04-25 00:36:14 +0200

[diff] [blame]

99

r"""Unescape a string. Supported escapes:

Armin Ronacher

2007-09-07 17:52:41 +0200

[diff] [blame]

100

\a, \n, \r\, \f, \v, \\, \", \', \0

101

102

\x00, \u0000, \U00000000, \N{...}

Armin Ronacher

2007-09-07 17:52:41 +0200

[diff] [blame]

103

"""

Armin Ronacher

2007-09-07 17:52:41 +0200

[diff] [blame]

104

try:

Armin Ronacher

2008-05-11 00:18:35 +0200

[diff] [blame^]

105

return _trystr(s.encode('ascii', 'backslashreplace')

106

.decode('unicode-escape'))

Armin Ronacher

2008-04-25 00:36:14 +0200

[diff] [blame]

107

except UnicodeError, e:

108

msg = str(e).split(':')[-1].strip()

109

raise TemplateSyntaxError(msg, lineno, filename)

Armin Ronacher

2894f22

2007-03-19 22:39:55 +0100

[diff] [blame]

110

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

111

112

class Failure(object):
    """Callable placeholder for a known lexing error.

    The `Lexer` puts instances of this class into its rule tables; calling
    an instance raises the configured exception (`TemplateSyntaxError` by
    default) with the stored message.
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        self.error_class, self.message = cls, message

    def __call__(self, lineno, filename):
        # always raises; the exception gets (message, lineno, filename)
        error = self.error_class(self.message, lineno, filename)
        raise error

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

123

124

Armin Ronacher

2008-05-01 22:59:47 +0200

[diff] [blame]

class Token(tuple):
    """Token class.

    A token is a three-tuple ``(lineno, type, value)`` whose items are
    also available as the read-only attributes `lineno`, `type` and
    `value`.  The type is converted with ``str()`` and interned on
    creation.
    """
    __slots__ = ()
    lineno, type, value = (property(itemgetter(x)) for x in range(3))

    def __new__(cls, lineno, type, value):
        return tuple.__new__(cls, (lineno, intern(str(type)), value))

    def __str__(self):
        """Return a short description of the token for error messages:
        the operator symbol for operator tokens, the value for names and
        the token type for everything else.
        """
        # `reverse_operators` is the module-level table of this module;
        # nothing has to be imported for it.
        if self.type in reverse_operators:
            return reverse_operators[self.type]
        # compare by value: identity of equal strings is an interning
        # detail that must not decide the outcome.
        elif self.type == 'name':
            return self.value
        return self.type

    def test(self, expr):
        """Test a token against a token expression.  This can either be a
        token type or ``'token_type:token_value'``.  This can only test
        against string values and types.
        """
        # here we do a regular string equality check as test_any is usually
        # passed an iterable of not interned strings.
        if self.type == expr:
            return True
        elif ':' in expr:
            return expr.split(':', 1) == [self.type, self.value]
        return False

    def test_any(self, *iterable):
        """Test against multiple token expressions."""
        for expr in iterable:
            if self.test(expr):
                return True
        return False

    def __repr__(self):
        return 'Token(%r, %r, %r)' % (
            self.lineno,
            self.type,
            self.value
        )

class TokenStreamIterator(object):

172

"""The iterator for tokenstreams. Iterate over the stream

173

until the eof token is reached.

174

"""

175

176

def __init__(self, stream):

177

self._stream = stream

def __iter__(self):

return self

def next(self):

token = self._stream.current

184

if token.type == 'eof':

185

self._stream.close()

186

raise StopIteration()

187

self._stream.next(False)

return token

class TokenStream(object):
    """A token stream is an iterable that yields :class:`Token` objects.
    The parser however does not iterate over it but calls :meth:`next` to
    go one token ahead.  The current active token is stored as
    :attr:`current`.
    """

    def __init__(self, generator, filename):
        self._next = generator.next
        self._pushed = deque()
        # placeholder that is replaced by the first real token right below
        self.current = Token(1, 'initial', '')
        self.filename = filename
        self.next()

    def __iter__(self):
        return TokenStreamIterator(self)

    def __nonzero__(self):
        """Are there tokens left, i.e. is the stream not at its end yet?"""
        return bool(self._pushed) or self.current.type != 'eof'

    eos = property(lambda x: not x.__nonzero__(),
                   doc="""Are we at the end of the stream?""")

    def push(self, token):
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self):
        """Look at the next token."""
        # advance, remember what we found, then undo the step by pushing
        # the found token back and restoring the previous current token.
        old_token = self.next()
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n=1):
        """Go n tokens ahead."""
        for x in xrange(n):
            self.next()

    def next(self, skip_eol=True):
        """Go one token ahead and return the old one.  Tokens of type
        ``'eol'`` are skipped unless `skip_eol` is false.
        """
        rv = self.current
        while 1:
            if self._pushed:
                self.current = self._pushed.popleft()
            # token types are compared by value; `is` would only work as
            # long as both strings happen to be interned.
            elif self.current.type != 'eof':
                try:
                    self.current = self._next()
                except StopIteration:
                    self.close()
            if not skip_eol or self.current.type != 'eol':
                break
        return rv

    def close(self):
        """Close the stream."""
        self.current = Token(self.current.lineno, 'eof', '')
        self._next = None

    def expect(self, expr):
        """Expect a given token type and return it.  This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.  The stream is
        advanced past the returned token; a `TemplateSyntaxError` is
        raised if the current token does not match.
        """
        if not self.current.test(expr):
            if ':' in expr:
                # same split as in `Token.test`, so a value that contains
                # a colon is reported completely
                expr = expr.split(':', 1)[1]
            if self.current.type == 'eof':
                raise TemplateSyntaxError('unexpected end of template, '
                                          'expected %r.' % expr,
                                          self.current.lineno,
                                          self.filename)
            raise TemplateSyntaxError("expected token %r, got %r" %
                                      (expr, str(self.current)),
                                      self.current.lineno,
                                      self.filename)
        try:
            return self.current
        finally:
            self.next()

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

272

class LexerMeta(type):
    """Metaclass for the lexer that hands out one shared instance per
    lexer configuration.  Instances are kept in the module-level
    `_lexer_cache`, keyed by the syntax settings of the environment.
    """

    def __call__(cls, environment):
        # everything that influences the generated lexing rules
        key = tuple([getattr(environment, name) for name in (
            'block_start_string',
            'block_end_string',
            'variable_start_string',
            'variable_end_string',
            'comment_start_string',
            'comment_end_string',
            'line_statement_prefix',
            'trim_blocks'
        )])
        cached = _lexer_cache.get(key)
        if cached is not None:
            return cached
        # cache miss: really instantiate the class and remember the result
        lexer = _lexer_cache[key] = type.__call__(cls, environment)
        return lexer

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

293

class Lexer(object):

Armin Ronacher

2008-04-25 00:36:14 +0200

[diff] [blame]

294

"""Class that implements a lexer for a given environment. Automatically

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

295

created by the environment class, usually you don't have to do that.

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

296

297

Note that the lexer is not automatically bound to an environment.

298

Multiple environments can share the same lexer.

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

299

"""

300

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

301

__metaclass__ = LexerMeta

302

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

303

def __init__(self, environment):
    """Build the lexing rules for the syntax configured on `environment`.

    Reads the block/variable/comment start and end strings, the
    `line_statement_prefix` and `trim_blocks` from the environment and
    stores the result as `self.rules`: a dict mapping a state name to a
    list of ``(compiled regex, token or tuple of tokens, new state)``
    rules.
    """
    # shortcuts
    c = lambda x: re.compile(x, re.M | re.S)
    e = re.escape

    # lexing rules for tags
    tag_rules = [
        (whitespace_re, None, None),
        (float_re, 'float', None),
        (integer_re, 'integer', None),
        (name_re, 'name', None),
        (string_re, 'string', None),
        (operator_re, 'operator', None)
    ]

    # assemble the root lexing rule.  because "|" takes the first
    # alternative that matches, we have to sort by length so that the
    # lexer continues working as expected when we have parsing rules
    # like <% for block and <%= for variables. (if someone wants asp
    # like syntax)
    root_tag_rules = [
        ('comment',     environment.comment_start_string),
        ('block',       environment.block_start_string),
        ('variable',    environment.variable_start_string)
    ]
    root_tag_rules.sort(key=lambda x: -len(x[1]))

    # now escape the rules.  This is done here so that the escape
    # signs don't count for the lengths of the tags.
    root_tag_rules = [(a, e(b)) for a, b in root_tag_rules]

    # if we have a line statement prefix we need an extra rule for
    # that.  It is inserted at the front of the list (after the
    # sorting above), so it precedes the comment/block/variable
    # alternatives in the root regex.
    if environment.line_statement_prefix is not None:
        prefix = e(environment.line_statement_prefix)
        root_tag_rules.insert(0, ('linestatement', r'^\s*' + prefix))

    # block suffix if trimming is enabled
    block_suffix_re = environment.trim_blocks and r'\n?' or ''

    # global lexing rules.  all regex fragments are raw strings so
    # that the backslashes reach the regex engine untouched.
    self.rules = {
        'root': [
            # directives
            (c(r'(.*?)(?:%s)' % '|'.join(
                [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*%s)' % (
                    e(environment.block_start_string),
                    e(environment.block_start_string),
                    e(environment.block_end_string)
                )] + [
                    r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, r)
                    for n, r in root_tag_rules
                ])), ('data', '#bygroup'), '#bygroup'),
            # data
            (c('.+'), 'data', None)
        ],
        # comments
        'comment_begin': [
            (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
                e(environment.comment_end_string),
                e(environment.comment_end_string),
                block_suffix_re
            )), ('comment', 'comment_end'), '#pop'),
            (c('(.)'), (Failure('Missing end of comment tag'),), None)
        ],
        # blocks
        'block_begin': [
            (c(r'(?:\-%s\s*|%s)%s' % (
                e(environment.block_end_string),
                e(environment.block_end_string),
                block_suffix_re
            )), 'block_end', '#pop'),
        ] + tag_rules,
        # variables
        'variable_begin': [
            (c(r'\-%s\s*|%s' % (
                e(environment.variable_end_string),
                e(environment.variable_end_string)
            )), 'variable_end', '#pop')
        ] + tag_rules,
        # raw block
        'raw_begin': [
            (c(r'(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
                e(environment.block_start_string),
                e(environment.block_start_string),
                e(environment.block_end_string),
                e(environment.block_end_string),
                block_suffix_re
            )), ('data', 'raw_end'), '#pop'),
            (c('(.)'), (Failure('Missing end of raw directive'),), None)
        ],
        # line statements
        'linestatement_begin': [
            (c(r'\s*(\n|$)'), 'linestatement_end', '#pop')
        ] + tag_rules
    }

Armin Ronacher

2008-04-12 12:02:36 +0200

[diff] [blame]

400

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

401

def tokenize(self, source, filename=None):
    """Tokenize `source` and return the result as a `TokenStream`.

    This builds on `tokeniter` but postprocesses what it yields: comment
    and raw begin/end markers are dropped, line statement markers are
    reported as block markers, integers and floats are converted,
    strings are unescaped and operators get their token type from the
    `operators` table.
    """
    source = unicode(source)
    # tokens the parser is not interested in
    ignored = ('comment_begin', 'comment', 'comment_end',
               'raw_begin', 'raw_end')

    def generate():
        for lineno, kind, raw in self.tokeniter(source, filename):
            if kind in ignored:
                continue
            if kind == 'linestatement_begin':
                kind = 'block_begin'
            elif kind == 'linestatement_end':
                kind = 'block_end'
            elif kind in ('data', 'name'):
                raw = _trystr(raw)
            elif kind == 'keyword':
                # a keyword token uses its value as its type
                kind = raw
            elif kind == 'string':
                # strip the surrounding quotes before unescaping
                raw = unescape_string(lineno, filename, raw[1:-1])
            elif kind == 'integer':
                raw = int(raw)
            elif kind == 'float':
                raw = float(raw)
            elif kind == 'operator':
                kind = operators[raw]
            yield Token(lineno, kind, raw)

    return TokenStream(generate(), filename)

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

435

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

436

def tokeniter(self, source, filename=None):

Armin Ronacher

2008-04-25 00:36:14 +0200

[diff] [blame]

437

"""This method tokenizes the text and returns the tokens in a

438

generator. Use this method if you just want to tokenize a template.

439

The output you get is not compatible with the input the jinja parser

440

wants. The parser uses the `tokenize` function with returns a

441

`TokenStream` and postprocessed tokens.

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

442

"""

Armin Ronacher

5a8e497

2007-04-05 11:21:38 +0200

[diff] [blame]

443

source = '\n'.join(source.splitlines())

Armin Ronacher

7977e5c

2007-03-12 07:22:17 +0100

[diff] [blame]

444

pos = 0

445

lineno = 1

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

446

stack = ['root']

447

statetokens = self.rules['root']

448

source_length = len(source)

449

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

450

balancing_stack = []

451

Armin Ronacher

2008-04-12 14:19:36 +0200

[diff] [blame]

452

while 1:

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

453

# tokenizer loop

454

for regex, tokens, new_state in statetokens:

455

m = regex.match(source, pos)

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

456

# if no match we try again with the next rule

Armin Ronacher

2008-04-12 14:19:36 +0200

[diff] [blame]

457

if m is None:

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

458

continue

459

460

# we only match blocks and variables if brances / parentheses

461

# are balanced. continue parsing with the lower rule which

462

# is the operator rule. do this only if the end tags look

463

# like operators

464

if balancing_stack and \

Armin Ronacher

2008-04-12 14:19:36 +0200

[diff] [blame]

465

tokens in ('variable_end', 'block_end',

466

'linestatement_end'):

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

467

continue

468

469

# tuples support more options

470

if isinstance(tokens, tuple):

471

for idx, token in enumerate(tokens):

# hidden group

if token is None:

g = m.group(idx)

if g:

lineno += g.count('\n')

477

continue

478

# failure group

Armin Ronacher

ecc051b

2007-06-01 18:25:28 +0200

[diff] [blame]

479

elif token.__class__ is Failure:

Armin Ronacher

720e55b

2007-05-30 00:57:49 +0200

[diff] [blame]

480

raise token(lineno, filename)

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

481

# bygroup is a bit more complex, in that case we

482

# yield for the current token the first named

483

# group that matched

484

elif token == '#bygroup':

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

485

for key, value in m.groupdict().iteritems():

486

if value is not None:

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

487

yield lineno, key, value

488

lineno += value.count('\n')

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

489

break

490

else:

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

491

raise RuntimeError('%r wanted to resolve '

492

'the token dynamically'

493

' but no group matched'

494

% regex)

495

# normal group

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

496

else:

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

497

data = m.group(idx + 1)

498

if data:

499

yield lineno, token, data

500

lineno += data.count('\n')

501

Armin Ronacher

2008-04-12 14:19:36 +0200

[diff] [blame]

502

# strings as token just are yielded as it.

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

503

else:

504

data = m.group()

505

# update brace/parentheses balance

506

if tokens == 'operator':

507

if data == '{':

508

balancing_stack.append('}')

509

elif data == '(':

510

balancing_stack.append(')')

511

elif data == '[':

512

balancing_stack.append(']')

513

elif data in ('}', ')', ']'):

Armin Ronacher

f750daa

2007-05-29 23:22:38 +0200

[diff] [blame]

514

if not balancing_stack:

515

raise TemplateSyntaxError('unexpected "%s"' %

516

data, lineno,

517

filename)

518

expected_op = balancing_stack.pop()

519

if expected_op != data:

520

raise TemplateSyntaxError('unexpected "%s", '

521

'expected "%s"' %

522

(data, expected_op),

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

523

lineno, filename)

524

# yield items

525

if tokens is not None:

Armin Ronacher

2008-04-12 14:19:36 +0200

[diff] [blame]

526

yield lineno, tokens, data

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

527

lineno += data.count('\n')

528

529

# fetch new position into new variable so that we can check

530

# if there is a internal parsing error which would result

531

# in an infinite loop

532

pos2 = m.end()

533

534

# handle state changes

535

if new_state is not None:

536

# remove the uppermost state

537

if new_state == '#pop':

538

stack.pop()

539

# resolve the new state by group checking

540

elif new_state == '#bygroup':

541

for key, value in m.groupdict().iteritems():

542

if value is not None:

stack.append(key)

break

else:

raise RuntimeError('%r wanted to resolve the '

547

'new state dynamically but'

548

' no group matched' %

549

regex)

550

# direct state name given

551

else:

552

stack.append(new_state)

553

statetokens = self.rules[stack[-1]]

554

# we are still at the same position and no stack change.

555

# this means a loop without break condition, avoid that and

556

# raise error

557

elif pos2 == pos:

558

raise RuntimeError('%r yielded empty string without '

559

'stack change' % regex)

560

# publish new function and start again

561

pos = pos2

562

break

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

563

# if loop terminated without break we havn't found a single match

564

# either we are at the end of the file or we have a problem

565

else:

566

# end of text

567

if pos >= source_length:

568

return

569

# something went wrong

570

raise TemplateSyntaxError('unexpected char %r at %d' %

Armin Ronacher