# -*- coding: utf-8 -*-
"""
    jinja2.lexer
    ~~~~~~~~~~~~

    This module implements a Jinja / Python combination lexer. The
    `Lexer` class provided by this module is used to do some preprocessing
    for Jinja.

    On the one hand it filters out invalid operators like the bitshift
    operators we don't allow in templates. On the other hand it separates
    template code and python code in expressions.

    :copyright: 2007-2008 by Armin Ronacher.
    :license: BSD, see LICENSE for more details.
"""
import re
import unicodedata
from operator import itemgetter
from collections import deque
from jinja2.exceptions import TemplateSyntaxError
from jinja2.utils import LRUCache


# cache for the lexers. Exists in order to be able to have multiple
# environments with the same lexer
_lexer_cache = LRUCache(50)

# static regular expressions
whitespace_re = re.compile(r'\s+(?um)')
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                       r'|"([^"\\]*(?:\\.[^"\\]*)*)")(?ms)')
integer_re = re.compile(r'\d+')
name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
float_re = re.compile(r'\d+\.\d+')

# bind operators to token types
operators = {
    '+': 'add',
    '-': 'sub',
    '/': 'div',
    '//': 'floordiv',
    '*': 'mul',
    '%': 'mod',
    '**': 'pow',
    '~': 'tilde',
    '[': 'lbracket',
    ']': 'rbracket',
    '(': 'lparen',
    ')': 'rparen',
    '{': 'lbrace',
    '}': 'rbrace',
    '==': 'eq',
    '!=': 'ne',
    '>': 'gt',
    '>=': 'gteq',
    '<': 'lt',
    '<=': 'lteq',
    '=': 'assign',
    '.': 'dot',
    ':': 'colon',
    '|': 'pipe',
    ',': 'comma',
    ';': 'semicolon'
}

reverse_operators = dict([(v, k) for k, v in operators.iteritems()])
assert len(operators) == len(reverse_operators), 'operators dropped'
operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
                         sorted(operators, key=lambda x: -len(x))))

simple_escapes = {
    'a': '\a',
    'n': '\n',
    'r': '\r',
    'f': '\f',
    't': '\t',
    'v': '\v',
    '\\': '\\',
    '"': '"',
    "'": "'",
    '0': '\x00'
}
unicode_escapes = {
    'x': 2,
    'u': 4,
    'U': 8
}


def unescape_string(lineno, filename, s):
    r"""Unescape a string. Supported escapes:
    \a, \n, \r, \f, \t, \v, \\, \", \', \0

    \x00, \u0000, \U00000000, \N{...}
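
    A doctest-style sketch (the `lineno` and `filename` arguments only
    matter for error reporting)::

        >>> unescape_string(1, None, 'foo\\nbar')
        u'foo\nbar'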
    """
    try:
        return s.encode('ascii', 'backslashreplace').decode('unicode-escape')
    except UnicodeError, e:
        msg = str(e).split(':')[-1].strip()
        raise TemplateSyntaxError(msg, lineno, filename)


class Failure(object):
    """Class that raises a `TemplateSyntaxError` if called.
    Used by the `Lexer` to specify known errors.
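
    A short sketch of how the lexer rules use it (`lineno` and `filename`
    are placeholders)::

        fail = Failure('Missing end of comment tag')
        fail(lineno, filename)  # raises TemplateSyntaxError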
107 """
108
109 def __init__(self, message, cls=TemplateSyntaxError):
110 self.message = message
111 self.error_class = cls
112
Armin Ronacher720e55b2007-05-30 00:57:49 +0200113 def __call__(self, lineno, filename):
114 raise self.error_class(self.message, lineno, filename)
Armin Ronacher92f572f2007-02-26 22:17:32 +0100115
116
class Token(tuple):
    """An immutable ``(lineno, type, value)`` token tuple."""
    __slots__ = ()
    lineno, type, value = (property(itemgetter(x)) for x in range(3))

    def __new__(cls, lineno, type, value):
        return tuple.__new__(cls, (lineno, intern(str(type)), value))

    def __str__(self):
        if self.type in reverse_operators:
            return reverse_operators[self.type]
        elif self.type is 'name':
            return self.value
        return self.type

    def test(self, expr):
        """Test a token against a token expression. This can either be a
        token type or ``'token_type:token_value'``. This can only test
        against string values and types.
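
        For example::

            Token(1, 'name', 'foo').test('name')        # True
            Token(1, 'name', 'foo').test('name:foo')    # True
            Token(1, 'name', 'foo').test('name:bar')    # False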
        """
        # here we do a regular string equality check as test_any is usually
        # passed an iterable of uninterned strings.
        if self.type == expr:
            return True
        elif ':' in expr:
            return expr.split(':', 1) == [self.type, self.value]
        return False

    def test_any(self, *iterable):
        """Test against multiple token expressions."""
        for expr in iterable:
            if self.test(expr):
                return True
        return False

    def __repr__(self):
        return 'Token(%r, %r, %r)' % (
            self.lineno,
            self.type,
            self.value
        )


class TokenStreamIterator(object):
    """The iterator for token streams. Iterates over the stream
    until the eof token is reached.
    """

    def __init__(self, stream):
        self._stream = stream

    def __iter__(self):
        return self

    def next(self):
        token = self._stream.current
        if token.type == 'eof':
            self._stream.close()
            raise StopIteration()
        self._stream.next()
        return token


class TokenStream(object):
    """A token stream is an iterable that yields :class:`Token`\s. The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead. The current active token is stored as :attr:`current`.
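
    A minimal usage sketch (assuming `lexer` is a `Lexer` instance)::

        stream = lexer.tokenize(u'{{ foo }}')
        stream.current          # Token(1, 'variable_begin', u'{{')
        stream.next()           # advance; returns the old current token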
    """

    def __init__(self, generator, filename):
        self._next = generator.next
        self._pushed = deque()
        self.current = Token(1, 'initial', '')
        self.filename = filename
        self.next()

    def __iter__(self):
        return TokenStreamIterator(self)

    def __nonzero__(self):
        """`True` as long as the stream is not exhausted."""
        return bool(self._pushed) or self.current.type != 'eof'

    eos = property(lambda x: not x.__nonzero__(),
                   doc='`True` if the end of the stream was reached.')
204
205 def push(self, token):
206 """Push a token back to the stream."""
207 self._pushed.append(token)
208
209 def look(self):
210 """Look at the next token."""
211 old_token = self.next()
212 result = self.current
213 self.push(result)
214 self.current = old_token
215 return result
216
Armin Ronacherea847c52008-05-02 20:04:32 +0200217 def skip(self, n=1):
Armin Ronacher4325e372008-05-01 22:59:47 +0200218 """Got n tokens ahead."""
219 for x in xrange(n):
220 self.next()
221
Armin Ronacherfdf95302008-05-11 22:20:51 +0200222 def next_if(self, expr):
223 """Perform the token test and return the token if it matched.
224 Otherwise the return value is `None`.
225 """
226 if self.current.test(expr):
227 return self.next()
228
229 def skip_if(self, expr):
230 """Like `next_if` but only returns `True` or `False`."""
231 return self.next_if(expr) is not None
232
    def next(self):
        """Go one token ahead and return the old one."""
        rv = self.current
        if self._pushed:
            self.current = self._pushed.popleft()
        elif self.current.type is not 'eof':
            try:
                self.current = self._next()
            except StopIteration:
                self.close()
        return rv

    def close(self):
        """Close the stream."""
        self.current = Token(self.current.lineno, 'eof', '')
        self._next = None

    def expect(self, expr):
        """Expect a given token type and return it. This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.
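
        For example::

            stream.expect('name')         # any name token
            stream.expect('name:endfor')  # a name token with value 'endfor'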
253 """
Armin Ronacher4325e372008-05-01 22:59:47 +0200254 if not self.current.test(expr):
255 if ':' in expr:
256 expr = expr.split(':')[1]
257 if self.current.type is 'eof':
258 raise TemplateSyntaxError('unexpected end of template, '
259 'expected %r.' % expr,
260 self.current.lineno,
261 self.filename)
262 raise TemplateSyntaxError("expected token %r, got %r" %
263 (expr, str(self.current)),
264 self.current.lineno,
265 self.filename)
266 try:
267 return self.current
268 finally:
269 self.next()
270
271
class LexerMeta(type):
    """Metaclass for the lexer that caches instances for
    the same configuration in an LRU cache.
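
    As a consequence, requesting lexers for environments with identical
    delimiter configuration yields the same instance (a sketch; `env1` and
    `env2` are placeholder environments, and the cache entry is assumed
    not to have been evicted in between)::

        Lexer(env1) is Lexer(env2)   # True for equal configurations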
275 """
276
277 def __call__(cls, environment):
Armin Ronacher203bfcb2008-04-24 21:54:44 +0200278 key = (environment.block_start_string,
279 environment.block_end_string,
280 environment.variable_start_string,
281 environment.variable_end_string,
282 environment.comment_start_string,
283 environment.comment_end_string,
284 environment.line_statement_prefix,
285 environment.trim_blocks)
Armin Ronacherb5124e62008-04-25 00:36:14 +0200286 lexer = _lexer_cache.get(key)
287 if lexer is None:
288 lexer = type.__call__(cls, environment)
289 _lexer_cache[key] = lexer
Armin Ronacher21580912007-04-17 17:13:10 +0200290 return lexer
291
292
class Lexer(object):
    """Class that implements a lexer for a given environment. Automatically
    created by the environment class; usually you don't have to do that.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    __metaclass__ = LexerMeta

    def __init__(self, environment):
        # shortcuts
        c = lambda x: re.compile(x, re.M | re.S)
        e = re.escape

        # lexing rules for tags
        tag_rules = [
            (whitespace_re, None, None),
            (float_re, 'float', None),
            (integer_re, 'integer', None),
            (name_re, 'name', None),
            (string_re, 'string', None),
            (operator_re, 'operator', None)
        ]

        # assemble the root lexing rule. because the regex alternation
        # operator ("|") tries alternatives from left to right, we have
        # to sort by length so that the lexer continues working as
        # expected when we have parsing rules like <% for blocks and
        # <%= for variables. (if someone wants asp like syntax)
        # variables are just part of the rules if variable processing
        # is required.
        root_tag_rules = [
            ('comment', environment.comment_start_string),
            ('block', environment.block_start_string),
            ('variable', environment.variable_start_string)
        ]
        root_tag_rules.sort(key=lambda x: -len(x[1]))

        # now escape the rules. This is done here so that the escape
        # signs don't count for the lengths of the tags.
        root_tag_rules = [(a, e(b)) for a, b in root_tag_rules]

        # if we have a line statement prefix we need an extra rule for
        # that. We add this rule after the others have been sorted and
        # escaped, and insert it at the front so it is tried first.
        if environment.line_statement_prefix is not None:
            prefix = e(environment.line_statement_prefix)
            root_tag_rules.insert(0, ('linestatement', '^\s*' + prefix))

        # block suffix if trimming is enabled
        block_suffix_re = environment.trim_blocks and '\\n?' or ''

        # global lexing rules
        self.rules = {
            'root': [
                # directives
                (c('(.*?)(?:%s)' % '|'.join(
                    ['(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*%s)' % (
                        e(environment.block_start_string),
                        e(environment.block_start_string),
                        e(environment.block_end_string)
                    )] + [
                        '(?P<%s_begin>\s*%s\-|%s)' % (n, r, r)
                        for n, r in root_tag_rules
                    ])), ('data', '#bygroup'), '#bygroup'),
                # data
                (c('.+'), 'data', None)
            ],
            # comments
            'comment_begin': [
                (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
                    e(environment.comment_end_string),
                    e(environment.comment_end_string),
                    block_suffix_re
                )), ('comment', 'comment_end'), '#pop'),
                (c('(.)'), (Failure('Missing end of comment tag'),), None)
            ],
            # blocks
            'block_begin': [
                (c('(?:\-%s\s*|%s)%s' % (
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), 'block_end', '#pop'),
            ] + tag_rules,
            # variables
            'variable_begin': [
                (c('\-%s\s*|%s' % (
                    e(environment.variable_end_string),
                    e(environment.variable_end_string)
                )), 'variable_end', '#pop')
            ] + tag_rules,
            # raw block
            'raw_begin': [
                (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
                    e(environment.block_start_string),
                    e(environment.block_start_string),
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), ('data', 'raw_end'), '#pop'),
                (c('(.)'), (Failure('Missing end of raw directive'),), None)
            ],
            # line statements
            'linestatement_begin': [
                (c(r'\s*(\n|$)'), 'linestatement_end', '#pop')
            ] + tag_rules
        }

    def tokenize(self, source, filename=None):
        """Works like `tokeniter` but returns a `TokenStream` of tokens and
        not a generator of token tuples. Additionally all token values are
        already converted into types and postprocessed. For example comments
        are removed, integers and floats converted, strings unescaped etc.
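
        A short sketch of the postprocessing (`lexer` is assumed to be a
        bound `Lexer` instance)::

            stream = lexer.tokenize(u'{{ 1 + 2 }}')
            # yields variable_begin, integer, add, integer and variable_end
            # tokens; the integer values arrive as real Python ints.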
        """
        source = unicode(source)
        def generate():
            for lineno, token, value in self.tokeniter(source, filename):
                if token in ('comment_begin', 'comment', 'comment_end'):
                    continue
                elif token == 'linestatement_begin':
                    token = 'block_begin'
                elif token == 'linestatement_end':
                    token = 'block_end'
                # we are not interested in those tokens in the parser
                elif token in ('raw_begin', 'raw_end'):
                    continue
                elif token == 'data':
                    # degrade to a plain bytestring if possible
                    try:
                        value = str(value)
                    except UnicodeError:
                        pass
                elif token == 'keyword':
                    token = value
                elif token == 'name':
                    value = str(value)
                elif token == 'string':
                    value = unescape_string(lineno, filename, value[1:-1])
                    # degrade to a plain bytestring if possible
                    try:
                        value = str(value)
                    except UnicodeError:
                        pass
                elif token == 'integer':
                    value = int(value)
                elif token == 'float':
                    value = float(value)
                elif token == 'operator':
                    token = operators[value]
                yield Token(lineno, token, value)
        return TokenStream(generate(), filename)

    def tokeniter(self, source, filename=None):
        """This method tokenizes the text and returns the tokens in a
        generator. Use this method if you just want to tokenize a template.
        The output you get is not compatible with the input the jinja parser
        wants. The parser uses the `tokenize` method, which returns a
        `TokenStream` of postprocessed tokens.
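
        A short sketch of the raw output::

            for lineno, token, value in lexer.tokeniter(u'{{ foo }}'):
                print lineno, token, value
            # 1 variable_begin {{
            # 1 name foo
            # 1 variable_end }}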
        """
        source = '\n'.join(source.splitlines())
        pos = 0
        lineno = 1
        stack = ['root']
        statetokens = self.rules['root']
        source_length = len(source)

        balancing_stack = []

        while 1:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)
                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and \
                   tokens in ('variable_end', 'block_end',
                              'linestatement_end'):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    for idx, token in enumerate(tokens):
                        # hidden group (regex groups are 1-based)
                        if token is None:
                            g = m.group(idx + 1)
                            if g:
                                lineno += g.count('\n')
                            continue
                        # failure group
                        elif token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == '#bygroup':
                            for key, value in m.groupdict().iteritems():
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count('\n')
                                    break
                            else:
                                raise RuntimeError('%r wanted to resolve '
                                                   'the token dynamically'
                                                   ' but no group matched'
                                                   % regex)
                        # normal group
                        else:
                            data = m.group(idx + 1)
                            if data:
                                yield lineno, token, data
                            lineno += data.count('\n')

                # plain string tokens are yielded as-is
                else:
                    data = m.group()
                    # update brace/parentheses balance
                    if tokens == 'operator':
                        if data == '{':
                            balancing_stack.append('}')
                        elif data == '(':
                            balancing_stack.append(')')
                        elif data == '[':
                            balancing_stack.append(']')
                        elif data in ('}', ')', ']'):
                            if not balancing_stack:
                                raise TemplateSyntaxError('unexpected "%s"' %
                                                          data, lineno,
                                                          filename)
                            expected_op = balancing_stack.pop()
                            if expected_op != data:
                                raise TemplateSyntaxError('unexpected "%s", '
                                                          'expected "%s"' %
                                                          (data, expected_op),
                                                          lineno, filename)
                    # yield items
                    if tokens is not None:
                        yield lineno, tokens, data
                    lineno += data.count('\n')

                # fetch new position into new variable so that we can check
                # if there is an internal parsing error which would result
                # in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == '#pop':
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == '#bygroup':
                        for key, value in m.groupdict().iteritems():
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError('%r wanted to resolve the '
                                               'new state dynamically but'
                                               ' no group matched' %
                                               regex)
                    # direct state name given
                    else:
                        stack.append(new_state)
                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and no stack change.
                # this means a loop without break condition, avoid that and
                # raise error
                elif pos2 == pos:
                    raise RuntimeError('%r yielded empty string without '
                                       'stack change' % regex)
                # publish the new position and start over
                pos = pos2
                break
            # if the loop terminated without break we haven't found a single
            # match: either we are at the end of the file or we have a problem
            else:
                # end of text
                if pos >= source_length:
                    return
                # something went wrong
                raise TemplateSyntaxError('unexpected char %r at %d' %
                                          (source[pos], pos), lineno,
                                          filename)