blob: 7f0b33f757a3bb92b068e129edde58ba46baa149 [file] [log] [blame]
Armin Ronacher92f572f2007-02-26 22:17:32 +01001# -*- coding: utf-8 -*-
2"""
Armin Ronacher07bc6842008-03-31 14:18:49 +02003 jinja2.lexer
4 ~~~~~~~~~~~~
Armin Ronacher3b65b8a2007-02-27 20:21:45 +01005
Armin Ronacher5a8e4972007-04-05 11:21:38 +02006 This module implements a Jinja / Python combination lexer. The
7 `Lexer` class provided by this module is used to do some preprocessing
8 for Jinja.
9
10 On the one hand it filters out invalid operators like the bitshift
11 operators we don't allow in templates. On the other hand it separates
12 template code and python code in expressions.
13
Armin Ronacher1d51f632008-03-25 14:34:45 +010014 :copyright: 2007-2008 by Armin Ronacher.
Armin Ronacher3b65b8a2007-02-27 20:21:45 +010015 :license: BSD, see LICENSE for more details.
Armin Ronacher92f572f2007-02-26 22:17:32 +010016"""
17import re
Armin Ronacher1cc232c2007-09-07 17:52:41 +020018import unicodedata
Armin Ronacher4325e372008-05-01 22:59:47 +020019from operator import itemgetter
20from collections import deque
Armin Ronacher82b3f3d2008-03-31 20:01:08 +020021from jinja2.exceptions import TemplateSyntaxError
Armin Ronacherb5124e62008-04-25 00:36:14 +020022from jinja2.utils import LRUCache
Armin Ronacher92f572f2007-02-26 22:17:32 +010023
24
Armin Ronacher21580912007-04-17 17:13:10 +020025# cache for the lexers. Exists in order to be able to have multiple
26# environments with the same lexer
Armin Ronacher187bde12008-05-01 18:19:16 +020027_lexer_cache = LRUCache(50)
Armin Ronacher21580912007-04-17 17:13:10 +020028
Armin Ronacher92f572f2007-02-26 22:17:32 +010029# static regular expressions
Armin Ronacher0949e4d2007-10-07 18:53:29 +020030whitespace_re = re.compile(r'\s+(?um)')
Armin Ronacher92f572f2007-02-26 22:17:32 +010031string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
32 r'|"([^"\\]*(?:\\.[^"\\]*)*)")(?ms)')
Armin Ronacher1cc232c2007-09-07 17:52:41 +020033integer_re = re.compile(r'\d+')
Armin Ronacherd1ff8582008-05-11 00:30:43 +020034name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
Armin Ronacher1cc232c2007-09-07 17:52:41 +020035float_re = re.compile(r'\d+\.\d+')
Armin Ronacher92f572f2007-02-26 22:17:32 +010036
Armin Ronacher1cc232c2007-09-07 17:52:41 +020037# bind operators to token types
38operators = {
39 '+': 'add',
40 '-': 'sub',
41 '/': 'div',
42 '//': 'floordiv',
43 '*': 'mul',
44 '%': 'mod',
45 '**': 'pow',
46 '~': 'tilde',
Armin Ronacher1cc232c2007-09-07 17:52:41 +020047 '[': 'lbracket',
48 ']': 'rbracket',
49 '(': 'lparen',
50 ')': 'rparen',
51 '{': 'lbrace',
52 '}': 'rbrace',
53 '==': 'eq',
54 '!=': 'ne',
55 '>': 'gt',
56 '>=': 'gteq',
57 '<': 'lt',
58 '<=': 'lteq',
59 '=': 'assign',
60 '.': 'dot',
61 ':': 'colon',
62 '|': 'pipe',
Armin Ronacher07bc6842008-03-31 14:18:49 +020063 ',': 'comma',
64 ';': 'semicolon'
Armin Ronacher1cc232c2007-09-07 17:52:41 +020065}
66
67reverse_operators = dict([(v, k) for k, v in operators.iteritems()])
68assert len(operators) == len(reverse_operators), 'operators dropped'
Armin Ronachere791c2a2008-04-07 18:39:54 +020069operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
70 sorted(operators, key=lambda x: -len(x))))
Armin Ronacher1cc232c2007-09-07 17:52:41 +020071
Armin Ronacher92f572f2007-02-26 22:17:32 +010072
class Failure(object):
    """Callable placeholder that reports a known lexing error.

    The `Lexer` installs instances of this class in its rule tables for
    input that is known to be malformed; invoking the instance raises
    the configured error class (a `TemplateSyntaxError` by default).
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        self.error_class = cls
        self.message = message

    def __call__(self, lineno, filename):
        # called by the tokenizer when the matching failure rule fires
        raise self.error_class(self.message, lineno, filename)
Armin Ronacher92f572f2007-02-26 22:17:32 +010084
85
Armin Ronacher4325e372008-05-01 22:59:47 +020086class Token(tuple):
87 """Token class."""
88 __slots__ = ()
89 lineno, type, value = (property(itemgetter(x)) for x in range(3))
90
91 def __new__(cls, lineno, type, value):
92 return tuple.__new__(cls, (lineno, intern(str(type)), value))
93
94 def __str__(self):
Armin Ronacher8a1d27f2008-05-19 08:37:19 +020095 if self.type in reverse_operators:
Armin Ronacher4325e372008-05-01 22:59:47 +020096 return reverse_operators[self.type]
97 elif self.type is 'name':
98 return self.value
99 return self.type
100
101 def test(self, expr):
102 """Test a token against a token expression. This can either be a
Armin Ronacher023b5e92008-05-08 11:03:10 +0200103 token type or ``'token_type:token_value'``. This can only test
104 against string values and types.
Armin Ronacher4325e372008-05-01 22:59:47 +0200105 """
Armin Ronachercda43df2008-05-03 17:10:05 +0200106 # here we do a regular string equality check as test_any is usually
Armin Ronacher4325e372008-05-01 22:59:47 +0200107 # passed an iterable of not interned strings.
108 if self.type == expr:
109 return True
110 elif ':' in expr:
111 return expr.split(':', 1) == [self.type, self.value]
112 return False
113
Armin Ronachercda43df2008-05-03 17:10:05 +0200114 def test_any(self, *iterable):
Armin Ronacher4325e372008-05-01 22:59:47 +0200115 """Test against multiple token expressions."""
116 for expr in iterable:
117 if self.test(expr):
118 return True
119 return False
120
121 def __repr__(self):
122 return 'Token(%r, %r, %r)' % (
123 self.lineno,
124 self.type,
125 self.value
126 )
127
128
129class TokenStreamIterator(object):
130 """The iterator for tokenstreams. Iterate over the stream
131 until the eof token is reached.
132 """
133
134 def __init__(self, stream):
135 self._stream = stream
136
137 def __iter__(self):
138 return self
139
140 def next(self):
141 token = self._stream.current
142 if token.type == 'eof':
143 self._stream.close()
144 raise StopIteration()
145 self._stream.next(False)
146 return token
147
148
class TokenStream(object):
    """A token stream is an iterable that yields :class:`Token`\s.  The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead.  The current active token is stored as :attr:`current`.

    Tokens pushed back via :meth:`push` are served before tokens pulled
    from the wrapped generator.
    """

    def __init__(self, generator, name, filename):
        # bound `next` of the generator; set to None once closed
        self._next = generator.next
        # tokens pushed back onto the stream (FIFO)
        self._pushed = deque()
        self.current = Token(1, 'initial', '')
        self.name = name
        self.filename = filename
        # prime `current` with the first real token
        self.next()

    def __iter__(self):
        return TokenStreamIterator(self)

    def __nonzero__(self):
        """Are we at the end of the stream?"""
        return bool(self._pushed) or self.current.type != 'eof'

    eos = property(lambda x: not x.__nonzero__(), doc=__nonzero__.__doc__)

    def push(self, token):
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self):
        """Look at the next token without consuming it.

        Advances, remembers the new token, pushes it back and restores
        `current`, so the stream position is unchanged afterwards.
        """
        old_token = self.next()
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n=1):
        """Go n tokens ahead."""
        for x in xrange(n):
            self.next()

    def next_if(self, expr):
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return self.next()

    def skip_if(self, expr):
        """Like `next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None

    def next(self):
        """Go one token ahead and return the old one"""
        rv = self.current
        if self._pushed:
            self.current = self._pushed.popleft()
        # identity check is safe here: token types are interned by
        # Token.__new__ and 'eof' is an interned literal
        elif self.current.type is not 'eof':
            try:
                self.current = self._next()
            except StopIteration:
                # underlying generator exhausted -> switch to eof state
                self.close()
        return rv

    def close(self):
        """Close the stream: pin `current` to an eof token and drop the
        reference to the generator so it can be garbage collected.
        """
        self.current = Token(self.current.lineno, 'eof', '')
        self._next = None

    def expect(self, expr):
        """Expect a given token type and return it.  This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.  Raises a
        `TemplateSyntaxError` if the current token does not match.
        """
        if not self.current.test(expr):
            # for 'type:value' expressions report only the value part
            if ':' in expr:
                expr = expr.split(':')[1]
            if self.current.type is 'eof':
                raise TemplateSyntaxError('unexpected end of template, '
                                          'expected %r.' % expr,
                                          self.current.lineno,
                                          self.name, self.filename)
            raise TemplateSyntaxError("expected token %r, got %r" %
                                      (expr, str(self.current)),
                                      self.current.lineno,
                                      self.name, self.filename)
        # return the matched token but advance past it first
        try:
            return self.current
        finally:
            self.next()
237
238
Armin Ronacher21580912007-04-17 17:13:10 +0200239class LexerMeta(type):
Armin Ronacherb5124e62008-04-25 00:36:14 +0200240 """Metaclass for the lexer that caches instances for
Armin Ronacher21580912007-04-17 17:13:10 +0200241 the same configuration in a weak value dictionary.
242 """
243
244 def __call__(cls, environment):
Armin Ronacher203bfcb2008-04-24 21:54:44 +0200245 key = (environment.block_start_string,
246 environment.block_end_string,
247 environment.variable_start_string,
248 environment.variable_end_string,
249 environment.comment_start_string,
250 environment.comment_end_string,
251 environment.line_statement_prefix,
252 environment.trim_blocks)
Armin Ronacherb5124e62008-04-25 00:36:14 +0200253 lexer = _lexer_cache.get(key)
254 if lexer is None:
255 lexer = type.__call__(cls, environment)
256 _lexer_cache[key] = lexer
Armin Ronacher21580912007-04-17 17:13:10 +0200257 return lexer
258
259
Armin Ronacher92f572f2007-02-26 22:17:32 +0100260class Lexer(object):
Armin Ronacherb5124e62008-04-25 00:36:14 +0200261 """Class that implements a lexer for a given environment. Automatically
Armin Ronacher92f572f2007-02-26 22:17:32 +0100262 created by the environment class, usually you don't have to do that.
Armin Ronacher21580912007-04-17 17:13:10 +0200263
264 Note that the lexer is not automatically bound to an environment.
265 Multiple environments can share the same lexer.
Armin Ronacher92f572f2007-02-26 22:17:32 +0100266 """
267
Armin Ronacher21580912007-04-17 17:13:10 +0200268 __metaclass__ = LexerMeta
269
Armin Ronacher92f572f2007-02-26 22:17:32 +0100270 def __init__(self, environment):
271 # shortcuts
272 c = lambda x: re.compile(x, re.M | re.S)
273 e = re.escape
274
Armin Ronachera6c3ac52007-03-27 22:51:51 +0200275 # lexing rules for tags
Armin Ronacher92f572f2007-02-26 22:17:32 +0100276 tag_rules = [
Armin Ronacherd8b8c3e2008-05-22 21:28:32 +0200277 (whitespace_re, 'whitespace', None),
Armin Ronacher1cc232c2007-09-07 17:52:41 +0200278 (float_re, 'float', None),
279 (integer_re, 'integer', None),
Armin Ronacher92f572f2007-02-26 22:17:32 +0100280 (name_re, 'name', None),
Armin Ronacher1cc232c2007-09-07 17:52:41 +0200281 (string_re, 'string', None),
Armin Ronacher1cc232c2007-09-07 17:52:41 +0200282 (operator_re, 'operator', None)
Armin Ronacher92f572f2007-02-26 22:17:32 +0100283 ]
284
Armin Ronacherd874fbe2007-02-27 20:51:59 +0100285 # assamble the root lexing rule. because "|" is ungreedy
286 # we have to sort by length so that the lexer continues working
287 # as expected when we have parsing rules like <% for block and
288 # <%= for variables. (if someone wants asp like syntax)
Armin Ronacher33d528a2007-05-14 18:21:44 +0200289 # variables are just part of the rules if variable processing
290 # is required.
Armin Ronacherd874fbe2007-02-27 20:51:59 +0100291 root_tag_rules = [
292 ('comment', environment.comment_start_string),
Armin Ronacher2e9396b2008-04-16 14:21:57 +0200293 ('block', environment.block_start_string),
294 ('variable', environment.variable_start_string)
Armin Ronacherd874fbe2007-02-27 20:51:59 +0100295 ]
Armin Ronacher4f7d2d52008-04-22 10:40:26 +0200296 root_tag_rules.sort(key=lambda x: -len(x[1]))
Armin Ronacherbf7c4ad2008-04-12 12:02:36 +0200297
298 # now escape the rules. This is done here so that the escape
299 # signs don't count for the lengths of the tags.
300 root_tag_rules = [(a, e(b)) for a, b in root_tag_rules]
301
302 # if we have a line statement prefix we need an extra rule for
303 # that. We add this rule *after* all the others.
304 if environment.line_statement_prefix is not None:
305 prefix = e(environment.line_statement_prefix)
306 root_tag_rules.insert(0, ('linestatement', '^\s*' + prefix))
Armin Ronacherd874fbe2007-02-27 20:51:59 +0100307
Armin Ronachera6c3ac52007-03-27 22:51:51 +0200308 # block suffix if trimming is enabled
309 block_suffix_re = environment.trim_blocks and '\\n?' or ''
310
311 # global lexing rules
Armin Ronacher92f572f2007-02-26 22:17:32 +0100312 self.rules = {
313 'root': [
Armin Ronacher523bf4c2007-11-17 23:45:04 +0100314 # directives
315 (c('(.*?)(?:%s)' % '|'.join(
316 ['(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*%s)' % (
317 e(environment.block_start_string),
318 e(environment.block_start_string),
319 e(environment.block_end_string)
320 )] + [
Armin Ronacherbf7c4ad2008-04-12 12:02:36 +0200321 '(?P<%s_begin>\s*%s\-|%s)' % (n, r, r)
Armin Ronacher523bf4c2007-11-17 23:45:04 +0100322 for n, r in root_tag_rules
323 ])), ('data', '#bygroup'), '#bygroup'),
Armin Ronachera6c3ac52007-03-27 22:51:51 +0200324 # data
Armin Ronacher92f572f2007-02-26 22:17:32 +0100325 (c('.+'), 'data', None)
326 ],
Armin Ronachera6c3ac52007-03-27 22:51:51 +0200327 # comments
Armin Ronacher92f572f2007-02-26 22:17:32 +0100328 'comment_begin': [
Armin Ronachera5c8d582007-03-31 20:40:38 +0200329 (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
Armin Ronacher1151fbc2007-03-28 21:44:04 +0200330 e(environment.comment_end_string),
Armin Ronachera5c8d582007-03-31 20:40:38 +0200331 e(environment.comment_end_string),
332 block_suffix_re
Armin Ronacher1151fbc2007-03-28 21:44:04 +0200333 )), ('comment', 'comment_end'), '#pop'),
Armin Ronacher92f572f2007-02-26 22:17:32 +0100334 (c('(.)'), (Failure('Missing end of comment tag'),), None)
335 ],
Armin Ronacher21580912007-04-17 17:13:10 +0200336 # blocks
Armin Ronacher92f572f2007-02-26 22:17:32 +0100337 'block_begin': [
Armin Ronachera5c8d582007-03-31 20:40:38 +0200338 (c('(?:\-%s\s*|%s)%s' % (
Armin Ronacher1151fbc2007-03-28 21:44:04 +0200339 e(environment.block_end_string),
Armin Ronachera5c8d582007-03-31 20:40:38 +0200340 e(environment.block_end_string),
341 block_suffix_re
Armin Ronacher1151fbc2007-03-28 21:44:04 +0200342 )), 'block_end', '#pop'),
Armin Ronacher92f572f2007-02-26 22:17:32 +0100343 ] + tag_rules,
Armin Ronacher2e9396b2008-04-16 14:21:57 +0200344 # variables
345 'variable_begin': [
346 (c('\-%s\s*|%s' % (
347 e(environment.variable_end_string),
348 e(environment.variable_end_string)
349 )), 'variable_end', '#pop')
350 ] + tag_rules,
Armin Ronachera6c3ac52007-03-27 22:51:51 +0200351 # raw block
Armin Ronacher523bf4c2007-11-17 23:45:04 +0100352 'raw_begin': [
Armin Ronacher1151fbc2007-03-28 21:44:04 +0200353 (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
354 e(environment.block_start_string),
355 e(environment.block_start_string),
356 e(environment.block_end_string),
357 e(environment.block_end_string),
Armin Ronachera6c3ac52007-03-27 22:51:51 +0200358 block_suffix_re
Armin Ronacher523bf4c2007-11-17 23:45:04 +0100359 )), ('data', 'raw_end'), '#pop'),
Armin Ronachera6c3ac52007-03-27 22:51:51 +0200360 (c('(.)'), (Failure('Missing end of raw directive'),), None)
Armin Ronacher2e9396b2008-04-16 14:21:57 +0200361 ],
362 # line statements
363 'linestatement_begin': [
Armin Ronacherbf7c4ad2008-04-12 12:02:36 +0200364 (c(r'\s*(\n|$)'), 'linestatement_end', '#pop')
365 ] + tag_rules
Armin Ronacher2e9396b2008-04-16 14:21:57 +0200366 }
Armin Ronacherbf7c4ad2008-04-12 12:02:36 +0200367
Armin Ronacher7f15ef82008-05-16 09:11:39 +0200368 def tokenize(self, source, name=None, filename=None):
Armin Ronacher71082072008-04-12 14:19:36 +0200369 """Works like `tokeniter` but returns a tokenstream of tokens and not
Armin Ronacher4f7d2d52008-04-22 10:40:26 +0200370 a generator or token tuples. Additionally all token values are already
Armin Ronacher115de2e2008-05-01 22:20:05 +0200371 converted into types and postprocessed. For example comments are removed,
Armin Ronacher1cc232c2007-09-07 17:52:41 +0200372 integers and floats converted, strings unescaped etc.
Armin Ronacher92f572f2007-02-26 22:17:32 +0100373 """
Armin Ronacherbf7c4ad2008-04-12 12:02:36 +0200374 source = unicode(source)
Armin Ronacher5a8e4972007-04-05 11:21:38 +0200375 def generate():
Armin Ronacher7f15ef82008-05-16 09:11:39 +0200376 for lineno, token, value in self.tokeniter(source, name, filename):
Armin Ronacherd8b8c3e2008-05-22 21:28:32 +0200377 if token in ('comment_begin', 'comment', 'comment_end',
378 'whitespace'):
Armin Ronacher1cc232c2007-09-07 17:52:41 +0200379 continue
Armin Ronacherbf7c4ad2008-04-12 12:02:36 +0200380 elif token == 'linestatement_begin':
381 token = 'block_begin'
382 elif token == 'linestatement_end':
383 token = 'block_end'
Armin Ronacher4f7d2d52008-04-22 10:40:26 +0200384 # we are not interested in those tokens in the parser
385 elif token in ('raw_begin', 'raw_end'):
386 continue
Armin Ronacher1cc232c2007-09-07 17:52:41 +0200387 elif token == 'data':
Armin Ronacherd1ff8582008-05-11 00:30:43 +0200388 try:
389 value = str(value)
390 except UnicodeError:
391 pass
Armin Ronacher07bc6842008-03-31 14:18:49 +0200392 elif token == 'keyword':
Armin Ronacher82b3f3d2008-03-31 20:01:08 +0200393 token = value
Armin Ronacher1cc232c2007-09-07 17:52:41 +0200394 elif token == 'name':
Armin Ronacherd1ff8582008-05-11 00:30:43 +0200395 value = str(value)
Armin Ronacher1cc232c2007-09-07 17:52:41 +0200396 elif token == 'string':
Armin Ronacher7f15ef82008-05-16 09:11:39 +0200397 # try to unescape string
398 try:
399 value = value[1:-1] \
400 .encode('ascii', 'backslashreplace') \
401 .decode('unicode-escape')
402 except Exception, e:
403 msg = str(e).split(':')[-1].strip()
404 raise TemplateSyntaxError(msg, lineno, name, filename)
405 # if we can express it as bytestring (ascii only)
406 # we do that for support of semi broken APIs
407 # as datetime.datetime.strftime
Armin Ronacherd1ff8582008-05-11 00:30:43 +0200408 try:
409 value = str(value)
410 except UnicodeError:
411 pass
Armin Ronacher1cc232c2007-09-07 17:52:41 +0200412 elif token == 'integer':
413 value = int(value)
414 elif token == 'float':
415 value = float(value)
416 elif token == 'operator':
417 token = operators[value]
Armin Ronacher1cc232c2007-09-07 17:52:41 +0200418 yield Token(lineno, token, value)
Armin Ronacher7f15ef82008-05-16 09:11:39 +0200419 return TokenStream(generate(), name, filename)
Armin Ronacher92f572f2007-02-26 22:17:32 +0100420
Armin Ronacher7f15ef82008-05-16 09:11:39 +0200421 def tokeniter(self, source, name, filename=None):
Armin Ronacherb5124e62008-04-25 00:36:14 +0200422 """This method tokenizes the text and returns the tokens in a
423 generator. Use this method if you just want to tokenize a template.
424 The output you get is not compatible with the input the jinja parser
425 wants. The parser uses the `tokenize` function with returns a
426 `TokenStream` and postprocessed tokens.
Armin Ronacher92f572f2007-02-26 22:17:32 +0100427 """
Armin Ronacher5a8e4972007-04-05 11:21:38 +0200428 source = '\n'.join(source.splitlines())
Armin Ronacher7977e5c2007-03-12 07:22:17 +0100429 pos = 0
430 lineno = 1
Armin Ronacher92f572f2007-02-26 22:17:32 +0100431 stack = ['root']
432 statetokens = self.rules['root']
433 source_length = len(source)
434
Armin Ronacher21580912007-04-17 17:13:10 +0200435 balancing_stack = []
436
Armin Ronacher71082072008-04-12 14:19:36 +0200437 while 1:
Armin Ronacher92f572f2007-02-26 22:17:32 +0100438 # tokenizer loop
439 for regex, tokens, new_state in statetokens:
440 m = regex.match(source, pos)
Armin Ronacher21580912007-04-17 17:13:10 +0200441 # if no match we try again with the next rule
Armin Ronacher71082072008-04-12 14:19:36 +0200442 if m is None:
Armin Ronacher21580912007-04-17 17:13:10 +0200443 continue
444
445 # we only match blocks and variables if brances / parentheses
446 # are balanced. continue parsing with the lower rule which
447 # is the operator rule. do this only if the end tags look
448 # like operators
449 if balancing_stack and \
Armin Ronacher71082072008-04-12 14:19:36 +0200450 tokens in ('variable_end', 'block_end',
451 'linestatement_end'):
Armin Ronacher21580912007-04-17 17:13:10 +0200452 continue
453
454 # tuples support more options
455 if isinstance(tokens, tuple):
456 for idx, token in enumerate(tokens):
Armin Ronacher21580912007-04-17 17:13:10 +0200457 # failure group
Armin Ronacherd8b8c3e2008-05-22 21:28:32 +0200458 if token.__class__ is Failure:
Armin Ronacher720e55b2007-05-30 00:57:49 +0200459 raise token(lineno, filename)
Armin Ronacher21580912007-04-17 17:13:10 +0200460 # bygroup is a bit more complex, in that case we
461 # yield for the current token the first named
462 # group that matched
463 elif token == '#bygroup':
Armin Ronacher92f572f2007-02-26 22:17:32 +0100464 for key, value in m.groupdict().iteritems():
465 if value is not None:
Armin Ronacher21580912007-04-17 17:13:10 +0200466 yield lineno, key, value
467 lineno += value.count('\n')
Armin Ronacher92f572f2007-02-26 22:17:32 +0100468 break
469 else:
Armin Ronacher21580912007-04-17 17:13:10 +0200470 raise RuntimeError('%r wanted to resolve '
471 'the token dynamically'
472 ' but no group matched'
473 % regex)
474 # normal group
Armin Ronacher92f572f2007-02-26 22:17:32 +0100475 else:
Armin Ronacher21580912007-04-17 17:13:10 +0200476 data = m.group(idx + 1)
477 if data:
478 yield lineno, token, data
479 lineno += data.count('\n')
480
Armin Ronacher71082072008-04-12 14:19:36 +0200481 # strings as token just are yielded as it.
Armin Ronacher21580912007-04-17 17:13:10 +0200482 else:
483 data = m.group()
484 # update brace/parentheses balance
485 if tokens == 'operator':
486 if data == '{':
487 balancing_stack.append('}')
488 elif data == '(':
489 balancing_stack.append(')')
490 elif data == '[':
491 balancing_stack.append(']')
492 elif data in ('}', ')', ']'):
Armin Ronacherf750daa2007-05-29 23:22:38 +0200493 if not balancing_stack:
494 raise TemplateSyntaxError('unexpected "%s"' %
Armin Ronacher7f15ef82008-05-16 09:11:39 +0200495 data, lineno, name,
Armin Ronacherf750daa2007-05-29 23:22:38 +0200496 filename)
497 expected_op = balancing_stack.pop()
498 if expected_op != data:
499 raise TemplateSyntaxError('unexpected "%s", '
500 'expected "%s"' %
501 (data, expected_op),
Armin Ronacher7f15ef82008-05-16 09:11:39 +0200502 lineno, name,
503 filename)
Armin Ronacher21580912007-04-17 17:13:10 +0200504 # yield items
Armin Ronacherd8b8c3e2008-05-22 21:28:32 +0200505 yield lineno, tokens, data
Armin Ronacher21580912007-04-17 17:13:10 +0200506 lineno += data.count('\n')
507
508 # fetch new position into new variable so that we can check
509 # if there is a internal parsing error which would result
510 # in an infinite loop
511 pos2 = m.end()
512
513 # handle state changes
514 if new_state is not None:
515 # remove the uppermost state
516 if new_state == '#pop':
517 stack.pop()
518 # resolve the new state by group checking
519 elif new_state == '#bygroup':
520 for key, value in m.groupdict().iteritems():
521 if value is not None:
522 stack.append(key)
523 break
524 else:
525 raise RuntimeError('%r wanted to resolve the '
526 'new state dynamically but'
527 ' no group matched' %
528 regex)
529 # direct state name given
530 else:
531 stack.append(new_state)
532 statetokens = self.rules[stack[-1]]
533 # we are still at the same position and no stack change.
534 # this means a loop without break condition, avoid that and
535 # raise error
536 elif pos2 == pos:
537 raise RuntimeError('%r yielded empty string without '
538 'stack change' % regex)
539 # publish new function and start again
540 pos = pos2
541 break
Armin Ronacher92f572f2007-02-26 22:17:32 +0100542 # if loop terminated without break we havn't found a single match
543 # either we are at the end of the file or we have a problem
544 else:
545 # end of text
546 if pos >= source_length:
547 return
548 # something went wrong
549 raise TemplateSyntaxError('unexpected char %r at %d' %
Armin Ronacher21580912007-04-17 17:13:10 +0200550 (source[pos], pos), lineno,
Armin Ronacher7f15ef82008-05-16 09:11:39 +0200551 name, filename)