blob: 64621fd1c6497d277634a9d213d77a8b5960b780 [file] [log] [blame]
Armin Ronacher92f572f2007-02-26 22:17:32 +01001# -*- coding: utf-8 -*-
2"""
Armin Ronacher07bc6842008-03-31 14:18:49 +02003 jinja2.lexer
4 ~~~~~~~~~~~~
Armin Ronacher3b65b8a2007-02-27 20:21:45 +01005
Armin Ronacher5a8e4972007-04-05 11:21:38 +02006 This module implements a Jinja / Python combination lexer. The
7 `Lexer` class provided by this module is used to do some preprocessing
8 for Jinja.
9
10 On the one hand it filters out invalid operators like the bitshift
11 operators we don't allow in templates. On the other hand it separates
12 template code and python code in expressions.
13
Armin Ronacher1d51f632008-03-25 14:34:45 +010014 :copyright: 2007-2008 by Armin Ronacher.
Armin Ronacher3b65b8a2007-02-27 20:21:45 +010015 :license: BSD, see LICENSE for more details.
Armin Ronacher92f572f2007-02-26 22:17:32 +010016"""
17import re
Armin Ronacher1cc232c2007-09-07 17:52:41 +020018import unicodedata
Armin Ronacher4325e372008-05-01 22:59:47 +020019from operator import itemgetter
20from collections import deque
Armin Ronacher82b3f3d2008-03-31 20:01:08 +020021from jinja2.exceptions import TemplateSyntaxError
Armin Ronacherb5124e62008-04-25 00:36:14 +020022from jinja2.utils import LRUCache
Armin Ronacher92f572f2007-02-26 22:17:32 +010023
24
Armin Ronacher21580912007-04-17 17:13:10 +020025# cache for the lexers. Exists in order to be able to have multiple
26# environments with the same lexer
Armin Ronacher187bde12008-05-01 18:19:16 +020027_lexer_cache = LRUCache(50)
Armin Ronacher21580912007-04-17 17:13:10 +020028
# static regular expressions
whitespace_re = re.compile(r'\s+(?um)')
# single or double quoted strings, escaped quotes allowed inside
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                       r'|"([^"\\]*(?:\\.[^"\\]*)*)")(?ms)')
integer_re = re.compile(r'\d+')
name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
float_re = re.compile(r'\d+\.\d+')
# every newline flavor; used to normalize to the configured sequence
newline_re = re.compile(r'(\r\n|\r|\n)')

# bind operators to token types
operators = {
    '+':            'add',
    '-':            'sub',
    '/':            'div',
    '//':           'floordiv',
    '*':            'mul',
    '%':            'mod',
    '**':           'pow',
    '~':            'tilde',
    '[':            'lbracket',
    ']':            'rbracket',
    '(':            'lparen',
    ')':            'rparen',
    '{':            'lbrace',
    '}':            'rbrace',
    '==':           'eq',
    '!=':           'ne',
    '>':            'gt',
    '>=':           'gteq',
    '<':            'lt',
    '<=':           'lteq',
    '=':            'assign',
    '.':            'dot',
    ':':            'colon',
    '|':            'pipe',
    ',':            'comma',
    ';':            'semicolon'
}

# reverse mapping: token type -> operator source text (Token.__str__ uses it)
reverse_operators = dict([(v, k) for k, v in operators.iteritems()])
# the mapping must be bijective, otherwise a token type would be ambiguous
assert len(operators) == len(reverse_operators), 'operators dropped'
# sort by decreasing length so that e.g. '**' is matched before '*'
operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
                         sorted(operators, key=lambda x: -len(x))))
Armin Ronacher1cc232c2007-09-07 17:52:41 +020072
Armin Ronacher92f572f2007-02-26 22:17:32 +010073
class Failure(object):
    """Callable placeholder that raises a `TemplateSyntaxError` (or the
    error class it was configured with) as soon as it is invoked.  The
    `Lexer` stores instances of this class in its rules to mark input
    that is known to be invalid.
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        self.error_class = cls
        self.message = message

    def __call__(self, lineno, filename):
        # invoked by the tokenizer loop when the failure rule matches
        raise self.error_class(self.message, lineno, filename)
Armin Ronacher92f572f2007-02-26 22:17:32 +010085
86
class Token(tuple):
    """Immutable token produced by the lexer.  A token is the triple
    ``(lineno, type, value)``; the three fields are also exposed as
    read-only properties.
    """
    __slots__ = ()
    lineno, type, value = (property(itemgetter(x)) for x in range(3))

    def __new__(cls, lineno, type, value):
        # intern the type so that lookups in `operators` and state
        # comparisons are cheap string compares
        return tuple.__new__(cls, (lineno, intern(str(type)), value))

    def __str__(self):
        # use equality rather than identity for the type check; relying
        # on string interning for `is` comparisons is fragile
        if self.type in reverse_operators:
            return reverse_operators[self.type]
        elif self.type == 'name':
            return self.value
        return self.type

    def test(self, expr):
        """Test a token against a token expression.  This can either be a
        token type or ``'token_type:token_value'``.  This can only test
        against string values and types.
        """
        # here we do a regular string equality check as test_any is usually
        # passed an iterable of not interned strings.
        if self.type == expr:
            return True
        elif ':' in expr:
            return expr.split(':', 1) == [self.type, self.value]
        return False

    def test_any(self, *iterable):
        """Test against multiple token expressions; `True` on first match."""
        for expr in iterable:
            if self.test(expr):
                return True
        return False

    def __repr__(self):
        return 'Token(%r, %r, %r)' % (
            self.lineno,
            self.type,
            self.value
        )
128
129
130class TokenStreamIterator(object):
131 """The iterator for tokenstreams. Iterate over the stream
132 until the eof token is reached.
133 """
134
135 def __init__(self, stream):
136 self._stream = stream
137
138 def __iter__(self):
139 return self
140
141 def next(self):
142 token = self._stream.current
143 if token.type == 'eof':
144 self._stream.close()
145 raise StopIteration()
146 self._stream.next(False)
147 return token
148
149
class TokenStream(object):
    """A token stream is an iterable that yields :class:`Token`\s.  The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead.  The current active token is stored as :attr:`current`.
    """

    def __init__(self, generator, name, filename):
        self._next = generator.next
        self._pushed = deque()
        self.current = Token(1, 'initial', '')
        self.name = name
        self.filename = filename
        # advance once so that `current` holds the first real token
        self.next()

    def __iter__(self):
        return TokenStreamIterator(self)

    def __nonzero__(self):
        """Are we at the end of the stream?"""
        return bool(self._pushed) or self.current.type != 'eof'

    eos = property(lambda x: not x.__nonzero__(), doc=__nonzero__.__doc__)

    def push(self, token):
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self):
        """Look at the next token without consuming it."""
        old_token = self.next()
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n=1):
        """Go n tokens ahead."""
        for x in xrange(n):
            self.next()

    def next_if(self, expr):
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return self.next()

    def skip_if(self, expr):
        """Like :meth:`next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None

    def next(self):
        """Go one token ahead and return the old one."""
        rv = self.current
        if self._pushed:
            self.current = self._pushed.popleft()
        # compare with `!=` rather than `is not`: identity checks on
        # string literals only work by the accident of interning
        elif self.current.type != 'eof':
            try:
                self.current = self._next()
            except StopIteration:
                self.close()
        return rv

    def close(self):
        """Close the stream: pin an eof token and drop the generator."""
        self.current = Token(self.current.lineno, 'eof', '')
        self._next = None

    def expect(self, expr):
        """Expect a given token type and return it.  This accepts the same
        argument as :meth:`jinja2.lexer.Token.test` and raises a
        `TemplateSyntaxError` if the current token does not match.
        """
        if not self.current.test(expr):
            if ':' in expr:
                expr = expr.split(':')[1]
            if self.current.type == 'eof':
                raise TemplateSyntaxError('unexpected end of template, '
                                          'expected %r.' % expr,
                                          self.current.lineno,
                                          self.name, self.filename)
            raise TemplateSyntaxError("expected token %r, got %r" %
                                      (expr, str(self.current)),
                                      self.current.lineno,
                                      self.name, self.filename)
        try:
            return self.current
        finally:
            self.next()
238
239
class LexerMeta(type):
    """Metaclass for the lexer that caches instances for
    the same configuration in a weak value dictionary.
    """

    def __call__(cls, environment):
        # the cache key is exactly the set of environment settings the
        # lexer depends on; environments that agree on all of them can
        # share one lexer instance
        key = tuple(getattr(environment, attr) for attr in (
            'block_start_string',
            'block_end_string',
            'variable_start_string',
            'variable_end_string',
            'comment_start_string',
            'comment_end_string',
            'line_statement_prefix',
            'trim_blocks',
            'newline_sequence'
        ))
        lexer = _lexer_cache.get(key)
        if lexer is None:
            lexer = type.__call__(cls, environment)
            _lexer_cache[key] = lexer
        return lexer
260
261
class Lexer(object):
    """Class that implements a lexer for a given environment. Automatically
    created by the environment class, usually you don't have to do that.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    __metaclass__ = LexerMeta

    def __init__(self, environment):
        # shortcuts: `c` compiles a pattern with MULTILINE|DOTALL,
        # `e` escapes a delimiter string for literal use in a pattern
        c = lambda x: re.compile(x, re.M | re.S)
        e = re.escape

        # lexing rules used inside of tags (blocks, variables and line
        # statements).  float_re must come before integer_re so '1.0'
        # is not lexed as integer '1' followed by operator '.'
        tag_rules = [
            (whitespace_re, 'whitespace', None),
            (float_re, 'float', None),
            (integer_re, 'integer', None),
            (name_re, 'name', None),
            (string_re, 'string', None),
            (operator_re, 'operator', None)
        ]

        # assemble the root lexing rule. because "|" is ungreedy
        # we have to sort by length so that the lexer continues working
        # as expected when we have parsing rules like <% for block and
        # <%= for variables. (if someone wants asp like syntax)
        # variables are just part of the rules if variable processing
        # is required.
        root_tag_rules = [
            ('comment', environment.comment_start_string),
            ('block', environment.block_start_string),
            ('variable', environment.variable_start_string)
        ]
        # longest start string first so it wins in the alternation
        root_tag_rules.sort(key=lambda x: -len(x[1]))

        # now escape the rules. This is done here so that the escape
        # signs don't count for the lengths of the tags.
        root_tag_rules = [(a, e(b)) for a, b in root_tag_rules]

        # if we have a line statement prefix we need an extra rule for
        # that.  It is added after the sorting and escaping above (so it
        # is unaffected by both) and inserted at the front of the list,
        # which makes it the first alternative tried.
        if environment.line_statement_prefix is not None:
            prefix = e(environment.line_statement_prefix)
            root_tag_rules.insert(0, ('linestatement', '^\s*' + prefix))

        # block suffix if trimming is enabled: optionally swallow the
        # newline that directly follows a block end tag
        block_suffix_re = environment.trim_blocks and '\\n?' or ''

        self.newline_sequence = environment.newline_sequence

        # global lexing rules.  Maps a lexer state to a list of
        # (compiled regex, token type or tuple of types, new state)
        # tuples.  '#pop' pops the state stack, '#bygroup' resolves the
        # token/state from the first named group that matched.
        self.rules = {
            'root': [
                # directives
                (c('(.*?)(?:%s)' % '|'.join(
                    ['(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*%s)' % (
                        e(environment.block_start_string),
                        e(environment.block_start_string),
                        e(environment.block_end_string)
                    )] + [
                        '(?P<%s_begin>\s*%s\-|%s)' % (n, r, r)
                        for n, r in root_tag_rules
                    ])), ('data', '#bygroup'), '#bygroup'),
                # data
                (c('.+'), 'data', None)
            ],
            # comments
            'comment_begin': [
                (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
                    e(environment.comment_end_string),
                    e(environment.comment_end_string),
                    block_suffix_re
                )), ('comment', 'comment_end'), '#pop'),
                (c('(.)'), (Failure('Missing end of comment tag'),), None)
            ],
            # blocks
            'block_begin': [
                (c('(?:\-%s\s*|%s)%s' % (
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), 'block_end', '#pop'),
            ] + tag_rules,
            # variables
            'variable_begin': [
                (c('\-%s\s*|%s' % (
                    e(environment.variable_end_string),
                    e(environment.variable_end_string)
                )), 'variable_end', '#pop')
            ] + tag_rules,
            # raw block
            'raw_begin': [
                (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
                    e(environment.block_start_string),
                    e(environment.block_start_string),
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), ('data', 'raw_end'), '#pop'),
                (c('(.)'), (Failure('Missing end of raw directive'),), None)
            ],
            # line statements
            'linestatement_begin': [
                (c(r'\s*(\n|$)'), 'linestatement_end', '#pop')
            ] + tag_rules
        }

    def _normalize_newlines(self, value):
        """Replace every newline flavor (``\\r\\n``, ``\\r``, ``\\n``) in
        `value` with the environment's configured newline sequence.
        """
        return newline_re.sub(self.newline_sequence, value)

    def tokenize(self, source, name=None, filename=None):
        """Works like `tokeniter` but returns a tokenstream of tokens and not
        a generator or token tuples. Additionally all token values are already
        converted into types and postprocessed. For example comments are removed,
        integers and floats converted, strings unescaped etc.
        """
        def generate():
            for lineno, token, value in self.tokeniter(source, name, filename):
                # comments and whitespace never reach the parser
                if token in ('comment_begin', 'comment', 'comment_end',
                             'whitespace'):
                    continue
                # line statements are presented to the parser as blocks
                elif token == 'linestatement_begin':
                    token = 'block_begin'
                elif token == 'linestatement_end':
                    token = 'block_end'
                # we are not interested in those tokens in the parser
                elif token in ('raw_begin', 'raw_end'):
                    continue
                elif token == 'data':
                    value = self._normalize_newlines(value)
                elif token == 'keyword':
                    token = value
                elif token == 'name':
                    value = str(value)
                elif token == 'string':
                    # try to unescape string
                    try:
                        value = self._normalize_newlines(value[1:-1]) \
                            .encode('ascii', 'backslashreplace') \
                            .decode('unicode-escape')
                    except Exception, e:
                        msg = str(e).split(':')[-1].strip()
                        raise TemplateSyntaxError(msg, lineno, name, filename)
                    # if we can express it as bytestring (ascii only)
                    # we do that for support of semi broken APIs
                    # as datetime.datetime.strftime
                    try:
                        value = str(value)
                    except UnicodeError:
                        pass
                elif token == 'integer':
                    value = int(value)
                elif token == 'float':
                    value = float(value)
                elif token == 'operator':
                    # operators are renamed to their token type ('add', ...)
                    token = operators[value]
                yield Token(lineno, token, value)
        return TokenStream(generate(), name, filename)

    def tokeniter(self, source, name, filename=None):
        """This method tokenizes the text and returns the tokens in a
        generator.  Each token is a ``(lineno, token_type, value)`` tuple.
        Use this method if you just want to tokenize a template.
        The output you get is not compatible with the input the jinja parser
        wants. The parser uses the `tokenize` function with returns a
        `TokenStream` and postprocessed tokens.
        """
        # normalize all newlines to '\n' for lexing; `tokenize` restores
        # the configured newline sequence on data/string tokens later
        source = '\n'.join(unicode(source).splitlines())
        pos = 0
        lineno = 1
        stack = ['root']
        statetokens = self.rules['root']
        source_length = len(source)

        # stack of closing brackets we still expect ('}', ')', ']')
        balancing_stack = []

        while 1:
            # tokenizer loop: try each rule of the current state in order
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)
                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and \
                   tokens in ('variable_end', 'block_end',
                              'linestatement_end'):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    for idx, token in enumerate(tokens):
                        # failure group: raise the configured error
                        if token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == '#bygroup':
                            for key, value in m.groupdict().iteritems():
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count('\n')
                                    break
                            else:
                                raise RuntimeError('%r wanted to resolve '
                                                   'the token dynamically'
                                                   ' but no group matched'
                                                   % regex)
                        # normal group
                        else:
                            data = m.group(idx + 1)
                            if data:
                                yield lineno, token, data
                            lineno += data.count('\n')

                # strings as token are just yielded as is.
                else:
                    data = m.group()
                    # update brace/parentheses balance
                    if tokens == 'operator':
                        if data == '{':
                            balancing_stack.append('}')
                        elif data == '(':
                            balancing_stack.append(')')
                        elif data == '[':
                            balancing_stack.append(']')
                        elif data in ('}', ')', ']'):
                            if not balancing_stack:
                                raise TemplateSyntaxError('unexpected "%s"' %
                                                          data, lineno, name,
                                                          filename)
                            expected_op = balancing_stack.pop()
                            if expected_op != data:
                                raise TemplateSyntaxError('unexpected "%s", '
                                                          'expected "%s"' %
                                                          (data, expected_op),
                                                          lineno, name,
                                                          filename)
                    # yield items
                    yield lineno, tokens, data
                    lineno += data.count('\n')

                # fetch new position into new variable so that we can check
                # if there is a internal parsing error which would result
                # in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == '#pop':
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == '#bygroup':
                        for key, value in m.groupdict().iteritems():
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError('%r wanted to resolve the '
                                               'new state dynamically but'
                                               ' no group matched' %
                                               regex)
                    # direct state name given
                    else:
                        stack.append(new_state)
                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and no stack change.
                # this means a loop without break condition, avoid that and
                # raise error
                elif pos2 == pos:
                    raise RuntimeError('%r yielded empty string without '
                                       'stack change' % regex)
                # advance the cursor and start over with the first rule
                pos = pos2
                break
            # if loop terminated without break we haven't found a single match
            # either we are at the end of the file or we have a problem
            else:
                # end of text
                if pos >= source_length:
                    return
                # something went wrong
                raise TemplateSyntaxError('unexpected char %r at %d' %
                                          (source[pos], pos), lineno,
                                          name, filename)