blob: 1f22ed7b89fa0dda1db8e79d43bc386a76889553 [file] [log] [blame]
Armin Ronacher92f572f2007-02-26 22:17:32 +01001# -*- coding: utf-8 -*-
2"""
Armin Ronacher07bc6842008-03-31 14:18:49 +02003 jinja2.lexer
4 ~~~~~~~~~~~~
Armin Ronacher3b65b8a2007-02-27 20:21:45 +01005
Armin Ronacher5a8e4972007-04-05 11:21:38 +02006 This module implements a Jinja / Python combination lexer. The
7 `Lexer` class provided by this module is used to do some preprocessing
8 for Jinja.
9
10 On the one hand it filters out invalid operators like the bitshift
11 operators we don't allow in templates. On the other hand it separates
12 template code and python code in expressions.
13
Armin Ronacher1d51f632008-03-25 14:34:45 +010014 :copyright: 2007-2008 by Armin Ronacher.
Armin Ronacher3b65b8a2007-02-27 20:21:45 +010015 :license: BSD, see LICENSE for more details.
Armin Ronacher92f572f2007-02-26 22:17:32 +010016"""
17import re
Armin Ronacher1cc232c2007-09-07 17:52:41 +020018import unicodedata
Armin Ronacher4325e372008-05-01 22:59:47 +020019from operator import itemgetter
20from collections import deque
Armin Ronacher82b3f3d2008-03-31 20:01:08 +020021from jinja2.exceptions import TemplateSyntaxError
Armin Ronacherb5124e62008-04-25 00:36:14 +020022from jinja2.utils import LRUCache
Armin Ronacher92f572f2007-02-26 22:17:32 +010023
24
# cache for the lexers. Exists in order to be able to have multiple
# environments with the same lexer
_lexer_cache = LRUCache(50)

# static regular expressions
whitespace_re = re.compile(r'\s+(?um)')
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                       r'|"([^"\\]*(?:\\.[^"\\]*)*)")(?ms)')
integer_re = re.compile(r'\d+')
name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
float_re = re.compile(r'\d+\.\d+')
newline_re = re.compile(r'(\r\n|\r|\n)')

# bind operators to token types
operators = {
    '+': 'add',
    '-': 'sub',
    '/': 'div',
    '//': 'floordiv',
    '*': 'mul',
    '%': 'mod',
    '**': 'pow',
    '~': 'tilde',
    '[': 'lbracket',
    ']': 'rbracket',
    '(': 'lparen',
    ')': 'rparen',
    '{': 'lbrace',
    '}': 'rbrace',
    '==': 'eq',
    '!=': 'ne',
    '>': 'gt',
    '>=': 'gteq',
    '<': 'lt',
    '<=': 'lteq',
    '=': 'assign',
    '.': 'dot',
    ':': 'colon',
    '|': 'pipe',
    ',': 'comma',
    ';': 'semicolon'
}

# reverse mapping (token type -> operator source text), used by
# `Token.__str__` to print operators in their source form
reverse_operators = dict([(v, k) for k, v in operators.iteritems()])
# every operator must map to a distinct token type, otherwise the
# reverse mapping would silently lose entries
assert len(operators) == len(reverse_operators), 'operators dropped'
# sort by decreasing length so that e.g. '**' is tried before '*'
# in the alternation
operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
                         sorted(operators, key=lambda x: -len(x))))
Armin Ronacher1cc232c2007-09-07 17:52:41 +020072
Armin Ronacher92f572f2007-02-26 22:17:32 +010073
class Failure(object):
    """Callable placeholder used in lexer rules for known error
    situations.  Invoking an instance raises the configured error
    class (a `TemplateSyntaxError` by default) with the stored message.
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        self.error_class = cls
        self.message = message

    def __call__(self, lineno, filename):
        # instantiate and raise the configured exception
        raise self.error_class(self.message, lineno, filename)
Armin Ronacher92f572f2007-02-26 22:17:32 +010085
86
class Token(tuple):
    """Immutable ``(lineno, type, value)`` triple produced by the lexer.

    The type string is interned in `__new__` so the parser can compare
    token types cheaply.
    """
    __slots__ = ()
    lineno, type, value = (property(itemgetter(x)) for x in range(3))

    def __new__(cls, lineno, type, value):
        return tuple.__new__(cls, (lineno, intern(str(type)), value))

    def __str__(self):
        # operators print as their source text, names as their value,
        # everything else as the bare token type.
        if self.type in reverse_operators:
            return reverse_operators[self.type]
        # BUGFIX: was `self.type is 'name'` -- identity comparison with a
        # string literal only works because CPython happens to intern
        # identifier-like literals; use equality instead.
        elif self.type == 'name':
            return self.value
        return self.type

    def test(self, expr):
        """Test a token against a token expression.  This can either be a
        token type or ``'token_type:token_value'``.  This can only test
        against string values and types.
        """
        # here we do a regular string equality check as test_any is usually
        # passed an iterable of not interned strings.
        if self.type == expr:
            return True
        elif ':' in expr:
            return expr.split(':', 1) == [self.type, self.value]
        return False

    def test_any(self, *iterable):
        """Test against multiple token expressions."""
        for expr in iterable:
            if self.test(expr):
                return True
        return False

    def __repr__(self):
        return 'Token(%r, %r, %r)' % (
            self.lineno,
            self.type,
            self.value
        )
128
129
130class TokenStreamIterator(object):
131 """The iterator for tokenstreams. Iterate over the stream
132 until the eof token is reached.
133 """
134
135 def __init__(self, stream):
Armin Ronacher9ad96e72008-06-13 22:44:01 +0200136 self.stream = stream
Armin Ronacher4325e372008-05-01 22:59:47 +0200137
138 def __iter__(self):
139 return self
140
141 def next(self):
Armin Ronacher9ad96e72008-06-13 22:44:01 +0200142 token = self.stream.current
Armin Ronacher4325e372008-05-01 22:59:47 +0200143 if token.type == 'eof':
Armin Ronacher9ad96e72008-06-13 22:44:01 +0200144 self.stream.close()
Armin Ronacher4325e372008-05-01 22:59:47 +0200145 raise StopIteration()
Armin Ronacher9ad96e72008-06-13 22:44:01 +0200146 self.stream.next()
Armin Ronacher4325e372008-05-01 22:59:47 +0200147 return token
148
149
class TokenStream(object):
    """A token stream is an iterable that yields :class:`Token`\s.  The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead.  The current active token is stored as :attr:`current`.
    """

    def __init__(self, generator, name, filename):
        self._next = iter(generator).next
        self._pushed = deque()
        self.name = name
        self.filename = filename
        self.closed = False
        self.current = Token(1, 'initial', '')
        # advance once so that `current` holds the first real token
        self.next()

    def __iter__(self):
        return TokenStreamIterator(self)

    def __nonzero__(self):
        """Are we at the end of the stream?"""
        # BUGFIX: was `self.current.type != 'eof'` written with `is not`,
        # which relies on CPython literal interning; use equality.
        return bool(self._pushed) or self.current.type != 'eof'

    # positive form of __nonzero__, sharing its docstring
    eos = property(lambda x: not x.__nonzero__(), doc=__nonzero__.__doc__)

    def push(self, token):
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self):
        """Look at the next token without consuming it."""
        old_token = self.next()
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n=1):
        """Go n tokens ahead."""
        for x in xrange(n):
            self.next()

    def next_if(self, expr):
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return self.next()

    def skip_if(self, expr):
        """Like :meth:`next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None

    def next(self):
        """Go one token ahead and return the old one."""
        rv = self.current
        if self._pushed:
            self.current = self._pushed.popleft()
        # identity comparison with a string literal replaced by `!=`
        elif self.current.type != 'eof':
            try:
                self.current = self._next()
            except StopIteration:
                self.close()
        return rv

    def close(self):
        """Close the stream."""
        self.current = Token(self.current.lineno, 'eof', '')
        self._next = None
        self.closed = True

    def expect(self, expr):
        """Expect a given token type and return it.  This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.
        """
        if not self.current.test(expr):
            if ':' in expr:
                expr = expr.split(':')[1]
            # identity comparison with a string literal replaced by `==`
            if self.current.type == 'eof':
                raise TemplateSyntaxError('unexpected end of template, '
                                          'expected %r.' % expr,
                                          self.current.lineno,
                                          self.name, self.filename)
            raise TemplateSyntaxError("expected token %r, got %r" %
                                      (expr, str(self.current)),
                                      self.current.lineno,
                                      self.name, self.filename)
        # return the matched token, advancing the stream afterwards
        try:
            return self.current
        finally:
            self.next()
240
241
class LexerMeta(type):
    """Metaclass for `Lexer`: environments with an identical lexing
    configuration share one lexer instance through the module-level
    LRU cache.
    """

    def __call__(cls, environment):
        # every environment setting that influences tokenization is
        # part of the cache key
        key = (environment.block_start_string,
               environment.block_end_string,
               environment.variable_start_string,
               environment.variable_end_string,
               environment.comment_start_string,
               environment.comment_end_string,
               environment.line_statement_prefix,
               environment.trim_blocks,
               environment.newline_sequence)
        cached = _lexer_cache.get(key)
        if cached is not None:
            return cached
        # nothing cached yet: build a new lexer and remember it
        lexer = type.__call__(cls, environment)
        _lexer_cache[key] = lexer
        return lexer
262
263
class Lexer(object):
    """Class that implements a lexer for a given environment. Automatically
    created by the environment class, usually you don't have to do that.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    # Python 2 metaclass hook: `LexerMeta` caches instances per
    # environment configuration.
    __metaclass__ = LexerMeta

    def __init__(self, environment):
        # shortcuts
        c = lambda x: re.compile(x, re.M | re.S)
        e = re.escape

        # lexing rules for tags.  Order matters: rules are tried in
        # sequence, so float must come before integer.
        tag_rules = [
            (whitespace_re, 'whitespace', None),
            (float_re, 'float', None),
            (integer_re, 'integer', None),
            (name_re, 'name', None),
            (string_re, 'string', None),
            (operator_re, 'operator', None)
        ]

        # assemble the root lexing rule. because "|" is ungreedy
        # we have to sort by length so that the lexer continues working
        # as expected when we have parsing rules like <% for block and
        # <%= for variables. (if someone wants asp like syntax)
        # variables are just part of the rules if variable processing
        # is required.
        root_tag_rules = [
            ('comment', environment.comment_start_string),
            ('block', environment.block_start_string),
            ('variable', environment.variable_start_string)
        ]
        root_tag_rules.sort(key=lambda x: -len(x[1]))

        # now escape the rules. This is done here so that the escape
        # signs don't count for the lengths of the tags.
        root_tag_rules = [(a, e(b)) for a, b in root_tag_rules]

        # if we have a line statement prefix we need an extra rule for
        # that.  The rule is inserted at the front of the already sorted
        # and escaped rules so the prefix is tried first.
        if environment.line_statement_prefix is not None:
            prefix = e(environment.line_statement_prefix)
            root_tag_rules.insert(0, ('linestatement', '^\s*' + prefix))

        # block suffix if trimming is enabled (optionally eats the
        # newline directly after a block tag)
        block_suffix_re = environment.trim_blocks and '\\n?' or ''

        # newline sequence used by `_normalize_newlines`
        self.newline_sequence = environment.newline_sequence

        # global lexing rules.  Each state maps to a list of
        # (compiled regex, token type(s), new state) tuples; '#pop'
        # leaves a state and '#bygroup' derives the new state from the
        # name of the matched regex group.
        self.rules = {
            'root': [
                # directives
                (c('(.*?)(?:%s)' % '|'.join(
                    ['(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*%s)' % (
                        e(environment.block_start_string),
                        e(environment.block_start_string),
                        e(environment.block_end_string)
                    )] + [
                        '(?P<%s_begin>\s*%s\-|%s)' % (n, r, r)
                        for n, r in root_tag_rules
                    ])), ('data', '#bygroup'), '#bygroup'),
                # data
                (c('.+'), 'data', None)
            ],
            # comments
            'comment_begin': [
                (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
                    e(environment.comment_end_string),
                    e(environment.comment_end_string),
                    block_suffix_re
                )), ('comment', 'comment_end'), '#pop'),
                (c('(.)'), (Failure('Missing end of comment tag'),), None)
            ],
            # blocks
            'block_begin': [
                (c('(?:\-%s\s*|%s)%s' % (
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), 'block_end', '#pop'),
            ] + tag_rules,
            # variables
            'variable_begin': [
                (c('\-%s\s*|%s' % (
                    e(environment.variable_end_string),
                    e(environment.variable_end_string)
                )), 'variable_end', '#pop')
            ] + tag_rules,
            # raw block
            'raw_begin': [
                (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
                    e(environment.block_start_string),
                    e(environment.block_start_string),
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), ('data', 'raw_end'), '#pop'),
                (c('(.)'), (Failure('Missing end of raw directive'),), None)
            ],
            # line statements
            'linestatement_begin': [
                (c(r'\s*(\n|$)'), 'linestatement_end', '#pop')
            ] + tag_rules
        }

    def _normalize_newlines(self, value):
        """Replace all newline variants (CRLF, CR, LF) in `value` with
        the configured newline sequence.
        """
        return newline_re.sub(self.newline_sequence, value)

    def tokenize(self, source, name=None, filename=None):
        """Calls tokeniter + wrap and wraps it in a token stream.
        This is currently only used for unittests.
        """
        stream = self.tokeniter(source, name, filename)
        return TokenStream(self.wrap(stream, name, filename), name, filename)

    def wrap(self, stream, name=None, filename=None):
        """This is called with the stream as returned by `tokeniter` and wraps
        every token in a :class:`Token` and converts the value.
        """
        for lineno, token, value in stream:
            # tokens the parser never sees are dropped here
            if token in ('comment_begin', 'comment', 'comment_end',
                         'whitespace'):
                continue
            # line statements are presented to the parser as ordinary
            # block tokens
            elif token == 'linestatement_begin':
                token = 'block_begin'
            elif token == 'linestatement_end':
                token = 'block_end'
            # we are not interested in those tokens in the parser
            elif token in ('raw_begin', 'raw_end'):
                continue
            elif token == 'data':
                value = self._normalize_newlines(value)
            elif token == 'keyword':
                token = value
            elif token == 'name':
                value = str(value)
            elif token == 'string':
                # try to unescape string
                try:
                    value = self._normalize_newlines(value[1:-1]) \
                        .encode('ascii', 'backslashreplace') \
                        .decode('unicode-escape')
                except Exception, e:
                    msg = str(e).split(':')[-1].strip()
                    raise TemplateSyntaxError(msg, lineno, name, filename)
                # if we can express it as bytestring (ascii only)
                # we do that for support of semi broken APIs
                # as datetime.datetime.strftime
                try:
                    value = str(value)
                except UnicodeError:
                    pass
            elif token == 'integer':
                value = int(value)
            elif token == 'float':
                value = float(value)
            elif token == 'operator':
                # map operator source text to its token type
                token = operators[value]
            yield Token(lineno, token, value)

    def tokeniter(self, source, name, filename=None):
        """This method tokenizes the text and returns the tokens in a
        generator.  Use this method if you just want to tokenize a template.
        """
        # normalize all newlines to '\n' for the duration of the lexing
        source = '\n'.join(unicode(source).splitlines())
        pos = 0
        lineno = 1
        stack = ['root']
        statetokens = self.rules['root']
        source_length = len(source)

        # records the expected closing brackets for open '(', '[', '{'
        balancing_stack = []

        while 1:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)
                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and \
                   tokens in ('variable_end', 'block_end',
                              'linestatement_end'):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    for idx, token in enumerate(tokens):
                        # failure group
                        if token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == '#bygroup':
                            for key, value in m.groupdict().iteritems():
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count('\n')
                                    break
                            else:
                                raise RuntimeError('%r wanted to resolve '
                                                   'the token dynamically'
                                                   ' but no group matched'
                                                   % regex)
                        # normal group
                        else:
                            data = m.group(idx + 1)
                            if data:
                                yield lineno, token, data
                            lineno += data.count('\n')

                # strings as token are just yielded as-is
                else:
                    data = m.group()
                    # update brace/parentheses balance
                    if tokens == 'operator':
                        if data == '{':
                            balancing_stack.append('}')
                        elif data == '(':
                            balancing_stack.append(')')
                        elif data == '[':
                            balancing_stack.append(']')
                        elif data in ('}', ')', ']'):
                            if not balancing_stack:
                                raise TemplateSyntaxError('unexpected "%s"' %
                                                          data, lineno, name,
                                                          filename)
                            expected_op = balancing_stack.pop()
                            if expected_op != data:
                                raise TemplateSyntaxError('unexpected "%s", '
                                                          'expected "%s"' %
                                                          (data, expected_op),
                                                          lineno, name,
                                                          filename)
                    # yield items
                    yield lineno, tokens, data
                    lineno += data.count('\n')

                # fetch new position into new variable so that we can check
                # if there is an internal parsing error which would result
                # in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == '#pop':
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == '#bygroup':
                        for key, value in m.groupdict().iteritems():
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError('%r wanted to resolve the '
                                               'new state dynamically but'
                                               ' no group matched' %
                                               regex)
                    # direct state name given
                    else:
                        stack.append(new_state)
                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and no stack change.
                # this means a loop without break condition, avoid that and
                # raise error
                elif pos2 == pos:
                    raise RuntimeError('%r yielded empty string without '
                                       'stack change' % regex)
                # publish new position and start again
                pos = pos2
                break
            # if loop terminated without break we haven't found a single match
            # either we are at the end of the file or we have a problem
            else:
                # end of text
                if pos >= source_length:
                    return
                # something went wrong
                raise TemplateSyntaxError('unexpected char %r at %d' %
                                          (source[pos], pos), lineno,
                                          name, filename)