Blame - jinja2/lexer.py - platform/external/python/jinja

2007-02-26 22:17:32 +0100

[diff] [blame]

1

# -*- coding: utf-8 -*-

2

"""

Armin Ronacher

07bc684

2008-03-31 14:18:49 +0200

[diff] [blame]

3

jinja2.lexer

4

~~~~~~~~~~~~

Armin Ronacher

3b65b8a

2007-02-27 20:21:45 +0100

[diff] [blame]

5

Armin Ronacher

5a8e497

2007-04-05 11:21:38 +0200

[diff] [blame]

6

This module implements a Jinja / Python combination lexer. The

7

`Lexer` class provided by this module is used to do some preprocessing

8

for Jinja.

9

10

On the one hand it filters out invalid operators like the bitshift

11

operators we don't allow in templates. On the other hand it separates

12

template code and python code in expressions.

13

Armin Ronacher

1d51f63

2008-03-25 14:34:45 +0100

[diff] [blame]

14

:copyright: 2007-2008 by Armin Ronacher.

Armin Ronacher

3b65b8a

2007-02-27 20:21:45 +0100

[diff] [blame]

15

:license: BSD, see LICENSE for more details.

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

16

"""

17

import re

Armin Ronacher

2007-09-07 17:52:41 +0200

[diff] [blame]

18

import unicodedata

Armin Ronacher

2008-05-01 22:59:47 +0200

[diff] [blame]

19

from operator import itemgetter

20

from collections import deque

Armin Ronacher

82b3f3d

2008-03-31 20:01:08 +0200

[diff] [blame]

21

from jinja2.exceptions import TemplateSyntaxError

Armin Ronacher

2008-04-25 00:36:14 +0200

[diff] [blame]

22

from jinja2.utils import LRUCache

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

23

24

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

25

# cache for the lexers. Exists in order to be able to have multiple

26

# environments with the same lexer

Armin Ronacher

187bde1

2008-05-01 18:19:16 +0200

[diff] [blame]

27

_lexer_cache = LRUCache(50)

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

28

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

29

# static regular expressions
#
# Flags are passed through the ``flags`` argument instead of trailing
# inline groups such as ``(?um)``: global inline flags that are not at the
# very start of a pattern are deprecated and rejected by newer versions of
# the ``re`` module.  The compiled behavior is unchanged.
whitespace_re = re.compile(r'\s+', re.U | re.M)
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                       r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.M | re.S)
integer_re = re.compile(r'\d+')
name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
float_re = re.compile(r'\d+\.\d+')
newline_re = re.compile(r'(\r\n|\r|\n)')

# bind operators to token types
operators = {
    '+':            'add',
    '-':            'sub',
    '/':            'div',
    '//':           'floordiv',
    '*':            'mul',
    '%':            'mod',
    '**':           'pow',
    '~':            'tilde',
    '[':            'lbracket',
    ']':            'rbracket',
    '(':            'lparen',
    ')':            'rparen',
    '{':            'lbrace',
    '}':            'rbrace',
    '==':           'eq',
    '!=':           'ne',
    '>':            'gt',
    '>=':           'gteq',
    '<':            'lt',
    '<=':           'lteq',
    '=':            'assign',
    '.':            'dot',
    ':':            'colon',
    '|':            'pipe',
    ',':            'comma',
    ';':            'semicolon'
}

# token type -> operator text; the assert guards against two operators
# accidentally sharing one token type (which would drop an entry here).
reverse_operators = dict([(v, k) for k, v in operators.items()])
assert len(operators) == len(reverse_operators), 'operators dropped'

# longest operators first, because regex alternation takes the first
# alternative that matches ('**' has to be tried before '*').
operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
                         sorted(operators, key=lambda x: -len(x))))

Armin Ronacher

2007-09-07 17:52:41 +0200

[diff] [blame]

72

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

73

Armin Ronacher

d02fc7d

2008-06-14 14:19:47 +0200

[diff] [blame]

74

def count_newlines(value):
    """Return how many line breaks the string `value` contains.

    A line break is whatever the module-level ``newline_re`` matches.
    This is useful for extensions that filter a stream.
    """
    matches = newline_re.findall(value)
    return len(matches)

79

80

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

81

class Failure(object):
    """Callable stand-in for a known lexing error.

    Instances are used by the `Lexer` inside its rule tables.  Calling an
    instance raises `error_class` (a `TemplateSyntaxError` unless another
    class was given) built from the stored message.
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        # exception type to raise, and the text it is raised with
        self.error_class = cls
        self.message = message

    def __call__(self, lineno, filename):
        """Raise the configured error for the given line and filename."""
        error = self.error_class(self.message, lineno, filename)
        raise error

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

92

93

Armin Ronacher

2008-05-01 22:59:47 +0200

[diff] [blame]

class Token(tuple):
    """Immutable ``(lineno, type, value)`` triple.

    The three items are also available as the read-only attributes
    `lineno`, `type` and `value`.  The type is converted to a string and
    interned when the token is created.
    """
    __slots__ = ()
    lineno, type, value = (property(itemgetter(x)) for x in range(3))

    def __new__(cls, lineno, type, value):
        return tuple.__new__(cls, (lineno, intern(str(type)), value))

    def __str__(self):
        """Return the operator text for operator tokens, the value for
        name tokens and the token type for everything else.
        """
        if self.type in reverse_operators:
            return reverse_operators[self.type]
        # compare by value: an identity check against a string literal
        # only works as long as both strings happen to be interned.
        elif self.type == 'name':
            return self.value
        return self.type

    def test(self, expr):
        """Test a token against a token expression.  This can either be a
        token type or ``'token_type:token_value'``.  This can only test
        against string values and types.
        """
        # here we do a regular string equality check as test_any is usually
        # passed an iterable of not interned strings.
        if self.type == expr:
            return True
        elif ':' in expr:
            return expr.split(':', 1) == [self.type, self.value]
        return False

    def test_any(self, *iterable):
        """Test against multiple token expressions.  Returns `True` as
        soon as one of them matches, `False` if none does.
        """
        for expr in iterable:
            if self.test(expr):
                return True
        return False

    def __repr__(self):
        return 'Token(%r, %r, %r)' % (
            self.lineno,
            self.type,
            self.value
        )

class TokenStreamIterator(object):

138

"""The iterator for tokenstreams. Iterate over the stream

139

until the eof token is reached.

140

"""

141

142

def __init__(self, stream):

Armin Ronacher

2008-06-13 22:44:01 +0200

[diff] [blame]

143

self.stream = stream

Armin Ronacher

2008-05-01 22:59:47 +0200

[diff] [blame]

def __iter__(self):

return self

def next(self):

Armin Ronacher

2008-06-13 22:44:01 +0200

[diff] [blame]

149

token = self.stream.current

Armin Ronacher

2008-05-01 22:59:47 +0200

[diff] [blame]

150

if token.type == 'eof':

Armin Ronacher

2008-06-13 22:44:01 +0200

[diff] [blame]

151

self.stream.close()

Armin Ronacher

2008-05-01 22:59:47 +0200

[diff] [blame]

152

raise StopIteration()

Armin Ronacher

2008-06-13 22:44:01 +0200

[diff] [blame]

153

self.stream.next()

Armin Ronacher

2008-05-01 22:59:47 +0200

[diff] [blame]

return token

class TokenStream(object):
    """A token stream is an iterable that yields :class:`Token` objects.
    The parser however does not iterate over it but calls :meth:`next` to
    go one token ahead.  The current active token is stored as
    :attr:`current`.
    """

    def __init__(self, generator, name, filename):
        self._next = iter(generator).next
        # tokens pushed back via `push`; they are served before the
        # generator is asked for new ones
        self._pushed = deque()
        self.name = name
        self.filename = filename
        self.closed = False
        # placeholder token, immediately replaced by the first real one
        self.current = Token(1, 'initial', '')
        self.next()

    def __iter__(self):
        return TokenStreamIterator(self)

    def __nonzero__(self):
        """Return `True` as long as there are pushed-back tokens or the
        current token is not the eof token.
        """
        return bool(self._pushed) or self.current.type != 'eof'

    eos = property(lambda x: not x.__nonzero__(),
                   doc="Are we at the end of the stream?")

    def push(self, token):
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self):
        """Look at the next token without consuming the current one."""
        old_token = self.next()
        result = self.current
        # put the peeked token back and restore the previous current token
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n=1):
        """Go n tokens ahead."""
        for x in xrange(n):
            self.next()

    def next_if(self, expr):
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return self.next()

    def skip_if(self, expr):
        """Like :meth:`next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None

    def next(self):
        """Go one token ahead and return the old one."""
        rv = self.current
        if self._pushed:
            self.current = self._pushed.popleft()
        # compare by value; an identity check against a string literal
        # relies on both strings being interned.
        elif self.current.type != 'eof':
            try:
                self.current = self._next()
            except StopIteration:
                # generator exhausted: `close` installs the eof token
                self.close()
        return rv

    def close(self):
        """Close the stream.  The current token becomes an eof token that
        keeps the line number of the previous current token.
        """
        self.current = Token(self.current.lineno, 'eof', '')
        self._next = None
        self.closed = True

    def expect(self, expr):
        """Expect a given token type and return it.  This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.

        Raises a `TemplateSyntaxError` if the current token does not match;
        otherwise the stream is advanced by one token.
        """
        if not self.current.test(expr):
            if ':' in expr:
                # split only once, like `Token.test`, so that values which
                # contain a colon show up unabridged in the error message
                expr = expr.split(':', 1)[1]
            if self.current.type == 'eof':
                raise TemplateSyntaxError('unexpected end of template, '
                                          'expected %r.' % expr,
                                          self.current.lineno,
                                          self.name, self.filename)
            raise TemplateSyntaxError("expected token %r, got %r" %
                                      (expr, str(self.current)),
                                      self.current.lineno,
                                      self.name, self.filename)
        # `next` returns the token that was current before advancing
        return self.next()

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

249

class LexerMeta(type):
    """Metaclass for the lexer that reuses one lexer instance per
    configuration.

    Instances are stored in the module-level ``_lexer_cache``, keyed by the
    environment settings read in `__call__`.
    """

    def __call__(cls, environment):
        # the settings that make up the cache key
        settings = (
            'block_start_string',
            'block_end_string',
            'variable_start_string',
            'variable_end_string',
            'comment_start_string',
            'comment_end_string',
            'line_statement_prefix',
            'trim_blocks',
            'newline_sequence'
        )
        key = tuple([getattr(environment, attr) for attr in settings])
        cached = _lexer_cache.get(key)
        if cached is not None:
            return cached
        # no lexer for this configuration yet: build and remember one
        lexer = type.__call__(cls, environment)
        _lexer_cache[key] = lexer
        return lexer

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

271

class Lexer(object):

Armin Ronacher

2008-04-25 00:36:14 +0200

[diff] [blame]

272

"""Class that implements a lexer for a given environment. Automatically

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

273

created by the environment class, usually you don't have to do that.

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

274

275

Note that the lexer is not automatically bound to an environment.

276

Multiple environments can share the same lexer.

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

277

"""

278

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

279

__metaclass__ = LexerMeta

280

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

281

def __init__(self, environment):
    """Build the lexing rule tables for the delimiters configured on
    `environment`.

    Reads the block / variable / comment start and end strings,
    `line_statement_prefix`, `trim_blocks` and `newline_sequence` from the
    environment and stores the result in `self.rules` and
    `self.newline_sequence`.
    """
    # shortcuts
    c = lambda x: re.compile(x, re.M | re.S)
    e = re.escape

    # lexing rules for tags
    tag_rules = [
        (whitespace_re, 'whitespace', None),
        (float_re, 'float', None),
        (integer_re, 'integer', None),
        (name_re, 'name', None),
        (string_re, 'string', None),
        (operator_re, 'operator', None)
    ]

    # assemble the root lexing rule.  regex alternation ("|") takes the
    # first alternative that matches, so we have to sort by length
    # (longest first) so that the lexer continues working as expected
    # when we have parsing rules like <% for block and <%= for
    # variables. (if someone wants asp like syntax)
    root_tag_rules = [
        ('comment', environment.comment_start_string),
        ('block', environment.block_start_string),
        ('variable', environment.variable_start_string)
    ]
    root_tag_rules.sort(key=lambda x: -len(x[1]))

    # now escape the rules.  This is done here so that the escape
    # signs don't count for the lengths of the tags.
    root_tag_rules = [(a, e(b)) for a, b in root_tag_rules]

    # if we have a line statement prefix we need an extra rule for
    # that.  It is inserted at the front of the list, i.e. it becomes
    # the first of the tag alternatives in the root pattern below.
    if environment.line_statement_prefix is not None:
        prefix = e(environment.line_statement_prefix)
        root_tag_rules.insert(0, ('linestatement', r'^\s*' + prefix))

    # block suffix if trimming is enabled (the and/or idiom is safe here
    # because the pattern string is never empty)
    block_suffix_re = environment.trim_blocks and r'\n?' or ''

    self.newline_sequence = environment.newline_sequence

    # global lexing rules.  every rule is a tuple in the form
    # ``(regex, token or tuple of tokens, new state or None)``.
    self.rules = {
        'root': [
            # directives
            (c(r'(.*?)(?:%s)' % '|'.join(
                [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*%s)' % (
                    e(environment.block_start_string),
                    e(environment.block_start_string),
                    e(environment.block_end_string)
                )] + [
                    r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, r)
                    for n, r in root_tag_rules
                ])), ('data', '#bygroup'), '#bygroup'),
            # data
            (c('.+'), 'data', None)
        ],
        # comments
        'comment_begin': [
            (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
                e(environment.comment_end_string),
                e(environment.comment_end_string),
                block_suffix_re
            )), ('comment', 'comment_end'), '#pop'),
            (c('(.)'), (Failure('Missing end of comment tag'),), None)
        ],
        # blocks
        'block_begin': [
            (c(r'(?:\-%s\s*|%s)%s' % (
                e(environment.block_end_string),
                e(environment.block_end_string),
                block_suffix_re
            )), 'block_end', '#pop'),
        ] + tag_rules,
        # variables
        'variable_begin': [
            (c(r'\-%s\s*|%s' % (
                e(environment.variable_end_string),
                e(environment.variable_end_string)
            )), 'variable_end', '#pop')
        ] + tag_rules,
        # raw block
        'raw_begin': [
            (c(r'(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
                e(environment.block_start_string),
                e(environment.block_start_string),
                e(environment.block_end_string),
                e(environment.block_end_string),
                block_suffix_re
            )), ('data', 'raw_end'), '#pop'),
            (c('(.)'), (Failure('Missing end of raw directive'),), None)
        ],
        # line statements
        'linestatement_begin': [
            (c(r'\s*(\n|$)'), 'linestatement_end', '#pop')
        ] + tag_rules
    }

Armin Ronacher

2008-04-12 12:02:36 +0200

[diff] [blame]

380

Armin Ronacher

2008-05-23 23:18:14 +0200

[diff] [blame]

381

def _normalize_newlines(self, value):
    """Called for strings and template data to replace every newline
    matched by ``newline_re`` with the configured `newline_sequence`.
    """
    replacement = self.newline_sequence
    return newline_re.sub(replacement, value)

384

Armin Ronacher

2008-05-16 09:11:39 +0200

[diff] [blame]

385

def tokenize(self, source, name=None, filename=None):
    """Run `tokeniter` over `source`, pass the result through `wrap` and
    return it as a `TokenStream`.
    """
    raw_tokens = self.tokeniter(source, name, filename)
    wrapped = self.wrap(raw_tokens, name, filename)
    return TokenStream(wrapped, name, filename)

390

391

def wrap(self, stream, name=None, filename=None):
    """Turn raw ``(lineno, token, value)`` triples into :class:`Token`
    objects, converting the value on the way.

    `stream` is an iterable of such triples (`tokenize` passes in the
    result of `tokeniter`).  Comment, whitespace and raw begin/end tokens
    are dropped, line statement delimiters are reported as block
    delimiters, and values of data, name, string, integer, float and
    operator tokens are converted as described inline.

    This is a generator.  A `TemplateSyntaxError` is raised if a string
    literal cannot be unescaped.
    """
    for lineno, token, value in stream:
        # comments and whitespace never reach the parser
        if token in ('comment_begin', 'comment', 'comment_end',
                     'whitespace'):
            continue
        # line statements look like regular blocks to the parser
        elif token == 'linestatement_begin':
            token = 'block_begin'
        elif token == 'linestatement_end':
            token = 'block_end'
        # we are not interested in those tokens in the parser
        elif token in ('raw_begin', 'raw_end'):
            continue
        elif token == 'data':
            value = self._normalize_newlines(value)
        # for keywords the value itself becomes the token type
        elif token == 'keyword':
            token = value
        elif token == 'name':
            value = str(value)
        elif token == 'string':
            # try to unescape string: strip the surrounding quotes,
            # normalize newlines and resolve backslash escapes
            try:
                value = self._normalize_newlines(value[1:-1]) \
                    .encode('ascii', 'backslashreplace') \
                    .decode('unicode-escape')
            except Exception, e:
                # only the text after the last colon of the original
                # error message is reported
                msg = str(e).split(':')[-1].strip()
                raise TemplateSyntaxError(msg, lineno, name, filename)
            # if we can express it as bytestring (ascii only)
            # we do that for support of semi broken APIs
            # as datetime.datetime.strftime
            try:
                value = str(value)
            except UnicodeError:
                pass
        elif token == 'integer':
            value = int(value)
        elif token == 'float':
            value = float(value)
        # operators get their specific token type, e.g. '+' -> 'add'
        elif token == 'operator':
            token = operators[value]
        yield Token(lineno, token, value)

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

435

Armin Ronacher

2008-05-16 09:11:39 +0200

[diff] [blame]

436

def tokeniter(self, source, name, filename=None):

Armin Ronacher

2008-04-25 00:36:14 +0200

[diff] [blame]

437

"""This method tokenizes the text and returns the tokens in a

438

generator. Use this method if you just want to tokenize a template.

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

439

"""

Armin Ronacher

2008-05-23 23:18:14 +0200

[diff] [blame]

440

source = '\n'.join(unicode(source).splitlines())

Armin Ronacher

7977e5c

2007-03-12 07:22:17 +0100

[diff] [blame]

441

pos = 0

442

lineno = 1

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

443

stack = ['root']

444

statetokens = self.rules['root']

445

source_length = len(source)

446

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

447

balancing_stack = []

448

Armin Ronacher

2008-04-12 14:19:36 +0200

[diff] [blame]

449

while 1:

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

450

# tokenizer loop

451

for regex, tokens, new_state in statetokens:

452

m = regex.match(source, pos)

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

453

# if no match we try again with the next rule

Armin Ronacher

2008-04-12 14:19:36 +0200

[diff] [blame]

454

if m is None:

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

455

continue

456

457

# we only match blocks and variables if brances / parentheses

458

# are balanced. continue parsing with the lower rule which

459

# is the operator rule. do this only if the end tags look

460

# like operators

461

if balancing_stack and \

Armin Ronacher

2008-04-12 14:19:36 +0200

[diff] [blame]

462

tokens in ('variable_end', 'block_end',

463

'linestatement_end'):

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

464

continue

465

466

# tuples support more options

467

if isinstance(tokens, tuple):

468

for idx, token in enumerate(tokens):

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

469

# failure group

Armin Ronacher

d8b8c3e

2008-05-22 21:28:32 +0200

[diff] [blame]

470

if token.__class__ is Failure:

Armin Ronacher

720e55b

2007-05-30 00:57:49 +0200

[diff] [blame]

471

raise token(lineno, filename)

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

472

# bygroup is a bit more complex, in that case we

473

# yield for the current token the first named

474

# group that matched

475

elif token == '#bygroup':

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

476

for key, value in m.groupdict().iteritems():

477

if value is not None:

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

478

yield lineno, key, value

479

lineno += value.count('\n')

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

480

break

481

else:

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

482

raise RuntimeError('%r wanted to resolve '

483

'the token dynamically'

484

' but no group matched'

485

% regex)

486

# normal group

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

487

else:

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

488

data = m.group(idx + 1)

489

if data:

490

yield lineno, token, data

491

lineno += data.count('\n')

492

Armin Ronacher

2008-04-12 14:19:36 +0200

[diff] [blame]

493

# strings as token just are yielded as it.

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

494

else:

495

data = m.group()

496

# update brace/parentheses balance

497

if tokens == 'operator':

498

if data == '{':

499

balancing_stack.append('}')

500

elif data == '(':

501

balancing_stack.append(')')

502

elif data == '[':

503

balancing_stack.append(']')

504

elif data in ('}', ')', ']'):

Armin Ronacher

f750daa

2007-05-29 23:22:38 +0200

[diff] [blame]

505

if not balancing_stack:

506

raise TemplateSyntaxError('unexpected "%s"' %

Armin Ronacher

2008-05-16 09:11:39 +0200

[diff] [blame]

507

data, lineno, name,

Armin Ronacher

f750daa

2007-05-29 23:22:38 +0200

[diff] [blame]

508

filename)

509

expected_op = balancing_stack.pop()

510

if expected_op != data:

511

raise TemplateSyntaxError('unexpected "%s", '

512

'expected "%s"' %

513

(data, expected_op),

Armin Ronacher

2008-05-16 09:11:39 +0200

[diff] [blame]

514

lineno, name,

515

filename)

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

516

# yield items

Armin Ronacher

d8b8c3e

2008-05-22 21:28:32 +0200

[diff] [blame]

517

yield lineno, tokens, data

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

518

lineno += data.count('\n')

519

520

# fetch new position into new variable so that we can check

521

# if there is a internal parsing error which would result

522

# in an infinite loop

523

pos2 = m.end()

524

525

# handle state changes

526

if new_state is not None:

527

# remove the uppermost state

528

if new_state == '#pop':

529

stack.pop()

530

# resolve the new state by group checking

531

elif new_state == '#bygroup':

532

for key, value in m.groupdict().iteritems():

533

if value is not None:

stack.append(key)

break

else:

raise RuntimeError('%r wanted to resolve the '

538

'new state dynamically but'

539

' no group matched' %

540

regex)

541

# direct state name given

542

else:

543

stack.append(new_state)

544

statetokens = self.rules[stack[-1]]

545

# we are still at the same position and no stack change.

546

# this means a loop without break condition, avoid that and

547

# raise error

548

elif pos2 == pos:

549

raise RuntimeError('%r yielded empty string without '

550

'stack change' % regex)

551

# publish new function and start again

552

pos = pos2

553

break

Armin Ronacher

2007-02-26 22:17:32 +0100

[diff] [blame]

554

# if loop terminated without break we havn't found a single match

555

# either we are at the end of the file or we have a problem

556

else:

557

# end of text

558

if pos >= source_length:

559

return

560

# something went wrong

561

raise TemplateSyntaxError('unexpected char %r at %d' %

Armin Ronacher

2007-04-17 17:13:10 +0200

[diff] [blame]

562

(source[pos], pos), lineno,

Armin Ronacher