blob: 6e9fc8943398065348a7d23cd4aed93f3f6b481b [file] [log] [blame]
Armin Ronacher92f572f2007-02-26 22:17:32 +01001# -*- coding: utf-8 -*-
2"""
Armin Ronacher07bc6842008-03-31 14:18:49 +02003 jinja2.lexer
4 ~~~~~~~~~~~~
Armin Ronacher3b65b8a2007-02-27 20:21:45 +01005
Armin Ronacher5a8e4972007-04-05 11:21:38 +02006 This module implements a Jinja / Python combination lexer. The
7 `Lexer` class provided by this module is used to do some preprocessing
8 for Jinja.
9
10 On the one hand it filters out invalid operators like the bitshift
11 operators we don't allow in templates. On the other hand it separates
12 template code and python code in expressions.
13
Armin Ronacher1d51f632008-03-25 14:34:45 +010014 :copyright: 2007-2008 by Armin Ronacher.
Armin Ronacher3b65b8a2007-02-27 20:21:45 +010015 :license: BSD, see LICENSE for more details.
Armin Ronacher92f572f2007-02-26 22:17:32 +010016"""
17import re
Armin Ronacher1cc232c2007-09-07 17:52:41 +020018import unicodedata
Armin Ronacher82b3f3d2008-03-31 20:01:08 +020019from jinja2.datastructure import TokenStream, Token
20from jinja2.exceptions import TemplateSyntaxError
Armin Ronacher21580912007-04-17 17:13:10 +020021from weakref import WeakValueDictionary
Armin Ronacher92f572f2007-02-26 22:17:32 +010022
23
# public API of this module
__all__ = ['Lexer', 'Failure', 'keywords']


# cache for the lexers.  Exists in order to be able to have multiple
# environments with the same lexer.  Weak values: an entry disappears
# automatically once no environment references that lexer any more.
_lexer_cache = WeakValueDictionary()
30
31
# static regular expressions
whitespace_re = re.compile(r'\s+(?um)')  # runs of any whitespace
# single- or double-quoted string literal with backslash escapes
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                       r'|"([^"\\]*(?:\\.[^"\\]*)*)")(?ms)')
integer_re = re.compile(r'\d+')
# identifier; \b anchors keep it from matching inside longer tokens
name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
float_re = re.compile(r'\d+\.\d+')
# one or more line ends including surrounding whitespace
eol_re = re.compile(r'(\s*$\s*)+(?m)')
Armin Ronacher92f572f2007-02-26 22:17:32 +010041
# set of used keywords.  `Lexer.tokenize` turns name tokens matching one
# of these into keyword tokens.
keywords = set(['and', 'block', 'elif', 'else', 'endblock', 'print',
                'endfilter', 'endfor', 'endif', 'endmacro', 'endraw',
                'endtrans', 'extends', 'filter', 'for', 'if', 'in',
                'include', 'is', 'macro', 'not', 'or', 'pluralize', 'raw',
                'recursive', 'set', 'trans', 'call', 'endcall'])
Armin Ronacher1cc232c2007-09-07 17:52:41 +020048
# bind operators to token types
operators = {
    '+': 'add',
    '-': 'sub',
    '/': 'div',
    '//': 'floordiv',
    '*': 'mul',
    '%': 'mod',
    '**': 'pow',
    '~': 'tilde',
    '[': 'lbracket',
    ']': 'rbracket',
    '(': 'lparen',
    ')': 'rparen',
    '{': 'lbrace',
    '}': 'rbrace',
    '==': 'eq',
    '!=': 'ne',
    '>': 'gt',
    '>=': 'gteq',
    '<': 'lt',
    '<=': 'lteq',
    '=': 'assign',
    '.': 'dot',
    ':': 'colon',
    '|': 'pipe',
    ',': 'comma',
    ';': 'semicolon'
}

# token type -> operator text; the assert catches two operators
# accidentally mapping to the same token type.
reverse_operators = dict([(v, k) for k, v in operators.iteritems()])
assert len(operators) == len(reverse_operators), 'operators dropped'
# sort longest first so that e.g. '**' wins over '*' in alternation
operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
                         sorted(operators, key=lambda x: -len(x))))
Armin Ronacher1cc232c2007-09-07 17:52:41 +020083
# single-character backslash escapes and their replacement characters
simple_escapes = {
    'a': '\a',
    'n': '\n',
    'r': '\r',
    'f': '\f',
    't': '\t',
    'v': '\v',
    '\\': '\\',
    '"': '"',
    "'": "'",
    '0': '\x00'
}
# numeric unicode escapes: escape letter -> number of hex digits expected
unicode_escapes = {
    'x': 2,
    'u': 4,
    'U': 8
}
101
Armin Ronacher1cc232c2007-09-07 17:52:41 +0200102
def unescape_string(lineno, filename, s):
    r"""
    Resolve backslash escapes in *s* and return the result as unicode.

    Supported escapes:
    \a, \n, \r, \f, \t, \v, \\, \", \', \0

    \x00, \u0000, \U00000000, \N{...}

    Octal escapes (\101) are deliberately not supported because they
    are redundant.

    Raises `TemplateSyntaxError` (reported at *lineno* / *filename*) for
    truncated escapes, invalid codepoints and unknown character names.
    """
    pieces = []
    emit = pieces.append
    chars = iter(s)
    advance = chars.next

    try:
        for current in chars:
            if current != '\\':
                emit(current)
                continue
            # a backslash: dispatch on the character that follows it
            current = advance()
            if current in simple_escapes:
                emit(simple_escapes[current])
            elif current in unicode_escapes:
                # \xHH / \uHHHH / \UHHHHHHHH -- fixed number of hex digits
                digits = [advance() for _ in xrange(unicode_escapes[current])]
                try:
                    emit(unichr(int(''.join(digits), 16)))
                except ValueError:
                    raise TemplateSyntaxError('invalid unicode codepoint',
                                              lineno, filename)
            elif current == 'N':
                # \N{NAME} -- look the codepoint up by its unicode name
                if advance() != '{':
                    raise TemplateSyntaxError('no name for codepoint',
                                              lineno, filename)
                name = []
                while 1:
                    current = advance()
                    if current == '}':
                        break
                    name.append(current)
                try:
                    emit(unicodedata.lookup(u''.join(name)))
                except KeyError:
                    raise TemplateSyntaxError('unknown character name',
                                              lineno, filename)
            else:
                # unknown escape: keep the backslash verbatim
                emit('\\' + current)
    except StopIteration:
        # the string ended in the middle of an escape sequence
        raise TemplateSyntaxError('invalid string escape', lineno, filename)
    return u''.join(pieces)
156
157
def unescape_regex(s):
    """
    Unescape rules for regular expressions.

    NOTE(review): as written this function is an identity transform of
    *s*: ``in_escape`` is initialized to False and never set to True, so
    the escape-handling branch below is unreachable.  It also references
    a global ``safe_chars`` that is not defined anywhere in this module.
    This looks like dead or unfinished code -- confirm intent before
    relying on it.
    """
    buffer = []
    write = buffer.append
    in_escape = False
    for char in s:
        if in_escape:
            # unreachable with the current code (see NOTE in docstring)
            in_escape = False
            if char not in safe_chars:
                write('\\' + char)
                continue
        write(char)
    return u''.join(buffer)
Armin Ronacher2894f222007-03-19 22:39:55 +0100173
Armin Ronacher92f572f2007-02-26 22:17:32 +0100174
class Failure(object):
    """
    Callable error placeholder used in the lexing rules.

    An instance stores an error message and an exception class (by
    default `TemplateSyntaxError`); calling it with a line number and a
    filename raises that exception.  The `Lexer` uses this to attach
    known error conditions to catch-all rules.
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        self.message = message
        self.error_class = cls

    def __call__(self, lineno, filename):
        # raise the configured exception at the reported position
        exc_class = self.error_class
        raise exc_class(self.message, lineno, filename)
Armin Ronacher92f572f2007-02-26 22:17:32 +0100187
188
class LexerMeta(type):
    """
    Metaclass for the lexer that caches instances for
    the same configuration in a weak value dictionary.

    Two environments whose delimiter / trim settings are equal share one
    `Lexer` instance.
    """

    def __call__(cls, environment):
        # every setting that influences lexing takes part in the cache key
        key = hash((environment.block_start_string,
                    environment.block_end_string,
                    environment.variable_start_string,
                    environment.variable_end_string,
                    environment.comment_start_string,
                    environment.comment_end_string,
                    environment.trim_blocks))

        # Fetch with a single .get() instead of a containment test followed
        # by a subscript: entries of a WeakValueDictionary may be garbage
        # collected between the two operations, which would turn the
        # subsequent lookup into a spurious KeyError.
        lexer = _lexer_cache.get(key)
        if lexer is None:
            # create a new lexer and cache it
            lexer = type.__call__(cls, environment)
            _lexer_cache[key] = lexer
        return lexer
213
class Lexer(object):
    """
    Class that implements a lexer for a given environment. Automatically
    created by the environment class, usually you don't have to do that.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    __metaclass__ = LexerMeta

    def __init__(self, environment):
        # shortcuts: c compiles a pattern in multiline/dotall mode,
        # e escapes literal delimiter text for use inside a pattern
        c = lambda x: re.compile(x, re.M | re.S)
        e = re.escape

        # lexing rules for tags (inside {% ... %} / {{ ... }} blocks).
        # Order matters: float before integer, keyword before name.
        tag_rules = [
            (eol_re, 'eol', None),
            (whitespace_re, None, None),
            (float_re, 'float', None),
            (integer_re, 'integer', None),
            # keywords sorted longest first so alternation prefers the
            # longest match
            (c(r'\b(?:%s)\b' % '|'.join(sorted(keywords, key=lambda x: -len(x)))),
             'keyword', None),
            (name_re, 'name', None),
            (string_re, 'string', None),
            (operator_re, 'operator', None)
        ]

        #: if variables and blocks have the same delimiters we won't
        #: receive any variable blocks in the parser. This variable is `True`
        #: if we need that.
        self.no_variable_block = (
            (environment.variable_start_string is
             environment.variable_end_string is None) or
            (environment.variable_start_string ==
             environment.block_start_string and
             environment.variable_end_string ==
             environment.block_end_string)
        )

        # assemble the root lexing rule. because "|" is ungreedy
        # we have to sort by length so that the lexer continues working
        # as expected when we have parsing rules like <% for block and
        # <%= for variables. (if someone wants asp like syntax)
        # variables are just part of the rules if variable processing
        # is required.
        root_tag_rules = [
            ('comment', environment.comment_start_string),
            ('block', environment.block_start_string)
        ]
        if not self.no_variable_block:
            root_tag_rules.append(('variable',
                                   environment.variable_start_string))
        root_tag_rules.sort(lambda a, b: cmp(len(b[1]), len(a[1])))

        # block suffix if trimming is enabled: eat one trailing newline
        block_suffix_re = environment.trim_blocks and '\\n?' or ''

        # global lexing rules.  Each state maps to a list of
        # (compiled regex, token(s), new state) tuples; '#pop' pops the
        # state stack, '#bygroup' resolves the state from the first
        # matching named group.
        self.rules = {
            'root': [
                # directives: everything up to the next tag start is data,
                # then switch into the state named by the matching group
                (c('(.*?)(?:%s)' % '|'.join(
                    ['(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*%s)' % (
                        e(environment.block_start_string),
                        e(environment.block_start_string),
                        e(environment.block_end_string)
                    )] + [
                        '(?P<%s_begin>\s*%s\-|%s)' % (n, e(r), e(r))
                        for n, r in root_tag_rules
                    ])), ('data', '#bygroup'), '#bygroup'),
                # data: no more tags in the source, the rest is raw output
                (c('.+'), 'data', None)
            ],
            # comments: everything up to the comment end delimiter
            'comment_begin': [
                (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
                    e(environment.comment_end_string),
                    e(environment.comment_end_string),
                    block_suffix_re
                )), ('comment', 'comment_end'), '#pop'),
                (c('(.)'), (Failure('Missing end of comment tag'),), None)
            ],
            # blocks: the end delimiter pops the state; everything else is
            # handled by the shared tag rules
            'block_begin': [
                (c('(?:\-%s\s*|%s)%s' % (
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), 'block_end', '#pop'),
            ] + tag_rules,
            # raw block: consume verbatim data up to the endraw directive
            'raw_begin': [
                (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
                    e(environment.block_start_string),
                    e(environment.block_start_string),
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), ('data', 'raw_end'), '#pop'),
                (c('(.)'), (Failure('Missing end of raw directive'),), None)
            ]
        }

        # only add the variable rules to the list if we process variables
        # the variable_end_string variable could be None and break things.
        if not self.no_variable_block:
            self.rules['variable_begin'] = [
                (c('\-%s\s*|%s' % (
                    e(environment.variable_end_string),
                    e(environment.variable_end_string)
                )), 'variable_end', '#pop')
            ] + tag_rules

    def tokenize(self, source, filename=None):
        """
        Works like `tokeniter` but returns a tokenstream of tokens and not a
        generator or token tuples. Additionally all token values are already
        converted into types and postprocessed. For example keywords are
        already keyword tokens, not named tokens, comments are removed,
        integers and floats converted, strings unescaped etc.
        """
        def generate():
            for lineno, token, value in self.tokeniter(source, filename):
                # comments never reach the parser
                if token in ('comment_begin', 'comment', 'comment_end'):
                    continue
                elif token == 'data':
                    # try to downgrade to a plain bytestring; keep unicode
                    # if the data is not ASCII-safe
                    try:
                        value = str(value)
                    except UnicodeError:
                        pass
                elif token == 'keyword':
                    # keyword tokens carry the keyword itself as type
                    token = value
                elif token == 'name':
                    value = str(value)
                elif token == 'string':
                    # strip the surrounding quotes and resolve escapes
                    value = unescape_string(lineno, filename, value[1:-1])
                    try:
                        value = str(value)
                    except UnicodeError:
                        pass
                elif token == 'integer':
                    value = int(value)
                elif token == 'float':
                    value = float(value)
                elif token == 'operator':
                    # operators become their own token type with no value
                    token = operators[value]
                    value = ''
                yield Token(lineno, token, value)
        return TokenStream(generate(), filename)

    def tokeniter(self, source, filename=None):
        """
        This method tokenizes the text and returns the tokens in a generator.
        Use this method if you just want to tokenize a template. The output
        you get is not compatible with the input the jinja parser wants. The
        parser uses the `tokenize` function with returns a `TokenStream` and
        keywords instead of just names.

        Yields ``(lineno, token_type, value)`` tuples.
        """
        # normalize line endings to '\n'
        source = '\n'.join(source.splitlines())
        pos = 0
        lineno = 1
        stack = ['root']
        statetokens = self.rules['root']
        source_length = len(source)

        # closing brackets we still expect; while non-empty, block/variable
        # end delimiters that look like operators are not treated as ends
        balancing_stack = []

        while True:
            # tokenizer loop: try each rule of the current state in order
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)
                # if no match we try again with the next rule
                if not m:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and \
                   tokens in ('variable_end', 'block_end'):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    for idx, token in enumerate(tokens):
                        # hidden group: consumed but not yielded, only the
                        # line counter is updated.
                        # NOTE(review): this uses m.group(idx) while the
                        # normal-group branch below uses m.group(idx + 1);
                        # looks like an off-by-one (no current rule uses a
                        # None token, so this branch appears dead) -- confirm.
                        if token is None:
                            g = m.group(idx)
                            if g:
                                lineno += g.count('\n')
                            continue
                        # failure group: raise the configured error
                        elif token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == '#bygroup':
                            for key, value in m.groupdict().iteritems():
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count('\n')
                                    break
                            else:
                                raise RuntimeError('%r wanted to resolve '
                                                   'the token dynamically'
                                                   ' but no group matched'
                                                   % regex)
                        # normal group: yield it if it captured anything
                        else:
                            data = m.group(idx + 1)
                            if data:
                                yield lineno, token, data
                            lineno += data.count('\n')

                # strings as token just are yielded as it, but just
                # if the data is not empty
                else:
                    data = m.group()
                    # update brace/parentheses balance
                    if tokens == 'operator':
                        if data == '{':
                            balancing_stack.append('}')
                        elif data == '(':
                            balancing_stack.append(')')
                        elif data == '[':
                            balancing_stack.append(']')
                        elif data in ('}', ')', ']'):
                            if not balancing_stack:
                                raise TemplateSyntaxError('unexpected "%s"' %
                                                          data, lineno,
                                                          filename)
                            expected_op = balancing_stack.pop()
                            if expected_op != data:
                                raise TemplateSyntaxError('unexpected "%s", '
                                                          'expected "%s"' %
                                                          (data, expected_op),
                                                          lineno, filename)
                    # yield items
                    if tokens is not None:
                        if data:
                            yield lineno, tokens, data
                        lineno += data.count('\n')

                # fetch new position into new variable so that we can check
                # if there is a internal parsing error which would result
                # in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == '#pop':
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == '#bygroup':
                        for key, value in m.groupdict().iteritems():
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError('%r wanted to resolve the '
                                               'new state dynamically but'
                                               ' no group matched' %
                                               regex)
                    # direct state name given
                    else:
                        stack.append(new_state)
                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and no stack change.
                # this means a loop without break condition, avoid that and
                # raise error
                elif pos2 == pos:
                    raise RuntimeError('%r yielded empty string without '
                                       'stack change' % regex)
                # publish new position and start again
                pos = pos2
                break
            # if loop terminated without break we haven't found a single match
            # either we are at the end of the file or we have a problem
            else:
                # end of text
                if pos >= source_length:
                    return
                # something went wrong
                raise TemplateSyntaxError('unexpected char %r at %d' %
                                          (source[pos], pos), lineno,
                                          filename)