blob: 2719dccb01062dcb91fdfaefe4955307172af3ca [file] [log] [blame]
Armin Ronacher92f572f2007-02-26 22:17:32 +01001# -*- coding: utf-8 -*-
2"""
Armin Ronacher07bc6842008-03-31 14:18:49 +02003 jinja2.lexer
4 ~~~~~~~~~~~~
Armin Ronacher3b65b8a2007-02-27 20:21:45 +01005
Armin Ronacher5a8e4972007-04-05 11:21:38 +02006 This module implements a Jinja / Python combination lexer. The
7 `Lexer` class provided by this module is used to do some preprocessing
8 for Jinja.
9
10 On the one hand it filters out invalid operators like the bitshift
11 operators we don't allow in templates. On the other hand it separates
12 template code and python code in expressions.
13
Armin Ronacher1d51f632008-03-25 14:34:45 +010014 :copyright: 2007-2008 by Armin Ronacher.
Armin Ronacher3b65b8a2007-02-27 20:21:45 +010015 :license: BSD, see LICENSE for more details.
Armin Ronacher92f572f2007-02-26 22:17:32 +010016"""
17import re
Armin Ronacher1cc232c2007-09-07 17:52:41 +020018import unicodedata
Armin Ronacher4325e372008-05-01 22:59:47 +020019from operator import itemgetter
20from collections import deque
Armin Ronacher82b3f3d2008-03-31 20:01:08 +020021from jinja2.exceptions import TemplateSyntaxError
Armin Ronacherb5124e62008-04-25 00:36:14 +020022from jinja2.utils import LRUCache
Armin Ronacher92f572f2007-02-26 22:17:32 +010023
24
# Cache for lexer instances.  Exists so that multiple environments that
# share the same delimiter configuration can reuse one lexer (the key is
# built from the environment settings in `LexerMeta.__call__`).
_lexer_cache = LRUCache(50)
Armin Ronacher21580912007-04-17 17:13:10 +020028
# static regular expressions shared by all lexer instances.  Flags are
# passed explicitly instead of as inline `(?um)` / `(?ms)` suffixes.
whitespace_re = re.compile(r'\s+', re.U | re.M)
# a single- or double-quoted string with backslash escapes; DOTALL so
# escaped characters may include newlines
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                       r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.M | re.S)
integer_re = re.compile(r'\d+')
name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
float_re = re.compile(r'\d+\.\d+')
# mapping of operator literals to their token type names
operators = {
    '+': 'add',
    '-': 'sub',
    '/': 'div',
    '//': 'floordiv',
    '*': 'mul',
    '%': 'mod',
    '**': 'pow',
    '~': 'tilde',
    '[': 'lbracket',
    ']': 'rbracket',
    '(': 'lparen',
    ')': 'rparen',
    '{': 'lbrace',
    '}': 'rbrace',
    '==': 'eq',
    '!=': 'ne',
    '>': 'gt',
    '>=': 'gteq',
    '<': 'lt',
    '<=': 'lteq',
    '=': 'assign',
    '.': 'dot',
    ':': 'colon',
    '|': 'pipe',
    ',': 'comma',
    ';': 'semicolon'
}

# reverse mapping used to render readable operator names in error messages
reverse_operators = dict((token, op) for op, token in operators.items())
assert len(operators) == len(reverse_operators), 'operators dropped'

# longest operators first so that e.g. '**' wins over '*' and '//' over '/'
operator_re = re.compile('(%s)' % '|'.join(
    re.escape(op) for op in sorted(operators, key=len, reverse=True)))
Armin Ronacher1cc232c2007-09-07 17:52:41 +020071
Armin Ronacher92f572f2007-02-26 22:17:32 +010072
class Failure(object):
    """A callable sentinel placed in lexer rules.  Calling an instance
    raises the configured error class (a `TemplateSyntaxError` by default)
    with the stored message.  Used by the `Lexer` to report known errors.
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        self.message = message
        self.error_class = cls

    def __call__(self, lineno, filename):
        # invoked by the tokenizer when a failure rule matches
        raise self.error_class(self.message, lineno, filename)
85
class Token(tuple):
    """Token class: an immutable ``(lineno, type, value)`` triple."""
    __slots__ = ()
    # expose the tuple slots under readable names
    lineno, type, value = (property(itemgetter(x)) for x in range(3))

    def __new__(cls, lineno, type, value):
        # the type is interned so that identity comparisons against
        # literal token types are possible
        return tuple.__new__(cls, (lineno, intern(str(type)), value))

    def __str__(self):
        """Return a readable representation for error messages.

        Bugfix: the old implementation did ``from jinja.lexer import
        keywords, reverse_operators`` -- the module is ``jinja2.lexer``
        and no ``keywords`` mapping exists, so every call raised an
        ImportError.  Use the module-level `reverse_operators` directly.
        """
        if self.type in reverse_operators:
            return reverse_operators[self.type]
        elif self.type == 'name':
            return self.value
        return self.type

    def test(self, expr):
        """Test a token against a token expression.  This can either be a
        token type or ``'token_type:token_value'``.  This can only test
        against string values and types.
        """
        # here we do a regular string equality check as test_any is usually
        # passed an iterable of not interned strings.
        if self.type == expr:
            return True
        elif ':' in expr:
            return expr.split(':', 1) == [self.type, self.value]
        return False

    def test_any(self, *iterable):
        """Test against multiple token expressions."""
        for expr in iterable:
            if self.test(expr):
                return True
        return False

    def __repr__(self):
        return 'Token(%r, %r, %r)' % (
            self.lineno,
            self.type,
            self.value
        )
130
131
132class TokenStreamIterator(object):
133 """The iterator for tokenstreams. Iterate over the stream
134 until the eof token is reached.
135 """
136
137 def __init__(self, stream):
138 self._stream = stream
139
140 def __iter__(self):
141 return self
142
143 def next(self):
144 token = self._stream.current
145 if token.type == 'eof':
146 self._stream.close()
147 raise StopIteration()
148 self._stream.next(False)
149 return token
150
151
class TokenStream(object):
    """A token stream is an iterable that yields :class:`Token`\s.  The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead.  The current active token is stored as :attr:`current`.
    """

    def __init__(self, generator, name, filename):
        self._next = generator.next
        self._pushed = deque()
        self.current = Token(1, 'initial', '')
        self.name = name
        self.filename = filename
        self.next()

    def __iter__(self):
        return TokenStreamIterator(self)

    def __nonzero__(self):
        """Are we at the end of the stream?"""
        # fix: compare with `!=` instead of `is not` -- string identity
        # only worked by accident of interning and is fragile
        return bool(self._pushed) or self.current.type != 'eof'

    eos = property(lambda x: not x.__nonzero__(), doc=__nonzero__.__doc__)

    def push(self, token):
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self):
        """Look at the next token without consuming it."""
        old_token = self.next()
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n=1):
        """Go n tokens ahead."""
        for x in xrange(n):
            self.next()

    def next_if(self, expr):
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return self.next()

    def skip_if(self, expr):
        """Like `next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None

    def next(self):
        """Go one token ahead and return the old one."""
        rv = self.current
        if self._pushed:
            self.current = self._pushed.popleft()
        elif self.current.type != 'eof':
            try:
                self.current = self._next()
            except StopIteration:
                self.close()
        return rv

    def close(self):
        """Close the stream: pin the current token to eof and release the
        underlying generator."""
        self.current = Token(self.current.lineno, 'eof', '')
        self._next = None

    def expect(self, expr):
        """Expect a given token type and return it.  This accepts the same
        argument as :meth:`jinja2.lexer.Token.test` and raises a
        `TemplateSyntaxError` on mismatch.
        """
        if not self.current.test(expr):
            if ':' in expr:
                expr = expr.split(':')[1]
            if self.current.type == 'eof':
                raise TemplateSyntaxError('unexpected end of template, '
                                          'expected %r.' % expr,
                                          self.current.lineno,
                                          self.name, self.filename)
            raise TemplateSyntaxError("expected token %r, got %r" %
                                      (expr, str(self.current)),
                                      self.current.lineno,
                                      self.name, self.filename)
        # return the matched token and advance past it
        try:
            return self.current
        finally:
            self.next()
240
241
class LexerMeta(type):
    """Metaclass for the lexer that caches instances for the same
    configuration in an LRU cache instead of constructing a new lexer
    for every environment.
    """

    def __call__(cls, environment):
        # every environment setting that influences lexing is part of
        # the cache key
        settings = ('block_start_string',
                    'block_end_string',
                    'variable_start_string',
                    'variable_end_string',
                    'comment_start_string',
                    'comment_end_string',
                    'line_statement_prefix',
                    'trim_blocks')
        key = tuple(getattr(environment, name) for name in settings)
        lexer = _lexer_cache.get(key)
        if lexer is None:
            lexer = type.__call__(cls, environment)
            _lexer_cache[key] = lexer
        return lexer
261
262
class Lexer(object):
    """Class that implements a lexer for a given environment. Automatically
    created by the environment class, usually you don't have to do that.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    # one instance per delimiter configuration, cached by the metaclass
    __metaclass__ = LexerMeta

    def __init__(self, environment):
        # shortcuts: `c` compiles with MULTILINE | DOTALL, `e` regex-escapes
        c = lambda x: re.compile(x, re.M | re.S)
        e = re.escape

        # lexing rules for tags: (compiled regex, token type(s), new state)
        tag_rules = [
            (whitespace_re, None, None),
            # float before integer so '1.0' is not split into '1' '.' '0'
            (float_re, 'float', None),
            (integer_re, 'integer', None),
            (name_re, 'name', None),
            (string_re, 'string', None),
            (operator_re, 'operator', None)
        ]

        # assemble the root lexing rule. because "|" is ungreedy
        # we have to sort by length so that the lexer continues working
        # as expected when we have parsing rules like <% for block and
        # <%= for variables. (if someone wants asp like syntax)
        # variables are just part of the rules if variable processing
        # is required.
        root_tag_rules = [
            ('comment', environment.comment_start_string),
            ('block', environment.block_start_string),
            ('variable', environment.variable_start_string)
        ]
        root_tag_rules.sort(key=lambda x: -len(x[1]))

        # now escape the rules. This is done here so that the escape
        # signs don't count for the lengths of the tags.
        root_tag_rules = [(a, e(b)) for a, b in root_tag_rules]

        # if we have a line statement prefix we need an extra rule for
        # that. It is inserted at the front so the alternation tries it
        # before the other root tags.
        if environment.line_statement_prefix is not None:
            prefix = e(environment.line_statement_prefix)
            root_tag_rules.insert(0, ('linestatement', '^\s*' + prefix))

        # block suffix if trimming is enabled (eats one trailing newline)
        block_suffix_re = environment.trim_blocks and '\\n?' or ''

        # global lexing rules, keyed by lexer state
        self.rules = {
            'root': [
                # directives: raw blocks plus the dynamically assembled
                # root tags; leading text is yielded as 'data'
                (c('(.*?)(?:%s)' % '|'.join(
                    ['(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*%s)' % (
                        e(environment.block_start_string),
                        e(environment.block_start_string),
                        e(environment.block_end_string)
                    )] + [
                        '(?P<%s_begin>\s*%s\-|%s)' % (n, r, r)
                        for n, r in root_tag_rules
                    ])), ('data', '#bygroup'), '#bygroup'),
                # data: anything up to the end of the source
                (c('.+'), 'data', None)
            ],
            # comments
            'comment_begin': [
                (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
                    e(environment.comment_end_string),
                    e(environment.comment_end_string),
                    block_suffix_re
                )), ('comment', 'comment_end'), '#pop'),
                (c('(.)'), (Failure('Missing end of comment tag'),), None)
            ],
            # blocks
            'block_begin': [
                (c('(?:\-%s\s*|%s)%s' % (
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), 'block_end', '#pop'),
            ] + tag_rules,
            # variables
            'variable_begin': [
                (c('\-%s\s*|%s' % (
                    e(environment.variable_end_string),
                    e(environment.variable_end_string)
                )), 'variable_end', '#pop')
            ] + tag_rules,
            # raw block: everything up to the matching endraw is data
            'raw_begin': [
                (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
                    e(environment.block_start_string),
                    e(environment.block_start_string),
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), ('data', 'raw_end'), '#pop'),
                (c('(.)'), (Failure('Missing end of raw directive'),), None)
            ],
            # line statements end at the next newline (or end of source)
            'linestatement_begin': [
                (c(r'\s*(\n|$)'), 'linestatement_end', '#pop')
            ] + tag_rules
        }

    def tokenize(self, source, name=None, filename=None):
        """Works like `tokeniter` but returns a tokenstream of tokens and not
        a generator or token tuples. Additionally all token values are already
        converted into types and postprocessed. For example comments are
        removed, integers and floats converted, strings unescaped etc.
        """
        source = unicode(source)
        def generate():
            for lineno, token, value in self.tokeniter(source, name, filename):
                # comments never reach the parser
                if token in ('comment_begin', 'comment', 'comment_end'):
                    continue
                # line statements are rewritten to regular block tokens
                elif token == 'linestatement_begin':
                    token = 'block_begin'
                elif token == 'linestatement_end':
                    token = 'block_end'
                # we are not interested in those tokens in the parser
                elif token in ('raw_begin', 'raw_end'):
                    continue
                elif token == 'data':
                    # degrade to a bytestring where possible (ascii only)
                    try:
                        value = str(value)
                    except UnicodeError:
                        pass
                elif token == 'keyword':
                    token = value
                elif token == 'name':
                    value = str(value)
                elif token == 'string':
                    # try to unescape string
                    try:
                        value = value[1:-1] \
                            .encode('ascii', 'backslashreplace') \
                            .decode('unicode-escape')
                    except Exception, e:
                        msg = str(e).split(':')[-1].strip()
                        raise TemplateSyntaxError(msg, lineno, name, filename)
                    # if we can express it as bytestring (ascii only)
                    # we do that for support of semi broken APIs
                    # as datetime.datetime.strftime
                    try:
                        value = str(value)
                    except UnicodeError:
                        pass
                elif token == 'integer':
                    value = int(value)
                elif token == 'float':
                    value = float(value)
                elif token == 'operator':
                    # operators become their token type name ('add', ...)
                    token = operators[value]
                yield Token(lineno, token, value)
        return TokenStream(generate(), name, filename)

    def tokeniter(self, source, name, filename=None):
        """This method tokenizes the text and returns the tokens in a
        generator. Use this method if you just want to tokenize a template.
        The output you get is not compatible with the input the jinja parser
        wants. The parser uses the `tokenize` function which returns a
        `TokenStream` of postprocessed tokens.
        """
        # normalize line endings to '\n'
        source = '\n'.join(source.splitlines())
        pos = 0
        lineno = 1
        stack = ['root']            # state stack; top entry is current state
        statetokens = self.rules['root']
        source_length = len(source)

        # closing brackets we still expect; block/variable ends are ignored
        # while this is non-empty so expressions may contain the delimiters
        balancing_stack = []

        while 1:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)
                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and \
                   tokens in ('variable_end', 'block_end',
                              'linestatement_end'):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    for idx, token in enumerate(tokens):
                        # hidden group: consumed but not yielded
                        if token is None:
                            # NOTE(review): uses group(idx) while the normal
                            # branch below uses group(idx + 1) -- looks like
                            # an off-by-one; confirm against rule definitions
                            g = m.group(idx)
                            if g:
                                lineno += g.count('\n')
                            continue
                        # failure group: raise the configured syntax error
                        elif token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == '#bygroup':
                            for key, value in m.groupdict().iteritems():
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count('\n')
                                    break
                            else:
                                raise RuntimeError('%r wanted to resolve '
                                                   'the token dynamically'
                                                   ' but no group matched'
                                                   % regex)
                        # normal group
                        else:
                            data = m.group(idx + 1)
                            if data:
                                yield lineno, token, data
                            lineno += data.count('\n')

                # strings as token just are yielded as is
                else:
                    data = m.group()
                    # update brace/parentheses balance
                    if tokens == 'operator':
                        if data == '{':
                            balancing_stack.append('}')
                        elif data == '(':
                            balancing_stack.append(')')
                        elif data == '[':
                            balancing_stack.append(']')
                        elif data in ('}', ')', ']'):
                            if not balancing_stack:
                                raise TemplateSyntaxError('unexpected "%s"' %
                                                          data, lineno, name,
                                                          filename)
                            expected_op = balancing_stack.pop()
                            if expected_op != data:
                                raise TemplateSyntaxError('unexpected "%s", '
                                                          'expected "%s"' %
                                                          (data, expected_op),
                                                          lineno, name,
                                                          filename)
                    # yield items
                    if tokens is not None:
                        yield lineno, tokens, data
                    lineno += data.count('\n')

                # fetch new position into new variable so that we can check
                # if there is a internal parsing error which would result
                # in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == '#pop':
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == '#bygroup':
                        for key, value in m.groupdict().iteritems():
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError('%r wanted to resolve the '
                                               'new state dynamically but'
                                               ' no group matched' %
                                               regex)
                    # direct state name given
                    else:
                        stack.append(new_state)
                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and no stack change.
                # this means a loop without break condition, avoid that and
                # raise error
                elif pos2 == pos:
                    raise RuntimeError('%r yielded empty string without '
                                       'stack change' % regex)
                # publish new position and start again
                pos = pos2
                break
            # if loop terminated without break we haven't found a single match
            # either we are at the end of the file or we have a problem
            else:
                # end of text
                if pos >= source_length:
                    return
                # something went wrong
                raise TemplateSyntaxError('unexpected char %r at %d' %
                                          (source[pos], pos), lineno,
                                          name, filename)