# -*- coding: utf-8 -*-
"""
    jinja2.lexer
    ~~~~~~~~~~~~

    This module implements a Jinja / Python combination lexer. The
    `Lexer` class provided by this module is used to do some preprocessing
    for Jinja.

    On the one hand it filters out invalid operators like the bitshift
    operators we don't allow in templates. On the other hand it separates
    template code and python code in expressions.

    :copyright: 2007-2008 by Armin Ronacher.
    :license: BSD, see LICENSE for more details.
"""
import re
import unicodedata
from operator import itemgetter
from collections import deque
from jinja2.exceptions import TemplateSyntaxError
from jinja2.utils import LRUCache


# cache for the lexers. Exists in order to be able to have multiple
# environments with the same lexer
_lexer_cache = LRUCache(50)

# static regular expressions
whitespace_re = re.compile(r'\s+(?um)')
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                       r'|"([^"\\]*(?:\\.[^"\\]*)*)")(?ms)')
integer_re = re.compile(r'\d+')
name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
float_re = re.compile(r'\d+\.\d+')

# bind operators to token types
operators = {
    '+': 'add',
    '-': 'sub',
    '/': 'div',
    '//': 'floordiv',
    '*': 'mul',
    '%': 'mod',
    '**': 'pow',
    '~': 'tilde',
    '[': 'lbracket',
    ']': 'rbracket',
    '(': 'lparen',
    ')': 'rparen',
    '{': 'lbrace',
    '}': 'rbrace',
    '==': 'eq',
    '!=': 'ne',
    '>': 'gt',
    '>=': 'gteq',
    '<': 'lt',
    '<=': 'lteq',
    '=': 'assign',
    '.': 'dot',
    ':': 'colon',
    '|': 'pipe',
    ',': 'comma',
    ';': 'semicolon'
}

reverse_operators = dict([(v, k) for k, v in operators.iteritems()])
assert len(operators) == len(reverse_operators), 'operators dropped'
operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
                         sorted(operators, key=lambda x: -len(x))))

simple_escapes = {
    'a': '\a',
    'n': '\n',
    'r': '\r',
    'f': '\f',
    't': '\t',
    'v': '\v',
    '\\': '\\',
    '"': '"',
    "'": "'",
    '0': '\x00'
}
unicode_escapes = {
    'x': 2,
    'u': 4,
    'U': 8
}


def unescape_string(lineno, filename, s):
    r"""Unescape a string. Supported escapes:
    \a, \n, \r, \f, \t, \v, \\, \", \', \0

    \x00, \u0000, \U00000000, \N{...}
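
    A doctest-style sketch (the `lineno` and `filename` arguments only
    matter for error reporting)::

        >>> unescape_string(1, None, 'foo\\nbar')
        u'foo\nbar'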
    """
    try:
        return s.encode('ascii', 'backslashreplace').decode('unicode-escape')
    except UnicodeError, e:
        msg = str(e).split(':')[-1].strip()
        raise TemplateSyntaxError(msg, lineno, filename)


class Failure(object):
    """Class that raises a `TemplateSyntaxError` if called.
    Used by the `Lexer` to specify known errors.
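
    A short sketch of how the lexer rules use it (`lineno` and `filename`
    are placeholders)::

        fail = Failure('Missing end of comment tag')
        fail(lineno, filename)  # raises TemplateSyntaxError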
107 """
108
109 def __init__(self, message, cls=TemplateSyntaxError):
110 self.message = message
111 self.error_class = cls
112
Armin Ronacher720e55b2007-05-30 00:57:49 +0200113 def __call__(self, lineno, filename):
114 raise self.error_class(self.message, lineno, filename)
Armin Ronacher92f572f2007-02-26 22:17:32 +0100115
116
class Token(tuple):
    """An immutable ``(lineno, type, value)`` token tuple."""
    __slots__ = ()
    lineno, type, value = (property(itemgetter(x)) for x in range(3))

    def __new__(cls, lineno, type, value):
        return tuple.__new__(cls, (lineno, intern(str(type)), value))

    def __str__(self):
        if self.type in reverse_operators:
            return reverse_operators[self.type]
        elif self.type is 'name':
            return self.value
        return self.type

    def test(self, expr):
        """Test a token against a token expression. This can either be a
        token type or ``'token_type:token_value'``. This can only test
        against string values and types.
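
        For example::

            Token(1, 'name', 'foo').test('name')        # True
            Token(1, 'name', 'foo').test('name:foo')    # True
            Token(1, 'name', 'foo').test('name:bar')    # False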
        """
        # here we do a regular string equality check as test_any is usually
        # passed an iterable of uninterned strings.
        if self.type == expr:
            return True
        elif ':' in expr:
            return expr.split(':', 1) == [self.type, self.value]
        return False

    def test_any(self, *iterable):
        """Test against multiple token expressions."""
        for expr in iterable:
            if self.test(expr):
                return True
        return False

    def __repr__(self):
        return 'Token(%r, %r, %r)' % (
            self.lineno,
            self.type,
            self.value
        )


class TokenStreamIterator(object):
    """The iterator for token streams. Iterates over the stream
    until the eof token is reached.
    """

    def __init__(self, stream):
        self._stream = stream

    def __iter__(self):
        return self

    def next(self):
        token = self._stream.current
        if token.type == 'eof':
            self._stream.close()
            raise StopIteration()
        self._stream.next()
        return token


class TokenStream(object):
    """A token stream is an iterable that yields :class:`Token`\s. The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead. The current active token is stored as :attr:`current`.
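
    A minimal usage sketch (assuming `lexer` is a `Lexer` instance)::

        stream = lexer.tokenize(u'{{ foo }}')
        stream.current          # Token(1, 'variable_begin', u'{{')
        stream.next()           # advance; returns the old current token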
    """

    def __init__(self, generator, filename):
        self._next = generator.next
        self._pushed = deque()
        self.current = Token(1, 'initial', '')
        self.filename = filename
        self.next()

    def __iter__(self):
        return TokenStreamIterator(self)

    def __nonzero__(self):
        """`True` as long as the stream is not exhausted."""
        return bool(self._pushed) or self.current.type != 'eof'

    eos = property(lambda x: not x.__nonzero__(),
                   doc='`True` if the end of the stream was reached.')
204
205 def push(self, token):
206 """Push a token back to the stream."""
207 self._pushed.append(token)
208
209 def look(self):
210 """Look at the next token."""
211 old_token = self.next()
212 result = self.current
213 self.push(result)
214 self.current = old_token
215 return result
216
Armin Ronacherea847c52008-05-02 20:04:32 +0200217 def skip(self, n=1):
Armin Ronacher4325e372008-05-01 22:59:47 +0200218 """Got n tokens ahead."""
219 for x in xrange(n):
220 self.next()
221
Armin Ronacherfdf95302008-05-11 22:20:51 +0200222 def next_if(self, expr):
223 """Perform the token test and return the token if it matched.
224 Otherwise the return value is `None`.
225 """
226 if self.current.test(expr):
227 return self.next()
228
229 def skip_if(self, expr):
230 """Like `next_if` but only returns `True` or `False`."""
231 return self.next_if(expr) is not None
232
    def next(self):
        """Go one token ahead and return the old one."""
        rv = self.current
        if self._pushed:
            self.current = self._pushed.popleft()
        elif self.current.type is not 'eof':
            try:
                self.current = self._next()
            except StopIteration:
                self.close()
        return rv

    def close(self):
        """Close the stream."""
        self.current = Token(self.current.lineno, 'eof', '')
        self._next = None

    def expect(self, expr):
        """Expect a given token type and return it. This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.
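
        For example::

            stream.expect('name')         # any name token
            stream.expect('name:endfor')  # a name token with value 'endfor'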
253 """
Armin Ronacher4325e372008-05-01 22:59:47 +0200254 if not self.current.test(expr):
255 if ':' in expr:
256 expr = expr.split(':')[1]
257 if self.current.type is 'eof':
258 raise TemplateSyntaxError('unexpected end of template, '
259 'expected %r.' % expr,
260 self.current.lineno,
261 self.filename)
262 raise TemplateSyntaxError("expected token %r, got %r" %
263 (expr, str(self.current)),
264 self.current.lineno,
265 self.filename)
266 try:
267 return self.current
268 finally:
269 self.next()
270
271
class LexerMeta(type):
    """Metaclass for the lexer that caches instances for
    the same configuration in an LRU cache.
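
    As a consequence, requesting lexers for environments with identical
    delimiter configuration yields the same instance (a sketch; `env1` and
    `env2` are placeholder environments, and the cache entry is assumed
    not to have been evicted in between)::

        Lexer(env1) is Lexer(env2)   # True for equal configurations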
275 """
276
277 def __call__(cls, environment):
Armin Ronacher203bfcb2008-04-24 21:54:44 +0200278 key = (environment.block_start_string,
279 environment.block_end_string,
280 environment.variable_start_string,
281 environment.variable_end_string,
282 environment.comment_start_string,
283 environment.comment_end_string,
284 environment.line_statement_prefix,
285 environment.trim_blocks)
Armin Ronacherb5124e62008-04-25 00:36:14 +0200286 lexer = _lexer_cache.get(key)
287 if lexer is None:
288 lexer = type.__call__(cls, environment)
289 _lexer_cache[key] = lexer
Armin Ronacher21580912007-04-17 17:13:10 +0200290 return lexer
291
292
class Lexer(object):
    """Class that implements a lexer for a given environment. Automatically
    created by the environment class; usually you don't have to do that.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    __metaclass__ = LexerMeta

    def __init__(self, environment):
        # shortcuts
        c = lambda x: re.compile(x, re.M | re.S)
        e = re.escape

        # lexing rules for tags
        tag_rules = [
            (whitespace_re, None, None),
            (float_re, 'float', None),
            (integer_re, 'integer', None),
            (name_re, 'name', None),
            (string_re, 'string', None),
            (operator_re, 'operator', None)
        ]

        # assemble the root lexing rule. because the regex alternation
        # operator ("|") tries alternatives from left to right, we have
        # to sort by length so that the lexer continues working as
        # expected when we have parsing rules like <% for blocks and
        # <%= for variables. (if someone wants asp like syntax)
        # variables are just part of the rules if variable processing
        # is required.
        root_tag_rules = [
            ('comment', environment.comment_start_string),
            ('block', environment.block_start_string),
            ('variable', environment.variable_start_string)
        ]
        root_tag_rules.sort(key=lambda x: -len(x[1]))

        # now escape the rules. This is done here so that the escape
        # signs don't count for the lengths of the tags.
        root_tag_rules = [(a, e(b)) for a, b in root_tag_rules]

        # if we have a line statement prefix we need an extra rule for
        # that. We add this rule after the others have been sorted and
        # escaped, and insert it at the front so it is tried first.
        if environment.line_statement_prefix is not None:
            prefix = e(environment.line_statement_prefix)
            root_tag_rules.insert(0, ('linestatement', '^\s*' + prefix))

        # block suffix if trimming is enabled
        block_suffix_re = environment.trim_blocks and '\\n?' or ''

        # global lexing rules
        self.rules = {
            'root': [
                # directives
                (c('(.*?)(?:%s)' % '|'.join(
                    ['(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*%s)' % (
                        e(environment.block_start_string),
                        e(environment.block_start_string),
                        e(environment.block_end_string)
                    )] + [
                        '(?P<%s_begin>\s*%s\-|%s)' % (n, r, r)
                        for n, r in root_tag_rules
                    ])), ('data', '#bygroup'), '#bygroup'),
                # data
                (c('.+'), 'data', None)
            ],
            # comments
            'comment_begin': [
                (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
                    e(environment.comment_end_string),
                    e(environment.comment_end_string),
                    block_suffix_re
                )), ('comment', 'comment_end'), '#pop'),
                (c('(.)'), (Failure('Missing end of comment tag'),), None)
            ],
            # blocks
            'block_begin': [
                (c('(?:\-%s\s*|%s)%s' % (
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), 'block_end', '#pop'),
            ] + tag_rules,
            # variables
            'variable_begin': [
                (c('\-%s\s*|%s' % (
                    e(environment.variable_end_string),
                    e(environment.variable_end_string)
                )), 'variable_end', '#pop')
            ] + tag_rules,
            # raw block
            'raw_begin': [
                (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
                    e(environment.block_start_string),
                    e(environment.block_start_string),
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), ('data', 'raw_end'), '#pop'),
                (c('(.)'), (Failure('Missing end of raw directive'),), None)
            ],
            # line statements
            'linestatement_begin': [
                (c(r'\s*(\n|$)'), 'linestatement_end', '#pop')
            ] + tag_rules
        }

    def tokenize(self, source, filename=None):
        """Works like `tokeniter` but returns a `TokenStream` of tokens and
        not a generator of token tuples. Additionally all token values are
        already converted into types and postprocessed. For example comments
        are removed, integers and floats converted, strings unescaped etc.
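
        A short sketch of the postprocessing (`lexer` is assumed to be a
        bound `Lexer` instance)::

            stream = lexer.tokenize(u'{{ 1 + 2 }}')
            # yields variable_begin, integer, add, integer and variable_end
            # tokens; the integer values arrive as real Python ints.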
        """
        source = unicode(source)
        def generate():
            for lineno, token, value in self.tokeniter(source, filename):
                if token in ('comment_begin', 'comment', 'comment_end'):
                    continue
                elif token == 'linestatement_begin':
                    token = 'block_begin'
                elif token == 'linestatement_end':
                    token = 'block_end'
                # we are not interested in those tokens in the parser
                elif token in ('raw_begin', 'raw_end'):
                    continue
                elif token == 'data':
                    # degrade to a plain bytestring if possible
                    try:
                        value = str(value)
                    except UnicodeError:
                        pass
                elif token == 'keyword':
                    token = value
                elif token == 'name':
                    value = str(value)
                elif token == 'string':
                    value = unescape_string(lineno, filename, value[1:-1])
                    # degrade to a plain bytestring if possible
                    try:
                        value = str(value)
                    except UnicodeError:
                        pass
                elif token == 'integer':
                    value = int(value)
                elif token == 'float':
                    value = float(value)
                elif token == 'operator':
                    token = operators[value]
                yield Token(lineno, token, value)
        return TokenStream(generate(), filename)

    def tokeniter(self, source, filename=None):
        """This method tokenizes the text and returns the tokens in a
        generator. Use this method if you just want to tokenize a template.
        The output you get is not compatible with the input the jinja parser
        wants. The parser uses the `tokenize` method, which returns a
        `TokenStream` of postprocessed tokens.
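
        A short sketch of the raw output::

            for lineno, token, value in lexer.tokeniter(u'{{ foo }}'):
                print lineno, token, value
            # 1 variable_begin {{
            # 1 name foo
            # 1 variable_end }}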
        """
        source = '\n'.join(source.splitlines())
        pos = 0
        lineno = 1
        stack = ['root']
        statetokens = self.rules['root']
        source_length = len(source)

        balancing_stack = []

        while 1:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)
                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and \
                   tokens in ('variable_end', 'block_end',
                              'linestatement_end'):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    for idx, token in enumerate(tokens):
                        # hidden group (regex groups are 1-based)
                        if token is None:
                            g = m.group(idx + 1)
                            if g:
                                lineno += g.count('\n')
                            continue
                        # failure group
                        elif token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == '#bygroup':
                            for key, value in m.groupdict().iteritems():
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count('\n')
                                    break
                            else:
                                raise RuntimeError('%r wanted to resolve '
                                                   'the token dynamically'
                                                   ' but no group matched'
                                                   % regex)
                        # normal group
                        else:
                            data = m.group(idx + 1)
                            if data:
                                yield lineno, token, data
                            lineno += data.count('\n')

                # plain string tokens are yielded as-is
                else:
                    data = m.group()
                    # update brace/parentheses balance
                    if tokens == 'operator':
                        if data == '{':
                            balancing_stack.append('}')
                        elif data == '(':
                            balancing_stack.append(')')
                        elif data == '[':
                            balancing_stack.append(']')
                        elif data in ('}', ')', ']'):
                            if not balancing_stack:
                                raise TemplateSyntaxError('unexpected "%s"' %
                                                          data, lineno,
                                                          filename)
                            expected_op = balancing_stack.pop()
                            if expected_op != data:
                                raise TemplateSyntaxError('unexpected "%s", '
                                                          'expected "%s"' %
                                                          (data, expected_op),
                                                          lineno, filename)
                    # yield items
                    if tokens is not None:
                        yield lineno, tokens, data
                    lineno += data.count('\n')

                # fetch new position into new variable so that we can check
                # if there is an internal parsing error which would result
                # in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == '#pop':
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == '#bygroup':
                        for key, value in m.groupdict().iteritems():
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError('%r wanted to resolve the '
                                               'new state dynamically but'
                                               ' no group matched' %
                                               regex)
                    # direct state name given
                    else:
                        stack.append(new_state)
                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and no stack change.
                # this means a loop without break condition, avoid that and
                # raise error
                elif pos2 == pos:
                    raise RuntimeError('%r yielded empty string without '
                                       'stack change' % regex)
                # publish the new position and start over
                pos = pos2
                break
            # if the loop terminated without break we haven't found a single
            # match: either we are at the end of the file or we have a problem
            else:
                # end of text
                if pos >= source_length:
                    return
                # something went wrong
                raise TemplateSyntaxError('unexpected char %r at %d' %
                                          (source[pos], pos), lineno,
                                          filename)