blob: 7f0b33f757a3bb92b068e129edde58ba46baa149 [file] [log] [blame]
Armin Ronacher92f572f2007-02-26 22:17:32 +01001# -*- coding: utf-8 -*-
2"""
Armin Ronacher07bc6842008-03-31 14:18:49 +02003 jinja2.lexer
4 ~~~~~~~~~~~~
Armin Ronacher3b65b8a2007-02-27 20:21:45 +01005
Armin Ronacher5a8e4972007-04-05 11:21:38 +02006 This module implements a Jinja / Python combination lexer. The
7 `Lexer` class provided by this module is used to do some preprocessing
8 for Jinja.
9
10 On the one hand it filters out invalid operators like the bitshift
11 operators we don't allow in templates. On the other hand it separates
12 template code and python code in expressions.
13
Armin Ronacher1d51f632008-03-25 14:34:45 +010014 :copyright: 2007-2008 by Armin Ronacher.
Armin Ronacher3b65b8a2007-02-27 20:21:45 +010015 :license: BSD, see LICENSE for more details.
Armin Ronacher92f572f2007-02-26 22:17:32 +010016"""
17import re
Armin Ronacher1cc232c2007-09-07 17:52:41 +020018import unicodedata
Armin Ronacher4325e372008-05-01 22:59:47 +020019from operator import itemgetter
20from collections import deque
Armin Ronacher82b3f3d2008-03-31 20:01:08 +020021from jinja2.exceptions import TemplateSyntaxError
Armin Ronacherb5124e62008-04-25 00:36:14 +020022from jinja2.utils import LRUCache
Armin Ronacher92f572f2007-02-26 22:17:32 +010023
24
Armin Ronacher21580912007-04-17 17:13:10 +020025# cache for the lexers. Exists in order to be able to have multiple
26# environments with the same lexer
Armin Ronacher187bde12008-05-01 18:19:16 +020027_lexer_cache = LRUCache(50)
Armin Ronacher21580912007-04-17 17:13:10 +020028
Armin Ronacher92f572f2007-02-26 22:17:32 +010029# static regular expressions
Armin Ronacher0949e4d2007-10-07 18:53:29 +020030whitespace_re = re.compile(r'\s+(?um)')
Armin Ronacher92f572f2007-02-26 22:17:32 +010031string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
32 r'|"([^"\\]*(?:\\.[^"\\]*)*)")(?ms)')
Armin Ronacher1cc232c2007-09-07 17:52:41 +020033integer_re = re.compile(r'\d+')
Armin Ronacherd1ff8582008-05-11 00:30:43 +020034name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
Armin Ronacher1cc232c2007-09-07 17:52:41 +020035float_re = re.compile(r'\d+\.\d+')
Armin Ronacher92f572f2007-02-26 22:17:32 +010036
Armin Ronacher1cc232c2007-09-07 17:52:41 +020037# bind operators to token types
38operators = {
39 '+': 'add',
40 '-': 'sub',
41 '/': 'div',
42 '//': 'floordiv',
43 '*': 'mul',
44 '%': 'mod',
45 '**': 'pow',
46 '~': 'tilde',
Armin Ronacher1cc232c2007-09-07 17:52:41 +020047 '[': 'lbracket',
48 ']': 'rbracket',
49 '(': 'lparen',
50 ')': 'rparen',
51 '{': 'lbrace',
52 '}': 'rbrace',
53 '==': 'eq',
54 '!=': 'ne',
55 '>': 'gt',
56 '>=': 'gteq',
57 '<': 'lt',
58 '<=': 'lteq',
59 '=': 'assign',
60 '.': 'dot',
61 ':': 'colon',
62 '|': 'pipe',
Armin Ronacher07bc6842008-03-31 14:18:49 +020063 ',': 'comma',
64 ';': 'semicolon'
Armin Ronacher1cc232c2007-09-07 17:52:41 +020065}
66
67reverse_operators = dict([(v, k) for k, v in operators.iteritems()])
68assert len(operators) == len(reverse_operators), 'operators dropped'
Armin Ronachere791c2a2008-04-07 18:39:54 +020069operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
70 sorted(operators, key=lambda x: -len(x))))
Armin Ronacher1cc232c2007-09-07 17:52:41 +020071
Armin Ronacher92f572f2007-02-26 22:17:32 +010072
class Failure(object):
    """Callable placeholder that reports a known lexing error.

    The `Lexer` installs instances of this class in its rule tables for
    input that is known to be malformed; invoking the instance raises
    the configured error class (a `TemplateSyntaxError` by default).
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        self.error_class = cls
        self.message = message

    def __call__(self, lineno, filename):
        # called by the tokenizer when the matching failure rule fires
        raise self.error_class(self.message, lineno, filename)
Armin Ronacher92f572f2007-02-26 22:17:32 +010084
85
Armin Ronacher4325e372008-05-01 22:59:47 +020086class Token(tuple):
87 """Token class."""
88 __slots__ = ()
89 lineno, type, value = (property(itemgetter(x)) for x in range(3))
90
91 def __new__(cls, lineno, type, value):
92 return tuple.__new__(cls, (lineno, intern(str(type)), value))
93
94 def __str__(self):
Armin Ronacher8a1d27f2008-05-19 08:37:19 +020095 if self.type in reverse_operators:
Armin Ronacher4325e372008-05-01 22:59:47 +020096 return reverse_operators[self.type]
97 elif self.type is 'name':
98 return self.value
99 return self.type
100
101 def test(self, expr):
102 """Test a token against a token expression. This can either be a
Armin Ronacher023b5e92008-05-08 11:03:10 +0200103 token type or ``'token_type:token_value'``. This can only test
104 against string values and types.
Armin Ronacher4325e372008-05-01 22:59:47 +0200105 """
Armin Ronachercda43df2008-05-03 17:10:05 +0200106 # here we do a regular string equality check as test_any is usually
Armin Ronacher4325e372008-05-01 22:59:47 +0200107 # passed an iterable of not interned strings.
108 if self.type == expr:
109 return True
110 elif ':' in expr:
111 return expr.split(':', 1) == [self.type, self.value]
112 return False
113
Armin Ronachercda43df2008-05-03 17:10:05 +0200114 def test_any(self, *iterable):
Armin Ronacher4325e372008-05-01 22:59:47 +0200115 """Test against multiple token expressions."""
116 for expr in iterable:
117 if self.test(expr):
118 return True
119 return False
120
121 def __repr__(self):
122 return 'Token(%r, %r, %r)' % (
123 self.lineno,
124 self.type,
125 self.value
126 )
127
128
129class TokenStreamIterator(object):
130 """The iterator for tokenstreams. Iterate over the stream
131 until the eof token is reached.
132 """
133
134 def __init__(self, stream):
135 self._stream = stream
136
137 def __iter__(self):
138 return self
139
140 def next(self):
141 token = self._stream.current
142 if token.type == 'eof':
143 self._stream.close()
144 raise StopIteration()
145 self._stream.next(False)
146 return token
147
148
class TokenStream(object):
    """A token stream is an iterable that yields :class:`Token`\s.  The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead.  The current active token is stored as :attr:`current`.

    Tokens pushed back via :meth:`push` are served before tokens pulled
    from the wrapped generator.
    """

    def __init__(self, generator, name, filename):
        # bound `next` of the generator; set to None once closed
        self._next = generator.next
        # tokens pushed back onto the stream (FIFO)
        self._pushed = deque()
        self.current = Token(1, 'initial', '')
        self.name = name
        self.filename = filename
        # prime `current` with the first real token
        self.next()

    def __iter__(self):
        return TokenStreamIterator(self)

    def __nonzero__(self):
        """Are we at the end of the stream?"""
        return bool(self._pushed) or self.current.type != 'eof'

    eos = property(lambda x: not x.__nonzero__(), doc=__nonzero__.__doc__)

    def push(self, token):
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self):
        """Look at the next token without consuming it.

        Advances, remembers the new token, pushes it back and restores
        `current`, so the stream position is unchanged afterwards.
        """
        old_token = self.next()
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n=1):
        """Go n tokens ahead."""
        for x in xrange(n):
            self.next()

    def next_if(self, expr):
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return self.next()

    def skip_if(self, expr):
        """Like `next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None

    def next(self):
        """Go one token ahead and return the old one"""
        rv = self.current
        if self._pushed:
            self.current = self._pushed.popleft()
        # identity check is safe here: token types are interned by
        # Token.__new__ and 'eof' is an interned literal
        elif self.current.type is not 'eof':
            try:
                self.current = self._next()
            except StopIteration:
                # underlying generator exhausted -> switch to eof state
                self.close()
        return rv

    def close(self):
        """Close the stream: pin `current` to an eof token and drop the
        reference to the generator so it can be garbage collected.
        """
        self.current = Token(self.current.lineno, 'eof', '')
        self._next = None

    def expect(self, expr):
        """Expect a given token type and return it.  This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.  Raises a
        `TemplateSyntaxError` if the current token does not match.
        """
        if not self.current.test(expr):
            # for 'type:value' expressions report only the value part
            if ':' in expr:
                expr = expr.split(':')[1]
            if self.current.type is 'eof':
                raise TemplateSyntaxError('unexpected end of template, '
                                          'expected %r.' % expr,
                                          self.current.lineno,
                                          self.name, self.filename)
            raise TemplateSyntaxError("expected token %r, got %r" %
                                      (expr, str(self.current)),
                                      self.current.lineno,
                                      self.name, self.filename)
        # return the matched token but advance past it first
        try:
            return self.current
        finally:
            self.next()
237
238
Armin Ronacher21580912007-04-17 17:13:10 +0200239class LexerMeta(type):
Armin Ronacherb5124e62008-04-25 00:36:14 +0200240 """Metaclass for the lexer that caches instances for
Armin Ronacher21580912007-04-17 17:13:10 +0200241 the same configuration in a weak value dictionary.
242 """
243
244 def __call__(cls, environment):
Armin Ronacher203bfcb2008-04-24 21:54:44 +0200245 key = (environment.block_start_string,
246 environment.block_end_string,
247 environment.variable_start_string,
248 environment.variable_end_string,
249 environment.comment_start_string,
250 environment.comment_end_string,
251 environment.line_statement_prefix,
252 environment.trim_blocks)
Armin Ronacherb5124e62008-04-25 00:36:14 +0200253 lexer = _lexer_cache.get(key)
254 if lexer is None:
255 lexer = type.__call__(cls, environment)
256 _lexer_cache[key] = lexer
Armin Ronacher21580912007-04-17 17:13:10 +0200257 return lexer
258
259
Armin Ronacher92f572f2007-02-26 22:17:32 +0100260class Lexer(object):
Armin Ronacherb5124e62008-04-25 00:36:14 +0200261 """Class that implements a lexer for a given environment. Automatically
Armin Ronacher92f572f2007-02-26 22:17:32 +0100262 created by the environment class, usually you don't have to do that.
Armin Ronacher21580912007-04-17 17:13:10 +0200263
264 Note that the lexer is not automatically bound to an environment.
265 Multiple environments can share the same lexer.
Armin Ronacher92f572f2007-02-26 22:17:32 +0100266 """
267
Armin Ronacher21580912007-04-17 17:13:10 +0200268 __metaclass__ = LexerMeta
269
Armin Ronacher92f572f2007-02-26 22:17:32 +0100270 def __init__(self, environment):
271 # shortcuts
272 c = lambda x: re.compile(x, re.M | re.S)
273 e = re.escape
274
Armin Ronachera6c3ac52007-03-27 22:51:51 +0200275 # lexing rules for tags
Armin Ronacher92f572f2007-02-26 22:17:32 +0100276 tag_rules = [
Armin Ronacherd8b8c3e2008-05-22 21:28:32 +0200277 (whitespace_re, 'whitespace', None),
Armin Ronacher1cc232c2007-09-07 17:52:41 +0200278 (float_re, 'float', None),
279 (integer_re, 'integer', None),
Armin Ronacher92f572f2007-02-26 22:17:32 +0100280 (name_re, 'name', None),
Armin Ronacher1cc232c2007-09-07 17:52:41 +0200281 (string_re, 'string', None),
Armin Ronacher1cc232c2007-09-07 17:52:41 +0200282 (operator_re, 'operator', None)
Armin Ronacher92f572f2007-02-26 22:17:32 +0100283 ]
284
Armin Ronacherd874fbe2007-02-27 20:51:59 +0100285 # assamble the root lexing rule. because "|" is ungreedy
286 # we have to sort by length so that the lexer continues working
287 # as expected when we have parsing rules like <% for block and
288 # <%= for variables. (if someone wants asp like syntax)
Armin Ronacher33d528a2007-05-14 18:21:44 +0200289 # variables are just part of the rules if variable processing
290 # is required.
Armin Ronacherd874fbe2007-02-27 20:51:59 +0100291 root_tag_rules = [
292 ('comment', environment.comment_start_string),
Armin Ronacher2e9396b2008-04-16 14:21:57 +0200293 ('block', environment.block_start_string),
294 ('variable', environment.variable_start_string)
Armin Ronacherd874fbe2007-02-27 20:51:59 +0100295 ]
Armin Ronacher4f7d2d52008-04-22 10:40:26 +0200296 root_tag_rules.sort(key=lambda x: -len(x[1]))
Armin Ronacherbf7c4ad2008-04-12 12:02:36 +0200297
298 # now escape the rules. This is done here so that the escape
299 # signs don't count for the lengths of the tags.
300 root_tag_rules = [(a, e(b)) for a, b in root_tag_rules]
301
302 # if we have a line statement prefix we need an extra rule for
303 # that. We add this rule *after* all the others.
304 if environment.line_statement_prefix is not None:
305 prefix = e(environment.line_statement_prefix)
306 root_tag_rules.insert(0, ('linestatement', '^\s*' + prefix))
Armin Ronacherd874fbe2007-02-27 20:51:59 +0100307
Armin Ronachera6c3ac52007-03-27 22:51:51 +0200308 # block suffix if trimming is enabled
309 block_suffix_re = environment.trim_blocks and '\\n?' or ''
310
311 # global lexing rules
Armin Ronacher92f572f2007-02-26 22:17:32 +0100312 self.rules = {
313 'root': [
Armin Ronacher523bf4c2007-11-17 23:45:04 +0100314 # directives
315 (c('(.*?)(?:%s)' % '|'.join(
316 ['(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*%s)' % (
317 e(environment.block_start_string),
318 e(environment.block_start_string),
319 e(environment.block_end_string)
320 )] + [
Armin Ronacherbf7c4ad2008-04-12 12:02:36 +0200321 '(?P<%s_begin>\s*%s\-|%s)' % (n, r, r)
Armin Ronacher523bf4c2007-11-17 23:45:04 +0100322 for n, r in root_tag_rules
323 ])), ('data', '#bygroup'), '#bygroup'),
Armin Ronachera6c3ac52007-03-27 22:51:51 +0200324 # data
Armin Ronacher92f572f2007-02-26 22:17:32 +0100325 (c('.+'), 'data', None)
326 ],
Armin Ronachera6c3ac52007-03-27 22:51:51 +0200327 # comments
Armin Ronacher92f572f2007-02-26 22:17:32 +0100328 'comment_begin': [
Armin Ronachera5c8d582007-03-31 20:40:38 +0200329 (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
Armin Ronacher1151fbc2007-03-28 21:44:04 +0200330 e(environment.comment_end_string),
Armin Ronachera5c8d582007-03-31 20:40:38 +0200331 e(environment.comment_end_string),
332 block_suffix_re
Armin Ronacher1151fbc2007-03-28 21:44:04 +0200333 )), ('comment', 'comment_end'), '#pop'),
Armin Ronacher92f572f2007-02-26 22:17:32 +0100334 (c('(.)'), (Failure('Missing end of comment tag'),), None)
335 ],
Armin Ronacher21580912007-04-17 17:13:10 +0200336 # blocks
Armin Ronacher92f572f2007-02-26 22:17:32 +0100337 'block_begin': [
Armin Ronachera5c8d582007-03-31 20:40:38 +0200338 (c('(?:\-%s\s*|%s)%s' % (
Armin Ronacher1151fbc2007-03-28 21:44:04 +0200339 e(environment.block_end_string),
Armin Ronachera5c8d582007-03-31 20:40:38 +0200340 e(environment.block_end_string),
341 block_suffix_re
Armin Ronacher1151fbc2007-03-28 21:44:04 +0200342 )), 'block_end', '#pop'),
Armin Ronacher92f572f2007-02-26 22:17:32 +0100343 ] + tag_rules,
Armin Ronacher2e9396b2008-04-16 14:21:57 +0200344 # variables
345 'variable_begin': [
346 (c('\-%s\s*|%s' % (
347 e(environment.variable_end_string),
348 e(environment.variable_end_string)
349 )), 'variable_end', '#pop')
350 ] + tag_rules,
Armin Ronachera6c3ac52007-03-27 22:51:51 +0200351 # raw block
Armin Ronacher523bf4c2007-11-17 23:45:04 +0100352 'raw_begin': [
Armin Ronacher1151fbc2007-03-28 21:44:04 +0200353 (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
354 e(environment.block_start_string),
355 e(environment.block_start_string),
356 e(environment.block_end_string),
357 e(environment.block_end_string),
Armin Ronachera6c3ac52007-03-27 22:51:51 +0200358 block_suffix_re
Armin Ronacher523bf4c2007-11-17 23:45:04 +0100359 )), ('data', 'raw_end'), '#pop'),
Armin Ronachera6c3ac52007-03-27 22:51:51 +0200360 (c('(.)'), (Failure('Missing end of raw directive'),), None)
Armin Ronacher2e9396b2008-04-16 14:21:57 +0200361 ],
362 # line statements
363 'linestatement_begin': [
Armin Ronacherbf7c4ad2008-04-12 12:02:36 +0200364 (c(r'\s*(\n|$)'), 'linestatement_end', '#pop')
365 ] + tag_rules
Armin Ronacher2e9396b2008-04-16 14:21:57 +0200366 }
Armin Ronacherbf7c4ad2008-04-12 12:02:36 +0200367
Armin Ronacher7f15ef82008-05-16 09:11:39 +0200368 def tokenize(self, source, name=None, filename=None):
Armin Ronacher71082072008-04-12 14:19:36 +0200369 """Works like `tokeniter` but returns a tokenstream of tokens and not
Armin Ronacher4f7d2d52008-04-22 10:40:26 +0200370 a generator or token tuples. Additionally all token values are already
Armin Ronacher115de2e2008-05-01 22:20:05 +0200371 converted into types and postprocessed. For example comments are removed,
Armin Ronacher1cc232c2007-09-07 17:52:41 +0200372 integers and floats converted, strings unescaped etc.
Armin Ronacher92f572f2007-02-26 22:17:32 +0100373 """
Armin Ronacherbf7c4ad2008-04-12 12:02:36 +0200374 source = unicode(source)
Armin Ronacher5a8e4972007-04-05 11:21:38 +0200375 def generate():
Armin Ronacher7f15ef82008-05-16 09:11:39 +0200376 for lineno, token, value in self.tokeniter(source, name, filename):
Armin Ronacherd8b8c3e2008-05-22 21:28:32 +0200377 if token in ('comment_begin', 'comment', 'comment_end',
378 'whitespace'):
Armin Ronacher1cc232c2007-09-07 17:52:41 +0200379 continue
Armin Ronacherbf7c4ad2008-04-12 12:02:36 +0200380 elif token == 'linestatement_begin':
381 token = 'block_begin'
382 elif token == 'linestatement_end':
383 token = 'block_end'
Armin Ronacher4f7d2d52008-04-22 10:40:26 +0200384 # we are not interested in those tokens in the parser
385 elif token in ('raw_begin', 'raw_end'):
386 continue
Armin Ronacher1cc232c2007-09-07 17:52:41 +0200387 elif token == 'data':
Armin Ronacherd1ff8582008-05-11 00:30:43 +0200388 try:
389 value = str(value)
390 except UnicodeError:
391 pass
Armin Ronacher07bc6842008-03-31 14:18:49 +0200392 elif token == 'keyword':
Armin Ronacher82b3f3d2008-03-31 20:01:08 +0200393 token = value
Armin Ronacher1cc232c2007-09-07 17:52:41 +0200394 elif token == 'name':
Armin Ronacherd1ff8582008-05-11 00:30:43 +0200395 value = str(value)
Armin Ronacher1cc232c2007-09-07 17:52:41 +0200396 elif token == 'string':
Armin Ronacher7f15ef82008-05-16 09:11:39 +0200397 # try to unescape string
398 try:
399 value = value[1:-1] \
400 .encode('ascii', 'backslashreplace') \
401 .decode('unicode-escape')
402 except Exception, e:
403 msg = str(e).split(':')[-1].strip()
404 raise TemplateSyntaxError(msg, lineno, name, filename)
405 # if we can express it as bytestring (ascii only)
406 # we do that for support of semi broken APIs
407 # as datetime.datetime.strftime
Armin Ronacherd1ff8582008-05-11 00:30:43 +0200408 try:
409 value = str(value)
410 except UnicodeError:
411 pass
Armin Ronacher1cc232c2007-09-07 17:52:41 +0200412 elif token == 'integer':
413 value = int(value)
414 elif token == 'float':
415 value = float(value)
416 elif token == 'operator':
417 token = operators[value]
Armin Ronacher1cc232c2007-09-07 17:52:41 +0200418 yield Token(lineno, token, value)
Armin Ronacher7f15ef82008-05-16 09:11:39 +0200419 return TokenStream(generate(), name, filename)
Armin Ronacher92f572f2007-02-26 22:17:32 +0100420
Armin Ronacher7f15ef82008-05-16 09:11:39 +0200421 def tokeniter(self, source, name, filename=None):
Armin Ronacherb5124e62008-04-25 00:36:14 +0200422 """This method tokenizes the text and returns the tokens in a
423 generator. Use this method if you just want to tokenize a template.
424 The output you get is not compatible with the input the jinja parser
425 wants. The parser uses the `tokenize` function with returns a
426 `TokenStream` and postprocessed tokens.
Armin Ronacher92f572f2007-02-26 22:17:32 +0100427 """
Armin Ronacher5a8e4972007-04-05 11:21:38 +0200428 source = '\n'.join(source.splitlines())
Armin Ronacher7977e5c2007-03-12 07:22:17 +0100429 pos = 0
430 lineno = 1
Armin Ronacher92f572f2007-02-26 22:17:32 +0100431 stack = ['root']
432 statetokens = self.rules['root']
433 source_length = len(source)
434
Armin Ronacher21580912007-04-17 17:13:10 +0200435 balancing_stack = []
436
Armin Ronacher71082072008-04-12 14:19:36 +0200437 while 1:
Armin Ronacher92f572f2007-02-26 22:17:32 +0100438 # tokenizer loop
439 for regex, tokens, new_state in statetokens:
440 m = regex.match(source, pos)
Armin Ronacher21580912007-04-17 17:13:10 +0200441 # if no match we try again with the next rule
Armin Ronacher71082072008-04-12 14:19:36 +0200442 if m is None:
Armin Ronacher21580912007-04-17 17:13:10 +0200443 continue
444
445 # we only match blocks and variables if brances / parentheses
446 # are balanced. continue parsing with the lower rule which
447 # is the operator rule. do this only if the end tags look
448 # like operators
449 if balancing_stack and \
Armin Ronacher71082072008-04-12 14:19:36 +0200450 tokens in ('variable_end', 'block_end',
451 'linestatement_end'):
Armin Ronacher21580912007-04-17 17:13:10 +0200452 continue
453
454 # tuples support more options
455 if isinstance(tokens, tuple):
456 for idx, token in enumerate(tokens):
Armin Ronacher21580912007-04-17 17:13:10 +0200457 # failure group
Armin Ronacherd8b8c3e2008-05-22 21:28:32 +0200458 if token.__class__ is Failure:
Armin Ronacher720e55b2007-05-30 00:57:49 +0200459 raise token(lineno, filename)
Armin Ronacher21580912007-04-17 17:13:10 +0200460 # bygroup is a bit more complex, in that case we
461 # yield for the current token the first named
462 # group that matched
463 elif token == '#bygroup':
Armin Ronacher92f572f2007-02-26 22:17:32 +0100464 for key, value in m.groupdict().iteritems():
465 if value is not None:
Armin Ronacher21580912007-04-17 17:13:10 +0200466 yield lineno, key, value
467 lineno += value.count('\n')
Armin Ronacher92f572f2007-02-26 22:17:32 +0100468 break
469 else:
Armin Ronacher21580912007-04-17 17:13:10 +0200470 raise RuntimeError('%r wanted to resolve '
471 'the token dynamically'
472 ' but no group matched'
473 % regex)
474 # normal group
Armin Ronacher92f572f2007-02-26 22:17:32 +0100475 else:
Armin Ronacher21580912007-04-17 17:13:10 +0200476 data = m.group(idx + 1)
477 if data:
478 yield lineno, token, data
479 lineno += data.count('\n')
480
Armin Ronacher71082072008-04-12 14:19:36 +0200481 # strings as token just are yielded as it.
Armin Ronacher21580912007-04-17 17:13:10 +0200482 else:
483 data = m.group()
484 # update brace/parentheses balance
485 if tokens == 'operator':
486 if data == '{':
487 balancing_stack.append('}')
488 elif data == '(':
489 balancing_stack.append(')')
490 elif data == '[':
491 balancing_stack.append(']')
492 elif data in ('}', ')', ']'):
Armin Ronacherf750daa2007-05-29 23:22:38 +0200493 if not balancing_stack:
494 raise TemplateSyntaxError('unexpected "%s"' %
Armin Ronacher7f15ef82008-05-16 09:11:39 +0200495 data, lineno, name,
Armin Ronacherf750daa2007-05-29 23:22:38 +0200496 filename)
497 expected_op = balancing_stack.pop()
498 if expected_op != data:
499 raise TemplateSyntaxError('unexpected "%s", '
500 'expected "%s"' %
501 (data, expected_op),
Armin Ronacher7f15ef82008-05-16 09:11:39 +0200502 lineno, name,
503 filename)
Armin Ronacher21580912007-04-17 17:13:10 +0200504 # yield items
Armin Ronacherd8b8c3e2008-05-22 21:28:32 +0200505 yield lineno, tokens, data
Armin Ronacher21580912007-04-17 17:13:10 +0200506 lineno += data.count('\n')
507
508 # fetch new position into new variable so that we can check
509 # if there is a internal parsing error which would result
510 # in an infinite loop
511 pos2 = m.end()
512
513 # handle state changes
514 if new_state is not None:
515 # remove the uppermost state
516 if new_state == '#pop':
517 stack.pop()
518 # resolve the new state by group checking
519 elif new_state == '#bygroup':
520 for key, value in m.groupdict().iteritems():
521 if value is not None:
522 stack.append(key)
523 break
524 else:
525 raise RuntimeError('%r wanted to resolve the '
526 'new state dynamically but'
527 ' no group matched' %
528 regex)
529 # direct state name given
530 else:
531 stack.append(new_state)
532 statetokens = self.rules[stack[-1]]
533 # we are still at the same position and no stack change.
534 # this means a loop without break condition, avoid that and
535 # raise error
536 elif pos2 == pos:
537 raise RuntimeError('%r yielded empty string without '
538 'stack change' % regex)
539 # publish new function and start again
540 pos = pos2
541 break
Armin Ronacher92f572f2007-02-26 22:17:32 +0100542 # if loop terminated without break we havn't found a single match
543 # either we are at the end of the file or we have a problem
544 else:
545 # end of text
546 if pos >= source_length:
547 return
548 # something went wrong
549 raise TemplateSyntaxError('unexpected char %r at %d' %
Armin Ronacher21580912007-04-17 17:13:10 +0200550 (source[pos], pos), lineno,
Armin Ronacher7f15ef82008-05-16 09:11:39 +0200551 name, filename)