blob: 1f22ed7b89fa0dda1db8e79d43bc386a76889553 [file] [log] [blame]
Armin Ronacher92f572f2007-02-26 22:17:32 +01001# -*- coding: utf-8 -*-
2"""
Armin Ronacher07bc6842008-03-31 14:18:49 +02003 jinja2.lexer
4 ~~~~~~~~~~~~
Armin Ronacher3b65b8a2007-02-27 20:21:45 +01005
Armin Ronacher5a8e4972007-04-05 11:21:38 +02006 This module implements a Jinja / Python combination lexer. The
7 `Lexer` class provided by this module is used to do some preprocessing
8 for Jinja.
9
10 On the one hand it filters out invalid operators like the bitshift
11 operators we don't allow in templates. On the other hand it separates
12 template code and python code in expressions.
13
Armin Ronacher1d51f632008-03-25 14:34:45 +010014 :copyright: 2007-2008 by Armin Ronacher.
Armin Ronacher3b65b8a2007-02-27 20:21:45 +010015 :license: BSD, see LICENSE for more details.
Armin Ronacher92f572f2007-02-26 22:17:32 +010016"""
17import re
Armin Ronacher1cc232c2007-09-07 17:52:41 +020018import unicodedata
Armin Ronacher4325e372008-05-01 22:59:47 +020019from operator import itemgetter
20from collections import deque
Armin Ronacher82b3f3d2008-03-31 20:01:08 +020021from jinja2.exceptions import TemplateSyntaxError
Armin Ronacherb5124e62008-04-25 00:36:14 +020022from jinja2.utils import LRUCache
Armin Ronacher92f572f2007-02-26 22:17:32 +010023
24
# cache for the lexers. Exists in order to be able to have multiple
# environments with the same lexer
_lexer_cache = LRUCache(50)

# static regular expressions
whitespace_re = re.compile(r'\s+(?um)')
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                       r'|"([^"\\]*(?:\\.[^"\\]*)*)")(?ms)')
integer_re = re.compile(r'\d+')
name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
float_re = re.compile(r'\d+\.\d+')
newline_re = re.compile(r'(\r\n|\r|\n)')

# bind operators to token types
operators = {
    '+': 'add',
    '-': 'sub',
    '/': 'div',
    '//': 'floordiv',
    '*': 'mul',
    '%': 'mod',
    '**': 'pow',
    '~': 'tilde',
    '[': 'lbracket',
    ']': 'rbracket',
    '(': 'lparen',
    ')': 'rparen',
    '{': 'lbrace',
    '}': 'rbrace',
    '==': 'eq',
    '!=': 'ne',
    '>': 'gt',
    '>=': 'gteq',
    '<': 'lt',
    '<=': 'lteq',
    '=': 'assign',
    '.': 'dot',
    ':': 'colon',
    '|': 'pipe',
    ',': 'comma',
    ';': 'semicolon'
}

# reverse mapping (token type -> operator source text), used by
# `Token.__str__` to print operators in their source form
reverse_operators = dict([(v, k) for k, v in operators.iteritems()])
# every operator must map to a distinct token type, otherwise the
# reverse mapping would silently lose entries
assert len(operators) == len(reverse_operators), 'operators dropped'
# sort by decreasing length so that e.g. '**' is tried before '*'
# in the alternation
operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
                         sorted(operators, key=lambda x: -len(x))))
Armin Ronacher1cc232c2007-09-07 17:52:41 +020072
Armin Ronacher92f572f2007-02-26 22:17:32 +010073
class Failure(object):
    """Callable placeholder used in lexer rules for known error
    situations.  Invoking an instance raises the configured error
    class (a `TemplateSyntaxError` by default) with the stored message.
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        self.error_class = cls
        self.message = message

    def __call__(self, lineno, filename):
        # instantiate and raise the configured exception
        raise self.error_class(self.message, lineno, filename)
Armin Ronacher92f572f2007-02-26 22:17:32 +010085
86
class Token(tuple):
    """Immutable ``(lineno, type, value)`` triple produced by the lexer.

    The type string is interned in `__new__` so the parser can compare
    token types cheaply.
    """
    __slots__ = ()
    lineno, type, value = (property(itemgetter(x)) for x in range(3))

    def __new__(cls, lineno, type, value):
        return tuple.__new__(cls, (lineno, intern(str(type)), value))

    def __str__(self):
        # operators print as their source text, names as their value,
        # everything else as the bare token type.
        if self.type in reverse_operators:
            return reverse_operators[self.type]
        # BUGFIX: was `self.type is 'name'` -- identity comparison with a
        # string literal only works because CPython happens to intern
        # identifier-like literals; use equality instead.
        elif self.type == 'name':
            return self.value
        return self.type

    def test(self, expr):
        """Test a token against a token expression.  This can either be a
        token type or ``'token_type:token_value'``.  This can only test
        against string values and types.
        """
        # here we do a regular string equality check as test_any is usually
        # passed an iterable of not interned strings.
        if self.type == expr:
            return True
        elif ':' in expr:
            return expr.split(':', 1) == [self.type, self.value]
        return False

    def test_any(self, *iterable):
        """Test against multiple token expressions."""
        for expr in iterable:
            if self.test(expr):
                return True
        return False

    def __repr__(self):
        return 'Token(%r, %r, %r)' % (
            self.lineno,
            self.type,
            self.value
        )
128
129
130class TokenStreamIterator(object):
131 """The iterator for tokenstreams. Iterate over the stream
132 until the eof token is reached.
133 """
134
135 def __init__(self, stream):
Armin Ronacher9ad96e72008-06-13 22:44:01 +0200136 self.stream = stream
Armin Ronacher4325e372008-05-01 22:59:47 +0200137
138 def __iter__(self):
139 return self
140
141 def next(self):
Armin Ronacher9ad96e72008-06-13 22:44:01 +0200142 token = self.stream.current
Armin Ronacher4325e372008-05-01 22:59:47 +0200143 if token.type == 'eof':
Armin Ronacher9ad96e72008-06-13 22:44:01 +0200144 self.stream.close()
Armin Ronacher4325e372008-05-01 22:59:47 +0200145 raise StopIteration()
Armin Ronacher9ad96e72008-06-13 22:44:01 +0200146 self.stream.next()
Armin Ronacher4325e372008-05-01 22:59:47 +0200147 return token
148
149
class TokenStream(object):
    """A token stream is an iterable that yields :class:`Token`\s.  The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead.  The current active token is stored as :attr:`current`.
    """

    def __init__(self, generator, name, filename):
        self._next = iter(generator).next
        self._pushed = deque()
        self.name = name
        self.filename = filename
        self.closed = False
        self.current = Token(1, 'initial', '')
        # advance once so that `current` holds the first real token
        self.next()

    def __iter__(self):
        return TokenStreamIterator(self)

    def __nonzero__(self):
        """Are we at the end of the stream?"""
        # BUGFIX: was `self.current.type != 'eof'` written with `is not`,
        # which relies on CPython literal interning; use equality.
        return bool(self._pushed) or self.current.type != 'eof'

    # positive form of __nonzero__, sharing its docstring
    eos = property(lambda x: not x.__nonzero__(), doc=__nonzero__.__doc__)

    def push(self, token):
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self):
        """Look at the next token without consuming it."""
        old_token = self.next()
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n=1):
        """Go n tokens ahead."""
        for x in xrange(n):
            self.next()

    def next_if(self, expr):
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return self.next()

    def skip_if(self, expr):
        """Like :meth:`next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None

    def next(self):
        """Go one token ahead and return the old one."""
        rv = self.current
        if self._pushed:
            self.current = self._pushed.popleft()
        # identity comparison with a string literal replaced by `!=`
        elif self.current.type != 'eof':
            try:
                self.current = self._next()
            except StopIteration:
                self.close()
        return rv

    def close(self):
        """Close the stream."""
        self.current = Token(self.current.lineno, 'eof', '')
        self._next = None
        self.closed = True

    def expect(self, expr):
        """Expect a given token type and return it.  This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.
        """
        if not self.current.test(expr):
            if ':' in expr:
                expr = expr.split(':')[1]
            # identity comparison with a string literal replaced by `==`
            if self.current.type == 'eof':
                raise TemplateSyntaxError('unexpected end of template, '
                                          'expected %r.' % expr,
                                          self.current.lineno,
                                          self.name, self.filename)
            raise TemplateSyntaxError("expected token %r, got %r" %
                                      (expr, str(self.current)),
                                      self.current.lineno,
                                      self.name, self.filename)
        # return the matched token, advancing the stream afterwards
        try:
            return self.current
        finally:
            self.next()
240
241
class LexerMeta(type):
    """Metaclass for `Lexer`: environments with an identical lexing
    configuration share one lexer instance through the module-level
    LRU cache.
    """

    def __call__(cls, environment):
        # every environment setting that influences tokenization is
        # part of the cache key
        key = (environment.block_start_string,
               environment.block_end_string,
               environment.variable_start_string,
               environment.variable_end_string,
               environment.comment_start_string,
               environment.comment_end_string,
               environment.line_statement_prefix,
               environment.trim_blocks,
               environment.newline_sequence)
        cached = _lexer_cache.get(key)
        if cached is not None:
            return cached
        # nothing cached yet: build a new lexer and remember it
        lexer = type.__call__(cls, environment)
        _lexer_cache[key] = lexer
        return lexer
262
263
class Lexer(object):
    """Class that implements a lexer for a given environment. Automatically
    created by the environment class, usually you don't have to do that.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    # Python 2 metaclass hook: `LexerMeta` caches instances per
    # environment configuration.
    __metaclass__ = LexerMeta

    def __init__(self, environment):
        # shortcuts
        c = lambda x: re.compile(x, re.M | re.S)
        e = re.escape

        # lexing rules for tags.  Order matters: rules are tried in
        # sequence, so float must come before integer.
        tag_rules = [
            (whitespace_re, 'whitespace', None),
            (float_re, 'float', None),
            (integer_re, 'integer', None),
            (name_re, 'name', None),
            (string_re, 'string', None),
            (operator_re, 'operator', None)
        ]

        # assemble the root lexing rule. because "|" is ungreedy
        # we have to sort by length so that the lexer continues working
        # as expected when we have parsing rules like <% for block and
        # <%= for variables. (if someone wants asp like syntax)
        # variables are just part of the rules if variable processing
        # is required.
        root_tag_rules = [
            ('comment', environment.comment_start_string),
            ('block', environment.block_start_string),
            ('variable', environment.variable_start_string)
        ]
        root_tag_rules.sort(key=lambda x: -len(x[1]))

        # now escape the rules. This is done here so that the escape
        # signs don't count for the lengths of the tags.
        root_tag_rules = [(a, e(b)) for a, b in root_tag_rules]

        # if we have a line statement prefix we need an extra rule for
        # that.  The rule is inserted at the front of the already sorted
        # and escaped rules so the prefix is tried first.
        if environment.line_statement_prefix is not None:
            prefix = e(environment.line_statement_prefix)
            root_tag_rules.insert(0, ('linestatement', '^\s*' + prefix))

        # block suffix if trimming is enabled (optionally eats the
        # newline directly after a block tag)
        block_suffix_re = environment.trim_blocks and '\\n?' or ''

        # newline sequence used by `_normalize_newlines`
        self.newline_sequence = environment.newline_sequence

        # global lexing rules.  Each state maps to a list of
        # (compiled regex, token type(s), new state) tuples; '#pop'
        # leaves a state and '#bygroup' derives the new state from the
        # name of the matched regex group.
        self.rules = {
            'root': [
                # directives
                (c('(.*?)(?:%s)' % '|'.join(
                    ['(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*%s)' % (
                        e(environment.block_start_string),
                        e(environment.block_start_string),
                        e(environment.block_end_string)
                    )] + [
                        '(?P<%s_begin>\s*%s\-|%s)' % (n, r, r)
                        for n, r in root_tag_rules
                    ])), ('data', '#bygroup'), '#bygroup'),
                # data
                (c('.+'), 'data', None)
            ],
            # comments
            'comment_begin': [
                (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
                    e(environment.comment_end_string),
                    e(environment.comment_end_string),
                    block_suffix_re
                )), ('comment', 'comment_end'), '#pop'),
                (c('(.)'), (Failure('Missing end of comment tag'),), None)
            ],
            # blocks
            'block_begin': [
                (c('(?:\-%s\s*|%s)%s' % (
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), 'block_end', '#pop'),
            ] + tag_rules,
            # variables
            'variable_begin': [
                (c('\-%s\s*|%s' % (
                    e(environment.variable_end_string),
                    e(environment.variable_end_string)
                )), 'variable_end', '#pop')
            ] + tag_rules,
            # raw block
            'raw_begin': [
                (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
                    e(environment.block_start_string),
                    e(environment.block_start_string),
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), ('data', 'raw_end'), '#pop'),
                (c('(.)'), (Failure('Missing end of raw directive'),), None)
            ],
            # line statements
            'linestatement_begin': [
                (c(r'\s*(\n|$)'), 'linestatement_end', '#pop')
            ] + tag_rules
        }

    def _normalize_newlines(self, value):
        """Replace all newline variants (CRLF, CR, LF) in `value` with
        the configured newline sequence.
        """
        return newline_re.sub(self.newline_sequence, value)

    def tokenize(self, source, name=None, filename=None):
        """Calls tokeniter + wrap and wraps it in a token stream.
        This is currently only used for unittests.
        """
        stream = self.tokeniter(source, name, filename)
        return TokenStream(self.wrap(stream, name, filename), name, filename)

    def wrap(self, stream, name=None, filename=None):
        """This is called with the stream as returned by `tokeniter` and wraps
        every token in a :class:`Token` and converts the value.
        """
        for lineno, token, value in stream:
            # tokens the parser never sees are dropped here
            if token in ('comment_begin', 'comment', 'comment_end',
                         'whitespace'):
                continue
            # line statements are presented to the parser as ordinary
            # block tokens
            elif token == 'linestatement_begin':
                token = 'block_begin'
            elif token == 'linestatement_end':
                token = 'block_end'
            # we are not interested in those tokens in the parser
            elif token in ('raw_begin', 'raw_end'):
                continue
            elif token == 'data':
                value = self._normalize_newlines(value)
            elif token == 'keyword':
                token = value
            elif token == 'name':
                value = str(value)
            elif token == 'string':
                # try to unescape string
                try:
                    value = self._normalize_newlines(value[1:-1]) \
                        .encode('ascii', 'backslashreplace') \
                        .decode('unicode-escape')
                except Exception, e:
                    msg = str(e).split(':')[-1].strip()
                    raise TemplateSyntaxError(msg, lineno, name, filename)
                # if we can express it as bytestring (ascii only)
                # we do that for support of semi broken APIs
                # as datetime.datetime.strftime
                try:
                    value = str(value)
                except UnicodeError:
                    pass
            elif token == 'integer':
                value = int(value)
            elif token == 'float':
                value = float(value)
            elif token == 'operator':
                # map operator source text to its token type
                token = operators[value]
            yield Token(lineno, token, value)

    def tokeniter(self, source, name, filename=None):
        """This method tokenizes the text and returns the tokens in a
        generator.  Use this method if you just want to tokenize a template.
        """
        # normalize all newlines to '\n' for the duration of the lexing
        source = '\n'.join(unicode(source).splitlines())
        pos = 0
        lineno = 1
        stack = ['root']
        statetokens = self.rules['root']
        source_length = len(source)

        # records the expected closing brackets for open '(', '[', '{'
        balancing_stack = []

        while 1:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)
                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and \
                   tokens in ('variable_end', 'block_end',
                              'linestatement_end'):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    for idx, token in enumerate(tokens):
                        # failure group
                        if token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == '#bygroup':
                            for key, value in m.groupdict().iteritems():
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count('\n')
                                    break
                            else:
                                raise RuntimeError('%r wanted to resolve '
                                                   'the token dynamically'
                                                   ' but no group matched'
                                                   % regex)
                        # normal group
                        else:
                            data = m.group(idx + 1)
                            if data:
                                yield lineno, token, data
                            lineno += data.count('\n')

                # strings as token are just yielded as-is
                else:
                    data = m.group()
                    # update brace/parentheses balance
                    if tokens == 'operator':
                        if data == '{':
                            balancing_stack.append('}')
                        elif data == '(':
                            balancing_stack.append(')')
                        elif data == '[':
                            balancing_stack.append(']')
                        elif data in ('}', ')', ']'):
                            if not balancing_stack:
                                raise TemplateSyntaxError('unexpected "%s"' %
                                                          data, lineno, name,
                                                          filename)
                            expected_op = balancing_stack.pop()
                            if expected_op != data:
                                raise TemplateSyntaxError('unexpected "%s", '
                                                          'expected "%s"' %
                                                          (data, expected_op),
                                                          lineno, name,
                                                          filename)
                    # yield items
                    yield lineno, tokens, data
                    lineno += data.count('\n')

                # fetch new position into new variable so that we can check
                # if there is an internal parsing error which would result
                # in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == '#pop':
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == '#bygroup':
                        for key, value in m.groupdict().iteritems():
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError('%r wanted to resolve the '
                                               'new state dynamically but'
                                               ' no group matched' %
                                               regex)
                    # direct state name given
                    else:
                        stack.append(new_state)
                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and no stack change.
                # this means a loop without break condition, avoid that and
                # raise error
                elif pos2 == pos:
                    raise RuntimeError('%r yielded empty string without '
                                       'stack change' % regex)
                # publish new position and start again
                pos = pos2
                break
            # if loop terminated without break we haven't found a single match
            # either we are at the end of the file or we have a problem
            else:
                # end of text
                if pos >= source_length:
                    return
                # something went wrong
                raise TemplateSyntaxError('unexpected char %r at %d' %
                                          (source[pos], pos), lineno,
                                          name, filename)