blob: 64621fd1c6497d277634a9d213d77a8b5960b780 [file] [log] [blame]
Armin Ronacher92f572f2007-02-26 22:17:32 +01001# -*- coding: utf-8 -*-
2"""
Armin Ronacher07bc6842008-03-31 14:18:49 +02003 jinja2.lexer
4 ~~~~~~~~~~~~
Armin Ronacher3b65b8a2007-02-27 20:21:45 +01005
Armin Ronacher5a8e4972007-04-05 11:21:38 +02006 This module implements a Jinja / Python combination lexer. The
7 `Lexer` class provided by this module is used to do some preprocessing
8 for Jinja.
9
10 On the one hand it filters out invalid operators like the bitshift
11 operators we don't allow in templates. On the other hand it separates
12 template code and python code in expressions.
13
Armin Ronacher1d51f632008-03-25 14:34:45 +010014 :copyright: 2007-2008 by Armin Ronacher.
Armin Ronacher3b65b8a2007-02-27 20:21:45 +010015 :license: BSD, see LICENSE for more details.
Armin Ronacher92f572f2007-02-26 22:17:32 +010016"""
17import re
Armin Ronacher1cc232c2007-09-07 17:52:41 +020018import unicodedata
Armin Ronacher4325e372008-05-01 22:59:47 +020019from operator import itemgetter
20from collections import deque
Armin Ronacher82b3f3d2008-03-31 20:01:08 +020021from jinja2.exceptions import TemplateSyntaxError
Armin Ronacherb5124e62008-04-25 00:36:14 +020022from jinja2.utils import LRUCache
Armin Ronacher92f572f2007-02-26 22:17:32 +010023
24
Armin Ronacher21580912007-04-17 17:13:10 +020025# cache for the lexers. Exists in order to be able to have multiple
26# environments with the same lexer
Armin Ronacher187bde12008-05-01 18:19:16 +020027_lexer_cache = LRUCache(50)
Armin Ronacher21580912007-04-17 17:13:10 +020028
# static regular expressions
whitespace_re = re.compile(r'\s+(?um)')
# single or double quoted strings, escaped quotes allowed inside
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                       r'|"([^"\\]*(?:\\.[^"\\]*)*)")(?ms)')
integer_re = re.compile(r'\d+')
name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
float_re = re.compile(r'\d+\.\d+')
# every newline flavor; used to normalize to the configured sequence
newline_re = re.compile(r'(\r\n|\r|\n)')

# bind operators to token types
operators = {
    '+':            'add',
    '-':            'sub',
    '/':            'div',
    '//':           'floordiv',
    '*':            'mul',
    '%':            'mod',
    '**':           'pow',
    '~':            'tilde',
    '[':            'lbracket',
    ']':            'rbracket',
    '(':            'lparen',
    ')':            'rparen',
    '{':            'lbrace',
    '}':            'rbrace',
    '==':           'eq',
    '!=':           'ne',
    '>':            'gt',
    '>=':           'gteq',
    '<':            'lt',
    '<=':           'lteq',
    '=':            'assign',
    '.':            'dot',
    ':':            'colon',
    '|':            'pipe',
    ',':            'comma',
    ';':            'semicolon'
}

# reverse mapping: token type -> operator source text (Token.__str__ uses it)
reverse_operators = dict([(v, k) for k, v in operators.iteritems()])
# the mapping must be bijective, otherwise a token type would be ambiguous
assert len(operators) == len(reverse_operators), 'operators dropped'
# sort by decreasing length so that e.g. '**' is matched before '*'
operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
                         sorted(operators, key=lambda x: -len(x))))
Armin Ronacher1cc232c2007-09-07 17:52:41 +020072
Armin Ronacher92f572f2007-02-26 22:17:32 +010073
class Failure(object):
    """Callable placeholder that raises a `TemplateSyntaxError` (or the
    error class it was configured with) as soon as it is invoked.  The
    `Lexer` stores instances of this class in its rules to mark input
    that is known to be invalid.
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        self.error_class = cls
        self.message = message

    def __call__(self, lineno, filename):
        # invoked by the tokenizer loop when the failure rule matches
        raise self.error_class(self.message, lineno, filename)
Armin Ronacher92f572f2007-02-26 22:17:32 +010085
86
class Token(tuple):
    """Immutable token produced by the lexer.  A token is the triple
    ``(lineno, type, value)``; the three fields are also exposed as
    read-only properties.
    """
    __slots__ = ()
    lineno, type, value = (property(itemgetter(x)) for x in range(3))

    def __new__(cls, lineno, type, value):
        # intern the type so that lookups in `operators` and state
        # comparisons are cheap string compares
        return tuple.__new__(cls, (lineno, intern(str(type)), value))

    def __str__(self):
        # use equality rather than identity for the type check; relying
        # on string interning for `is` comparisons is fragile
        if self.type in reverse_operators:
            return reverse_operators[self.type]
        elif self.type == 'name':
            return self.value
        return self.type

    def test(self, expr):
        """Test a token against a token expression.  This can either be a
        token type or ``'token_type:token_value'``.  This can only test
        against string values and types.
        """
        # here we do a regular string equality check as test_any is usually
        # passed an iterable of not interned strings.
        if self.type == expr:
            return True
        elif ':' in expr:
            return expr.split(':', 1) == [self.type, self.value]
        return False

    def test_any(self, *iterable):
        """Test against multiple token expressions; `True` on first match."""
        for expr in iterable:
            if self.test(expr):
                return True
        return False

    def __repr__(self):
        return 'Token(%r, %r, %r)' % (
            self.lineno,
            self.type,
            self.value
        )
128
129
130class TokenStreamIterator(object):
131 """The iterator for tokenstreams. Iterate over the stream
132 until the eof token is reached.
133 """
134
135 def __init__(self, stream):
136 self._stream = stream
137
138 def __iter__(self):
139 return self
140
141 def next(self):
142 token = self._stream.current
143 if token.type == 'eof':
144 self._stream.close()
145 raise StopIteration()
146 self._stream.next(False)
147 return token
148
149
class TokenStream(object):
    """A token stream is an iterable that yields :class:`Token`\s.  The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead.  The current active token is stored as :attr:`current`.
    """

    def __init__(self, generator, name, filename):
        self._next = generator.next
        self._pushed = deque()
        self.current = Token(1, 'initial', '')
        self.name = name
        self.filename = filename
        # advance once so that `current` holds the first real token
        self.next()

    def __iter__(self):
        return TokenStreamIterator(self)

    def __nonzero__(self):
        """Are we at the end of the stream?"""
        return bool(self._pushed) or self.current.type != 'eof'

    eos = property(lambda x: not x.__nonzero__(), doc=__nonzero__.__doc__)

    def push(self, token):
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self):
        """Look at the next token without consuming it."""
        old_token = self.next()
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n=1):
        """Go n tokens ahead."""
        for x in xrange(n):
            self.next()

    def next_if(self, expr):
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return self.next()

    def skip_if(self, expr):
        """Like :meth:`next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None

    def next(self):
        """Go one token ahead and return the old one."""
        rv = self.current
        if self._pushed:
            self.current = self._pushed.popleft()
        # compare with `!=` rather than `is not`: identity checks on
        # string literals only work by the accident of interning
        elif self.current.type != 'eof':
            try:
                self.current = self._next()
            except StopIteration:
                self.close()
        return rv

    def close(self):
        """Close the stream: pin an eof token and drop the generator."""
        self.current = Token(self.current.lineno, 'eof', '')
        self._next = None

    def expect(self, expr):
        """Expect a given token type and return it.  This accepts the same
        argument as :meth:`jinja2.lexer.Token.test` and raises a
        `TemplateSyntaxError` if the current token does not match.
        """
        if not self.current.test(expr):
            if ':' in expr:
                expr = expr.split(':')[1]
            if self.current.type == 'eof':
                raise TemplateSyntaxError('unexpected end of template, '
                                          'expected %r.' % expr,
                                          self.current.lineno,
                                          self.name, self.filename)
            raise TemplateSyntaxError("expected token %r, got %r" %
                                      (expr, str(self.current)),
                                      self.current.lineno,
                                      self.name, self.filename)
        try:
            return self.current
        finally:
            self.next()
238
239
class LexerMeta(type):
    """Metaclass for the lexer that caches instances for
    the same configuration in a weak value dictionary.
    """

    def __call__(cls, environment):
        # the cache key is exactly the set of environment settings the
        # lexer depends on; environments that agree on all of them can
        # share one lexer instance
        key = tuple(getattr(environment, attr) for attr in (
            'block_start_string',
            'block_end_string',
            'variable_start_string',
            'variable_end_string',
            'comment_start_string',
            'comment_end_string',
            'line_statement_prefix',
            'trim_blocks',
            'newline_sequence'
        ))
        lexer = _lexer_cache.get(key)
        if lexer is None:
            lexer = type.__call__(cls, environment)
            _lexer_cache[key] = lexer
        return lexer
260
261
class Lexer(object):
    """Class that implements a lexer for a given environment. Automatically
    created by the environment class, usually you don't have to do that.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    __metaclass__ = LexerMeta

    def __init__(self, environment):
        # shortcuts: `c` compiles a pattern with MULTILINE|DOTALL,
        # `e` escapes a delimiter string for literal use in a pattern
        c = lambda x: re.compile(x, re.M | re.S)
        e = re.escape

        # lexing rules used inside of tags (blocks, variables and line
        # statements).  float_re must come before integer_re so '1.0'
        # is not lexed as integer '1' followed by operator '.'
        tag_rules = [
            (whitespace_re, 'whitespace', None),
            (float_re, 'float', None),
            (integer_re, 'integer', None),
            (name_re, 'name', None),
            (string_re, 'string', None),
            (operator_re, 'operator', None)
        ]

        # assemble the root lexing rule. because "|" is ungreedy
        # we have to sort by length so that the lexer continues working
        # as expected when we have parsing rules like <% for block and
        # <%= for variables. (if someone wants asp like syntax)
        # variables are just part of the rules if variable processing
        # is required.
        root_tag_rules = [
            ('comment', environment.comment_start_string),
            ('block', environment.block_start_string),
            ('variable', environment.variable_start_string)
        ]
        # longest start string first so it wins in the alternation
        root_tag_rules.sort(key=lambda x: -len(x[1]))

        # now escape the rules. This is done here so that the escape
        # signs don't count for the lengths of the tags.
        root_tag_rules = [(a, e(b)) for a, b in root_tag_rules]

        # if we have a line statement prefix we need an extra rule for
        # that.  It is added after the sorting and escaping above (so it
        # is unaffected by both) and inserted at the front of the list,
        # which makes it the first alternative tried.
        if environment.line_statement_prefix is not None:
            prefix = e(environment.line_statement_prefix)
            root_tag_rules.insert(0, ('linestatement', '^\s*' + prefix))

        # block suffix if trimming is enabled: optionally swallow the
        # newline that directly follows a block end tag
        block_suffix_re = environment.trim_blocks and '\\n?' or ''

        self.newline_sequence = environment.newline_sequence

        # global lexing rules.  Maps a lexer state to a list of
        # (compiled regex, token type or tuple of types, new state)
        # tuples.  '#pop' pops the state stack, '#bygroup' resolves the
        # token/state from the first named group that matched.
        self.rules = {
            'root': [
                # directives
                (c('(.*?)(?:%s)' % '|'.join(
                    ['(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*%s)' % (
                        e(environment.block_start_string),
                        e(environment.block_start_string),
                        e(environment.block_end_string)
                    )] + [
                        '(?P<%s_begin>\s*%s\-|%s)' % (n, r, r)
                        for n, r in root_tag_rules
                    ])), ('data', '#bygroup'), '#bygroup'),
                # data
                (c('.+'), 'data', None)
            ],
            # comments
            'comment_begin': [
                (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
                    e(environment.comment_end_string),
                    e(environment.comment_end_string),
                    block_suffix_re
                )), ('comment', 'comment_end'), '#pop'),
                (c('(.)'), (Failure('Missing end of comment tag'),), None)
            ],
            # blocks
            'block_begin': [
                (c('(?:\-%s\s*|%s)%s' % (
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), 'block_end', '#pop'),
            ] + tag_rules,
            # variables
            'variable_begin': [
                (c('\-%s\s*|%s' % (
                    e(environment.variable_end_string),
                    e(environment.variable_end_string)
                )), 'variable_end', '#pop')
            ] + tag_rules,
            # raw block
            'raw_begin': [
                (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
                    e(environment.block_start_string),
                    e(environment.block_start_string),
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), ('data', 'raw_end'), '#pop'),
                (c('(.)'), (Failure('Missing end of raw directive'),), None)
            ],
            # line statements
            'linestatement_begin': [
                (c(r'\s*(\n|$)'), 'linestatement_end', '#pop')
            ] + tag_rules
        }

    def _normalize_newlines(self, value):
        """Replace every newline flavor (``\\r\\n``, ``\\r``, ``\\n``) in
        `value` with the environment's configured newline sequence.
        """
        return newline_re.sub(self.newline_sequence, value)

    def tokenize(self, source, name=None, filename=None):
        """Works like `tokeniter` but returns a tokenstream of tokens and not
        a generator or token tuples. Additionally all token values are already
        converted into types and postprocessed. For example comments are removed,
        integers and floats converted, strings unescaped etc.
        """
        def generate():
            for lineno, token, value in self.tokeniter(source, name, filename):
                # comments and whitespace never reach the parser
                if token in ('comment_begin', 'comment', 'comment_end',
                             'whitespace'):
                    continue
                # line statements are presented to the parser as blocks
                elif token == 'linestatement_begin':
                    token = 'block_begin'
                elif token == 'linestatement_end':
                    token = 'block_end'
                # we are not interested in those tokens in the parser
                elif token in ('raw_begin', 'raw_end'):
                    continue
                elif token == 'data':
                    value = self._normalize_newlines(value)
                elif token == 'keyword':
                    token = value
                elif token == 'name':
                    value = str(value)
                elif token == 'string':
                    # try to unescape string
                    try:
                        value = self._normalize_newlines(value[1:-1]) \
                            .encode('ascii', 'backslashreplace') \
                            .decode('unicode-escape')
                    except Exception, e:
                        msg = str(e).split(':')[-1].strip()
                        raise TemplateSyntaxError(msg, lineno, name, filename)
                    # if we can express it as bytestring (ascii only)
                    # we do that for support of semi broken APIs
                    # as datetime.datetime.strftime
                    try:
                        value = str(value)
                    except UnicodeError:
                        pass
                elif token == 'integer':
                    value = int(value)
                elif token == 'float':
                    value = float(value)
                elif token == 'operator':
                    # operators are renamed to their token type ('add', ...)
                    token = operators[value]
                yield Token(lineno, token, value)
        return TokenStream(generate(), name, filename)

    def tokeniter(self, source, name, filename=None):
        """This method tokenizes the text and returns the tokens in a
        generator.  Each token is a ``(lineno, token_type, value)`` tuple.
        Use this method if you just want to tokenize a template.
        The output you get is not compatible with the input the jinja parser
        wants. The parser uses the `tokenize` function with returns a
        `TokenStream` and postprocessed tokens.
        """
        # normalize all newlines to '\n' for lexing; `tokenize` restores
        # the configured newline sequence on data/string tokens later
        source = '\n'.join(unicode(source).splitlines())
        pos = 0
        lineno = 1
        stack = ['root']
        statetokens = self.rules['root']
        source_length = len(source)

        # stack of closing brackets we still expect ('}', ')', ']')
        balancing_stack = []

        while 1:
            # tokenizer loop: try each rule of the current state in order
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)
                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and \
                   tokens in ('variable_end', 'block_end',
                              'linestatement_end'):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    for idx, token in enumerate(tokens):
                        # failure group: raise the configured error
                        if token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == '#bygroup':
                            for key, value in m.groupdict().iteritems():
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count('\n')
                                    break
                            else:
                                raise RuntimeError('%r wanted to resolve '
                                                   'the token dynamically'
                                                   ' but no group matched'
                                                   % regex)
                        # normal group
                        else:
                            data = m.group(idx + 1)
                            if data:
                                yield lineno, token, data
                            lineno += data.count('\n')

                # strings as token are just yielded as is.
                else:
                    data = m.group()
                    # update brace/parentheses balance
                    if tokens == 'operator':
                        if data == '{':
                            balancing_stack.append('}')
                        elif data == '(':
                            balancing_stack.append(')')
                        elif data == '[':
                            balancing_stack.append(']')
                        elif data in ('}', ')', ']'):
                            if not balancing_stack:
                                raise TemplateSyntaxError('unexpected "%s"' %
                                                          data, lineno, name,
                                                          filename)
                            expected_op = balancing_stack.pop()
                            if expected_op != data:
                                raise TemplateSyntaxError('unexpected "%s", '
                                                          'expected "%s"' %
                                                          (data, expected_op),
                                                          lineno, name,
                                                          filename)
                    # yield items
                    yield lineno, tokens, data
                    lineno += data.count('\n')

                # fetch new position into new variable so that we can check
                # if there is a internal parsing error which would result
                # in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == '#pop':
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == '#bygroup':
                        for key, value in m.groupdict().iteritems():
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError('%r wanted to resolve the '
                                               'new state dynamically but'
                                               ' no group matched' %
                                               regex)
                    # direct state name given
                    else:
                        stack.append(new_state)
                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and no stack change.
                # this means a loop without break condition, avoid that and
                # raise error
                elif pos2 == pos:
                    raise RuntimeError('%r yielded empty string without '
                                       'stack change' % regex)
                # advance the cursor and start over with the first rule
                pos = pos2
                break
            # if loop terminated without break we haven't found a single match
            # either we are at the end of the file or we have a problem
            else:
                # end of text
                if pos >= source_length:
                    return
                # something went wrong
                raise TemplateSyntaxError('unexpected char %r at %d' %
                                          (source[pos], pos), lineno,
                                          name, filename)