# -*- coding: utf-8 -*-
"""
    jinja2.lexer
    ~~~~~~~~~~~~

    This module implements a Jinja / Python combination lexer. The
    `Lexer` class provided by this module is used to do some preprocessing
    for Jinja.

    On the one hand it filters out invalid operators like the bitshift
    operators we don't allow in templates. On the other hand it separates
    template code and Python code in expressions.

    :copyright: 2007-2008 by Armin Ronacher.
    :license: BSD, see LICENSE for more details.
"""
import re
import unicodedata
from jinja2.datastructure import TokenStream, Token
from jinja2.exceptions import TemplateSyntaxError
from weakref import WeakValueDictionary


# cache for the lexers. Exists so that multiple environments can
# share the same lexer
_lexer_cache = WeakValueDictionary()


# static regular expressions
whitespace_re = re.compile(r'\s+(?um)')
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                       r'|"([^"\\]*(?:\\.[^"\\]*)*)")(?ms)')
integer_re = re.compile(r'\d+')
name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
float_re = re.compile(r'\d+\.\d+')


# set of used keywords
keywords = set(['and', 'block', 'elif', 'else', 'endblock', 'print',
                'endfilter', 'endfor', 'endif', 'endmacro', 'endraw',
                'extends', 'filter', 'for', 'if', 'in', 'include',
                'is', 'macro', 'not', 'or', 'raw', 'call', 'endcall'])

# bind operators to token types
operators = {
    '+': 'add',
    '-': 'sub',
    '/': 'div',
    '//': 'floordiv',
    '*': 'mul',
    '%': 'mod',
    '**': 'pow',
    '~': 'tilde',
    '[': 'lbracket',
    ']': 'rbracket',
    '(': 'lparen',
    ')': 'rparen',
    '{': 'lbrace',
    '}': 'rbrace',
    '==': 'eq',
    '!=': 'ne',
    '>': 'gt',
    '>=': 'gteq',
    '<': 'lt',
    '<=': 'lteq',
    '=': 'assign',
    '.': 'dot',
    ':': 'colon',
    '|': 'pipe',
    ',': 'comma',
    ';': 'semicolon'
}

reverse_operators = dict([(v, k) for k, v in operators.iteritems()])
assert len(operators) == len(reverse_operators), 'operators dropped'
operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
                         sorted(operators, key=lambda x: -len(x))))
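
# Illustrative doctest-style sketch: because the alternatives in
# `operator_re` are sorted longest-first, two-character operators win
# over their one-character prefixes:
#
#     >>> operator_re.match('**').group()
#     '**'
#     >>> operators[operator_re.match('**').group()]
#     'pow'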

simple_escapes = {
    'a': '\a',
    'n': '\n',
    'r': '\r',
    'f': '\f',
    't': '\t',
    'v': '\v',
    '\\': '\\',
    '"': '"',
    "'": "'",
    '0': '\x00'
}
unicode_escapes = {
    'x': 2,
    'u': 4,
    'U': 8
}


def unescape_string(lineno, filename, s):
    r"""
    Unescape a string. Supported escapes:
        \a, \n, \r, \f, \v, \\, \", \', \0

        \x00, \u0000, \U00000000, \N{...}

    Octal escapes like \101 are not supported because they are redundant.
    """
    result = []
    write = result.append
    chariter = iter(s)
    next_char = chariter.next

    # faster lookup
    sescapes = simple_escapes
    uescapes = unicode_escapes

    try:
        for char in chariter:
            if char == '\\':
                char = next_char()
                if char in sescapes:
                    write(sescapes[char])
                elif char in uescapes:
                    seq = [next_char() for x in xrange(uescapes[char])]
                    try:
                        write(unichr(int(''.join(seq), 16)))
                    except ValueError:
                        raise TemplateSyntaxError('invalid unicode codepoint',
                                                  lineno, filename)
                elif char == 'N':
                    if next_char() != '{':
                        raise TemplateSyntaxError('no name for codepoint',
                                                  lineno, filename)
                    seq = []
                    while 1:
                        char = next_char()
                        if char == '}':
                            break
                        seq.append(char)
                    try:
                        write(unicodedata.lookup(u''.join(seq)))
                    except KeyError:
                        raise TemplateSyntaxError('unknown character name',
                                                  lineno, filename)
                else:
                    write('\\' + char)
            else:
                write(char)
    except StopIteration:
        raise TemplateSyntaxError('invalid string escape', lineno, filename)
    return u''.join(result)
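
# Illustrative doctest-style examples; the argument is the string body
# without its surrounding quotes:
#
#     >>> unescape_string(1, None, 'a\\tb')
#     u'a\tb'
#     >>> unescape_string(1, None, '\\x41\\u00e9')
#     u'A\xe9'
#     >>> unescape_string(1, None, '\\N{LATIN SMALL LETTER A}')
#     u'a'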


# characters in front of which a backslash is redundant in a regular
# expression, so unescaping may drop it. ASSUMED definition: `safe_chars`
# is referenced below but not defined anywhere in this module; the
# delimiter of regex literals is a plausible minimal rule set.
safe_chars = set('/')


def unescape_regex(s):
    """
    Unescape rules for regular expressions.
    """
    buffer = []
    write = buffer.append
    in_escape = False
    for char in s:
        if in_escape:
            in_escape = False
            if char not in safe_chars:
                write('\\' + char)
                continue
        elif char == '\\':
            # a backslash starts an escape sequence
            in_escape = True
            continue
        write(char)
    return u''.join(buffer)
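
# Illustrative sketch under the assumed `safe_chars` above: the escaped
# delimiter loses its backslash while regex escapes keep theirs:
#
#     >>> unescape_regex(u'foo\\/bar\\d')
#     u'foo/bar\\d'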


class Failure(object):
    """
    Class that raises a `TemplateSyntaxError` if called.
    Used by the `Lexer` to specify known errors.
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        self.message = message
        self.error_class = cls

    def __call__(self, lineno, filename):
        raise self.error_class(self.message, lineno, filename)


class LexerMeta(type):
    """
    Metaclass for the lexer that caches instances for
    the same configuration in a weak value dictionary.
    """

    def __call__(cls, environment):
        key = hash((environment.block_start_string,
                    environment.block_end_string,
                    environment.variable_start_string,
                    environment.variable_end_string,
                    environment.comment_start_string,
                    environment.comment_end_string,
                    environment.line_statement_prefix,
                    environment.trim_blocks))

        # use the cached lexer if possible
        if key in _lexer_cache:
            return _lexer_cache[key]

        # create a new lexer and cache it
        lexer = type.__call__(cls, environment)
        _lexer_cache[key] = lexer
        return lexer
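
# Illustrative doctest-style sketch: environments with identical
# lexer-relevant settings share one `Lexer` instance thanks to the
# metaclass cache:
#
#     >>> from jinja2 import Environment
#     >>> Lexer(Environment()) is Lexer(Environment())
#     True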


class Lexer(object):
    """
    Class that implements a lexer for a given environment. Automatically
    created by the environment class; usually you don't have to create
    one yourself.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    __metaclass__ = LexerMeta

    def __init__(self, environment):
        # shortcuts
        c = lambda x: re.compile(x, re.M | re.S)
        e = re.escape

        # lexing rules for tags
        tag_rules = [
            (whitespace_re, None, None),
            (float_re, 'float', None),
            (integer_re, 'integer', None),
            (c(r'\b(?:%s)\b' % '|'.join(sorted(keywords, key=lambda x: -len(x)))),
             'keyword', None),
            (name_re, 'name', None),
            (string_re, 'string', None),
            (operator_re, 'operator', None)
        ]

        # assemble the root lexing rule. because "|" is not greedy but
        # matches alternatives from left to right, we have to sort by
        # length so that the lexer continues working as expected when we
        # have parsing rules like <% for blocks and <%= for variables.
        # (if someone wants asp like syntax)
        # variables are just part of the rules if variable processing
        # is required.
        root_tag_rules = [
            ('comment', environment.comment_start_string),
            ('block', environment.block_start_string),
            ('variable', environment.variable_start_string)
        ]
        root_tag_rules.sort(key=lambda x: -len(x[1]))

        # now escape the rules. This is done here so that the escape
        # characters don't count towards the lengths of the tags.
        root_tag_rules = [(a, e(b)) for a, b in root_tag_rules]

        # if we have a line statement prefix we need an extra rule for
        # that. We add this rule *before* all the others.
        if environment.line_statement_prefix is not None:
            prefix = e(environment.line_statement_prefix)
            root_tag_rules.insert(0, ('linestatement', r'^\s*' + prefix))
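
        # e.g. with a line statement prefix of '#', a source line such as
        #     # for item in seq
        # is lexed like the block tag {% for item in seq %}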

        # block suffix if trimming is enabled
        block_suffix_re = environment.trim_blocks and '\\n?' or ''

        # global lexing rules
        self.rules = {
            'root': [
                # directives
                (c('(.*?)(?:%s)' % '|'.join(
                    ['(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*%s)' % (
                        e(environment.block_start_string),
                        e(environment.block_start_string),
                        e(environment.block_end_string)
                    )] + [
                        '(?P<%s_begin>\s*%s\-|%s)' % (n, r, r)
                        for n, r in root_tag_rules
                    ])), ('data', '#bygroup'), '#bygroup'),
                # data
                (c('.+'), 'data', None)
            ],
            # comments
            'comment_begin': [
                (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
                    e(environment.comment_end_string),
                    e(environment.comment_end_string),
                    block_suffix_re
                )), ('comment', 'comment_end'), '#pop'),
                (c('(.)'), (Failure('Missing end of comment tag'),), None)
            ],
            # blocks
            'block_begin': [
                (c('(?:\-%s\s*|%s)%s' % (
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), 'block_end', '#pop'),
            ] + tag_rules,
            # variables
            'variable_begin': [
                (c('\-%s\s*|%s' % (
                    e(environment.variable_end_string),
                    e(environment.variable_end_string)
                )), 'variable_end', '#pop')
            ] + tag_rules,
            # raw block
            'raw_begin': [
                (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
                    e(environment.block_start_string),
                    e(environment.block_start_string),
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), ('data', 'raw_end'), '#pop'),
                (c('(.)'), (Failure('Missing end of raw directive'),), None)
            ],
            # line statements
            'linestatement_begin': [
                (c(r'\s*(\n|$)'), 'linestatement_end', '#pop')
            ] + tag_rules
        }

    def tokenize(self, source, filename=None):
        """Works like `tokeniter` but returns a `TokenStream` of tokens and
        not a generator of token tuples. Additionally all token values are
        already converted into types and postprocessed. For example keywords
        are already keyword tokens, not named tokens, comments are removed,
        integers and floats converted, strings unescaped etc.
        """
        source = unicode(source)
        def generate():
            for lineno, token, value in self.tokeniter(source, filename):
                if token in ('comment_begin', 'comment', 'comment_end'):
                    continue
                elif token == 'linestatement_begin':
                    token = 'block_begin'
                elif token == 'linestatement_end':
                    token = 'block_end'
                # we are not interested in those tokens in the parser
                elif token in ('raw_begin', 'raw_end'):
                    continue
                elif token == 'data':
                    try:
                        value = str(value)
                    except UnicodeError:
                        pass
                elif token == 'keyword':
                    token = value
                elif token == 'name':
                    value = str(value)
                elif token == 'string':
                    value = unescape_string(lineno, filename, value[1:-1])
                    try:
                        value = str(value)
                    except UnicodeError:
                        pass
                elif token == 'integer':
                    value = int(value)
                elif token == 'float':
                    value = float(value)
                elif token == 'operator':
                    token = operators[value]
                yield Token(lineno, token, value)
        return TokenStream(generate(), filename)
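
    # Illustrative doctest-style sketch (assumes the environment exposes
    # its lexer as `environment.lexer` and that `Token` provides a `type`
    # attribute):
    #
    #     >>> from jinja2 import Environment
    #     >>> stream = Environment().lexer.tokenize(u'{{ foo }}')
    #     >>> [t.type for t in stream]
    #     ['variable_begin', 'name', 'variable_end']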

    def tokeniter(self, source, filename=None):
        """
        This method tokenizes the text and returns the tokens in a generator.
        Use this method if you just want to tokenize a template. The output
        you get is not compatible with the input the jinja parser wants. The
        parser uses the `tokenize` method, which returns a `TokenStream` and
        keyword tokens instead of plain name tokens.
        """
        source = '\n'.join(source.splitlines())
        pos = 0
        lineno = 1
        stack = ['root']
        statetokens = self.rules['root']
        source_length = len(source)

        balancing_stack = []

        while 1:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)
                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and \
                   tokens in ('variable_end', 'block_end',
                              'linestatement_end'):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    for idx, token in enumerate(tokens):
                        # hidden group (group 0 is the whole match, so
                        # the token groups start at 1)
                        if token is None:
                            g = m.group(idx + 1)
                            if g:
                                lineno += g.count('\n')
                            continue
                        # failure group
                        elif token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == '#bygroup':
                            for key, value in m.groupdict().iteritems():
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count('\n')
                                    break
                            else:
                                raise RuntimeError('%r wanted to resolve '
                                                   'the token dynamically'
                                                   ' but no group matched'
                                                   % regex)
                        # normal group
                        else:
                            data = m.group(idx + 1)
                            if data:
                                yield lineno, token, data
                            lineno += data.count('\n')

                # strings as tokens are just yielded as-is
                else:
                    data = m.group()
                    # update brace/parentheses balance
                    if tokens == 'operator':
                        if data == '{':
                            balancing_stack.append('}')
                        elif data == '(':
                            balancing_stack.append(')')
                        elif data == '[':
                            balancing_stack.append(']')
                        elif data in ('}', ')', ']'):
                            if not balancing_stack:
                                raise TemplateSyntaxError('unexpected "%s"' %
                                                          data, lineno,
                                                          filename)
                            expected_op = balancing_stack.pop()
                            if expected_op != data:
                                raise TemplateSyntaxError('unexpected "%s", '
                                                          'expected "%s"' %
                                                          (data, expected_op),
                                                          lineno, filename)
                    # yield items
                    if tokens is not None:
                        yield lineno, tokens, data
                    lineno += data.count('\n')

                # fetch new position into new variable so that we can check
                # if there is an internal parsing error which would result
                # in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == '#pop':
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == '#bygroup':
                        for key, value in m.groupdict().iteritems():
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError('%r wanted to resolve the '
                                               'new state dynamically but'
                                               ' no group matched' %
                                               regex)
                    # direct state name given
                    else:
                        stack.append(new_state)
                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and no stack change.
                # this means a loop without break condition, avoid that and
                # raise error
                elif pos2 == pos:
                    raise RuntimeError('%r yielded empty string without '
                                       'stack change' % regex)
                # publish the new position and start the rule loop again
                pos = pos2
                break
            # if the loop terminated without a break we haven't found a
            # single match; either we are at the end of the file or we
            # have a problem
            else:
                # end of text
                if pos >= source_length:
                    return
                # something went wrong
                raise TemplateSyntaxError('unexpected char %r at %d' %
                                          (source[pos], pos), lineno,
                                          filename)
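
# Illustrative doctest-style sketch: `tokeniter` yields raw
# (lineno, token, value) tuples before any postprocessing, e.g. with the
# default delimiters:
#
#     >>> from jinja2 import Environment
#     >>> list(Lexer(Environment()).tokeniter(u'Hello {{ name }}!'))
#     [(1, 'data', u'Hello '), (1, 'variable_begin', u'{{'),
#      (1, 'name', u'name'), (1, 'variable_end', u'}}'), (1, 'data', u'!')]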