# -*- coding: utf-8 -*-
"""
    jinja2.lexer
    ~~~~~~~~~~~~

    This module implements a Jinja / Python combination lexer. The
    `Lexer` class provided by this module is used to do some preprocessing
    for Jinja.

    On the one hand it filters out invalid operators like the bitshift
    operators we don't allow in templates. On the other hand it separates
    template code and python code in expressions.

    :copyright: 2007-2008 by Armin Ronacher.
    :license: BSD, see LICENSE for more details.
"""
import re
import unicodedata
from jinja2.datastructure import TokenStream, Token
from jinja2.exceptions import TemplateSyntaxError
from weakref import WeakValueDictionary


# cache for the lexers. Exists in order to be able to have multiple
# environments with the same lexer
_lexer_cache = WeakValueDictionary()


# static regular expressions
whitespace_re = re.compile(r'\s+(?um)')
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                       r'|"([^"\\]*(?:\\.[^"\\]*)*)")(?ms)')
integer_re = re.compile(r'\d+')
name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
float_re = re.compile(r'\d+\.\d+')


# set of used keywords
keywords = set(['and', 'block', 'elif', 'else', 'endblock', 'print',
                'endfilter', 'endfor', 'endif', 'endmacro', 'endraw',
                'extends', 'filter', 'for', 'if', 'in',
                'include', 'is', 'macro', 'not', 'or', 'raw',
                'recursive', 'set', 'call', 'endcall'])

# bind operators to token types
operators = {
    '+': 'add',
    '-': 'sub',
    '/': 'div',
    '//': 'floordiv',
    '*': 'mul',
    '%': 'mod',
    '**': 'pow',
    '~': 'tilde',
    '[': 'lbracket',
    ']': 'rbracket',
    '(': 'lparen',
    ')': 'rparen',
    '{': 'lbrace',
    '}': 'rbrace',
    '==': 'eq',
    '!=': 'ne',
    '>': 'gt',
    '>=': 'gteq',
    '<': 'lt',
    '<=': 'lteq',
    '=': 'assign',
    '.': 'dot',
    ':': 'colon',
    '|': 'pipe',
    ',': 'comma',
    ';': 'semicolon'
}

reverse_operators = dict([(v, k) for k, v in operators.iteritems()])
assert len(operators) == len(reverse_operators), 'operators dropped'
operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
                         sorted(operators, key=lambda x: -len(x))))
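
# Illustrative note (not part of the original file): regex alternation
# is tried left to right, so the descending length sort above makes
# compound operators win over their prefixes:
#
#     operator_re.match('**').group()    # '**' -> 'pow', not 'mul' twice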

simple_escapes = {
    'a': '\a',
    'n': '\n',
    'r': '\r',
    'f': '\f',
    't': '\t',
    'v': '\v',
    '\\': '\\',
    '"': '"',
    "'": "'",
    '0': '\x00'
}
unicode_escapes = {
    'x': 2,
    'u': 4,
    'U': 8
}


def unescape_string(lineno, filename, s):
    r"""
    Unescape a string. Supported escapes:
        \a, \n, \r, \f, \v, \\, \", \', \0

        \x00, \u0000, \U00000000, \N{...}

    Octal escapes such as \101 are not supported because they are
    redundant.
    """
    result = []
    write = result.append
    chariter = iter(s)
    next_char = chariter.next

    # faster lookup
    sescapes = simple_escapes
    uescapes = unicode_escapes

    try:
        for char in chariter:
            if char == '\\':
                char = next_char()
                if char in sescapes:
                    write(sescapes[char])
                elif char in uescapes:
                    seq = [next_char() for x in xrange(uescapes[char])]
                    try:
                        write(unichr(int(''.join(seq), 16)))
                    except ValueError:
                        raise TemplateSyntaxError('invalid unicode codepoint',
                                                  lineno, filename)
                elif char == 'N':
                    if next_char() != '{':
                        raise TemplateSyntaxError('no name for codepoint',
                                                  lineno, filename)
                    seq = []
                    while 1:
                        char = next_char()
                        if char == '}':
                            break
                        seq.append(char)
                    try:
                        write(unicodedata.lookup(u''.join(seq)))
                    except KeyError:
                        raise TemplateSyntaxError('unknown character name',
                                                  lineno, filename)
                else:
                    write('\\' + char)
            else:
                write(char)
    except StopIteration:
        raise TemplateSyntaxError('invalid string escape', lineno, filename)
    return u''.join(result)
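
# Illustrative use (arguments assumed, not taken from this file):
#
#     unescape_string(1, '<template>', r'\u00e9 \N{LATIN SMALL LETTER E}')
#     # -> u'\xe9 e'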


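# NOTE (assumption, not in the original file): `safe_chars` is used by
# `unescape_regex` below but is never defined in this excerpt.  A
# plausible reconstruction is the set of characters that `re.escape`
# escapes even though the backslash in front of them is redundant:
safe_chars = frozenset(c for c in map(chr, xrange(256))
                       if not c.isalnum() and c != '\\')
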
def unescape_regex(s):
    """
    Unescape rules for regular expressions.
    """
    buffer = []
    write = buffer.append
    in_escape = False
    for char in s:
        if in_escape:
            in_escape = False
            if char not in safe_chars:
                write('\\' + char)
                continue
        elif char == '\\':
            # a backslash starts an escape sequence (assumed fix: the
            # excerpt never switched into escape mode, which made the
            # function a no-op)
            in_escape = True
            continue
        write(char)
    return u''.join(buffer)


class Failure(object):
    """
    Class that raises a `TemplateSyntaxError` if called.
    Used by the `Lexer` to specify known errors.
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        self.message = message
        self.error_class = cls

    def __call__(self, lineno, filename):
        raise self.error_class(self.message, lineno, filename)


class LexerMeta(type):
    """
    Metaclass for the lexer that caches instances for
    the same configuration in a weak value dictionary.
    """

    def __call__(cls, environment):
        key = hash((environment.block_start_string,
                    environment.block_end_string,
                    environment.variable_start_string,
                    environment.variable_end_string,
                    environment.comment_start_string,
                    environment.comment_end_string,
                    environment.line_statement_prefix,
                    environment.trim_blocks))

        # use the cached lexer if possible
        if key in _lexer_cache:
            return _lexer_cache[key]

        # create a new lexer and cache it
        lexer = type.__call__(cls, environment)
        _lexer_cache[key] = lexer
        return lexer
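
    # Illustrative effect (env_a and env_b are assumed environments with
    # identical settings for every field hashed above):
    #
    #     Lexer(env_a) is Lexer(env_b)  # True while env_a's lexer lives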


class Lexer(object):
    """
    Class that implements a lexer for a given environment.  Automatically
    created by the environment class; usually you don't have to do that
    yourself.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    __metaclass__ = LexerMeta

    def __init__(self, environment):
        # shortcuts
        c = lambda x: re.compile(x, re.M | re.S)
        e = re.escape

        # lexing rules for tags
        tag_rules = [
            (whitespace_re, None, None),
            (float_re, 'float', None),
            (integer_re, 'integer', None),
            (c(r'\b(?:%s)\b' % '|'.join(sorted(keywords, key=lambda x: -len(x)))),
             'keyword', None),
            (name_re, 'name', None),
            (string_re, 'string', None),
            (operator_re, 'operator', None)
        ]

        # assemble the root lexing rule.  because "|" is ungreedy we
        # have to sort by decreasing length so that the lexer continues
        # working as expected when we have parsing rules like <% for
        # blocks and <%= for variables.  (if someone wants asp like
        # syntax) variables are just part of the rules if variable
        # processing is required.
        root_tag_rules = [
            ('comment', environment.comment_start_string),
            ('block', environment.block_start_string),
            ('variable', environment.variable_start_string)
        ]
        root_tag_rules.sort(key=lambda x: -len(x[1]))

        # now escape the rules.  This is done here so that the escape
        # signs don't count for the lengths of the tags.
        root_tag_rules = [(a, e(b)) for a, b in root_tag_rules]

        # if we have a line statement prefix we need an extra rule for
        # that.  We add this rule *in front of* all the others so that
        # it is tried first.
        if environment.line_statement_prefix is not None:
            prefix = e(environment.line_statement_prefix)
            root_tag_rules.insert(0, ('linestatement', '^\s*' + prefix))

        # block suffix if trimming is enabled
        block_suffix_re = environment.trim_blocks and '\\n?' or ''

        # global lexing rules
        self.rules = {
            'root': [
                # directives
                (c('(.*?)(?:%s)' % '|'.join(
                    ['(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*%s)' % (
                        e(environment.block_start_string),
                        e(environment.block_start_string),
                        e(environment.block_end_string)
                    )] + [
                        '(?P<%s_begin>\s*%s\-|%s)' % (n, r, r)
                        for n, r in root_tag_rules
                    ])), ('data', '#bygroup'), '#bygroup'),
                # data
                (c('.+'), 'data', None)
            ],
            # comments
            'comment_begin': [
                (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
                    e(environment.comment_end_string),
                    e(environment.comment_end_string),
                    block_suffix_re
                )), ('comment', 'comment_end'), '#pop'),
                (c('(.)'), (Failure('Missing end of comment tag'),), None)
            ],
            # blocks
            'block_begin': [
                (c('(?:\-%s\s*|%s)%s' % (
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), 'block_end', '#pop'),
            ] + tag_rules,
            # variables
            'variable_begin': [
                (c('\-%s\s*|%s' % (
                    e(environment.variable_end_string),
                    e(environment.variable_end_string)
                )), 'variable_end', '#pop')
            ] + tag_rules,
            # raw block
            'raw_begin': [
                (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
                    e(environment.block_start_string),
                    e(environment.block_start_string),
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), ('data', 'raw_end'), '#pop'),
                (c('(.)'), (Failure('Missing end of raw directive'),), None)
            ],
            # line statements
            'linestatement_begin': [
                (c(r'\s*(\n|$)'), 'linestatement_end', '#pop')
            ] + tag_rules
        }
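
        # taken together the rule tables form a small pushdown
        # automaton: `tokeniter` starts in the 'root' state, pushes a
        # state such as 'block_begin' via '#bygroup', and '#pop'
        # returns to the previous state at the matching end delimiter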

    def tokenize(self, source, filename=None):
        """Works like `tokeniter` but returns a `TokenStream` of tokens
        and not a generator of token tuples.  Additionally all token
        values are already converted into types and postprocessed.  For
        example keywords are already keyword tokens, not name tokens,
        comments are removed, integers and floats converted, strings
        unescaped etc.
        """
        source = unicode(source)
        def generate():
            for lineno, token, value in self.tokeniter(source, filename):
                if token in ('comment_begin', 'comment', 'comment_end'):
                    continue
                elif token == 'linestatement_begin':
                    token = 'block_begin'
                elif token == 'linestatement_end':
                    token = 'block_end'
                elif token == 'data':
                    try:
                        value = str(value)
                    except UnicodeError:
                        pass
                elif token == 'keyword':
                    token = value
                elif token == 'name':
                    value = str(value)
                elif token == 'string':
                    value = unescape_string(lineno, filename, value[1:-1])
                    try:
                        value = str(value)
                    except UnicodeError:
                        pass
                elif token == 'integer':
                    value = int(value)
                elif token == 'float':
                    value = float(value)
                elif token == 'operator':
                    token = operators[value]
                yield Token(lineno, token, value)
        return TokenStream(generate(), filename)
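
    # Illustrative use (a minimal sketch; `env` is an assumed,
    # already-configured environment object):
    #
    #     lexer = Lexer(env)
    #     for token in lexer.tokenize(u'{{ 1 + 2 }}'):
    #         print token
    #     # variable_begin, integer, add, integer, variable_end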

    def tokeniter(self, source, filename=None):
        """
        This method tokenizes the text and returns the tokens in a
        generator.  Use this method if you just want to tokenize a
        template.  The output you get is not compatible with the input
        the jinja parser wants.  The parser uses the `tokenize` function
        which returns a `TokenStream` and keywords instead of just names.
        """
        source = '\n'.join(source.splitlines())
        pos = 0
        lineno = 1
        stack = ['root']
        statetokens = self.rules['root']
        source_length = len(source)

        # keeps track of unclosed parentheses, brackets and braces so
        # that end tags inside an open expression are not matched early
        balancing_stack = []

        while 1:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)
                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces /
                # parentheses are balanced.  continue parsing with the
                # lower rule which is the operator rule.  do this only
                # if the end tags look like operators
                if balancing_stack and \
                   tokens in ('variable_end', 'block_end',
                              'linestatement_end'):
                    continue
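
                # e.g. in {{ {'a': 1}['a'] }} the inner '}' and ']'
                # only close the dict and the subscript, so the
                # variable end tag must not match until the balancing
                # stack is empty again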
395
396 # tuples support more options
397 if isinstance(tokens, tuple):
398 for idx, token in enumerate(tokens):
399 # hidden group
400 if token is None:
401 g = m.group(idx)
402 if g:
403 lineno += g.count('\n')
404 continue
405 # failure group
Armin Ronacherecc051b2007-06-01 18:25:28 +0200406 elif token.__class__ is Failure:
Armin Ronacher720e55b2007-05-30 00:57:49 +0200407 raise token(lineno, filename)
Armin Ronacher21580912007-04-17 17:13:10 +0200408 # bygroup is a bit more complex, in that case we
409 # yield for the current token the first named
410 # group that matched
411 elif token == '#bygroup':
Armin Ronacher92f572f2007-02-26 22:17:32 +0100412 for key, value in m.groupdict().iteritems():
413 if value is not None:
Armin Ronacher21580912007-04-17 17:13:10 +0200414 yield lineno, key, value
415 lineno += value.count('\n')
Armin Ronacher92f572f2007-02-26 22:17:32 +0100416 break
417 else:
Armin Ronacher21580912007-04-17 17:13:10 +0200418 raise RuntimeError('%r wanted to resolve '
419 'the token dynamically'
420 ' but no group matched'
421 % regex)
422 # normal group
Armin Ronacher92f572f2007-02-26 22:17:32 +0100423 else:
Armin Ronacher21580912007-04-17 17:13:10 +0200424 data = m.group(idx + 1)
425 if data:
426 yield lineno, token, data
427 lineno += data.count('\n')
428

                # strings as tokens are just yielded as-is
                else:
                    data = m.group()
                    # update brace/parentheses balance
                    if tokens == 'operator':
                        if data == '{':
                            balancing_stack.append('}')
                        elif data == '(':
                            balancing_stack.append(')')
                        elif data == '[':
                            balancing_stack.append(']')
                        elif data in ('}', ')', ']'):
                            if not balancing_stack:
                                raise TemplateSyntaxError('unexpected "%s"' %
                                                          data, lineno,
                                                          filename)
                            expected_op = balancing_stack.pop()
                            if expected_op != data:
                                raise TemplateSyntaxError('unexpected "%s", '
                                                          'expected "%s"' %
                                                          (data, expected_op),
                                                          lineno, filename)
                    # yield items
                    if tokens is not None:
                        yield lineno, tokens, data
                    lineno += data.count('\n')
455
456 # fetch new position into new variable so that we can check
457 # if there is a internal parsing error which would result
458 # in an infinite loop
459 pos2 = m.end()
460
461 # handle state changes
462 if new_state is not None:
463 # remove the uppermost state
464 if new_state == '#pop':
465 stack.pop()
466 # resolve the new state by group checking
467 elif new_state == '#bygroup':
468 for key, value in m.groupdict().iteritems():
469 if value is not None:
470 stack.append(key)
471 break
472 else:
473 raise RuntimeError('%r wanted to resolve the '
474 'new state dynamically but'
475 ' no group matched' %
476 regex)
477 # direct state name given
478 else:
479 stack.append(new_state)
480 statetokens = self.rules[stack[-1]]
481 # we are still at the same position and no stack change.
482 # this means a loop without break condition, avoid that and
483 # raise error
484 elif pos2 == pos:
485 raise RuntimeError('%r yielded empty string without '
486 'stack change' % regex)
487 # publish new function and start again
488 pos = pos2
489 break
            # if the loop terminated without break we haven't found a
            # single match; either we are at the end of the file or we
            # have a problem
            else:
                # end of text
                if pos >= source_length:
                    return
                # something went wrong
                raise TemplateSyntaxError('unexpected char %r at %d' %
                                          (source[pos], pos), lineno,
                                          filename)