first version of new parser
--HG--
branch : trunk
diff --git a/jinja2/parser.py b/jinja2/parser.py
index d981c75..0c4edfc 100644
--- a/jinja2/parser.py
+++ b/jinja2/parser.py
@@ -8,12 +8,20 @@
:copyright: 2008 by Armin Ronacher.
:license: BSD, see LICENSE for more details.
"""
-from jinja import nodes
-from jinja.exceptions import TemplateSyntaxError
+from jinja2 import nodes
+from jinja2.exceptions import TemplateSyntaxError
__all__ = ['Parser']
+# Keywords that start a statement; `Parser.parse_statement` dispatches each
+# of them to the parser method named ``parse_<keyword>``.
+_statement_keywords = frozenset(('for', 'if', 'block', 'extends', 'include'))
+
+# Token types that `Parser.parse_compare` treats as comparison operators.
+_compare_operators = frozenset(('eq', 'ne', 'lt', 'lteq', 'gt', 'gteq', 'in'))
+
+# Token types at which `Parser.parse_tuple` stops collecting items.
+_tuple_edge_tokens = set(('rparen', 'block_end', 'variable_end', 'in',
+                          'semicolon', 'recursive'))
+
+# Token types that may directly follow a statement without a semicolon.
+_statement_end_tokens = set(('elif', 'else', 'endblock', 'endfilter',
+                             'endfor', 'endif', 'endmacro', 'endcall',
+                             'block_end'))
+
class Parser(object):
"""
@@ -35,5 +43,527 @@
self.no_variable_block = self.environment.lexer.no_variable_block
self.stream = environment.lexer.tokenize(source, filename)
- def parse(self):
+    def end_statement(self):
+        """
+        Make sure that the statement ends properly.
+
+        A semicolon is consumed.  Any other token has to be one of the
+        statement end tokens and is left in the stream; otherwise a
+        `TemplateSyntaxError` is raised.
+        """
+        token = self.stream.current
+        # compare by value: an identity check against a string literal
+        # only works if the token type happens to be the same object
+        if token.type == 'semicolon':
+            self.stream.next()
+        elif token.type not in _statement_end_tokens:
+            raise TemplateSyntaxError('ambiguous end of statement',
+                                      token.lineno, self.filename)
+
+    def parse_statement(self):
+        """
+        Parse a single statement.
+
+        Statement keywords are dispatched to the matching
+        ``parse_<keyword>`` method and ``call`` starts a call block.
+        Everything else is parsed as an expression which is either the
+        target of an assignment or a plain expression statement.
+        """
+        token_type = self.stream.current.type
+        if token_type in _statement_keywords:
+            return getattr(self, 'parse_' + token_type)()
+        # compare by value, not identity
+        if token_type == 'call':
+            self.stream.next()
+            return self.parse_call_block()
+        lineno = self.stream.current.lineno
+        expr = self.parse_expression()
+        if self.stream.current.type == 'assign':
+            return self.parse_assign(expr)
+        self.end_statement()
+        return nodes.ExprStmt(expr, lineno=lineno)
+
+    def parse_assign(self, target):
+        """
+        Parse the rest of an assignment whose target expression was
+        already parsed.  The stream has to be at the ``assign`` token.
+
+        Raises `TemplateSyntaxError` if `target` reports that it cannot
+        be assigned to.
+        """
+        lineno = self.stream.expect('assign').lineno
+        if not target.can_assign():
+            message = "can't assign to '%s'" % target
+            raise TemplateSyntaxError(message, target.lineno, self.filename)
+        value = self.parse_tuple()
+        self.end_statement()
+        # the target is switched to store context once the value is parsed
+        nodes.set_ctx(target, 'store')
+        return nodes.Assign(target, value, lineno=lineno)
+
+    def parse_statements(self, end_tokens, drop_needle=False):
+        """
+        Parse multiple statements into a list until one of the end tokens
+        is reached.  This is used to parse the body of statements as it
+        also parses template data if appropriate.
+
+        If the current token closes the block, the body is handed to
+        `subparse`; otherwise statements are parsed one after another.
+        With `drop_needle` the token following the body is skipped too.
+        """
+        # the first token may be a colon for python compatibility
+        if self.stream.current.type == 'colon':
+            self.stream.next()
+
+        if self.stream.current.type == 'block_end':
+            self.stream.next()
+            result = self.subparse(end_tokens)
+        else:
+            result = []
+            while self.stream.current.type not in end_tokens:
+                result.append(self.parse_statement())
+        if drop_needle:
+            self.stream.next()
+        return result
+
+    def parse_for(self):
+        """
+        Parse a for loop.
+
+        Handles the loop target, the iterable, the optional ``recursive``
+        marker, the loop body and an optional ``else`` body, and returns
+        a `For` node.
+        """
+        lineno = self.stream.expect('for').lineno
+        target = self.parse_tuple(simplified=True)
+        nodes.set_ctx(target, 'store')
+        self.stream.expect('in')
+        # named `iterable` so the builtin `iter` is not shadowed
+        iterable = self.parse_tuple()
+        recursive = self.stream.current.type == 'recursive'
+        if recursive:
+            self.stream.next()
+        body = self.parse_statements(('endfor', 'else'))
+        token_type = self.stream.current.type
+        self.stream.next()
+        if token_type == 'endfor':
+            else_ = []
+        else:
+            else_ = self.parse_statements(('endfor',), drop_needle=True)
+        # pass the parsed flag on instead of a hard-coded False
+        return nodes.For(target, iterable, body, else_, recursive,
+                         lineno=lineno)
+
+ def parse_if(self):
+ """Parse an ``if`` statement; not implemented yet, returns `None`."""
 pass
+
+    def parse_block(self):
+        """Placeholder for parsing ``block`` statements; returns `None`."""
+        return None
+
+    def parse_extends(self):
+        """Placeholder for parsing ``extends`` statements; returns `None`."""
+        return None
+
+    def parse_include(self):
+        """Placeholder for parsing ``include`` statements; returns `None`."""
+        return None
+
+    def parse_call_block(self):
+        """Placeholder for parsing ``call`` blocks; returns `None`."""
+        return None
+
+    def parse_expression(self):
+        """
+        Parse a full expression.  The conditional expression is the
+        outermost level, so parsing is delegated to `parse_condexpr`.
+        """
+        expression = self.parse_condexpr()
+        return expression
+
+    def parse_condexpr(self):
+        """
+        Parse a conditional expression (``a if test else b``).
+
+        Returns the plain operand if no ``if`` follows it.  The ``else``
+        part is mandatory once an ``if`` was seen.
+        """
+        lineno = self.stream.current.lineno
+        expr1 = self.parse_or()
+        # compare by value, not identity
+        while self.stream.current.type == 'if':
+            self.stream.next()
+            expr2 = self.parse_or()
+            self.stream.expect('else')
+            expr3 = self.parse_condexpr()
+            # the node takes the test first, then the two branches
+            expr1 = nodes.CondExpr(expr2, expr1, expr3, lineno=lineno)
+            lineno = self.stream.current.lineno
+        return expr1
+
+    def parse_or(self):
+        """Parse a left-associative chain of ``or`` expressions."""
+        lineno = self.stream.current.lineno
+        left = self.parse_and()
+        # compare by value, not identity
+        while self.stream.current.type == 'or':
+            self.stream.next()
+            right = self.parse_and()
+            left = nodes.Or(left, right, lineno=lineno)
+            lineno = self.stream.current.lineno
+        return left
+
+    def parse_and(self):
+        """Parse a left-associative chain of ``and`` expressions."""
+        lineno = self.stream.current.lineno
+        left = self.parse_compare()
+        # compare by value, not identity
+        while self.stream.current.type == 'and':
+            self.stream.next()
+            right = self.parse_compare()
+            left = nodes.And(left, right, lineno=lineno)
+            lineno = self.stream.current.lineno
+        return left
+
+    def parse_compare(self):
+        """
+        Parse a comparison chain such as ``a < b <= c`` or ``a not in b``.
+
+        Returns the plain operand if no comparison operator follows it,
+        otherwise a `Compare` node with one `Operand` per operator.
+        """
+        lineno = self.stream.current.lineno
+        expr = self.parse_add()
+        ops = []
+        while 1:
+            token_type = self.stream.current.type
+            if token_type in _compare_operators:
+                self.stream.next()
+                ops.append(nodes.Operand(token_type, self.parse_add()))
+            # ``not in`` arrives as two tokens, so peek at the next one
+            elif token_type == 'not' and self.stream.look().type == 'in':
+                self.stream.skip(2)
+                ops.append(nodes.Operand('notin', self.parse_add()))
+            else:
+                break
+            lineno = self.stream.current.lineno
+        if not ops:
+            return expr
+        return nodes.Compare(expr, ops, lineno=lineno)
+
+    def parse_add(self):
+        """Parse a left-associative chain of additions."""
+        lineno = self.stream.current.lineno
+        left = self.parse_sub()
+        # compare by value, not identity
+        while self.stream.current.type == 'add':
+            self.stream.next()
+            right = self.parse_sub()
+            left = nodes.Add(left, right, lineno=lineno)
+            lineno = self.stream.current.lineno
+        return left
+
+    def parse_sub(self):
+        """Parse a left-associative chain of subtractions."""
+        lineno = self.stream.current.lineno
+        left = self.parse_concat()
+        # compare by value, not identity
+        while self.stream.current.type == 'sub':
+            self.stream.next()
+            right = self.parse_concat()
+            left = nodes.Sub(left, right, lineno=lineno)
+            lineno = self.stream.current.lineno
+        return left
+
+    def parse_concat(self):
+        """
+        Parse operands joined by the tilde operator.
+
+        A single operand is returned unchanged; two or more are collected
+        into one `Concat` node.
+        """
+        lineno = self.stream.current.lineno
+        args = [self.parse_mul()]
+        # compare by value, not identity
+        while self.stream.current.type == 'tilde':
+            self.stream.next()
+            args.append(self.parse_mul())
+        if len(args) == 1:
+            return args[0]
+        return nodes.Concat(args, lineno=lineno)
+
+    def parse_mul(self):
+        """Parse a left-associative chain of multiplications."""
+        lineno = self.stream.current.lineno
+        left = self.parse_div()
+        # compare by value, not identity
+        while self.stream.current.type == 'mul':
+            self.stream.next()
+            right = self.parse_div()
+            left = nodes.Mul(left, right, lineno=lineno)
+            lineno = self.stream.current.lineno
+        return left
+
+    def parse_div(self):
+        """Parse a left-associative chain of (true) divisions."""
+        lineno = self.stream.current.lineno
+        left = self.parse_floordiv()
+        # compare by value, not identity
+        while self.stream.current.type == 'div':
+            self.stream.next()
+            right = self.parse_floordiv()
+            # NOTE(review): the original built `nodes.Floor` here, which
+            # does not match the ``div`` token; `nodes.Div` is assumed to
+            # be the division node -- confirm against the nodes module
+            left = nodes.Div(left, right, lineno=lineno)
+            lineno = self.stream.current.lineno
+        return left
+
+    def parse_floordiv(self):
+        """Parse a left-associative chain of floor divisions."""
+        lineno = self.stream.current.lineno
+        left = self.parse_mod()
+        # compare by value, not identity
+        while self.stream.current.type == 'floordiv':
+            self.stream.next()
+            right = self.parse_mod()
+            left = nodes.FloorDiv(left, right, lineno=lineno)
+            lineno = self.stream.current.lineno
+        return left
+
+    def parse_mod(self):
+        """Parse a left-associative chain of modulo operations."""
+        lineno = self.stream.current.lineno
+        left = self.parse_pow()
+        # compare by value, not identity
+        while self.stream.current.type == 'mod':
+            self.stream.next()
+            right = self.parse_pow()
+            left = nodes.Mod(left, right, lineno=lineno)
+            lineno = self.stream.current.lineno
+        return left
+
+    def parse_pow(self):
+        """Parse a chain of power operations (grouped left to right)."""
+        lineno = self.stream.current.lineno
+        left = self.parse_unary()
+        # compare by value, not identity
+        while self.stream.current.type == 'pow':
+            self.stream.next()
+            right = self.parse_unary()
+            left = nodes.Pow(left, right, lineno=lineno)
+            lineno = self.stream.current.lineno
+        return left
+
+    def parse_unary(self):
+        """
+        Parse a unary expression: ``not x``, ``-x`` or ``+x``.
+
+        Anything else is handed to `parse_primary`.
+        """
+        token_type = self.stream.current.type
+        lineno = self.stream.current.lineno
+        if token_type == 'not':
+            self.stream.next()
+            node = self.parse_unary()
+            # NOTE(review): the original built `nodes.Neg` (arithmetic
+            # negation) for ``not``; `nodes.Not` is assumed to be the
+            # logical negation node -- confirm against the nodes module
+            return nodes.Not(node, lineno=lineno)
+        if token_type == 'sub':
+            self.stream.next()
+            node = self.parse_unary()
+            # unary minus is a negation, not a subtraction with one operand
+            return nodes.Neg(node, lineno=lineno)
+        if token_type == 'add':
+            self.stream.next()
+            node = self.parse_unary()
+            return nodes.Pos(node, lineno=lineno)
+        return self.parse_primary()
+
+    def parse_primary(self, parse_postfix=True):
+        """
+        Parse a primary expression: a name, one of the constants ``true``,
+        ``false`` and ``none``, a number or string literal, a parenthesized
+        expression or tuple, a list or a dict.
+
+        With `parse_postfix` enabled the postfix operators following the
+        primary are parsed as well.  Raises `TemplateSyntaxError` on any
+        other token.
+        """
+        token = self.stream.current
+        # token types are compared by value, not identity
+        if token.type == 'name':
+            if token.value in ('true', 'false'):
+                node = nodes.Const(token.value == 'true', lineno=token.lineno)
+            elif token.value == 'none':
+                node = nodes.Const(None, lineno=token.lineno)
+            else:
+                node = nodes.Name(token.value, 'load', lineno=token.lineno)
+            self.stream.next()
+        elif token.type in ('integer', 'float', 'string'):
+            self.stream.next()
+            node = nodes.Const(token.value, lineno=token.lineno)
+        elif token.type == 'lparen':
+            self.stream.next()
+            node = self.parse_tuple()
+            self.stream.expect('rparen')
+        elif token.type == 'lbracket':
+            node = self.parse_list()
+        elif token.type == 'lbrace':
+            node = self.parse_dict()
+        else:
+            raise TemplateSyntaxError("unexpected token '%s'" %
+                                      (token,), token.lineno,
+                                      self.filename)
+        if parse_postfix:
+            node = self.parse_postfix(node)
+        return node
+
+    def parse_tuple(self, enforce=False, simplified=False):
+        """
+        Parse multiple expressions into a tuple. This can also return
+        just one expression which is not a tuple. If you want to enforce
+        a tuple, pass it enforce=True (currently unused).
+
+        With `simplified` the items are parsed with `parse_primary`
+        instead of `parse_expression`.  Raises `TemplateSyntaxError` if
+        `enforce` is set and a single expression without a comma is found.
+        """
+        lineno = self.stream.current.lineno
+        # explicit branch instead of the fragile ``a and b or c`` idiom
+        if simplified:
+            parse = self.parse_primary
+        else:
+            parse = self.parse_expression
+        args = []
+        is_tuple = False
+        while 1:
+            if args:
+                self.stream.expect('comma')
+            if self.stream.current.type in _tuple_edge_tokens:
+                break
+            args.append(parse())
+            # compare by value, not identity
+            if self.stream.current.type != 'comma':
+                break
+            is_tuple = True
+            lineno = self.stream.current.lineno
+        if not is_tuple and args:
+            if enforce:
+                raise TemplateSyntaxError('tuple expected', lineno,
+                                          self.filename)
+            return args[0]
+        return nodes.Tuple(args, 'load', lineno=lineno)
+
+    def parse_list(self):
+        """
+        Parse a list literal, starting at the opening bracket, into a
+        `List` node.  A trailing comma before the closing bracket is
+        accepted.
+        """
+        token = self.stream.expect('lbracket')
+        items = []
+        # compare by value, not identity
+        while self.stream.current.type != 'rbracket':
+            if items:
+                self.stream.expect('comma')
+            # the comma just consumed may have been a trailing one
+            if self.stream.current.type == 'rbracket':
+                break
+            items.append(self.parse_expression())
+        self.stream.expect('rbracket')
+        return nodes.List(items, lineno=token.lineno)
+
+    def parse_dict(self):
+        """
+        Parse a dict literal, starting at the opening brace, into a `Dict`
+        node holding one `Pair` per ``key: value`` item.  A trailing comma
+        before the closing brace is accepted.
+        """
+        token = self.stream.expect('lbrace')
+        items = []
+        # compare by value, not identity
+        while self.stream.current.type != 'rbrace':
+            if items:
+                self.stream.expect('comma')
+            # the comma just consumed may have been a trailing one
+            if self.stream.current.type == 'rbrace':
+                break
+            key = self.parse_expression()
+            self.stream.expect('colon')
+            value = self.parse_expression()
+            items.append(nodes.Pair(key, value, lineno=key.lineno))
+        self.stream.expect('rbrace')
+        # lineno is passed by keyword like for every other node built by
+        # this parser (the original passed lineno and filename positionally)
+        return nodes.Dict(items, lineno=token.lineno)
+
+    def parse_postfix(self, node):
+        """
+        Apply all postfix operators that follow `node`: subscripts, calls,
+        filters and tests.  Returns the resulting node, or `node` itself
+        if no postfix operator follows.
+        """
+        while 1:
+            # token types are compared by value, not identity
+            token_type = self.stream.current.type
+            if token_type in ('dot', 'lbracket'):
+                node = self.parse_subscript(node)
+            elif token_type == 'lparen':
+                node = self.parse_call(node)
+            elif token_type == 'pipe':
+                node = self.parse_filter(node)
+            elif token_type == 'is':
+                node = self.parse_test(node)
+            else:
+                break
+        return node
+
+    def parse_subscript(self, node):
+        """
+        Parse an attribute lookup (``.name`` / ``.0``) or an item lookup
+        (``[...]``) on `node` and return a `Subscript` node.
+
+        Raises `TemplateSyntaxError` if the dot is not followed by a name
+        or an integer, or if the token starts neither form.
+        """
+        # As in the original code, the opening token is taken from the
+        # return value of next() -- presumably the token just consumed.
+        token = self.stream.next()
+        if token.type == 'dot':
+            # check the token after the dot; the original tested the dot
+            # itself and therefore always raised
+            attr_token = self.stream.current
+            if attr_token.type not in ('name', 'integer'):
+                raise TemplateSyntaxError('expected name or number',
+                                          attr_token.lineno, self.filename)
+            arg = nodes.Const(attr_token.value, lineno=attr_token.lineno)
+            self.stream.next()
+        elif token.type == 'lbracket':
+            args = []
+            while self.stream.current.type != 'rbracket':
+                if args:
+                    self.stream.expect('comma')
+                args.append(self.parse_subscribed())
+            self.stream.expect('rbracket')
+            if len(args) == 1:
+                arg = args[0]
+            else:
+                # same call shape as the tuple built by parse_tuple; the
+                # original referenced an undefined local `lineno`
+                arg = nodes.Tuple(args, 'load', lineno=token.lineno)
+        else:
+            # the parser object has no `lineno`; report the token's line
+            raise TemplateSyntaxError('expected subscript expression',
+                                      token.lineno, self.filename)
+        return nodes.Subscript(node, arg, 'load', lineno=token.lineno)
+
+    def parse_subscribed(self):
+        """
+        Parse one item inside ``[...]``: either a plain expression or a
+        slice of the form ``start:stop:step`` where every part may be
+        omitted.  Returns the expression node or a `Slice` node whose
+        missing parts are `None`.
+        """
+        lineno = self.stream.current.lineno
+
+        # start (token types are compared by value, not identity)
+        if self.stream.current.type == 'colon':
+            self.stream.next()
+            args = [None]
+        else:
+            node = self.parse_expression()
+            if self.stream.current.type != 'colon':
+                return node
+            self.stream.next()
+            args = [node]
+
+        # stop
+        if self.stream.current.type == 'colon':
+            args.append(None)
+        elif self.stream.current.type not in ('rbracket', 'comma'):
+            args.append(self.parse_expression())
+        else:
+            args.append(None)
+
+        # step
+        if self.stream.current.type == 'colon':
+            self.stream.next()
+            if self.stream.current.type not in ('rbracket', 'comma'):
+                args.append(self.parse_expression())
+            else:
+                args.append(None)
+        else:
+            args.append(None)
+
+        return nodes.Slice(lineno=lineno, *args)
+
+    def parse_call(self, node):
+        """
+        Parse a call argument list, starting at the opening parenthesis.
+
+        Supports positional arguments, ``name=value`` keyword arguments,
+        ``*args`` and ``**kwargs`` and a trailing comma.  Returns a `Call`
+        node for `node`; if `node` is `None` the raw
+        ``(args, kwargs, dyn_args, dyn_kwargs)`` tuple is returned instead.
+        Raises `TemplateSyntaxError` if the arguments are in an invalid
+        order.
+        """
+        token = self.stream.expect('lparen')
+        args = []
+        kwargs = []
+        dyn_args = dyn_kwargs = None
+        require_comma = False
+
+        def ensure(expr):
+            if not expr:
+                raise TemplateSyntaxError('invalid syntax for function '
+                                          'call expression', token.lineno,
+                                          self.filename)
+
+        # token types are compared by value, not identity
+        while self.stream.current.type != 'rparen':
+            if require_comma:
+                self.stream.expect('comma')
+                # support for trailing comma
+                if self.stream.current.type == 'rparen':
+                    break
+            if self.stream.current.type == 'mul':
+                ensure(dyn_args is None and dyn_kwargs is None)
+                self.stream.next()
+                dyn_args = self.parse_expression()
+            elif self.stream.current.type == 'pow':
+                ensure(dyn_kwargs is None)
+                self.stream.next()
+                dyn_kwargs = self.parse_expression()
+            else:
+                ensure(dyn_args is None and dyn_kwargs is None)
+                if self.stream.current.type == 'name' and \
+                   self.stream.look().type == 'assign':
+                    # keep the token: its value is a plain string and has
+                    # no `lineno` attribute
+                    key_token = self.stream.current
+                    self.stream.skip(2)
+                    kwargs.append(nodes.Pair(key_token.value,
+                                             self.parse_expression(),
+                                             lineno=key_token.lineno))
+                else:
+                    ensure(not kwargs)
+                    args.append(self.parse_expression())
+
+            require_comma = True
+        self.stream.expect('rparen')
+
+        if node is None:
+            return args, kwargs, dyn_args, dyn_kwargs
+        return nodes.Call(node, args, kwargs, dyn_args, dyn_kwargs,
+                          lineno=token.lineno)
+
+    def parse_filter(self, node):
+        """
+        Parse a chain of filters (``|name`` or ``|name(...)``) applied to
+        `node` and return a `Filter` node holding one `FilterCall` per
+        filter.
+        """
+        # the original read `.type` here and never used the value
+        lineno = self.stream.current.lineno
+        filters = []
+        while self.stream.current.type == 'pipe':
+            self.stream.next()
+            token = self.stream.expect('name')
+            if self.stream.current.type == 'lparen':
+                args, kwargs, dyn_args, dyn_kwargs = self.parse_call(None)
+            else:
+                args = []
+                kwargs = []
+                dyn_args = dyn_kwargs = None
+            filters.append(nodes.FilterCall(token.value, args, kwargs,
+                                            dyn_args, dyn_kwargs,
+                                            lineno=token.lineno))
+        # NOTE(review): assumes `nodes.Filter` accepts ``lineno`` by keyword
+        # like the other nodes built by this parser -- confirm
+        return nodes.Filter(node, filters, lineno=lineno)
+
+    def parse_test(self, node):
+        """
+        Parse a test (``is name``, ``is not name``, optionally followed by
+        a parenthesized argument list or one bare argument) applied to
+        `node`.  Returns a `Test` node, wrapped in a negation node for
+        ``is not``.
+        """
+        token = self.stream.expect('is')
+        # token types are compared by value, not identity
+        negated = self.stream.current.type == 'not'
+        if negated:
+            self.stream.next()
+        name = self.stream.expect('name').value
+        # defaults for every form; the original left kwargs, dyn_args and
+        # dyn_kwargs unbound when the test took one bare argument
+        args = []
+        kwargs = []
+        dyn_args = dyn_kwargs = None
+        if self.stream.current.type == 'lparen':
+            args, kwargs, dyn_args, dyn_kwargs = self.parse_call(None)
+        elif self.stream.current.type in ('name', 'string', 'integer',
+                                          'float', 'lparen', 'lbracket',
+                                          'lbrace', 'regex'):
+            args = [self.parse_expression()]
+        node = nodes.Test(node, name, args, kwargs, dyn_args,
+                          dyn_kwargs, lineno=token.lineno)
+        if negated:
+            node = nodes.NotExpression(node, lineno=token.lineno)
+        return node
+
+    def subparse(self, end_tokens=None):
+        """
+        Parse template data, variable blocks and statement blocks into a
+        list of nodes.
+
+        Consecutive data and variable expressions are collected and
+        emitted as one `Output` node.  If `end_tokens` is given, parsing
+        stops right after a block start whose first token is one of them;
+        that token is left in the stream.  Otherwise parsing runs until
+        the stream is exhausted.
+        """
+        body = []
+        data_buffer = []
+        add_data = data_buffer.append
+
+        def flush_data():
+            # turn the buffered output into one node and reset the buffer
+            if data_buffer:
+                lineno = data_buffer[0].lineno
+                body.append(nodes.Output(data_buffer[:], lineno=lineno))
+                del data_buffer[:]
+
+        while self.stream:
+            token = self.stream.current
+            # token types are compared by value, not identity
+            if token.type == 'data':
+                add_data(nodes.Const(token.value, lineno=token.lineno))
+                self.stream.next()
+            elif token.type == 'variable_begin':
+                self.stream.next()
+                add_data(self.parse_tuple())
+                self.stream.expect('variable_end')
+            elif token.type == 'block_begin':
+                flush_data()
+                self.stream.next()
+                if end_tokens is not None and \
+                   self.stream.current.type in end_tokens:
+                    return body
+                while self.stream.current.type != 'block_end':
+                    body.append(self.parse_statement())
+                self.stream.expect('block_end')
+            else:
+                raise AssertionError('internal parsing error')
+
+        flush_data()
+        return body
+
+    def parse(self):
+        """Parse the complete source and wrap the body in a `Template` node."""
+        body = self.subparse()
+        return nodes.Template(body, lineno=1)