| /* |
| * Copyright (C) 2015 Google, Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
| * in compliance with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software distributed under the License |
| * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
| * or implied. See the License for the specific language governing permissions and limitations under |
| * the License. |
| */ |
| package com.google.escapevelocity; |
| |
| import com.google.escapevelocity.DirectiveNode.SetNode; |
| import com.google.escapevelocity.ExpressionNode.BinaryExpressionNode; |
| import com.google.escapevelocity.ExpressionNode.NotExpressionNode; |
| import com.google.escapevelocity.ReferenceNode.IndexReferenceNode; |
| import com.google.escapevelocity.ReferenceNode.MemberReferenceNode; |
| import com.google.escapevelocity.ReferenceNode.MethodReferenceNode; |
| import com.google.escapevelocity.ReferenceNode.PlainReferenceNode; |
| import com.google.escapevelocity.TokenNode.CommentTokenNode; |
| import com.google.escapevelocity.TokenNode.ElseIfTokenNode; |
| import com.google.escapevelocity.TokenNode.ElseTokenNode; |
| import com.google.escapevelocity.TokenNode.EndTokenNode; |
| import com.google.escapevelocity.TokenNode.EofNode; |
| import com.google.escapevelocity.TokenNode.ForEachTokenNode; |
| import com.google.escapevelocity.TokenNode.IfTokenNode; |
| import com.google.escapevelocity.TokenNode.MacroDefinitionTokenNode; |
| import com.google.escapevelocity.TokenNode.NestedTokenNode; |
| import java.io.IOException; |
| import java.io.LineNumberReader; |
| import java.io.Reader; |
| import java.util.ArrayList; |
| import java.util.Collections; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Map; |
| |
| /** |
| * A parser that reads input from the given {@link Reader} and parses it to produce a |
| * {@link Template}. |
| * |
| * @author emcmanus@google.com (Éamonn McManus) |
| */ |
| class Parser { |
| private static final int EOF = -1; |
| |
| private final LineNumberReader reader; |
| private final String resourceName; |
| private final Template.ResourceOpener resourceOpener; |
| |
| /** |
| * The invariant of this parser is that {@code c} is always the next character of interest. |
| * This means that we never have to "unget" a character by reading too far. For example, after |
| * we parse an integer, {@code c} will be the first character after the integer, which is exactly |
| * the state we will be in when there are no more digits. |
| */ |
| private int c; |
| |
| Parser(Reader reader, String resourceName, Template.ResourceOpener resourceOpener) |
| throws IOException { |
| this.reader = new LineNumberReader(reader); |
| this.reader.setLineNumber(1); |
| next(); |
| this.resourceName = resourceName; |
| this.resourceOpener = resourceOpener; |
| } |
| |
| /** |
| * Parse the input completely to produce a {@link Template}. |
| * |
| * <p>Parsing happens in two phases. First, we parse a sequence of "tokens", where tokens include |
| * entire references such as <pre> |
| * ${x.foo()[23]} |
| * </pre>or entire directives such as<pre> |
| * #set ($x = $y + $z) |
| * </pre>But tokens do not span complex constructs. For example,<pre> |
| * #if ($x == $y) something #end |
| * </pre>is three tokens:<pre> |
| * #if ($x == $y) |
| * (literal text " something ") |
| * #end |
| * </pre> |
| * |
| * <p>The second phase then takes the sequence of tokens and constructs a parse tree out of it. |
| * Some nodes in the parse tree will be unchanged from the token sequence, such as the <pre> |
| * ${x.foo()[23]} |
| * #set ($x = $y + $z) |
| * </pre> examples above. But a construct such as the {@code #if ... #end} mentioned above will |
| * become a single IfNode in the parse tree in the second phase. |
| * |
| * <p>The main reason for this approach is that Velocity has two kinds of lexical contexts. At the |
| * top level, there can be arbitrary literal text; references like <code>${x.foo()}</code>; and |
| * directives like {@code #if} or {@code #set}. Inside the parentheses of a directive, however, |
| * neither arbitrary text nor directives can appear, but expressions can, so we need to tokenize |
| * the inside of <pre> |
| * #if ($x == $a + $b) |
| * </pre> as the five tokens "$x", "==", "$a", "+", "$b". Rather than having a classical |
| * parser/lexer combination, where the lexer would need to switch between these two modes, we |
| * replace the lexer with an ad-hoc parser that is the first phase described above, and we |
| * define a simple parser over the resultant tokens that is the second phase. |
| */ |
| Template parse() throws IOException { |
| ImmutableList<Node> tokens = parseTokens(); |
| return new Reparser(tokens).reparse(); |
| } |
| |
| private ImmutableList<Node> parseTokens() throws IOException { |
| ImmutableList.Builder<Node> tokens = ImmutableList.builder(); |
| Node token; |
| do { |
| token = parseNode(); |
| tokens.add(token); |
| } while (!(token instanceof EofNode)); |
| return tokens.build(); |
| } |
| |
| private int lineNumber() { |
| return reader.getLineNumber(); |
| } |
| |
| /** |
| * Gets the next character from the reader and assigns it to {@code c}. If there are no more |
| * characters, sets {@code c} to {@link #EOF} if it is not already. |
| */ |
| private void next() throws IOException { |
| if (c != EOF) { |
| c = reader.read(); |
| } |
| } |
| |
| /** |
| * If {@code c} is a space character, keeps reading until {@code c} is a non-space character or |
| * there are no more characters. |
| */ |
| private void skipSpace() throws IOException { |
| while (Character.isWhitespace(c)) { |
| next(); |
| } |
| } |
| |
| /** |
| * Gets the next character from the reader, and if it is a space character, keeps reading until |
| * a non-space character is found. |
| */ |
| private void nextNonSpace() throws IOException { |
| next(); |
| skipSpace(); |
| } |
| |
| /** |
| * Skips any space in the reader, and then throws an exception if the first non-space character |
| * found is not the expected one. Sets {@code c} to the first character after that expected one. |
| */ |
| private void expect(char expected) throws IOException { |
| skipSpace(); |
| if (c == expected) { |
| next(); |
| } else { |
| throw parseException("Expected " + expected); |
| } |
| } |
| |
| /** |
| * Parses a single node from the reader, as part of the first parsing phase. |
| * <pre>{@code |
| * <template> -> <empty> | |
| * <directive> <template> | |
| * <non-directive> <template> |
| * }</pre> |
| */ |
| private Node parseNode() throws IOException { |
| if (c == '#') { |
| next(); |
| if (c == '#') { |
| return parseComment(); |
| } else if (isAsciiLetter(c) || c == '{') { |
| return parseDirective(); |
| } else if (c == '[') { |
| return parseHashSquare(); |
| } else { |
| // For consistency with Velocity, we treat # not followed by # or a letter as a plain |
| // character, and we treat #$foo as a literal # followed by the reference $foo. |
| // But the # is its own ConstantExpressionNode; we don't try to merge it with adjacent text. |
| return new ConstantExpressionNode(resourceName, lineNumber(), "#"); |
| } |
| } |
| if (c == EOF) { |
| return new EofNode(resourceName, lineNumber()); |
| } |
| return parseNonDirective(); |
| } |
| |
| private Node parseHashSquare() throws IOException { |
| // We've just seen #[ which might be the start of a #[[quoted block]]#. If the next character |
| // is not another [ then it's not a quoted block, but it *is* a literal #[ followed by whatever |
| // that next character is. |
| assert c == '['; |
| next(); |
| if (c != '[') { |
| return new ConstantExpressionNode(resourceName, lineNumber(), "#["); |
| } |
| next(); |
| StringBuilder sb = new StringBuilder(); |
| while (true) { |
| if (c == EOF) { |
| throw parseException("Unterminated #[[ - did not see matching ]]#"); |
| } |
| if (c == '#') { |
| // This might be the last character of ]]# or it might just be a random #. |
| int len = sb.length(); |
| if (len > 1 && sb.charAt(len - 1) == ']' && sb.charAt(len - 2) == ']') { |
| next(); |
| break; |
| } |
| } |
| sb.append((char) c); |
| next(); |
| } |
| String quoted = sb.substring(0, sb.length() - 2); |
| return new ConstantExpressionNode(resourceName, lineNumber(), quoted); |
| } |
| |
| /** |
| * Parses a single non-directive node from the reader. |
| * <pre>{@code |
| * <non-directive> -> <reference> | |
| * <text containing neither $ nor #> |
| * }</pre> |
| */ |
| private Node parseNonDirective() throws IOException { |
| if (c == '$') { |
| next(); |
| if (isAsciiLetter(c) || c == '{') { |
| return parseReference(); |
| } else { |
| return parsePlainText('$'); |
| } |
| } else { |
| int firstChar = c; |
| next(); |
| return parsePlainText(firstChar); |
| } |
| } |
| |
| /** |
| * Parses a single directive token from the reader. Directives can be spelled with or without |
| * braces, for example {@code #if} or {@code #{if}}. We omit the brace spelling in the productions |
| * here: <pre>{@code |
| * <directive> -> <if-token> | |
| * <else-token> | |
| * <elseif-token> | |
| * <end-token> | |
| * <foreach-token> | |
| * <set-token> | |
| * <parse-token> | |
| * <macro-token> | |
| * <macro-call> | |
| * <comment> |
| * }</pre> |
| */ |
| private Node parseDirective() throws IOException { |
| String directive; |
| if (c == '{') { |
| next(); |
| directive = parseId("Directive inside #{...}"); |
| expect('}'); |
| } else { |
| directive = parseId("Directive"); |
| } |
| Node node; |
| switch (directive) { |
| case "end": |
| node = new EndTokenNode(resourceName, lineNumber()); |
| break; |
| case "if": |
| case "elseif": |
| node = parseIfOrElseIf(directive); |
| break; |
| case "else": |
| node = new ElseTokenNode(resourceName, lineNumber()); |
| break; |
| case "foreach": |
| node = parseForEach(); |
| break; |
| case "set": |
| node = parseSet(); |
| break; |
| case "parse": |
| node = parseParse(); |
| break; |
| case "macro": |
| node = parseMacroDefinition(); |
| break; |
| default: |
| node = parsePossibleMacroCall(directive); |
| } |
| // Velocity skips a newline after any directive. |
| // TODO(emcmanus): in fact it also skips space before the newline, which should be implemented. |
| if (c == '\n') { |
| next(); |
| } |
| return node; |
| } |
| |
| /** |
| * Parses the condition following {@code #if} or {@code #elseif}. |
| * <pre>{@code |
| * <if-token> -> #if ( <condition> ) |
| * <elseif-token> -> #elseif ( <condition> ) |
| * }</pre> |
| * |
| * @param directive either {@code "if"} or {@code "elseif"}. |
| */ |
| private Node parseIfOrElseIf(String directive) throws IOException { |
| expect('('); |
| ExpressionNode condition = parseExpression(); |
| expect(')'); |
| return directive.equals("if") ? new IfTokenNode(condition) : new ElseIfTokenNode(condition); |
| } |
| |
| /** |
| * Parses a {@code #foreach} token from the reader. <pre>{@code |
| * <foreach-token> -> #foreach ( $<id> in <expression> ) |
| * }</pre> |
| */ |
| private Node parseForEach() throws IOException { |
| expect('('); |
| expect('$'); |
| String var = parseId("For-each variable"); |
| skipSpace(); |
| boolean bad = false; |
| if (c != 'i') { |
| bad = true; |
| } else { |
| next(); |
| if (c != 'n') { |
| bad = true; |
| } |
| } |
| if (bad) { |
| throw parseException("Expected 'in' for #foreach"); |
| } |
| next(); |
| ExpressionNode collection = parseExpression(); |
| expect(')'); |
| return new ForEachTokenNode(var, collection); |
| } |
| |
| /** |
| * Parses a {@code #set} token from the reader. <pre>{@code |
| * <set-token> -> #set ( $<id> = <expression>) |
| * }</pre> |
| */ |
| private Node parseSet() throws IOException { |
| expect('('); |
| expect('$'); |
| String var = parseId("#set variable"); |
| expect('='); |
| ExpressionNode expression = parseExpression(); |
| expect(')'); |
| return new SetNode(var, expression); |
| } |
| |
| /** |
| * Parses a {@code #parse} token from the reader. <pre>{@code |
| * <parse-token> -> #parse ( <string-literal> ) |
| * }</pre> |
| * |
| * <p>The way this works is inconsistent with Velocity. In Velocity, the {@code #parse} directive |
| * is evaluated when it is encountered during template evaluation. That means that the argument |
| * can be a variable, and it also means that you can use {@code #if} to choose whether or not |
| * to do the {@code #parse}. Neither of those is true in EscapeVelocity. The contents of the |
| * {@code #parse} are integrated into the containing template pretty much as if they had been |
| * written inline. That also means that EscapeVelocity allows forward references to macros |
| * inside {@code #parse} directives, which Velocity does not. |
| */ |
| private Node parseParse() throws IOException { |
| expect('('); |
| skipSpace(); |
| if (c != '"') { |
| throw parseException("#parse only supported with string literal argument"); |
| } |
| String nestedResourceName = readStringLiteral(); |
| expect(')'); |
| try (Reader nestedReader = resourceOpener.openResource(nestedResourceName)) { |
| Parser nestedParser = new Parser(nestedReader, nestedResourceName, resourceOpener); |
| ImmutableList<Node> nestedTokens = nestedParser.parseTokens(); |
| return new NestedTokenNode(nestedResourceName, nestedTokens); |
| } |
| } |
| |
| /** |
| * Parses a {@code #macro} token from the reader. <pre>{@code |
| * <macro-token> -> #macro ( <id> <macro-parameter-list> ) |
| * <macro-parameter-list> -> <empty> | |
| * $<id> <macro-parameter-list> |
| * }</pre> |
| * |
| * <p>Macro parameters are not separated by commas, though method-reference parameters are. |
| */ |
| private Node parseMacroDefinition() throws IOException { |
| expect('('); |
| skipSpace(); |
| String name = parseId("Macro name"); |
| ImmutableList.Builder<String> parameterNames = ImmutableList.builder(); |
| while (true) { |
| skipSpace(); |
| if (c == ')') { |
| next(); |
| break; |
| } |
| if (c != '$') { |
| throw parseException("Macro parameters should look like $name"); |
| } |
| next(); |
| parameterNames.add(parseId("Macro parameter name")); |
| } |
| return new MacroDefinitionTokenNode(resourceName, lineNumber(), name, parameterNames.build()); |
| } |
| |
| /** |
| * Parses an identifier after {@code #} that is not one of the standard directives. The assumption |
| * is that it is a call of a macro that is defined in the template. Macro definitions are |
| * extracted from the template during the second parsing phase (and not during evaluation of the |
| * template as you might expect). This means that a macro can be called before it is defined. |
| * <pre>{@code |
| * <macro-call> -> # <id> ( <expression-list> ) |
| * <expression-list> -> <empty> | |
| * <expression> <optional-comma> <expression-list> |
| * <optional-comma> -> <empty> | , |
| * }</pre> |
| */ |
| private Node parsePossibleMacroCall(String directive) throws IOException { |
| skipSpace(); |
| if (c != '(') { |
| throw parseException("Unrecognized directive #" + directive); |
| } |
| next(); |
| ImmutableList.Builder<Node> parameterNodes = ImmutableList.builder(); |
| while (true) { |
| skipSpace(); |
| if (c == ')') { |
| next(); |
| break; |
| } |
| parameterNodes.add(parsePrimary()); |
| if (c == ',') { |
| // The documentation doesn't say so, but you can apparently have an optional comma in |
| // macro calls. |
| next(); |
| } |
| } |
| return new DirectiveNode.MacroCallNode( |
| resourceName, lineNumber(), directive, parameterNodes.build()); |
| } |
| |
| /** |
| * Parses and discards a comment, which is {@code ##} followed by any number of characters up to |
| * and including the next newline. |
| */ |
| private Node parseComment() throws IOException { |
| int lineNumber = lineNumber(); |
| while (c != '\n' && c != EOF) { |
| next(); |
| } |
| next(); |
| return new CommentTokenNode(resourceName, lineNumber); |
| } |
| |
| /** |
| * Parses plain text, which is text that contains neither {@code $} nor {@code #}. The given |
| * {@code firstChar} is the first character of the plain text, and {@link #c} is the second |
| * (if the plain text is more than one character). |
| */ |
| private Node parsePlainText(int firstChar) throws IOException { |
| StringBuilder sb = new StringBuilder(); |
| sb.appendCodePoint(firstChar); |
| |
| literal: |
| while (true) { |
| switch (c) { |
| case EOF: |
| case '$': |
| case '#': |
| break literal; |
| default: |
| // Just some random character. |
| } |
| sb.appendCodePoint(c); |
| next(); |
| } |
| return new ConstantExpressionNode(resourceName, lineNumber(), sb.toString()); |
| } |
| |
| /** |
| * Parses a reference, which is everything that can start with a {@code $}. References can |
| * optionally be enclosed in braces, so {@code $x} and {@code ${x}} are the same. Braces are |
| * useful when text after the reference would otherwise be parsed as part of it. For example, |
| * {@code ${x}y} is a reference to the variable {@code $x}, followed by the plain text {@code y}. |
| * Of course {@code $xy} would be a reference to the variable {@code $xy}. |
| * <pre>{@code |
| * <reference> -> $<reference-no-brace> | |
| * ${<reference-no-brace>} |
| * }</pre> |
| * |
| * <p>On entry to this method, {@link #c} is the character immediately after the {@code $}. |
| */ |
| private ReferenceNode parseReference() throws IOException { |
| if (c == '{') { |
| next(); |
| ReferenceNode node = parseReferenceNoBrace(); |
| expect('}'); |
| return node; |
| } else { |
| return parseReferenceNoBrace(); |
| } |
| } |
| |
| /** |
| * Parses a reference, in the simple form without braces. |
| * <pre>{@code |
| * <reference-no-brace> -> <id><reference-suffix> |
| * }</pre> |
| */ |
| private ReferenceNode parseReferenceNoBrace() throws IOException { |
| String id = parseId("Reference"); |
| ReferenceNode lhs = new PlainReferenceNode(resourceName, lineNumber(), id); |
| return parseReferenceSuffix(lhs); |
| } |
| |
| /** |
| * Parses the modifiers that can appear at the tail of a reference. |
| * <pre>{@code |
| * <reference-suffix> -> <empty> | |
| * <reference-member> | |
| * <reference-index> |
| * }</pre> |
| * |
| * @param lhs the reference node representing the first part of the reference |
| * {@code $x} in {@code $x.foo} or {@code $x.foo()}, or later {@code $x.y} in {@code $x.y.z}. |
| */ |
| private ReferenceNode parseReferenceSuffix(ReferenceNode lhs) throws IOException { |
| switch (c) { |
| case '.': |
| return parseReferenceMember(lhs); |
| case '[': |
| return parseReferenceIndex(lhs); |
| default: |
| return lhs; |
| } |
| } |
| |
| /** |
| * Parses a reference member, which is either a property reference like {@code $x.y} or a method |
| * call like {@code $x.y($z)}. |
| * <pre>{@code |
| * <reference-member> -> .<id><reference-property-or-method><reference-suffix> |
| * <reference-property-or-method> -> <id> | |
| * <id> ( <method-parameter-list> ) |
| * }</pre> |
| * |
| * @param lhs the reference node representing what appears to the left of the dot, like the |
| * {@code $x} in {@code $x.foo} or {@code $x.foo()}. |
| */ |
| private ReferenceNode parseReferenceMember(ReferenceNode lhs) throws IOException { |
| assert c == '.'; |
| next(); |
| String id = parseId("Member"); |
| ReferenceNode reference; |
| if (c == '(') { |
| reference = parseReferenceMethodParams(lhs, id); |
| } else { |
| reference = new MemberReferenceNode(lhs, id); |
| } |
| return parseReferenceSuffix(reference); |
| } |
| |
| /** |
| * Parses the parameters to a method reference, like {@code $foo.bar($a, $b)}. |
| * <pre>{@code |
| * <method-parameter-list> -> <empty> | |
| * <non-empty-method-parameter-list> |
| * <non-empty-method-parameter-list> -> <expression> | |
| * <expression> , <non-empty-method-parameter-list> |
| * }</pre> |
| * |
| * @param lhs the reference node representing what appears to the left of the dot, like the |
| * {@code $x} in {@code $x.foo()}. |
| */ |
| private ReferenceNode parseReferenceMethodParams(ReferenceNode lhs, String id) |
| throws IOException { |
| assert c == '('; |
| nextNonSpace(); |
| ImmutableList.Builder<ExpressionNode> args = ImmutableList.builder(); |
| if (c != ')') { |
| args.add(parseExpression()); |
| while (c == ',') { |
| nextNonSpace(); |
| args.add(parseExpression()); |
| } |
| if (c != ')') { |
| throw parseException("Expected )"); |
| } |
| } |
| assert c == ')'; |
| next(); |
| return new MethodReferenceNode(lhs, id, args.build()); |
| } |
| |
| /** |
| * Parses an index suffix to a method, like {@code $x[$i]}. |
| * <pre>{@code |
| * <reference-index> -> [ <expression> ] |
| * }</pre> |
| * |
| * @param lhs the reference node representing what appears to the left of the dot, like the |
| * {@code $x} in {@code $x[$i]}. |
| */ |
| private ReferenceNode parseReferenceIndex(ReferenceNode lhs) throws IOException { |
| assert c == '['; |
| next(); |
| ExpressionNode index = parseExpression(); |
| if (c != ']') { |
| throw parseException("Expected ]"); |
| } |
| next(); |
| ReferenceNode reference = new IndexReferenceNode(lhs, index); |
| return parseReferenceSuffix(reference); |
| } |
| |
enum Operator {
  /**
   * A dummy operator whose precedence is lower than that of every real operator. When parsing
   * subexpressions, we always stop when we reach an operator of lower precedence than the
   * "current precedence". For example, when parsing {@code 1 + 2 * 3 + 4}, we stop parsing the
   * subexpression {@code * 3 + 4} at the second {@code +} because it has lower precedence than
   * {@code *}. This dummy operator behaves like that {@code +} whatever the minimum precedence
   * is. It is also what we report when we look for an operator and do not find one, so it is as
   * if every expression were bracketed with it: {@code STOP 1 + 2 * 3 + 4 STOP}.
   */
  STOP("", 0),

  // Declaration order matters: if a one-character operator is a prefix of a two-character
  // operator, like < and <=, then the one-character operator must come first.
  OR("||", 1),
  AND("&&", 2),
  EQUAL("==", 3),
  NOT_EQUAL("!=", 3),
  LESS("<", 4),
  LESS_OR_EQUAL("<=", 4),
  GREATER(">", 4),
  GREATER_OR_EQUAL(">=", 4),
  PLUS("+", 5),
  MINUS("-", 5),
  TIMES("*", 6),
  DIVIDE("/", 6),
  REMAINDER("%", 6);

  /** The operator as it is spelled in a template; empty for {@link #STOP}. */
  final String symbol;

  /** Binding strength; a larger value binds more tightly. */
  final int precedence;

  Operator(String symbol, int precedence) {
    this.symbol = symbol;
    this.precedence = precedence;
  }

  /** Returns the operator's {@link #symbol}. */
  @Override
  public String toString() {
    return symbol;
  }
}
| |
/**
 * Maps a code point to the operators whose symbol begins with that code point, in declaration
 * order. For example, maps {@code <} to {@code LESS} and {@code LESS_OR_EQUAL}.
 * {@link Operator#STOP} has an empty symbol and is not in the map.
 */
private static final Map<Integer, List<Operator>> CODE_POINT_TO_OPERATORS;
static {
  Map<Integer, List<Operator>> byFirstCodePoint = new HashMap<>();
  for (Operator candidate : Operator.values()) {
    if (candidate == Operator.STOP) {
      continue;
    }
    Integer first = candidate.symbol.codePointAt(0);
    List<Operator> sameStart = byFirstCodePoint.get(first);
    if (sameStart == null) {
      sameStart = new ArrayList<Operator>();
      byFirstCodePoint.put(first, sameStart);
    }
    sameStart.add(candidate);
  }
  CODE_POINT_TO_OPERATORS = Collections.unmodifiableMap(byFirstCodePoint);
}
| |
| /** |
| * Parses an expression, which can occur within a directive like {@code #if} or {@code #set}, |
| * or within a reference like {@code $x[$a + $b]} or {@code $x.m($a + $b)}. |
| * <pre>{@code |
| * <expression> -> <and-expression> | |
| * <expression> || <and-expression> |
| * <and-expression> -> <relational-expression> | |
| * <and-expression> && <relational-expression> |
| * <equality-exression> -> <relational-expression> | |
| * <equality-expression> <equality-op> <relational-expression> |
| * <equality-op> -> == | != |
| * <relational-expression> -> <additive-expression> | |
| * <relational-expression> <relation> <additive-expression> |
| * <relation> -> < | <= | > | >= |
| * <additive-expression> -> <multiplicative-expression> | |
| * <additive-expression> <add-op> <multiplicative-expression> |
| * <add-op> -> + | - |
| * <multiplicative-expression> -> <unary-expression> | |
| * <multiplicative-expression> <mult-op> <unary-expression> |
| * <mult-op> -> * | / | % |
| * }</pre> |
| */ |
| private ExpressionNode parseExpression() throws IOException { |
| ExpressionNode lhs = parseUnaryExpression(); |
| return new OperatorParser().parse(lhs, 1); |
| } |
| |
| /** |
| * An operator-precedence parser for the binary operations we understand. It implements an |
| * <a href="http://en.wikipedia.org/wiki/Operator-precedence_parser">algorithm</a> from Wikipedia |
| * that uses recursion rather than having an explicit stack of operators and values. |
| */ |
| private class OperatorParser { |
| /** |
| * The operator we have just scanned, in the same way that {@link #c} is the character we have |
| * just read. If we were not able to scan an operator, this will be {@link Operator#STOP}. |
| */ |
| private Operator currentOperator; |
| |
| OperatorParser() throws IOException { |
| nextOperator(); |
| } |
| |
| /** |
| * Parse a subexpression whose left-hand side is {@code lhs} and where we only consider |
| * operators with precedence at least {@code minPrecedence}. |
| * |
| * @return the parsed subexpression |
| */ |
| ExpressionNode parse(ExpressionNode lhs, int minPrecedence) throws IOException { |
| while (currentOperator.precedence >= minPrecedence) { |
| Operator operator = currentOperator; |
| ExpressionNode rhs = parseUnaryExpression(); |
| nextOperator(); |
| while (currentOperator.precedence > operator.precedence) { |
| rhs = parse(rhs, currentOperator.precedence); |
| } |
| lhs = new BinaryExpressionNode(lhs, operator, rhs); |
| } |
| return lhs; |
| } |
| |
| /** |
| * Updates {@link #currentOperator} to be an operator read from the input, |
| * or {@link Operator#STOP} if there is none. |
| */ |
| private void nextOperator() throws IOException { |
| skipSpace(); |
| List<Operator> possibleOperators = CODE_POINT_TO_OPERATORS.get(c); |
| if (possibleOperators == null) { |
| currentOperator = Operator.STOP; |
| return; |
| } |
| int firstChar = c; |
| next(); |
| Operator operator = null; |
| for (Operator possibleOperator : possibleOperators) { |
| if (possibleOperator.symbol.length() == 1) { |
| assert operator == null; |
| operator = possibleOperator; |
| } else if (possibleOperator.symbol.charAt(1) == c) { |
| next(); |
| operator = possibleOperator; |
| } |
| } |
| if (operator == null) { |
| throw parseException("Expected " + possibleOperators.get(0) + ", not just " + firstChar); |
| } |
| currentOperator = operator; |
| } |
| } |
| |
| /** |
| * Parses an expression not containing any operators (except inside parentheses). |
| * <pre>{@code |
| * <unary-expression> -> <primary> | |
| * ( <expression> ) | |
| * ! <unary-expression> |
| * }</pre> |
| */ |
| private ExpressionNode parseUnaryExpression() throws IOException { |
| skipSpace(); |
| ExpressionNode node; |
| if (c == '(') { |
| nextNonSpace(); |
| node = parseExpression(); |
| expect(')'); |
| skipSpace(); |
| return node; |
| } else if (c == '!') { |
| next(); |
| node = new NotExpressionNode(parseUnaryExpression()); |
| skipSpace(); |
| return node; |
| } else { |
| return parsePrimary(); |
| } |
| } |
| |
| |
| /** |
| * Parses an expression containing only literals or references. |
| * <pre>{@code |
| * <primary> -> <reference> | |
| * <string-literal> | |
| * <integer-literal> | |
| * <boolean-literal> |
| * }</pre> |
| */ |
| private ExpressionNode parsePrimary() throws IOException { |
| ExpressionNode node; |
| if (c == '$') { |
| next(); |
| node = parseReference(); |
| } else if (c == '"') { |
| node = parseStringLiteral(); |
| } else if (c == '-') { |
| // Velocity does not have a negation operator. If we see '-' it must be the start of a |
| // negative integer literal. |
| next(); |
| node = parseIntLiteral("-"); |
| } else if (isAsciiDigit(c)) { |
| node = parseIntLiteral(""); |
| } else if (isAsciiLetter(c)) { |
| node = parseBooleanLiteral(); |
| } else { |
| throw parseException("Expected an expression"); |
| } |
| skipSpace(); |
| return node; |
| } |
| |
| private ExpressionNode parseStringLiteral() throws IOException { |
| return new ConstantExpressionNode(resourceName, lineNumber(), readStringLiteral()); |
| } |
| |
| private String readStringLiteral() throws IOException { |
| assert c == '"'; |
| StringBuilder sb = new StringBuilder(); |
| next(); |
| while (c != '"') { |
| if (c == '\n' || c == EOF) { |
| throw parseException("Unterminated string constant"); |
| } |
| if (c == '$' || c == '\\') { |
| // In real Velocity, you can have a $ reference expanded inside a "" string literal. |
| // There are also '' string literals where that is not so. We haven't needed that yet |
| // so it's not supported. |
| throw parseException( |
| "Escapes or references in string constants are not currently supported"); |
| } |
| sb.appendCodePoint(c); |
| next(); |
| } |
| next(); |
| return sb.toString(); |
| } |
| |
| private ExpressionNode parseIntLiteral(String prefix) throws IOException { |
| StringBuilder sb = new StringBuilder(prefix); |
| while (isAsciiDigit(c)) { |
| sb.appendCodePoint(c); |
| next(); |
| } |
| int value; |
| try { |
| value = Integer.parseInt(sb.toString()); |
| } catch (NumberFormatException e) { |
| throw parseException("Invalid integer: " + sb); |
| } |
| return new ConstantExpressionNode(resourceName, lineNumber(), value); |
| } |
| |
| /** |
| * Parses a boolean literal, either {@code true} or {@code false}. |
| * <boolean-literal> -> true | |
| * false |
| */ |
| private ExpressionNode parseBooleanLiteral() throws IOException { |
| String s = parseId("Identifier without $"); |
| boolean value; |
| if (s.equals("true")) { |
| value = true; |
| } else if (s.equals("false")) { |
| value = false; |
| } else { |
| throw parseException("Identifier in expression must be preceded by $ or be true or false"); |
| } |
| return new ConstantExpressionNode(resourceName, lineNumber(), value); |
| } |
| |
| private static final ImmutableAsciiSet ASCII_LETTER = |
| ImmutableAsciiSet.ofRange('A', 'Z') |
| .union(ImmutableAsciiSet.ofRange('a', 'z')); |
| |
| private static final ImmutableAsciiSet ASCII_DIGIT = |
| ImmutableAsciiSet.ofRange('0', '9'); |
| |
| private static final ImmutableAsciiSet ID_CHAR = |
| ASCII_LETTER |
| .union(ASCII_DIGIT) |
| .union(ImmutableAsciiSet.of('-')) |
| .union(ImmutableAsciiSet.of('_')); |
| |
| private static boolean isAsciiLetter(int c) { |
| return ASCII_LETTER.contains(c); |
| } |
| |
| private static boolean isAsciiDigit(int c) { |
| return ASCII_DIGIT.contains(c); |
| } |
| |
| private static boolean isIdChar(int c) { |
| return ID_CHAR.contains(c); |
| } |
| |
| /** |
| * Parse an identifier as specified by the |
| * <a href="http://velocity.apache.org/engine/devel/vtl-reference-guide.html#Variables">VTL |
| * </a>. Identifiers are ASCII: starts with a letter, then letters, digits, {@code -} and |
| * {@code _}. |
| */ |
| private String parseId(String what) throws IOException { |
| if (!isAsciiLetter(c)) { |
| throw parseException(what + " should start with an ASCII letter"); |
| } |
| StringBuilder id = new StringBuilder(); |
| while (isIdChar(c)) { |
| id.appendCodePoint(c); |
| next(); |
| } |
| return id.toString(); |
| } |
| |
| /** |
| * Returns an exception to be thrown describing a parse error with the given message, and |
| * including information about where it occurred. |
| */ |
| private ParseException parseException(String message) throws IOException { |
| StringBuilder context = new StringBuilder(); |
| if (c == EOF) { |
| context.append("EOF"); |
| } else { |
| int count = 0; |
| while (c != EOF && count < 20) { |
| context.appendCodePoint(c); |
| next(); |
| count++; |
| } |
| if (c != EOF) { |
| context.append("..."); |
| } |
| } |
| return new ParseException(message, resourceName, lineNumber(), context.toString()); |
| } |
| } |