blob: 9982be3da92a9f98d2a8445e0cd8882f009b580e [file] [log] [blame]
/*
* Copyright (C) 2015 Google, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.escapevelocity;
import com.google.escapevelocity.DirectiveNode.SetNode;
import com.google.escapevelocity.ExpressionNode.BinaryExpressionNode;
import com.google.escapevelocity.ExpressionNode.NotExpressionNode;
import com.google.escapevelocity.ReferenceNode.IndexReferenceNode;
import com.google.escapevelocity.ReferenceNode.MemberReferenceNode;
import com.google.escapevelocity.ReferenceNode.MethodReferenceNode;
import com.google.escapevelocity.ReferenceNode.PlainReferenceNode;
import com.google.escapevelocity.TokenNode.CommentTokenNode;
import com.google.escapevelocity.TokenNode.ElseIfTokenNode;
import com.google.escapevelocity.TokenNode.ElseTokenNode;
import com.google.escapevelocity.TokenNode.EndTokenNode;
import com.google.escapevelocity.TokenNode.EofNode;
import com.google.escapevelocity.TokenNode.ForEachTokenNode;
import com.google.escapevelocity.TokenNode.IfTokenNode;
import com.google.escapevelocity.TokenNode.MacroDefinitionTokenNode;
import com.google.escapevelocity.TokenNode.NestedTokenNode;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* A parser that reads input from the given {@link Reader} and parses it to produce a
* {@link Template}.
*
* @author emcmanus@google.com (Éamonn McManus)
*/
class Parser {
/** Sentinel value held by {@link #c} once the reader has been exhausted. */
private static final int EOF = -1;
/** Wraps the input so that the current line number (counted from 1) is available. */
private final LineNumberReader reader;
/** Name of the template being parsed; passed to the nodes and parse exceptions created here. */
private final String resourceName;
/** Opens the resource named by a {@code #parse} directive so that it can be parsed in turn. */
private final Template.ResourceOpener resourceOpener;
/**
* The invariant of this parser is that {@code c} is always the next character of interest.
* This means that we never have to "unget" a character by reading too far. For example, after
* we parse an integer, {@code c} will be the first character after the integer, which is exactly
* the state we will be in when there are no more digits.
*
* <p>Once the input is exhausted this holds {@link #EOF} and stays there.
*/
private int c;
/**
 * Creates a parser over the given input and reads its first character, so that {@link #c}
 * already holds the next character of interest when parsing starts.
 *
 * @param reader the template text; it is wrapped in a {@link LineNumberReader} whose line
 *     count starts at 1
 * @param resourceName the name recorded in the nodes and parse errors produced by this parser
 * @param resourceOpener used to open the resources named by {@code #parse} directives
 * @throws IOException if reading the first character fails
 */
Parser(Reader reader, String resourceName, Template.ResourceOpener resourceOpener)
    throws IOException {
  LineNumberReader lineCountingReader = new LineNumberReader(reader);
  lineCountingReader.setLineNumber(1);
  this.reader = lineCountingReader;
  this.resourceName = resourceName;
  this.resourceOpener = resourceOpener;
  // Prime the one-character lookahead.
  next();
}
/**
 * Reads the whole input and turns it into a {@link Template}.
 *
 * <p>The work is done in two passes. The first pass produces a flat list of "tokens". A token
 * may be a complete reference such as <pre>
 *    ${x.foo()[23]}
 * </pre>or a complete directive such as<pre>
 *    #set ($x = $y + $z)
 * </pre>but a token never covers a compound construct. For instance<pre>
 *    #if ($x == $y) something #end
 * </pre>is split into three tokens:<pre>
 *    #if ($x == $y)
 *    (literal text " something ")
 *    #end
 * </pre>
 *
 * <p>The second pass walks the token list and builds the parse tree. Tokens such as <pre>
 *    ${x.foo()[23]}
 *    #set ($x = $y + $z)
 * </pre> go into the tree as they are, whereas the {@code #if ... #end} sequence above is
 * folded into a single IfNode.
 *
 * <p>The reason for the split is that Velocity has two lexical contexts. At the top level the
 * input is arbitrary literal text mixed with references like <code>${x.foo()}</code> and
 * directives like {@code #if} or {@code #set}. Inside the parentheses of a directive there is
 * neither literal text nor any directive, only expressions, so <pre>
 *    #if ($x == $a + $b)
 * </pre> has to be read as the five tokens "$x", "==", "$a", "+", "$b". A classical lexer would
 * have to switch between those two modes. Instead, the lexer is replaced by the ad-hoc parser
 * of the first pass, and the second pass is a simple parser over the tokens it produced.
 *
 * @return the template built from the input
 * @throws IOException if reading the input fails
 */
Template parse() throws IOException {
  return new Reparser(parseTokens()).reparse();
}
/**
 * Runs the first parsing pass: reads nodes one after another until the end of the input.
 *
 * @return every node read, in order; the last element is always the {@link EofNode}
 */
private ImmutableList<Node> parseTokens() throws IOException {
  ImmutableList.Builder<Node> collected = ImmutableList.builder();
  while (true) {
    Node node = parseNode();
    collected.add(node);
    if (node instanceof EofNode) {
      return collected.build();
    }
  }
}
/**
* Returns the line number currently reported by the underlying {@link LineNumberReader}. Since
* {@link #c} has already been read from that reader, this is the line count as of the
* lookahead character.
*/
private int lineNumber() {
return reader.getLineNumber();
}
/**
 * Advances the lookahead: reads one character from the reader into {@code c}. Once {@code c}
 * has become {@link #EOF} the reader is not consulted again and {@code c} stays at
 * {@link #EOF}.
 */
private void next() throws IOException {
  if (c == EOF) {
    return;
  }
  c = reader.read();
}
/**
 * Advances past whitespace: while {@code c} is a whitespace character, reads the next one.
 * Returns immediately if {@code c} is not whitespace, which includes the end of the input.
 */
private void skipSpace() throws IOException {
  while (true) {
    if (!Character.isWhitespace(c)) {
      return;
    }
    next();
  }
}
/**
 * Discards the current character unconditionally, then discards any whitespace that follows,
 * leaving {@code c} on the first non-whitespace character (or at the end of the input).
 */
private void nextNonSpace() throws IOException {
  next();
  while (Character.isWhitespace(c)) {
    next();
  }
}
/**
 * Requires the next non-whitespace character to be {@code expected} and consumes it, so that
 * {@code c} ends up on the character immediately after it.
 *
 * @param expected the character that must come next, optionally preceded by whitespace
 * @throws ParseException if some other character (or the end of the input) is found instead
 */
private void expect(char expected) throws IOException {
  skipSpace();
  if (c != expected) {
    throw parseException("Expected " + expected);
  }
  next();
}
/**
 * Reads one node of the first parsing pass.
 * <pre>{@code
 * <template> -> <empty> |
 *               <directive> <template> |
 *               <non-directive> <template>
 * }</pre>
 *
 * @return an {@link EofNode} at the end of the input, otherwise the node for the comment,
 *     directive, quoted block, reference or literal text that starts at {@code c}
 */
private Node parseNode() throws IOException {
  if (c == EOF) {
    return new EofNode(resourceName, lineNumber());
  }
  if (c != '#') {
    return parseNonDirective();
  }
  // We have a #; what it means depends on the character after it.
  next();
  switch (c) {
    case '#':
      return parseComment();
    case '[':
      return parseHashSquare();
    case '{':
      return parseDirective();
    default:
      if (isAsciiLetter(c)) {
        return parseDirective();
      }
      // As in Velocity, a # that is followed by neither # nor a letter is an ordinary
      // character, and #$foo is a literal # followed by the reference $foo. The # becomes a
      // ConstantExpressionNode of its own; it is not merged with the text around it.
      return new ConstantExpressionNode(resourceName, lineNumber(), "#");
  }
}
/**
 * Handles input that starts with {@code #[}. If a second {@code [} follows, this is a
 * {@code #[[quoted block]]#} and everything up to the closing {@code ]]#} is returned verbatim
 * as a constant. Otherwise the two characters {@code #[} are themselves returned as literal
 * text and {@code c} is left on whatever came after them.
 *
 * @throws ParseException if a quoted block is opened but the input ends before {@code ]]#}
 */
private Node parseHashSquare() throws IOException {
  assert c == '[';
  next();
  if (c != '[') {
    return new ConstantExpressionNode(resourceName, lineNumber(), "#[");
  }
  next();
  StringBuilder text = new StringBuilder();
  boolean closed = false;
  while (!closed) {
    if (c == EOF) {
      throw parseException("Unterminated #[[ - did not see matching ]]#");
    }
    int size = text.length();
    // A # ends the block only when the two characters collected just before it are ]].
    if (c == '#'
        && size >= 2
        && text.charAt(size - 1) == ']'
        && text.charAt(size - 2) == ']') {
      closed = true;
    } else {
      text.append((char) c);
    }
    next();
  }
  // Drop the ]] that belonged to the terminator.
  text.setLength(text.length() - 2);
  return new ConstantExpressionNode(resourceName, lineNumber(), text.toString());
}
/**
 * Reads one node that is not a directive.
 * <pre>{@code
 * <non-directive> -> <reference> |
 *                    <text containing neither $ nor #>
 * }</pre>
 *
 * <p>A {@code $} starts a reference only when it is followed by an ASCII letter or {@code {};
 * any other {@code $} is treated as the first character of plain text.
 */
private Node parseNonDirective() throws IOException {
  int first = c;
  next();
  if (first != '$') {
    return parsePlainText(first);
  }
  if (c == '{' || isAsciiLetter(c)) {
    return parseReference();
  }
  return parsePlainText('$');
}
/**
 * Reads one directive token. On entry {@code c} is the character after the {@code #}. The
 * directive name may be written bare or in braces, as in {@code #if} or {@code #{if}}; the
 * productions below show only the bare spelling: <pre>{@code
 * <directive> -> <if-token> |
 *                <else-token> |
 *                <elseif-token> |
 *                <end-token> |
 *                <foreach-token> |
 *                <set-token> |
 *                <parse-token> |
 *                <macro-token> |
 *                <macro-call> |
 *                <comment>
 * }</pre>
 *
 * <p>Any name that is not one of the built-in directives is treated as a macro call.
 */
private Node parseDirective() throws IOException {
  boolean braced = (c == '{');
  if (braced) {
    next();
  }
  String directive = parseId(braced ? "Directive inside #{...}" : "Directive");
  if (braced) {
    expect('}');
  }

  Node node;
  if (directive.equals("end")) {
    node = new EndTokenNode(resourceName, lineNumber());
  } else if (directive.equals("if") || directive.equals("elseif")) {
    node = parseIfOrElseIf(directive);
  } else if (directive.equals("else")) {
    node = new ElseTokenNode(resourceName, lineNumber());
  } else if (directive.equals("foreach")) {
    node = parseForEach();
  } else if (directive.equals("set")) {
    node = parseSet();
  } else if (directive.equals("parse")) {
    node = parseParse();
  } else if (directive.equals("macro")) {
    node = parseMacroDefinition();
  } else {
    node = parsePossibleMacroCall(directive);
  }

  // Velocity swallows a newline that comes straight after a directive.
  // TODO(emcmanus): it also swallows spaces before that newline, which is not implemented here.
  if (c == '\n') {
    next();
  }
  return node;
}
/**
 * Reads the parenthesized condition that follows {@code #if} or {@code #elseif}.
 * <pre>{@code
 * <if-token> -> #if ( <condition> )
 * <elseif-token> -> #elseif ( <condition> )
 * }</pre>
 *
 * @param directive {@code "if"} or {@code "elseif"}; anything other than {@code "if"} produces
 *     an {@link ElseIfTokenNode}
 * @return an {@link IfTokenNode} or {@link ElseIfTokenNode} holding the condition
 */
private Node parseIfOrElseIf(String directive) throws IOException {
  expect('(');
  ExpressionNode test = parseExpression();
  expect(')');
  if (directive.equals("if")) {
    return new IfTokenNode(test);
  }
  return new ElseIfTokenNode(test);
}
/**
 * Reads the remainder of a {@code #foreach} token. <pre>{@code
 * <foreach-token> -> #foreach ( $<id> in <expression> )
 * }</pre>
 *
 * @throws ParseException if the loop variable is not followed by the characters {@code in}
 */
private Node parseForEach() throws IOException {
  expect('(');
  expect('$');
  String loopVariable = parseId("For-each variable");
  skipSpace();
  // Match the keyword "in" one character at a time. The 'i' is consumed only if it is really
  // there, so the context in the error message starts at the first character that did not match.
  boolean sawI = (c == 'i');
  if (sawI) {
    next();
  }
  if (!sawI || c != 'n') {
    throw parseException("Expected 'in' for #foreach");
  }
  next();
  ExpressionNode iterated = parseExpression();
  expect(')');
  return new ForEachTokenNode(loopVariable, iterated);
}
/**
 * Reads the remainder of a {@code #set} token. <pre>{@code
 * <set-token> -> #set ( $<id> = <expression>)
 * }</pre>
 *
 * @return a {@link SetNode} pairing the variable name (without its {@code $}) with the
 *     expression to be assigned to it
 */
private Node parseSet() throws IOException {
  expect('(');
  expect('$');
  String target = parseId("#set variable");
  expect('=');
  ExpressionNode assigned = parseExpression();
  expect(')');
  return new SetNode(target, assigned);
}
/**
 * Reads the remainder of a {@code #parse} token, then immediately opens and tokenizes the
 * resource it names. <pre>{@code
 * <parse-token> -> #parse ( <string-literal> )
 * }</pre>
 *
 * <p>This differs from Velocity, where {@code #parse} is evaluated when it is reached during
 * template evaluation. There the argument can be a variable, and an enclosing {@code #if} can
 * decide whether the {@code #parse} happens at all. Neither is true in EscapeVelocity: the
 * contents of the named resource are integrated into the containing template pretty much as if
 * they had been written inline. As a consequence EscapeVelocity also allows forward references
 * to macros inside {@code #parse} directives, which Velocity does not.
 *
 * @return a {@link NestedTokenNode} holding the tokens of the nested resource
 * @throws ParseException if the argument is not a string literal
 */
private Node parseParse() throws IOException {
  expect('(');
  skipSpace();
  if (c != '"') {
    throw parseException("#parse only supported with string literal argument");
  }
  String nestedName = readStringLiteral();
  expect(')');
  try (Reader nestedInput = resourceOpener.openResource(nestedName)) {
    ImmutableList<Node> nestedTokens =
        new Parser(nestedInput, nestedName, resourceOpener).parseTokens();
    return new NestedTokenNode(nestedName, nestedTokens);
  }
}
/**
 * Reads the remainder of a {@code #macro} token. <pre>{@code
 * <macro-token> -> #macro ( <id> <macro-parameter-list> )
 * <macro-parameter-list> -> <empty> |
 *                           $<id> <macro-parameter-list>
 * }</pre>
 *
 * <p>Unlike the arguments of a method reference, macro parameters are separated by whitespace
 * only, not by commas.
 *
 * @throws ParseException if something other than {@code $name} or {@code )} follows the
 *     macro name
 */
private Node parseMacroDefinition() throws IOException {
  expect('(');
  skipSpace();
  String macroName = parseId("Macro name");
  ImmutableList.Builder<String> parameters = ImmutableList.builder();
  for (skipSpace(); c != ')'; skipSpace()) {
    if (c != '$') {
      throw parseException("Macro parameters should look like $name");
    }
    next();
    parameters.add(parseId("Macro parameter name"));
  }
  // Consume the closing parenthesis.
  next();
  return new MacroDefinitionTokenNode(resourceName, lineNumber(), macroName, parameters.build());
}
/**
 * Reads what follows a {@code #name} that is not one of the standard directives, on the
 * assumption that it is a call to a macro defined somewhere in the template. Macro definitions
 * are collected during the second parsing phase rather than during template evaluation, so a
 * macro may be called before the point where it is defined.
 * <pre>{@code
 * <macro-call> -> # <id> ( <expression-list> )
 * <expression-list> -> <empty> |
 *                      <expression> <optional-comma> <expression-list>
 * <optional-comma> -> <empty> | ,
 * }</pre>
 *
 * @param directive the identifier that was read after the {@code #}
 * @throws ParseException if the identifier is not followed by {@code (}
 */
private Node parsePossibleMacroCall(String directive) throws IOException {
  skipSpace();
  if (c != '(') {
    throw parseException("Unrecognized directive #" + directive);
  }
  next();
  ImmutableList.Builder<Node> arguments = ImmutableList.builder();
  for (skipSpace(); c != ')'; skipSpace()) {
    arguments.add(parsePrimary());
    // The documentation doesn't mention it, but a comma is apparently allowed between the
    // arguments of a macro call.
    if (c == ',') {
      next();
    }
  }
  // Consume the closing parenthesis.
  next();
  return new DirectiveNode.MacroCallNode(
      resourceName, lineNumber(), directive, arguments.build());
}
/**
 * Reads and throws away a comment: {@code ##} and every character after it, up to and
 * including the next newline (or up to the end of the input). The returned node carries the
 * line number at which the comment started.
 */
private Node parseComment() throws IOException {
  int startLine = lineNumber();
  while (!(c == '\n' || c == EOF)) {
    next();
  }
  // Step over the newline; this does nothing if we stopped at the end of the input.
  next();
  return new CommentTokenNode(resourceName, startLine);
}
/**
 * Reads a run of plain text, which ends just before the next {@code $} or {@code #} or at the
 * end of the input.
 *
 * @param firstChar the first character of the text, which the caller has already consumed; it
 *     is included unconditionally, while {@link #c} is the candidate second character
 * @return a constant node containing the text
 */
private Node parsePlainText(int firstChar) throws IOException {
  StringBuilder text = new StringBuilder();
  text.appendCodePoint(firstChar);
  while (c != EOF && c != '$' && c != '#') {
    text.appendCodePoint(c);
    next();
  }
  return new ConstantExpressionNode(resourceName, lineNumber(), text.toString());
}
/**
 * Reads a reference, that is, anything that can follow a {@code $}. The reference may be
 * wrapped in braces: {@code $x} and {@code ${x}} mean the same thing. Braces matter when the
 * text after the reference would otherwise be taken as part of it: {@code ${x}y} is the
 * variable {@code $x} followed by the plain text {@code y}, whereas {@code $xy} is the variable
 * {@code $xy}.
 * <pre>{@code
 * <reference> -> $<reference-no-brace> |
 *                ${<reference-no-brace>}
 * }</pre>
 *
 * <p>When this method is called, {@link #c} is the character immediately after the {@code $}.
 */
private ReferenceNode parseReference() throws IOException {
  if (c != '{') {
    return parseReferenceNoBrace();
  }
  next();
  ReferenceNode braced = parseReferenceNoBrace();
  expect('}');
  return braced;
}
/**
 * Reads a reference in its simple form, without braces: an identifier followed by any number
 * of member, method or index suffixes.
 * <pre>{@code
 * <reference-no-brace> -> <id><reference-suffix>
 * }</pre>
 */
private ReferenceNode parseReferenceNoBrace() throws IOException {
  String name = parseId("Reference");
  return parseReferenceSuffix(new PlainReferenceNode(resourceName, lineNumber(), name));
}
/**
 * Reads whatever modifiers follow a reference.
 * <pre>{@code
 * <reference-suffix> -> <empty> |
 *                       <reference-member> |
 *                       <reference-index>
 * }</pre>
 *
 * @param lhs the reference parsed so far, for example {@code $x} in {@code $x.foo} or
 *     {@code $x.foo()}, or later {@code $x.y} in {@code $x.y.z}
 * @return {@code lhs} itself if {@code c} is neither {@code .} nor {@code [}; otherwise the
 *     reference extended by the suffixes that follow
 */
private ReferenceNode parseReferenceSuffix(ReferenceNode lhs) throws IOException {
  if (c == '.') {
    return parseReferenceMember(lhs);
  }
  if (c == '[') {
    return parseReferenceIndex(lhs);
  }
  return lhs;
}
/**
 * Reads a member suffix, which is either a property reference like {@code $x.y} or a method
 * call like {@code $x.y($z)}, and then any suffixes that follow it.
 * <pre>{@code
 * <reference-member> -> .<reference-property-or-method><reference-suffix>
 * <reference-property-or-method> -> <id> |
 *                                   <id> ( <method-parameter-list> )
 * }</pre>
 *
 * <p>The identifier must come straight after the dot; otherwise {@link #parseId} reports a
 * parse error.
 *
 * @param lhs the reference to the left of the dot, like the {@code $x} in {@code $x.foo} or
 *     {@code $x.foo()}
 */
private ReferenceNode parseReferenceMember(ReferenceNode lhs) throws IOException {
  assert c == '.';
  next();
  String memberName = parseId("Member");
  if (c == '(') {
    return parseReferenceSuffix(parseReferenceMethodParams(lhs, memberName));
  }
  return parseReferenceSuffix(new MemberReferenceNode(lhs, memberName));
}
/**
 * Reads the parenthesized, comma-separated arguments of a method reference such as
 * {@code $foo.bar($a, $b)}. On entry {@code c} is the opening parenthesis; on return it is the
 * character after the closing one.
 * <pre>{@code
 * <method-parameter-list> -> <empty> |
 *                            <non-empty-method-parameter-list>
 * <non-empty-method-parameter-list> -> <expression> |
 *                                      <expression> , <non-empty-method-parameter-list>
 * }</pre>
 *
 * @param lhs the reference to the left of the dot, like the {@code $x} in {@code $x.foo()}
 * @param id the name of the method being called
 * @throws ParseException if an argument is followed by something other than {@code ,} or
 *     {@code )}
 */
private ReferenceNode parseReferenceMethodParams(ReferenceNode lhs, String id)
    throws IOException {
  assert c == '(';
  nextNonSpace();
  ImmutableList.Builder<ExpressionNode> arguments = ImmutableList.builder();
  boolean expectArgument = (c != ')');
  while (expectArgument) {
    arguments.add(parseExpression());
    if (c == ',') {
      // Another argument must follow the comma.
      nextNonSpace();
    } else if (c == ')') {
      expectArgument = false;
    } else {
      throw parseException("Expected )");
    }
  }
  assert c == ')';
  next();
  return new MethodReferenceNode(lhs, id, arguments.build());
}
/**
 * Reads an index suffix of a reference, like the {@code [$i]} in {@code $x[$i]}, and then any
 * suffixes that follow it.
 * <pre>{@code
 * <reference-index> -> [ <expression> ]
 * }</pre>
 *
 * @param lhs the reference to the left of the bracket, like the {@code $x} in {@code $x[$i]}
 * @throws ParseException if the index expression is not followed by {@code ]}
 */
private ReferenceNode parseReferenceIndex(ReferenceNode lhs) throws IOException {
  assert c == '[';
  next();
  ExpressionNode subscript = parseExpression();
  if (c == ']') {
    next();
    return parseReferenceSuffix(new IndexReferenceNode(lhs, subscript));
  }
  throw parseException("Expected ]");
}
/**
 * The binary operators understood by the expression parser, each with the text that spells it
 * and its precedence; a larger precedence number binds more tightly.
 */
enum Operator {
  /**
   * A placeholder operator whose precedence is lower than that of every real operator. A
   * subexpression is always parsed up to the first operator whose precedence is below the
   * current minimum. For example, in {@code 1 + 2 * 3 + 4} the subexpression {@code * 3 + 4}
   * ends at the second {@code +} because {@code +} has lower precedence than {@code *}. This
   * placeholder plays the same role as that {@code +}, and it is also what the parser reports
   * when it looks for an operator and finds none. In effect every expression behaves as if it
   * were bracketed by it, like {@code STOP 1 + 2 * 3 + 4 STOP}.
   */
  STOP("", 0),

  // Declaration order matters: when a one-character operator is a prefix of a two-character
  // operator, like < and <=, the one-character operator must be declared first.
  OR("||", 1),
  AND("&&", 2),
  EQUAL("==", 3),
  NOT_EQUAL("!=", 3),
  LESS("<", 4),
  LESS_OR_EQUAL("<=", 4),
  GREATER(">", 4),
  GREATER_OR_EQUAL(">=", 4),
  PLUS("+", 5),
  MINUS("-", 5),
  TIMES("*", 6),
  DIVIDE("/", 6),
  REMAINDER("%", 6);

  /** The text of the operator as written in a template; empty for {@link #STOP}. */
  final String symbol;

  /** The binding strength of the operator; {@link #STOP} has the lowest value, 0. */
  final int precedence;

  Operator(String text, int level) {
    symbol = text;
    precedence = level;
  }

  /** Returns the operator's {@link #symbol}. */
  @Override
  public String toString() {
    return symbol;
  }
}
/**
 * Index from the first code point of an operator symbol to every operator whose symbol starts
 * with it, in declaration order. For example {@code <} maps to {@code LESS} and
 * {@code LESS_OR_EQUAL}. {@link Operator#STOP}, whose symbol is empty, is not in the index.
 */
private static final Map<Integer, List<Operator>> CODE_POINT_TO_OPERATORS;

static {
  Map<Integer, List<Operator>> byFirstCodePoint = new HashMap<>();
  for (Operator candidate : Operator.values()) {
    if (candidate == Operator.STOP) {
      continue;
    }
    Integer firstCodePoint = candidate.symbol.codePointAt(0);
    List<Operator> sameStart = byFirstCodePoint.get(firstCodePoint);
    if (sameStart == null) {
      sameStart = new ArrayList<Operator>();
      byFirstCodePoint.put(firstCodePoint, sameStart);
    }
    sameStart.add(candidate);
  }
  CODE_POINT_TO_OPERATORS = Collections.unmodifiableMap(byFirstCodePoint);
}
/**
* Parses an expression, which can occur within a directive like {@code #if} or {@code #set},
* or within a reference like {@code $x[$a + $b]} or {@code $x.m($a + $b)}.
* <pre>{@code
* <expression> -> <and-expression> |
*                 <expression> || <and-expression>
* <and-expression> -> <equality-expression> |
*                     <and-expression> && <equality-expression>
* <equality-expression> -> <relational-expression> |
*                          <equality-expression> <equality-op> <relational-expression>
* <equality-op> -> == | !=
* <relational-expression> -> <additive-expression> |
*                            <relational-expression> <relation> <additive-expression>
* <relation> -> < | <= | > | >=
* <additive-expression> -> <multiplicative-expression> |
*                          <additive-expression> <add-op> <multiplicative-expression>
* <add-op> -> + | -
* <multiplicative-expression> -> <unary-expression> |
*                                <multiplicative-expression> <mult-op> <unary-expression>
* <mult-op> -> * | / | %
* }</pre>
*
* <p>The grammar levels are not separate methods here; the binary operators are handled by
* {@link OperatorParser} using the precedences declared in {@link Operator}.
*/
private ExpressionNode parseExpression() throws IOException {
// The first operand must be consumed before the OperatorParser is created, because its
// constructor immediately scans the input for the operator that follows that operand.
ExpressionNode lhs = parseUnaryExpression();
// 1 is the precedence of ||, the lowest of the real operators, so every operator is accepted.
return new OperatorParser().parse(lhs, 1);
}
/**
 * An operator-precedence parser for the binary operations we understand. It implements an
 * <a href="http://en.wikipedia.org/wiki/Operator-precedence_parser">algorithm</a> from Wikipedia
 * that uses recursion rather than having an explicit stack of operators and values.
 *
 * <p>An instance must be created only after the first operand has been consumed, because the
 * constructor immediately scans the input for the operator that follows it.
 */
private class OperatorParser {
  /**
   * The operator we have just scanned, in the same way that {@link #c} is the character we have
   * just read. If we were not able to scan an operator, this will be {@link Operator#STOP}.
   */
  private Operator currentOperator;

  /** Scans the operator, if any, at the current input position. */
  OperatorParser() throws IOException {
    nextOperator();
  }

  /**
   * Parses a subexpression whose left-hand side is {@code lhs} and where we only consider
   * operators with precedence at least {@code minPrecedence}.
   *
   * @param lhs the operand that has already been parsed
   * @param minPrecedence the lowest operator precedence that still belongs to this subexpression
   * @return the parsed subexpression, which is {@code lhs} itself if the current operator has a
   *     lower precedence than {@code minPrecedence}
   */
  ExpressionNode parse(ExpressionNode lhs, int minPrecedence) throws IOException {
    while (currentOperator.precedence >= minPrecedence) {
      Operator operator = currentOperator;
      ExpressionNode rhs = parseUnaryExpression();
      nextOperator();
      // A tighter-binding operator after rhs takes rhs as its own left operand, so fold that
      // whole subexpression into rhs before combining it with lhs.
      while (currentOperator.precedence > operator.precedence) {
        rhs = parse(rhs, currentOperator.precedence);
      }
      lhs = new BinaryExpressionNode(lhs, operator, rhs);
    }
    return lhs;
  }

  /**
   * Updates {@link #currentOperator} to be an operator read from the input,
   * or {@link Operator#STOP} if there is none.
   *
   * @throws ParseException if the input has the first character of a two-character operator
   *     such as {@code ==} but not its second character
   */
  private void nextOperator() throws IOException {
    skipSpace();
    List<Operator> possibleOperators = CODE_POINT_TO_OPERATORS.get(c);
    if (possibleOperators == null) {
      currentOperator = Operator.STOP;
      return;
    }
    int firstChar = c;
    next();
    Operator operator = null;
    // One-character operators are listed before the two-character operators they are a prefix
    // of, so a longer match found later in the list replaces the shorter one.
    for (Operator possibleOperator : possibleOperators) {
      if (possibleOperator.symbol.length() == 1) {
        assert operator == null;
        operator = possibleOperator;
      } else if (possibleOperator.symbol.charAt(1) == c) {
        next();
        operator = possibleOperator;
      }
    }
    if (operator == null) {
      // firstChar is an int, so it must be converted back to a char here; concatenating the int
      // itself would put its numeric code (for example 61 for '=') into the message.
      throw parseException(
          "Expected " + possibleOperators.get(0) + ", not just " + (char) firstChar);
    }
    currentOperator = operator;
  }
}
/**
 * Reads an expression that has no binary operator at its top level: a primary, a parenthesized
 * expression, or a negation. Whitespace before and after it is skipped.
 * <pre>{@code
 * <unary-expression> -> <primary> |
 *                       ( <expression> ) |
 *                       ! <unary-expression>
 * }</pre>
 */
private ExpressionNode parseUnaryExpression() throws IOException {
  skipSpace();
  if (c != '(' && c != '!') {
    // parsePrimary skips the whitespace after the primary itself.
    return parsePrimary();
  }
  ExpressionNode result;
  if (c == '(') {
    nextNonSpace();
    result = parseExpression();
    expect(')');
  } else {
    next();
    result = new NotExpressionNode(parseUnaryExpression());
  }
  skipSpace();
  return result;
}
/**
 * Reads an expression that consists of a single literal or reference, and skips any
 * whitespace after it. The kind of primary is decided by the character in {@code c}.
 * <pre>{@code
 * <primary> -> <reference> |
 *              <string-literal> |
 *              <integer-literal> |
 *              <boolean-literal>
 * }</pre>
 *
 * @throws ParseException if {@code c} cannot start any of these
 */
private ExpressionNode parsePrimary() throws IOException {
  ExpressionNode primary;
  switch (c) {
    case '$':
      next();
      primary = parseReference();
      break;
    case '"':
      primary = parseStringLiteral();
      break;
    case '-':
      // Velocity has no negation operator, so a '-' at this point can only be the sign of a
      // negative integer literal.
      next();
      primary = parseIntLiteral("-");
      break;
    default:
      if (isAsciiDigit(c)) {
        primary = parseIntLiteral("");
      } else if (isAsciiLetter(c)) {
        primary = parseBooleanLiteral();
      } else {
        throw parseException("Expected an expression");
      }
  }
  skipSpace();
  return primary;
}
/**
* Parses a double-quoted string literal into a constant node. On entry {@code c} must be the
* opening quote.
*/
private ExpressionNode parseStringLiteral() throws IOException {
// Argument order matters: lineNumber() is evaluated before readStringLiteral() consumes any
// input, so the node records the line number as of the opening quote.
return new ConstantExpressionNode(resourceName, lineNumber(), readStringLiteral());
}
/**
 * Reads a double-quoted string literal and returns the text between the quotes. On entry
 * {@code c} must be the opening quote; on return it is the character after the closing quote.
 *
 * @throws ParseException if the line or the input ends before the closing quote, or if the
 *     literal contains {@code $} or a backslash
 */
private String readStringLiteral() throws IOException {
  assert c == '"';
  StringBuilder literal = new StringBuilder();
  for (next(); c != '"'; next()) {
    switch (c) {
      case '\n':
      case EOF:
        throw parseException("Unterminated string constant");
      case '$':
      case '\\':
        // Real Velocity expands $ references inside "" string literals, and also has ''
        // string literals where that does not happen. Neither has been needed so far, so
        // neither is supported.
        throw parseException(
            "Escapes or references in string constants are not currently supported");
      default:
        literal.appendCodePoint(c);
    }
  }
  // Step over the closing quote.
  next();
  return literal.toString();
}
/**
 * Reads the run of ASCII digits starting at {@code c} and converts it, with {@code prefix}
 * prepended, into an integer constant.
 *
 * @param prefix text to put before the digits: {@code "-"} for a negative literal whose sign
 *     has already been consumed, otherwise the empty string
 * @throws ParseException if the resulting text is not a valid {@code int}, for example because
 *     there are no digits or the value is out of range
 */
private ExpressionNode parseIntLiteral(String prefix) throws IOException {
  StringBuilder digits = new StringBuilder(prefix);
  for (; isAsciiDigit(c); next()) {
    digits.appendCodePoint(c);
  }
  String text = digits.toString();
  final int parsed;
  try {
    parsed = Integer.parseInt(text);
  } catch (NumberFormatException e) {
    throw parseException("Invalid integer: " + text);
  }
  return new ConstantExpressionNode(resourceName, lineNumber(), parsed);
}
/**
 * Reads a boolean literal, either {@code true} or {@code false}.
 * <pre>{@code
 * <boolean-literal> -> true |
 *                      false
 * }</pre>
 *
 * @throws ParseException if the identifier at the current position is any other word
 */
private ExpressionNode parseBooleanLiteral() throws IOException {
  String word = parseId("Identifier without $");
  boolean isTrue = word.equals("true");
  if (!isTrue && !word.equals("false")) {
    throw parseException("Identifier in expression must be preceded by $ or be true or false");
  }
  return new ConstantExpressionNode(resourceName, lineNumber(), isTrue);
}
/** The ASCII letters: the union of the ranges {@code A} to {@code Z} and {@code a} to {@code z}. */
private static final ImmutableAsciiSet ASCII_LETTER =
ImmutableAsciiSet.ofRange('A', 'Z')
.union(ImmutableAsciiSet.ofRange('a', 'z'));
/** The ASCII digits: the range {@code 0} to {@code 9}. */
private static final ImmutableAsciiSet ASCII_DIGIT =
ImmutableAsciiSet.ofRange('0', '9');
/**
* The characters allowed inside an identifier: ASCII letters and digits, plus {@code -} and
* {@code _}.
*/
private static final ImmutableAsciiSet ID_CHAR =
ASCII_LETTER
.union(ASCII_DIGIT)
.union(ImmutableAsciiSet.of('-'))
.union(ImmutableAsciiSet.of('_'));
/**
* Tests {@code c} for membership in {@link #ASCII_LETTER}. Callers pass {@link #c}, which may be
* {@link #EOF}; NOTE(review): this presumably yields false for {@code EOF} - confirm against
* {@code ImmutableAsciiSet.contains}.
*/
private static boolean isAsciiLetter(int c) {
return ASCII_LETTER.contains(c);
}
/** Tests {@code c} for membership in {@link #ASCII_DIGIT}. */
private static boolean isAsciiDigit(int c) {
return ASCII_DIGIT.contains(c);
}
/** Tests {@code c} for membership in {@link #ID_CHAR}. */
private static boolean isIdChar(int c) {
return ID_CHAR.contains(c);
}
/**
 * Reads an identifier as specified by the
 * <a href="http://velocity.apache.org/engine/devel/vtl-reference-guide.html#Variables">VTL
 * </a>. Identifiers are ASCII: a letter, followed by any number of letters, digits, {@code -}
 * and {@code _}. On return {@code c} is the first character that is not part of the identifier.
 *
 * @param what a description of the thing being parsed, used as the start of the error message
 * @throws ParseException if {@code c} is not an ASCII letter
 */
private String parseId(String what) throws IOException {
  if (!isAsciiLetter(c)) {
    throw parseException(what + " should start with an ASCII letter");
  }
  StringBuilder name = new StringBuilder();
  // The first character is a letter, and every letter is an identifier character, so it can be
  // appended before the loop condition is tested.
  do {
    name.appendCodePoint(c);
    next();
  } while (isIdChar(c));
  return name.toString();
}
/**
 * Returns an exception to be thrown describing a parse error with the given message, and
 * including information about where it occurred: the resource name, the line number at the
 * moment the error was detected, and up to 20 characters of the input starting at {@code c}
 * (or the text {@code EOF} if the input is exhausted).
 *
 * <p>Building the context consumes input, so parsing cannot meaningfully continue after this
 * method has been called.
 *
 * @param message the description of what went wrong
 * @return the exception, for the caller to throw
 * @throws IOException if reading the context characters fails
 */
private ParseException parseException(String message) throws IOException {
  // Capture the line number before reading ahead. The context loop below may consume newlines,
  // which would advance the reader's line counter past the place where the error was found.
  int errorLine = lineNumber();
  StringBuilder context = new StringBuilder();
  if (c == EOF) {
    context.append("EOF");
  } else {
    int count = 0;
    while (c != EOF && count < 20) {
      context.appendCodePoint(c);
      next();
      count++;
    }
    if (c != EOF) {
      // There was more input than we are showing.
      context.append("...");
    }
  }
  return new ParseException(message, resourceName, errorLine, context.toString());
}
}