Make readExpr return an Expr object instead of a vector of tokens.
Previously, we handled an expression as a vector of tokens. In other
words, an expression was a vector of uncooked raw StringRefs.
When we need a value of an expression, we used ExprParser to run
the expression.
The separation was needed essentially because parse time is too
early to evaluate an expression. In order to evaluate an expression,
we need to finalize section sizes. Because linker script parsing
is done at very early stage of the linking process, we can't
evaluate expressions while parsing.
The above mechanism worked fairly well, but there were a few
drawbacks.
One thing is that we sometimes have to parse the same expression
more than once in order to find the end of the expression.
In some contexts, linker script expressions have no clear end marker.
So, we needed to recognize balanced expressions and ternary operators.
The other is poor error reporting. Since expressions are parsed
basically twice, and some information that is available at the first
stage is lost in the second stage, it was hard to print out
apprpriate error messages.
This patch fixes the issues with a new approach.
Now the expression parsing is integrated into ScriptParser.
ExprParser class is removed. Expressions are represented as lambdas
instead of vectors of tokens. Lambdas captures information they
need to run themselves when they are created.
In this way, ends of expressions are naturally detected, and
errors are handled in the usual way. This patch also reduces
the amount of code.
Differential Revision: https://reviews.llvm.org/D22728
llvm-svn: 276574
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index b95287a..e968e2f 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -55,188 +55,6 @@
   return C->Kind == InputSectionKind;
 }
 
-// This is an operator-precedence parser to parse and evaluate
-// a linker script expression. For each linker script arithmetic
-// expression (e.g. ". = . + 0x1000"), a new instance of ExprParser
-// is created and ran.
-namespace {
-class ExprParser : public ScriptParserBase {
-public:
-  ExprParser(std::vector<StringRef> &Tokens, uint64_t Dot)
-      : ScriptParserBase(Tokens), Dot(Dot) {}
-
-  uint64_t run();
-
-private:
-  uint64_t parsePrimary();
-  uint64_t parseTernary(uint64_t Cond);
-  uint64_t apply(StringRef Op, uint64_t L, uint64_t R);
-  uint64_t parseExpr1(uint64_t Lhs, int MinPrec);
-  uint64_t parseExpr();
-
-  uint64_t Dot;
-};
-}
-
-static int precedence(StringRef Op) {
-  return StringSwitch<int>(Op)
-      .Case("*", 4)
-      .Case("/", 4)
-      .Case("+", 3)
-      .Case("-", 3)
-      .Case("<", 2)
-      .Case(">", 2)
-      .Case(">=", 2)
-      .Case("<=", 2)
-      .Case("==", 2)
-      .Case("!=", 2)
-      .Case("&", 1)
-      .Default(-1);
-}
-
-static uint64_t evalExpr(std::vector<StringRef> &Tokens, uint64_t Dot) {
-  return ExprParser(Tokens, Dot).run();
-}
-
-uint64_t ExprParser::run() {
-  uint64_t V = parseExpr();
-  if (!atEOF() && !Error)
-    setError("stray token: " + peek());
-  return V;
-}
-
-uint64_t static getConstantValue(StringRef C) {
-  if (C == "COMMONPAGESIZE" || C == "MAXPAGESIZE")
-    return Target->PageSize;
-  error("unknown constant: " + C);
-  return 0;
-}
-
-// This is a part of the operator-precedence parser to evaluate
-// arithmetic expressions in SECTIONS command. This function evaluates an
-// integer literal, a parenthesized expression, the ALIGN function,
-// or the special variable ".".
-uint64_t ExprParser::parsePrimary() {
-  StringRef Tok = next();
-  if (Tok == ".")
-    return Dot;
-  if (Tok == "(") {
-    uint64_t V = parseExpr();
-    expect(")");
-    return V;
-  }
-  if (Tok == "ALIGN") {
-    expect("(");
-    uint64_t V = parseExpr();
-    expect(")");
-    return alignTo(Dot, V);
-  }
-  if (Tok == "CONSTANT") {
-    expect("(");
-    uint64_t V = getConstantValue(next());
-    expect(")");
-    return V;
-  }
-  // Documentations says there are two ways to compute
-  // the value of DATA_SEGMENT_ALIGN command, depending on whether the second
-  // uses fewer COMMONPAGESIZE sized pages for the data segment(area between the
-  // result of this expression and `DATA_SEGMENT_END') than the first or not.
-  // That is possible optimization, that we do not support, so we compute that
-  // function always as (ALIGN(MAXPAGESIZE) + (. & (MAXPAGESIZE - 1))) now.
-  if (Tok == "DATA_SEGMENT_ALIGN") {
-    expect("(");
-    uint64_t L = parseExpr();
-    expect(",");
-    parseExpr();
-    expect(")");
-    return alignTo(Dot, L) + (Dot & (L - 1));
-  }
-  // Since we do not support the optimization from comment above,
-  // we can just ignore that command.
-  if (Tok == "DATA_SEGMENT_END") {
-    expect("(");
-    expect(".");
-    expect(")");
-    return Dot;
-  }
-  uint64_t V = 0;
-  if (Tok.getAsInteger(0, V))
-    setError("malformed number: " + Tok);
-  return V;
-}
-
-uint64_t ExprParser::parseTernary(uint64_t Cond) {
-  next();
-  uint64_t V = parseExpr();
-  expect(":");
-  uint64_t W = parseExpr();
-  return Cond ? V : W;
-}
-
-uint64_t ExprParser::apply(StringRef Op, uint64_t L, uint64_t R) {
-  if (Op == "*")
-    return L * R;
-  if (Op == "/") {
-    if (R == 0) {
-      error("division by zero");
-      return 0;
-    }
-    return L / R;
-  }
-  if (Op == "+")
-    return L + R;
-  if (Op == "-")
-    return L - R;
-  if (Op == "<")
-    return L < R;
-  if (Op == ">")
-    return L > R;
-  if (Op == ">=")
-    return L >= R;
-  if (Op == "<=")
-    return L <= R;
-  if (Op == "==")
-    return L == R;
-  if (Op == "!=")
-    return L != R;
-  if (Op == "&")
-    return L & R;
-  llvm_unreachable("invalid operator");
-}
-
-// This is a part of the operator-precedence parser.
-// This function assumes that the remaining token stream starts
-// with an operator.
-uint64_t ExprParser::parseExpr1(uint64_t Lhs, int MinPrec) {
-  while (!atEOF()) {
-    // Read an operator and an expression.
-    StringRef Op1 = peek();
-    if (Op1 == "?")
-      return parseTernary(Lhs);
-    if (precedence(Op1) < MinPrec)
-      return Lhs;
-    next();
-    uint64_t Rhs = parsePrimary();
-
-    // Evaluate the remaining part of the expression first if the
-    // next operator has greater precedence than the previous one.
-    // For example, if we have read "+" and "3", and if the next
-    // operator is "*", then we'll evaluate 3 * ... part first.
-    while (!atEOF()) {
-      StringRef Op2 = peek();
-      if (precedence(Op2) <= precedence(Op1))
-        break;
-      Rhs = parseExpr1(Rhs, precedence(Op2));
-    }
-
-    Lhs = apply(Op1, Lhs, Rhs);
-  }
-  return Lhs;
-}
-
-// Reads and evaluates an arithmetic expression.
-uint64_t ExprParser::parseExpr() { return parseExpr1(parsePrimary(), 0); }
-
 template <class ELFT> static bool isDiscarded(InputSectionBase<ELFT> *S) {
   return !S || !S->Live;
 }
@@ -334,7 +152,7 @@
 
 template <class ELFT>
 void LinkerScript<ELFT>::dispatchAssignment(SymbolAssignment *Cmd) {
-  uint64_t Val = evalExpr(Cmd->Expr, Dot);
+  uint64_t Val = Cmd->Expression(Dot);
   if (Cmd->Name == ".") {
     Dot = Val;
   } else if (!Cmd->Ignore) {
@@ -597,13 +415,18 @@
   void readSearchDir();
   void readSections();
 
-  void readLocationCounterValue();
+  void readAssignment();
   void readOutputSectionDescription(StringRef OutSec);
   std::vector<StringRef> readOutputSectionPhdrs();
   unsigned readPhdrType();
   void readProvide(bool Hidden);
   SymbolAssignment *readSymbolAssignment(StringRef Name);
-  std::vector<StringRef> readSectionsCommandExpr();
+
+  Expr readExpr();
+  Expr readExpr1(Expr Lhs, int MinPrec);
+  Expr readPrimary();
+  Expr readTernary(Expr Cond);
+  Expr combine(StringRef Op, Expr Lhs, Expr Rhs);
 
   const static StringMap<Handler> Cmd;
   ScriptConfiguration &Opt = *ScriptConfig;
@@ -795,29 +618,45 @@
   while (!Error && !skip("}")) {
     StringRef Tok = peek();
     if (Tok == ".") {
-      readLocationCounterValue();
+      readAssignment();
       continue;
     }
     next();
-    if (Tok == "PROVIDE")
+    if (Tok == "PROVIDE") {
       readProvide(false);
-    else if (Tok == "PROVIDE_HIDDEN")
+    } else if (Tok == "PROVIDE_HIDDEN") {
       readProvide(true);
-    else if (peek() == "=")
+    } else if (peek() == "=") {
       readSymbolAssignment(Tok);
-    else
+      expect(";");
+    } else {
       readOutputSectionDescription(Tok);
+    }
   }
 }
 
-void ScriptParser::readLocationCounterValue() {
+static int precedence(StringRef Op) {
+  return StringSwitch<int>(Op)
+      .Case("*", 4)
+      .Case("/", 4)
+      .Case("+", 3)
+      .Case("-", 3)
+      .Case("<", 2)
+      .Case(">", 2)
+      .Case(">=", 2)
+      .Case("<=", 2)
+      .Case("==", 2)
+      .Case("!=", 2)
+      .Case("&", 1)
+      .Default(-1);
+}
+
+void ScriptParser::readAssignment() {
   expect(".");
   expect("=");
-  std::vector<StringRef> Expr = readSectionsCommandExpr();
-  if (Expr.empty())
-    error("error in location counter expression");
-  else
-    Opt.Commands.push_back(llvm::make_unique<SymbolAssignment>(".", Expr));
+  Expr E = readExpr();
+  expect(";");
+  Opt.Commands.push_back(llvm::make_unique<SymbolAssignment>(".", E));
 }
 
 void ScriptParser::readOutputSectionDescription(StringRef OutSec) {
@@ -881,35 +720,142 @@
 
 SymbolAssignment *ScriptParser::readSymbolAssignment(StringRef Name) {
   expect("=");
-  std::vector<StringRef> Expr = readSectionsCommandExpr();
-  if (Expr.empty()) {
-    error("error in symbol assignment expression");
-  } else {
-    Opt.Commands.push_back(llvm::make_unique<SymbolAssignment>(Name, Expr));
-    return static_cast<SymbolAssignment *>(Opt.Commands.back().get());
-  }
-  return nullptr;
+  Expr E = readExpr();
+  Opt.Commands.push_back(llvm::make_unique<SymbolAssignment>(Name, E));
+  return static_cast<SymbolAssignment *>(Opt.Commands.back().get());
 }
 
-// This function reads balanced expression until semicolon is seen.
-std::vector<StringRef> ScriptParser::readSectionsCommandExpr() {
-  int Braces = 0;
-  std::vector<StringRef> Expr;
-  while (!Error) {
-    StringRef Tok = peek();
+// This is an operator-precedence parser to parse a linker
+// script expression.
+Expr ScriptParser::readExpr() { return readExpr1(readPrimary(), 0); }
 
-    if (Tok == "(")
-      Braces++;
-    else if (Tok == ")")
-      if (--Braces < 0)
-        break;
-
-    next();
-    if (Tok == ";")
+// This is a part of the operator-precedence parser. This function
+// assumes that the remaining token stream starts with an operator.
+Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) {
+  while (!atEOF() && !Error) {
+    // Read an operator and an expression.
+    StringRef Op1 = peek();
+    if (Op1 == "?")
+      return readTernary(Lhs);
+    if (precedence(Op1) < MinPrec)
       break;
-    Expr.push_back(Tok);
+    next();
+    Expr Rhs = readPrimary();
+
+    // Evaluate the remaining part of the expression first if the
+    // next operator has greater precedence than the previous one.
+    // For example, if we have read "+" and "3", and if the next
+    // operator is "*", then we'll evaluate 3 * ... part first.
+    while (!atEOF()) {
+      StringRef Op2 = peek();
+      if (precedence(Op2) <= precedence(Op1))
+        break;
+      Rhs = readExpr1(Rhs, precedence(Op2));
+    }
+
+    Lhs = combine(Op1, Lhs, Rhs);
   }
-  return Expr;
+  return Lhs;
+}
+
+uint64_t static getConstant(StringRef S) {
+  if (S == "COMMONPAGESIZE" || S == "MAXPAGESIZE")
+    return Target->PageSize;
+  error("unknown constant: " + S);
+  return 0;
+}
+
+Expr ScriptParser::readPrimary() {
+  StringRef Tok = next();
+
+  if (Tok == ".")
+    return [](uint64_t Dot) { return Dot; };
+
+  if (Tok == "(") {
+    Expr E = readExpr();
+    expect(")");
+    return E;
+  }
+
+  // Built-in functions are parsed here.
+  // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html.
+  if (Tok == "ALIGN") {
+    expect("(");
+    Expr E = readExpr();
+    expect(")");
+    return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); };
+  }
+  if (Tok == "CONSTANT") {
+    expect("(");
+    StringRef Tok = next();
+    expect(")");
+    return [=](uint64_t Dot) { return getConstant(Tok); };
+  }
+  if (Tok == "DATA_SEGMENT_ALIGN") {
+    expect("(");
+    Expr E = readExpr();
+    expect(",");
+    readExpr();
+    expect(")");
+    return [=](uint64_t Dot) -> uint64_t {
+      uint64_t Val = E(Dot);
+      return alignTo(Dot, Val) + (Dot & (Val - 1));
+    };
+  }
+  if (Tok == "DATA_SEGMENT_END") {
+    expect("(");
+    expect(".");
+    expect(")");
+    return [](uint64_t Dot) { return Dot; };
+  }
+
+  // Parse a number literal
+  uint64_t V = 0;
+  if (Tok.getAsInteger(0, V))
+    setError("malformed number: " + Tok);
+  return [=](uint64_t Dot) { return V; };
+}
+
+Expr ScriptParser::readTernary(Expr Cond) {
+  next();
+  Expr L = readExpr();
+  expect(":");
+  Expr R = readExpr();
+  return [=](uint64_t Dot) { return Cond(Dot) ? L(Dot) : R(Dot); };
+}
+
+Expr ScriptParser::combine(StringRef Op, Expr L, Expr R) {
+  if (Op == "*")
+    return [=](uint64_t Dot) { return L(Dot) * R(Dot); };
+  if (Op == "/") {
+    return [=](uint64_t Dot) -> uint64_t {
+      uint64_t RHS = R(Dot);
+      if (RHS == 0) {
+        error("division by zero");
+        return 0;
+      }
+      return L(Dot) / RHS;
+    };
+  }
+  if (Op == "+")
+    return [=](uint64_t Dot) { return L(Dot) + R(Dot); };
+  if (Op == "-")
+    return [=](uint64_t Dot) { return L(Dot) - R(Dot); };
+  if (Op == "<")
+    return [=](uint64_t Dot) { return L(Dot) < R(Dot); };
+  if (Op == ">")
+    return [=](uint64_t Dot) { return L(Dot) > R(Dot); };
+  if (Op == ">=")
+    return [=](uint64_t Dot) { return L(Dot) >= R(Dot); };
+  if (Op == "<=")
+    return [=](uint64_t Dot) { return L(Dot) <= R(Dot); };
+  if (Op == "==")
+    return [=](uint64_t Dot) { return L(Dot) == R(Dot); };
+  if (Op == "!=")
+    return [=](uint64_t Dot) { return L(Dot) != R(Dot); };
+  if (Op == "&")
+    return [=](uint64_t Dot) { return L(Dot) & R(Dot); };
+  llvm_unreachable("invalid operator");
 }
 
 std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() {
diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h
index 7e9f157..48fb2b5 100644
--- a/lld/ELF/LinkerScript.h
+++ b/lld/ELF/LinkerScript.h
@@ -16,6 +16,7 @@
 #include "llvm/ADT/MapVector.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include <functional>
 
 namespace lld {
 namespace elf {
@@ -23,6 +24,8 @@
 template <class ELFT> class OutputSectionBase;
 template <class ELFT> class OutputSectionFactory;
 
+typedef std::function<uint64_t(uint64_t)> Expr;
+
 // Parses a linker script. Calling this function updates
 // Config and ScriptConfig.
 void readLinkerScript(MemoryBufferRef MB);
@@ -46,11 +49,11 @@
 };
 
 struct SymbolAssignment : BaseCommand {
-  SymbolAssignment(StringRef Name, std::vector<StringRef> &Expr)
-      : BaseCommand(AssignmentKind), Name(Name), Expr(std::move(Expr)) {}
+  SymbolAssignment(StringRef Name, Expr E)
+      : BaseCommand(AssignmentKind), Name(Name), Expression(E) {}
   static bool classof(const BaseCommand *C);
   StringRef Name;
-  std::vector<StringRef> Expr;
+  Expr Expression;
   bool Provide = false;
   // Hidden and Ignore can be true, only if Provide is true
   bool Hidden = false;
diff --git a/lld/test/ELF/linkerscript-locationcounter.s b/lld/test/ELF/linkerscript-locationcounter.s
index 500c994..93f7778 100644
--- a/lld/test/ELF/linkerscript-locationcounter.s
+++ b/lld/test/ELF/linkerscript-locationcounter.s
@@ -311,7 +311,7 @@
 # RUN: }" > %t.script
 # RUN: not ld.lld %t --script %t.script -o %t2 2>&1 | \
 # RUN:  FileCheck --check-prefix=BRACKETERR %s
-# BRACKETERR: unexpected EOF
+# BRACKETERR: ) expected, but got ;
 
 ## Missing opening bracket.
 # RUN: echo "SECTIONS { \
@@ -319,7 +319,7 @@
 # RUN: }" > %t.script
 # RUN: not ld.lld %t --script %t.script -o %t2 2>&1 | \
 # RUN:  FileCheck --check-prefix=BRACKETERR2 %s
-# BRACKETERR2: expected, but got *
+# BRACKETERR2: ; expected, but got )
 
 ## Empty expression.
 # RUN: echo "SECTIONS { \
@@ -327,7 +327,7 @@
 # RUN: }" > %t.script
 # RUN: not ld.lld %t --script %t.script -o %t2 2>&1 | \
 # RUN:  FileCheck --check-prefix=ERREXPR %s
-# ERREXPR: error in location counter expression
+# ERREXPR: malformed number: ;
 
 ## Div by zero error.
 # RUN: echo "SECTIONS { \
@@ -343,7 +343,7 @@
 # RUN: }" > %t.script
 # RUN: not ld.lld %t --script %t.script -o %t2 2>&1 | \
 # RUN:  FileCheck --check-prefix=TERNERR %s
-# TERNERR: unexpected EOF
+# TERNERR: : expected, but got ;
 
 .globl _start
 _start: