ELF2: Add basic linker script support.
This linker script parser and evaluator is powerful enough to read
Linux's libc.so, which is (despite its name) a linker script that
contains OUTPUT_FORMAT, GROUP and AS_NEEDED directives.
The parser implemented in this patch is a recursive-descent one.
It does *not* construct an AST; instead it consumes directives in place
and stores the results in the Symtab object, much as Driver does.
This should be very fast, since fewer objects are allocated, and
it is also more readable.
http://reviews.llvm.org/D13232
llvm-svn: 248918
diff --git a/lld/ELF/DriverUtils.cpp b/lld/ELF/DriverUtils.cpp
index 704d7c9..516084c 100644
--- a/lld/ELF/DriverUtils.cpp
+++ b/lld/ELF/DriverUtils.cpp
@@ -13,10 +13,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "Config.h"
 #include "Driver.h"
 #include "Error.h"
+#include "SymbolTable.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/StringSaver.h"
 
 using namespace llvm;
@@ -75,3 +79,124 @@
 
   return Args;
 }
+
+// Parser and evaluator of the linker script.
+// Results are directly written to the SymbolTable object.
+namespace {
+class LinkerScript {
+public:
+  LinkerScript(SymbolTable *T, StringRef S) : Symtab(T), Tokens(tokenize(S)) {}
+  void run(); // Processes the token stream one directive at a time.
+
+private:
+  static StringRef skipSpace(StringRef S);
+  static std::vector<StringRef> tokenize(StringRef S);
+  bool atEOF() { return Pos == Tokens.size(); }
+  StringRef next();
+  void expect(StringRef Expect);
+  // One reader per supported directive.
+  void readGroup();
+  void readAsNeeded();
+  void readOutputFormat();
+
+  SymbolTable *Symtab; // Receives every file the script names.
+  std::vector<StringRef> Tokens;
+  size_t Pos = 0; // Index of the next unread entry in Tokens.
+};
+}
+
+// Dispatches each top-level directive to its reader until tokens run out.
+void LinkerScript::run() {
+  while (!atEOF()) {
+    StringRef Directive = next();
+    if (Directive == "OUTPUT_FORMAT")
+      readOutputFormat();
+    else if (Directive == "GROUP")
+      readGroup();
+    else
+      error("unknown directive: " + Directive);
+  }
+}
+
+// Lexer: splits the script text S into tokens. A token is either a maximal
+// run of word characters or one lone non-word character.
+std::vector<StringRef> LinkerScript::tokenize(StringRef S) {
+  // Characters that may appear inside a word token.
+  static const char WordChars[] =
+      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+      "0123456789_.$/\\~=+[]*?-:";
+  std::vector<StringRef> Result;
+  for (StringRef Rest = skipSpace(S); !Rest.empty(); Rest = skipSpace(Rest)) {
+    size_t Len = Rest.find_first_not_of(WordChars);
+    // A leading non-word character (usually punctuation) is its own token.
+    if (Len == 0)
+      Len = 1;
+    Result.push_back(Rest.substr(0, Len));
+    Rest = Rest.substr(Len);
+  }
+  return Result;
+}
+
+// Skip leading whitespace characters or /**/-style comments. An unclosed
+// comment is reported through error(); should that call return, the rest of
+// the input is dropped so that callers see an empty string.
+StringRef LinkerScript::skipSpace(StringRef S) {
+  for (;;) {
+    S = S.ltrim();
+    if (!S.startswith("/*"))
+      return S;
+    size_t E = S.find("*/", 2);
+    if (E == StringRef::npos) {
+      error("unclosed comment in a linker script");
+      return StringRef(); // E + 2 would wrap around and rescan the comment.
+    }
+    S = S.substr(E + 2);
+  }
+}
+
+StringRef LinkerScript::next() {
+  if (atEOF()) // Every caller needs a token, so running out is an error.
+    error("unexpected EOF");
+  return Tokens[Pos++];
+}
+
+void LinkerScript::expect(StringRef Expect) {
+  StringRef Got = next(); // Consumed whether or not it matches.
+  if (Got != Expect)
+    error(Expect + " expected, but got " + Got);
+}
+
+// Reads the parenthesized file list that follows AS_NEEDED, up to and
+// including the closing parenthesis. Every entry is opened and added to
+// the symbol table unconditionally, the same way readGroup treats a plain
+// member.
+void LinkerScript::readAsNeeded() {
+  expect("(");
+  for (StringRef Path = next(); Path != ")"; Path = next())
+    Symtab->addFile(createFile(openFile(Path)));
+}
+
+// Reads the parenthesized member list that follows GROUP. Each member is
+// opened and added to the symbol table in script order; a nested AS_NEEDED
+// list is handed to readAsNeeded.
+void LinkerScript::readGroup() {
+  // run() has already consumed the GROUP keyword itself.
+  expect("(");
+  for (StringRef Member = next(); Member != ")"; Member = next()) {
+    if (Member == "AS_NEEDED")
+      readAsNeeded();
+    else
+      Symtab->addFile(createFile(openFile(Member)));
+  }
+}
+
+void LinkerScript::readOutputFormat() {
+  // Only the shape "( <token> )" is checked; the token itself is discarded.
+  expect("(");
+  static_cast<void>(next());
+  expect(")");
+}
+
+void lld::elf2::readLinkerScript(SymbolTable *Symtab, MemoryBufferRef MB) {
+  LinkerScript(Symtab, MB.getBuffer()).run(); // Tokenizes MB, then evaluates.
+}