Add basic linker script parsing.

llvm-svn: 176309
diff --git a/lld/lib/ReaderWriter/CMakeLists.txt b/lld/lib/ReaderWriter/CMakeLists.txt
index 81b1151..2ad3165 100644
--- a/lld/lib/ReaderWriter/CMakeLists.txt
+++ b/lld/lib/ReaderWriter/CMakeLists.txt
@@ -6,9 +6,11 @@
 add_subdirectory(PECOFF)
 add_subdirectory(YAML)
 add_lld_library(lldReaderWriter
+  LinkerScript.cpp
   Reader.cpp
-  Writer.cpp
   ReaderArchive.cpp
+  ReaderLinkerScript.cpp
+  Writer.cpp
   )
 
 target_link_libraries(lldReaderWriter
diff --git a/lld/lib/ReaderWriter/ELF/ELFTargetInfo.cpp b/lld/lib/ReaderWriter/ELF/ELFTargetInfo.cpp
index 85352c4..776676e 100644
--- a/lld/lib/ReaderWriter/ELF/ELFTargetInfo.cpp
+++ b/lld/lib/ReaderWriter/ELF/ELFTargetInfo.cpp
@@ -14,9 +14,11 @@
 
 #include "lld/Core/LinkerOptions.h"
 #include "lld/Passes/LayoutPass.h"
+#include "lld/ReaderWriter/ReaderLinkerScript.h"
 
 #include "llvm/ADT/Triple.h"
 #include "llvm/Support/ELF.h"
+#include "llvm/Support/FileSystem.h"
 
 namespace lld {
 ELFTargetInfo::ELFTargetInfo(const LinkerOptions &lo) : TargetInfo(lo) {}
@@ -63,10 +65,24 @@
 }
 
+/// Choose the reader for \p input by inspecting the magic bytes of its buffer.
+///
+/// \returns the error from input.getBuffer() when the buffer is unavailable,
+/// the linker script reader when the magic is unknown, and the ELF reader for
+/// every other kind of file. Each reader is created on first use and kept in
+/// a member so later calls reuse it.
 ErrorOr<Reader &> ELFTargetInfo::getReader(const LinkerInput &input) const {
-  if (!_reader)
-    _reader = createReaderELF(*this, std::bind(&ELFTargetInfo::getReader, this,
-                                               std::placeholders::_1));
-  return *_reader;
+  auto buffer = input.getBuffer();
+  if (!buffer)
+    return error_code(buffer);
+  auto magic = llvm::sys::fs::identify_magic(buffer->getBuffer());
+  // Assume unknown file types are linker scripts.
+  if (magic == llvm::sys::fs::file_magic::unknown) {
+    // The reader is handed a callback bound to this very function.
+    // NOTE(review): presumably so it can obtain readers for the files a
+    // script names -- confirm against ReaderLinkerScript.
+    if (!_linkerScriptReader)
+      _linkerScriptReader.reset(new ReaderLinkerScript(
+          *this,
+          std::bind(&ELFTargetInfo::getReader, this, std::placeholders::_1)));
+    return *_linkerScriptReader;
+  }
+
+  // Assume anything else is an ELF file.
+  if (!_elfReader)
+    _elfReader = createReaderELF(*this, std::bind(&ELFTargetInfo::getReader,
+                                                  this, std::placeholders::_1));
+  return *_elfReader;
 }
 
 ErrorOr<Writer &> ELFTargetInfo::getWriter() const {
diff --git a/lld/lib/ReaderWriter/LinkerScript.cpp b/lld/lib/ReaderWriter/LinkerScript.cpp
new file mode 100644
index 0000000..382ea4d
--- /dev/null
+++ b/lld/lib/ReaderWriter/LinkerScript.cpp
@@ -0,0 +1,270 @@
+//===- ReaderWriter/LinkerScript.cpp --------------------------------------===//
+//
+//                             The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Linker script parser.
+///
+//===----------------------------------------------------------------------===//
+
+#include "lld/ReaderWriter/LinkerScript.h"
+
+namespace lld {
+namespace script {
+/// Print this token to \p os as "<kind>: <range>" followed by a newline.
+///
+/// A kind that matches none of the cases below prints only the range.
+void Token::dump(llvm::raw_ostream &os) const {
+  // Resolve the label first so the stream is written in a single statement.
+  const char *label = "";
+  switch (_kind) {
+  case Token::eof:
+    label = "eof: ";
+    break;
+  case Token::identifier:
+    label = "identifier: ";
+    break;
+  case Token::kw_as_needed:
+    label = "kw_as_needed: ";
+    break;
+  case Token::kw_group:
+    label = "kw_group: ";
+    break;
+  case Token::kw_output_format:
+    label = "kw_output_format: ";
+    break;
+  case Token::l_paren:
+    label = "l_paren: ";
+    break;
+  case Token::r_paren:
+    label = "r_paren: ";
+    break;
+  case Token::unknown:
+    label = "unknown: ";
+    break;
+  }
+  os << label << _range << "\n";
+}
+
+/// \returns true if \p c may be the first character of a name: an ASCII
+/// letter or one of '_', '.', '$', '/', '\\'.
+bool Lexer::canStartName(char c) const {
+  const bool isUpper = c >= 'A' && c <= 'Z';
+  const bool isLower = c >= 'a' && c <= 'z';
+  if (isUpper || isLower)
+    return true;
+  // Punctuation that may begin a name; '/' and '\\' let paths lex as names.
+  return c == '_' || c == '.' || c == '$' || c == '/' || c == '\\';
+}
+
+/// \returns true if \p c may appear after the first character of a name.
+///
+/// That is every character accepted by canStartName(), plus the decimal
+/// digits and the extra punctuation listed below.
+bool Lexer::canContinueName(char c) const {
+  if (canStartName(c))
+    return true;
+  if (c >= '0' && c <= '9')
+    return true;
+  switch (c) {
+  case '~': case '=': case '+': case ',': case '[':
+  case ']': case '*': case '?': case '-':
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// Lex one token from the front of the remaining input into \p tok and
+/// advance past it.
+///
+/// Leading whitespace and comments are skipped first. An empty buffer or a
+/// NUL character yields Token::eof; '(' and ')' yield the paren tokens; a
+/// character accepted by canStartName() begins a name, which is classified
+/// as a keyword (OUTPUT_FORMAT, GROUP, AS_NEEDED) or an identifier. Any other
+/// character is returned as a one-character Token::unknown.
+void Lexer::lex(Token &tok) {
+  skipWhitespace();
+  if (_buffer.empty()) {
+    tok = Token(_buffer, Token::eof);
+    return;
+  }
+  switch (_buffer[0]) {
+  case 0:
+    tok = Token(_buffer.substr(0, 1), Token::eof);
+    _buffer = _buffer.drop_front();
+    return;
+  case '(':
+    tok = Token(_buffer.substr(0, 1), Token::l_paren);
+    _buffer = _buffer.drop_front();
+    return;
+  case ')':
+    tok = Token(_buffer.substr(0, 1), Token::r_paren);
+    _buffer = _buffer.drop_front();
+    return;
+  default: {
+    // Keyword or identifier.
+    if (!canStartName(_buffer[0]))
+      break;
+    // The name ends at the first character that cannot continue it, or at
+    // the end of the input when every remaining character can. The search
+    // starts one past the beginning, so the length is always at least 1.
+    auto endIter =
+        std::find_if(_buffer.begin() + 1, _buffer.end(), [this](char c) {
+      return !canContinueName(c);
+    });
+    // Using the iterator distance unconditionally means a name that reaches
+    // the end of the buffer is still lexed as a name instead of falling
+    // through to the unknown-token path.
+    StringRef::size_type end = std::distance(_buffer.begin(), endIter);
+    StringRef word = _buffer.substr(0, end);
+    Token::Kind kind = llvm::StringSwitch<Token::Kind>(word)
+      .Case("OUTPUT_FORMAT", Token::kw_output_format)
+      .Case("GROUP", Token::kw_group)
+      .Case("AS_NEEDED", Token::kw_as_needed)
+      .Default(Token::identifier);
+    tok = Token(word, kind);
+    _buffer = _buffer.drop_front(end);
+    return;
+  }
+  }
+  // Nothing matched: hand back the offending character on its own.
+  tok = Token(_buffer.substr(0, 1), Token::unknown);
+  _buffer = _buffer.drop_front();
+}
+
+/// Advance past any run of spaces, tabs, CR/LF and /* ... */ comments at the
+/// front of the remaining input.
+///
+/// Stops at the first character that is neither whitespace nor the start of
+/// a comment, or when the input is exhausted. An unterminated comment
+/// consumes the rest of the input.
+void Lexer::skipWhitespace() {
+  while (!_buffer.empty()) {
+    const char c = _buffer[0];
+    if (c == ' ' || c == '\r' || c == '\n' || c == '\t') {
+      _buffer = _buffer.drop_front();
+      continue;
+    }
+    // Only the two-character sequence "/*" opens a comment; a lone '/' or
+    // any other character is the start of the next token.
+    if (c != '/' || _buffer.size() < 2 || _buffer[1] != '*')
+      return;
+
+    // Drop the opener before searching so that its '*' can never pair with
+    // a following '/' (i.e. "/*/" does not close the comment).
+    _buffer = _buffer.drop_front(2);
+    const StringRef::size_type close = _buffer.find("*/");
+    // Skip through the closing "*/", or to the end when there is none.
+    _buffer = _buffer.drop_front(close == StringRef::npos ? _buffer.size()
+                                                          : close + 2);
+  }
+}
+
+/// Parse the whole token stream into the script's command list.
+///
+/// \returns a pointer to the parsed script once Token::eof is reached, or
+/// nullptr if a command fails to parse or a token that cannot start a top
+/// level command is seen.
+LinkerScript *Parser::parse() {
+  // Prime the lookahead with the first token.
+  _lex.lex(_tok);
+  // Each iteration handles exactly one top level command.
+  while (_tok._kind != Token::eof) {
+    if (_tok._kind == Token::kw_output_format) {
+      auto format = parseOutputFormat();
+      if (!format)
+        return nullptr;
+      _script._commands.push_back(format);
+    } else if (_tok._kind == Token::kw_group) {
+      auto inputGroup = parseGroup();
+      if (!inputGroup)
+        return nullptr;
+      _script._commands.push_back(inputGroup);
+    } else {
+      // AS_NEEDED is not allowed at top level, and every other token is
+      // unexpected here.
+      return nullptr;
+    }
+  }
+  return &_script;
+}
+
+/// Parse `OUTPUT_FORMAT ( identifier )`.
+///
+/// The current token must be the OUTPUT_FORMAT keyword.
+/// \returns the new command, or nullptr if a paren or the identifier is
+/// missing.
+OutputFormat *Parser::parseOutputFormat() {
+  assert(_tok._kind == Token::kw_output_format && "Expected OUTPUT_FORMAT!");
+  consumeToken();
+  if (!expectAndConsume(Token::l_paren, "expected ("))
+    return nullptr;
+
+  const bool haveName = _tok._kind == Token::identifier;
+  if (!haveName) {
+    error(_tok, "Expected identifer in OUTPUT_FORMAT.");
+    return nullptr;
+  }
+
+  // Build the command from the identifier's text before stepping past it.
+  OutputFormat *format = new (_alloc) OutputFormat(_tok._range);
+  consumeToken();
+
+  return expectAndConsume(Token::r_paren, "expected )") ? format : nullptr;
+}
+
+/// Parse `GROUP ( path... )`, where each entry is either an identifier or a
+/// nested `AS_NEEDED ( ... )` list.
+///
+/// The current token must be the GROUP keyword.
+/// \returns the new command, or nullptr if a paren is missing or a nested
+/// AS_NEEDED list fails to parse.
+Group *Parser::parseGroup() {
+  assert(_tok._kind == Token::kw_group && "Expected GROUP!");
+  consumeToken();
+  if (!expectAndConsume(Token::l_paren, "expected ("))
+    return nullptr;
+
+  std::vector<Path> paths;
+
+  // Collect entries until a token that is neither a path nor AS_NEEDED.
+  for (;;) {
+    if (_tok._kind == Token::identifier) {
+      paths.push_back(Path(_tok._range));
+      consumeToken();
+    } else if (_tok._kind == Token::kw_as_needed) {
+      // The nested list appends its own paths to the same vector.
+      if (!parseAsNeeded(paths))
+        return nullptr;
+    } else {
+      break;
+    }
+  }
+
+  Group *group = new (_alloc) Group(paths);
+
+  return expectAndConsume(Token::r_paren, "expected )") ? group : nullptr;
+}
+
+/// Parse `AS_NEEDED ( identifier... )` and append each identifier to
+/// \p paths, constructing the Path with `true` as its second argument.
+///
+/// The current token must be the AS_NEEDED keyword.
+/// \returns false if either paren is missing, true otherwise.
+bool Parser::parseAsNeeded(std::vector<Path> &paths) {
+  assert(_tok._kind == Token::kw_as_needed && "Expected AS_NEEDED!");
+  consumeToken();
+  if (!expectAndConsume(Token::l_paren, "expected ("))
+    return false;
+
+  // Take every consecutive identifier; the loop step moves to the next token.
+  for (; _tok._kind == Token::identifier; consumeToken())
+    paths.push_back(Path(_tok._range, true));
+
+  if (expectAndConsume(Token::r_paren, "expected )"))
+    return true;
+  return false;
+}
+} // end namespace script
+} // end namespace lld
diff --git a/lld/lib/ReaderWriter/ReaderLinkerScript.cpp b/lld/lib/ReaderWriter/ReaderLinkerScript.cpp
new file mode 100644
index 0000000..78203fa
--- /dev/null
+++ b/lld/lib/ReaderWriter/ReaderLinkerScript.cpp
@@ -0,0 +1,105 @@
+//===- lib/ReaderWriter/ReaderLinkerScript.cpp ----------------------------===//
+//
+//                             The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "lld/ReaderWriter/ReaderLinkerScript.h"
+
+#include "lld/Core/Error.h"
+#include "lld/Core/File.h"
+#include "lld/Core/LinkerOptions.h"
+#include "lld/ReaderWriter/LinkerScript.h"
+
+using namespace lld;
+using namespace script;
+
+namespace {
+/// A File backed by a linker script.
+///
+/// The script is lexed and parsed when the file is created. The four atom
+/// collections are members that nothing in this class adds to.
+class LinkerScriptFile : public File {
+public:
+  /// Build a file from \p mb and parse it immediately.
+  ///
+  /// \returns the file on success, or
+  /// linker_script_reader_error::parse_error when the parser returns null.
+  static ErrorOr<std::unique_ptr<LinkerScriptFile> >
+  create(const TargetInfo &ti, std::unique_ptr<llvm::MemoryBuffer> mb) {
+    std::unique_ptr<LinkerScriptFile> file(
+        new LinkerScriptFile(ti, std::move(mb)));
+    file->_script = file->_parser.parse();
+    if (!file->_script)
+      return linker_script_reader_error::parse_error;
+    return std::move(file);
+  }
+
+  virtual Kind kind() const { return kindLinkerScript; }
+
+  /// Type test on a File: true when \p f reports kindLinkerScript.
+  static inline bool classof(const File *f) {
+    return f->kind() == kindLinkerScript;
+  }
+
+  /// Record \p ordinal as this file's ordinal, then increment the caller's
+  /// counter.
+  virtual void setOrdinalAndIncrement(uint64_t &ordinal) const {
+    _ordinal = ordinal++;
+  }
+
+  virtual const TargetInfo &getTargetInfo() const { return _targetInfo; }
+
+  virtual const atom_collection<DefinedAtom> &defined() const {
+    return _definedAtoms;
+  }
+
+  virtual const atom_collection<UndefinedAtom> &undefined() const {
+    return _undefinedAtoms;
+  }
+
+  virtual const atom_collection<SharedLibraryAtom> &sharedLibrary() const {
+    return _sharedLibraryAtoms;
+  }
+
+  virtual const atom_collection<AbsoluteAtom> &absolute() const {
+    return _absoluteAtoms;
+  }
+
+  /// \returns the parsed script; create() only hands out files for which
+  /// this is non-null.
+  const LinkerScript *getScript() {
+    return _script;
+  }
+
+private:
+  // The buffer identifier is read for the File base before mb is moved into
+  // the lexer; base classes are initialized before members, so the read
+  // happens first.
+  LinkerScriptFile(const TargetInfo &ti, std::unique_ptr<llvm::MemoryBuffer> mb)
+      : File(mb->getBufferIdentifier()),
+        _targetInfo(ti),
+        _lexer(std::move(mb)),
+        _parser(_lexer),
+        _script(nullptr) {}
+
+  const TargetInfo &_targetInfo;
+  atom_collection_vector<DefinedAtom> _definedAtoms;
+  atom_collection_vector<UndefinedAtom> _undefinedAtoms;
+  atom_collection_vector<SharedLibraryAtom> _sharedLibraryAtoms;
+  atom_collection_vector<AbsoluteAtom> _absoluteAtoms;
+  // _lexer must stay declared before _parser: the parser is constructed from
+  // the lexer, and members are initialized in declaration order.
+  Lexer _lexer;
+  Parser _parser;
+  // Set by create() from the parser's result; null until then.
+  const LinkerScript *_script;
+};
+} // end anon namespace
+
+namespace lld {
+/// Parse \p mb as a linker script, append the script file to \p result, then
+/// read every path listed in the script's GROUP commands into \p result too.
+///
+/// \returns the first error from parsing, from obtaining a reader, or from
+/// reading a listed file; success otherwise.
+error_code
+ReaderLinkerScript::parseFile(std::unique_ptr<llvm::MemoryBuffer> mb,
+                              std::vector<std::unique_ptr<File> > &result) {
+  auto scriptFile = LinkerScriptFile::create(_targetInfo, std::move(mb));
+  if (!scriptFile)
+    return scriptFile;
+  // Fetch the script pointer before the owning pointer is moved into result.
+  const LinkerScript *script = (*scriptFile)->getScript();
+  result.push_back(std::move(*scriptFile));
+  for (const auto &command : script->_commands) {
+    auto group = dyn_cast<Group>(command);
+    // Only GROUP commands name input files; skip everything else.
+    if (!group)
+      continue;
+    for (const auto &path : group->getPaths()) {
+      auto reader = _getReader(LinkerInput(path._path));
+      if (!reader)
+        return reader;
+      error_code ec = reader->readFile(path._path, result);
+      if (ec)
+        return ec;
+    }
+  }
+  return error_code::success();
+}
+} // end namespace lld