Reland 'Classify llvm-cfi-verify.'

Summary: Move llvm-cfi-verify into a class in preparation for CFI analysis to come.

Reviewers: vlad.tsyrklevich

Reviewed By: vlad.tsyrklevich

Subscribers: mgorny, llvm-commits, pcc, kcc

Differential Revision: https://reviews.llvm.org/D38379

llvm-svn: 315504
diff --git a/llvm/tools/llvm-cfi-verify/lib/CMakeLists.txt b/llvm/tools/llvm-cfi-verify/lib/CMakeLists.txt
new file mode 100644
index 0000000..a673d88
--- /dev/null
+++ b/llvm/tools/llvm-cfi-verify/lib/CMakeLists.txt
@@ -0,0 +1,9 @@
+add_llvm_library(LLVMCFIVerify
+  FileAnalysis.cpp
+  FileAnalysis.h
+
+  LINK_COMPONENTS
+  MC
+  MCParser
+  Object
+  Support)
diff --git a/llvm/tools/llvm-cfi-verify/lib/FileAnalysis.cpp b/llvm/tools/llvm-cfi-verify/lib/FileAnalysis.cpp
new file mode 100644
index 0000000..6a275ce
--- /dev/null
+++ b/llvm/tools/llvm-cfi-verify/lib/FileAnalysis.cpp
@@ -0,0 +1,277 @@
+//===- FileAnalysis.cpp -----------------------------------------*- C++ -*-===//
+//
+//                      The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "FileAnalysis.h"
+
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <functional>
+
+using Instr = llvm::cfi_verify::FileAnalysis::Instr;
+
+namespace llvm {
+namespace cfi_verify {
+
+// Opens Filename, verifies that it is an object file and fully analyses it:
+// the disassembly members are initialised and all code sections are parsed.
+// The first failing step is returned as the Error of the Expected.
+Expected<FileAnalysis> FileAnalysis::Create(StringRef Filename) {
+  auto OpenedOrErr = object::createBinary(Filename);
+  if (!OpenedOrErr)
+    return OpenedOrErr.takeError();
+
+  // The analysis object becomes the owner of the opened binary.
+  FileAnalysis Analysis(std::move(OpenedOrErr.get()));
+
+  const auto *AsObject =
+      dyn_cast<object::ObjectFile>(Analysis.Binary.getBinary());
+  if (!AsObject)
+    return make_error<UnsupportedDisassembly>();
+
+  Analysis.Object = AsObject;
+  Analysis.ObjectTriple = AsObject->makeTriple();
+  Analysis.Features = AsObject->getFeatures();
+
+  if (Error InitFailure = Analysis.initialiseDisassemblyMembers())
+    return std::move(InitFailure);
+  if (Error ParseFailure = Analysis.parseCodeSections())
+    return std::move(ParseFailure);
+
+  return std::move(Analysis);
+}
+
+FileAnalysis::FileAnalysis(object::OwningBinary<object::Binary> Binary)
+    : Binary(std::move(Binary)) {} // Takes ownership of the supplied binary.
+
+FileAnalysis::FileAnalysis(const Triple &ObjectTriple,
+                           const SubtargetFeatures &Features)
+    : ObjectTriple(ObjectTriple), Features(Features) {} // Binary stays empty.
+
+// Returns the instruction stored directly before InstrMeta in address order,
+// or nullptr if InstrMeta is unknown, is the first entry, or that entry is
+// not Valid.
+const Instr *
+FileAnalysis::getPrevInstructionSequential(const Instr &InstrMeta) const {
+  auto Current = Instructions.find(InstrMeta.VMAddress);
+  if (Current == Instructions.end() || Current == Instructions.begin())
+    return nullptr;
+
+  const Instr &Previous = (--Current)->second;
+  return Previous.Valid ? &Previous : nullptr;
+}
+
+// Returns the instruction stored directly after InstrMeta in address order,
+// or nullptr if InstrMeta is unknown, is the last entry, or that entry is
+// not Valid.
+const Instr *
+FileAnalysis::getNextInstructionSequential(const Instr &InstrMeta) const {
+  auto Current = Instructions.find(InstrMeta.VMAddress);
+  if (Current == Instructions.end())
+    return nullptr;
+  if (++Current == Instructions.end())
+    return nullptr;
+  return Current->second.Valid ? &Current->second : nullptr;
+}
+
+// Reports whether at least one operand of InstrMeta's MCInst is a register.
+bool FileAnalysis::usesRegisterOperand(const Instr &InstrMeta) const {
+  for (const MCOperand &Op : InstrMeta.Instruction)
+    if (Op.isReg())
+      return true;
+  return false;
+}
+
+// Returns the instruction recorded at exactly Address, or nullptr when no
+// instruction is stored under that address.
+const Instr *FileAnalysis::getInstruction(uint64_t Address) const {
+  auto Found = Instructions.find(Address);
+  const bool Known = Found != Instructions.end();
+  return Known ? &Found->second : nullptr;
+}
+
+const Instr &FileAnalysis::getInstructionOrDie(uint64_t Address) const {
+  auto Entry = Instructions.find(Address);
+  assert(Entry != Instructions.end() && "Address doesn't exist.");
+  return Entry->second; // Unchecked when asserts are compiled out.
+}
+
+const std::set<uint64_t> &FileAnalysis::getIndirectInstructions() const {
+  return IndirectInstructions; // Populated by parseSectionContents().
+}
+
+const MCRegisterInfo *FileAnalysis::getRegisterInfo() const {
+  return RegisterInfo.get(); // Null until initialiseDisassemblyMembers().
+}
+
+const MCInstrInfo *FileAnalysis::getMCInstrInfo() const { return MII.get(); }
+
+const MCInstrAnalysis *FileAnalysis::getMCInstrAnalysis() const {
+  return MIA.get(); // Null until initialiseDisassemblyMembers().
+}
+
+// Looks up the target for ObjectTriple and creates the MC objects used for
+// disassembly. Fails with a StringError naming the first missing component.
+Error FileAnalysis::initialiseDisassemblyMembers() {
+  auto Failure = [](const Twine &Message) -> Error {
+    return make_error<StringError>(Message, inconvertibleErrorCode());
+  };
+
+  std::string TripleName = ObjectTriple.getTriple();
+  ArchName = "";
+  MCPU = "";
+  std::string ErrorString;
+
+  ObjectTarget =
+      TargetRegistry::lookupTarget(ArchName, ObjectTriple, ErrorString);
+  if (!ObjectTarget)
+    return Failure(Twine("Couldn't find target \"") + TripleName +
+                   "\", failed with error: " + ErrorString);
+
+  RegisterInfo.reset(ObjectTarget->createMCRegInfo(TripleName));
+  if (!RegisterInfo)
+    return Failure("Failed to initialise RegisterInfo.");
+
+  AsmInfo.reset(ObjectTarget->createMCAsmInfo(*RegisterInfo, TripleName));
+  if (!AsmInfo)
+    return Failure("Failed to initialise AsmInfo.");
+
+  SubtargetInfo.reset(ObjectTarget->createMCSubtargetInfo(
+      TripleName, MCPU, Features.getString()));
+  if (!SubtargetInfo)
+    return Failure("Failed to initialise SubtargetInfo.");
+
+  MII.reset(ObjectTarget->createMCInstrInfo());
+  if (!MII)
+    return Failure("Failed to initialise MII.");
+
+  Context.reset(new MCContext(AsmInfo.get(), RegisterInfo.get(), &MOFI));
+  Disassembler.reset(
+      ObjectTarget->createMCDisassembler(*SubtargetInfo, *Context));
+  if (!Disassembler)
+    return Failure("No disassembler available for target");
+
+  // parseSectionContents() dereferences MIA unconditionally, so fail here
+  // instead of crashing later when the target provides no MCInstrAnalysis.
+  MIA.reset(ObjectTarget->createMCInstrAnalysis(MII.get()));
+  if (!MIA)
+    return Failure("Failed to initialise MIA.");
+
+  Printer.reset(ObjectTarget->createMCInstPrinter(
+      ObjectTriple, AsmInfo->getAssemblerDialect(), *AsmInfo, *MII,
+      *RegisterInfo));
+  return Error::success();
+}
+
+// Disassembles every executable section; fails on non-ELF or unreadable input.
+Error FileAnalysis::parseCodeSections() {
+  if (!Object->isELF()) // ELFSectionRef below requires an ELF object.
+    return make_error<StringError>("Only ELF object files are supported",
+                                   inconvertibleErrorCode());
+  for (const object::SectionRef &Section : Object->sections()) {
+    if (!(object::ELFSectionRef(Section).getFlags() & ELF::SHF_EXECINSTR))
+      continue;
+    StringRef Contents;
+    if (Section.getContents(Contents))
+      return make_error<StringError>("Failed to retrieve section contents",
+                                     inconvertibleErrorCode());
+    ArrayRef<uint8_t> Bytes(Contents.bytes_begin(), Contents.size());
+    parseSectionContents(Bytes, Section.getAddress());
+  }
+  return Error::success();
+}
+
+// Linearly disassembles SectionBytes, whose first byte is at SectionAddress.
+// Every decode attempt is stored via addInstruction(); control-flow
+// instructions may additionally be recorded as static or indirect branches.
+void FileAnalysis::parseSectionContents(ArrayRef<uint8_t> SectionBytes,
+                                        uint64_t SectionAddress) {
+  MCInst Instruction;
+  Instr InstrMeta;
+
+  for (uint64_t Byte = 0; Byte < SectionBytes.size();) {
+    uint64_t InstructionSize = 0;
+    bool ValidInstruction =
+        Disassembler->getInstruction(Instruction, InstructionSize,
+                                     SectionBytes.drop_front(Byte), 0, nulls(),
+                                     outs()) == MCDisassembler::Success;
+
+    // A reported size of zero (possible on decode failure) would revisit the
+    // same address, which addInstruction() rejects; consume at least one byte.
+    if (InstructionSize == 0)
+      InstructionSize = 1;
+    uint64_t VMAddress = SectionAddress + Byte;
+    Byte += InstructionSize;
+
+    InstrMeta.Instruction = Instruction;
+    InstrMeta.VMAddress = VMAddress;
+    InstrMeta.InstructionSize = InstructionSize;
+    InstrMeta.Valid = ValidInstruction;
+    addInstruction(InstrMeta);
+    if (!ValidInstruction)
+      continue;
+
+    // Only instructions that may alter control flow are classified further.
+    const auto &InstrDesc = MII->get(Instruction.getOpcode());
+    if (!InstrDesc.mayAffectControlFlow(Instruction, *RegisterInfo))
+      continue;
+
+    // A branch whose target can be evaluated is static, not indirect.
+    uint64_t Target;
+    if (MIA->evaluateBranch(Instruction, VMAddress, InstructionSize, Target))
+      StaticBranchTargetings[Target].push_back(VMAddress);
+    else if (usesRegisterOperand(InstrMeta))
+      IndirectInstructions.insert(VMAddress);
+  }
+}
+
+// Stores Instruction under its VMAddress. An address may only be used once:
+// a duplicate is reported on stderr and the process exits with EXIT_FAILURE.
+void FileAnalysis::addInstruction(const Instr &Instruction) {
+  if (Instructions.insert({Instruction.VMAddress, Instruction}).second)
+    return;
+  errs() << "Failed to add instruction at address "
+         << format_hex(Instruction.VMAddress, 2)
+         << ": Instruction at this address already exists.\n";
+  exit(EXIT_FAILURE);
+}
+
+char UnsupportedDisassembly::ID;
+void UnsupportedDisassembly::log(raw_ostream &OS) const { // Writes the reason.
+  OS << "Disassembling of non-objects not currently supported.\n";
+}
+
+std::error_code UnsupportedDisassembly::convertToErrorCode() const {
+  return inconvertibleErrorCode(); // std::error_code() would mean success.
+}
+
+} // namespace cfi_verify
+} // namespace llvm
diff --git a/llvm/tools/llvm-cfi-verify/lib/FileAnalysis.h b/llvm/tools/llvm-cfi-verify/lib/FileAnalysis.h
new file mode 100644
index 0000000..80e3256
--- /dev/null
+++ b/llvm/tools/llvm-cfi-verify/lib/FileAnalysis.h
@@ -0,0 +1,157 @@
+//===- FileAnalysis.h -------------------------------------------*- C++ -*-===//
+//
+//                      The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CFI_VERIFY_FILE_ANALYSIS_H
+#define LLVM_CFI_VERIFY_FILE_ANALYSIS_H
+
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <functional>
+#include <set>
+#include <string>
+#include <unordered_map>
+
+namespace llvm {
+namespace cfi_verify {
+
+// Disassembler and analysis tool for machine code files. Keeps track of non-
+// sequential control flows, including indirect control flow instructions.
+class FileAnalysis {
+public:
+  // Metadata recorded for each disassembly attempt (valid or not).
+  struct Instr {
+    uint64_t VMAddress;       // Virtual memory address of this instruction.
+    MCInst Instruction;       // The decoded instruction.
+    uint64_t InstructionSize; // Size of this instruction, in bytes.
+    bool Valid; // Is this a valid instruction? If false, Instr::Instruction is
+                // undefined.
+  };
+
+  // Opens and analyses the file at Filename, or returns why that failed.
+  static Expected<FileAnalysis> Create(StringRef Filename);
+
+  // Construct and take ownership of the supplied object. Do not use this
+  // constructor, prefer to use FileAnalysis::Create instead.
+  FileAnalysis(object::OwningBinary<object::Binary> Binary);
+  FileAnalysis() = delete;
+  FileAnalysis(const FileAnalysis &) = delete;
+  FileAnalysis(FileAnalysis &&Other) = default;
+
+  // Returns the instruction at the provided address. Returns nullptr if there
+  // is no instruction at the provided address.
+  const Instr *getInstruction(uint64_t Address) const;
+
+  // Returns the instruction at the provided address. The address must exist;
+  // this is enforced with an assert only.
+  const Instr &getInstructionOrDie(uint64_t Address) const;
+
+  // Returns the instruction stored directly before/after InstrMeta in address
+  // order. Returns nullptr if there is no such neighbour, if the neighbour is
+  // not Valid, or if InstrMeta itself is not part of this analysis.
+  const Instr *getPrevInstructionSequential(const Instr &InstrMeta) const;
+  const Instr *getNextInstructionSequential(const Instr &InstrMeta) const;
+
+  // Returns whether any operand of this instruction is a register.
+  bool usesRegisterOperand(const Instr &InstrMeta) const;
+
+  // Returns the addresses of the indirect control flow instructions found.
+  const std::set<uint64_t> &getIndirectInstructions() const;
+
+  const MCRegisterInfo *getRegisterInfo() const;
+  const MCInstrInfo *getMCInstrInfo() const;
+  const MCInstrAnalysis *getMCInstrAnalysis() const;
+
+protected:
+  // Construct a blank object with the provided triple and features. Used in
+  // testing, where a sub class will dependency inject protected methods to
+  // allow analysis of raw binary, without requiring a fully valid ELF file.
+  FileAnalysis(const Triple &ObjectTriple, const SubtargetFeatures &Features);
+
+  // Adds an instruction; exits the process if its address is already taken.
+  void addInstruction(const Instr &Instruction);
+
+  // Disassemble and parse the provided bytes into this object. Instruction
+  // address calculation is done relative to the provided SectionAddress.
+  void parseSectionContents(ArrayRef<uint8_t> SectionBytes,
+                            uint64_t SectionAddress);
+
+  // Constructs and initialises members required for disassembly.
+  Error initialiseDisassemblyMembers();
+
+  // Parses the executable sections of the internal object file and saves the
+  // results into the internal members. Should only be called once by Create().
+  Error parseCodeSections();
+
+private:
+  // Members that describe the input file.
+  object::OwningBinary<object::Binary> Binary;
+  const object::ObjectFile *Object = nullptr;
+  Triple ObjectTriple;
+  std::string ArchName;
+  std::string MCPU;
+  const Target *ObjectTarget = nullptr;
+  SubtargetFeatures Features;
+
+  // Members required for disassembly.
+  std::unique_ptr<const MCRegisterInfo> RegisterInfo;
+  std::unique_ptr<const MCAsmInfo> AsmInfo;
+  std::unique_ptr<MCSubtargetInfo> SubtargetInfo;
+  std::unique_ptr<const MCInstrInfo> MII;
+  MCObjectFileInfo MOFI; // NOTE(review): Context gets &MOFI; is moving safe?
+  std::unique_ptr<MCContext> Context;
+  std::unique_ptr<const MCDisassembler> Disassembler;
+  std::unique_ptr<const MCInstrAnalysis> MIA;
+  std::unique_ptr<MCInstPrinter> Printer;
+
+  // A mapping from the virtual memory address of an instruction to its
+  // metadata struct.
+  std::map<uint64_t, Instr> Instructions;
+
+  // Contains a mapping between a specific address, and a list of instructions
+  // that use this address as a branch target (including call instructions).
+  std::unordered_map<uint64_t, std::vector<uint64_t>> StaticBranchTargetings;
+
+  // The set of addresses of indirect control flow instructions.
+  std::set<uint64_t> IndirectInstructions;
+};
+
+class UnsupportedDisassembly : public ErrorInfo<UnsupportedDisassembly> {
+public:
+  static char ID; // Defined in FileAnalysis.cpp.
+
+  void log(raw_ostream &OS) const override; // Prints the error message to OS.
+  std::error_code convertToErrorCode() const override;
+};
+
+} // namespace cfi_verify
+} // namespace llvm
+
+#endif // LLVM_CFI_VERIFY_FILE_ANALYSIS_H
diff --git a/llvm/tools/llvm-cfi-verify/lib/LLVMBuild.txt b/llvm/tools/llvm-cfi-verify/lib/LLVMBuild.txt
new file mode 100644
index 0000000..39537f5
--- /dev/null
+++ b/llvm/tools/llvm-cfi-verify/lib/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-cfi-verify/lib/LLVMBuild.txt ----------------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = CFIVerify
+parent = Libraries
+required_libraries = MC MCDisassembler MCParser Object Support