Update LLVM for rebase to r212749. Includes a cherry-pick of: r212948 - fixes a small issue with atomic calls Change-Id: Ib97bd980b59f18142a69506400911a6009d9df18

commit: cd81d94322a39503e4a3e87b6ee03d4fcb3465fb [log] [tgz]
author: Stephen Hines <srhines@google.com> Mon Jul 21 00:45:20 2014 -0700
committer: Stephen Hines <srhines@google.com> Fri Jul 25 00:48:57 2014 -0700
tree: 81b7dd2bb4370a392f31d332a566c903b5744764
parent: 0c5f13c0c4499eaf42ab5e9e2ceabd4e20e36861 [diff]
diff --git a/lib/MC/MCAnalysis/Android.mk b/lib/MC/MCAnalysis/Android.mk
new file mode 100644
index 0000000..27f848a
--- /dev/null
+++ b/lib/MC/MCAnalysis/Android.mk

@@ -0,0 +1,37 @@
+# Android build rules for the LLVM MCAnalysis library (libLLVMMCAnalysis).
+LOCAL_PATH:= $(call my-dir)
+
+# Source list shared by the host and the device variants defined below.
+mc_analysis_SRC_FILES := \
+  MCAtom.cpp \
+  MCFunction.cpp \
+  MCModule.cpp \
+  MCModuleYAML.cpp \
+  MCObjectDisassembler.cpp \
+  MCObjectSymbolizer.cpp
+
+# For the host
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(mc_analysis_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMMCAnalysis
+
+LOCAL_MODULE_TAGS := optional
+
+include $(LLVM_HOST_BUILD_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
+
+# For the device
+# =====================================================
+include $(CLEAR_VARS)
+# The device variant is only defined when DISABLE_LLVM_DEVICE_BUILDS is not
+# set to "true".
+ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
+
+LOCAL_SRC_FILES := $(mc_analysis_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMMCAnalysis
+
+LOCAL_MODULE_TAGS := optional
+
+include $(LLVM_DEVICE_BUILD_MK)
+include $(BUILD_STATIC_LIBRARY)
+endif

diff --git a/lib/MC/MCAnalysis/CMakeLists.txt b/lib/MC/MCAnalysis/CMakeLists.txt
new file mode 100644
index 0000000..81eae2d
--- /dev/null
+++ b/lib/MC/MCAnalysis/CMakeLists.txt

@@ -0,0 +1,8 @@
+# Defines the LLVMMCAnalysis library from the MCAnalysis sources listed below.
+add_llvm_library(LLVMMCAnalysis
+ MCAtom.cpp
+ MCFunction.cpp
+ MCModule.cpp
+ MCModuleYAML.cpp
+ MCObjectDisassembler.cpp
+ MCObjectSymbolizer.cpp
+)

diff --git a/lib/MC/MCAnalysis/LLVMBuild.txt b/lib/MC/MCAnalysis/LLVMBuild.txt
new file mode 100644
index 0000000..1b58fec
--- /dev/null
+++ b/lib/MC/MCAnalysis/LLVMBuild.txt

@@ -0,0 +1,5 @@
+; Build description of the MCAnalysis library component and the libraries it
+; requires.
+[component_0]
+type = Library
+name = MCAnalysis
+parent = Libraries
+required_libraries = MC Object Support

diff --git a/lib/MC/MCAnalysis/MCAtom.cpp b/lib/MC/MCAnalysis/MCAtom.cpp
new file mode 100644
index 0000000..82056ee
--- /dev/null
+++ b/lib/MC/MCAnalysis/MCAtom.cpp

@@ -0,0 +1,114 @@
+//===- lib/MC/MCAtom.cpp - MCAtom implementation --------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAnalysis/MCAtom.h"
+#include "llvm/MC/MCAnalysis/MCModule.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <iterator>
+
+using namespace llvm;
+
+// Out-of-line definition of the otherwise empty anchor() member; defining it
+// here pins the vtable to this file.
+void MCAtom::anchor() {}
+
+// Changes this atom's address range by forwarding the request to Parent,
+// passing this atom together with the new bounds.
+void MCAtom::remap(uint64_t NewBegin, uint64_t NewEnd) {
+  Parent->remap(this, NewBegin, NewEnd);
+}
+
+// Shrinks this atom so that TruncPt becomes its new end address; the begin
+// address is kept. TruncPt must lie in [Begin, End).
+void MCAtom::remapForTruncate(uint64_t TruncPt) {
+  assert((Begin <= TruncPt && End > TruncPt) &&
+         "Truncation point not contained in atom!");
+  const uint64_t NewEnd = TruncPt;
+  remap(Begin, NewEnd);
+}
+
+// Computes the address ranges produced by splitting this atom at SplitPt and
+// remaps this atom to the lower range.
+//
+// SplitPt must lie in (Begin, End]. On return [LBegin, LEnd] is the lower
+// range (now covered by this atom) and [RBegin, REnd] is the upper range,
+// which starts at SplitPt; no atom is created for the upper range here.
+void MCAtom::remapForSplit(uint64_t SplitPt,
+                           uint64_t &LBegin, uint64_t &LEnd,
+                           uint64_t &RBegin, uint64_t &REnd) {
+  assert((SplitPt > Begin && SplitPt <= End) &&
+         "Splitting at point not contained in atom!");
+
+  // Compute the new begin/end points.
+  LBegin = Begin;
+  LEnd = SplitPt - 1;
+  RBegin = SplitPt;
+  REnd = End;
+
+  // Remap this atom to become the lower of the two new ones.
+  remap(LBegin, LEnd);
+}
+
+// MCDataAtom
+
+// Appends one data element and, when the stored data no longer fits in the
+// current [Begin, End] range, extends the atom's end address by one.
+void MCDataAtom::addData(const MCData &D) {
+  Data.push_back(D);
+  const uint64_t RangeSize = End + 1 - Begin;
+  if (RangeSize < Data.size())
+    remap(Begin, End + 1);
+}
+
+// Truncates the atom so that TruncPt is its last address and drops the data
+// stored past that point.
+void MCDataAtom::truncate(uint64_t TruncPt) {
+  remapForTruncate(TruncPt);
+
+  // TruncPt is inclusive, hence the +1 when converting to an element count.
+  const uint64_t KeptCount = TruncPt - Begin + 1;
+  Data.resize(KeptCount);
+}
+
+// Splits this data atom at SplitPt. This atom keeps the lower range; a new
+// data atom with the same name is created in the parent for the upper range
+// and receives the data from the split point onwards. Returns the new atom.
+MCDataAtom *MCDataAtom::split(uint64_t SplitPt) {
+  uint64_t LBegin, LEnd, RBegin, REnd;
+  remapForSplit(SplitPt, LBegin, LEnd, RBegin, REnd);
+
+  MCDataAtom *RightAtom = Parent->createDataAtom(RBegin, REnd);
+  RightAtom->setName(getName());
+
+  // Position of the first element that belongs to the right-hand atom.
+  std::vector<MCData>::iterator SplitIt = Data.begin() + (RBegin - LBegin);
+  assert(SplitIt != Data.end() && "Split point not found in range!");
+
+  // Move the tail over to the new atom, then drop it from this one.
+  RightAtom->Data.insert(RightAtom->Data.end(), SplitIt, Data.end());
+  Data.erase(SplitIt, Data.end());
+  return RightAtom;
+}
+
+// MCTextAtom
+
+// Appends instruction I of Size bytes at NextInstAddress, growing the atom
+// first when the instruction's last byte lies past the current end address.
+void MCTextAtom::addInst(const MCInst &I, uint64_t Size) {
+  const uint64_t LastByte = NextInstAddress + Size - 1;
+  if (LastByte > End)
+    remap(Begin, LastByte);
+  Insts.push_back(MCDecodedInst(I, NextInstAddress, Size));
+  NextInstAddress += Size;
+}
+
+// Truncates the atom so that TruncPt is its last address and removes every
+// instruction that starts after it. TruncPt + 1 must be the address of an
+// instruction in this atom.
+void MCTextAtom::truncate(uint64_t TruncPt) {
+  remapForTruncate(TruncPt);
+
+  // Find the first instruction that starts past the truncation point.
+  InstListTy::iterator FirstDropped = Insts.begin();
+  for (; FirstDropped != Insts.end(); ++FirstDropped)
+    if (FirstDropped->Address > TruncPt)
+      break;
+
+  assert(FirstDropped != Insts.end() &&
+         "Truncation point not found in disassembly!");
+  assert(FirstDropped->Address == TruncPt + 1 &&
+         "Truncation point does not fall on instruction boundary");
+
+  Insts.erase(FirstDropped, Insts.end());
+}
+
+// Splits this text atom at SplitPt, which must be the address of one of its
+// instructions. This atom keeps the lower range; a new text atom with the same
+// name is created in the parent for the upper range and receives the
+// instructions from SplitPt onwards. The parent is then asked to split the
+// basic blocks backed by this atom. Returns the new atom.
+MCTextAtom *MCTextAtom::split(uint64_t SplitPt) {
+  uint64_t LBegin, LEnd, RBegin, REnd;
+  remapForSplit(SplitPt, LBegin, LEnd, RBegin, REnd);
+
+  MCTextAtom *RightAtom = Parent->createTextAtom(RBegin, REnd);
+  RightAtom->setName(getName());
+
+  // Find the first instruction at or past the split point.
+  InstListTy::iterator SplitIt = Insts.begin();
+  for (; SplitIt != Insts.end(); ++SplitIt)
+    if (SplitIt->Address >= SplitPt)
+      break;
+  assert(SplitIt != Insts.end() && "Split point not found in disassembly!");
+  assert(SplitIt->Address == SplitPt &&
+         "Split point does not fall on instruction boundary!");
+
+  std::copy(SplitIt, Insts.end(), std::back_inserter(RightAtom->Insts));
+  Insts.erase(SplitIt, Insts.end());
+  Parent->splitBasicBlocksForAtom(this, RightAtom);
+  return RightAtom;
+}

diff --git a/lib/MC/MCAnalysis/MCFunction.cpp b/lib/MC/MCAnalysis/MCFunction.cpp
new file mode 100644
index 0000000..4e09d1a
--- /dev/null
+++ b/lib/MC/MCAnalysis/MCFunction.cpp

@@ -0,0 +1,76 @@
+//===-- lib/MC/MCFunction.cpp -----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAnalysis/MCFunction.h"
+#include "llvm/MC/MCAnalysis/MCAtom.h"
+#include "llvm/MC/MCAnalysis/MCModule.h"
+#include <algorithm>
+
+using namespace llvm;
+
+// MCFunction
+
+// Creates a function called Name whose parent module is Parent.
+MCFunction::MCFunction(StringRef Name, MCModule *Parent)
+  : Name(Name), ParentModule(Parent)
+{}
+
+// Creates a basic block backed by the text atom TA, appends it to this
+// function's block list (which owns it) and returns a reference to it.
+MCBasicBlock &MCFunction::createBlock(const MCTextAtom &TA) {
+  Blocks.push_back(std::unique_ptr<MCBasicBlock>(new MCBasicBlock(TA, this)));
+  return *Blocks.back();
+}
+
+// Returns the first basic block of this function whose backing atom begins at
+// StartAddr, or nullptr when there is none.
+MCBasicBlock *MCFunction::find(uint64_t StartAddr) {
+  const_iterator It = begin();
+  const const_iterator End = end();
+  while (It != End) {
+    if ((*It)->getInsts()->getBeginAddr() == StartAddr)
+      return It->get();
+    ++It;
+  }
+  return nullptr;
+}
+
+// Const overload: delegates to the non-const find() and returns its result as
+// a pointer to const.
+const MCBasicBlock *MCFunction::find(uint64_t StartAddr) const {
+  return const_cast<MCFunction *>(this)->find(StartAddr);
+}
+
+// MCBasicBlock
+
+// Builds a basic block backed by the text atom Insts inside function Parent,
+// then registers the (atom, block) pair by calling trackBBForAtom() on the
+// object reached through getParent()->getParent().
+MCBasicBlock::MCBasicBlock(const MCTextAtom &Insts, MCFunction *Parent)
+  : Insts(&Insts), Parent(Parent) {
+  getParent()->getParent()->trackBBForAtom(&Insts, this);
+}
+
+// Records MCBB as a successor of this block unless it already is one.
+void MCBasicBlock::addSuccessor(const MCBasicBlock *MCBB) {
+  if (isSuccessor(MCBB))
+    return;
+  Successors.push_back(MCBB);
+}
+
+// Returns true when MCBB is in this block's successor list.
+bool MCBasicBlock::isSuccessor(const MCBasicBlock *MCBB) const {
+  for (const MCBasicBlock *Succ : Successors)
+    if (Succ == MCBB)
+      return true;
+  return false;
+}
+
+// Records MCBB as a predecessor of this block unless it already is one.
+void MCBasicBlock::addPredecessor(const MCBasicBlock *MCBB) {
+  if (isPredecessor(MCBB))
+    return;
+  Predecessors.push_back(MCBB);
+}
+
+// Returns true when MCBB is in this block's predecessor list.
+bool MCBasicBlock::isPredecessor(const MCBasicBlock *MCBB) const {
+  for (const MCBasicBlock *Pred : Predecessors)
+    if (Pred == MCBB)
+      return true;
+  return false;
+}
+
+// Rewires control-flow edges after this block has been split, with SplitBB
+// being the block for the upper part.
+//
+// SplitBB's atom must start right after this block's atom ends, and SplitBB
+// must not have successors yet. Afterwards SplitBB owns this block's former
+// successor list, has this block as a predecessor, and is this block's only
+// successor.
+// NOTE(review): the predecessor lists of the former successors are not
+// touched here - confirm whether callers expect them to be updated.
+void MCBasicBlock::splitBasicBlock(MCBasicBlock *SplitBB) {
+  assert(Insts->getEndAddr() + 1 == SplitBB->Insts->getBeginAddr() &&
+         "Splitting unrelated basic blocks!");
+  SplitBB->addPredecessor(this);
+  assert(SplitBB->Successors.empty() &&
+         "Split basic block shouldn't already have successors!");
+  // Hand the old successors over to the new block, then link to it.
+  SplitBB->Successors = Successors;
+  Successors.clear();
+  addSuccessor(SplitBB);
+}

diff --git a/lib/MC/MCAnalysis/MCModule.cpp b/lib/MC/MCAnalysis/MCModule.cpp
new file mode 100644
index 0000000..7512299
--- /dev/null
+++ b/lib/MC/MCAnalysis/MCModule.cpp

@@ -0,0 +1,142 @@
+//===- lib/MC/MCModule.cpp - MCModule implementation ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAnalysis/MCModule.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCAnalysis/MCAtom.h"
+#include "llvm/MC/MCAnalysis/MCFunction.h"
+#include <algorithm>
+
+using namespace llvm;
+
+// Predicate for std::lower_bound over the atom list: true when atom L ends
+// before Addr.
+static bool AtomComp(const MCAtom *L, uint64_t Addr) {
+  return L->getEndAddr() < Addr;
+}
+
+// Predicate for std::upper_bound over the atom list: true when Addr is below
+// the end address of atom R.
+static bool AtomCompInv(uint64_t Addr, const MCAtom *R) {
+  return Addr < R->getEndAddr();
+}
+
+// Inserts NewAtom into the atom list at the position given by its begin
+// address. Asserts that the atom's endpoints are ordered and that the atom
+// found at the insertion point starts after NewAtom ends.
+void MCModule::map(MCAtom *NewAtom) {
+  const uint64_t NewBegin = NewAtom->Begin;
+  assert(NewBegin <= NewAtom->End &&
+         "Creating MCAtom with endpoints reversed?");
+
+  // First atom whose end address is not below the new begin address.
+  AtomListTy::iterator InsertPos =
+      std::lower_bound(atom_begin(), atom_end(), NewBegin, AtomComp);
+  assert((InsertPos == atom_end() ||
+          (*InsertPos)->getBeginAddr() > NewAtom->End) &&
+         "Offset range already occupied!");
+
+  Atoms.insert(InsertPos, NewAtom);
+}
+
+// Allocates a text atom for the range Begin..End, registers it with map() and
+// returns it. Atoms in the atom list are deleted by the module's destructor.
+MCTextAtom *MCModule::createTextAtom(uint64_t Begin, uint64_t End) {
+  MCTextAtom *NewAtom = new MCTextAtom(this, Begin, End);
+  map(NewAtom);
+  return NewAtom;
+}
+
+// Allocates a data atom for the range Begin..End, registers it with map() and
+// returns it. Atoms in the atom list are deleted by the module's destructor.
+MCDataAtom *MCModule::createDataAtom(uint64_t Begin, uint64_t End) {
+  MCDataAtom *NewAtom = new MCDataAtom(this, Begin, End);
+  map(NewAtom);
+  return NewAtom;
+}
+
+// remap - Update the interval mapping for an atom.
+//
+// Removes Atom from the atom list, re-inserts it at the position that
+// corresponds to NewBegin, and only then stores NewBegin/NewEnd in the atom.
+// The atom is looked up through its current begin address, so its bounds must
+// still be the old ones when this is called.
+void MCModule::remap(MCAtom *Atom, uint64_t NewBegin, uint64_t NewEnd) {
+  // Find and erase the old mapping.
+  AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(),
+                                            Atom->Begin, AtomComp);
+  assert(I != atom_end() && "Atom offset not found in module!");
+  assert(*I == Atom && "Previous atom mapping was invalid!");
+  Atoms.erase(I);
+
+  // FIXME: special case NewBegin == Atom->Begin
+
+  // Insert the new mapping.
+  AtomListTy::iterator NewI = std::lower_bound(atom_begin(), atom_end(),
+                                               NewBegin, AtomComp);
+  // NOTE(review): this compares against the atom's old end (Atom->End), not
+  // NewEnd, so growing an atom into the following atom's range is not caught
+  // here - confirm whether NewEnd was intended.
+  assert((NewI == atom_end() || (*NewI)->getBeginAddr() > Atom->End)
+         && "Offset range already occupied!");
+  Atoms.insert(NewI, Atom);
+
+  // Update the atom internal bounds.
+  Atom->Begin = NewBegin;
+  Atom->End = NewEnd;
+}
+
+// Returns the atom whose address range contains Addr, or nullptr when no atom
+// covers that address.
+const MCAtom *MCModule::findAtomContaining(uint64_t Addr) const {
+  // First atom that does not end before Addr.
+  AtomListTy::const_iterator Pos =
+      std::lower_bound(atom_begin(), atom_end(), Addr, AtomComp);
+  if (Pos == atom_end())
+    return nullptr;
+  // It only contains Addr when it also starts at or before it.
+  if ((*Pos)->getBeginAddr() > Addr)
+    return nullptr;
+  return *Pos;
+}
+
+// Non-const overload: delegates to the const findAtomContaining() and casts
+// the constness of the result away.
+MCAtom *MCModule::findAtomContaining(uint64_t Addr) {
+  return const_cast<MCAtom*>(
+    const_cast<const MCModule *>(this)->findAtomContaining(Addr));
+}
+
+// Returns the first atom in the list whose end address is greater than Addr,
+// or nullptr when there is no such atom.
+const MCAtom *MCModule::findFirstAtomAfter(uint64_t Addr) const {
+  AtomListTy::const_iterator Pos =
+      std::upper_bound(atom_begin(), atom_end(), Addr, AtomCompInv);
+  if (Pos == atom_end())
+    return nullptr;
+  return *Pos;
+}
+
+// Non-const overload: delegates to the const findFirstAtomAfter() and casts
+// the constness of the result away.
+MCAtom *MCModule::findFirstAtomAfter(uint64_t Addr) {
+  return const_cast<MCAtom*>(
+    const_cast<const MCModule *>(this)->findFirstAtomAfter(Addr));
+}
+
+// Creates a function called Name in this module, appends it to the function
+// list (which owns it) and returns a pointer to it.
+MCFunction *MCModule::createFunction(StringRef Name) {
+  Functions.push_back(std::unique_ptr<MCFunction>(new MCFunction(Name, this)));
+  return Functions.back().get();
+}
+
+// Predicate for std::lower_bound over BBsByAtom: orders a basic block before
+// Atom when the pointer to the block's backing atom compares less than Atom.
+static bool CompBBToAtom(MCBasicBlock *BB, const MCTextAtom *Atom) {
+  return BB->getInsts() < Atom;
+}
+
+// Splits every basic block backed by the text atom TA after TA itself has
+// been split, NewTA being the atom for the upper part. For each such block a
+// new block backed by NewTA is created in the same function, and the
+// control-flow edges are rewired through MCBasicBlock::splitBasicBlock().
+void MCModule::splitBasicBlocksForAtom(const MCTextAtom *TA,
+                                       const MCTextAtom *NewTA) {
+  // Locate the run of basic blocks that are backed by TA.
+  BBsByAtomTy::iterator First =
+      std::lower_bound(BBsByAtom.begin(), BBsByAtom.end(), TA, CompBBToAtom);
+  BBsByAtomTy::iterator Last = First;
+  while (Last != BBsByAtom.end() && (*Last)->getInsts() == TA)
+    ++Last;
+
+  // createBlock() constructs an MCBasicBlock, whose constructor registers the
+  // block through trackBBForAtom(); that inserts into BBsByAtom and may
+  // invalidate iterators into it. Work on a snapshot of the affected blocks
+  // instead of iterating over BBsByAtom while it is being modified.
+  const BBsByAtomTy ToSplit(First, Last);
+  for (BBsByAtomTy::const_iterator I = ToSplit.begin(), E = ToSplit.end();
+       I != E; ++I) {
+    MCBasicBlock *BB = *I;
+    MCBasicBlock *NewBB = &BB->getParent()->createBlock(*NewTA);
+    BB->splitBasicBlock(NewBB);
+  }
+}
+
+// Records that basic block BB is backed by text atom Atom. BBsByAtom is kept
+// ordered by backing atom; a block that is already recorded is not added a
+// second time.
+void MCModule::trackBBForAtom(const MCTextAtom *Atom, MCBasicBlock *BB) {
+  assert(Atom == BB->getInsts() && "Text atom doesn't back the basic block!");
+  BBsByAtomTy::iterator Pos =
+      std::lower_bound(BBsByAtom.begin(), BBsByAtom.end(), Atom, CompBBToAtom);
+  // Walk the blocks already recorded for this atom; stop if BB is among them.
+  while (Pos != BBsByAtom.end() && (*Pos)->getInsts() == Atom) {
+    if (*Pos == BB)
+      return;
+    ++Pos;
+  }
+  BBsByAtom.insert(Pos, BB);
+}
+
+// Creates a module whose Entrypoint is initialized to 0.
+MCModule::MCModule() : Entrypoint(0) { }
+
+// Deletes every atom held in the atom list.
+MCModule::~MCModule() {
+  AtomListTy::iterator AtomIt = atom_begin();
+  const AtomListTy::iterator AtomEnd = atom_end();
+  while (AtomIt != AtomEnd) {
+    delete *AtomIt;
+    ++AtomIt;
+  }
+}

diff --git a/lib/MC/MCAnalysis/MCModuleYAML.cpp b/lib/MC/MCAnalysis/MCModuleYAML.cpp
new file mode 100644
index 0000000..876b06d
--- /dev/null
+++ b/lib/MC/MCAnalysis/MCModuleYAML.cpp

@@ -0,0 +1,464 @@
+//===- MCModuleYAML.cpp - MCModule YAMLIO implementation ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines classes for handling the YAML representation of MCModule.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAnalysis/MCModuleYAML.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/MC/MCAnalysis/MCAtom.h"
+#include "llvm/MC/MCAnalysis/MCFunction.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/YAML.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <vector>
+
+namespace llvm {
+
+namespace {
+
+// This class is used to map opcode and register names to enum values.
+//
+// There are at least 3 obvious ways to do this:
+// 1- Generate an MII/MRI method using a tablegen StringMatcher
+// 2- Write an MII/MRI method using std::lower_bound and the assumption that
+//    the enums are sorted (starting at a fixed value).
+// 3- Do the matching manually as is done here.
+//
+// Why 3?
+// 1- A StringMatcher function for thousands of entries would incur
+//    a non-negligible binary size overhead.
+// 2- The lower_bound comparators would be somewhat involved and aren't
+//    obviously reusable (see LessRecordRegister in llvm/TableGen/Record.h)
+// 3- This isn't actually something useful outside tests (but the same argument
+//    can be made against having {MII,MRI}::getName).
+//
+// If this becomes useful outside this specific situation, feel free to do
+// the Right Thing (tm) and move the functionality to MII/MRI.
+//
+class InstrRegInfoHolder {
+  typedef StringMap<unsigned, BumpPtrAllocator> EnumValByNameTy;
+  EnumValByNameTy InstEnumValueByName;
+  EnumValByNameTy RegEnumValueByName;
+
+  // Looks Name up in Table; on success stores the mapped value in Val and
+  // returns true, otherwise leaves Val untouched and returns false.
+  static bool lookup(const EnumValByNameTy &Table, StringRef Name,
+                     unsigned &Val) {
+    EnumValByNameTy::const_iterator It = Table.find(Name);
+    if (It == Table.end())
+      return false;
+    Val = It->getValue();
+    return true;
+  }
+
+public:
+  const MCInstrInfo &MII;
+  const MCRegisterInfo &MRI;
+
+  // Builds the name -> enum value tables for every opcode known to MII and
+  // every register known to MRI.
+  InstrRegInfoHolder(const MCInstrInfo &MII, const MCRegisterInfo &MRI)
+      : InstEnumValueByName(NextPowerOf2(MII.getNumOpcodes())),
+        RegEnumValueByName(NextPowerOf2(MRI.getNumRegs())), MII(MII), MRI(MRI) {
+    const int NumOpcodes = MII.getNumOpcodes();
+    for (int Opc = 0; Opc != NumOpcodes; ++Opc)
+      InstEnumValueByName[MII.getName(Opc)] = Opc;
+    const int NumRegs = MRI.getNumRegs();
+    for (int Reg = 0; Reg != NumRegs; ++Reg)
+      RegEnumValueByName[MRI.getName(Reg)] = Reg;
+  }
+
+  // Maps a register name to its enum value; returns false for unknown names.
+  bool matchRegister(StringRef Name, unsigned &Reg) {
+    return lookup(RegEnumValueByName, Name, Reg);
+  }
+  // Maps an opcode name to its enum value; returns false for unknown names.
+  bool matchOpcode(StringRef Name, unsigned &Opc) {
+    return lookup(InstEnumValueByName, Name, Opc);
+  }
+};
+
+} // end unnamed namespace
+
+namespace MCModuleYAML {
+
+// Distinct type for opcode values, so that they can get their own YAML scalar
+// traits (they are written as opcode names rather than numbers).
+LLVM_YAML_STRONG_TYPEDEF(unsigned, OpcodeEnum)
+
+// YAML mirror of one instruction operand.
+struct Operand {
+  MCOperand MCOp;
+};
+
+// YAML mirror of one decoded instruction: opcode, operands and size.
+struct Inst {
+  OpcodeEnum Opcode;
+  std::vector<Operand> Operands;
+  uint64_t Size;
+};
+
+// YAML mirror of an atom. Insts is the content of text atoms, Data the content
+// of data atoms.
+struct Atom {
+  MCAtom::AtomKind Type;
+  yaml::Hex64 StartAddress;
+  uint64_t Size;
+
+  std::vector<Inst> Insts;
+  yaml::BinaryRef Data;
+};
+
+// YAML mirror of a basic block; the block, its predecessors and its successors
+// are all identified by the begin address of their backing atom.
+struct BasicBlock {
+  yaml::Hex64 Address;
+  std::vector<yaml::Hex64> Preds;
+  std::vector<yaml::Hex64> Succs;
+};
+
+// YAML mirror of a function: its name and its basic blocks.
+struct Function {
+  StringRef Name;
+  std::vector<BasicBlock> BasicBlocks;
+};
+
+// YAML mirror of a whole module: its atoms and its functions.
+struct Module {
+  std::vector<Atom> Atoms;
+  std::vector<Function> Functions;
+};
+
+} // end namespace MCModuleYAML
+} // end namespace llvm
+
+// Declare std::vector of each of these types as a YAML sequence; addresses and
+// operands use the flow-sequence variant of the macro.
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::Hex64)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::MCModuleYAML::Operand)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Inst)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Atom)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::BasicBlock)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Function)
+
+namespace llvm {
+
+namespace yaml {
+
+// Trait specializations that teach YAML I/O how to read and write the
+// MCModuleYAML types; the member functions are defined further down.
+
+// Atom kinds are written as an enumeration.
+template <> struct ScalarEnumerationTraits<MCAtom::AtomKind> {
+  static void enumeration(IO &IO, MCAtom::AtomKind &Kind);
+};
+
+template <> struct MappingTraits<MCModuleYAML::Atom> {
+  static void mapping(IO &IO, MCModuleYAML::Atom &A);
+};
+
+template <> struct MappingTraits<MCModuleYAML::Inst> {
+  static void mapping(IO &IO, MCModuleYAML::Inst &I);
+};
+
+template <> struct MappingTraits<MCModuleYAML::BasicBlock> {
+  static void mapping(IO &IO, MCModuleYAML::BasicBlock &BB);
+};
+
+template <> struct MappingTraits<MCModuleYAML::Function> {
+  static void mapping(IO &IO, MCModuleYAML::Function &Fn);
+};
+
+template <> struct MappingTraits<MCModuleYAML::Module> {
+  static void mapping(IO &IO, MCModuleYAML::Module &M);
+};
+
+// Operands are written as a single scalar; the void * argument of output()
+// and input() is the YAML I/O context.
+template <> struct ScalarTraits<MCModuleYAML::Operand> {
+  static void output(const MCModuleYAML::Operand &, void *,
+                     llvm::raw_ostream &);
+  static StringRef input(StringRef, void *, MCModuleYAML::Operand &);
+  static bool mustQuote(StringRef) { return false; }
+};
+
+// Opcodes are written as a single scalar; the void * argument of output()
+// and input() is the YAML I/O context.
+template <> struct ScalarTraits<MCModuleYAML::OpcodeEnum> {
+  static void output(const MCModuleYAML::OpcodeEnum &, void *,
+                     llvm::raw_ostream &);
+  static StringRef input(StringRef, void *, MCModuleYAML::OpcodeEnum &);
+  static bool mustQuote(StringRef) { return false; }
+};
+
+// Maps the YAML strings "Text" and "Data" to the corresponding atom kinds.
+void ScalarEnumerationTraits<MCAtom::AtomKind>::enumeration(
+    IO &IO, MCAtom::AtomKind &Value) {
+  IO.enumCase(Value, "Text", MCAtom::TextAtom);
+  IO.enumCase(Value, "Data", MCAtom::DataAtom);
+}
+
+// Maps an atom: start address, size and type, followed by a "Content" key
+// whose value is the instruction list for text atoms and the raw bytes for
+// data atoms.
+void MappingTraits<MCModuleYAML::Atom>::mapping(IO &IO, MCModuleYAML::Atom &A) {
+  IO.mapRequired("StartAddress", A.StartAddress);
+  IO.mapRequired("Size", A.Size);
+  IO.mapRequired("Type", A.Type);
+  // The type must be mapped first: it selects how "Content" is interpreted.
+  switch (A.Type) {
+  case MCAtom::TextAtom:
+    IO.mapRequired("Content", A.Insts);
+    break;
+  case MCAtom::DataAtom:
+    IO.mapRequired("Content", A.Data);
+    break;
+  }
+}
+
+// Maps an instruction: opcode ("Inst"), size ("Size") and operands ("Ops"),
+// all of them required.
+void MappingTraits<MCModuleYAML::Inst>::mapping(IO &IO, MCModuleYAML::Inst &I) {
+  IO.mapRequired("Inst", I.Opcode);
+  IO.mapRequired("Size", I.Size);
+  IO.mapRequired("Ops", I.Operands);
+}
+
+// Maps a basic block: its address plus the addresses of its predecessors and
+// successors, all of them required.
+void
+MappingTraits<MCModuleYAML::BasicBlock>::mapping(IO &IO,
+                                                 MCModuleYAML::BasicBlock &BB) {
+  IO.mapRequired("Address", BB.Address);
+  IO.mapRequired("Preds", BB.Preds);
+  IO.mapRequired("Succs", BB.Succs);
+}
+
+// Maps a function: its name and its list of basic blocks, both required.
+void MappingTraits<MCModuleYAML::Function>::mapping(IO &IO,
+                                                    MCModuleYAML::Function &F) {
+  IO.mapRequired("Name", F.Name);
+  IO.mapRequired("BasicBlocks", F.BasicBlocks);
+}
+
+// Maps a module: the atom list is required, the function list is optional.
+void MappingTraits<MCModuleYAML::Module>::mapping(IO &IO,
+                                                  MCModuleYAML::Module &M) {
+  IO.mapRequired("Atoms", M.Atoms);
+  IO.mapOptional("Functions", M.Functions);
+}
+
+// Writes an operand as a scalar: "I" followed by the value for immediates,
+// "R" followed by the register name for registers. Ctx is the
+// InstrRegInfoHolder that was handed to YAML I/O as its context.
+void
+ScalarTraits<MCModuleYAML::Operand>::output(const MCModuleYAML::Operand &Val,
+                                            void *Ctx, raw_ostream &Out) {
+  InstrRegInfoHolder *IRI = static_cast<InstrRegInfoHolder *>(Ctx);
+  const MCOperand &Op = Val.MCOp;
+
+  // FIXME: Doesn't support FPImm and expr/inst, but do these make sense?
+  if (Op.isImm()) {
+    Out << "I" << Op.getImm();
+    return;
+  }
+  if (Op.isReg()) {
+    Out << "R" << IRI->MRI.getName(Op.getReg());
+    return;
+  }
+  llvm_unreachable("Trying to output invalid MCOperand!");
+}
+
+// Parses an operand scalar into Val.
+//
+// The first character selects the operand kind: 'R' is followed by a register
+// name, 'I' by a decimal immediate. Ctx is the InstrRegInfoHolder that was
+// handed to YAML I/O as its context; it resolves register names.
+//
+// Returns an empty StringRef on success and an error message otherwise.
+StringRef
+ScalarTraits<MCModuleYAML::Operand>::input(StringRef Scalar, void *Ctx,
+                                           MCModuleYAML::Operand &Val) {
+  InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx;
+  char Type = 0;
+  if (Scalar.size() >= 1)
+    Type = Scalar.front();
+  if (Type != 'R' && Type != 'I')
+    return "Operand must start with 'R' (register) or 'I' (immediate).";
+
+  if (Type == 'R') {
+    unsigned Reg;
+    if (!IRI->matchRegister(Scalar.substr(1), Reg))
+      return "Invalid register name.";
+    Val.MCOp = MCOperand::CreateReg(Reg);
+    return StringRef();
+  }
+
+  // Only 'I' is left at this point, so no further fallback branch is needed.
+  int64_t RIVal;
+  if (Scalar.substr(1).getAsInteger(10, RIVal))
+    return "Invalid immediate value.";
+  Val.MCOp = MCOperand::CreateImm(RIVal);
+  return StringRef();
+}
+
+// Writes an opcode as its name, looked up through the MCInstrInfo of the
+// InstrRegInfoHolder that was handed to YAML I/O as its context (Ctx).
+void ScalarTraits<MCModuleYAML::OpcodeEnum>::output(
+    const MCModuleYAML::OpcodeEnum &Val, void *Ctx, raw_ostream &Out) {
+  InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx;
+  Out << IRI->MII.getName(Val);
+}
+
+// Parses an opcode name into Val, using the InstrRegInfoHolder that was handed
+// to YAML I/O as its context (Ctx). Returns an empty string on success and an
+// error message for unknown names.
+StringRef
+ScalarTraits<MCModuleYAML::OpcodeEnum>::input(StringRef Scalar, void *Ctx,
+                                              MCModuleYAML::OpcodeEnum &Val) {
+  InstrRegInfoHolder *IRI = static_cast<InstrRegInfoHolder *>(Ctx);
+  unsigned Opc;
+  if (IRI->matchOpcode(Scalar, Opc)) {
+    Val = Opc;
+    return "";
+  }
+  return "Invalid instruction opcode.";
+}
+
+} // end namespace yaml
+
+namespace {
+
+// Converts an MCModule into its MCModuleYAML::Module mirror. The conversion
+// runs in the constructor; the result is retrieved with getYAMLModule().
+class MCModule2YAML {
+  const MCModule &MCM;
+  MCModuleYAML::Module YAMLModule;
+  void dumpAtom(const MCAtom *MCA);
+  void dumpFunction(const MCFunction &MCF);
+  // NOTE(review): no definition or use of dumpBasicBlock is visible in this
+  // part of the file - confirm whether the declaration is still needed.
+  void dumpBasicBlock(const MCBasicBlock *MCBB);
+
+public:
+  MCModule2YAML(const MCModule &MCM);
+  MCModuleYAML::Module &getYAMLModule();
+};
+
+// Populates an MCModule from an MCModuleYAML::Module mirror.
+class YAML2MCModule {
+  MCModule &MCM;
+
+public:
+  YAML2MCModule(MCModule &MCM);
+  // Returns an empty string on success and an error message otherwise.
+  StringRef parse(const MCModuleYAML::Module &YAMLModule);
+};
+
+} // end unnamed namespace
+
+// Converts every atom and then every function of MCM into the YAML mirror.
+MCModule2YAML::MCModule2YAML(const MCModule &MCM) : MCM(MCM), YAMLModule() {
+  MCModule::const_atom_iterator AtomIt = MCM.atom_begin();
+  const MCModule::const_atom_iterator AtomEnd = MCM.atom_end();
+  while (AtomIt != AtomEnd) {
+    dumpAtom(*AtomIt);
+    ++AtomIt;
+  }
+
+  MCModule::const_func_iterator FuncIt = MCM.func_begin();
+  const MCModule::const_func_iterator FuncEnd = MCM.func_end();
+  while (FuncIt != FuncEnd) {
+    dumpFunction(**FuncIt);
+    ++FuncIt;
+  }
+}
+
+// Appends the YAML mirror of atom MCA to the module: kind, start address and
+// size, plus the decoded instructions (text atoms) or the data (data atoms).
+void MCModule2YAML::dumpAtom(const MCAtom *MCA) {
+  YAMLModule.Atoms.resize(YAMLModule.Atoms.size() + 1);
+  MCModuleYAML::Atom &YAtom = YAMLModule.Atoms.back();
+  YAtom.Type = MCA->getKind();
+  YAtom.StartAddress = MCA->getBeginAddr();
+  // The end address is inclusive, hence the +1.
+  YAtom.Size = MCA->getEndAddr() - MCA->getBeginAddr() + 1;
+
+  if (const MCTextAtom *TA = dyn_cast<MCTextAtom>(MCA)) {
+    const size_t InstCount = TA->size();
+    YAtom.Insts.resize(InstCount);
+    for (size_t Idx = 0; Idx != InstCount; ++Idx) {
+      const MCDecodedInst &Decoded = TA->at(Idx);
+      MCModuleYAML::Inst &YInst = YAtom.Insts[Idx];
+      YInst.Opcode = Decoded.Inst.getOpcode();
+      YInst.Size = Decoded.Size;
+      const unsigned OpCount = Decoded.Inst.getNumOperands();
+      YInst.Operands.resize(OpCount);
+      for (unsigned OpIdx = 0; OpIdx != OpCount; ++OpIdx)
+        YInst.Operands[OpIdx].MCOp = Decoded.Inst.getOperand(OpIdx);
+    }
+    return;
+  }
+
+  if (const MCDataAtom *DA = dyn_cast<MCDataAtom>(MCA)) {
+    YAtom.Data = DA->getData();
+    return;
+  }
+
+  llvm_unreachable("Unknown atom type.");
+}
+
+// Appends the YAML mirror of function MCF to the module: its name and, for
+// each basic block, the begin address of the backing atom together with the
+// begin addresses of its predecessors and successors.
+void MCModule2YAML::dumpFunction(const MCFunction &MCF) {
+  YAMLModule.Functions.resize(YAMLModule.Functions.size() + 1);
+  MCModuleYAML::Function &YFunc = YAMLModule.Functions.back();
+  YFunc.Name = MCF.getName();
+
+  for (MCFunction::const_iterator BlockIt = MCF.begin(), BlockEnd = MCF.end();
+       BlockIt != BlockEnd; ++BlockIt) {
+    const MCBasicBlock &Block = **BlockIt;
+    YFunc.BasicBlocks.resize(YFunc.BasicBlocks.size() + 1);
+    MCModuleYAML::BasicBlock &YBlock = YFunc.BasicBlocks.back();
+    YBlock.Address = Block.getInsts()->getBeginAddr();
+
+    MCBasicBlock::pred_const_iterator PredIt = Block.pred_begin();
+    const MCBasicBlock::pred_const_iterator PredEnd = Block.pred_end();
+    for (; PredIt != PredEnd; ++PredIt)
+      YBlock.Preds.push_back((*PredIt)->getInsts()->getBeginAddr());
+
+    MCBasicBlock::succ_const_iterator SuccIt = Block.succ_begin();
+    const MCBasicBlock::succ_const_iterator SuccEnd = Block.succ_end();
+    for (; SuccIt != SuccEnd; ++SuccIt)
+      YBlock.Succs.push_back((*SuccIt)->getInsts()->getBeginAddr());
+  }
+}
+
+// Returns the YAML mirror that was built by the constructor.
+MCModuleYAML::Module &MCModule2YAML::getYAMLModule() { return YAMLModule; }
+
+// Remembers the module that parse() will populate.
+YAML2MCModule::YAML2MCModule(MCModule &MCM) : MCM(MCM) {}
+
+// Populates the module from its YAML mirror.
+//
+// First every atom is created (text atoms with their instructions, data atoms
+// with their bytes). Then, for every function, one basic block is created per
+// YAML basic block - each must start at the start address of a text atom - and
+// finally the predecessor/successor edges are added.
+//
+// Returns an empty string on success and an error message otherwise.
+StringRef YAML2MCModule::parse(const MCModuleYAML::Module &YAMLModule) {
+  typedef std::vector<MCModuleYAML::Atom>::const_iterator AtomIt;
+  typedef std::vector<MCModuleYAML::Inst>::const_iterator InstIt;
+  typedef std::vector<MCModuleYAML::Operand>::const_iterator OpIt;
+
+  // Text atoms by start address, used below to resolve basic blocks.
+  typedef DenseMap<uint64_t, MCTextAtom *> AddrToTextAtomTy;
+  AddrToTextAtomTy TAByAddr;
+
+  for (AtomIt AI = YAMLModule.Atoms.begin(), AE = YAMLModule.Atoms.end();
+       AI != AE; ++AI) {
+    uint64_t StartAddress = AI->StartAddress;
+    if (AI->Size == 0)
+      return "Atoms can't be empty!";
+    // End addresses are inclusive.
+    uint64_t EndAddress = StartAddress + AI->Size - 1;
+    switch (AI->Type) {
+    case MCAtom::TextAtom: {
+      MCTextAtom *TA = MCM.createTextAtom(StartAddress, EndAddress);
+      TAByAddr[StartAddress] = TA;
+      for (InstIt II = AI->Insts.begin(), IE = AI->Insts.end(); II != IE;
+           ++II) {
+        MCInst MI;
+        MI.setOpcode(II->Opcode);
+        for (OpIt OI = II->Operands.begin(), OE = II->Operands.end(); OI != OE;
+             ++OI)
+          MI.addOperand(OI->MCOp);
+        TA->addInst(MI, II->Size);
+      }
+      break;
+    }
+    case MCAtom::DataAtom: {
+      MCDataAtom *DA = MCM.createDataAtom(StartAddress, EndAddress);
+      // Decode the YAML binary blob into raw bytes before adding them.
+      SmallVector<char, 64> Data;
+      raw_svector_ostream OS(Data);
+      AI->Data.writeAsBinary(OS);
+      OS.flush();
+      for (size_t i = 0, e = Data.size(); i != e; ++i)
+        DA->addData((uint8_t)Data[i]);
+      break;
+    }
+    }
+  }
+
+  typedef std::vector<MCModuleYAML::Function>::const_iterator FuncIt;
+  typedef std::vector<MCModuleYAML::BasicBlock>::const_iterator BBIt;
+  typedef std::vector<yaml::Hex64>::const_iterator AddrIt;
+  for (FuncIt FI = YAMLModule.Functions.begin(),
+              FE = YAMLModule.Functions.end();
+       FI != FE; ++FI) {
+    MCFunction *MCFN = MCM.createFunction(FI->Name);
+    // First pass: create all blocks, so that edges can refer to any of them.
+    for (BBIt BBI = FI->BasicBlocks.begin(), BBE = FI->BasicBlocks.end();
+         BBI != BBE; ++BBI) {
+      AddrToTextAtomTy::const_iterator It = TAByAddr.find(BBI->Address);
+      if (It == TAByAddr.end())
+        return "Basic block start address doesn't match any text atom!";
+      MCFN->createBlock(*It->second);
+    }
+    // Second pass: add the predecessor and successor edges.
+    for (BBIt BBI = FI->BasicBlocks.begin(), BBE = FI->BasicBlocks.end();
+         BBI != BBE; ++BBI) {
+      MCBasicBlock *MCBB = MCFN->find(BBI->Address);
+      if (!MCBB)
+        return "Couldn't find matching basic block in function.";
+      for (AddrIt PI = BBI->Preds.begin(), PE = BBI->Preds.end(); PI != PE;
+           ++PI) {
+        MCBasicBlock *Pred = MCFN->find(*PI);
+        if (!Pred)
+          return "Couldn't find predecessor basic block.";
+        MCBB->addPredecessor(Pred);
+      }
+      for (AddrIt SI = BBI->Succs.begin(), SE = BBI->Succs.end(); SI != SE;
+           ++SI) {
+        MCBasicBlock *Succ = MCFN->find(*SI);
+        if (!Succ)
+          return "Couldn't find successor basic block.";
+        MCBB->addSuccessor(Succ);
+      }
+    }
+  }
+  return "";
+}
+
+// Writes the YAML representation of MCM to OS. MII and MRI provide the opcode
+// and register names used in the output. Always returns an empty string.
+StringRef mcmodule2yaml(raw_ostream &OS, const MCModule &MCM,
+                        const MCInstrInfo &MII, const MCRegisterInfo &MRI) {
+  MCModule2YAML Dumper(MCM);
+  // The holder is handed to YAML I/O as its context, so that the scalar traits
+  // can translate between enum values and names.
+  InstrRegInfoHolder IRI(MII, MRI);
+  void *const Ctx = static_cast<void *>(&IRI);
+  yaml::Output YOut(OS, Ctx);
+  YOut << Dumper.getYAMLModule();
+  return "";
+}
+
+// Parses YamlContent into a freshly allocated module stored in MCM. MII and
+// MRI are used to resolve the opcode and register names found in the input.
+//
+// Returns an empty string on success and an error message otherwise. MCM is
+// reset to a new module in either case.
+StringRef yaml2mcmodule(std::unique_ptr<MCModule> &MCM, StringRef YamlContent,
+                        const MCInstrInfo &MII, const MCRegisterInfo &MRI) {
+  MCM.reset(new MCModule);
+  YAML2MCModule Parser(*MCM);
+  MCModuleYAML::Module YAMLModule;
+  // The holder is handed to YAML I/O as its context, so that the scalar traits
+  // can translate between names and enum values.
+  InstrRegInfoHolder IRI(MII, MRI);
+  yaml::Input YIn(YamlContent, (void *)&IRI);
+  YIn >> YAMLModule;
+  // std::error_code::message() returns a temporary std::string; a StringRef to
+  // it would dangle as soon as this function returns. Report a string literal
+  // instead, which stays valid for the caller.
+  if (YIn.error())
+    return "Failed to parse MCModule YAML.";
+  return Parser.parse(YAMLModule);
+}
+
+} // end namespace llvm

diff --git a/lib/MC/MCAnalysis/MCObjectDisassembler.cpp b/lib/MC/MCAnalysis/MCObjectDisassembler.cpp
new file mode 100644
index 0000000..0f789ff
--- /dev/null
+++ b/lib/MC/MCAnalysis/MCObjectDisassembler.cpp

@@ -0,0 +1,574 @@
+//===- lib/MC/MCAnalysis/MCObjectDisassembler.cpp -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCObjectDisassembler.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAnalysis/MCAtom.h"
+#include "llvm/MC/MCAnalysis/MCFunction.h"
+#include "llvm/MC/MCAnalysis/MCModule.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCObjectSymbolizer.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MachO.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/StringRefMemoryObject.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+
+using namespace llvm;
+using namespace object;
+
+#define DEBUG_TYPE "mc"
+
+/// Stores references to the object file, the instruction decoder and the
+/// instruction analysis. MOS starts out null; code in this file checks it
+/// before every use.
+MCObjectDisassembler::MCObjectDisassembler(const ObjectFile &Obj,
+                                           const MCDisassembler &Dis,
+                                           const MCInstrAnalysis &MIA)
+    : Obj(Obj), Dis(Dis), MIA(MIA), MOS(nullptr) {}
+
+/// Returns the effective load address of the first symbol named "main" or
+/// "_main" in the object's symbol table, or 0 when there is no such symbol.
+uint64_t MCObjectDisassembler::getEntrypoint() {
+  for (const SymbolRef &Sym : Obj.symbols()) {
+    StringRef SymName;
+    Sym.getName(SymName);
+    const bool IsMain = SymName == "main" || SymName == "_main";
+    if (!IsMain)
+      continue;
+    uint64_t MainAddr;
+    Sym.getAddress(MainAddr);
+    return getEffectiveLoadAddr(MainAddr);
+  }
+  return 0;
+}
+
+/// Generic implementation: reports no static initializer functions.
+ArrayRef<uint64_t> MCObjectDisassembler::getStaticInitFunctions() {
+  ArrayRef<uint64_t> NoFunctions;
+  return NoFunctions;
+}
+
+/// Generic implementation: reports no static exit functions.
+ArrayRef<uint64_t> MCObjectDisassembler::getStaticExitFunctions() {
+  ArrayRef<uint64_t> NoFunctions;
+  return NoFunctions;
+}
+
+/// Returns the memory region to disassemble \p Addr from.
+///
+/// \p Addr is currently ignored: the fallback region is returned for every
+/// address, and the result is null when no fallback region has been set.
+MemoryObject *MCObjectDisassembler::getRegionFor(uint64_t Addr) {
+  // FIXME: Keep track of object sections.
+  return FallbackRegion.get();
+}
+
+/// Maps an address from the object file to the address it is loaded at. The
+/// generic implementation applies no translation and returns \p Addr as is.
+uint64_t MCObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) {
+  return Addr;
+}
+
+/// Inverse of getEffectiveLoadAddr(). The generic implementation applies no
+/// translation and returns \p Addr as is.
+uint64_t MCObjectDisassembler::getOriginalLoadAddr(uint64_t Addr) {
+  return Addr;
+}
+
+/// Allocates a new MCModule with only its entrypoint filled in (taken from
+/// getEntrypoint()). The module is returned as a raw pointer allocated with
+/// new.
+MCModule *MCObjectDisassembler::buildEmptyModule() {
+  MCModule *EmptyModule = new MCModule;
+  const uint64_t Entry = getEntrypoint();
+  EmptyModule->Entrypoint = Entry;
+  return EmptyModule;
+}
+
+/// Builds a module for the object: an empty module with the entrypoint set,
+/// then atoms for its sections, and, when \p withCFG is true, functions and
+/// basic blocks on top of those atoms.
+MCModule *MCObjectDisassembler::buildModule(bool withCFG) {
+  MCModule *Result = buildEmptyModule();
+  buildSectionAtoms(Result);
+  if (!withCFG)
+    return Result;
+
+  buildCFG(Result);
+  return Result;
+}
+
+/// Populates \p Module with atoms for the text and data sections of Obj.
+///
+/// A section is skipped when it is neither text nor data, when its address or
+/// size is unknown, when it is empty, or when the size of its contents differs
+/// from the section size. Each data section becomes a single MCDataAtom. Text
+/// sections are decoded linearly from their start: runs of decodable
+/// instructions go into MCTextAtoms and runs of undecodable bytes go into
+/// MCDataAtoms.
+void MCObjectDisassembler::buildSectionAtoms(MCModule *Module) {
+  for (const SectionRef &Section : Obj.sections()) {
+    bool isText;
+    Section.isText(isText);
+    bool isData;
+    Section.isData(isData);
+    if (!isData && !isText)
+      continue;
+
+    uint64_t StartAddr;
+    Section.getAddress(StartAddr);
+    uint64_t SecSize;
+    Section.getSize(SecSize);
+    if (StartAddr == UnknownAddressOrSize || SecSize == UnknownAddressOrSize)
+      continue;
+    StartAddr = getEffectiveLoadAddr(StartAddr);
+
+    StringRef Contents;
+    Section.getContents(Contents);
+    StringRefMemoryObject memoryObject(Contents, StartAddr);
+
+    // We don't care about things like non-file-backed sections yet.
+    if (Contents.size() != SecSize || !SecSize)
+      continue;
+    // Address of the last byte of the section (inclusive end).
+    uint64_t EndAddr = StartAddr + SecSize - 1;
+
+    StringRef SecName;
+    Section.getName(SecName);
+
+    if (isText) {
+      // At most one of these is the run currently being extended; the other
+      // is reset whenever the decoder switches between success and failure.
+      MCTextAtom *Text = nullptr;
+      MCDataAtom *InvalidData = nullptr;
+
+      // Written by getInstruction() on every iteration and used as the loop
+      // step; the assert below expects it to be non-zero on failure too.
+      uint64_t InstSize;
+      for (uint64_t Index = 0; Index < SecSize; Index += InstSize) {
+        const uint64_t CurAddr = StartAddr + Index;
+        MCInst Inst;
+        if (Dis.getInstruction(Inst, InstSize, memoryObject, CurAddr, nulls(),
+                               nulls())) {
+          if (!Text) {
+            // First decodable instruction of a new run: open a text atom
+            // named after the section.
+            Text = Module->createTextAtom(CurAddr, CurAddr);
+            Text->setName(SecName);
+          }
+          Text->addInst(Inst, InstSize);
+          // A successful decode ends any run of undecodable bytes.
+          InvalidData = nullptr;
+        } else {
+          assert(InstSize && "getInstruction() consumed no bytes");
+          if (!InvalidData) {
+            // First undecodable chunk of a new run: close the text run and
+            // open a data atom covering this chunk.
+            Text = nullptr;
+            InvalidData = Module->createDataAtom(CurAddr, CurAddr+InstSize - 1);
+          }
+          // Copy the skipped bytes verbatim into the data atom.
+          for (uint64_t I = 0; I < InstSize; ++I)
+            InvalidData->addData(Contents[Index+I]);
+        }
+      }
+    } else {
+      // Data section: one atom spanning the whole section, filled byte by
+      // byte.
+      MCDataAtom *Data = Module->createDataAtom(StartAddr, EndAddr);
+      Data->setName(SecName);
+      for (uint64_t Index = 0; Index < SecSize; ++Index)
+        Data->addData(Contents[Index]);
+    }
+  }
+}
+
+namespace {
+  struct BBInfo;
+  typedef SmallPtrSet<BBInfo*, 2> BBInfoSetTy;
+
+  /// Scratch record for one basic block while a CFG is being built. It ties a
+  /// text atom to the MCBasicBlock created for it and holds the block's edges
+  /// until they are copied onto the MCBasicBlock.
+  struct BBInfo {
+    // Text atom holding the block's instructions; null until one is assigned.
+    MCTextAtom *Atom;
+    // Basic block created for Atom; null until it has been created.
+    MCBasicBlock *BB;
+    // Edges expressed as links between BBInfo records (used by buildCFG).
+    BBInfoSetTy Succs;
+    BBInfoSetTy Preds;
+    // Successor edges expressed as block start addresses (used by getBBAt).
+    MCObjectDisassembler::AddressSetTy SuccAddrs;
+
+    BBInfo() : Atom(nullptr), BB(nullptr) {}
+
+    /// Records the edge this -> \p Succ on both ends.
+    void addSucc(BBInfo &Succ) {
+      Succs.insert(&Succ);
+      Succ.Preds.insert(this);
+    }
+  };
+}
+
+/// Sorts \p V in ascending order and drops repeated addresses, so that each
+/// address remains exactly once.
+static void RemoveDupsFromAddressVector(MCObjectDisassembler::AddressSetTy &V) {
+  std::sort(V.begin(), V.end());
+  MCObjectDisassembler::AddressSetTy::iterator FirstDup =
+      std::unique(V.begin(), V.end());
+  V.erase(FirstDup, V.end());
+}
+
+/// Builds functions and basic blocks for \p Module from its existing text
+/// atoms.
+///
+/// The steps are:
+///  1. collect split points and function entry addresses from function
+///     symbols and from the instructions of every text atom;
+///  2. split text atoms at the split points so each atom is one basic block;
+///  3. derive successor/predecessor links from the last instruction of each
+///     atom;
+///  4. create one MCFunction per entry address, containing every block
+///     connected to the entry through those links in either direction.
+///
+/// \p Module must not contain any function yet (asserted).
+void MCObjectDisassembler::buildCFG(MCModule *Module) {
+  typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy;
+  // Block records keyed by block start address. operator[] is used
+  // throughout, so looking up an unknown address creates a record whose Atom
+  // is null; later steps skip such records.
+  BBInfoByAddrTy BBInfos;
+  // Addresses where a text atom has to be cut.
+  AddressSetTy Splits;
+  // Addresses at which a function is created.
+  AddressSetTy Calls;
+
+  // Every function symbol starts both a function and a basic block.
+  for (const SymbolRef &Symbol : Obj.symbols()) {
+    SymbolRef::Type SymType;
+    Symbol.getType(SymType);
+    if (SymType == SymbolRef::ST_Function) {
+      uint64_t SymAddr;
+      Symbol.getAddress(SymAddr);
+      SymAddr = getEffectiveLoadAddr(SymAddr);
+      Calls.push_back(SymAddr);
+      Splits.push_back(SymAddr);
+    }
+  }
+
+  assert(Module->func_begin() == Module->func_end()
+         && "Module already has a CFG!");
+
+  // First, determine the basic block boundaries and call targets.
+  for (MCModule::atom_iterator AI = Module->atom_begin(),
+                               AE = Module->atom_end();
+       AI != AE; ++AI) {
+    MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
+    if (!TA) continue;
+    // The start of every text atom is treated as a function entry as well.
+    Calls.push_back(TA->getBeginAddr());
+    BBInfos[TA->getBeginAddr()].Atom = TA;
+    for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end();
+         II != IE; ++II) {
+      // A terminator ends its block: cut right after it.
+      if (MIA.isTerminator(II->Inst))
+        Splits.push_back(II->Address + II->Size);
+      uint64_t Target;
+      // A branch or call with a computable target starts a block at the
+      // target; call targets additionally start a function.
+      if (MIA.evaluateBranch(II->Inst, II->Address, II->Size, Target)) {
+        if (MIA.isCall(II->Inst))
+          Calls.push_back(Target);
+        Splits.push_back(Target);
+      }
+    }
+  }
+
+  RemoveDupsFromAddressVector(Splits);
+  RemoveDupsFromAddressVector(Calls);
+
+  // Split text atoms into basic block atoms.
+  for (AddressSetTy::const_iterator SI = Splits.begin(), SE = Splits.end();
+       SI != SE; ++SI) {
+    MCAtom *A = Module->findAtomContaining(*SI);
+    if (!A) continue;
+    // NOTE(review): cast<> asserts if the split address lands in a data atom.
+    MCTextAtom *TA = cast<MCTextAtom>(A);
+    // Already a block boundary; nothing to cut.
+    if (TA->getBeginAddr() == *SI)
+      continue;
+    MCTextAtom *NewAtom = TA->split(*SI);
+    BBInfos[NewAtom->getBeginAddr()].Atom = NewAtom;
+    // Name the new atom "<name up to the last ':'>:<hex split address>".
+    StringRef BBName = TA->getName();
+    BBName = BBName.substr(0, BBName.find_last_of(':'));
+    NewAtom->setName((BBName + ":" + utohexstr(*SI)).str());
+  }
+
+  // Compute succs/preds.
+  for (MCModule::atom_iterator AI = Module->atom_begin(),
+                               AE = Module->atom_end();
+                               AI != AE; ++AI) {
+    MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
+    if (!TA) continue;
+    BBInfo &CurBB = BBInfos[TA->getBeginAddr()];
+    // Only the last instruction of a block decides where control goes next.
+    const MCDecodedInst &LI = TA->back();
+    if (MIA.isBranch(LI.Inst)) {
+      uint64_t Target;
+      if (MIA.evaluateBranch(LI.Inst, LI.Address, LI.Size, Target))
+        CurBB.addSucc(BBInfos[Target]);
+      // A conditional branch may also fall through to the next address.
+      if (MIA.isConditionalBranch(LI.Inst))
+        CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
+    } else if (!MIA.isTerminator(LI.Inst))
+      // Not a terminator: control simply continues at the next address.
+      CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
+  }
+
+
+  // Create functions and basic blocks.
+  for (AddressSetTy::const_iterator CI = Calls.begin(), CE = Calls.end();
+       CI != CE; ++CI) {
+    BBInfo &BBI = BBInfos[*CI];
+    // No text atom starts at this address; no function can be created.
+    if (!BBI.Atom) continue;
+
+    MCFunction &MCFN = *Module->createFunction(BBI.Atom->getName());
+
+    // Create MCBBs.
+    // The worklist grows while it is being walked, hence the index-based
+    // loop that re-reads size() on every iteration.
+    SmallSetVector<BBInfo*, 16> Worklist;
+    Worklist.insert(&BBI);
+    for (size_t wi = 0; wi < Worklist.size(); ++wi) {
+      BBInfo *BBI = Worklist[wi];
+      if (!BBI->Atom)
+        continue;
+      BBI->BB = &MCFN.createBlock(*BBI->Atom);
+      // Add all predecessors and successors to the worklist.
+      for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end();
+                                 SI != SE; ++SI)
+        Worklist.insert(*SI);
+      for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end();
+                                 PI != PE; ++PI)
+        Worklist.insert(*PI);
+    }
+
+    // Set preds/succs.
+    for (size_t wi = 0; wi < Worklist.size(); ++wi) {
+      BBInfo *BBI = Worklist[wi];
+      MCBasicBlock *MCBB = BBI->BB;
+      if (!MCBB)
+        continue;
+      // Edges to records that never got a block (no atom) are dropped.
+      for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end();
+           SI != SE; ++SI)
+        if ((*SI)->BB)
+          MCBB->addSuccessor((*SI)->BB);
+      for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end();
+           PI != PE; ++PI)
+        if ((*PI)->BB)
+          MCBB->addPredecessor((*PI)->BB);
+    }
+  }
+}
+
+// Basic idea of the disassembly + discovery:
+//
+// start with the wanted address, insert it in the worklist
+// while worklist not empty, take next address in the worklist:
+// - check if atom exists there
+//   - if middle of atom:
+//     - split basic blocks referencing the atom
+//     - look for an already encountered BBInfo (using a map<atom, bbinfo>)
+//       - if there is, split it (new one, fallthrough, move succs, etc..)
+//   - if start of atom: nothing else to do
+//   - if no atom: create new atom and new bbinfo
+// - look at the last instruction in the atom, add succs to worklist
+// for all elements in the worklist:
+// - create basic block, update preds/succs, etc..
+//
+/// Returns the basic block of \p MCFN starting at \p BBBeginAddr, discovering
+/// (and disassembling, where no atom exists yet) every block reachable from
+/// it.
+///
+/// \param Module          module whose atoms are looked up, split or created.
+/// \param MCFN            function receiving the basic blocks.
+/// \param BBBeginAddr     start address of the requested block.
+/// \param CallTargets     receives targets of calls found while
+///                        disassembling new atoms, plus branch targets that
+///                        resolve to external functions.
+/// \param TailCallTargets receives branch targets that resolve to external
+///                        functions.
+MCBasicBlock *MCObjectDisassembler::getBBAt(MCModule *Module, MCFunction *MCFN,
+                                            uint64_t BBBeginAddr,
+                                            AddressSetTy &CallTargets,
+                                            AddressSetTy &TailCallTargets) {
+  typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy;
+  typedef SmallSetVector<uint64_t, 16> AddrWorklistTy;
+  BBInfoByAddrTy BBInfos;
+  AddrWorklistTy Worklist;
+
+  // The worklist grows while it is walked, so size() is re-read each time.
+  Worklist.insert(BBBeginAddr);
+  for (size_t wi = 0; wi < Worklist.size(); ++wi) {
+    const uint64_t BeginAddr = Worklist[wi];
+    BBInfo *BBI = &BBInfos[BeginAddr];
+
+    MCTextAtom *&TA = BBI->Atom;
+    assert(!TA && "Discovered basic block already has an associated atom!");
+
+    // Look for an atom at BeginAddr.
+    if (MCAtom *A = Module->findAtomContaining(BeginAddr)) {
+      // FIXME: We don't care about mixed atoms, see above.
+      TA = cast<MCTextAtom>(A);
+
+      // The found atom doesn't begin at BeginAddr, we have to split it.
+      if (TA->getBeginAddr() != BeginAddr) {
+        // FIXME: Handle overlapping atoms: middle-starting instructions, etc..
+        MCTextAtom *NewTA = TA->split(BeginAddr);
+
+        // Look for an already encountered basic block that needs splitting
+        BBInfoByAddrTy::iterator It = BBInfos.find(TA->getBeginAddr());
+        if (It != BBInfos.end() && It->second.Atom) {
+          // The tail inherits the old block's successors; the head now only
+          // falls through to the tail.
+          BBI->SuccAddrs = It->second.SuccAddrs;
+          It->second.SuccAddrs.clear();
+          It->second.SuccAddrs.push_back(BeginAddr);
+        }
+        TA = NewTA;
+      }
+      BBI->Atom = TA;
+    } else {
+      // If we didn't find an atom, then we have to disassemble to create one!
+
+      MemoryObject *Region = getRegionFor(BeginAddr);
+      if (!Region)
+        llvm_unreachable(("Couldn't find suitable region for disassembly at " +
+                          utostr(BeginAddr)).c_str());
+
+      uint64_t InstSize;
+      uint64_t EndAddr = Region->getBase() + Region->getExtent();
+
+      // We want to stop before the next atom and have a fallthrough to it.
+      if (MCTextAtom *NextAtom =
+              cast_or_null<MCTextAtom>(Module->findFirstAtomAfter(BeginAddr)))
+        EndAddr = std::min(EndAddr, NextAtom->getBeginAddr());
+
+      for (uint64_t Addr = BeginAddr; Addr < EndAddr; Addr += InstSize) {
+        MCInst Inst;
+        if (Dis.getInstruction(Inst, InstSize, *Region, Addr, nulls(),
+                               nulls())) {
+          if (!TA)
+            TA = Module->createTextAtom(Addr, Addr);
+          TA->addInst(Inst, InstSize);
+        } else {
+          // We don't care about splitting mixed atoms either.
+          llvm_unreachable("Couldn't disassemble instruction in atom.");
+        }
+
+        uint64_t BranchTarget;
+        if (MIA.evaluateBranch(Inst, Addr, InstSize, BranchTarget)) {
+          if (MIA.isCall(Inst))
+            CallTargets.push_back(BranchTarget);
+        }
+
+        // A terminator closes the basic block.
+        if (MIA.isTerminator(Inst))
+          break;
+      }
+      BBI->Atom = TA;
+    }
+
+    assert(TA && "Couldn't disassemble atom, none was created!");
+    assert(TA->begin() != TA->end() && "Empty atom!");
+
+    MemoryObject *Region = getRegionFor(TA->getBeginAddr());
+    assert(Region && "Couldn't find region for already disassembled code!");
+    uint64_t EndRegion = Region->getBase() + Region->getExtent();
+
+    // Now we have a basic block atom, add successors.
+    // Add the fallthrough block.
+    if ((MIA.isConditionalBranch(TA->back().Inst) ||
+         !MIA.isTerminator(TA->back().Inst)) &&
+        (TA->getEndAddr() + 1 < EndRegion)) {
+      BBI->SuccAddrs.push_back(TA->getEndAddr() + 1);
+      Worklist.insert(TA->getEndAddr() + 1);
+    }
+
+    // If the terminator is a branch, add the target block.
+    if (MIA.isBranch(TA->back().Inst)) {
+      uint64_t BranchTarget;
+      if (MIA.evaluateBranch(TA->back().Inst, TA->back().Address,
+                             TA->back().Size, BranchTarget)) {
+        StringRef ExtFnName;
+        if (MOS)
+          ExtFnName =
+              MOS->findExternalFunctionAt(getOriginalLoadAddr(BranchTarget));
+        if (!ExtFnName.empty()) {
+          // A branch to an external function is a tail call, not an
+          // intra-function edge.
+          TailCallTargets.push_back(BranchTarget);
+          CallTargets.push_back(BranchTarget);
+        } else {
+          BBI->SuccAddrs.push_back(BranchTarget);
+          Worklist.insert(BranchTarget);
+        }
+      }
+    }
+  }
+
+  // Make sure every discovered address has a basic block in MCFN.
+  for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) {
+    const uint64_t BeginAddr = Worklist[wi];
+    BBInfo *BBI = &BBInfos[BeginAddr];
+
+    assert(BBI->Atom && "Found a basic block without an associated atom!");
+
+    // Look for a basic block at BeginAddr.
+    BBI->BB = MCFN->find(BeginAddr);
+    if (BBI->BB) {
+      // FIXME: check that the succs/preds are the same
+      continue;
+    }
+    // If there was none, we have to create one from the atom.
+    BBI->BB = &MCFN->createBlock(*BBI->Atom);
+  }
+
+  // Wire up the edges recorded as successor addresses.
+  for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) {
+    const uint64_t BeginAddr = Worklist[wi];
+    BBInfo *BBI = &BBInfos[BeginAddr];
+    MCBasicBlock *BB = BBI->BB;
+
+    RemoveDupsFromAddressVector(BBI->SuccAddrs);
+    // The loop condition used to be "SE != SE", which is never true, so no
+    // edge was ever added.
+    for (AddressSetTy::const_iterator SI = BBI->SuccAddrs.begin(),
+         SE = BBI->SuccAddrs.end();
+         SI != SE; ++SI) {
+      // Every successor address was put on the worklist when it was
+      // recorded, so the loop above has given it a basic block.
+      MCBasicBlock *Succ = BBInfos[*SI].BB;
+      assert(Succ && "Successor address without a basic block!");
+      BB->addSuccessor(Succ);
+      Succ->addPredecessor(BB);
+    }
+  }
+
+  assert(BBInfos[Worklist[0]].BB &&
+         "No basic block created at requested address?");
+
+  return BBInfos[Worklist[0]].BB;
+}
+
+/// Returns the function of \p Module that begins at \p BeginAddr, creating it
+/// when necessary.
+///
+/// An address that the symbolizer resolves to an external function yields a
+/// new function carrying that name. Otherwise an existing non-empty function
+/// whose entry block begins at \p BeginAddr is reused. Failing both, a new
+/// unnamed function is created and its blocks are discovered with getBBAt(),
+/// which appends to \p CallTargets and \p TailCallTargets.
+MCFunction *
+MCObjectDisassembler::createFunction(MCModule *Module, uint64_t BeginAddr,
+                                     AddressSetTy &CallTargets,
+                                     AddressSetTy &TailCallTargets) {
+  // External functions take precedence over anything else.
+  if (MOS) {
+    StringRef ExtFnName =
+        MOS->findExternalFunctionAt(getOriginalLoadAddr(BeginAddr));
+    if (!ExtFnName.empty())
+      return Module->createFunction(ExtFnName);
+  }
+
+  // Next, try to reuse a function that already starts at this address.
+  // FIXME: MCModule should provide a findFunctionByAddr()
+  MCModule::func_iterator FI = Module->func_begin();
+  MCModule::func_iterator FE = Module->func_end();
+  while (FI != FE) {
+    if (!(*FI)->empty()) {
+      const uint64_t EntryAddr =
+          (*FI)->getEntryBlock()->getInsts()->getBeginAddr();
+      if (EntryAddr == BeginAddr)
+        return FI->get();
+    }
+    ++FI;
+  }
+
+  // Nothing matched: build a brand new function from the code at BeginAddr.
+  MCFunction *NewFn = Module->createFunction("");
+  getBBAt(Module, NewFn, BeginAddr, CallTargets, TailCallTargets);
+  return NewFn;
+}
+
+// MachO MCObjectDisassembler implementation.
+
+/// Sets up a Mach-O disassembler and caches the contents of the
+/// "__mod_init_func" and "__mod_exit_func" sections, which
+/// getStaticInitFunctions() and getStaticExitFunctions() later expose.
+MCMachOObjectDisassembler::MCMachOObjectDisassembler(
+    const MachOObjectFile &MOOF, const MCDisassembler &Dis,
+    const MCInstrAnalysis &MIA, uint64_t VMAddrSlide,
+    uint64_t HeaderLoadAddress)
+    : MCObjectDisassembler(MOOF, Dis, MIA), MOOF(MOOF),
+      VMAddrSlide(VMAddrSlide), HeaderLoadAddress(HeaderLoadAddress) {
+
+  for (const SectionRef &Section : MOOF.sections()) {
+    StringRef SecName;
+    Section.getName(SecName);
+    // FIXME: We should use the S_ section type instead of the name.
+    if (SecName == "__mod_init_func") {
+      DEBUG(dbgs() << "Found __mod_init_func section!\n");
+      Section.getContents(ModInitContents);
+      continue;
+    }
+    if (SecName == "__mod_exit_func") {
+      DEBUG(dbgs() << "Found __mod_exit_func section!\n");
+      Section.getContents(ModExitContents);
+    }
+  }
+}
+
+// FIXME: Only do the translations for addresses actually inside the object.
+/// Translates an address from the object file into a load address by adding
+/// the VM address slide.
+uint64_t MCMachOObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) {
+  return Addr + VMAddrSlide;
+}
+
+/// Inverse of getEffectiveLoadAddr(): removes the VM address slide from
+/// \p EffectiveAddr.
+uint64_t
+MCMachOObjectDisassembler::getOriginalLoadAddr(uint64_t EffectiveAddr) {
+  return EffectiveAddr - VMAddrSlide;
+}
+
+/// Returns the entrypoint of the Mach-O image.
+///
+/// When the file has an LC_MAIN load command with a non-zero entry offset,
+/// the result is that offset added to HeaderLoadAddress. Otherwise the generic
+/// symbol-based lookup in MCObjectDisassembler::getEntrypoint() is used.
+uint64_t MCMachOObjectDisassembler::getEntrypoint() {
+  uint64_t EntryFileOffset = 0;
+
+  // Look for LC_MAIN.
+  const uint32_t LoadCommandCount = MOOF.getHeader().ncmds;
+  // Without this guard "LoadCommandCount - 1" below would wrap around for a
+  // file that has no load commands at all.
+  if (LoadCommandCount != 0) {
+    MachOObjectFile::LoadCommandInfo Load = MOOF.getFirstLoadCommandInfo();
+    for (uint32_t I = 0;; ++I) {
+      if (Load.C.cmd == MachO::LC_MAIN) {
+        EntryFileOffset =
+            ((const MachO::entry_point_command *)Load.Ptr)->entryoff;
+        break;
+      }
+
+      // Only advance while there is another command left to look at.
+      if (I == LoadCommandCount - 1)
+        break;
+      Load = MOOF.getNextLoadCommandInfo(Load);
+    }
+  }
+
+  // If we didn't find anything, default to the common implementation.
+  // FIXME: Maybe we could also look at LC_UNIXTHREAD and friends?
+  if (!EntryFileOffset)
+    return MCObjectDisassembler::getEntrypoint();
+
+  return EntryFileOffset + HeaderLoadAddress;
+}
+
+/// Exposes the cached "__mod_init_func" contents as an array of 8-byte
+/// entries. The array aliases the section contents; a trailing partial entry
+/// is not included.
+ArrayRef<uint64_t> MCMachOObjectDisassembler::getStaticInitFunctions() {
+  // FIXME: We only handle 64bit mach-o
+  assert(MOOF.is64Bit());
+
+  const size_t EntrySize = 8;
+  const uint64_t *Entries =
+      reinterpret_cast<const uint64_t *>(ModInitContents.data());
+  const size_t EntryCount = ModInitContents.size() / EntrySize;
+  return ArrayRef<uint64_t>(Entries, EntryCount);
+}
+
+/// Exposes the cached "__mod_exit_func" contents as an array of 8-byte
+/// entries. The array aliases the section contents; a trailing partial entry
+/// is not included.
+ArrayRef<uint64_t> MCMachOObjectDisassembler::getStaticExitFunctions() {
+  // FIXME: We only handle 64bit mach-o
+  assert(MOOF.is64Bit());
+
+  const size_t EntrySize = 8;
+  const uint64_t *Entries =
+      reinterpret_cast<const uint64_t *>(ModExitContents.data());
+  const size_t EntryCount = ModExitContents.size() / EntrySize;
+  return ArrayRef<uint64_t>(Entries, EntryCount);
+}

diff --git a/lib/MC/MCAnalysis/MCObjectSymbolizer.cpp b/lib/MC/MCAnalysis/MCObjectSymbolizer.cpp
new file mode 100644
index 0000000..b149596
--- /dev/null
+++ b/lib/MC/MCAnalysis/MCObjectSymbolizer.cpp

@@ -0,0 +1,268 @@
+//===-- lib/MC/MCAnalysis/MCObjectSymbolizer.cpp --------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCObjectSymbolizer.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRelocationInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+using namespace llvm;
+using namespace object;
+
+//===- MCMachObjectSymbolizer ---------------------------------------------===//
+
+namespace {
+/// Object symbolizer for Mach-O files. On top of the generic behavior it
+/// resolves addresses inside the "__stubs" section to external function names
+/// and annotates loads from "__cstring" with the referenced string.
+class MCMachObjectSymbolizer : public MCObjectSymbolizer {
+  const MachOObjectFile *MOOF;
+  // __TEXT;__stubs support.
+  // Start address of the stubs section.
+  uint64_t StubsStart;
+  // Number of stubs: section size divided by StubSize.
+  uint64_t StubsCount;
+  // Size of one stub, read from the section's reserved2 field. Zero when no
+  // "__stubs" section was found.
+  uint64_t StubSize;
+  // Value of the section's reserved1 field.
+  uint64_t StubsIndSymIndex;
+
+public:
+  MCMachObjectSymbolizer(MCContext &Ctx,
+                         std::unique_ptr<MCRelocationInfo> RelInfo,
+                         const MachOObjectFile *MOOF);
+
+  /// Returns the name of the external function whose stub contains \p Addr,
+  /// or an empty string when \p Addr is not inside the stubs section.
+  StringRef findExternalFunctionAt(uint64_t Addr) override;
+
+  /// Writes a " ## literal pool for: ..." comment to \p cStream when the
+  /// referenced address lies in the "__cstring" section.
+  void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value,
+                                       uint64_t Address) override;
+};
+} // End unnamed namespace
+
+/// Builds the symbolizer and records where the "__stubs" section lives: its
+/// start address, the size of one stub (reserved2), the section's reserved1
+/// value and the number of stubs. All four stay zero when the object has no
+/// such section.
+MCMachObjectSymbolizer::MCMachObjectSymbolizer(
+    MCContext &Ctx, std::unique_ptr<MCRelocationInfo> RelInfo,
+    const MachOObjectFile *MOOF)
+  : MCObjectSymbolizer(Ctx, std::move(RelInfo), MOOF), MOOF(MOOF),
+    StubsStart(0), StubsCount(0), StubSize(0), StubsIndSymIndex(0) {
+
+  for (const SectionRef &Section : MOOF->sections()) {
+    StringRef SecName;
+    Section.getName(SecName);
+    if (SecName != "__stubs")
+      continue;
+
+    // The section header layout depends on the word size of the file.
+    if (MOOF->is64Bit()) {
+      MachO::section_64 Sec64 = MOOF->getSection64(Section.getRawDataRefImpl());
+      StubsIndSymIndex = Sec64.reserved1;
+      StubSize = Sec64.reserved2;
+    } else {
+      MachO::section Sec32 = MOOF->getSection(Section.getRawDataRefImpl());
+      StubsIndSymIndex = Sec32.reserved1;
+      StubSize = Sec32.reserved2;
+    }
+    assert(StubSize && "Mach-O stub entry size can't be zero!");
+    Section.getAddress(StubsStart);
+    // Read the section size in bytes, then convert it to a stub count.
+    Section.getSize(StubsCount);
+    StubsCount /= StubSize;
+  }
+}
+
+/// Returns the name of the external function whose stub contains \p Addr.
+///
+/// The stub index is looked up in the indirect symbol table to get a symbol
+/// table index, and that symbol's name is returned without its leading '_'.
+/// An empty string is returned when the object has no stubs, when \p Addr is
+/// outside the stubs section, or when the symbol table index is out of range.
+StringRef MCMachObjectSymbolizer::findExternalFunctionAt(uint64_t Addr) {
+  // FIXME: also, this can all be done at the very beginning, by iterating over
+  // all stubs and creating the calls to outside functions. Is it worth it
+  // though?
+  if (!StubSize)
+    return StringRef();
+  // Reject addresses below the section explicitly instead of relying on the
+  // unsigned subtraction wrapping around to a huge index.
+  if (Addr < StubsStart)
+    return StringRef();
+  uint64_t StubIdx = (Addr - StubsStart) / StubSize;
+  if (StubIdx >= StubsCount)
+    return StringRef();
+
+  uint32_t SymtabIdx =
+    MOOF->getIndirectSymbolTableEntry(MOOF->getDysymtabLoadCommand(), StubIdx);
+
+  // Walk to the SymtabIdx-th symbol without stepping past the end.
+  symbol_iterator SI = MOOF->symbol_begin();
+  symbol_iterator SE = MOOF->symbol_end();
+  for (uint32_t i = 0; i != SymtabIdx && SI != SE; ++i)
+    ++SI;
+  // Check the iterator before dereferencing it; the name used to be read
+  // first and the check made afterwards.
+  assert(SI != SE && "Stub wasn't found in the symbol table!");
+  if (SI == SE)
+    return StringRef();
+
+  StringRef SymName;
+  SI->getName(SymName);
+  assert(SymName.front() == '_' && "Mach-O symbol doesn't start with '_'!");
+  return SymName.substr(1);
+}
+
+/// Appends a " ## literal pool for: <string>" comment to \p cStream when the
+/// referenced address lies in the "__cstring" section.
+///
+/// When a relocation exists at \p Address, \p Value is replaced by the
+/// absolute value of the relocation expression; nothing is emitted if that
+/// expression cannot be created or evaluated. The string is cut at the first
+/// NUL byte.
+void MCMachObjectSymbolizer::
+tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value,
+                                uint64_t Address) {
+  if (const RelocationRef *R = findRelocationAt(Address)) {
+    const MCExpr *RelExpr = RelInfo->createExprForRelocation(*R);
+    if (!RelExpr)
+      return;
+    // EvaluateAsAbsolute() overwrites Value on success.
+    if (!RelExpr->EvaluateAsAbsolute(Value))
+      return;
+  }
+
+  const uint64_t Addr = Value;
+  const SectionRef *S = findSectionContaining(Addr);
+  if (!S)
+    return;
+
+  StringRef Name;
+  S->getName(Name);
+  uint64_t SAddr;
+  S->getAddress(SAddr);
+  if (Name != "__cstring")
+    return;
+
+  StringRef Contents;
+  S->getContents(Contents);
+  // Drop everything before the referenced string, then cut at its terminator.
+  Contents = Contents.substr(Addr - SAddr);
+  cStream << " ## literal pool for: "
+          << Contents.substr(0, Contents.find_first_of(0));
+}
+
+//===- MCObjectSymbolizer -------------------------------------------------===//
+
+/// Creates a symbolizer for \p Obj. The sorted section list and the
+/// address-to-relocation map start out empty; they are filled on first use by
+/// findSectionContaining() and findRelocationAt().
+MCObjectSymbolizer::MCObjectSymbolizer(
+  MCContext &Ctx, std::unique_ptr<MCRelocationInfo> RelInfo,
+  const ObjectFile *Obj)
+  : MCSymbolizer(Ctx, std::move(RelInfo)), Obj(Obj), SortedSections(),
+    AddrToReloc() {}
+
+/// Tries to add a symbolic operand to \p MI in place of the immediate
+/// \p Value.
+///
+/// Three sources are tried in order:
+///  1. for branches, an external function at \p Value;
+///  2. a relocation at \p Address + \p Offset;
+///  3. for branches, a function symbol that starts at or covers \p Value,
+///     expressed as "symbol" or "symbol + offset".
+/// When a relocation exists but no expression can be made from it, the third
+/// source is deliberately not tried.
+///
+/// \returns true when an operand was added to \p MI.
+bool MCObjectSymbolizer::
+tryAddingSymbolicOperand(MCInst &MI, raw_ostream &cStream,
+                         int64_t Value, uint64_t Address, bool IsBranch,
+                         uint64_t Offset, uint64_t InstSize) {
+  if (IsBranch) {
+    StringRef ExtFnName = findExternalFunctionAt((uint64_t)Value);
+    if (!ExtFnName.empty()) {
+      MCSymbol *ExtSym = Ctx.GetOrCreateSymbol(ExtFnName);
+      const MCExpr *ExtExpr = MCSymbolRefExpr::Create(ExtSym, Ctx);
+      MI.addOperand(MCOperand::CreateExpr(ExtExpr));
+      return true;
+    }
+  }
+
+  if (const RelocationRef *R = findRelocationAt(Address + Offset)) {
+    const MCExpr *RelExpr = RelInfo->createExprForRelocation(*R);
+    // Only try to create a symbol+offset expression if there is no relocation.
+    if (!RelExpr)
+      return false;
+    MI.addOperand(MCOperand::CreateExpr(RelExpr));
+    return true;
+  }
+
+  // Interpret Value as a branch target.
+  if (!IsBranch)
+    return false;
+  const uint64_t Target = Value;
+  // FIXME: map instead of looping each time?
+  for (const SymbolRef &Symbol : Obj->symbols()) {
+    uint64_t SymAddr;
+    Symbol.getAddress(SymAddr);
+    uint64_t SymSize;
+    Symbol.getSize(SymSize);
+    StringRef SymName;
+    Symbol.getName(SymName);
+    SymbolRef::Type SymType;
+    Symbol.getType(SymType);
+
+    // Only named function symbols with a known address and size qualify.
+    const bool Usable = SymAddr != UnknownAddressOrSize &&
+                        SymSize != UnknownAddressOrSize && !SymName.empty() &&
+                        SymType == SymbolRef::ST_Function;
+    if (!Usable)
+      continue;
+
+    // The target must be the symbol's start or fall inside its extent.
+    const bool Covers = SymAddr == Target ||
+                        (SymAddr <= Target && SymAddr + SymSize > Target);
+    if (!Covers)
+      continue;
+
+    MCSymbol *Sym = Ctx.GetOrCreateSymbol(SymName);
+    const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+    if (SymAddr != Target) {
+      const MCExpr *Off = MCConstantExpr::Create(Target - SymAddr, Ctx);
+      Expr = MCBinaryExpr::CreateAdd(Expr, Off, Ctx);
+    }
+    MI.addOperand(MCOperand::CreateExpr(Expr));
+    return true;
+  }
+  return false;
+}
+
+/// Generic implementation: emits no comment. The Mach-O symbolizer in this
+/// file overrides it.
+void MCObjectSymbolizer::
+tryAddingPcLoadReferenceComment(raw_ostream &cStream,
+                                int64_t Value, uint64_t Address) {
+}
+
+/// Generic implementation: no address is known to be an external function, so
+/// the result is always an empty string.
+StringRef MCObjectSymbolizer::findExternalFunctionAt(uint64_t Addr) {
+  return StringRef();
+}
+
+/// Factory: returns a Mach-O specific symbolizer when \p Obj is a Mach-O
+/// object file and the generic one otherwise. The result is allocated with
+/// new and returned as a raw pointer.
+MCObjectSymbolizer *MCObjectSymbolizer::createObjectSymbolizer(
+    MCContext &Ctx, std::unique_ptr<MCRelocationInfo> RelInfo,
+    const ObjectFile *Obj) {
+  const MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(Obj);
+  if (!MachO)
+    return new MCObjectSymbolizer(Ctx, std::move(RelInfo), Obj);
+  return new MCMachObjectSymbolizer(Ctx, std::move(RelInfo), MachO);
+}
+
+// SortedSections implementation.
+
+/// Ordering predicate for std::lower_bound over the sorted section list: true
+/// when section \p S starts strictly below \p Addr.
+static bool SectionStartsBefore(const SectionRef &S, uint64_t Addr) {
+  uint64_t SectionStart;
+  S.getAddress(SectionStart);
+  const bool StartsBefore = SectionStart < Addr;
+  return StartsBefore;
+}
+
+/// Returns the section whose address range [start, start + size) contains
+/// \p Addr, or null when there is none. The sorted section list is built on
+/// first use.
+const SectionRef *MCObjectSymbolizer::findSectionContaining(uint64_t Addr) {
+  if (SortedSections.empty())
+    buildSectionList();
+
+  SortedSectionList::iterator BeginIt = SortedSections.begin(),
+                              EndIt = SortedSections.end();
+  // lower_bound yields the first section starting at or after Addr.
+  SortedSectionList::iterator It =
+      std::lower_bound(BeginIt, EndIt, Addr, SectionStartsBefore);
+
+  uint64_t SAddr = 0;
+  bool StartsAtAddr = false;
+  if (It != EndIt) {
+    It->getAddress(SAddr);
+    StartsAtAddr = (SAddr == Addr);
+  }
+  // Unless a section starts exactly at Addr, the only possible container is
+  // the previous section, the last one starting below Addr. The old code
+  // tested the following section instead, so an address in the middle of a
+  // section was attributed to the wrong section or to none.
+  if (!StartsAtAddr) {
+    if (It == BeginIt)
+      return nullptr;
+    --It;
+    It->getAddress(SAddr);
+  }
+
+  uint64_t SSize; It->getSize(SSize);
+  // SAddr <= Addr holds here, so the subtraction cannot wrap.
+  if (Addr - SAddr >= SSize)
+    return nullptr;
+  return &*It;
+}
+
+/// Returns the relocation recorded at exactly \p Addr, or null when there is
+/// none. The address-to-relocation map is built on first use, and is built
+/// again on every call for as long as it stays empty.
+const RelocationRef *MCObjectSymbolizer::findRelocationAt(uint64_t Addr) {
+  if (AddrToReloc.empty())
+    buildRelocationByAddrMap();
+
+  AddrToRelocMap::const_iterator Found = AddrToReloc.find(Addr);
+  return Found == AddrToReloc.end() ? nullptr : &Found->second;
+}
+
+/// Fills SortedSections with every section required for execution, kept in
+/// ascending start-address order.
+///
+/// Hits llvm_unreachable when a new section would extend into the section
+/// that follows it in the list. Only that following section is checked.
+void MCObjectSymbolizer::buildSectionList() {
+  for (const SectionRef &Section : Obj->sections()) {
+    bool RequiredForExec;
+    Section.isRequiredForExecution(RequiredForExec);
+    if (!RequiredForExec)
+      continue;
+
+    uint64_t SAddr;
+    Section.getAddress(SAddr);
+    uint64_t SSize;
+    Section.getSize(SSize);
+
+    // Position of the first already-listed section starting at or after this
+    // one; inserting there keeps the list sorted.
+    SortedSectionList::iterator InsertPos =
+        std::lower_bound(SortedSections.begin(), SortedSections.end(), SAddr,
+                         SectionStartsBefore);
+    if (InsertPos != SortedSections.end()) {
+      uint64_t NextStart;
+      InsertPos->getAddress(NextStart);
+      if (NextStart < SAddr + SSize)
+        llvm_unreachable("Inserting overlapping sections");
+    }
+    SortedSections.insert(InsertPos, Section);
+  }
+}
+
+/// Fills AddrToReloc from the relocations of every section in the object.
+/// When several relocations share an address, the first one seen wins.
+void MCObjectSymbolizer::buildRelocationByAddrMap() {
+  for (const SectionRef &Section : Obj->sections()) {
+    for (const RelocationRef &Reloc : Section.relocations()) {
+      uint64_t RelocAddr;
+      Reloc.getAddress(RelocAddr);
+      // At a specific address, only keep the first relocation.
+      if (AddrToReloc.find(RelocAddr) != AddrToReloc.end())
+        continue;
+      AddrToReloc[RelocAddr] = Reloc;
+    }
+  }
+}

diff --git a/lib/MC/MCAnalysis/Makefile b/lib/MC/MCAnalysis/Makefile
new file mode 100644
index 0000000..add2dbd
--- /dev/null
+++ b/lib/MC/MCAnalysis/Makefile

@@ -0,0 +1,14 @@
+##===- lib/MC/MCAnalysis/Makefile --------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMMCAnalysis
+BUILD_ARCHIVE := 1
+
+include $(LEVEL)/Makefile.common
commit	cd81d94322a39503e4a3e87b6ee03d4fcb3465fb	[log] [tgz]
author	Stephen Hines <srhines@google.com>	Mon Jul 21 00:45:20 2014 -0700
committer	Stephen Hines <srhines@google.com>	Fri Jul 25 00:48:57 2014 -0700
tree	81b7dd2bb4370a392f31d332a566c903b5744764
parent	0c5f13c0c4499eaf42ab5e9e2ceabd4e20e36861 [diff]