Revert "[Orc] Directly emit machine code for the x86 resolver block and trampolines."

This reverts commit r251933.

It broke the build of examples/Kaleidoscope/Orc/fully_lazy/toy.cpp.

llvm-svn: 251937
diff --git a/llvm/lib/ExecutionEngine/Orc/OrcCBindings.cpp b/llvm/lib/ExecutionEngine/Orc/OrcCBindings.cpp
index d2379cd..d3effeec9 100644
--- a/llvm/lib/ExecutionEngine/Orc/OrcCBindings.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/OrcCBindings.cpp
@@ -12,18 +12,20 @@
 
 using namespace llvm;
 
-LLVMOrcJITStackRef LLVMOrcCreateInstance(LLVMTargetMachineRef TM) {
+LLVMOrcJITStackRef LLVMOrcCreateInstance(LLVMTargetMachineRef TM,
+                                         LLVMContextRef Context) {
   TargetMachine *TM2(unwrap(TM));
+  LLVMContext &Ctx = *unwrap(Context);
 
   Triple T(TM2->getTargetTriple());
 
-  auto CompileCallbackMgr = OrcCBindingsStack::createCompileCallbackMgr(T);
+  auto CallbackMgrBuilder = OrcCBindingsStack::createCallbackManagerBuilder(T);
   auto IndirectStubsMgrBuilder =
     OrcCBindingsStack::createIndirectStubsMgrBuilder(T);
 
   OrcCBindingsStack *JITStack =
-    new OrcCBindingsStack(*TM2, std::move(CompileCallbackMgr),
-			  IndirectStubsMgrBuilder);
+    new OrcCBindingsStack(*TM2, Ctx, CallbackMgrBuilder,
+                          IndirectStubsMgrBuilder);
 
   return wrap(JITStack);
 }
diff --git a/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp b/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp
index 7326fa7..93c698d 100644
--- a/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp
@@ -17,14 +17,19 @@
 
 using namespace llvm;
 
-std::unique_ptr<OrcCBindingsStack::CompileCallbackMgr>
-OrcCBindingsStack::createCompileCallbackMgr(Triple T) {
+OrcCBindingsStack::CallbackManagerBuilder
+OrcCBindingsStack::createCallbackManagerBuilder(Triple T) {
   switch (T.getArch()) {
     default: return nullptr;
 
     case Triple::x86_64: {
-      typedef orc::JITCompileCallbackManager<orc::OrcX86_64> CCMgrT;
-      return llvm::make_unique<CCMgrT>(0);
+      typedef orc::JITCompileCallbackManager<CompileLayerT,
+                                             orc::OrcX86_64> CCMgrT;
+      return [](CompileLayerT &CompileLayer, RuntimeDyld::MemoryManager &MemMgr,
+                LLVMContext &Context) {
+               return llvm::make_unique<CCMgrT>(CompileLayer, MemMgr, Context, 0,
+                                                64);
+             };
     }
   }
 }
diff --git a/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
index c622101..6188b29 100644
--- a/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
+++ b/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
@@ -34,7 +34,10 @@
   typedef orc::IRCompileLayer<ObjLayerT> CompileLayerT;
   typedef orc::CompileOnDemandLayer<CompileLayerT, CompileCallbackMgr> CODLayerT;
 
-  typedef std::function<std::unique_ptr<CompileCallbackMgr>()>
+  typedef std::function<
+            std::unique_ptr<CompileCallbackMgr>(CompileLayerT&,
+                                                RuntimeDyld::MemoryManager&,
+                                                LLVMContext&)>
     CallbackManagerBuilder;
 
   typedef CODLayerT::IndirectStubsManagerBuilderT IndirectStubsManagerBuilder;
@@ -83,18 +86,19 @@
 
   typedef unsigned ModuleHandleT;
 
-  static std::unique_ptr<CompileCallbackMgr> createCompileCallbackMgr(Triple T);
+  static CallbackManagerBuilder createCallbackManagerBuilder(Triple T);
   static IndirectStubsManagerBuilder createIndirectStubsMgrBuilder(Triple T);
 
-  OrcCBindingsStack(TargetMachine &TM,
-		    std::unique_ptr<CompileCallbackMgr> CCMgr, 
+  OrcCBindingsStack(TargetMachine &TM, LLVMContext &Context,
+                    CallbackManagerBuilder &BuildCallbackMgr,
                     IndirectStubsManagerBuilder IndirectStubsMgrBuilder)
-    : DL(TM.createDataLayout()), CCMgr(std::move(CCMgr)),
+    : Context(Context), DL(TM.createDataLayout()),
       ObjectLayer(),
       CompileLayer(ObjectLayer, orc::SimpleCompiler(TM)),
+      CCMgr(BuildCallbackMgr(CompileLayer, CCMgrMemMgr, Context)),
       CODLayer(CompileLayer,
                [](Function &F) { std::set<Function*> S; S.insert(&F); return S; },
-               *this->CCMgr, std::move(IndirectStubsMgrBuilder), false),
+               *CCMgr, std::move(IndirectStubsMgrBuilder), false),
       IndirectStubsMgr(IndirectStubsMgrBuilder()),
       CXXRuntimeOverrides([this](const std::string &S) { return mangle(S); }) {}
 
@@ -123,7 +127,7 @@
   orc::TargetAddress
   createLazyCompileCallback(LLVMOrcLazyCompileCallbackFn Callback,
                             void *CallbackCtx) {
-    auto CCInfo = CCMgr->getCompileCallback();
+    auto CCInfo = CCMgr->getCompileCallback(Context);
     CCInfo.setCompileAction(
       [=]() -> orc::TargetAddress {
         return Callback(wrap(this), CallbackCtx);
@@ -260,12 +264,13 @@
     return NewHandle;
   }
 
+  LLVMContext &Context;
   DataLayout DL;
   SectionMemoryManager CCMgrMemMgr;
 
-  std::unique_ptr<CompileCallbackMgr> CCMgr;
   ObjLayerT ObjectLayer;
   CompileLayerT CompileLayer;
+  std::unique_ptr<CompileCallbackMgr> CCMgr;
   CODLayerT CODLayer;
 
   std::unique_ptr<orc::IndirectStubsManagerBase> IndirectStubsMgr;
diff --git a/llvm/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp b/llvm/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
index b931f10..c03b935 100644
--- a/llvm/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
@@ -12,88 +12,136 @@
 #include "llvm/Support/Process.h"
 #include <array>
 
+using namespace llvm::orc;
+
+namespace {
+
+uint64_t executeCompileCallback(JITCompileCallbackManagerBase *JCBM,
+                                TargetAddress CallbackID) {
+  return JCBM->executeCompileCallback(CallbackID);
+}
+
+}
+
 namespace llvm {
 namespace orc {
 
-void OrcX86_64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn,
-                                  void *CallbackMgr) {
+const char* OrcX86_64::ResolverBlockName = "orc_resolver_block";
 
-  const uint8_t ResolverCode[] = {
-                                               // resolver_entry:
-    0x55,                                      // 0x00: pushq     %rbp
-    0x48, 0x89, 0xe5,                          // 0x01: movq      %rsp, %rbp
-    0x50,                                      // 0x04: pushq     %rax
-    0x53,                                      // 0x05: pushq     %rbx
-    0x51,                                      // 0x06: pushq     %rcx
-    0x52,                                      // 0x07: pushq     %rdx
-    0x56,                                      // 0x08: pushq     %rsi
-    0x57,                                      // 0x09: pushq     %rdi
-    0x41, 0x50,                                // 0x0a: pushq     %r8
-    0x41, 0x51,                                // 0x0c: pushq     %r9
-    0x41, 0x52,                                // 0x0e: pushq     %r10
-    0x41, 0x53,                                // 0x10: pushq     %r11
-    0x41, 0x54,                                // 0x12: pushq     %r12
-    0x41, 0x55,                                // 0x14: pushq     %r13
-    0x41, 0x56,                                // 0x16: pushq     %r14
-    0x41, 0x57,                                // 0x18: pushq     %r15
-    0x48, 0x81, 0xec, 0x08, 0x02, 0x00, 0x00,  // 0x1a: subq      20, %rsp
-    0x48, 0x0f, 0xae, 0x04, 0x24,              // 0x21: fxsave64  (%rsp)
-    0x48, 0x8d, 0x3d, 0x43, 0x00, 0x00, 0x00,  // 0x26: leaq      67(%rip), %rdi
-    0x48, 0x8b, 0x3f,                          // 0x2d: movq      (%rdi), %rdi
-    0x48, 0x8b, 0x75, 0x08,                    // 0x30: movq      8(%rbp), %rsi
-    0x48, 0x83, 0xee, 0x06,                    // 0x34: subq      $6, %rsi
-    0x48, 0xb8,                                // 0x38: movabsq   $0, %rax
+void OrcX86_64::insertResolverBlock(
+    Module &M, JITCompileCallbackManagerBase &JCBM) {
 
-    // 0x3a: JIT re-entry fn addr:
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  // Trampoline code-sequence length, used to get trampoline address from return
+  // address.
+  const unsigned X86_64_TrampolineLength = 6;
 
-    0xff, 0xd0,                                // 0x42: callq     *%rax
-    0x48, 0x89, 0x45, 0x08,                    // 0x44: movq      %rax, 8(%rbp)
-    0x48, 0x0f, 0xae, 0x0c, 0x24,              // 0x48: fxrstor64 (%rsp)
-    0x48, 0x81, 0xc4, 0x08, 0x02, 0x00, 0x00,  // 0x4d: addq      20, %rsp
-    0x41, 0x5f,                                // 0x54: popq      %r15
-    0x41, 0x5e,                                // 0x56: popq      %r14
-    0x41, 0x5d,                                // 0x58: popq      %r13
-    0x41, 0x5c,                                // 0x5a: popq      %r12
-    0x41, 0x5b,                                // 0x5c: popq      %r11
-    0x41, 0x5a,                                // 0x5e: popq      %r10
-    0x41, 0x59,                                // 0x60: popq      %r9
-    0x41, 0x58,                                // 0x62: popq      %r8
-    0x5f,                                      // 0x64: popq      %rdi
-    0x5e,                                      // 0x65: popq      %rsi
-    0x5a,                                      // 0x66: popq      %rdx
-    0x59,                                      // 0x67: popq      %rcx
-    0x5b,                                      // 0x68: popq      %rbx
-    0x58,                                      // 0x69: popq      %rax
-    0x5d,                                      // 0x6a: popq      %rbp
-    0xc3,                                      // 0x6b: retq
-    0x00, 0x00, 0x00, 0x00,                    // 0x6c: <padding>
+  // List of x86-64 GPRs to save. Note - RBP saved separately below.
+  std::array<const char *, 14> GPRs = {{
+      "rax", "rbx", "rcx", "rdx",
+      "rsi", "rdi", "r8", "r9",
+      "r10", "r11", "r12", "r13",
+      "r14", "r15"
+    }};
 
-    // 0x70: Callback mgr address.
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  };
+  // Address of the executeCompileCallback function.
+  uint64_t CallbackAddr =
+      static_cast<uint64_t>(
+        reinterpret_cast<uintptr_t>(executeCompileCallback));
 
-  const unsigned ReentryFnAddrOffset = 0x3a;
-  const unsigned CallbackMgrAddrOffset = 0x70;
-  
-  memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode));
-  memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryFn, sizeof(ReentryFn));
-  memcpy(ResolverMem + CallbackMgrAddrOffset, &CallbackMgr,
-         sizeof(CallbackMgr));
+  std::ostringstream AsmStream;
+  Triple TT(M.getTargetTriple());
+
+  // Switch to text section.
+  if (TT.getOS() == Triple::Darwin)
+    AsmStream << ".section __TEXT,__text,regular,pure_instructions\n"
+              << ".align 4, 0x90\n";
+  else
+    AsmStream << ".text\n"
+              << ".align 16, 0x90\n";
+
+  // Bake in a pointer to the callback manager immediately before the
+  // start of the resolver function.
+  AsmStream << "jit_callback_manager_addr:\n"
+            << "  .quad " << &JCBM << "\n";
+
+  // Start the resolver function.
+  AsmStream << ResolverBlockName << ":\n"
+            << "  pushq     %rbp\n"
+            << "  movq      %rsp, %rbp\n";
+
+  // Store the GPRs.
+  for (const auto &GPR : GPRs)
+    AsmStream << "  pushq     %" << GPR << "\n";
+
+  // Store floating-point state with FXSAVE.
+  // Note: We need to keep the stack 16-byte aligned, so if we've emitted an odd
+  //       number of 64-bit pushes so far (GPRs.size() plus 1 for RBP) then add
+  //       an extra 64 bits of padding to the FXSave area.
+  unsigned Padding = (GPRs.size() + 1) % 2 ? 8 : 0;
+  unsigned FXSaveSize = 512 + Padding;
+  AsmStream << "  subq      $" << FXSaveSize << ", %rsp\n"
+            << "  fxsave64  (%rsp)\n"
+
+  // Load callback manager address, compute trampoline address, call JIT.
+            << "  lea       jit_callback_manager_addr(%rip), %rdi\n"
+            << "  movq      (%rdi), %rdi\n"
+            << "  movq      0x8(%rbp), %rsi\n"
+            << "  subq      $" << X86_64_TrampolineLength << ", %rsi\n"
+            << "  movabsq   $" << CallbackAddr << ", %rax\n"
+            << "  callq     *%rax\n"
+
+  // Replace the return to the trampoline with the return address of the
+  // compiled function body.
+            << "  movq      %rax, 0x8(%rbp)\n"
+
+  // Restore the floating point state.
+            << "  fxrstor64 (%rsp)\n"
+            << "  addq      $" << FXSaveSize << ", %rsp\n";
+
+  for (const auto &GPR : make_range(GPRs.rbegin(), GPRs.rend()))
+    AsmStream << "  popq      %" << GPR << "\n";
+
+  // Restore original RBP and return to compiled function body.
+  AsmStream << "  popq      %rbp\n"
+            << "  retq\n";
+
+  M.appendModuleInlineAsm(AsmStream.str());
 }
 
-void OrcX86_64::writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr,
-				 unsigned NumTrampolines) {
+OrcX86_64::LabelNameFtor
+OrcX86_64::insertCompileCallbackTrampolines(Module &M,
+                                            TargetAddress ResolverBlockAddr,
+                                            unsigned NumCalls,
+                                            unsigned StartIndex) {
+  const char *ResolverBlockPtrName = "Lorc_resolve_block_addr";
 
-  unsigned OffsetToPtr = NumTrampolines * TrampolineSize;
+  std::ostringstream AsmStream;
+  Triple TT(M.getTargetTriple());
 
-  memcpy(TrampolineMem + OffsetToPtr, &ResolverAddr, sizeof(void*));
+  if (TT.getOS() == Triple::Darwin)
+    AsmStream << ".section __TEXT,__text,regular,pure_instructions\n"
+              << ".align 4, 0x90\n";
+  else
+    AsmStream << ".text\n"
+              << ".align 16, 0x90\n";
 
-  uint64_t *Trampolines = reinterpret_cast<uint64_t*>(TrampolineMem);
-  uint64_t CallIndirPCRel = 0xf1c40000000015ff;
+  AsmStream << ResolverBlockPtrName << ":\n"
+            << "  .quad " << ResolverBlockAddr << "\n";
 
-  for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize)
-    Trampolines[I] = CallIndirPCRel | ((OffsetToPtr - 6) << 16);
+  auto GetLabelName =
+    [=](unsigned I) {
+      std::ostringstream LabelStream;
+      LabelStream << "orc_jcc_" << (StartIndex + I);
+      return LabelStream.str();
+  };
+
+  for (unsigned I = 0; I < NumCalls; ++I)
+    AsmStream << GetLabelName(I) << ":\n"
+              << "  callq *" << ResolverBlockPtrName << "(%rip)\n";
+
+  M.appendModuleInlineAsm(AsmStream.str());
+
+  return GetLabelName;
 }
 
 std::error_code OrcX86_64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,