[WinEH] Emit prologues and epilogues for funclets

Summary:
32-bit funclets have short prologues that allocate enough stack for the
largest call in the whole function. The runtime saves CSRs for the
funclet. It doesn't restore CSRs after we finally transfer control back
to the parent funciton via a CATCHRET, but that's a separate issue.
32-bit funclets also have to adjust the incoming EBP value, which is
what llvm.x86.seh.recoverframe does in the old model.

64-bit funclets need to spill CSRs as normal. For simplicity, this just
spills the same set of CSRs as the parent function, rather than trying
to compute different CSR sets for the parent function and each funclet.
64-bit funclets also allocate enough stack space for the largest
outgoing call frame, like 32-bit.

Reviewers: majnemer

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D12546

llvm-svn: 247092
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 3b544d5..a2f9efb 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
 #include "llvm/MC/MCAsmInfo.h"
@@ -88,7 +89,7 @@
           MFI->hasVarSizedObjects() ||
           MFI->isFrameAddressTaken() || MFI->hasOpaqueSPAdjustment() ||
           MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
-          MMI.callsUnwindInit() || MMI.callsEHReturn() ||
+          MMI.callsUnwindInit() || MMI.hasEHFunclets() || MMI.callsEHReturn() ||
           MFI->hasStackMap() || MFI->hasPatchPoint());
 }
 
@@ -695,7 +696,40 @@
   uint64_t NumBytes = 0;
   int stackGrowth = -SlotSize;
 
-  if (HasFP) {
+  if (MBB.isEHFuncletEntry()) {
+    assert(STI.isOSWindows() && "funclets only supported on Windows");
+
+    // Set up the FramePtr and BasePtr physical registers using the address
+    // passed as EBP or RDX by the MSVC EH runtime.
+    if (STI.is32Bit()) {
+      MBBI = restoreWin32EHFrameAndBasePtr(MBB, MBBI, DL);
+    } else {
+      // FIXME: Add SEH directives.
+      NeedsWinCFI = false;
+      // Immediately spill RDX into the home slot. The runtime cares about this.
+      unsigned RDX = Uses64BitFramePtr ? X86::RDX : X86::EDX;
+      // MOV64mr %rdx, 16(%rsp)
+      unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
+      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)),
+                   StackPtr, true, 16)
+        .addReg(RDX)
+        .setMIFlag(MachineInstr::FrameSetup);
+      // PUSH64r %rbp
+      BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
+          .addReg(MachineFramePtr, RegState::Kill)
+          .setMIFlag(MachineInstr::FrameSetup);
+      // MOV64rr %rdx, %rbp
+      unsigned MOVrr = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
+      BuildMI(MBB, MBBI, DL, TII.get(MOVrr), FramePtr)
+          .addReg(RDX)
+          .setMIFlag(MachineInstr::FrameSetup);
+      assert(!TRI->hasBasePointer(MF) &&
+             "x64 funclets with base ptrs not yet implemented");
+    }
+
+    // For EH funclets, only allocate enough space for outgoing calls.
+    NumBytes = MFI->getMaxCallFrameSize();
+  } else if (HasFP) {
     // Calculate required stack adjustment.
     uint64_t FrameSize = StackSize - SlotSize;
     // If required, include space for extra hidden slot for stashing base pointer.
@@ -956,10 +990,11 @@
       // it recovers the frame pointer from the base pointer rather than the
       // other way around.
       unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
-      unsigned IgnoredFrameReg;
-      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), BasePtr, true,
-                   getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(),
-                                          IgnoredFrameReg))
+      unsigned UsedReg;
+      int Offset =
+          getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg);
+      assert(UsedReg == BasePtr);
+      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
           .addReg(FramePtr)
           .setMIFlag(MachineInstr::FrameSetup);
     }
@@ -991,6 +1026,17 @@
   return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
 }
 
+static bool isFuncletReturnInstr(MachineInstr *MI) {
+  switch (MI->getOpcode()) {
+  case X86::CATCHRET:
+  case X86::CATCHRET64:
+    return true;
+  default:
+    return false;
+  }
+  llvm_unreachable("impossible");
+}
+
 void X86FrameLowering::emitEpilogue(MachineFunction &MF,
                                     MachineBasicBlock &MBB) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -1016,7 +1062,16 @@
   unsigned CSSize = X86FI->getCalleeSavedFrameSize();
   uint64_t NumBytes = 0;
 
-  if (hasFP(MF)) {
+  if (isFuncletReturnInstr(MBBI)) {
+    NumBytes = MFI->getMaxCallFrameSize();
+
+    if (Is64Bit) {
+      assert(hasFP(MF) && "win64 EH funclets without FP not yet implemented");
+      // POP64r %rbp
+      BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
+              MachineFramePtr);
+    }
+  } else if (hasFP(MF)) {
     // Calculate required stack adjustment.
     uint64_t FrameSize = StackSize - SlotSize;
     NumBytes = FrameSize - CSSize;
@@ -1331,6 +1386,11 @@
     const TargetRegisterInfo *TRI) const {
   DebugLoc DL = MBB.findDebugLoc(MI);
 
+  // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
+  // for us, and there are no XMM CSRs on Win32.
+  if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
+    return true;
+
   // Push GPRs. It increases frame size.
   unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
   for (unsigned i = CSI.size(); i != 0; --i) {
@@ -1372,6 +1432,11 @@
   if (CSI.empty())
     return false;
 
+  // Don't restore CSRs in 32-bit EH funclets. Matches
+  // spillCalleeSavedRegisters.
+  if (isFuncletReturnInstr(MI) && STI.is32Bit() && STI.isOSWindows())
+    return true;
+
   DebugLoc DL = MBB.findDebugLoc(MI);
 
   // Reload XMMs from stack frame.
@@ -1423,8 +1488,16 @@
   }
 
   // Spill the BasePtr if it's used.
-  if (TRI->hasBasePointer(MF))
+  if (TRI->hasBasePointer(MF)) {
     SavedRegs.set(TRI->getBaseRegister());
+
+    // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
+    if (MF.getMMI().hasEHFunclets()) {
+      int FI = MFI->CreateSpillStackObject(SlotSize, SlotSize);
+      X86FI->setHasSEHFramePtrSave(true);
+      X86FI->setSEHFramePtrSaveIndex(FI);
+    }
+  }
 }
 
 static bool
@@ -1976,3 +2049,58 @@
   // safe to insert the epilogue here.
   return !terminatorsNeedFlagsAsInput(MBB);
 }
+
+MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHFrameAndBasePtr(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    DebugLoc DL) const {
+  assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
+  assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
+  assert(STI.is32Bit() && !Uses64BitFramePtr &&
+         "restoring EBP/ESI on non-32-bit target");
+
+  MachineFunction &MF = *MBB.getParent();
+  unsigned FramePtr = TRI->getFrameRegister(MF);
+  unsigned BasePtr = TRI->getBaseRegister();
+  MachineModuleInfo &MMI = MF.getMMI();
+  const Function *Fn = MF.getFunction();
+  WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn);
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  // FIXME: Don't set FrameSetup flag in catchret case.
+
+  int FI = FuncInfo.EHRegNodeFrameIndex;
+  unsigned UsedReg;
+  int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg);
+  int EHRegSize = MFI->getObjectSize(FI);
+  int EndOffset = -EHRegOffset - EHRegSize;
+  assert(EndOffset >= 0 &&
+         "end of registration object above normal EBP position!");
+  if (UsedReg == FramePtr) {
+    // ADD $offset, %ebp
+    assert(UsedReg == FramePtr);
+    unsigned ADDri = getADDriOpcode(false, EndOffset);
+    BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
+        .addReg(FramePtr)
+        .addImm(EndOffset)
+        .setMIFlag(MachineInstr::FrameSetup)
+        ->getOperand(3)
+        .setIsDead();
+  } else {
+    assert(UsedReg == BasePtr);
+    // LEA offset(%ebp), %esi
+    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
+                 FramePtr, false, EndOffset)
+        .setMIFlag(MachineInstr::FrameSetup);
+    // MOV32mr SavedEBPOffset(%esi), %ebp
+    assert(X86FI->getHasSEHFramePtrSave());
+    int Offset =
+        getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg);
+    assert(UsedReg == BasePtr);
+    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), UsedReg, true,
+                 Offset)
+        .addReg(FramePtr)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+  return MBBI;
+}
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
index dd83928..7cbd18b 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -154,6 +154,13 @@
                                            MachineBasicBlock::iterator MBBI,
                                            DebugLoc DL, int64_t Offset,
                                            bool InEpilogue) const;
+
+  /// Sets up EBP and optionally ESI based on the incoming EBP value.  Only
+  /// needed for 32-bit. Used in funclet prologues and at catchret destinations.
+  MachineBasicBlock::iterator
+  restoreWin32EHFrameAndBasePtr(MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MBBI,
+                                DebugLoc DL) const;
 };
 
 } // End llvm namespace
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 18c8d7d..4798474 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -152,7 +152,7 @@
 
 }
 
-let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in {
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1, isReturn = 1 in {
 def CATCHRET   : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
                    "ret{l}\t# CATCHRET",
                    [(X86catchret GR32:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index b001030..88a6d00 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -531,7 +531,8 @@
     break;
   }
 
-  case X86::CATCHRET: {
+  case X86::CATCHRET:
+  case X86::CATCHRET64: {
     OutMI = MCInst();
     OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
     break;
diff --git a/llvm/lib/Target/X86/X86WinEHState.cpp b/llvm/lib/Target/X86/X86WinEHState.cpp
index 19f574a..ccec774 100644
--- a/llvm/lib/Target/X86/X86WinEHState.cpp
+++ b/llvm/lib/Target/X86/X86WinEHState.cpp
@@ -179,6 +179,8 @@
   WinEHFuncInfo &FuncInfo =
       *(MMI ? &MMI->getWinEHFuncInfo(&F) : FuncInfoPtr.get());
 
+  FuncInfo.EHRegNode = RegNode;
+
   switch (Personality) {
   default: llvm_unreachable("unexpected personality function");
   case EHPersonality::MSVC_CXX:
@@ -494,9 +496,12 @@
 
     // Insert calls to llvm.x86.seh.restoreframe at catchret destinations.
     if (auto *CR = dyn_cast<CatchReturnInst>(BB.getTerminator())) {
-      //llvm::errs() << "BB: " << BB << '\n';
-      //llvm::errs() << "CR->getSuccessor(): " << *CR->getSuccessor() << '\n';
-      IRBuilder<> Builder(CR->getSuccessor()->begin());
+      Instruction *Start = CR->getSuccessor()->begin();
+      assert(!isa<PHINode>(Start) &&
+             "winehprepare failed to demote phi after catchret");
+      if (match(Start, m_Intrinsic<Intrinsic::x86_seh_restoreframe>()))
+        continue;
+      IRBuilder<> Builder(Start);
       Builder.CreateCall(RestoreFrame, {});
     }
   }