X86: Fold tail calls into conditional branches also for 64-bit (PR26302)

This extends the optimization in r280832 to also work for 64-bit. The only
quirk is that we can't do this for 64-bit Windows (yet).

Differential Revision: https://reviews.llvm.org/D24423

llvm-svn: 281113
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 701b3f2..63e8f01 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -81,6 +81,7 @@
   case X86::TCRETURNri:
   case X86::TCRETURNmi:
   case X86::TCRETURNdi64:
+  case X86::TCRETURNdi64cc:
   case X86::TCRETURNri64:
   case X86::TCRETURNmi64: {
     bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
@@ -98,7 +99,7 @@
     Offset = StackAdj - MaxTCDelta;
     assert(Offset >= 0 && "Offset should never be negative");
 
-    if (Opcode == X86::TCRETURNdicc) {
+    if (Opcode == X86::TCRETURNdicc || Opcode == X86::TCRETURNdi64cc) {
       assert(Offset == 0 && "Conditional tail call cannot adjust the stack.");
     }
 
@@ -111,7 +112,7 @@
     // Jump to label or value in register.
     bool IsWin64 = STI->isTargetWin64();
     if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdicc ||
-        Opcode == X86::TCRETURNdi64) {
+        Opcode == X86::TCRETURNdi64 || Opcode == X86::TCRETURNdi64cc) {
       unsigned Op;
       switch (Opcode) {
       case X86::TCRETURNdi:
@@ -120,6 +121,11 @@
       case X86::TCRETURNdicc:
         Op = X86::TAILJMPd_CC;
         break;
+      case X86::TCRETURNdi64cc:
+        assert(!IsWin64 && "Conditional tail calls confuse the Win64 unwinder.");
+        // TODO: We could do it for Win64 "leaf" functions though; PR30337.
+        Op = X86::TAILJMPd64_CC;
+        break;
       default:
         // Note: Win64 uses REX prefixes indirect jumps out of functions, but
         // not direct ones.
@@ -135,7 +141,7 @@
         MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                               JumpTarget.getTargetFlags());
       }
-      if (Op == X86::TAILJMPd_CC) {
+      if (Op == X86::TAILJMPd_CC || Op == X86::TAILJMPd64_CC) {
         MIB.addImm(MBBI->getOperand(2).getImm());
       }
 
diff --git a/llvm/lib/Target/X86/X86InstrControl.td b/llvm/lib/Target/X86/X86InstrControl.td
index 77796d2..f2ca000 100644
--- a/llvm/lib/Target/X86/X86InstrControl.td
+++ b/llvm/lib/Target/X86/X86InstrControl.td
@@ -305,17 +305,27 @@
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
     isCodeGenOnly = 1, Uses = [RSP], usesCustomInserter = 1,
     SchedRW = [WriteJump] in {
-  def TCRETURNdi64 : PseudoI<(outs),
-                      (ins i64i32imm_pcrel:$dst, i32imm:$offset),
-                      []>;
-  def TCRETURNri64 : PseudoI<(outs),
-                      (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>;
+  def TCRETURNdi64   : PseudoI<(outs),
+                        (ins i64i32imm_pcrel:$dst, i32imm:$offset),
+                        []>;
+  def TCRETURNdi64cc : PseudoI<(outs),
+                        (ins i64i32imm_pcrel:$dst, i32imm:$offset,
+                         i32imm:$cond), []>;
+  def TCRETURNri64   : PseudoI<(outs),
+                        (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>;
   let mayLoad = 1 in
-  def TCRETURNmi64 : PseudoI<(outs),
-                       (ins i64mem_TC:$dst, i32imm:$offset), []>;
+  def TCRETURNmi64   : PseudoI<(outs),
+                        (ins i64mem_TC:$dst, i32imm:$offset), []>;
 
   def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs), (ins i64i32imm_pcrel:$dst),
                    "jmp\t$dst", [], IIC_JMP_REL>;
+
+  // This gets substituted to a conditional jump instruction in MC lowering.
+  def TAILJMPd64_CC : Ii32PCRel<0x80, RawFrm, (outs),
+                           (ins i64i32imm_pcrel:$dst, i32imm:$cond),
+                           "",
+                           [], IIC_JMP_REL>;
+
   def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
                      "jmp{q}\t{*}$dst", [], IIC_JMP_MEM>;
 
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 65fe45e..1d8a388 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4101,11 +4101,18 @@
 bool X86InstrInfo::canMakeTailCallConditional(
     SmallVectorImpl<MachineOperand> &BranchCond,
     const MachineInstr &TailCall) const {
-  if (TailCall.getOpcode() != X86::TCRETURNdi) {
+  if (TailCall.getOpcode() != X86::TCRETURNdi &&
+      TailCall.getOpcode() != X86::TCRETURNdi64) {
     // Only direct calls can be done with a conditional branch.
     return false;
   }
 
+  if (Subtarget.isTargetWin64()) {
+    // Conditional tail calls confuse the Win64 unwinder.
+    // TODO: Allow them for "leaf" functions; PR30337.
+    return false;
+  }
+
   assert(BranchCond.size() == 1);
   if (BranchCond[0].getImm() > X86::LAST_VALID_COND) {
     // Can't make a conditional tail call with this condition.
@@ -4144,7 +4151,10 @@
     break;
   }
 
-  auto MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(X86::TCRETURNdicc));
+  unsigned Opc = TailCall.getOpcode() == X86::TCRETURNdi ? X86::TCRETURNdicc
+                                                         : X86::TCRETURNdi64cc;
+
+  auto MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opc));
   MIB->addOperand(TailCall.getOperand(0)); // Destination.
   MIB.addImm(0); // Stack offset (not used).
   MIB->addOperand(BranchCond[0]); // Condition.
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 9ce647a..c86f15a 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -505,6 +505,7 @@
   case X86::TAILJMPd:
   case X86::TAILJMPd64: Opcode = X86::JMP_1;  goto SetTailJmpOpcode;
   case X86::TAILJMPd_CC:
+  case X86::TAILJMPd64_CC:
     Opcode = X86::GetCondBranchFromCond(
         static_cast<X86::CondCode>(MI->getOperand(1).getImm()));
     goto SetTailJmpOpcode;
@@ -1309,6 +1310,7 @@
   case X86::TAILJMPr64:
   case X86::TAILJMPm64:
   case X86::TAILJMPd64:
+  case X86::TAILJMPd64_CC:
   case X86::TAILJMPr64_REX:
   case X86::TAILJMPm64_REX:
     // Lower these as normal, but add some comments.