[mips] Enable tail calls by default

Enable tail calls by default for (micro)MIPS(64).

Doing this for microMIPS is slightly trickier than for MIPS(R6) or microMIPSR6.
microMIPS has two instruction encodings, 16-bit and 32-bit, along with some
restrictions on the size of the instruction that can fill the delay slot.
For safe tail calls on microMIPS, the delay slot filler attempts to find
a correctly sized instruction for the delay slot of TAILCALL pseudos.

Reviewers: dsanders, vkalintris

Subscribers: jfb, dsanders, sdardis, llvm-commits

Differential Revision: https://reviews.llvm.org/D21138

llvm-svn: 277708
diff --git a/llvm/lib/Target/Mips/MicroMipsInstrInfo.td b/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
index fdf3c5f..de2f7d4 100644
--- a/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -471,6 +471,18 @@
   let isIndirectBranch = 1;
 }
 
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1,
+    hasExtraSrcRegAllocReq = 1, isCTI = 1, Defs = [AT] in {
+  class TailCall_MM<Instruction JumpInst> :
+    PseudoSE<(outs), (ins calltarget:$target), [], II_J>,
+    PseudoInstExpansion<(JumpInst jmptarget_mm:$target)>;
+
+  class TailCallReg_MM<RegisterOperand RO, Instruction JRInst,
+                       RegisterOperand ResRO = RO> :
+    PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>,
+    PseudoInstExpansion<(JRInst ResRO:$rs)>;
+}
+
 // Break16 and Sdbbp16
 class BrkSdbbp16MM<string opstr> :
   MicroMipsInst16<(outs), (ins uimm4:$code_),
@@ -969,6 +981,12 @@
   def PREFX_MM : PrefetchIndexed<"prefx">, POOL32F_PREFX_FM_MM<0x15, 0x1A0>;
 }
 
+let AdditionalPredicates = [InMicroMips] in {
+  def TAILCALL_MM : TailCall_MM<J_MM>, ISA_MIPS1_NOT_32R6_64R6;
+  def TAILCALLREG_MM : TailCallReg_MM<GPR32Opnd, JR_MM>,
+                       ISA_MIPS1_NOT_32R6_64R6;
+}
+
 let DecoderNamespace = "MicroMips" in {
   def RDHWR_MM : MMRel, R6MMR6Rel, ReadHardware<GPR32Opnd, HWRegsOpnd>,
                  RDHWR_FM_MM, ISA_MICROMIPS32_NOT_MIPS32R6;
diff --git a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
index f552f8d..be239b0 100644
--- a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -848,6 +848,8 @@
   def SDC2_R6 : SDC2_R6_ENC, SDC2_R6_DESC, ISA_MIPS32R6;
   def SWC2_R6 : SWC2_R6_ENC, SWC2_R6_DESC, ISA_MIPS32R6;
 }
+def TAILCALL_R6 : TailCall<J>, ISA_MIPS32R6;
+def TAILCALLREG_R6 : TailCallReg<GPR32Opnd, JR>, GPR_32, ISA_MIPS32R6;
 
 //===----------------------------------------------------------------------===//
 //
diff --git a/llvm/lib/Target/Mips/Mips64InstrInfo.td b/llvm/lib/Target/Mips/Mips64InstrInfo.td
index 39313e1..c3d8754 100644
--- a/llvm/lib/Target/Mips/Mips64InstrInfo.td
+++ b/llvm/lib/Target/Mips/Mips64InstrInfo.td
@@ -228,22 +228,24 @@
            ISA_MIPS2_NOT_32R6_64R6;
 def SC64 : SCBase<"sc", GPR32Opnd>, LW_FM<0x38>, PTR_64,
            ISA_MIPS2_NOT_32R6_64R6;
+def JR64   : IndirectBranch<"jr", GPR64Opnd>, MTLO_FM<8>, PTR_64;
 }
 
+def JALR64 : JumpLinkReg<"jalr", GPR64Opnd>, JALR_FM;
+
 /// Jump and Branch Instructions
 let isCodeGenOnly = 1 in {
-  def JR64   : IndirectBranch<"jr", GPR64Opnd>, MTLO_FM<8>;
   def BEQ64  : CBranch<"beq", brtarget, seteq, GPR64Opnd>, BEQ_FM<4>;
   def BNE64  : CBranch<"bne", brtarget, setne, GPR64Opnd>, BEQ_FM<5>;
   def BGEZ64 : CBranchZero<"bgez", brtarget, setge, GPR64Opnd>, BGEZ_FM<1, 1>;
   def BGTZ64 : CBranchZero<"bgtz", brtarget, setgt, GPR64Opnd>, BGEZ_FM<7, 0>;
   def BLEZ64 : CBranchZero<"blez", brtarget, setle, GPR64Opnd>, BGEZ_FM<6, 0>;
   def BLTZ64 : CBranchZero<"bltz", brtarget, setlt, GPR64Opnd>, BGEZ_FM<1, 0>;
-  def JALR64 : JumpLinkReg<"jalr", GPR64Opnd>, JALR_FM;
   def JALR64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR, RA, GPR32Opnd>;
-  def TAILCALL64_R : TailCallReg<GPR64Opnd, JR, GPR32Opnd>;
+  def TAILCALLREG64 : TailCallReg<GPR64Opnd, JR64>, GPR_64, ISA_MIPS1_NOT_32R6_64R6;
 }
 
+
 def PseudoReturn64 : PseudoReturnBase<GPR64Opnd>;
 def PseudoIndirectBranch64 : PseudoIndirectBranchBase<GPR64Opnd>;
 
diff --git a/llvm/lib/Target/Mips/Mips64r6InstrInfo.td b/llvm/lib/Target/Mips/Mips64r6InstrInfo.td
index cc01400..0f36660 100644
--- a/llvm/lib/Target/Mips/Mips64r6InstrInfo.td
+++ b/llvm/lib/Target/Mips/Mips64r6InstrInfo.td
@@ -165,6 +165,10 @@
 def BGEZC64 : BGEZC_ENC, BGEZC64_DESC, ISA_MIPS64R6, GPR_64;
 }
 
+def TAILCALL64_R6 : TailCall<J>, ISA_MIPS64R6;
+def TAILCALLREG64_R6 : TailCallReg<GPR64Opnd, JR64>, GPR_64,
+                       ISA_MIPS64R6;
+
 //===----------------------------------------------------------------------===//
 //
 // Instruction Aliases
diff --git a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 44bb872..c473c6c 100644
--- a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -543,6 +543,9 @@
 
 // For given opcode returns opcode of corresponding instruction with short
 // delay slot.
+// For the pseudo TAILCALL*_MM instructions return the short delay slot
+// form. Unfortunately, TAILCALL<->b16 is denied as b16 has a limited range
+// that is too short to make use of for tail calls.
 static int getEquivalentCallShort(int Opcode) {
   switch (Opcode) {
   case Mips::BGEZAL:
@@ -555,6 +558,10 @@
     return Mips::JALRS_MM;
   case Mips::JALR16_MM:
     return Mips::JALRS16_MM;
+  case Mips::TAILCALL_MM:
+    llvm_unreachable("Attempting to shorten the TAILCALL_MM pseudo!");
+  case Mips::TAILCALLREG_MM:
+    return Mips::JR16_MM;
   default:
     llvm_unreachable("Unexpected call instruction for microMIPS.");
   }
@@ -606,6 +613,12 @@
             DSI->isCall()) {
           // If instruction in delay slot is 16b change opcode to
           // corresponding instruction with short delay slot.
+
+          // TODO: Implement an instruction mapping table of 16bit opcodes to
+          // 32bit opcodes so that an instruction can be expanded. This would
+          // save 16 bits as a TAILCALL_MM pseudo requires a fullsized nop.
+          // TODO: Permit b16 when branching backwards to the same function
+          // if it is in range.
           DSI->setDesc(TII->get(getEquivalentCallShort(DSI->getOpcode())));
         }
         continue;
@@ -692,9 +705,14 @@
     bool InMicroMipsMode = STI.inMicroMipsMode();
     const MipsInstrInfo *TII = STI.getInstrInfo();
     unsigned Opcode = (*Slot).getOpcode();
-    if (InMicroMipsMode && TII->getInstSizeInBytes(*CurrI) == 2 &&
+    // This is complicated by the tail call optimization. For non-PIC code
+    // there is only a 32bit sized unconditional branch which can be assumed
+    // to be able to reach the target. b16 only has a range of +/- 1 KB.
+    // It's entirely possible that the target function is reachable with b16
+    // but we don't have enough information to make that decision.
+     if (InMicroMipsMode && TII->getInstSizeInBytes(*CurrI) == 2 &&
         (Opcode == Mips::JR || Opcode == Mips::PseudoIndirectBranch ||
-         Opcode == Mips::PseudoReturn))
+         Opcode == Mips::PseudoReturn || Opcode == Mips::TAILCALL))
       continue;
 
     Filler = CurrI;
diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/llvm/lib/Target/Mips/MipsInstrInfo.cpp
index 1f38ca6..c317ecb 100644
--- a/llvm/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -282,6 +282,7 @@
     case Mips::JR:
     case Mips::PseudoReturn:
     case Mips::PseudoIndirectBranch:
+    case Mips::TAILCALLREG_MM:
       canUseShortMicroMipsCTI = true;
       break;
     }
@@ -362,6 +363,8 @@
     case Mips::JR:
     case Mips::PseudoReturn:
     case Mips::PseudoIndirectBranch:
+    case Mips::TAILCALLREG_MM:
+    case Mips::TAILCALLREG_R6:
       if (canUseShortMicroMipsCTI)
         return Mips::JRC16_MM;
       return Mips::JIC;
@@ -370,6 +373,7 @@
     case Mips::JR64:
     case Mips::PseudoReturn64:
     case Mips::PseudoIndirectBranch64:
+    case Mips::TAILCALLREG64_R6:
       return Mips::JIC64;
     case Mips::JALR64Pseudo:
       return Mips::JIALC64;
diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.td b/llvm/lib/Target/Mips/MipsInstrInfo.td
index 43e4818..ff8a5fb 100644
--- a/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -1918,8 +1918,11 @@
 def BLTZALL : MMRel, BGEZAL_FT<"bltzall", brtarget, GPR32Opnd, 0>,
               BGEZAL_FM<0x12>, ISA_MIPS2_NOT_32R6_64R6;
 def BAL_BR : BAL_BR_Pseudo<BGEZAL>;
-def TAILCALL : TailCall<J>;
-def TAILCALL_R : TailCallReg<GPR32Opnd, JR>;
+
+let AdditionalPredicates = [NotInMicroMips] in {
+def TAILCALL : TailCall<J>, ISA_MIPS1_NOT_32R6_64R6;
+def TAILCALLREG : TailCallReg<GPR32Opnd, JR>, ISA_MIPS1_NOT_32R6_64R6, GPR_32;
+}
 
 // Indirect branches are matched as PseudoIndirectBranch/PseudoIndirectBranch64
 // then are expanded to JR, JR64, JALR, or JALR64 depending on the ISA.
diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index 80c000d..73c461b 100644
--- a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -27,8 +27,8 @@
 #define DEBUG_TYPE "mips-isel"
 
 static cl::opt<bool>
-EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
-                    cl::desc("MIPS: Enable tail calls."), cl::init(false));
+UseMipsTailCalls("mips-tail-calls", cl::Hidden,
+                    cl::desc("MIPS: permit tail calls."), cl::init(true));
 
 static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
                                    cl::desc("Expand double precision loads and "
@@ -1178,7 +1178,7 @@
 bool MipsSETargetLowering::isEligibleForTailCallOptimization(
     const CCState &CCInfo, unsigned NextStackOffset,
     const MipsFunctionInfo &FI) const {
-  if (!EnableMipsTailCalls)
+  if (!UseMipsTailCalls)
     return false;
 
   // Exception has to be cleared with eret.