[PowerPC] Implement mayBeEmittedAsTailCall for PPC

Implements TargetLowering callback 'mayBeEmittedAsTailCall' that enables
CodeGenPrepare to duplicate returns when they might enable a tail-call.

Differential Revision: https://reviews.llvm.org/D39777

llvm-svn: 318321
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 2d15b73..3c3657e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -13804,3 +13804,38 @@
 
   return SDValue();
 }
+
+bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
+  // Duplicating returns is only attempted for the 64-bit SysV ABIs, and only
+  // for calls that are already marked as tail calls.
+  const bool Is64BitSVR4 = Subtarget.isSVR4ABI() && Subtarget.isPPC64();
+  if (!Is64BitSVR4 || !CI->isTailCall())
+    return false;
+
+  // Respect a caller that carries "disable-tail-calls"="true".
+  const Function *CallerFn = CI->getParent()->getParent();
+  auto DisableAttr = CallerFn->getFnAttribute("disable-tail-calls");
+  if (DisableAttr.getValueAsString() == "true")
+    return false;
+
+  // With sibling calls disabled and no guaranteed tail-call optimization,
+  // duplicating the return cannot pay off.
+  auto &Machine = getTargetMachine();
+  if (DisableSCO && !Machine.Options.GuaranteedTailCallOpt)
+    return false;
+
+  // Indirect calls (no known callee) and variadic callees are rejected.
+  const Function *CalleeFn = CI->getCalledFunction();
+  if (CalleeFn == nullptr || CalleeFn->isVarArg())
+    return false;
+
+  // The caller/callee calling conventions must be eligible for TCO.
+  const bool CCsEligible = areCallingConvEligibleForTCO_64SVR4(
+      CallerFn->getCallingConv(), CI->getCallingConv());
+  if (!CCsEligible)
+    return false;
+
+  // Everything statically checkable passed; a callee assumed to be DSO-local
+  // has a good chance of actually being tail-called.
+  return Machine.shouldAssumeDSOLocal(*CallerFn->getParent(), CalleeFn);
+}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index bf9c4b8..c75b956 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1085,6 +1085,10 @@
     /// essentially v16i8 vector version of VINSERTH.
     SDValue lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
 
+    /// Return true if the call instruction \p CI can potentially be emitted
+    /// as a tail call. A true result lets the optimizers try to move or
+    /// duplicate return instructions to help enable tail call optimization.
+    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
   }; // end class PPCTargetLowering
 
   namespace PPC {
diff --git a/llvm/test/CodeGen/PowerPC/duplicate-returns-for-tailcall.ll b/llvm/test/CodeGen/PowerPC/duplicate-returns-for-tailcall.ll
new file mode 100644
index 0000000..520efd8
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/duplicate-returns-for-tailcall.ll
@@ -0,0 +1,63 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -stop-after codegenprepare -mtriple=powerpc64le-unknown-gnu-linux  < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -stop-after codegenprepare -mtriple=powerpc64-unknown-gnu-linux  < %s | FileCheck %s
+
+; Function Attrs: noinline norecurse nounwind readnone
+define hidden signext i32 @call1(i32 signext %a, i32 signext %b, i32 signext %c) local_unnamed_addr #0 {
+entry:
+  %sum.ab = add nsw i32 %b, %a
+  %sum.abc = add nsw i32 %sum.ab, %c
+  ret i32 %sum.abc
+}
+
+; Function Attrs: nounwind
+define signext i32 @test(i32 signext %a, i32 signext %b, i32 signext %c) local_unnamed_addr #1 {
+entry:
+  %cmp = icmp eq i32 %a, %b
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %call = tail call signext i32 @call1(i32 signext %a, i32 signext %b, i32 signext %c)
+  br label %return
+; The return should get duplicated here to enable a tail-call opportunity.
+; CHECK-LABEL: if.then:
+; CHECK-NEXT:  %[[T1:[a-zA-Z0-9]+]] = tail call signext i32 @call1
+; CHECK-NEXT:  ret i32 %[[T1]]
+
+if.end:                                           ; preds = %entry
+  %cmp1 = icmp slt i32 %a, %b
+  br i1 %cmp1, label %if.then2, label %if.end4
+
+if.then2:                                         ; preds = %if.end
+  %call3 = tail call signext i32 @call2(i32 signext %a, i32 signext %b, i32 signext %c) #3
+  br label %return
+; No duplication here since we cannot tail-call an external function anyway.
+; CHECK-LABEL: if.then2:
+; CHECK-NEXT:  tail call signext i32 @call2
+; CHECK-NEXT:  br
+
+if.end4:                                          ; preds = %if.end
+  %cmp5 = icmp sgt i32 %b, %c
+  br i1 %cmp5, label %if.then6, label %return
+
+if.then6:                                         ; preds = %if.end4
+  %call7 = tail call fastcc signext i32 @call3(i32 signext %a, i32 signext %b, i32 signext %c)
+  br label %return
+; No duplication here because the calling convention mismatch means we won't tail-call.
+; CHECK-LABEL: if.then6:
+; CHECK:       tail call fastcc signext i32 @call3
+; CHECK-NEXT:  br
+
+return:                                           ; preds = %if.end4, %if.then6, %if.then2, %if.then
+  %retval.0 = phi i32 [ %call, %if.then ], [ %call3, %if.then2 ], [ %call7, %if.then6 ], [ %c, %if.end4 ]
+  ret i32 %retval.0
+}
+
+declare signext i32 @call2(i32 signext, i32 signext, i32 signext) local_unnamed_addr #2
+
+; Function Attrs: noinline norecurse nounwind readnone
+define internal fastcc signext i32 @call3(i32 signext %a, i32 signext %b, i32 signext %c) unnamed_addr #0 {
+entry:
+  %prod.ab = mul nsw i32 %b, %a
+  %prod.abc = mul nsw i32 %prod.ab, %c
+  ret i32 %prod.abc
+}