Re-commit r124462 with fixes. Tail recursion elim will now dup ret into unconditional predecessor to enable TCE on demand.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@124518 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index ce4b1be..15aed34 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -465,9 +465,12 @@
MaxDuplicateCount = TailDuplicateSize;
if (PreRegAlloc) {
- // Pre-regalloc tail duplication hurts compile time and doesn't help
- // much except for indirect branches.
- if (TailBB->empty() || !TailBB->back().getDesc().isIndirectBranch())
+ if (TailBB->empty())
+ return false;
+ const TargetInstrDesc &TID = TailBB->back().getDesc();
+ // Pre-regalloc tail duplication hurts compile time and doesn't help
+ // much except for indirect branches and returns.
+ if (!TID.isIndirectBranch() && !TID.isReturn())
return false;
// If the target has hardware branch prediction that can handle indirect
// branches, duplicating them can often make them predictable when there
@@ -502,7 +505,7 @@
}
// Heuristically, don't tail-duplicate calls if it would expand code size,
// as it's less likely to be worth the extra cost.
- if (InstrCount > 1 && HasCall)
+ if (InstrCount > 1 && (PreRegAlloc && HasCall))
return false;
DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');