When optimizing for size, don't tail-merge unless it's likely to be a
code-size win, and not when it's only likely to be code-size neutral,
such as when only a single instruction would be eliminated and a new
branch would be required.
This fixes rdar://7392894.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@88692 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index dd17d88..6606316 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -519,21 +519,24 @@
return true;
// If both blocks have an unconditional branch temporarily stripped out,
- // treat that as an additional common instruction.
- if (MBB1 != PredBB && MBB2 != PredBB &&
+ // count that as an additional common instruction for the following
+ // heuristics.
+ unsigned EffectiveTailLen = CommonTailLen;
+ if (SuccBB && MBB1 != PredBB && MBB2 != PredBB &&
!MBB1->back().getDesc().isBarrier() &&
!MBB2->back().getDesc().isBarrier())
- --minCommonTailLength;
+ ++EffectiveTailLen;
// Check if the common tail is long enough to be worthwhile.
- if (CommonTailLen >= minCommonTailLength)
+ if (EffectiveTailLen >= minCommonTailLength)
return true;
- // If we are optimizing for code size, 1 instruction in common is enough if
- // we don't have to split a block. At worst we will be replacing a
- // fallthrough into the common tail with a branch, which at worst breaks
- // even with falling through into the duplicated common tail.
- if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+ // If we are optimizing for code size, 2 instructions in common is enough if
+ // we don't have to split a block. At worst we will be introducing 1 new
+ // branch instruction, which is likely to be smaller than the 2
+ // instructions that would be deleted in the merge.
+ if (EffectiveTailLen >= 2 &&
+ MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
(I1 == MBB1->begin() || I2 == MBB2->begin()))
return true;
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
index fdf96d6..0d86e56 100644
--- a/test/CodeGen/X86/tail-opts.ll
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -293,3 +293,116 @@
}
declare void @func()
+
+; one - One instruction may be tail-duplicated even with optsize.
+
+; CHECK: one:
+; CHECK: movl $0, XYZ(%rip)
+; CHECK: movl $0, XYZ(%rip)
+
+@XYZ = external global i32
+
+define void @one() nounwind optsize {
+entry:
+ %0 = icmp eq i32 undef, 0
+ br i1 %0, label %bbx, label %bby
+
+bby:
+ switch i32 undef, label %bb7 [
+ i32 16, label %return
+ ]
+
+bb7:
+ volatile store i32 0, i32* @XYZ
+ unreachable
+
+bbx:
+ switch i32 undef, label %bb12 [
+ i32 128, label %return
+ ]
+
+bb12:
+ volatile store i32 0, i32* @XYZ
+ unreachable
+
+return:
+ ret void
+}
+
+; two - Same as one, but with two instructions in the common
+; tail instead of one. This is too much to be merged, given
+; the optsize attribute.
+
+; CHECK: two:
+; CHECK-NOT: XYZ
+; CHECK: movl $0, XYZ(%rip)
+; CHECK: movl $1, XYZ(%rip)
+; CHECK-NOT: XYZ
+; CHECK: ret
+
+define void @two() nounwind optsize {
+entry:
+ %0 = icmp eq i32 undef, 0
+ br i1 %0, label %bbx, label %bby
+
+bby:
+ switch i32 undef, label %bb7 [
+ i32 16, label %return
+ ]
+
+bb7:
+ volatile store i32 0, i32* @XYZ
+ volatile store i32 1, i32* @XYZ
+ unreachable
+
+bbx:
+ switch i32 undef, label %bb12 [
+ i32 128, label %return
+ ]
+
+bb12:
+ volatile store i32 0, i32* @XYZ
+ volatile store i32 1, i32* @XYZ
+ unreachable
+
+return:
+ ret void
+}
+
+; two_nosize - Same as two, but without the optsize attribute.
+; Now two instructions are enough to be tail-duplicated.
+
+; CHECK: two_nosize:
+; CHECK: movl $0, XYZ(%rip)
+; CHECK: movl $1, XYZ(%rip)
+; CHECK: movl $0, XYZ(%rip)
+; CHECK: movl $1, XYZ(%rip)
+
+define void @two_nosize() nounwind {
+entry:
+ %0 = icmp eq i32 undef, 0
+ br i1 %0, label %bbx, label %bby
+
+bby:
+ switch i32 undef, label %bb7 [
+ i32 16, label %return
+ ]
+
+bb7:
+ volatile store i32 0, i32* @XYZ
+ volatile store i32 1, i32* @XYZ
+ unreachable
+
+bbx:
+ switch i32 undef, label %bb12 [
+ i32 128, label %return
+ ]
+
+bb12:
+ volatile store i32 0, i32* @XYZ
+ volatile store i32 1, i32* @XYZ
+ unreachable
+
+return:
+ ret void
+}