[CGP] transform select instructions into branches and sink expensive operands

This was originally checked in at r250527, but reverted at r250570 because of PR25222.
There were at least 2 problems:
1. The cost check tested for an instruction with an exact cost of TCC_Expensive;
it should have tested for a cost >= TCC_Expensive.
2. The clang stage 1 failures were caused by illegally sinking 'call' instructions;
we can't sink instructions that may have side effects or are not safe to execute speculatively.

Fixed those conditions in sinkSelectOperand() and added test cases.

Original commit message:
This is a follow-up to the discussion in D12882.

Ideally, we would like SimplifyCFG to be able to form select instructions even when the operands
are expensive (as defined by the TTI cost model) because that may expose further optimizations.
However, we would then like a later pass like CodeGenPrepare to undo that transformation if the
target would likely benefit from not speculatively executing an expensive op (this patch).

Once we have this safety mechanism in place, we can adjust SimplifyCFG to restore its
select-formation behavior that changed with r248439.

Differential Revision: http://reviews.llvm.org/D13297

llvm-svn: 250743
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/select.ll b/llvm/test/Transforms/CodeGenPrepare/X86/select.ll
new file mode 100644
index 0000000..a26938a
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/select.ll
@@ -0,0 +1,141 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target triple = "x86_64-unknown-unknown"
+
+; Nothing to sink here: both select operands are function arguments. The select is
+; still converted to a branch to avoid stalling an out-of-order CPU on a predictable branch.
+
+define i32 @no_sink(double %a, double* %b, i32 %x, i32 %y)  {
+entry:
+  %load = load double, double* %b, align 8
+  %cmp = fcmp olt double %load, %a        ; condition depends on a value loaded from memory
+  %sel = select i1 %cmp, i32 %x, i32 %y   ; expected to be rewritten as a branch plus a phi
+  ret i32 %sel
+
+; CHECK-LABEL: @no_sink(
+; CHECK:    %load = load double, double* %b, align 8
+; CHECK:    %cmp = fcmp olt double %load, %a
+; CHECK:    br i1 %cmp, label %select.end, label %select.false
+; CHECK:  select.false:
+; CHECK:    br label %select.end
+; CHECK:  select.end:
+; CHECK:    %sel = phi i32 [ %x, %entry ], [ %y, %select.false ] 
+; CHECK:    ret i32 %sel
+}
+
+
+; An 'fdiv' is expensive, so sink it into the branch that needs it rather than speculatively execute it.
+
+define float @fdiv_true_sink(float %a, float %b) {
+entry:
+  %div = fdiv float %a, %b                       ; only needed when %cmp is true
+  %cmp = fcmp ogt float %a, 1.0
+  %sel = select i1 %cmp, float %div, float 2.0   ; expected: branch, with the fdiv moved to select.true.sink
+  ret float %sel
+
+; CHECK-LABEL: @fdiv_true_sink(
+; CHECK:    %cmp = fcmp ogt float %a, 1.0
+; CHECK:    br i1 %cmp, label %select.true.sink, label %select.end
+; CHECK:  select.true.sink:
+; CHECK:    %div = fdiv float %a, %b
+; CHECK:    br label %select.end
+; CHECK:  select.end:
+; CHECK:    %sel = phi float [ %div, %select.true.sink ], [ 2.000000e+00, %entry ]
+; CHECK:    ret float %sel
+}
+
+define float @fdiv_false_sink(float %a, float %b) {
+entry:
+  %div = fdiv float %a, %b                       ; only needed when %cmp is false
+  %cmp = fcmp ogt float %a, 3.0
+  %sel = select i1 %cmp, float 4.0, float %div   ; expected: branch, with the fdiv moved to select.false.sink
+  ret float %sel
+
+; CHECK-LABEL: @fdiv_false_sink(
+; CHECK:    %cmp = fcmp ogt float %a, 3.0
+; CHECK:    br i1 %cmp, label %select.end, label %select.false.sink
+; CHECK:  select.false.sink:
+; CHECK:    %div = fdiv float %a, %b
+; CHECK:    br label %select.end
+; CHECK:  select.end:
+; CHECK:    %sel = phi float [ 4.000000e+00, %entry ], [ %div, %select.false.sink ] 
+; CHECK:    ret float %sel
+}
+
+define float @fdiv_both_sink(float %a, float %b) {
+entry:
+  %div1 = fdiv float %a, %b                        ; true operand of the select
+  %div2 = fdiv float %b, %a                        ; false operand of the select
+  %cmp = fcmp ogt float %a, 5.0
+  %sel = select i1 %cmp, float %div1, float %div2  ; expected: each fdiv sunk into its own block
+  ret float %sel
+
+; CHECK-LABEL: @fdiv_both_sink(
+; CHECK:    %cmp = fcmp ogt float %a, 5.0
+; CHECK:    br i1 %cmp, label %select.true.sink, label %select.false.sink
+; CHECK:  select.true.sink:
+; CHECK:    %div1 = fdiv float %a, %b
+; CHECK:    br label %select.end
+; CHECK:  select.false.sink:
+; CHECK:    %div2 = fdiv float %b, %a
+; CHECK:    br label %select.end
+; CHECK:  select.end:
+; CHECK:    %sel = phi float [ %div1, %select.true.sink ], [ %div2, %select.false.sink ] 
+; CHECK:    ret float %sel
+}
+
+; An 'fadd' is not too expensive, so it's ok to speculate it: the select is expected to be left as-is.
+
+define float @fadd_no_sink(float %a, float %b) {
+  %add = fadd float %a, %b                ; cheap op, stays ahead of the select
+  %cmp = fcmp ogt float 6.0, %a
+  %sel = select i1 %cmp, float %add, float 7.0 
+  ret float %sel
+
+; CHECK-LABEL: @fadd_no_sink(
+; CHECK:  %sel = select i1 %cmp, float %add, float 7.0 
+}
+
+; Possible enhancement: sinkability is only calculated for the direct operand
+; of the select (the cheap fadd here), so we don't try to sink this. The cost
+; of the fdiv feeding that fadd is not taken into account.
+
+define float @fdiv_no_sink(float %a, float %b) {
+entry:
+  %div = fdiv float %a, %b                ; expensive, but only an indirect input to the select
+  %add = fadd float %div, %b              ; direct select operand
+  %cmp = fcmp ogt float %a, 1.0
+  %sel = select i1 %cmp, float %add, float 8.0
+  ret float %sel
+
+; CHECK-LABEL: @fdiv_no_sink(
+; CHECK:  %sel = select i1 %cmp, float %add, float 8.0 
+}
+
+; Do not transform the CFG if the select operands may have side effects, as calls may.
+
+declare i64* @bar(i32, i32, i32)
+declare i64* @baz(i32, i32, i32)
+
+define i64* @calls_no_sink(i32 %in) {
+  %call1 = call i64* @bar(i32 1, i32 2, i32 3)   ; both calls must execute regardless of %in
+  %call2 = call i64* @baz(i32 1, i32 2, i32 3)
+  %tobool = icmp ne i32 %in, 0
+  %sel = select i1 %tobool, i64* %call1, i64* %call2   ; expected to stay a select
+  ret i64* %sel
+
+; CHECK-LABEL: @calls_no_sink(
+; CHECK:  %sel = select i1 %tobool, i64* %call1, i64* %call2
+}
+
+define i32 @sdiv_no_sink(i32 %a, i32 %b) {
+  %div1 = sdiv i32 %a, %b   ; NOTE(review): presumably not sunk because sdiv is not safe to
+  %div2 = sdiv i32 %b, %a   ; execute speculatively (may trap) - confirm in sinkSelectOperand()
+  %cmp = icmp sgt i32 %a, 5
+  %sel = select i1 %cmp, i32 %div1, i32 %div2   ; expected to stay a select
+  ret i32 %sel
+
+; CHECK-LABEL: @sdiv_no_sink(
+; CHECK:  %sel = select i1 %cmp, i32 %div1, i32 %div2
+}
+