DAGCombiner: Canonicalize select(and/or,x,y) depending on target.

This is based on the following equivalences:
select(C0 & C1, X, Y) <=> select(C0, select(C1, X, Y), Y)
select(C0 | C1, X, Y) <=> select(C0, X, select(C1, X, Y))

Many targets cannot perform and/or on the CPU flags and therefore the
right side should be chosen to avoid materializing the i1 flags in an
integer register. If the target can perform this operation efficiently
we normalize to the left form.

Differential Revision: http://reviews.llvm.org/D7622

llvm-svn: 231507
diff --git a/llvm/test/CodeGen/ARM/movcc-double.ll b/llvm/test/CodeGen/ARM/movcc-double.ll
new file mode 100644
index 0000000..9ce708d
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/movcc-double.ll
@@ -0,0 +1,50 @@
+; RUN: llc -o - %s | FileCheck %s
+target triple = "arm-unknown-unknown"
+
+; select with and i1/or i1 condition should be implemented as a series of 2
+; cmovs, not by producing two conditions and using and on them.
+
+define i32 @select_and(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) {
+; CHECK-LABEL: select_and
+; CHECK-NOT: tst
+; CHECK-NOT: movne
+; CHECK: mov{{lo|hs}}
+; CHECK: mov{{lo|hs}}
+  %lt01 = icmp ult i32 %a0, %a1
+  %lt23 = icmp ult i32 %a2, %a3
+  %both = and i1 %lt01, %lt23
+  %sel = select i1 %both, i32 %a4, i32 %a5
+  ret i32 %sel
+}
+
+define i32 @select_or(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) {
+; select with or i1 condition should be implemented as a series of 2 cmovs, not
+; by producing two conditions and using or on them.
+; CHECK-LABEL: select_or
+; CHECK-NOT: orrs
+; CHECK-NOT: tst
+; CHECK: mov{{lo|hs}}
+; CHECK: mov{{lo|hs}}
+  %cmp0 = icmp ult i32 %a0, %a1
+  %cmp1 = icmp ult i32 %a2, %a3
+  %or = or i1 %cmp0, %cmp1
+  %res = select i1 %or, i32 %a4, i32 %a5
+  ret i32 %res
+}
+
+; If one of the conditions is materialized as a 0/1 value anyway, then the
+; sequence of 2 cmovs should not be used.
+
+@var32 = global i32 0
+define i32 @select_noopt(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+; CHECK-LABEL: select_noopt
+; CHECK: orrs
+; CHECK: movne
+  %lt01 = icmp ult i32 %a0, %a1
+  %lt12 = icmp ult i32 %a1, %a2
+  %either = or i1 %lt01, %lt12
+  %flag = zext i1 %either to i32
+  store volatile i32 %flag, i32* @var32
+  %sel = select i1 %either, i32 %a3, i32 %a4
+  ret i32 %sel
+}
diff --git a/llvm/test/CodeGen/R600/or.ll b/llvm/test/CodeGen/R600/or.ll
index 1b1cb9a..1337adb 100644
--- a/llvm/test/CodeGen/R600/or.ll
+++ b/llvm/test/CodeGen/R600/or.ll
@@ -156,14 +156,14 @@
 ; EG: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
 
 ; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
-define void @or_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
+define void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
   %a = load float, float addrspace(1)* %in0
   %b = load float, float addrspace(1)* %in1
   %acmp = fcmp oge float %a, 0.000000e+00
   %bcmp = fcmp oge float %b, 0.000000e+00
   %or = or i1 %acmp, %bcmp
-  %result = select i1 %or, float %a, float %b
-  store float %result, float addrspace(1)* %out
+  %result = zext i1 %or to i32
+  store i32 %result, i32 addrspace(1)* %out
   ret void
 }
 
diff --git a/llvm/test/CodeGen/X86/cmov-double.ll b/llvm/test/CodeGen/X86/cmov-double.ll
new file mode 100644
index 0000000..994a027
--- /dev/null
+++ b/llvm/test/CodeGen/X86/cmov-double.ll
@@ -0,0 +1,52 @@
+; RUN: llc -o - %s | FileCheck %s
+target triple = "x86_64-unknown-unknown"
+
+; select with and i1/or i1 condition should be implemented as a series of 2
+; cmovs, not by producing two conditions and using and on them.
+
+define i32 @select_and(i32 %a0, i32 %a1, float %a2, float %a3, i32 %a4, i32 %a5) {
+; CHECK-LABEL: select_and
+; CHECK-NOT: set
+; CHECK-NOT: and[lb]
+; CHECK-NOT: test
+; CHECK: cmov
+; CHECK: cmov
+  %ilt = icmp ult i32 %a0, %a1
+  %flt = fcmp olt float %a2, %a3
+  %both = and i1 %ilt, %flt
+  %sel = select i1 %both, i32 %a4, i32 %a5
+  ret i32 %sel
+}
+
+define i32 @select_or(i32 %a0, i32 %a1, float %a2, float %a3, i32 %a4, i32 %a5) {
+; select with or i1 condition should be implemented as a series of 2 cmovs, not
+; by producing two conditions and using or on them.
+; CHECK-LABEL: select_or
+; CHECK-NOT: set
+; CHECK-NOT: or[lb]
+; CHECK-NOT: test
+; CHECK: cmov
+; CHECK: cmov
+  %cmp0 = icmp ult i32 %a0, %a1
+  %cmp1 = fcmp olt float %a2, %a3
+  %or = or i1 %cmp0, %cmp1
+  %res = select i1 %or, i32 %a4, i32 %a5
+  ret i32 %res
+}
+
+; If one of the conditions is materialized as a 0/1 value anyway, then the
+; sequence of 2 cmovs should not be used.
+
+@var32 = global i32 0
+define i32 @select_noopt(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+; CHECK-LABEL: select_noopt
+; CHECK: cmov
+; CHECK-NOT: cmov
+  %lt01 = icmp ult i32 %a0, %a1
+  %lt12 = icmp ult i32 %a1, %a2
+  %either = or i1 %lt01, %lt12
+  %flag = zext i1 %either to i32
+  store volatile i32 %flag, i32* @var32
+  %sel = select i1 %either, i32 %a3, i32 %a4
+  ret i32 %sel
+}
diff --git a/llvm/test/CodeGen/X86/jump_sign.ll b/llvm/test/CodeGen/X86/jump_sign.ll
index 440f1cc..31a7af3 100644
--- a/llvm/test/CodeGen/X86/jump_sign.ll
+++ b/llvm/test/CodeGen/X86/jump_sign.ll
@@ -217,17 +217,15 @@
 ; PR13475
 ; If we have sub a, b and cmp b, a and the result of cmp is used
 ; by sbb, we should not optimize cmp away.
-define i32 @func_q(i32 %j.4, i32 %w, i32 %el) {
+define i32 @func_q(i32 %a0, i32 %a1, i32 %a2) {
 ; CHECK-LABEL: func_q:
 ; CHECK: cmp
 ; CHECK-NEXT: sbb
-  %tmp532 = add i32 %j.4, %w
-  %tmp533 = icmp ugt i32 %tmp532, %el
-  %tmp534 = icmp ult i32 %w, %el
-  %or.cond = and i1 %tmp533, %tmp534
-  %tmp535 = sub i32 %el, %w
-  %j.5 = select i1 %or.cond, i32 %tmp535, i32 %j.4
-  ret i32 %j.5
+  %1 = icmp ult i32 %a0, %a1
+  %2 = sub i32 %a1, %a0
+  %3 = select i1 %1, i32 -1, i32 0
+  %4 = xor i32 %2, %3
+  ret i32 %4
 }
 ; rdar://11873276
 define i8* @func_r(i8* %base, i32* nocapture %offset, i32 %size) nounwind {
diff --git a/llvm/test/CodeGen/X86/zext-sext.ll b/llvm/test/CodeGen/X86/zext-sext.ll
index 2758bff..01f8711 100644
--- a/llvm/test/CodeGen/X86/zext-sext.ll
+++ b/llvm/test/CodeGen/X86/zext-sext.ll
@@ -34,11 +34,12 @@
   %tmp12 = add i64 %tmp11, 5089792279245435153
 
 ; CHECK:      addl	$2138875574, %e[[REGISTER_zext:[a-z0-9]+]]
-; CHECK:      movslq	%e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]]
 ; CHECK:      cmpl	$-8608074, %e[[REGISTER_zext]]
+; CHECK:      movslq	%e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]]
 ; CHECK-NOT:  [[REGISTER_zext]]
-; CHECK-DAG:  testl     %e[[REGISTER_zext]]
-; CHECK:      subq	%r[[REGISTER_zext]], [[REGISTER_sext]]
+; CHECK-DAG:  cmpl	$2138875573, %e[[REGISTER_zext]]
+; CHECK:      movq  [[REGISTER_sext]], [[REGISTER_sext2:%[a-z0-9]+]]
+; CHECK:      subq	%r[[REGISTER_zext]], [[REGISTER_sext2]]
 
   %tmp13 = sub i64 %tmp12, 2138875574
   %tmp14 = zext i32 %tmp4 to i64