Reapply "[ARM] Combine CMOV into BFI where possible"

Added fixes for stage2 failures: CMOV is not commutable; commuting the operands results in the condition being flipped! d'oh!

Original commit message:

If we have a CMOV, OR and AND combination such as:
  if (x & CN)
      y |= CM;

And:
  * CN is a single bit;
    * All bits covered by CM are known zero in y;

Then we can convert this to a sequence of BFI instructions. This will always be a win if CM is a single bit, will always be no worse than the TST & OR sequence if CM is two bits, and for thumb will be no worse if CM is three bits (due to the extra IT instruction).

llvm-svn: 252606
diff --git a/llvm/test/CodeGen/ARM/bfi.ll b/llvm/test/CodeGen/ARM/bfi.ll
index 0661960..0707838 100644
--- a/llvm/test/CodeGen/ARM/bfi.ll
+++ b/llvm/test/CodeGen/ARM/bfi.ll
@@ -74,3 +74,37 @@
   %or = or i32 %shl, %and
   ret i32 %or
 }
+
+define i32 @f7(i32 %x, i32 %y) {
+; CHECK-LABEL: f7:
+; CHECK: bfi r1, r0, #4, #1
+  %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
+  %and = and i32 %x, 4
+  %or = or i32 %y2, 16
+  %cmp = icmp ne i32 %and, 0
+  %sel = select i1 %cmp, i32 %or, i32 %y2
+  ret i32 %sel
+}
+
+define i32 @f8(i32 %x, i32 %y) {
+; CHECK-LABEL: f8:
+; CHECK: bfi r1, r0, #4, #1
+; CHECK: bfi r1, r0, #5, #1
+  %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
+  %and = and i32 %x, 4
+  %or = or i32 %y2, 48
+  %cmp = icmp ne i32 %and, 0
+  %sel = select i1 %cmp, i32 %or, i32 %y2
+  ret i32 %sel
+}
+
+define i32 @f9(i32 %x, i32 %y) {
+; CHECK-LABEL: f9:
+; CHECK-NOT: bfi
+  %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
+  %and = and i32 %x, 4
+  %or = or i32 %y2, 48
+  %cmp = icmp ne i32 %and, 0
+  %sel = select i1 %cmp, i32 %y2, i32 %or
+  ret i32 %sel
+}