[x86] allow FP-logic ops when one operand is FP and result is FP

We save a move between register files (GPR <-> XMM) this way. If there's
any CPU where the FP logic is slower, we could transform this back to
int-logic in MachineCombiner.

This helps, but doesn't solve, PR6137:
https://llvm.org/bugs/show_bug.cgi?id=6137

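The 'andn' test (FsANDNPSrr) shows that we're still missing a pattern match
to recognize the xor with a -1 constant as a 'not' op: the generated code
loads a constant from memory and uses xorpd + andpd rather than folding the
'not' into an andn instruction.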

llvm-svn: 287171
diff --git a/llvm/test/CodeGen/X86/fp-logic-replace.ll b/llvm/test/CodeGen/X86/fp-logic-replace.ll
index 47e0768..50e2c1b 100644
--- a/llvm/test/CodeGen/X86/fp-logic-replace.ll
+++ b/llvm/test/CodeGen/X86/fp-logic-replace.ll
@@ -29,20 +29,16 @@
 define double @FsANDNPSrr(double %x, double %y) {
 ; SSE-LABEL: FsANDNPSrr:
 ; SSE:       # BB#0:
-; SSE-NEXT:    movd %xmm0, %rax
-; SSE-NEXT:    movd %xmm1, %rcx
-; SSE-NEXT:    notq %rcx
-; SSE-NEXT:    andq %rax, %rcx
-; SSE-NEXT:    movd %rcx, %xmm0
+; SSE-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
+; SSE-NEXT:    xorpd %xmm1, %xmm2
+; SSE-NEXT:    andpd %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: FsANDNPSrr:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vmovq %xmm0, %rax
-; AVX-NEXT:    vmovq %xmm1, %rcx
-; AVX-NEXT:    notq %rcx
-; AVX-NEXT:    andq %rax, %rcx
-; AVX-NEXT:    vmovq %rcx, %xmm0
+; AVX-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
+; AVX-NEXT:    vxorpd %xmm2, %xmm1, %xmm1
+; AVX-NEXT:    vandpd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
   %bc1 = bitcast double %x to i64
diff --git a/llvm/test/CodeGen/X86/fp-logic.ll b/llvm/test/CodeGen/X86/fp-logic.ll
index 2c6698f..301fa8f 100644
--- a/llvm/test/CodeGen/X86/fp-logic.ll
+++ b/llvm/test/CodeGen/X86/fp-logic.ll
@@ -3,13 +3,9 @@
 
 ; PR22428: https://llvm.org/bugs/show_bug.cgi?id=22428
 ; f1, f2, f3, and f4 should use an integer logic instruction.
-; f9 and f10 should use an FP (SSE) logic instruction.
+; f5, f6, f9, and f10 should use an FP (SSE) logic instruction.
 ;
-; f5, f6, f7, and f8 are less clear.
-;
-; For f5 and f6, we can save a register move by using an FP logic instruction,
-; but we may need to calculate the relative costs of an SSE op vs. int op vs.
-; scalar <-> SSE register moves.
+; f7 and f8 are less clear.
 ;
 ; For f7 and f8, the SSE instructions don't take immediate operands, so if we
 ; use one of those, we either have to load a constant from memory or move the
@@ -79,9 +75,8 @@
 define float @f5(float %x, i32 %y) {
 ; CHECK-LABEL: f5:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    movd %xmm0, %eax
-; CHECK-NEXT:    andl %edi, %eax
-; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    movd %edi, %xmm1
+; CHECK-NEXT:    andps %xmm1, %xmm0
 ; CHECK-NEXT:    retq
 ;
   %bc1 = bitcast float %x to i32
@@ -95,9 +90,8 @@
 define float @f6(float %x, i32 %y) {
 ; CHECK-LABEL: f6:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    movd %xmm0, %eax
-; CHECK-NEXT:    andl %edi, %eax
-; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    movd %edi, %xmm1
+; CHECK-NEXT:    andps %xmm1, %xmm0
 ; CHECK-NEXT:    retq
 ;
   %bc1 = bitcast float %x to i32