[X86] Special-case 2x CMOV when custom-inserting.
This lets us avoid a few copies that are otherwise hard to get rid of.
To do this, the custom inserter looks at the instruction that follows
the CMOV for another CMOV, and replaces both at the same time.
A previous version used a new CMOV2 opcode, but the custom inserter
is expected to be able to return a different basic block anyway, which
means it's OK - though far from ideal - to alter that block's contents.
Explicitly document that, in case it ever makes a difference.
Alternatives welcome!
Follow-up to r231045.
rdar://19767934
Closes http://reviews.llvm.org/D8019
llvm-svn: 231046
diff --git a/llvm/test/CodeGen/X86/cmovcmov.ll b/llvm/test/CodeGen/X86/cmovcmov.ll
index e9ac86f..982bc82 100644
--- a/llvm/test/CodeGen/X86/cmovcmov.ll
+++ b/llvm/test/CodeGen/X86/cmovcmov.ll
@@ -15,21 +15,18 @@
; CMOV-NEXT: movl %edi, %eax
; CMOV-NEXT: retq
-; NOCMOV-NEXT: flds 8(%esp)
-; NOCMOV-NEXT: flds 4(%esp)
-; NOCMOV-NEXT: fucompp
-; NOCMOV-NEXT: fnstsw %ax
-; NOCMOV-NEXT: sahf
-; NOCMOV-NEXT: leal 16(%esp), %eax
-; NOCMOV-NEXT: movl %eax, %ecx
-; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
-; NOCMOV-NEXT: leal 12(%esp), %ecx
-; NOCMOV-NEXT: [[TBB1]]:
-; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
-; NOCMOV-NEXT: movl %ecx, %eax
-; NOCMOV-NEXT: [[TBB2]]:
-; NOCMOV-NEXT: movl (%eax), %eax
-; NOCMOV-NEXT: retl
+; NOCMOV-NEXT: flds 8(%esp)
+; NOCMOV-NEXT: flds 4(%esp)
+; NOCMOV-NEXT: fucompp
+; NOCMOV-NEXT: fnstsw %ax
+; NOCMOV-NEXT: sahf
+; NOCMOV-NEXT: leal 16(%esp), %eax
+; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
+; NOCMOV-NEXT: jp [[TBB]]
+; NOCMOV-NEXT: leal 12(%esp), %eax
+; NOCMOV-NEXT: [[TBB]]:
+; NOCMOV-NEXT: movl (%eax), %eax
+; NOCMOV-NEXT: retl
define i32 @test_select_fcmp_oeq_i32(float %a, float %b, i32 %c, i32 %d) #0 {
entry:
%cmp = fcmp oeq float %a, %b
@@ -51,13 +48,10 @@
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal 20(%esp), %ecx
-; NOCMOV-NEXT: movl %ecx, %eax
-; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
-; NOCMOV-NEXT: leal 12(%esp), %eax
-; NOCMOV-NEXT: [[TBB1]]:
-; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
-; NOCMOV-NEXT: movl %eax, %ecx
-; NOCMOV-NEXT: [[TBB2]]:
+; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
+; NOCMOV-NEXT: jp [[TBB]]
+; NOCMOV-NEXT: leal 12(%esp), %ecx
+; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: movl (%ecx), %eax
; NOCMOV-NEXT: orl $4, %ecx
; NOCMOV-NEXT: movl (%ecx), %edx
@@ -83,13 +77,10 @@
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal 12(%esp), %ecx
-; NOCMOV-NEXT: movl %ecx, %eax
-; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
-; NOCMOV-NEXT: leal 20(%esp), %eax
-; NOCMOV-NEXT: [[TBB1]]:
-; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
-; NOCMOV-NEXT: movl %eax, %ecx
-; NOCMOV-NEXT: [[TBB2]]:
+; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
+; NOCMOV-NEXT: jp [[TBB]]
+; NOCMOV-NEXT: leal 20(%esp), %ecx
+; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: movl (%ecx), %eax
; NOCMOV-NEXT: orl $4, %ecx
; NOCMOV-NEXT: movl (%ecx), %edx
@@ -104,13 +95,10 @@
; CHECK-LABEL: test_select_fcmp_oeq_f64:
; CMOV-NEXT: ucomiss %xmm1, %xmm0
-; CMOV-NEXT: movaps %xmm3, %xmm0
-; CMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
-; CMOV-NEXT: movaps %xmm2, %xmm0
-; CMOV-NEXT: [[TBB1]]:
-; CMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
-; CMOV-NEXT: movaps %xmm0, %xmm3
-; CMOV-NEXT: [[TBB2]]:
+; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
+; CMOV-NEXT: jp [[TBB]]
+; CMOV-NEXT: movaps %xmm2, %xmm3
+; CMOV-NEXT: [[TBB]]:
; CMOV-NEXT: movaps %xmm3, %xmm0
; CMOV-NEXT: retq
@@ -120,13 +108,10 @@
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal 20(%esp), %eax
-; NOCMOV-NEXT: movl %eax, %ecx
-; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
-; NOCMOV-NEXT: leal 12(%esp), %ecx
-; NOCMOV-NEXT: [[TBB1]]:
-; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
-; NOCMOV-NEXT: movl %ecx, %eax
-; NOCMOV-NEXT: [[TBB2]]:
+; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
+; NOCMOV-NEXT: jp [[TBB]]
+; NOCMOV-NEXT: leal 12(%esp), %eax
+; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: fldl (%eax)
; NOCMOV-NEXT: retl
define double @test_select_fcmp_oeq_f64(float %a, float %b, double %c, double %d) #0 {
@@ -139,68 +124,51 @@
; CHECK-LABEL: test_select_fcmp_oeq_v4i32:
; CMOV-NEXT: ucomiss %xmm1, %xmm0
-; CMOV-NEXT: movaps %xmm3, %xmm0
-; CMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
-; CMOV-NEXT: movaps %xmm2, %xmm0
-; CMOV-NEXT: [[TBB1]]:
-; CMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
-; CMOV-NEXT: movaps %xmm0, %xmm3
-; CMOV-NEXT: [[TBB2]]:
+; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
+; CMOV-NEXT: jp [[TBB]]
+; CMOV-NEXT: movaps %xmm2, %xmm3
+; CMOV-NEXT: [[TBB]]:
; CMOV-NEXT: movaps %xmm3, %xmm0
; CMOV-NEXT: retq
-; NOCMOV-NEXT: pushl %ebx
; NOCMOV-NEXT: pushl %edi
; NOCMOV-NEXT: pushl %esi
-; NOCMOV-NEXT: flds 24(%esp)
; NOCMOV-NEXT: flds 20(%esp)
+; NOCMOV-NEXT: flds 16(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: sahf
-; NOCMOV-NEXT: leal 44(%esp), %eax
-; NOCMOV-NEXT: movl %eax, %ecx
-; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
-; NOCMOV-NEXT: leal 28(%esp), %ecx
-; NOCMOV-NEXT: [[TBB1]]:
-; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
-; NOCMOV-NEXT: movl %ecx, %eax
-; NOCMOV-NEXT: [[TBB2]]:
+; NOCMOV-NEXT: leal 40(%esp), %eax
+; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
+; NOCMOV-NEXT: jp [[TBB]]
+; NOCMOV-NEXT: leal 24(%esp), %eax
+; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: movl (%eax), %eax
-; NOCMOV-NEXT: leal 48(%esp), %ecx
-; NOCMOV-NEXT: movl %ecx, %edx
-; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
-; NOCMOV-NEXT: leal 32(%esp), %edx
-; NOCMOV-NEXT: [[TBB1]]:
-; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
-; NOCMOV-NEXT: movl %edx, %ecx
-; NOCMOV-NEXT: [[TBB2]]:
+; NOCMOV-NEXT: leal 44(%esp), %ecx
+; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
+; NOCMOV-NEXT: jp [[TBB]]
+; NOCMOV-NEXT: leal 28(%esp), %ecx
+; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: movl (%ecx), %ecx
-; NOCMOV-NEXT: leal 52(%esp), %edx
-; NOCMOV-NEXT: movl %edx, %esi
-; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
-; NOCMOV-NEXT: leal 36(%esp), %esi
-; NOCMOV-NEXT: [[TBB1]]:
-; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
-; NOCMOV-NEXT: movl %esi, %edx
-; NOCMOV-NEXT: [[TBB2]]:
-; NOCMOV-NEXT: movl (%edx), %edx
-; NOCMOV-NEXT: leal 56(%esp), %esi
-; NOCMOV-NEXT: movl %esi, %ebx
-; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
-; NOCMOV-NEXT: leal 40(%esp), %ebx
-; NOCMOV-NEXT: [[TBB1]]:
-; NOCMOV-NEXT: movl 16(%esp), %edi
-; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
-; NOCMOV-NEXT: movl %ebx, %esi
-; NOCMOV-NEXT: [[TBB2]]:
+; NOCMOV-NEXT: leal 48(%esp), %esi
+; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
+; NOCMOV-NEXT: jp [[TBB]]
+; NOCMOV-NEXT: leal 32(%esp), %esi
+; NOCMOV-NEXT: [[TBB]]:
+; NOCMOV-NEXT: movl 12(%esp), %edx
; NOCMOV-NEXT: movl (%esi), %esi
-; NOCMOV-NEXT: movl %esi, 12(%edi)
-; NOCMOV-NEXT: movl %edx, 8(%edi)
-; NOCMOV-NEXT: movl %ecx, 4(%edi)
-; NOCMOV-NEXT: movl %eax, (%edi)
+; NOCMOV-NEXT: leal 52(%esp), %edi
+; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
+; NOCMOV-NEXT: jp [[TBB]]
+; NOCMOV-NEXT: leal 36(%esp), %edi
+; NOCMOV-NEXT: [[TBB]]:
+; NOCMOV-NEXT: movl (%edi), %edi
+; NOCMOV-NEXT: movl %edi, 12(%edx)
+; NOCMOV-NEXT: movl %esi, 8(%edx)
+; NOCMOV-NEXT: movl %ecx, 4(%edx)
+; NOCMOV-NEXT: movl %eax, (%edx)
; NOCMOV-NEXT: popl %esi
; NOCMOV-NEXT: popl %edi
-; NOCMOV-NEXT: popl %ebx
; NOCMOV-NEXT: retl $4
define <4 x i32> @test_select_fcmp_oeq_v4i32(float %a, float %b, <4 x i32> %c, <4 x i32> %d) #0 {
entry:
@@ -217,17 +185,14 @@
; CHECK-LABEL: test_zext_fcmp_une:
; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm0
-; CMOV-NEXT: movaps %xmm0, %xmm1
-; CMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
-; CMOV-NEXT: xorps %xmm1, %xmm1
-; CMOV-NEXT: [[TBB1]]:
-; CMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
-; CMOV-NEXT: movaps %xmm1, %xmm0
-; CMOV-NEXT: [[TBB2]]:
+; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
+; CMOV-NEXT: jp [[TBB]]
+; CMOV-NEXT: xorps %xmm0, %xmm0
+; CMOV-NEXT: [[TBB]]:
; CMOV-NEXT: retq
-; NOCMOV: jne
-; NOCMOV: jp
+; NOCMOV: jne
+; NOCMOV-NEXT: jp
define float @test_zext_fcmp_une(float %a, float %b) #0 {
entry:
%cmp = fcmp une float %a, %b
@@ -242,17 +207,14 @@
; CHECK-LABEL: test_zext_fcmp_oeq:
; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: xorps %xmm0, %xmm0
-; CMOV-NEXT: xorps %xmm1, %xmm1
-; CMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
-; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm1
-; CMOV-NEXT: [[TBB1]]:
-; CMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
-; CMOV-NEXT: movaps %xmm1, %xmm0
-; CMOV-NEXT: [[TBB2]]:
+; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
+; CMOV-NEXT: jp [[TBB]]
+; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm0
+; CMOV-NEXT: [[TBB]]:
; CMOV-NEXT: retq
-; NOCMOV: jne
-; NOCMOV: jp
+; NOCMOV: jne
+; NOCMOV-NEXT: jp
define float @test_zext_fcmp_oeq(float %a, float %b) #0 {
entry:
%cmp = fcmp oeq float %a, %b