; RUN: llc < %s -asm-verbose=false -mtriple=x86_64-unknown-linux | FileCheck %s --check-prefix=CHECK --check-prefix=CMOV
; RUN: llc < %s -asm-verbose=false -mtriple=i686-unknown-linux | FileCheck %s --check-prefix=CHECK --check-prefix=NOCMOV

; Both llc invocations share the common CHECK prefix. The x86-64 invocation
; additionally enables the CMOV prefix and the i686 invocation enables the
; NOCMOV prefix, so each function below carries two expected sequences.

; NOTE(review): "m:o" selects Mach-O symbol mangling although both triples
; above are Linux; the expected labels below use ELF-style ".LBB"/".LCPI"
; names. Confirm whether this datalayout string is intentional.
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

; Test 2xCMOV patterns exposed after legalization.
; One way to do that is with (select (fcmp une/oeq)), which gets
; legalized to setp/setne.

; Select between two i32 arguments on an `fcmp oeq` of two floats.
; x86-64 expectation: one ucomiss followed by two conditional moves
; (cmovne, then cmovp) into the same destination register.
; i686 expectation: an x87 compare, then a jne/jp branch pair that chooses
; which stack-argument address is finally loaded from.
; CHECK-LABEL: test_select_fcmp_oeq_i32:

; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: cmovnel %esi, %edi
; CMOV-NEXT: cmovpl %esi, %edi
; CMOV-NEXT: movl %edi, %eax
; CMOV-NEXT: retq

; NOCMOV-NEXT: flds 8(%esp)
; NOCMOV-NEXT: flds 4(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal 16(%esp), %eax
; NOCMOV-NEXT: movl %eax, %ecx
; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
; NOCMOV-NEXT: leal 12(%esp), %ecx
; NOCMOV-NEXT: [[TBB1]]:
; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
; NOCMOV-NEXT: movl %ecx, %eax
; NOCMOV-NEXT: [[TBB2]]:
; NOCMOV-NEXT: movl (%eax), %eax
; NOCMOV-NEXT: retl
define i32 @test_select_fcmp_oeq_i32(float %a, float %b, i32 %c, i32 %d) #0 {
entry:
  %cmp = fcmp oeq float %a, %b
  %r = select i1 %cmp, i32 %c, i32 %d
  ret i32 %r
}

; Select between two i64 arguments on an `fcmp oeq` of two floats.
; x86-64 expectation: one ucomiss followed by 64-bit cmovne and cmovp.
; i686 expectation: a jne/jp branch pair picks the address of the chosen
; 64-bit stack argument, which is then loaded as two 32-bit halves into
; %eax and %edx (the second half is addressed via `orl $4`).
; CHECK-LABEL: test_select_fcmp_oeq_i64:

; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: cmovneq %rsi, %rdi
; CMOV-NEXT: cmovpq %rsi, %rdi
; CMOV-NEXT: movq %rdi, %rax
; CMOV-NEXT: retq

; NOCMOV-NEXT: flds 8(%esp)
; NOCMOV-NEXT: flds 4(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal 20(%esp), %ecx
; NOCMOV-NEXT: movl %ecx, %eax
; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
; NOCMOV-NEXT: leal 12(%esp), %eax
; NOCMOV-NEXT: [[TBB1]]:
; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
; NOCMOV-NEXT: movl %eax, %ecx
; NOCMOV-NEXT: [[TBB2]]:
; NOCMOV-NEXT: movl (%ecx), %eax
; NOCMOV-NEXT: orl $4, %ecx
; NOCMOV-NEXT: movl (%ecx), %edx
; NOCMOV-NEXT: retl
define i64 @test_select_fcmp_oeq_i64(float %a, float %b, i64 %c, i64 %d) #0 {
entry:
  %cmp = fcmp oeq float %a, %b
  %r = select i1 %cmp, i64 %c, i64 %d
  ret i64 %r
}

; Select between two i64 arguments on an `fcmp une` of two floats.
; The expected sequences mirror the oeq/i64 test with the roles of the two
; value operands exchanged: on x86-64 the cmovne/cmovp pair now targets
; %rsi, and on i686 the 12(%esp) and 20(%esp) addresses trade places.
; CHECK-LABEL: test_select_fcmp_une_i64:

; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: cmovneq %rdi, %rsi
; CMOV-NEXT: cmovpq %rdi, %rsi
; CMOV-NEXT: movq %rsi, %rax
; CMOV-NEXT: retq

; NOCMOV-NEXT: flds 8(%esp)
; NOCMOV-NEXT: flds 4(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal 12(%esp), %ecx
; NOCMOV-NEXT: movl %ecx, %eax
; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
; NOCMOV-NEXT: leal 20(%esp), %eax
; NOCMOV-NEXT: [[TBB1]]:
; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
; NOCMOV-NEXT: movl %eax, %ecx
; NOCMOV-NEXT: [[TBB2]]:
; NOCMOV-NEXT: movl (%ecx), %eax
; NOCMOV-NEXT: orl $4, %ecx
; NOCMOV-NEXT: movl (%ecx), %edx
; NOCMOV-NEXT: retl
define i64 @test_select_fcmp_une_i64(float %a, float %b, i64 %c, i64 %d) #0 {
entry:
  %cmp = fcmp une float %a, %b
  %r = select i1 %cmp, i64 %c, i64 %d
  ret i64 %r
}

; Select between two double arguments on an `fcmp oeq` of two floats.
; Here even the x86-64 run expects branches rather than conditional moves:
; a jne/jp pair around movaps copies between %xmm2/%xmm3/%xmm0.
; i686 expectation: a jne/jp branch pair picks the stack address of the
; chosen double, which is then pushed onto the x87 stack with fldl.
; CHECK-LABEL: test_select_fcmp_oeq_f64:

; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: movaps %xmm3, %xmm0
; CMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
; CMOV-NEXT: movaps %xmm2, %xmm0
; CMOV-NEXT: [[TBB1]]:
; CMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
; CMOV-NEXT: movaps %xmm0, %xmm3
; CMOV-NEXT: [[TBB2]]:
; CMOV-NEXT: movaps %xmm3, %xmm0
; CMOV-NEXT: retq

; NOCMOV-NEXT: flds 8(%esp)
; NOCMOV-NEXT: flds 4(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal 20(%esp), %eax
; NOCMOV-NEXT: movl %eax, %ecx
; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
; NOCMOV-NEXT: leal 12(%esp), %ecx
; NOCMOV-NEXT: [[TBB1]]:
; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
; NOCMOV-NEXT: movl %ecx, %eax
; NOCMOV-NEXT: [[TBB2]]:
; NOCMOV-NEXT: fldl (%eax)
; NOCMOV-NEXT: retl
define double @test_select_fcmp_oeq_f64(float %a, float %b, double %c, double %d) #0 {
entry:
  %cmp = fcmp oeq float %a, %b
  %r = select i1 %cmp, double %c, double %d
  ret double %r
}

; Select between two <4 x i32> arguments on an `fcmp oeq` of two floats.
; x86-64 expectation: the whole vector is selected with a single jne/jp
; branch pair around movaps, the same shape as the f64 test.
; i686 expectation: the select is expanded per 32-bit element. Four
; consecutive jne/jp pairs each choose between two stack addresses, and the
; four loaded values are stored through the pointer read from 16(%esp)
; (presumably the hidden return-slot pointer, given the final `retl $4`).
; Each TBB1/TBB2 FileCheck variable is deliberately redefined per pair.
; CHECK-LABEL: test_select_fcmp_oeq_v4i32:

; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: movaps %xmm3, %xmm0
; CMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
; CMOV-NEXT: movaps %xmm2, %xmm0
; CMOV-NEXT: [[TBB1]]:
; CMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
; CMOV-NEXT: movaps %xmm0, %xmm3
; CMOV-NEXT: [[TBB2]]:
; CMOV-NEXT: movaps %xmm3, %xmm0
; CMOV-NEXT: retq

; NOCMOV-NEXT: pushl %ebx
; NOCMOV-NEXT: pushl %edi
; NOCMOV-NEXT: pushl %esi
; NOCMOV-NEXT: flds 24(%esp)
; NOCMOV-NEXT: flds 20(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal 44(%esp), %eax
; NOCMOV-NEXT: movl %eax, %ecx
; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
; NOCMOV-NEXT: leal 28(%esp), %ecx
; NOCMOV-NEXT: [[TBB1]]:
; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
; NOCMOV-NEXT: movl %ecx, %eax
; NOCMOV-NEXT: [[TBB2]]:
; NOCMOV-NEXT: movl (%eax), %eax
; NOCMOV-NEXT: leal 48(%esp), %ecx
; NOCMOV-NEXT: movl %ecx, %edx
; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
; NOCMOV-NEXT: leal 32(%esp), %edx
; NOCMOV-NEXT: [[TBB1]]:
; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
; NOCMOV-NEXT: movl %edx, %ecx
; NOCMOV-NEXT: [[TBB2]]:
; NOCMOV-NEXT: movl (%ecx), %ecx
; NOCMOV-NEXT: leal 52(%esp), %edx
; NOCMOV-NEXT: movl %edx, %esi
; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
; NOCMOV-NEXT: leal 36(%esp), %esi
; NOCMOV-NEXT: [[TBB1]]:
; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
; NOCMOV-NEXT: movl %esi, %edx
; NOCMOV-NEXT: [[TBB2]]:
; NOCMOV-NEXT: movl (%edx), %edx
; NOCMOV-NEXT: leal 56(%esp), %esi
; NOCMOV-NEXT: movl %esi, %ebx
; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
; NOCMOV-NEXT: leal 40(%esp), %ebx
; NOCMOV-NEXT: [[TBB1]]:
; NOCMOV-NEXT: movl 16(%esp), %edi
; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
; NOCMOV-NEXT: movl %ebx, %esi
; NOCMOV-NEXT: [[TBB2]]:
; NOCMOV-NEXT: movl (%esi), %esi
; NOCMOV-NEXT: movl %esi, 12(%edi)
; NOCMOV-NEXT: movl %edx, 8(%edi)
; NOCMOV-NEXT: movl %ecx, 4(%edi)
; NOCMOV-NEXT: movl %eax, (%edi)
; NOCMOV-NEXT: popl %esi
; NOCMOV-NEXT: popl %edi
; NOCMOV-NEXT: popl %ebx
; NOCMOV-NEXT: retl $4
define <4 x i32> @test_select_fcmp_oeq_v4i32(float %a, float %b, <4 x i32> %c, <4 x i32> %d) #0 {
entry:
  %cmp = fcmp oeq float %a, %b
  %r = select i1 %cmp, <4 x i32> %c, <4 x i32> %d
  ret <4 x i32> %r
}

; Also make sure we catch the original code sequence of interest
; (the i1 result of an fcmp zero-extended and converted back to float):

; Convert the i1 result of `fcmp une` to float via zext + sitofp, so the
; function returns either 0.0 or 1.0.
; The first two directives capture the constant-pool label emitted ahead of
; the function; its .long value 1065353216 is 0x3F800000, the bit pattern
; of 1.0f.
; x86-64 expectation: load 1.0 from the constant pool, then a jne/jp branch
; pair chooses between it and a register zeroed with xorps.
; i686 expectation: only that a jne is followed later by a jp.
; CMOV: [[ONE_F32_LCPI:.LCPI.*]]:
; CMOV-NEXT: .long 1065353216

; CHECK-LABEL: test_zext_fcmp_une:
; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm0
; CMOV-NEXT: movaps %xmm0, %xmm1
; CMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
; CMOV-NEXT: xorps %xmm1, %xmm1
; CMOV-NEXT: [[TBB1]]:
; CMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
; CMOV-NEXT: movaps %xmm1, %xmm0
; CMOV-NEXT: [[TBB2]]:
; CMOV-NEXT: retq

; NOCMOV: jne
; NOCMOV: jp
define float @test_zext_fcmp_une(float %a, float %b) #0 {
entry:
  %cmp = fcmp une float %a, %b
  %conv1 = zext i1 %cmp to i32
  %conv2 = sitofp i32 %conv1 to float
  ret float %conv2
}

; Convert the i1 result of `fcmp oeq` to float via zext + sitofp, so the
; function returns either 0.0 or 1.0.
; The first two directives capture this function's constant-pool label; its
; .long value 1065353216 is 0x3F800000, the bit pattern of 1.0f.
; x86-64 expectation: both %xmm0 and %xmm1 start zeroed with xorps, 1.0 is
; loaded into %xmm1 only on the jne fall-through path, and the jp
; fall-through path copies %xmm1 into the result.
; i686 expectation: only that a jne is followed later by a jp.
; CMOV: [[ONE_F32_LCPI:.LCPI.*]]:
; CMOV-NEXT: .long 1065353216

; CHECK-LABEL: test_zext_fcmp_oeq:
; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: xorps %xmm0, %xmm0
; CMOV-NEXT: xorps %xmm1, %xmm1
; CMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]]
; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm1
; CMOV-NEXT: [[TBB1]]:
; CMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]]
; CMOV-NEXT: movaps %xmm1, %xmm0
; CMOV-NEXT: [[TBB2]]:
; CMOV-NEXT: retq

; NOCMOV: jne
; NOCMOV: jp
define float @test_zext_fcmp_oeq(float %a, float %b) #0 {
entry:
  %cmp = fcmp oeq float %a, %b
  %conv1 = zext i1 %cmp to i32
  %conv2 = sitofp i32 %conv1 to float
  ret float %conv2
}

; Attribute group #0, referenced by the test functions in this file:
; marks them nounwind.
attributes #0 = { nounwind }