; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X64

; If we are transferring XMM conversion results to MMX registers we could use the MMX equivalents
; (CVTPD2PI/CVTTPD2PI + CVTPS2PI/CVTTPS2PI) without affecting rounding/exceptions etc.
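;
; For example, in @cvt_v2f64_v2i32 below the result of the SSE2
; @llvm.x86.sse2.cvtpd2dq intrinsic is only consumed by an MMX paddd, so the
; CHECK lines expect the conversion and the XMM->MMX transfer to collapse
; into a single "cvtpd2pi %xmm0, %mm0".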

define void @cvt_v2f64_v2i32(<2 x double>, <1 x i64>*) nounwind {
; X86-LABEL: cvt_v2f64_v2i32:
; X86:       # BB#0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    cvtpd2pi %xmm0, %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %edx, 4(%eax)
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: cvt_v2f64_v2i32:
; X64:       # BB#0:
; X64-NEXT:    cvtpd2pi %xmm0, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %3 = tail call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %0)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = bitcast i64 %5 to x86_mmx
  %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6)
  %8 = bitcast x86_mmx %7 to i64
  %9 = insertelement <1 x i64> undef, i64 %8, i32 0
  store <1 x i64> %9, <1 x i64>* %1
  ret void
}

define void @cvtt_v2f64_v2i32(<2 x double>, <1 x i64>*) nounwind {
; X86-LABEL: cvtt_v2f64_v2i32:
; X86:       # BB#0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    cvttpd2pi %xmm0, %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %edx, 4(%eax)
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: cvtt_v2f64_v2i32:
; X64:       # BB#0:
; X64-NEXT:    cvttpd2pi %xmm0, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %3 = tail call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %0)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = bitcast i64 %5 to x86_mmx
  %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6)
  %8 = bitcast x86_mmx %7 to i64
  %9 = insertelement <1 x i64> undef, i64 %8, i32 0
  store <1 x i64> %9, <1 x i64>* %1
  ret void
}

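; Plain IR fptosi on <2 x double> is not folded to CVTTPD2PI: note how the
; result below round-trips through a stack slot (cvttpd2dq + movlpd + movq)
; to get from XMM to MMX.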
define void @fptosi_v2f64_v2i32(<2 x double>, <1 x i64>*) nounwind {
; X86-LABEL: fptosi_v2f64_v2i32:
; X86:       # BB#0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    cvttpd2dq %xmm0, %xmm0
; X86-NEXT:    movlpd %xmm0, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %edx, 4(%eax)
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: fptosi_v2f64_v2i32:
; X64:       # BB#0:
; X64-NEXT:    cvttpd2dq %xmm0, %xmm0
; X64-NEXT:    movlpd %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %3 = fptosi <2 x double> %0 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to x86_mmx
  %5 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %4, x86_mmx %4)
  %6 = bitcast x86_mmx %5 to i64
  %7 = insertelement <1 x i64> undef, i64 %6, i32 0
  store <1 x i64> %7, <1 x i64>* %1
  ret void
}

define void @cvt_v2f32_v2i32(<4 x float>, <1 x i64>*) nounwind {
; X86-LABEL: cvt_v2f32_v2i32:
; X86:       # BB#0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    cvtps2pi %xmm0, %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %edx, 4(%eax)
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: cvt_v2f32_v2i32:
; X64:       # BB#0:
; X64-NEXT:    cvtps2pi %xmm0, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %3 = tail call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %0)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = bitcast i64 %5 to x86_mmx
  %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6)
  %8 = bitcast x86_mmx %7 to i64
  %9 = insertelement <1 x i64> undef, i64 %8, i32 0
  store <1 x i64> %9, <1 x i64>* %1
  ret void
}

define void @cvtt_v2f32_v2i32(<4 x float>, <1 x i64>*) nounwind {
; X86-LABEL: cvtt_v2f32_v2i32:
; X86:       # BB#0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    cvttps2pi %xmm0, %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %edx, 4(%eax)
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: cvtt_v2f32_v2i32:
; X64:       # BB#0:
; X64-NEXT:    cvttps2pi %xmm0, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %3 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %0)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = bitcast i64 %5 to x86_mmx
  %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6)
  %8 = bitcast x86_mmx %7 to i64
  %9 = insertelement <1 x i64> undef, i64 %8, i32 0
  store <1 x i64> %9, <1 x i64>* %1
  ret void
}

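; Unlike the v2f64 case above, generic fptosi on <4 x float> (with only the
; low half kept by the shuffle) does fold to CVTTPS2PI.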
define void @fptosi_v4f32_v4i32(<4 x float>, <1 x i64>*) nounwind {
; X86-LABEL: fptosi_v4f32_v4i32:
; X86:       # BB#0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    cvttps2pi %xmm0, %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %edx, 4(%eax)
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: fptosi_v4f32_v4i32:
; X64:       # BB#0:
; X64-NEXT:    cvttps2pi %xmm0, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %3 = fptosi <4 x float> %0 to <4 x i32>
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %5 = bitcast <2 x i32> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  %7 = bitcast x86_mmx %6 to i64
  %8 = insertelement <1 x i64> undef, i64 %7, i32 0
  store <1 x i64> %8, <1 x i64>* %1
  ret void
}

define void @fptosi_v2f32_v2i32(<4 x float>, <1 x i64>*) nounwind {
; X86-LABEL: fptosi_v2f32_v2i32:
; X86:       # BB#0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    cvttps2pi %xmm0, %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %edx, 4(%eax)
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: fptosi_v2f32_v2i32:
; X64:       # BB#0:
; X64-NEXT:    cvttps2pi %xmm0, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %3 = fptosi <4 x float> %0 to <4 x i32>
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = bitcast i64 %5 to x86_mmx
  %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6)
  %8 = bitcast x86_mmx %7 to i64
  %9 = insertelement <1 x i64> undef, i64 %8, i32 0
  store <1 x i64> %9, <1 x i64>* %1
  ret void
}

; FIXME: If we are transferring MMX registers to XMM for conversion we could use the MMX equivalents
; (CVTPI2PD + CVTPI2PS) without affecting rounding/exceptions etc.
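; For now the MMX value instead reaches the XMM conversion through a stack
; slot (or movq2dq on X64) followed by CVTDQ2PD/CVTDQ2PS, as the CHECK lines
; below show.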

define <2 x double> @sitofp_v2i32_v2f64(<1 x i64>*) nounwind {
; X86-LABEL: sitofp_v2i32_v2f64:
; X86:       # BB#0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movq (%eax), %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    cvtdq2pd (%esp), %xmm0
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: sitofp_v2i32_v2f64:
; X64:       # BB#0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq2dq %mm0, %xmm0
; X64-NEXT:    cvtdq2pd %xmm0, %xmm0
; X64-NEXT:    retq
  %2 = bitcast <1 x i64>* %0 to x86_mmx*
  %3 = load x86_mmx, x86_mmx* %2, align 8
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  %5 = bitcast x86_mmx %4 to i64
  %6 = insertelement <2 x i64> undef, i64 %5, i32 0
  %7 = bitcast <2 x i64> %6 to <4 x i32>
  %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %9 = sitofp <2 x i32> %8 to <2 x double>
  ret <2 x double> %9
}

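; Here both targets spill the MMX result to a stack slot and reload it with
; movsd before the cvtdq2ps.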
define <4 x float> @sitofp_v2i32_v2f32(<1 x i64>*) nounwind {
; X86-LABEL: sitofp_v2i32_v2f32:
; X86:       # BB#0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movq (%eax), %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    cvtdq2ps %xmm0, %xmm0
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: sitofp_v2i32_v2f32:
; X64:       # BB#0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    cvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %2 = bitcast <1 x i64>* %0 to x86_mmx*
  %3 = load x86_mmx, x86_mmx* %2, align 8
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = shufflevector <2 x i32> %5, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %7 = sitofp <4 x i32> %6 to <4 x float>
  ret <4 x float> %7
}

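; With the @llvm.x86.sse2.cvtdq2ps intrinsic the X64 path transfers via a GPR
; (movd %mm0, %rax / movd %rax, %xmm0) instead of going through memory.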
define <4 x float> @cvt_v2i32_v2f32(<1 x i64>*) nounwind {
; X86-LABEL: cvt_v2i32_v2f32:
; X86:       # BB#0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movq (%eax), %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    cvtdq2ps %xmm0, %xmm0
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: cvt_v2i32_v2f32:
; X64:       # BB#0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movd %mm0, %rax
; X64-NEXT:    movd %rax, %xmm0
; X64-NEXT:    cvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %2 = bitcast <1 x i64>* %0 to x86_mmx*
  %3 = load x86_mmx, x86_mmx* %2, align 8
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  %5 = bitcast x86_mmx %4 to i64
  %6 = insertelement <2 x i64> undef, i64 %5, i32 0
  %7 = insertelement <2 x i64> %6, i64 0, i32 1
  %8 = bitcast <2 x i64> %7 to <4 x i32>
  %9 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %8)
  ret <4 x float> %9
}

declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>)
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>)
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>)
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>)
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>)