; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X64

; If we are transferring XMM conversion results to MMX registers we could use the MMX equivalents
; (CVTPD2PI/CVTTPD2PI + CVTPS2PI/CVTTPS2PI) without affecting rounding/exceptions etc.
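;
; For reference, an illustrative sketch (not taken from the autogenerated
; checks below) of the fold being exercised: an XMM conversion followed by an
; XMM->MMX transfer, e.g.
;   cvtpd2dq %xmm0, %xmm0
;   movdq2q  %xmm0, %mm0
; collapses into the single MMX-destination instruction
;   cvtpd2pi %xmm0, %mm0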

define void @cvt_v2f64_v2i32(<2 x double>, <1 x i64>*) nounwind {
; X86-LABEL: cvt_v2f64_v2i32:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: cvtpd2pi %xmm0, %mm0
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: cvt_v2f64_v2i32:
; X64: # %bb.0:
; X64-NEXT: cvtpd2pi %xmm0, %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
  %3 = tail call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %0)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = bitcast i64 %5 to x86_mmx
  %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6)
  %8 = bitcast x86_mmx %7 to i64
  %9 = insertelement <1 x i64> undef, i64 %8, i32 0
  store <1 x i64> %9, <1 x i64>* %1
  ret void
}

define void @cvtt_v2f64_v2i32(<2 x double>, <1 x i64>*) nounwind {
; X86-LABEL: cvtt_v2f64_v2i32:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: cvttpd2pi %xmm0, %mm0
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: cvtt_v2f64_v2i32:
; X64: # %bb.0:
; X64-NEXT: cvttpd2pi %xmm0, %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
  %3 = tail call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %0)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = bitcast i64 %5 to x86_mmx
  %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6)
  %8 = bitcast x86_mmx %7 to i64
  %9 = insertelement <1 x i64> undef, i64 %8, i32 0
  store <1 x i64> %9, <1 x i64>* %1
  ret void
}

define void @fptosi_v2f64_v2i32(<2 x double>, <1 x i64>*) nounwind {
; X86-LABEL: fptosi_v2f64_v2i32:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: cvttpd2pi %xmm0, %mm0
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: fptosi_v2f64_v2i32:
; X64: # %bb.0:
; X64-NEXT: cvttpd2pi %xmm0, %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
  %3 = fptosi <2 x double> %0 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to x86_mmx
  %5 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %4, x86_mmx %4)
  %6 = bitcast x86_mmx %5 to i64
  %7 = insertelement <1 x i64> undef, i64 %6, i32 0
  store <1 x i64> %7, <1 x i64>* %1
  ret void
}

define void @cvt_v2f32_v2i32(<4 x float>, <1 x i64>*) nounwind {
; X86-LABEL: cvt_v2f32_v2i32:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: cvtps2pi %xmm0, %mm0
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: cvt_v2f32_v2i32:
; X64: # %bb.0:
; X64-NEXT: cvtps2pi %xmm0, %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
  %3 = tail call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %0)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = bitcast i64 %5 to x86_mmx
  %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6)
  %8 = bitcast x86_mmx %7 to i64
  %9 = insertelement <1 x i64> undef, i64 %8, i32 0
  store <1 x i64> %9, <1 x i64>* %1
  ret void
}

define void @cvtt_v2f32_v2i32(<4 x float>, <1 x i64>*) nounwind {
; X86-LABEL: cvtt_v2f32_v2i32:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: cvttps2pi %xmm0, %mm0
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: cvtt_v2f32_v2i32:
; X64: # %bb.0:
; X64-NEXT: cvttps2pi %xmm0, %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
  %3 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %0)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = bitcast i64 %5 to x86_mmx
  %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6)
  %8 = bitcast x86_mmx %7 to i64
  %9 = insertelement <1 x i64> undef, i64 %8, i32 0
  store <1 x i64> %9, <1 x i64>* %1
  ret void
}

define void @fptosi_v4f32_v4i32(<4 x float>, <1 x i64>*) nounwind {
; X86-LABEL: fptosi_v4f32_v4i32:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: cvttps2pi %xmm0, %mm0
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: fptosi_v4f32_v4i32:
; X64: # %bb.0:
; X64-NEXT: cvttps2pi %xmm0, %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
  %3 = fptosi <4 x float> %0 to <4 x i32>
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %5 = bitcast <2 x i32> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  %7 = bitcast x86_mmx %6 to i64
  %8 = insertelement <1 x i64> undef, i64 %7, i32 0
  store <1 x i64> %8, <1 x i64>* %1
  ret void
}

define void @fptosi_v2f32_v2i32(<4 x float>, <1 x i64>*) nounwind {
; X86-LABEL: fptosi_v2f32_v2i32:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: cvttps2pi %xmm0, %mm0
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: fptosi_v2f32_v2i32:
; X64: # %bb.0:
; X64-NEXT: cvttps2pi %xmm0, %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
  %3 = fptosi <4 x float> %0 to <4 x i32>
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = bitcast i64 %5 to x86_mmx
  %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6)
  %8 = bitcast x86_mmx %7 to i64
  %9 = insertelement <1 x i64> undef, i64 %8, i32 0
  store <1 x i64> %9, <1 x i64>* %1
  ret void
}

; FIXME: If we are transferring MMX registers to XMM for conversion we could use the MMX equivalents
; (CVTPI2PD + CVTPI2PS) without affecting rounding/exceptions etc.
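;
; An illustrative sketch (not autogenerated check lines) of the fold this
; FIXME asks for: the transfer-then-convert sequence currently emitted, e.g.
;   movq2dq  %mm0, %xmm0
;   cvtdq2pd %xmm0, %xmm0
; could become the single MMX-source instruction
;   cvtpi2pd %mm0, %xmm0
; (and similarly CVTPI2PS for the single-precision cases).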

define <2 x double> @sitofp_v2i32_v2f64(<1 x i64>*) nounwind {
; X86-LABEL: sitofp_v2i32_v2f64:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movq (%eax), %mm0
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: cvtdq2pd (%esp), %xmm0
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: sitofp_v2i32_v2f64:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq2dq %mm0, %xmm0
; X64-NEXT: cvtdq2pd %xmm0, %xmm0
; X64-NEXT: retq
  %2 = bitcast <1 x i64>* %0 to x86_mmx*
  %3 = load x86_mmx, x86_mmx* %2, align 8
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  %5 = bitcast x86_mmx %4 to i64
  %6 = insertelement <2 x i64> undef, i64 %5, i32 0
  %7 = bitcast <2 x i64> %6 to <4 x i32>
  %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %9 = sitofp <2 x i32> %8 to <2 x double>
  ret <2 x double> %9
}

define <4 x float> @sitofp_v2i32_v2f32(<1 x i64>*) nounwind {
; X86-LABEL: sitofp_v2i32_v2f32:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movq (%eax), %mm0
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: cvtdq2ps %xmm0, %xmm0
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: sitofp_v2i32_v2f32:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: cvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
  %2 = bitcast <1 x i64>* %0 to x86_mmx*
  %3 = load x86_mmx, x86_mmx* %2, align 8
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = shufflevector <2 x i32> %5, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %7 = sitofp <4 x i32> %6 to <4 x float>
  ret <4 x float> %7
}

define <4 x float> @cvt_v2i32_v2f32(<1 x i64>*) nounwind {
; X86-LABEL: cvt_v2i32_v2f32:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movq (%eax), %mm0
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: cvtdq2ps %xmm0, %xmm0
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: cvt_v2i32_v2f32:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, %rax
; X64-NEXT: movq %rax, %xmm0
; X64-NEXT: cvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
  %2 = bitcast <1 x i64>* %0 to x86_mmx*
  %3 = load x86_mmx, x86_mmx* %2, align 8
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  %5 = bitcast x86_mmx %4 to i64
  %6 = insertelement <2 x i64> undef, i64 %5, i32 0
  %7 = insertelement <2 x i64> %6, i64 0, i32 1
  %8 = bitcast <2 x i64> %7 to <4 x i32>
  %9 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %8)
  ret <4 x float> %9
}

declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>)
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>)
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>)
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>)
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>)