; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X64

; If we are transferring XMM conversion results to MMX registers we could use the MMX equivalents
; (CVTPD2PI/CVTTPD2PI + CVTPS2PI/CVTTPS2PI) without affecting rounding/exceptions etc.
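; For example (a sketch, not autogenerated CHECK output): fptosi_v2f64_v2i32 below
; still bounces its CVTTPD2DQ result through a stack slot on the way to an MMX
; register,
;   cvttpd2dq %xmm0, %xmm0
;   movlpd    %xmm0, <stack slot>
;   movq      <stack slot>, %mm0
; whereas a direct conversion into the MMX register would avoid the round trip:
;   cvttpd2pi %xmm0, %mm0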

define void @cvt_v2f64_v2i32(<2 x double>, <1 x i64>*) nounwind {
; X86-LABEL: cvt_v2f64_v2i32:
; X86: # BB#0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: cvtpd2pi %xmm0, %mm0
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: cvt_v2f64_v2i32:
; X64: # BB#0:
; X64-NEXT: cvtpd2pi %xmm0, %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
  %3 = tail call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %0)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = bitcast i64 %5 to x86_mmx
  %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6)
  %8 = bitcast x86_mmx %7 to i64
  %9 = insertelement <1 x i64> undef, i64 %8, i32 0
  store <1 x i64> %9, <1 x i64>* %1
  ret void
}

define void @cvtt_v2f64_v2i32(<2 x double>, <1 x i64>*) nounwind {
; X86-LABEL: cvtt_v2f64_v2i32:
; X86: # BB#0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: cvttpd2pi %xmm0, %mm0
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: cvtt_v2f64_v2i32:
; X64: # BB#0:
; X64-NEXT: cvttpd2pi %xmm0, %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
  %3 = tail call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %0)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = bitcast i64 %5 to x86_mmx
  %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6)
  %8 = bitcast x86_mmx %7 to i64
  %9 = insertelement <1 x i64> undef, i64 %8, i32 0
  store <1 x i64> %9, <1 x i64>* %1
  ret void
}

define void @fptosi_v2f64_v2i32(<2 x double>, <1 x i64>*) nounwind {
; X86-LABEL: fptosi_v2f64_v2i32:
; X86: # BB#0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: cvttpd2dq %xmm0, %xmm0
; X86-NEXT: movlpd %xmm0, {{[0-9]+}}(%esp)
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: fptosi_v2f64_v2i32:
; X64: # BB#0:
; X64-NEXT: cvttpd2dq %xmm0, %xmm0
; X64-NEXT: movlpd %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
  %3 = fptosi <2 x double> %0 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to x86_mmx
  %5 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %4, x86_mmx %4)
  %6 = bitcast x86_mmx %5 to i64
  %7 = insertelement <1 x i64> undef, i64 %6, i32 0
  store <1 x i64> %7, <1 x i64>* %1
  ret void
}

define void @cvt_v2f32_v2i32(<4 x float>, <1 x i64>*) nounwind {
; X86-LABEL: cvt_v2f32_v2i32:
; X86: # BB#0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: cvtps2pi %xmm0, %mm0
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: cvt_v2f32_v2i32:
; X64: # BB#0:
; X64-NEXT: cvtps2pi %xmm0, %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
  %3 = tail call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %0)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = bitcast i64 %5 to x86_mmx
  %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6)
  %8 = bitcast x86_mmx %7 to i64
  %9 = insertelement <1 x i64> undef, i64 %8, i32 0
  store <1 x i64> %9, <1 x i64>* %1
  ret void
}

define void @cvtt_v2f32_v2i32(<4 x float>, <1 x i64>*) nounwind {
; X86-LABEL: cvtt_v2f32_v2i32:
; X86: # BB#0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: cvttps2pi %xmm0, %mm0
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: cvtt_v2f32_v2i32:
; X64: # BB#0:
; X64-NEXT: cvttps2pi %xmm0, %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
  %3 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %0)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = bitcast i64 %5 to x86_mmx
  %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6)
  %8 = bitcast x86_mmx %7 to i64
  %9 = insertelement <1 x i64> undef, i64 %8, i32 0
  store <1 x i64> %9, <1 x i64>* %1
  ret void
}

define void @fptosi_v2f32_v2i32(<4 x float>, <1 x i64>*) nounwind {
; X86-LABEL: fptosi_v2f32_v2i32:
; X86: # BB#0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: cvttps2pi %xmm0, %mm0
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: fptosi_v2f32_v2i32:
; X64: # BB#0:
; X64-NEXT: cvttps2pi %xmm0, %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
  %3 = fptosi <4 x float> %0 to <4 x i32>
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = bitcast i64 %5 to x86_mmx
  %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6)
  %8 = bitcast x86_mmx %7 to i64
  %9 = insertelement <1 x i64> undef, i64 %8, i32 0
  store <1 x i64> %9, <1 x i64>* %1
  ret void
}

; FIXME: If we are transferring MMX registers to XMM for conversion we could use the MMX equivalents
; (CVTPI2PD + CVTPI2PS) without affecting rounding/exceptions etc.
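; For example (a sketch, not autogenerated CHECK output): sitofp_v2i32_v2f64 below
; currently moves the MMX value into an XMM register before converting,
;   movq2dq  %mm0, %xmm0
;   cvtdq2pd %xmm0, %xmm0
; whereas a single CVTPI2PD could convert straight from the MMX register:
;   cvtpi2pd %mm0, %xmm0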

define <2 x double> @sitofp_v2i32_v2f64(<1 x i64>*) nounwind {
; X86-LABEL: sitofp_v2i32_v2f64:
; X86: # BB#0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movq (%eax), %mm0
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: cvtdq2pd (%esp), %xmm0
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: sitofp_v2i32_v2f64:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq2dq %mm0, %xmm0
; X64-NEXT: cvtdq2pd %xmm0, %xmm0
; X64-NEXT: retq
  %2 = bitcast <1 x i64>* %0 to x86_mmx*
  %3 = load x86_mmx, x86_mmx* %2, align 8
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  %5 = bitcast x86_mmx %4 to i64
  %6 = insertelement <2 x i64> undef, i64 %5, i32 0
  %7 = bitcast <2 x i64> %6 to <4 x i32>
  %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %9 = sitofp <2 x i32> %8 to <2 x double>
  ret <2 x double> %9
}

define <4 x float> @sitofp_v2i32_v2f32(<1 x i64>*) nounwind {
; X86-LABEL: sitofp_v2i32_v2f32:
; X86: # BB#0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movq (%eax), %mm0
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: cvtdq2ps %xmm0, %xmm0
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: sitofp_v2i32_v2f32:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: cvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
  %2 = bitcast <1 x i64>* %0 to x86_mmx*
  %3 = load x86_mmx, x86_mmx* %2, align 8
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = shufflevector <2 x i32> %5, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %7 = sitofp <4 x i32> %6 to <4 x float>
  ret <4 x float> %7
}

define <4 x float> @cvt_v2i32_v2f32(<1 x i64>*) nounwind {
; X86-LABEL: cvt_v2i32_v2f32:
; X86: # BB#0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movq (%eax), %mm0
; X86-NEXT: paddd %mm0, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: cvtdq2ps %xmm0, %xmm0
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: cvt_v2i32_v2f32:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %mm0
; X64-NEXT: paddd %mm0, %mm0
; X64-NEXT: movd %mm0, %rax
; X64-NEXT: movd %rax, %xmm0
; X64-NEXT: cvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
  %2 = bitcast <1 x i64>* %0 to x86_mmx*
  %3 = load x86_mmx, x86_mmx* %2, align 8
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  %5 = bitcast x86_mmx %4 to i64
  %6 = insertelement <2 x i64> undef, i64 %5, i32 0
  %7 = insertelement <2 x i64> %6, i64 0, i32 1
  %8 = bitcast <2 x i64> %7 to <4 x i32>
  %9 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %8)
  ret <4 x float> %9
}

declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>)
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>)
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>)
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>)
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>)