; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X32-AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64-AVX

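; Truncate a <2 x double> load to <2 x float> and store it unaligned. The
; 64-bit targets can store the pair with a single 8-byte movlpd, while the
; 32-bit targets split it into extractps/movss stores.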
define void @fptrunc_frommem2(<2 x double>* %in, <2 x float>* %out) {
; X32-SSE-LABEL: fptrunc_frommem2:
; X32-SSE:       # BB#0: # %entry
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT:    cvtpd2ps (%ecx), %xmm0
; X32-SSE-NEXT:    extractps $1, %xmm0, 4(%eax)
; X32-SSE-NEXT:    movss %xmm0, (%eax)
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem2:
; X32-AVX:       # BB#0: # %entry
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-AVX-NEXT:    vcvtpd2psx (%ecx), %xmm0
; X32-AVX-NEXT:    vextractps $1, %xmm0, 4(%eax)
; X32-AVX-NEXT:    vmovss %xmm0, (%eax)
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem2:
; X64-SSE:       # BB#0: # %entry
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0
; X64-SSE-NEXT:    movlpd %xmm0, (%rsi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem2:
; X64-AVX:       # BB#0: # %entry
; X64-AVX-NEXT:    vcvtpd2psx (%rdi), %xmm0
; X64-AVX-NEXT:    vmovlpd %xmm0, (%rsi)
; X64-AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %in
  %1 = fptrunc <2 x double> %0 to <2 x float>
  store <2 x float> %1, <2 x float>* %out, align 1
  ret void
}

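; Truncate a <4 x double> load to <4 x float>. SSE needs two cvtpd2ps plus an
; unpcklpd to recombine the halves; AVX does it with a single vcvtpd2psy
; straight from memory.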
define void @fptrunc_frommem4(<4 x double>* %in, <4 x float>* %out) {
; X32-SSE-LABEL: fptrunc_frommem4:
; X32-SSE:       # BB#0: # %entry
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT:    cvtpd2ps 16(%ecx), %xmm0
; X32-SSE-NEXT:    cvtpd2ps (%ecx), %xmm1
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X32-SSE-NEXT:    movupd %xmm1, (%eax)
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem4:
; X32-AVX:       # BB#0: # %entry
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-AVX-NEXT:    vcvtpd2psy (%ecx), %xmm0
; X32-AVX-NEXT:    vmovupd %xmm0, (%eax)
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem4:
; X64-SSE:       # BB#0: # %entry
; X64-SSE-NEXT:    cvtpd2ps 16(%rdi), %xmm0
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm1
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-SSE-NEXT:    movupd %xmm1, (%rsi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem4:
; X64-AVX:       # BB#0: # %entry
; X64-AVX-NEXT:    vcvtpd2psy (%rdi), %xmm0
; X64-AVX-NEXT:    vmovupd %xmm0, (%rsi)
; X64-AVX-NEXT:    retq
entry:
  %0 = load <4 x double>, <4 x double>* %in
  %1 = fptrunc <4 x double> %0 to <4 x float>
  store <4 x float> %1, <4 x float>* %out, align 1
  ret void
}

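; Truncate an <8 x double> load to <8 x float>: the 4-wide pattern above,
; repeated for each 32-byte half of the input. The AVX version builds a ymm
; result with vinsertf128, so it ends with vzeroupper.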
define void @fptrunc_frommem8(<8 x double>* %in, <8 x float>* %out) {
; X32-SSE-LABEL: fptrunc_frommem8:
; X32-SSE:       # BB#0: # %entry
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT:    cvtpd2ps 16(%ecx), %xmm0
; X32-SSE-NEXT:    cvtpd2ps (%ecx), %xmm1
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X32-SSE-NEXT:    cvtpd2ps 48(%ecx), %xmm0
; X32-SSE-NEXT:    cvtpd2ps 32(%ecx), %xmm2
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X32-SSE-NEXT:    movupd %xmm2, 16(%eax)
; X32-SSE-NEXT:    movupd %xmm1, (%eax)
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem8:
; X32-AVX:       # BB#0: # %entry
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-AVX-NEXT:    vcvtpd2psy (%ecx), %xmm0
; X32-AVX-NEXT:    vcvtpd2psy 32(%ecx), %xmm1
; X32-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-AVX-NEXT:    vmovups %ymm0, (%eax)
; X32-AVX-NEXT:    vzeroupper
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem8:
; X64-SSE:       # BB#0: # %entry
; X64-SSE-NEXT:    cvtpd2ps 16(%rdi), %xmm0
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm1
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-SSE-NEXT:    cvtpd2ps 48(%rdi), %xmm0
; X64-SSE-NEXT:    cvtpd2ps 32(%rdi), %xmm2
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X64-SSE-NEXT:    movupd %xmm2, 16(%rsi)
; X64-SSE-NEXT:    movupd %xmm1, (%rsi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem8:
; X64-AVX:       # BB#0: # %entry
; X64-AVX-NEXT:    vcvtpd2psy (%rdi), %xmm0
; X64-AVX-NEXT:    vcvtpd2psy 32(%rdi), %xmm1
; X64-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-AVX-NEXT:    vmovups %ymm0, (%rsi)
; X64-AVX-NEXT:    vzeroupper
; X64-AVX-NEXT:    retq
entry:
  %0 = load <8 x double>, <8 x double>* %in
  %1 = fptrunc <8 x double> %0 to <8 x float>
  store <8 x float> %1, <8 x float>* %out, align 1
  ret void
}

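; Zero-extending truncation: the shuffle pads the <2 x float> result with
; zeros. cvtpd2ps/vcvtpd2psx already zero the upper half of the destination
; register, so the shuffle should fold away without any extra instruction.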
define <4 x float> @fptrunc_frommem2_zext(<2 x double> * %ld) {
; X32-SSE-LABEL: fptrunc_frommem2_zext:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    cvtpd2ps (%eax), %xmm0
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem2_zext:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vcvtpd2psx (%eax), %xmm0
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem2_zext:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem2_zext:
; X64-AVX:       # BB#0:
; X64-AVX-NEXT:    vcvtpd2psx (%rdi), %xmm0
; X64-AVX-NEXT:    retq
  %arg = load <2 x double>, <2 x double> * %ld, align 16
  %cvt = fptrunc <2 x double> %arg to <2 x float>
  %ret = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
  ret <4 x float> %ret
}

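; Same zero-extending pattern, with the source already in a register.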
define <4 x float> @fptrunc_fromreg2_zext(<2 x double> %arg) {
; X32-SSE-LABEL: fptrunc_fromreg2_zext:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    cvtpd2ps %xmm0, %xmm0
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_fromreg2_zext:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    vcvtpd2ps %xmm0, %xmm0
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_fromreg2_zext:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    cvtpd2ps %xmm0, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_fromreg2_zext:
; X64-AVX:       # BB#0:
; X64-AVX-NEXT:    vcvtpd2ps %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %cvt = fptrunc <2 x double> %arg to <2 x float>
  %ret = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
  ret <4 x float> %ret
}

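; Truncate a constant vector. 1.0, -2.0, 4.0 and -0.0 are all exactly
; representable as float, so the conversion is loss-free.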
; FIXME: For exact truncations we should be able to fold this.
define <4 x float> @fptrunc_fromconst() {
; X32-SSE-LABEL: fptrunc_fromconst:
; X32-SSE:       # BB#0: # %entry
; X32-SSE-NEXT:    cvtpd2ps {{\.LCPI.*}}, %xmm1
; X32-SSE-NEXT:    cvtpd2ps {{\.LCPI.*}}, %xmm0
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_fromconst:
; X32-AVX:       # BB#0: # %entry
; X32-AVX-NEXT:    vcvtpd2psy {{\.LCPI.*}}, %xmm0
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_fromconst:
; X64-SSE:       # BB#0: # %entry
; X64-SSE-NEXT:    cvtpd2ps {{.*}}(%rip), %xmm1
; X64-SSE-NEXT:    cvtpd2ps {{.*}}(%rip), %xmm0
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_fromconst:
; X64-AVX:       # BB#0: # %entry
; X64-AVX-NEXT:    vcvtpd2psy {{.*}}(%rip), %xmm0
; X64-AVX-NEXT:    retq
entry:
  %0 = insertelement <4 x double> undef, double 1.0, i32 0
  %1 = insertelement <4 x double> %0, double -2.0, i32 1
  %2 = insertelement <4 x double> %1, double +4.0, i32 2
  %3 = insertelement <4 x double> %2, double -0.0, i32 3
  %4 = fptrunc <4 x double> %3 to <4 x float>
  ret <4 x float> %4
}