; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck --check-prefix=COMMON --check-prefix=NO-FMA --check-prefix=FMACALL64 --check-prefix=FMACALL32 %s
; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+fma < %s | FileCheck --check-prefix=COMMON --check-prefix=HAS-FMA --check-prefix=FMA64 --check-prefix=FMA32 %s

; Constrained fdiv (round.dynamic, fpexcept.strict) on constant <2 x double>
; operands. Both runs expect a real packed divide (divpd / vdivpd) in the
; output rather than a folded constant result.
define <2 x double> @constrained_vector_fdiv_v2f64() {
; NO-FMA-LABEL: constrained_vector_fdiv_v2f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    movapd {{.*#+}} xmm0 = [1.000000e+00,2.000000e+00]
; NO-FMA-NEXT:    divpd {{.*}}(%rip), %xmm0
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fdiv_v2f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovapd {{.*#+}} xmm0 = [1.000000e+00,2.000000e+00]
; HAS-FMA-NEXT:    vdivpd {{.*}}(%rip), %xmm0, %xmm0
; HAS-FMA-NEXT:    retq
entry:
  %div = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(
           <2 x double> <double 1.000000e+00, double 2.000000e+00>,
           <2 x double> <double 1.000000e+01, double 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <2 x double> %div
}
25
; Constrained fdiv (round.dynamic, fpexcept.strict) on constant <3 x float>
; operands. The expected assembly performs three scalar divides
; (divss / vdivss) and then reassembles the result vector.
define <3 x float> @constrained_vector_fdiv_v3f32() {
; NO-FMA-LABEL: constrained_vector_fdiv_v3f32:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    divss %xmm1, %xmm2
; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    divss %xmm1, %xmm0
; NO-FMA-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    divss %xmm1, %xmm3
; NO-FMA-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; NO-FMA-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fdiv_v3f32:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; HAS-FMA-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; HAS-FMA-NEXT:    vdivss %xmm0, %xmm1, %xmm1
; HAS-FMA-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; HAS-FMA-NEXT:    vdivss %xmm0, %xmm2, %xmm2
; HAS-FMA-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; HAS-FMA-NEXT:    vdivss %xmm0, %xmm3, %xmm0
; HAS-FMA-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3]
; HAS-FMA-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; HAS-FMA-NEXT:    retq
entry:
  %div = call <3 x float> @llvm.experimental.constrained.fdiv.v3f32(
           <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>,
           <3 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <3 x float> %div
}
60
; Constrained fdiv (round.dynamic, fpexcept.strict) on constant <3 x double>
; operands. The expected assembly uses one packed divide for two lanes and one
; scalar divide for the third; the run without +fma stores the third lane to
; the stack and reloads it with fldl before returning.
define <3 x double> @constrained_vector_fdiv_v3f64() {
; NO-FMA-LABEL: constrained_vector_fdiv_v3f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    movapd {{.*#+}} xmm0 = [1.000000e+00,2.000000e+00]
; NO-FMA-NEXT:    divpd {{.*}}(%rip), %xmm0
; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; NO-FMA-NEXT:    divsd {{.*}}(%rip), %xmm1
; NO-FMA-NEXT:    movsd %xmm1, -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT:    movapd %xmm0, %xmm1
; NO-FMA-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; NO-FMA-NEXT:    fldl -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fdiv_v3f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    vdivsd {{.*}}(%rip), %xmm0, %xmm0
; HAS-FMA-NEXT:    vmovapd {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00]
; HAS-FMA-NEXT:    vdivpd {{.*}}(%rip), %xmm1, %xmm1
; HAS-FMA-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; HAS-FMA-NEXT:    retq
entry:
  %div = call <3 x double> @llvm.experimental.constrained.fdiv.v3f64(
           <3 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>,
           <3 x double> <double 1.000000e+01, double 1.000000e+01, double 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <3 x double> %div
}
90
; Constrained fdiv (round.dynamic, fpexcept.strict) on constant <4 x double>
; operands. The run without +fma expects two 128-bit divpd instructions sharing
; one divisor register; the +fma run expects a single 256-bit vdivpd.
define <4 x double> @constrained_vector_fdiv_v4f64() {
; NO-FMA-LABEL: constrained_vector_fdiv_v4f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    movapd {{.*#+}} xmm2 = [1.000000e+01,1.000000e+01]
; NO-FMA-NEXT:    movapd {{.*#+}} xmm0 = [1.000000e+00,2.000000e+00]
; NO-FMA-NEXT:    divpd %xmm2, %xmm0
; NO-FMA-NEXT:    movapd {{.*#+}} xmm1 = [3.000000e+00,4.000000e+00]
; NO-FMA-NEXT:    divpd %xmm2, %xmm1
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fdiv_v4f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovapd {{.*#+}} ymm0 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
; HAS-FMA-NEXT:    vdivpd {{.*}}(%rip), %ymm0, %ymm0
; HAS-FMA-NEXT:    retq
entry:
  %div = call <4 x double> @llvm.experimental.constrained.fdiv.v4f64(
           <4 x double> <double 1.000000e+00, double 2.000000e+00,
                         double 3.000000e+00, double 4.000000e+00>,
           <4 x double> <double 1.000000e+01, double 1.000000e+01,
                         double 1.000000e+01, double 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <4 x double> %div
}
115
; Constrained fmul (round.dynamic, fpexcept.strict) of the largest finite
; double (0x7FEFFFFFFFFFFFFF) by <2.0, 3.0>. Both runs expect a real packed
; multiply (mulpd / vmulpd) in the output rather than a folded constant.
define <2 x double> @constrained_vector_fmul_v2f64() {
; NO-FMA-LABEL: constrained_vector_fmul_v2f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    movapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308]
; NO-FMA-NEXT:    mulpd {{.*}}(%rip), %xmm0
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fmul_v2f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308]
; HAS-FMA-NEXT:    vmulpd {{.*}}(%rip), %xmm0, %xmm0
; HAS-FMA-NEXT:    retq
entry:
  %mul = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(
           <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
           <2 x double> <double 2.000000e+00, double 3.000000e+00>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <2 x double> %mul
}
136
; Constrained fmul (round.dynamic, fpexcept.strict) of +infinity
; (0x7FF0000000000000) by <1.0, 10.0, 100.0> as <3 x float>. The expected
; assembly performs three scalar multiplies (mulss / vmulss) and then
; reassembles the result vector.
define <3 x float> @constrained_vector_fmul_v3f32() {
; NO-FMA-LABEL: constrained_vector_fmul_v3f32:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    mulss %xmm1, %xmm2
; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    mulss %xmm1, %xmm0
; NO-FMA-NEXT:    mulss {{.*}}(%rip), %xmm1
; NO-FMA-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; NO-FMA-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fmul_v3f32:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; HAS-FMA-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm1
; HAS-FMA-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm2
; HAS-FMA-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; HAS-FMA-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3]
; HAS-FMA-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; HAS-FMA-NEXT:    retq
entry:
  %mul = call <3 x float> @llvm.experimental.constrained.fmul.v3f32(
           <3 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000,
                        float 0x7FF0000000000000>,
           <3 x float> <float 1.000000e+00, float 1.000000e+01, float 1.000000e+02>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <3 x float> %mul
}
168
; Constrained fmul (round.dynamic, fpexcept.strict) of the largest finite
; double by <1.0, 10.0, 100.0> as <3 x double>. The expected assembly uses one
; packed multiply for two lanes and one scalar multiply for the third; the run
; without +fma stores the third lane to the stack and reloads it with fldl.
define <3 x double> @constrained_vector_fmul_v3f64() {
; NO-FMA-LABEL: constrained_vector_fmul_v3f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    movapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308]
; NO-FMA-NEXT:    mulpd {{.*}}(%rip), %xmm0
; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; NO-FMA-NEXT:    mulsd {{.*}}(%rip), %xmm1
; NO-FMA-NEXT:    movsd %xmm1, -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT:    movapd %xmm0, %xmm1
; NO-FMA-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; NO-FMA-NEXT:    fldl -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fmul_v3f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    vmulsd {{.*}}(%rip), %xmm0, %xmm0
; HAS-FMA-NEXT:    vmovapd {{.*#+}} xmm1 = [1.797693e+308,1.797693e+308]
; HAS-FMA-NEXT:    vmulpd {{.*}}(%rip), %xmm1, %xmm1
; HAS-FMA-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; HAS-FMA-NEXT:    retq
entry:
  %mul = call <3 x double> @llvm.experimental.constrained.fmul.v3f64(
           <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
                         double 0x7FEFFFFFFFFFFFFF>,
           <3 x double> <double 1.000000e+00, double 1.000000e+01, double 1.000000e+02>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <3 x double> %mul
}
199
; Constrained fmul (round.dynamic, fpexcept.strict) of the largest finite
; double by <2.0, 3.0, 4.0, 5.0> as <4 x double>. The run without +fma expects
; two 128-bit mulpd instructions; the +fma run expects a single 256-bit vmulpd.
define <4 x double> @constrained_vector_fmul_v4f64() {
; NO-FMA-LABEL: constrained_vector_fmul_v4f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    movapd {{.*#+}} xmm1 = [1.797693e+308,1.797693e+308]
; NO-FMA-NEXT:    movapd {{.*#+}} xmm0 = [2.000000e+00,3.000000e+00]
; NO-FMA-NEXT:    mulpd %xmm1, %xmm0
; NO-FMA-NEXT:    mulpd {{.*}}(%rip), %xmm1
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fmul_v4f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovapd {{.*#+}} ymm0 = [1.797693e+308,1.797693e+308,1.797693e+308,1.797693e+308]
; HAS-FMA-NEXT:    vmulpd {{.*}}(%rip), %ymm0, %ymm0
; HAS-FMA-NEXT:    retq
entry:
  %mul = call <4 x double> @llvm.experimental.constrained.fmul.v4f64(
           <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
                         double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
           <4 x double> <double 2.000000e+00, double 3.000000e+00,
                         double 4.000000e+00, double 5.000000e+00>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <4 x double> %mul
}
224
225
; Constrained fadd (round.dynamic, fpexcept.strict) of the largest finite
; double and <1.0, 0.1>. Both runs expect a real packed add (addpd / vaddpd)
; in the output rather than a folded constant.
define <2 x double> @constrained_vector_fadd_v2f64() {
; NO-FMA-LABEL: constrained_vector_fadd_v2f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    movapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308]
; NO-FMA-NEXT:    addpd {{.*}}(%rip), %xmm0
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fadd_v2f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308]
; HAS-FMA-NEXT:    vaddpd {{.*}}(%rip), %xmm0, %xmm0
; HAS-FMA-NEXT:    retq
entry:
  %add = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(
           <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
           <2 x double> <double 1.000000e+00, double 1.000000e-01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <2 x double> %add
}
246
; Constrained fadd (round.dynamic, fpexcept.strict) on constant <3 x float>
; operands, the second being <2.0, 1.0, 0.0>. The expected assembly performs
; three scalar adds (addss / vaddss); the 0.0 lane is materialized with an xor.
define <3 x float> @constrained_vector_fadd_v3f32() {
; NO-FMA-LABEL: constrained_vector_fadd_v3f32:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    xorps %xmm1, %xmm1
; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    addss %xmm2, %xmm1
; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    addss %xmm2, %xmm0
; NO-FMA-NEXT:    addss {{.*}}(%rip), %xmm2
; NO-FMA-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; NO-FMA-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fadd_v3f32:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; HAS-FMA-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; HAS-FMA-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; HAS-FMA-NEXT:    vaddss {{.*}}(%rip), %xmm1, %xmm2
; HAS-FMA-NEXT:    vaddss {{.*}}(%rip), %xmm1, %xmm1
; HAS-FMA-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; HAS-FMA-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; HAS-FMA-NEXT:    retq
entry:
  %add = call <3 x float> @llvm.experimental.constrained.fadd.v3f32(
           <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
                        float 0xFFFFFFFFE0000000>,
           <3 x float> <float 2.0, float 1.0, float 0.0>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <3 x float> %add
}
279
; Constrained fadd (round.dynamic, fpexcept.strict) of the largest finite
; double and <2.0, 1.0, 0.0> as <3 x double>. The expected assembly uses one
; packed add for two lanes and one scalar add for the third; the run without
; +fma stores the third lane to the stack and reloads it with fldl.
define <3 x double> @constrained_vector_fadd_v3f64() {
; NO-FMA-LABEL: constrained_vector_fadd_v3f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    movapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308]
; NO-FMA-NEXT:    addpd {{.*}}(%rip), %xmm0
; NO-FMA-NEXT:    xorpd %xmm1, %xmm1
; NO-FMA-NEXT:    addsd {{.*}}(%rip), %xmm1
; NO-FMA-NEXT:    movsd %xmm1, -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT:    movapd %xmm0, %xmm1
; NO-FMA-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; NO-FMA-NEXT:    fldl -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fadd_v3f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vxorpd %xmm0, %xmm0, %xmm0
; HAS-FMA-NEXT:    vaddsd {{.*}}(%rip), %xmm0, %xmm0
; HAS-FMA-NEXT:    vmovapd {{.*#+}} xmm1 = [1.797693e+308,1.797693e+308]
; HAS-FMA-NEXT:    vaddpd {{.*}}(%rip), %xmm1, %xmm1
; HAS-FMA-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; HAS-FMA-NEXT:    retq
entry:
  %add = call <3 x double> @llvm.experimental.constrained.fadd.v3f64(
           <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
                         double 0x7FEFFFFFFFFFFFFF>,
           <3 x double> <double 2.0, double 1.0, double 0.0>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <3 x double> %add
}
310
; Constrained fadd (round.dynamic, fpexcept.strict) of the largest finite
; double and <1.0, 0.1, 2.0, 0.2> as <4 x double>. The run without +fma expects
; two 128-bit addpd instructions; the +fma run expects a single 256-bit vaddpd.
define <4 x double> @constrained_vector_fadd_v4f64() {
; NO-FMA-LABEL: constrained_vector_fadd_v4f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    movapd {{.*#+}} xmm1 = [1.797693e+308,1.797693e+308]
; NO-FMA-NEXT:    movapd {{.*#+}} xmm0 = [1.000000e+00,1.000000e-01]
; NO-FMA-NEXT:    addpd %xmm1, %xmm0
; NO-FMA-NEXT:    addpd {{.*}}(%rip), %xmm1
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fadd_v4f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovapd {{.*#+}} ymm0 = [1.797693e+308,1.797693e+308,1.797693e+308,1.797693e+308]
; HAS-FMA-NEXT:    vaddpd {{.*}}(%rip), %ymm0, %ymm0
; HAS-FMA-NEXT:    retq
entry:
  %add = call <4 x double> @llvm.experimental.constrained.fadd.v4f64(
           <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
                         double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
           <4 x double> <double 1.000000e+00, double 1.000000e-01,
                         double 2.000000e+00, double 2.000000e-01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <4 x double> %add
}
335
; Constrained fsub (round.dynamic, fpexcept.strict) of <1.0, 0.1> from the
; most negative finite double (0xFFEFFFFFFFFFFFFF). Both runs expect a real
; packed subtract (subpd / vsubpd) in the output rather than a folded constant.
define <2 x double> @constrained_vector_fsub_v2f64() {
; NO-FMA-LABEL: constrained_vector_fsub_v2f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    movapd {{.*#+}} xmm0 = [-1.797693e+308,-1.797693e+308]
; NO-FMA-NEXT:    subpd {{.*}}(%rip), %xmm0
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fsub_v2f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovapd {{.*#+}} xmm0 = [-1.797693e+308,-1.797693e+308]
; HAS-FMA-NEXT:    vsubpd {{.*}}(%rip), %xmm0, %xmm0
; HAS-FMA-NEXT:    retq
entry:
  %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(
           <2 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
           <2 x double> <double 1.000000e+00, double 1.000000e-01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <2 x double> %sub
}
356
; Constrained fsub (round.dynamic, fpexcept.strict) on constant <3 x float>
; operands, the subtrahend being <2.0, 1.0, 0.0>. The expected assembly
; performs three scalar subtracts (subss / vsubss); the 0.0 lane is
; materialized with an xor.
define <3 x float> @constrained_vector_fsub_v3f32() {
; NO-FMA-LABEL: constrained_vector_fsub_v3f32:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    xorps %xmm0, %xmm0
; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movaps %xmm1, %xmm2
; NO-FMA-NEXT:    subss %xmm0, %xmm2
; NO-FMA-NEXT:    movaps %xmm1, %xmm0
; NO-FMA-NEXT:    subss {{.*}}(%rip), %xmm0
; NO-FMA-NEXT:    subss {{.*}}(%rip), %xmm1
; NO-FMA-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; NO-FMA-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fsub_v3f32:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; HAS-FMA-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; HAS-FMA-NEXT:    vsubss %xmm0, %xmm1, %xmm0
; HAS-FMA-NEXT:    vsubss {{.*}}(%rip), %xmm1, %xmm2
; HAS-FMA-NEXT:    vsubss {{.*}}(%rip), %xmm1, %xmm1
; HAS-FMA-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; HAS-FMA-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; HAS-FMA-NEXT:    retq
entry:
  %sub = call <3 x float> @llvm.experimental.constrained.fsub.v3f32(
           <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
                        float 0xFFFFFFFFE0000000>,
           <3 x float> <float 2.0, float 1.0, float 0.0>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <3 x float> %sub
}
390
; Constrained fsub (round.dynamic, fpexcept.strict) of <2.0, 1.0, 0.0> from the
; most negative finite double as <3 x double>. The expected assembly uses one
; scalar subtract for the third lane and one packed subtract for the other
; two; the run without +fma stores the third lane to the stack and reloads it
; with fldl.
define <3 x double> @constrained_vector_fsub_v3f64() {
; NO-FMA-LABEL: constrained_vector_fsub_v3f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    xorpd %xmm0, %xmm0
; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; NO-FMA-NEXT:    subsd %xmm0, %xmm1
; NO-FMA-NEXT:    movapd {{.*#+}} xmm0 = [-1.797693e+308,-1.797693e+308]
; NO-FMA-NEXT:    subpd {{.*}}(%rip), %xmm0
; NO-FMA-NEXT:    movsd %xmm1, -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT:    movapd %xmm0, %xmm1
; NO-FMA-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; NO-FMA-NEXT:    fldl -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fsub_v3f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vxorpd %xmm0, %xmm0, %xmm0
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; HAS-FMA-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; HAS-FMA-NEXT:    vmovapd {{.*#+}} xmm1 = [-1.797693e+308,-1.797693e+308]
; HAS-FMA-NEXT:    vsubpd {{.*}}(%rip), %xmm1, %xmm1
; HAS-FMA-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; HAS-FMA-NEXT:    retq
entry:
  %sub = call <3 x double> @llvm.experimental.constrained.fsub.v3f64(
           <3 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF,
                         double 0xFFEFFFFFFFFFFFFF>,
           <3 x double> <double 2.0, double 1.0, double 0.0>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <3 x double> %sub
}
423
; Constrained fsub (round.dynamic, fpexcept.strict) of <1.0, 0.1, 2.0, 0.2>
; from the most negative finite double as <4 x double>. The run without +fma
; expects two 128-bit subpd instructions; the +fma run expects a single
; 256-bit vsubpd.
define <4 x double> @constrained_vector_fsub_v4f64() {
; NO-FMA-LABEL: constrained_vector_fsub_v4f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    movapd {{.*#+}} xmm1 = [-1.797693e+308,-1.797693e+308]
; NO-FMA-NEXT:    movapd %xmm1, %xmm0
; NO-FMA-NEXT:    subpd {{.*}}(%rip), %xmm0
; NO-FMA-NEXT:    subpd {{.*}}(%rip), %xmm1
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fsub_v4f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovapd {{.*#+}} ymm0 = [-1.797693e+308,-1.797693e+308,-1.797693e+308,-1.797693e+308]
; HAS-FMA-NEXT:    vsubpd {{.*}}(%rip), %ymm0, %ymm0
; HAS-FMA-NEXT:    retq
entry:
  %sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(
           <4 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF,
                         double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
           <4 x double> <double 1.000000e+00, double 1.000000e-01,
                         double 2.000000e+00, double 2.000000e-01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <4 x double> %sub
}
448
; Constrained fma (round.dynamic, fpexcept.strict) on constant <2 x double>
; operands. The run without +fma expects one call to fma per element with the
; first result spilled across the second call; the +fma run expects a single
; vfmadd213pd.
define <2 x double> @constrained_vector_fma_v2f64() {
; NO-FMA-LABEL: constrained_vector_fma_v2f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    subq $24, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 32
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; NO-FMA-NEXT:    callq fma
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; NO-FMA-NEXT:    callq fma
; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; NO-FMA-NEXT:    addq $24, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fma_v2f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovapd {{.*#+}} xmm1 = [1.500000e+00,5.000000e-01]
; HAS-FMA-NEXT:    vmovapd {{.*#+}} xmm0 = [3.500000e+00,2.500000e+00]
; HAS-FMA-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem
; HAS-FMA-NEXT:    retq
entry:
  %fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
           <2 x double> <double 1.5, double 0.5>,
           <2 x double> <double 3.5, double 2.5>,
           <2 x double> <double 5.5, double 4.5>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <2 x double> %fma
}
484
; Constrained fma (round.dynamic, fpexcept.strict) on constant <3 x float>
; operands. The run without +fma expects three calls to fmaf with the
; intermediate results spilled to the stack; the +fma run expects three scalar
; vfmadd213ss instructions followed by vinsertps to rebuild the vector.
define <3 x float> @constrained_vector_fma_v3f32() {
; NO-FMA-LABEL: constrained_vector_fma_v3f32:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    subq $40, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 48
; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    callq fmaf
; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    callq fmaf
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    callq fmaf
; NO-FMA-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; NO-FMA-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; NO-FMA-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm1 = xmm1[0],mem[0]
; NO-FMA-NEXT:    movaps %xmm1, %xmm0
; NO-FMA-NEXT:    addq $40, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fma_v3f32:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; HAS-FMA-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; HAS-FMA-NEXT:    vfmadd213ss {{.*#+}} xmm1 = (xmm0 * xmm1) + mem
; HAS-FMA-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; HAS-FMA-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; HAS-FMA-NEXT:    vfmadd213ss {{.*#+}} xmm2 = (xmm0 * xmm2) + mem
; HAS-FMA-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; HAS-FMA-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; HAS-FMA-NEXT:    vfmadd213ss {{.*#+}} xmm3 = (xmm0 * xmm3) + mem
; HAS-FMA-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0],xmm3[0],xmm2[2,3]
; HAS-FMA-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; HAS-FMA-NEXT:    retq
entry:
  %fma = call <3 x float> @llvm.experimental.constrained.fma.v3f32(
           <3 x float> <float 2.5, float 1.5, float 0.5>,
           <3 x float> <float 5.5, float 4.5, float 3.5>,
           <3 x float> <float 8.5, float 7.5, float 6.5>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <3 x float> %fma
}
536
; Constrained fma (round.dynamic, fpexcept.strict) on constant <3 x double>
; operands. The run without +fma expects three calls to fma and returns the
; third lane via a stack store and fldl; the +fma run expects one scalar
; vfmadd213sd plus one packed vfmadd213pd joined with vinsertf128.
define <3 x double> @constrained_vector_fma_v3f64() {
; NO-FMA-LABEL: constrained_vector_fma_v3f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    subq $24, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 32
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; NO-FMA-NEXT:    callq fma
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; NO-FMA-NEXT:    callq fma
; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; NO-FMA-NEXT:    callq fma
; NO-FMA-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; NO-FMA-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; NO-FMA-NEXT:    movaps %xmm0, %xmm1
; NO-FMA-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; NO-FMA-NEXT:    fldl {{[0-9]+}}(%rsp)
; NO-FMA-NEXT:    addq $24, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fma_v3f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; HAS-FMA-NEXT:    vfmadd213sd {{.*#+}} xmm1 = (xmm0 * xmm1) + mem
; HAS-FMA-NEXT:    vmovapd {{.*#+}} xmm0 = [2.500000e+00,1.500000e+00]
; HAS-FMA-NEXT:    vmovapd {{.*#+}} xmm2 = [5.500000e+00,4.500000e+00]
; HAS-FMA-NEXT:    vfmadd213pd {{.*#+}} xmm2 = (xmm0 * xmm2) + mem
; HAS-FMA-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm0
; HAS-FMA-NEXT:    retq
entry:
  %fma = call <3 x double> @llvm.experimental.constrained.fma.v3f64(
           <3 x double> <double 2.5, double 1.5, double 0.5>,
           <3 x double> <double 5.5, double 4.5, double 3.5>,
           <3 x double> <double 8.5, double 7.5, double 6.5>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <3 x double> %fma
}
586
; Constrained fma (round.dynamic, fpexcept.strict) on constant <4 x double>
; operands. The run without +fma expects four calls to fma with results
; spilled and recombined via unpcklpd; the +fma run expects a single 256-bit
; vfmadd213pd.
define <4 x double> @constrained_vector_fma_v4f64() {
; NO-FMA-LABEL: constrained_vector_fma_v4f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    subq $40, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 48
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; NO-FMA-NEXT:    callq fma
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; NO-FMA-NEXT:    callq fma
; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; NO-FMA-NEXT:    callq fma
; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; NO-FMA-NEXT:    callq fma
; NO-FMA-NEXT:    movaps %xmm0, %xmm1
; NO-FMA-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm1 = xmm1[0],mem[0]
; NO-FMA-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; NO-FMA-NEXT:    addq $40, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fma_v4f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovapd {{.*#+}} ymm1 = [3.500000e+00,2.500000e+00,1.500000e+00,5.000000e-01]
; HAS-FMA-NEXT:    vmovapd {{.*#+}} ymm0 = [7.500000e+00,6.500000e+00,5.500000e+00,4.500000e+00]
; HAS-FMA-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem
; HAS-FMA-NEXT:    retq
entry:
  %fma = call <4 x double> @llvm.experimental.constrained.fma.v4f64(
           <4 x double> <double 3.5, double 2.5, double 1.5, double 0.5>,
           <4 x double> <double 7.5, double 6.5, double 5.5, double 4.5>,
           <4 x double> <double 11.5, double 10.5, double 9.5, double 8.5>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <4 x double> %fma
}
636
; Constrained fma (round.dynamic, fpexcept.strict) on constant <4 x float>
; operands. The run without +fma expects four calls to fmaf with results
; spilled and recombined via unpcklps/unpcklpd; the +fma run expects a single
; vfmadd213ps.
define <4 x float> @constrained_vector_fma_v4f32() {
; NO-FMA-LABEL: constrained_vector_fma_v4f32:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    subq $40, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 48
; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    callq fmaf
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    callq fmaf
; NO-FMA-NEXT:    unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    callq fmaf
; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    callq fmaf
; NO-FMA-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; NO-FMA-NEXT:    addq $40, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fma_v4f32:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovaps {{.*#+}} xmm1 = [3.500000e+00,2.500000e+00,1.500000e+00,5.000000e-01]
; HAS-FMA-NEXT:    vmovaps {{.*#+}} xmm0 = [7.500000e+00,6.500000e+00,5.500000e+00,4.500000e+00]
; HAS-FMA-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem
; HAS-FMA-NEXT:    retq
entry:
  %fma = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
           <4 x float> <float 3.5, float 2.5, float 1.5, float 0.5>,
           <4 x float> <float 7.5, float 6.5, float 5.5, float 4.5>,
           <4 x float> <float 11.5, float 10.5, float 9.5, float 8.5>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <4 x float> %fma
}
686
; Strict-FP fused multiply-add on three constant <8 x float> operands
; (round.dynamic, fpexcept.strict).
; NO-FMA run (llc without -mattr=+fma) expects eight scalar `callq fmaf`
; libcalls whose results are spilled and re-packed with unpcklps/unpcklpd.
; HAS-FMA run (llc with -mattr=+fma) expects a single vfmadd213ps on ymm.
Cameron McInally2c9bcff2018-07-23 14:40:17 +0000687define <8 x float> @constrained_vector_fma_v8f32() {
688; NO-FMA-LABEL: constrained_vector_fma_v8f32:
689; NO-FMA: # %bb.0: # %entry
690; NO-FMA-NEXT: subq $56, %rsp
691; NO-FMA-NEXT: .cfi_def_cfa_offset 64
692; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
693; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
694; NO-FMA-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
695; NO-FMA-NEXT: callq fmaf
696; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
697; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
698; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
699; NO-FMA-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
700; NO-FMA-NEXT: callq fmaf
701; NO-FMA-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
702; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
703; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
704; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
705; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
706; NO-FMA-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
707; NO-FMA-NEXT: callq fmaf
708; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
709; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
710; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
711; NO-FMA-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
712; NO-FMA-NEXT: callq fmaf
713; NO-FMA-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
714; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
715; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
716; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
717; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
718; NO-FMA-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
719; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
720; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
721; NO-FMA-NEXT: callq fmaf
722; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
723; NO-FMA-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
724; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
725; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
726; NO-FMA-NEXT: callq fmaf
727; NO-FMA-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
728; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
729; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
730; NO-FMA-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
731; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
732; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
733; NO-FMA-NEXT: callq fmaf
734; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
735; NO-FMA-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
736; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
737; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
738; NO-FMA-NEXT: callq fmaf
739; NO-FMA-NEXT: movaps %xmm0, %xmm1
740; NO-FMA-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
741; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
742; NO-FMA-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
743; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
744; NO-FMA-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
745; NO-FMA-NEXT: addq $56, %rsp
746; NO-FMA-NEXT: .cfi_def_cfa_offset 8
747; NO-FMA-NEXT: retq
748;
749; HAS-FMA-LABEL: constrained_vector_fma_v8f32:
750; HAS-FMA: # %bb.0: # %entry
751; HAS-FMA-NEXT: vmovaps {{.*#+}} ymm1 = [3.500000e+00,2.500000e+00,1.500000e+00,5.000000e-01,7.500000e+00,6.500000e+00,5.500000e+00,4.500000e+00]
752; HAS-FMA-NEXT: vmovaps {{.*#+}} ymm0 = [7.500000e+00,6.500000e+00,5.500000e+00,4.500000e+00,1.150000e+01,1.050000e+01,9.500000e+00,8.500000e+00]
753; HAS-FMA-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem
754; HAS-FMA-NEXT: retq
755entry:
756 %fma = call <8 x float> @llvm.experimental.constrained.fma.v8f32(
757 <8 x float> <float 3.5, float 2.5, float 1.5, float 0.5,
758 float 7.5, float 6.5, float 5.5, float 4.5>,
759 <8 x float> <float 7.5, float 6.5, float 5.5, float 4.5,
760 float 11.5, float 10.5, float 9.5, float 8.5>,
761 <8 x float> <float 11.5, float 10.5, float 9.5, float 8.5,
762 float 15.5, float 14.5, float 13.5, float 12.5>,
763 metadata !"round.dynamic",
764 metadata !"fpexcept.strict")
765 ret <8 x float> %fma
766}
767
; Strict-FP sqrt of a constant <2 x double> (round.dynamic, fpexcept.strict).
; Both runs expect one packed square root read from a RIP-relative constant
; (sqrtpd for NO-FMA, vsqrtpd for HAS-FMA) and no libcall.
768define <2 x double> @constrained_vector_sqrt_v2f64() {
769; NO-FMA-LABEL: constrained_vector_sqrt_v2f64:
Cameron McInally7caac672018-06-15 20:57:55 +0000770; NO-FMA: # %bb.0: # %entry
771; NO-FMA-NEXT: sqrtpd {{.*}}(%rip), %xmm0
772; NO-FMA-NEXT: retq
773;
Cameron McInally2c9bcff2018-07-23 14:40:17 +0000774; HAS-FMA-LABEL: constrained_vector_sqrt_v2f64:
Cameron McInally7caac672018-06-15 20:57:55 +0000775; HAS-FMA: # %bb.0: # %entry
776; HAS-FMA-NEXT: vsqrtpd {{.*}}(%rip), %xmm0
777; HAS-FMA-NEXT: retq
Cameron McInallyf37bd012018-06-13 14:32:12 +0000778entry:
779 %sqrt = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
780 <2 x double> <double 42.0, double 42.1>,
781 metadata !"round.dynamic",
782 metadata !"fpexcept.strict")
783 ret <2 x double> %sqrt
784}
785
; Strict-FP sqrt of a constant <3 x float> (round.dynamic, fpexcept.strict).
; Both runs expect three scalar square roots (sqrtss / vsqrtss), one per
; element, followed by lane re-packing: unpcklps + movlhps for NO-FMA,
; two vinsertps for HAS-FMA.
Cameron McInally04ae8582018-08-01 14:17:19 +0000786define <3 x float> @constrained_vector_sqrt_v3f32() {
787; NO-FMA-LABEL: constrained_vector_sqrt_v3f32:
788; NO-FMA: # %bb.0: # %entry
789; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
790; NO-FMA-NEXT: sqrtss %xmm0, %xmm1
791; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
792; NO-FMA-NEXT: sqrtss %xmm0, %xmm0
793; NO-FMA-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
794; NO-FMA-NEXT: sqrtss %xmm2, %xmm2
795; NO-FMA-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
796; NO-FMA-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
797; NO-FMA-NEXT: retq
798;
799; HAS-FMA-LABEL: constrained_vector_sqrt_v3f32:
800; HAS-FMA: # %bb.0: # %entry
801; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
802; HAS-FMA-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
803; HAS-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
804; HAS-FMA-NEXT: vsqrtss %xmm1, %xmm1, %xmm1
805; HAS-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
806; HAS-FMA-NEXT: vsqrtss %xmm2, %xmm2, %xmm2
807; HAS-FMA-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
808; HAS-FMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
809; HAS-FMA-NEXT: retq
810entry:
811 %sqrt = call <3 x float> @llvm.experimental.constrained.sqrt.v3f32(
812 <3 x float> <float 42.0, float 43.0, float 44.0>,
813 metadata !"round.dynamic",
814 metadata !"fpexcept.strict")
815 ret <3 x float> %sqrt
816}
817
; Strict-FP sqrt of a constant <3 x double> (round.dynamic, fpexcept.strict).
; NO-FMA run expects one scalar sqrtsd plus one packed sqrtpd; the scalar
; result is stored to a stack slot and loaded back with fldl, while the packed
; result is split across xmm0/xmm1 with movhlps.
; HAS-FMA run expects vsqrtsd + vsqrtpd joined into ymm0 by vinsertf128.
818define <3 x double> @constrained_vector_sqrt_v3f64() {
819; NO-FMA-LABEL: constrained_vector_sqrt_v3f64:
820; NO-FMA: # %bb.0: # %entry
821; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
822; NO-FMA-NEXT: sqrtsd %xmm0, %xmm1
823; NO-FMA-NEXT: sqrtpd {{.*}}(%rip), %xmm0
824; NO-FMA-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp)
825; NO-FMA-NEXT: movapd %xmm0, %xmm1
826; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
827; NO-FMA-NEXT: fldl -{{[0-9]+}}(%rsp)
828; NO-FMA-NEXT: retq
829;
830; HAS-FMA-LABEL: constrained_vector_sqrt_v3f64:
831; HAS-FMA: # %bb.0: # %entry
832; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
833; HAS-FMA-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
834; HAS-FMA-NEXT: vsqrtpd {{.*}}(%rip), %xmm1
835; HAS-FMA-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
836; HAS-FMA-NEXT: retq
837entry:
838 %sqrt = call <3 x double> @llvm.experimental.constrained.sqrt.v3f64(
839 <3 x double> <double 42.0, double 42.1, double 42.2>,
840 metadata !"round.dynamic",
841 metadata !"fpexcept.strict")
842 ret <3 x double> %sqrt
843}
844
; Strict-FP sqrt of a constant <4 x double> (round.dynamic, fpexcept.strict).
; NO-FMA run expects two 128-bit sqrtpd instructions (results in xmm0, xmm1).
; HAS-FMA run expects a single 256-bit vsqrtpd into ymm0.
Cameron McInally2c9bcff2018-07-23 14:40:17 +0000845define <4 x double> @constrained_vector_sqrt_v4f64() {
846; NO-FMA-LABEL: constrained_vector_sqrt_v4f64:
847; NO-FMA: # %bb.0: # %entry
Cameron McInally2c9bcff2018-07-23 14:40:17 +0000848; NO-FMA-NEXT: sqrtpd {{.*}}(%rip), %xmm0
Ulrich Weigand5f753712018-07-25 17:08:13 +0000849; NO-FMA-NEXT: sqrtpd {{.*}}(%rip), %xmm1
Cameron McInally2c9bcff2018-07-23 14:40:17 +0000850; NO-FMA-NEXT: retq
851;
852; HAS-FMA-LABEL: constrained_vector_sqrt_v4f64:
853; HAS-FMA: # %bb.0: # %entry
854; HAS-FMA-NEXT: vsqrtpd {{.*}}(%rip), %ymm0
855; HAS-FMA-NEXT: retq
856entry:
857 %sqrt = call <4 x double> @llvm.experimental.constrained.sqrt.v4f64(
858 <4 x double> <double 42.0, double 42.1,
859 double 42.2, double 42.3>,
860 metadata !"round.dynamic",
861 metadata !"fpexcept.strict")
862 ret <4 x double> %sqrt
863}
864
; Strict-FP pow on constant <2 x double> operands (round.dynamic,
; fpexcept.strict).
; Both runs expect two scalar `callq pow` libcalls; the first result is
; spilled to (%rsp) and merged with the second via unpcklpd / vunpcklpd.
865define <2 x double> @constrained_vector_pow_v2f64() {
866; NO-FMA-LABEL: constrained_vector_pow_v2f64:
Cameron McInally7caac672018-06-15 20:57:55 +0000867; NO-FMA: # %bb.0: # %entry
868; NO-FMA-NEXT: subq $24, %rsp
869; NO-FMA-NEXT: .cfi_def_cfa_offset 32
870; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
871; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
872; NO-FMA-NEXT: callq pow
873; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
874; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
875; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
876; NO-FMA-NEXT: callq pow
877; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
878; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
879; NO-FMA-NEXT: addq $24, %rsp
880; NO-FMA-NEXT: .cfi_def_cfa_offset 8
881; NO-FMA-NEXT: retq
882;
Cameron McInally2c9bcff2018-07-23 14:40:17 +0000883; HAS-FMA-LABEL: constrained_vector_pow_v2f64:
Cameron McInally7caac672018-06-15 20:57:55 +0000884; HAS-FMA: # %bb.0: # %entry
885; HAS-FMA-NEXT: subq $24, %rsp
886; HAS-FMA-NEXT: .cfi_def_cfa_offset 32
887; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
888; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
889; HAS-FMA-NEXT: callq pow
890; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
891; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
892; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
893; HAS-FMA-NEXT: callq pow
894; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
895; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
896; HAS-FMA-NEXT: addq $24, %rsp
897; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
898; HAS-FMA-NEXT: retq
Cameron McInallyf37bd012018-06-13 14:32:12 +0000899entry:
900 %pow = call <2 x double> @llvm.experimental.constrained.pow.v2f64(
901 <2 x double> <double 42.1, double 42.2>,
902 <2 x double> <double 3.0, double 3.0>,
903 metadata !"round.dynamic",
904 metadata !"fpexcept.strict")
905 ret <2 x double> %pow
906}
907
; Strict-FP pow on constant <3 x float> operands (round.dynamic,
; fpexcept.strict).
; Both runs expect three scalar `callq powf` libcalls with intermediate
; results spilled to the stack, then re-packed: unpcklps + unpcklpd for
; NO-FMA, two vinsertps for HAS-FMA.
Cameron McInally04ae8582018-08-01 14:17:19 +0000908define <3 x float> @constrained_vector_pow_v3f32() {
909; NO-FMA-LABEL: constrained_vector_pow_v3f32:
910; NO-FMA: # %bb.0: # %entry
911; NO-FMA-NEXT: subq $40, %rsp
912; NO-FMA-NEXT: .cfi_def_cfa_offset 48
913; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
914; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
915; NO-FMA-NEXT: callq powf
916; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
917; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
918; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
919; NO-FMA-NEXT: callq powf
920; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
921; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
922; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
923; NO-FMA-NEXT: callq powf
924; NO-FMA-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
925; NO-FMA-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
926; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
927; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
928; NO-FMA-NEXT: movaps %xmm1, %xmm0
929; NO-FMA-NEXT: addq $40, %rsp
930; NO-FMA-NEXT: .cfi_def_cfa_offset 8
931; NO-FMA-NEXT: retq
932;
933; HAS-FMA-LABEL: constrained_vector_pow_v3f32:
934; HAS-FMA: # %bb.0: # %entry
935; HAS-FMA-NEXT: subq $40, %rsp
936; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
937; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
938; HAS-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
939; HAS-FMA-NEXT: callq powf
940; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
941; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
942; HAS-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
943; HAS-FMA-NEXT: callq powf
944; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
945; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
946; HAS-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
947; HAS-FMA-NEXT: callq powf
948; HAS-FMA-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
949; HAS-FMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
950; HAS-FMA-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
951; HAS-FMA-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
952; HAS-FMA-NEXT: addq $40, %rsp
953; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
954; HAS-FMA-NEXT: retq
955entry:
956 %pow = call <3 x float> @llvm.experimental.constrained.pow.v3f32(
957 <3 x float> <float 42.0, float 43.0, float 44.0>,
958 <3 x float> <float 3.0, float 3.0, float 3.0>,
959 metadata !"round.dynamic",
960 metadata !"fpexcept.strict")
961 ret <3 x float> %pow
962}
963
; Strict-FP pow on constant <3 x double> operands (round.dynamic,
; fpexcept.strict).
; Both runs expect three scalar `callq pow` libcalls.
; NO-FMA run stores the third result to a stack slot and loads it with fldl,
; splitting the first two across xmm0/xmm1 with movhlps.
; HAS-FMA run spills the packed pair as a 32-byte ymm value, emits vzeroupper
; before the third call, and joins the results with vinsertf128.
964define <3 x double> @constrained_vector_pow_v3f64() {
965; NO-FMA-LABEL: constrained_vector_pow_v3f64:
966; NO-FMA: # %bb.0: # %entry
967; NO-FMA-NEXT: subq $24, %rsp
968; NO-FMA-NEXT: .cfi_def_cfa_offset 32
969; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
970; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
971; NO-FMA-NEXT: callq pow
972; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
973; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
974; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
975; NO-FMA-NEXT: callq pow
976; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
977; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
978; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
979; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
980; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
981; NO-FMA-NEXT: callq pow
982; NO-FMA-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
983; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
984; NO-FMA-NEXT: movaps %xmm0, %xmm1
985; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
986; NO-FMA-NEXT: fldl {{[0-9]+}}(%rsp)
987; NO-FMA-NEXT: addq $24, %rsp
988; NO-FMA-NEXT: .cfi_def_cfa_offset 8
989; NO-FMA-NEXT: retq
990;
991; HAS-FMA-LABEL: constrained_vector_pow_v3f64:
992; HAS-FMA: # %bb.0: # %entry
993; HAS-FMA-NEXT: subq $56, %rsp
994; HAS-FMA-NEXT: .cfi_def_cfa_offset 64
995; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
996; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
997; HAS-FMA-NEXT: callq pow
998; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
999; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1000; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
1001; HAS-FMA-NEXT: callq pow
1002; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
1003; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1004; HAS-FMA-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
1005; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1006; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
1007; HAS-FMA-NEXT: vzeroupper
1008; HAS-FMA-NEXT: callq pow
1009; HAS-FMA-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
1010; HAS-FMA-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1011; HAS-FMA-NEXT: addq $56, %rsp
1012; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
1013; HAS-FMA-NEXT: retq
1014entry:
1015 %pow = call <3 x double> @llvm.experimental.constrained.pow.v3f64(
1016 <3 x double> <double 42.0, double 42.1, double 42.2>,
1017 <3 x double> <double 3.0, double 3.0, double 3.0>,
1018 metadata !"round.dynamic",
1019 metadata !"fpexcept.strict")
1020 ret <3 x double> %pow
1021}
1022
; Strict-FP pow on constant <4 x double> operands (round.dynamic,
; fpexcept.strict).
; Both runs expect four scalar `callq pow` libcalls, paired up with
; unpcklpd / vunpcklpd. NO-FMA run leaves the two halves in xmm0 and xmm1;
; HAS-FMA run joins them into ymm0 with a folded-reload vinsertf128.
Cameron McInally2c9bcff2018-07-23 14:40:17 +00001023define <4 x double> @constrained_vector_pow_v4f64() {
1024; NO-FMA-LABEL: constrained_vector_pow_v4f64:
1025; NO-FMA: # %bb.0: # %entry
1026; NO-FMA-NEXT: subq $40, %rsp
1027; NO-FMA-NEXT: .cfi_def_cfa_offset 48
1028; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1029; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
1030; NO-FMA-NEXT: callq pow
1031; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1032; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1033; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
1034; NO-FMA-NEXT: callq pow
1035; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
1036; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1037; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1038; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1039; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
1040; NO-FMA-NEXT: callq pow
1041; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1042; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1043; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
1044; NO-FMA-NEXT: callq pow
1045; NO-FMA-NEXT: movaps %xmm0, %xmm1
1046; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
1047; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
1048; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
1049; NO-FMA-NEXT: addq $40, %rsp
1050; NO-FMA-NEXT: .cfi_def_cfa_offset 8
1051; NO-FMA-NEXT: retq
1052;
1053; HAS-FMA-LABEL: constrained_vector_pow_v4f64:
1054; HAS-FMA: # %bb.0: # %entry
1055; HAS-FMA-NEXT: subq $40, %rsp
1056; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
1057; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1058; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
1059; HAS-FMA-NEXT: callq pow
1060; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1061; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1062; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
1063; HAS-FMA-NEXT: callq pow
1064; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
1065; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1066; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1067; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1068; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
1069; HAS-FMA-NEXT: callq pow
1070; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1071; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1072; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
1073; HAS-FMA-NEXT: callq pow
1074; HAS-FMA-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
1075; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1076; HAS-FMA-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
1077; HAS-FMA-NEXT: addq $40, %rsp
1078; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
1079; HAS-FMA-NEXT: retq
1080entry:
1081 %pow = call <4 x double> @llvm.experimental.constrained.pow.v4f64(
1082 <4 x double> <double 42.1, double 42.2,
1083 double 42.3, double 42.4>,
1084 <4 x double> <double 3.0, double 3.0,
1085 double 3.0, double 3.0>,
1086 metadata !"round.dynamic",
1087 metadata !"fpexcept.strict")
1088 ret <4 x double> %pow
1089}
1090
; Strict-FP powi on a constant <2 x double> with i32 exponent 3
; (round.dynamic, fpexcept.strict).
; Both runs expect two scalar `callq __powidf2` libcalls, each with the
; exponent loaded into %edi, merged with unpcklpd / vunpcklpd.
1091define <2 x double> @constrained_vector_powi_v2f64() {
1092; NO-FMA-LABEL: constrained_vector_powi_v2f64:
Cameron McInally7caac672018-06-15 20:57:55 +00001093; NO-FMA: # %bb.0: # %entry
1094; NO-FMA-NEXT: subq $24, %rsp
1095; NO-FMA-NEXT: .cfi_def_cfa_offset 32
1096; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1097; NO-FMA-NEXT: movl $3, %edi
1098; NO-FMA-NEXT: callq __powidf2
1099; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1100; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1101; NO-FMA-NEXT: movl $3, %edi
1102; NO-FMA-NEXT: callq __powidf2
1103; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
1104; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1105; NO-FMA-NEXT: addq $24, %rsp
1106; NO-FMA-NEXT: .cfi_def_cfa_offset 8
1107; NO-FMA-NEXT: retq
1108;
Cameron McInally2c9bcff2018-07-23 14:40:17 +00001109; HAS-FMA-LABEL: constrained_vector_powi_v2f64:
Cameron McInally7caac672018-06-15 20:57:55 +00001110; HAS-FMA: # %bb.0: # %entry
1111; HAS-FMA-NEXT: subq $24, %rsp
1112; HAS-FMA-NEXT: .cfi_def_cfa_offset 32
1113; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1114; HAS-FMA-NEXT: movl $3, %edi
1115; HAS-FMA-NEXT: callq __powidf2
1116; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1117; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1118; HAS-FMA-NEXT: movl $3, %edi
1119; HAS-FMA-NEXT: callq __powidf2
1120; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
1121; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1122; HAS-FMA-NEXT: addq $24, %rsp
1123; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
1124; HAS-FMA-NEXT: retq
1125entry:
1126 %powi = call <2 x double> @llvm.experimental.constrained.powi.v2f64(
1127 <2 x double> <double 42.1, double 42.2>,
1128 i32 3,
1129 metadata !"round.dynamic",
1130 metadata !"fpexcept.strict")
1131 ret <2 x double> %powi
1132}
1133
; Strict-FP powi on a constant <3 x float> with i32 exponent 3
; (round.dynamic, fpexcept.strict).
; Both runs expect three scalar `callq __powisf2` libcalls, each with the
; exponent loaded into %edi, then re-packed: unpcklps + unpcklpd for NO-FMA,
; two vinsertps for HAS-FMA.
Cameron McInally04ae8582018-08-01 14:17:19 +00001134define <3 x float> @constrained_vector_powi_v3f32() {
1135; NO-FMA-LABEL: constrained_vector_powi_v3f32:
1136; NO-FMA: # %bb.0: # %entry
1137; NO-FMA-NEXT: subq $40, %rsp
1138; NO-FMA-NEXT: .cfi_def_cfa_offset 48
1139; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1140; NO-FMA-NEXT: movl $3, %edi
1141; NO-FMA-NEXT: callq __powisf2
1142; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1143; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1144; NO-FMA-NEXT: movl $3, %edi
1145; NO-FMA-NEXT: callq __powisf2
1146; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1147; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1148; NO-FMA-NEXT: movl $3, %edi
1149; NO-FMA-NEXT: callq __powisf2
1150; NO-FMA-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
1151; NO-FMA-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1152; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
1153; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
1154; NO-FMA-NEXT: movaps %xmm1, %xmm0
1155; NO-FMA-NEXT: addq $40, %rsp
1156; NO-FMA-NEXT: .cfi_def_cfa_offset 8
1157; NO-FMA-NEXT: retq
1158;
1159; HAS-FMA-LABEL: constrained_vector_powi_v3f32:
1160; HAS-FMA: # %bb.0: # %entry
1161; HAS-FMA-NEXT: subq $40, %rsp
1162; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
1163; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1164; HAS-FMA-NEXT: movl $3, %edi
1165; HAS-FMA-NEXT: callq __powisf2
1166; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1167; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1168; HAS-FMA-NEXT: movl $3, %edi
1169; HAS-FMA-NEXT: callq __powisf2
1170; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1171; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1172; HAS-FMA-NEXT: movl $3, %edi
1173; HAS-FMA-NEXT: callq __powisf2
1174; HAS-FMA-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1175; HAS-FMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1176; HAS-FMA-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
1177; HAS-FMA-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
1178; HAS-FMA-NEXT: addq $40, %rsp
1179; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
1180; HAS-FMA-NEXT: retq
1181entry:
1182 %powi = call <3 x float> @llvm.experimental.constrained.powi.v3f32(
1183 <3 x float> <float 42.0, float 43.0, float 44.0>,
1184 i32 3,
1185 metadata !"round.dynamic",
1186 metadata !"fpexcept.strict")
1187 ret <3 x float> %powi
1188}
1189
; Strict-FP powi on a constant <3 x double> with i32 exponent 3
; (round.dynamic, fpexcept.strict).
; Both runs expect three scalar `callq __powidf2` libcalls, each with the
; exponent loaded into %edi.
; NO-FMA run stores the third result to a stack slot and loads it with fldl,
; splitting the first two across xmm0/xmm1 with movhlps.
; HAS-FMA run spills the packed pair as a 32-byte ymm value, emits vzeroupper
; before the third call, and joins the results with vinsertf128.
1190define <3 x double> @constrained_vector_powi_v3f64() {
1191; NO-FMA-LABEL: constrained_vector_powi_v3f64:
1192; NO-FMA: # %bb.0: # %entry
1193; NO-FMA-NEXT: subq $24, %rsp
1194; NO-FMA-NEXT: .cfi_def_cfa_offset 32
1195; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1196; NO-FMA-NEXT: movl $3, %edi
1197; NO-FMA-NEXT: callq __powidf2
1198; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1199; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1200; NO-FMA-NEXT: movl $3, %edi
1201; NO-FMA-NEXT: callq __powidf2
1202; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
1203; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1204; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1205; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1206; NO-FMA-NEXT: movl $3, %edi
1207; NO-FMA-NEXT: callq __powidf2
1208; NO-FMA-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
1209; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
1210; NO-FMA-NEXT: movaps %xmm0, %xmm1
1211; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
1212; NO-FMA-NEXT: fldl {{[0-9]+}}(%rsp)
1213; NO-FMA-NEXT: addq $24, %rsp
1214; NO-FMA-NEXT: .cfi_def_cfa_offset 8
1215; NO-FMA-NEXT: retq
1216;
1217; HAS-FMA-LABEL: constrained_vector_powi_v3f64:
1218; HAS-FMA: # %bb.0: # %entry
1219; HAS-FMA-NEXT: subq $56, %rsp
1220; HAS-FMA-NEXT: .cfi_def_cfa_offset 64
1221; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1222; HAS-FMA-NEXT: movl $3, %edi
1223; HAS-FMA-NEXT: callq __powidf2
1224; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1225; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1226; HAS-FMA-NEXT: movl $3, %edi
1227; HAS-FMA-NEXT: callq __powidf2
1228; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
1229; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1230; HAS-FMA-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
1231; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1232; HAS-FMA-NEXT: movl $3, %edi
1233; HAS-FMA-NEXT: vzeroupper
1234; HAS-FMA-NEXT: callq __powidf2
1235; HAS-FMA-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
1236; HAS-FMA-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1237; HAS-FMA-NEXT: addq $56, %rsp
1238; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
1239; HAS-FMA-NEXT: retq
1240entry:
1241 %powi = call <3 x double> @llvm.experimental.constrained.powi.v3f64(
1242 <3 x double> <double 42.0, double 42.1, double 42.2>,
1243 i32 3,
1244 metadata !"round.dynamic",
1245 metadata !"fpexcept.strict")
1246 ret <3 x double> %powi
1247}
1248
; Strict-FP powi on a constant <4 x double> with i32 exponent 3
; (round.dynamic, fpexcept.strict).
; Both runs expect four scalar `callq __powidf2` libcalls, each with the
; exponent loaded into %edi, paired up with unpcklpd / vunpcklpd.
; NO-FMA run leaves the two halves in xmm0 and xmm1; HAS-FMA run joins them
; into ymm0 with a folded-reload vinsertf128.
Cameron McInally2c9bcff2018-07-23 14:40:17 +00001249define <4 x double> @constrained_vector_powi_v4f64() {
1250; NO-FMA-LABEL: constrained_vector_powi_v4f64:
1251; NO-FMA: # %bb.0: # %entry
1252; NO-FMA-NEXT: subq $40, %rsp
1253; NO-FMA-NEXT: .cfi_def_cfa_offset 48
1254; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1255; NO-FMA-NEXT: movl $3, %edi
1256; NO-FMA-NEXT: callq __powidf2
1257; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1258; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1259; NO-FMA-NEXT: movl $3, %edi
1260; NO-FMA-NEXT: callq __powidf2
1261; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
1262; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1263; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1264; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1265; NO-FMA-NEXT: movl $3, %edi
1266; NO-FMA-NEXT: callq __powidf2
1267; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1268; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1269; NO-FMA-NEXT: movl $3, %edi
1270; NO-FMA-NEXT: callq __powidf2
1271; NO-FMA-NEXT: movaps %xmm0, %xmm1
1272; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
1273; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
1274; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
1275; NO-FMA-NEXT: addq $40, %rsp
1276; NO-FMA-NEXT: .cfi_def_cfa_offset 8
1277; NO-FMA-NEXT: retq
1278;
1279; HAS-FMA-LABEL: constrained_vector_powi_v4f64:
1280; HAS-FMA: # %bb.0: # %entry
1281; HAS-FMA-NEXT: subq $40, %rsp
1282; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
1283; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1284; HAS-FMA-NEXT: movl $3, %edi
1285; HAS-FMA-NEXT: callq __powidf2
1286; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1287; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1288; HAS-FMA-NEXT: movl $3, %edi
1289; HAS-FMA-NEXT: callq __powidf2
1290; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
1291; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1292; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1293; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1294; HAS-FMA-NEXT: movl $3, %edi
1295; HAS-FMA-NEXT: callq __powidf2
1296; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1297; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1298; HAS-FMA-NEXT: movl $3, %edi
1299; HAS-FMA-NEXT: callq __powidf2
1300; HAS-FMA-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
1301; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1302; HAS-FMA-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
1303; HAS-FMA-NEXT: addq $40, %rsp
1304; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
1305; HAS-FMA-NEXT: retq
1306entry:
1307 %powi = call <4 x double> @llvm.experimental.constrained.powi.v4f64(
1308 <4 x double> <double 42.1, double 42.2,
1309 double 42.3, double 42.4>,
1310 i32 3,
1311 metadata !"round.dynamic",
1312 metadata !"fpexcept.strict")
1313 ret <4 x double> %powi
1314}
1315
1316
; Strict-FP sin of a constant <2 x double> (round.dynamic, fpexcept.strict).
; Both runs expect two scalar `callq sin` libcalls; the first result is
; spilled to (%rsp) and merged with the second via unpcklpd / vunpcklpd.
1317define <2 x double> @constrained_vector_sin_v2f64() {
1318; NO-FMA-LABEL: constrained_vector_sin_v2f64:
Cameron McInally7caac672018-06-15 20:57:55 +00001319; NO-FMA: # %bb.0: # %entry
1320; NO-FMA-NEXT: subq $24, %rsp
1321; NO-FMA-NEXT: .cfi_def_cfa_offset 32
1322; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1323; NO-FMA-NEXT: callq sin
1324; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1325; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1326; NO-FMA-NEXT: callq sin
1327; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
1328; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1329; NO-FMA-NEXT: addq $24, %rsp
1330; NO-FMA-NEXT: .cfi_def_cfa_offset 8
1331; NO-FMA-NEXT: retq
1332;
Cameron McInally2c9bcff2018-07-23 14:40:17 +00001333; HAS-FMA-LABEL: constrained_vector_sin_v2f64:
Cameron McInally7caac672018-06-15 20:57:55 +00001334; HAS-FMA: # %bb.0: # %entry
1335; HAS-FMA-NEXT: subq $24, %rsp
1336; HAS-FMA-NEXT: .cfi_def_cfa_offset 32
1337; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1338; HAS-FMA-NEXT: callq sin
1339; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1340; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1341; HAS-FMA-NEXT: callq sin
1342; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
1343; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1344; HAS-FMA-NEXT: addq $24, %rsp
1345; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
1346; HAS-FMA-NEXT: retq
Cameron McInallyf37bd012018-06-13 14:32:12 +00001347entry:
1348 %sin = call <2 x double> @llvm.experimental.constrained.sin.v2f64(
1349 <2 x double> <double 42.0, double 42.1>,
1350 metadata !"round.dynamic",
1351 metadata !"fpexcept.strict")
1352 ret <2 x double> %sin
1353}
1354
; Strict-FP sin of a constant <3 x float> (round.dynamic, fpexcept.strict).
; Both runs expect three scalar `callq sinf` libcalls with intermediate
; results spilled to the stack, then re-packed: unpcklps + unpcklpd for
; NO-FMA, two vinsertps for HAS-FMA.
Cameron McInally04ae8582018-08-01 14:17:19 +00001355define <3 x float> @constrained_vector_sin_v3f32() {
1356; NO-FMA-LABEL: constrained_vector_sin_v3f32:
1357; NO-FMA: # %bb.0: # %entry
1358; NO-FMA-NEXT: subq $40, %rsp
1359; NO-FMA-NEXT: .cfi_def_cfa_offset 48
1360; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1361; NO-FMA-NEXT: callq sinf
1362; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1363; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1364; NO-FMA-NEXT: callq sinf
1365; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1366; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1367; NO-FMA-NEXT: callq sinf
1368; NO-FMA-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
1369; NO-FMA-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1370; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
1371; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
1372; NO-FMA-NEXT: movaps %xmm1, %xmm0
1373; NO-FMA-NEXT: addq $40, %rsp
1374; NO-FMA-NEXT: .cfi_def_cfa_offset 8
1375; NO-FMA-NEXT: retq
1376;
1377; HAS-FMA-LABEL: constrained_vector_sin_v3f32:
1378; HAS-FMA: # %bb.0: # %entry
1379; HAS-FMA-NEXT: subq $40, %rsp
1380; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
1381; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1382; HAS-FMA-NEXT: callq sinf
1383; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1384; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1385; HAS-FMA-NEXT: callq sinf
1386; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1387; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1388; HAS-FMA-NEXT: callq sinf
1389; HAS-FMA-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1390; HAS-FMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1391; HAS-FMA-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
1392; HAS-FMA-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
1393; HAS-FMA-NEXT: addq $40, %rsp
1394; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
1395; HAS-FMA-NEXT: retq
1396entry:
1397 %sin = call <3 x float> @llvm.experimental.constrained.sin.v3f32(
1398 <3 x float> <float 42.0, float 43.0, float 44.0>,
1399 metadata !"round.dynamic",
1400 metadata !"fpexcept.strict")
1401 ret <3 x float> %sin
1402}
1403
; Codegen test: llvm.experimental.constrained.sin.v3f64 applied to the constant
; vector <42.0, 42.1, 42.2> with "round.dynamic" / "fpexcept.strict" metadata.
; Both RUN configurations are expected to emit three separate `callq sin` calls.
; Without +fma: the first two results are packed with unpcklpd, the high lane is
; later moved into xmm1 with movhlps, and the third result is stored to the
; stack and reloaded with fldl.
; NOTE(review): presumably the third lane is returned on the x87 stack in this
; configuration -- confirm against the calling convention.
; With +fma: the packed pair is spilled as a 32-byte ymm value, vzeroupper is
; emitted before the third call, and vinsertf128 combines the third result with
; the reloaded pair into ymm0.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
1404define <3 x double> @constrained_vector_sin_v3f64() {
1405; NO-FMA-LABEL: constrained_vector_sin_v3f64:
1406; NO-FMA: # %bb.0: # %entry
1407; NO-FMA-NEXT: subq $24, %rsp
1408; NO-FMA-NEXT: .cfi_def_cfa_offset 32
1409; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1410; NO-FMA-NEXT: callq sin
1411; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1412; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1413; NO-FMA-NEXT: callq sin
1414; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
1415; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1416; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1417; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1418; NO-FMA-NEXT: callq sin
1419; NO-FMA-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
1420; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
1421; NO-FMA-NEXT: movaps %xmm0, %xmm1
1422; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
1423; NO-FMA-NEXT: fldl {{[0-9]+}}(%rsp)
1424; NO-FMA-NEXT: addq $24, %rsp
1425; NO-FMA-NEXT: .cfi_def_cfa_offset 8
1426; NO-FMA-NEXT: retq
1427;
1428; HAS-FMA-LABEL: constrained_vector_sin_v3f64:
1429; HAS-FMA: # %bb.0: # %entry
1430; HAS-FMA-NEXT: subq $56, %rsp
1431; HAS-FMA-NEXT: .cfi_def_cfa_offset 64
1432; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1433; HAS-FMA-NEXT: callq sin
1434; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1435; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1436; HAS-FMA-NEXT: callq sin
1437; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
1438; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1439; HAS-FMA-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
1440; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1441; HAS-FMA-NEXT: vzeroupper
1442; HAS-FMA-NEXT: callq sin
1443; HAS-FMA-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
1444; HAS-FMA-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1445; HAS-FMA-NEXT: addq $56, %rsp
1446; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
1447; HAS-FMA-NEXT: retq
1448entry:
1449 %sin = call <3 x double> @llvm.experimental.constrained.sin.v3f64(
1450 <3 x double> <double 42.0, double 42.1, double 42.2>,
1451 metadata !"round.dynamic",
1452 metadata !"fpexcept.strict")
1453 ret <3 x double> %sin
1454}
1455
; Codegen test: llvm.experimental.constrained.sin.v4f64 applied to the constant
; vector <42.0, 42.1, 42.2, 42.3> with "round.dynamic" / "fpexcept.strict"
; metadata.
; Both RUN configurations are expected to emit four separate `callq sin` calls,
; spilling earlier results to the stack across the later calls.
; Without +fma: the results are paired with unpcklpd and returned in xmm0
; (reloaded from the stack) and xmm1.
; With +fma: the pairs are built with vunpcklpd and merged into ymm0 by a
; vinsertf128 with a folded reload.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
Cameron McInally2c9bcff2018-07-23 14:40:17 +00001456define <4 x double> @constrained_vector_sin_v4f64() {
1457; NO-FMA-LABEL: constrained_vector_sin_v4f64:
1458; NO-FMA: # %bb.0: # %entry
1459; NO-FMA-NEXT: subq $40, %rsp
1460; NO-FMA-NEXT: .cfi_def_cfa_offset 48
1461; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1462; NO-FMA-NEXT: callq sin
1463; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1464; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1465; NO-FMA-NEXT: callq sin
1466; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
1467; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1468; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1469; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1470; NO-FMA-NEXT: callq sin
1471; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1472; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1473; NO-FMA-NEXT: callq sin
1474; NO-FMA-NEXT: movaps %xmm0, %xmm1
1475; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
1476; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
1477; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
1478; NO-FMA-NEXT: addq $40, %rsp
1479; NO-FMA-NEXT: .cfi_def_cfa_offset 8
1480; NO-FMA-NEXT: retq
1481;
1482; HAS-FMA-LABEL: constrained_vector_sin_v4f64:
1483; HAS-FMA: # %bb.0: # %entry
1484; HAS-FMA-NEXT: subq $40, %rsp
1485; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
1486; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1487; HAS-FMA-NEXT: callq sin
1488; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1489; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1490; HAS-FMA-NEXT: callq sin
1491; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
1492; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1493; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1494; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1495; HAS-FMA-NEXT: callq sin
1496; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1497; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1498; HAS-FMA-NEXT: callq sin
1499; HAS-FMA-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
1500; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1501; HAS-FMA-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
1502; HAS-FMA-NEXT: addq $40, %rsp
1503; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
1504; HAS-FMA-NEXT: retq
1505entry:
1506 %sin = call <4 x double> @llvm.experimental.constrained.sin.v4f64(
1507 <4 x double> <double 42.0, double 42.1,
1508 double 42.2, double 42.3>,
1509 metadata !"round.dynamic",
1510 metadata !"fpexcept.strict")
1511 ret <4 x double> %sin
1512}
1513
; Codegen test: llvm.experimental.constrained.cos.v2f64 applied to the constant
; vector <42.0, 42.1> with "round.dynamic" / "fpexcept.strict" metadata.
; Both RUN configurations are expected to emit two separate `callq cos` calls;
; the first result is spilled to the stack and merged with the second through
; a folded reload (unpcklpd without +fma, vunpcklpd with +fma).
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
1514define <2 x double> @constrained_vector_cos_v2f64() {
1515; NO-FMA-LABEL: constrained_vector_cos_v2f64:
Cameron McInally7caac672018-06-15 20:57:55 +00001516; NO-FMA: # %bb.0: # %entry
1517; NO-FMA-NEXT: subq $24, %rsp
1518; NO-FMA-NEXT: .cfi_def_cfa_offset 32
1519; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1520; NO-FMA-NEXT: callq cos
1521; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1522; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1523; NO-FMA-NEXT: callq cos
1524; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
1525; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1526; NO-FMA-NEXT: addq $24, %rsp
1527; NO-FMA-NEXT: .cfi_def_cfa_offset 8
1528; NO-FMA-NEXT: retq
1529;
Cameron McInally2c9bcff2018-07-23 14:40:17 +00001530; HAS-FMA-LABEL: constrained_vector_cos_v2f64:
Cameron McInally7caac672018-06-15 20:57:55 +00001531; HAS-FMA: # %bb.0: # %entry
1532; HAS-FMA-NEXT: subq $24, %rsp
1533; HAS-FMA-NEXT: .cfi_def_cfa_offset 32
1534; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1535; HAS-FMA-NEXT: callq cos
1536; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1537; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1538; HAS-FMA-NEXT: callq cos
1539; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
1540; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1541; HAS-FMA-NEXT: addq $24, %rsp
1542; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
1543; HAS-FMA-NEXT: retq
Cameron McInallyf37bd012018-06-13 14:32:12 +00001544entry:
1545 %cos = call <2 x double> @llvm.experimental.constrained.cos.v2f64(
1546 <2 x double> <double 42.0, double 42.1>,
1547 metadata !"round.dynamic",
1548 metadata !"fpexcept.strict")
1549 ret <2 x double> %cos
1550}
1551
; Codegen test: llvm.experimental.constrained.cos.v3f32 applied to the constant
; vector <42.0, 43.0, 44.0> with "round.dynamic" / "fpexcept.strict" metadata.
; Both RUN configurations are expected to emit three separate `callq cosf`
; calls, spilling earlier results to the stack across the later calls.
; Result assembly differs: the run without +fma merges the lanes with
; unpcklps + unpcklpd and then copies xmm1 to xmm0; the +fma run uses two
; vinsertps instructions.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
Cameron McInally04ae8582018-08-01 14:17:19 +00001552define <3 x float> @constrained_vector_cos_v3f32() {
1553; NO-FMA-LABEL: constrained_vector_cos_v3f32:
1554; NO-FMA: # %bb.0: # %entry
1555; NO-FMA-NEXT: subq $40, %rsp
1556; NO-FMA-NEXT: .cfi_def_cfa_offset 48
1557; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1558; NO-FMA-NEXT: callq cosf
1559; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1560; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1561; NO-FMA-NEXT: callq cosf
1562; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1563; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1564; NO-FMA-NEXT: callq cosf
1565; NO-FMA-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
1566; NO-FMA-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1567; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
1568; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
1569; NO-FMA-NEXT: movaps %xmm1, %xmm0
1570; NO-FMA-NEXT: addq $40, %rsp
1571; NO-FMA-NEXT: .cfi_def_cfa_offset 8
1572; NO-FMA-NEXT: retq
1573;
1574; HAS-FMA-LABEL: constrained_vector_cos_v3f32:
1575; HAS-FMA: # %bb.0: # %entry
1576; HAS-FMA-NEXT: subq $40, %rsp
1577; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
1578; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1579; HAS-FMA-NEXT: callq cosf
1580; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1581; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1582; HAS-FMA-NEXT: callq cosf
1583; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1584; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1585; HAS-FMA-NEXT: callq cosf
1586; HAS-FMA-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1587; HAS-FMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1588; HAS-FMA-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
1589; HAS-FMA-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
1590; HAS-FMA-NEXT: addq $40, %rsp
1591; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
1592; HAS-FMA-NEXT: retq
1593entry:
1594 %cos = call <3 x float> @llvm.experimental.constrained.cos.v3f32(
1595 <3 x float> <float 42.0, float 43.0, float 44.0>,
1596 metadata !"round.dynamic",
1597 metadata !"fpexcept.strict")
1598 ret <3 x float> %cos
1599}
1600
; Codegen test: llvm.experimental.constrained.cos.v3f64 applied to the constant
; vector <42.0, 42.1, 42.2> with "round.dynamic" / "fpexcept.strict" metadata.
; Both RUN configurations are expected to emit three separate `callq cos` calls.
; Without +fma: the first two results are packed with unpcklpd, the high lane is
; later moved into xmm1 with movhlps, and the third result is stored to the
; stack and reloaded with fldl.
; NOTE(review): presumably the third lane is returned on the x87 stack in this
; configuration -- confirm against the calling convention.
; With +fma: the packed pair is spilled as a 32-byte ymm value, vzeroupper is
; emitted before the third call, and vinsertf128 combines the third result with
; the reloaded pair into ymm0.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
1601define <3 x double> @constrained_vector_cos_v3f64() {
1602; NO-FMA-LABEL: constrained_vector_cos_v3f64:
1603; NO-FMA: # %bb.0: # %entry
1604; NO-FMA-NEXT: subq $24, %rsp
1605; NO-FMA-NEXT: .cfi_def_cfa_offset 32
1606; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1607; NO-FMA-NEXT: callq cos
1608; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1609; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1610; NO-FMA-NEXT: callq cos
1611; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
1612; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1613; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1614; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1615; NO-FMA-NEXT: callq cos
1616; NO-FMA-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
1617; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
1618; NO-FMA-NEXT: movaps %xmm0, %xmm1
1619; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
1620; NO-FMA-NEXT: fldl {{[0-9]+}}(%rsp)
1621; NO-FMA-NEXT: addq $24, %rsp
1622; NO-FMA-NEXT: .cfi_def_cfa_offset 8
1623; NO-FMA-NEXT: retq
1624;
1625; HAS-FMA-LABEL: constrained_vector_cos_v3f64:
1626; HAS-FMA: # %bb.0: # %entry
1627; HAS-FMA-NEXT: subq $56, %rsp
1628; HAS-FMA-NEXT: .cfi_def_cfa_offset 64
1629; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1630; HAS-FMA-NEXT: callq cos
1631; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1632; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1633; HAS-FMA-NEXT: callq cos
1634; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
1635; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1636; HAS-FMA-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
1637; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1638; HAS-FMA-NEXT: vzeroupper
1639; HAS-FMA-NEXT: callq cos
1640; HAS-FMA-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
1641; HAS-FMA-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1642; HAS-FMA-NEXT: addq $56, %rsp
1643; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
1644; HAS-FMA-NEXT: retq
1645entry:
1646 %cos = call <3 x double> @llvm.experimental.constrained.cos.v3f64(
1647 <3 x double> <double 42.0, double 42.1, double 42.2>,
1648 metadata !"round.dynamic",
1649 metadata !"fpexcept.strict")
1650 ret <3 x double> %cos
1651}
1652
; Codegen test: llvm.experimental.constrained.cos.v4f64 applied to the constant
; vector <42.0, 42.1, 42.2, 42.3> with "round.dynamic" / "fpexcept.strict"
; metadata.
; Both RUN configurations are expected to emit four separate `callq cos` calls,
; spilling earlier results to the stack across the later calls.
; Without +fma: the results are paired with unpcklpd and returned in xmm0
; (reloaded from the stack) and xmm1.
; With +fma: the pairs are built with vunpcklpd and merged into ymm0 by a
; vinsertf128 with a folded reload.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
Cameron McInally2c9bcff2018-07-23 14:40:17 +00001653define <4 x double> @constrained_vector_cos_v4f64() {
1654; NO-FMA-LABEL: constrained_vector_cos_v4f64:
1655; NO-FMA: # %bb.0: # %entry
1656; NO-FMA-NEXT: subq $40, %rsp
1657; NO-FMA-NEXT: .cfi_def_cfa_offset 48
1658; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1659; NO-FMA-NEXT: callq cos
1660; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1661; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1662; NO-FMA-NEXT: callq cos
1663; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
1664; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1665; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1666; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1667; NO-FMA-NEXT: callq cos
1668; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1669; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1670; NO-FMA-NEXT: callq cos
1671; NO-FMA-NEXT: movaps %xmm0, %xmm1
1672; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
1673; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
1674; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
1675; NO-FMA-NEXT: addq $40, %rsp
1676; NO-FMA-NEXT: .cfi_def_cfa_offset 8
1677; NO-FMA-NEXT: retq
1678;
1679; HAS-FMA-LABEL: constrained_vector_cos_v4f64:
1680; HAS-FMA: # %bb.0: # %entry
1681; HAS-FMA-NEXT: subq $40, %rsp
1682; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
1683; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1684; HAS-FMA-NEXT: callq cos
1685; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1686; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1687; HAS-FMA-NEXT: callq cos
1688; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
1689; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1690; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1691; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1692; HAS-FMA-NEXT: callq cos
1693; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1694; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1695; HAS-FMA-NEXT: callq cos
1696; HAS-FMA-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
1697; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1698; HAS-FMA-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
1699; HAS-FMA-NEXT: addq $40, %rsp
1700; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
1701; HAS-FMA-NEXT: retq
1702entry:
1703 %cos = call <4 x double> @llvm.experimental.constrained.cos.v4f64(
1704 <4 x double> <double 42.0, double 42.1,
1705 double 42.2, double 42.3>,
1706 metadata !"round.dynamic",
1707 metadata !"fpexcept.strict")
1708 ret <4 x double> %cos
1709}
1710
; Codegen test: llvm.experimental.constrained.exp.v2f64 applied to the constant
; vector <42.0, 42.1> with "round.dynamic" / "fpexcept.strict" metadata.
; Both RUN configurations are expected to emit two separate `callq exp` calls;
; the first result is spilled to the stack and merged with the second through
; a folded reload (unpcklpd without +fma, vunpcklpd with +fma).
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
1711define <2 x double> @constrained_vector_exp_v2f64() {
1712; NO-FMA-LABEL: constrained_vector_exp_v2f64:
Cameron McInally7caac672018-06-15 20:57:55 +00001713; NO-FMA: # %bb.0: # %entry
1714; NO-FMA-NEXT: subq $24, %rsp
1715; NO-FMA-NEXT: .cfi_def_cfa_offset 32
1716; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1717; NO-FMA-NEXT: callq exp
1718; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1719; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1720; NO-FMA-NEXT: callq exp
1721; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
1722; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1723; NO-FMA-NEXT: addq $24, %rsp
1724; NO-FMA-NEXT: .cfi_def_cfa_offset 8
1725; NO-FMA-NEXT: retq
1726;
Cameron McInally2c9bcff2018-07-23 14:40:17 +00001727; HAS-FMA-LABEL: constrained_vector_exp_v2f64:
Cameron McInally7caac672018-06-15 20:57:55 +00001728; HAS-FMA: # %bb.0: # %entry
1729; HAS-FMA-NEXT: subq $24, %rsp
1730; HAS-FMA-NEXT: .cfi_def_cfa_offset 32
1731; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1732; HAS-FMA-NEXT: callq exp
1733; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1734; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1735; HAS-FMA-NEXT: callq exp
1736; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
1737; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1738; HAS-FMA-NEXT: addq $24, %rsp
1739; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
1740; HAS-FMA-NEXT: retq
Cameron McInallyf37bd012018-06-13 14:32:12 +00001741entry:
1742 %exp = call <2 x double> @llvm.experimental.constrained.exp.v2f64(
1743 <2 x double> <double 42.0, double 42.1>,
1744 metadata !"round.dynamic",
1745 metadata !"fpexcept.strict")
1746 ret <2 x double> %exp
1747}
1748
; Codegen test: llvm.experimental.constrained.exp.v3f32 applied to the constant
; vector <42.0, 43.0, 44.0> with "round.dynamic" / "fpexcept.strict" metadata.
; Both RUN configurations are expected to emit three separate `callq expf`
; calls, spilling earlier results to the stack across the later calls.
; Result assembly differs: the run without +fma merges the lanes with
; unpcklps + unpcklpd and then copies xmm1 to xmm0; the +fma run uses two
; vinsertps instructions.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
Cameron McInally04ae8582018-08-01 14:17:19 +00001749define <3 x float> @constrained_vector_exp_v3f32() {
1750; NO-FMA-LABEL: constrained_vector_exp_v3f32:
1751; NO-FMA: # %bb.0: # %entry
1752; NO-FMA-NEXT: subq $40, %rsp
1753; NO-FMA-NEXT: .cfi_def_cfa_offset 48
1754; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1755; NO-FMA-NEXT: callq expf
1756; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1757; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1758; NO-FMA-NEXT: callq expf
1759; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1760; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1761; NO-FMA-NEXT: callq expf
1762; NO-FMA-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
1763; NO-FMA-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1764; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
1765; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
1766; NO-FMA-NEXT: movaps %xmm1, %xmm0
1767; NO-FMA-NEXT: addq $40, %rsp
1768; NO-FMA-NEXT: .cfi_def_cfa_offset 8
1769; NO-FMA-NEXT: retq
1770;
1771; HAS-FMA-LABEL: constrained_vector_exp_v3f32:
1772; HAS-FMA: # %bb.0: # %entry
1773; HAS-FMA-NEXT: subq $40, %rsp
1774; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
1775; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1776; HAS-FMA-NEXT: callq expf
1777; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1778; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1779; HAS-FMA-NEXT: callq expf
1780; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1781; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1782; HAS-FMA-NEXT: callq expf
1783; HAS-FMA-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1784; HAS-FMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1785; HAS-FMA-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
1786; HAS-FMA-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
1787; HAS-FMA-NEXT: addq $40, %rsp
1788; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
1789; HAS-FMA-NEXT: retq
1790entry:
1791 %exp = call <3 x float> @llvm.experimental.constrained.exp.v3f32(
1792 <3 x float> <float 42.0, float 43.0, float 44.0>,
1793 metadata !"round.dynamic",
1794 metadata !"fpexcept.strict")
1795 ret <3 x float> %exp
1796}
1797
; Codegen test: llvm.experimental.constrained.exp.v3f64 applied to the constant
; vector <42.0, 42.1, 42.2> with "round.dynamic" / "fpexcept.strict" metadata.
; Both RUN configurations are expected to emit three separate `callq exp` calls.
; Without +fma: the first two results are packed with unpcklpd, the high lane is
; later moved into xmm1 with movhlps, and the third result is stored to the
; stack and reloaded with fldl.
; NOTE(review): presumably the third lane is returned on the x87 stack in this
; configuration -- confirm against the calling convention.
; With +fma: the packed pair is spilled as a 32-byte ymm value, vzeroupper is
; emitted before the third call, and vinsertf128 combines the third result with
; the reloaded pair into ymm0.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
1798define <3 x double> @constrained_vector_exp_v3f64() {
1799; NO-FMA-LABEL: constrained_vector_exp_v3f64:
1800; NO-FMA: # %bb.0: # %entry
1801; NO-FMA-NEXT: subq $24, %rsp
1802; NO-FMA-NEXT: .cfi_def_cfa_offset 32
1803; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1804; NO-FMA-NEXT: callq exp
1805; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1806; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1807; NO-FMA-NEXT: callq exp
1808; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
1809; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1810; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1811; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1812; NO-FMA-NEXT: callq exp
1813; NO-FMA-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
1814; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
1815; NO-FMA-NEXT: movaps %xmm0, %xmm1
1816; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
1817; NO-FMA-NEXT: fldl {{[0-9]+}}(%rsp)
1818; NO-FMA-NEXT: addq $24, %rsp
1819; NO-FMA-NEXT: .cfi_def_cfa_offset 8
1820; NO-FMA-NEXT: retq
1821;
1822; HAS-FMA-LABEL: constrained_vector_exp_v3f64:
1823; HAS-FMA: # %bb.0: # %entry
1824; HAS-FMA-NEXT: subq $56, %rsp
1825; HAS-FMA-NEXT: .cfi_def_cfa_offset 64
1826; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1827; HAS-FMA-NEXT: callq exp
1828; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1829; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1830; HAS-FMA-NEXT: callq exp
1831; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
1832; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1833; HAS-FMA-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
1834; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1835; HAS-FMA-NEXT: vzeroupper
1836; HAS-FMA-NEXT: callq exp
1837; HAS-FMA-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
1838; HAS-FMA-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1839; HAS-FMA-NEXT: addq $56, %rsp
1840; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
1841; HAS-FMA-NEXT: retq
1842entry:
1843 %exp = call <3 x double> @llvm.experimental.constrained.exp.v3f64(
1844 <3 x double> <double 42.0, double 42.1, double 42.2>,
1845 metadata !"round.dynamic",
1846 metadata !"fpexcept.strict")
1847 ret <3 x double> %exp
1848}
1849
; Codegen test: llvm.experimental.constrained.exp.v4f64 applied to the constant
; vector <42.0, 42.1, 42.2, 42.3> with "round.dynamic" / "fpexcept.strict"
; metadata.
; Both RUN configurations are expected to emit four separate `callq exp` calls,
; spilling earlier results to the stack across the later calls.
; Without +fma: the results are paired with unpcklpd and returned in xmm0
; (reloaded from the stack) and xmm1.
; With +fma: the pairs are built with vunpcklpd and merged into ymm0 by a
; vinsertf128 with a folded reload.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
Cameron McInally2c9bcff2018-07-23 14:40:17 +00001850define <4 x double> @constrained_vector_exp_v4f64() {
1851; NO-FMA-LABEL: constrained_vector_exp_v4f64:
1852; NO-FMA: # %bb.0: # %entry
1853; NO-FMA-NEXT: subq $40, %rsp
1854; NO-FMA-NEXT: .cfi_def_cfa_offset 48
1855; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1856; NO-FMA-NEXT: callq exp
1857; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1858; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1859; NO-FMA-NEXT: callq exp
1860; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
1861; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1862; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1863; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1864; NO-FMA-NEXT: callq exp
1865; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1866; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1867; NO-FMA-NEXT: callq exp
1868; NO-FMA-NEXT: movaps %xmm0, %xmm1
1869; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
1870; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
1871; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
1872; NO-FMA-NEXT: addq $40, %rsp
1873; NO-FMA-NEXT: .cfi_def_cfa_offset 8
1874; NO-FMA-NEXT: retq
1875;
1876; HAS-FMA-LABEL: constrained_vector_exp_v4f64:
1877; HAS-FMA: # %bb.0: # %entry
1878; HAS-FMA-NEXT: subq $40, %rsp
1879; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
1880; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1881; HAS-FMA-NEXT: callq exp
1882; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1883; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1884; HAS-FMA-NEXT: callq exp
1885; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
1886; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1887; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1888; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1889; HAS-FMA-NEXT: callq exp
1890; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1891; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1892; HAS-FMA-NEXT: callq exp
1893; HAS-FMA-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
1894; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1895; HAS-FMA-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
1896; HAS-FMA-NEXT: addq $40, %rsp
1897; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
1898; HAS-FMA-NEXT: retq
1899entry:
1900 %exp = call <4 x double> @llvm.experimental.constrained.exp.v4f64(
1901 <4 x double> <double 42.0, double 42.1,
1902 double 42.2, double 42.3>,
1903 metadata !"round.dynamic",
1904 metadata !"fpexcept.strict")
1905 ret <4 x double> %exp
1906}
1907
; Codegen test: llvm.experimental.constrained.exp2.v2f64 applied to the constant
; vector <42.1, 42.0> with "round.dynamic" / "fpexcept.strict" metadata.
; Both RUN configurations are expected to emit two separate `callq exp2` calls;
; the first result is spilled to the stack and merged with the second through
; a folded reload (unpcklpd without +fma, vunpcklpd with +fma).
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
1908define <2 x double> @constrained_vector_exp2_v2f64() {
1909; NO-FMA-LABEL: constrained_vector_exp2_v2f64:
Cameron McInally7caac672018-06-15 20:57:55 +00001910; NO-FMA: # %bb.0: # %entry
1911; NO-FMA-NEXT: subq $24, %rsp
1912; NO-FMA-NEXT: .cfi_def_cfa_offset 32
1913; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1914; NO-FMA-NEXT: callq exp2
1915; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1916; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1917; NO-FMA-NEXT: callq exp2
1918; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
1919; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1920; NO-FMA-NEXT: addq $24, %rsp
1921; NO-FMA-NEXT: .cfi_def_cfa_offset 8
1922; NO-FMA-NEXT: retq
1923;
Cameron McInally2c9bcff2018-07-23 14:40:17 +00001924; HAS-FMA-LABEL: constrained_vector_exp2_v2f64:
Cameron McInally7caac672018-06-15 20:57:55 +00001925; HAS-FMA: # %bb.0: # %entry
1926; HAS-FMA-NEXT: subq $24, %rsp
1927; HAS-FMA-NEXT: .cfi_def_cfa_offset 32
1928; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1929; HAS-FMA-NEXT: callq exp2
1930; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1931; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1932; HAS-FMA-NEXT: callq exp2
1933; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
1934; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
1935; HAS-FMA-NEXT: addq $24, %rsp
1936; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
1937; HAS-FMA-NEXT: retq
Cameron McInallyf37bd012018-06-13 14:32:12 +00001938entry:
1939 %exp2 = call <2 x double> @llvm.experimental.constrained.exp2.v2f64(
1940 <2 x double> <double 42.1, double 42.0>,
1941 metadata !"round.dynamic",
1942 metadata !"fpexcept.strict")
1943 ret <2 x double> %exp2
1944}
1945
; Codegen test: llvm.experimental.constrained.exp2.v3f32 applied to the constant
; vector <42.0, 43.0, 44.0> with "round.dynamic" / "fpexcept.strict" metadata.
; Both RUN configurations are expected to emit three separate `callq exp2f`
; calls, spilling earlier results to the stack across the later calls.
; Result assembly differs: the run without +fma merges the lanes with
; unpcklps + unpcklpd and then copies xmm1 to xmm0; the +fma run uses two
; vinsertps instructions.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
Cameron McInally04ae8582018-08-01 14:17:19 +00001946define <3 x float> @constrained_vector_exp2_v3f32() {
1947; NO-FMA-LABEL: constrained_vector_exp2_v3f32:
1948; NO-FMA: # %bb.0: # %entry
1949; NO-FMA-NEXT: subq $40, %rsp
1950; NO-FMA-NEXT: .cfi_def_cfa_offset 48
1951; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1952; NO-FMA-NEXT: callq exp2f
1953; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1954; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1955; NO-FMA-NEXT: callq exp2f
1956; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
1957; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1958; NO-FMA-NEXT: callq exp2f
1959; NO-FMA-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
1960; NO-FMA-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1961; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
1962; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
1963; NO-FMA-NEXT: movaps %xmm1, %xmm0
1964; NO-FMA-NEXT: addq $40, %rsp
1965; NO-FMA-NEXT: .cfi_def_cfa_offset 8
1966; NO-FMA-NEXT: retq
1967;
1968; HAS-FMA-LABEL: constrained_vector_exp2_v3f32:
1969; HAS-FMA: # %bb.0: # %entry
1970; HAS-FMA-NEXT: subq $40, %rsp
1971; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
1972; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1973; HAS-FMA-NEXT: callq exp2f
1974; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1975; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1976; HAS-FMA-NEXT: callq exp2f
1977; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1978; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1979; HAS-FMA-NEXT: callq exp2f
1980; HAS-FMA-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1981; HAS-FMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1982; HAS-FMA-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
1983; HAS-FMA-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
1984; HAS-FMA-NEXT: addq $40, %rsp
1985; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
1986; HAS-FMA-NEXT: retq
1987entry:
1988 %exp2 = call <3 x float> @llvm.experimental.constrained.exp2.v3f32(
1989 <3 x float> <float 42.0, float 43.0, float 44.0>,
1990 metadata !"round.dynamic",
1991 metadata !"fpexcept.strict")
1992 ret <3 x float> %exp2
1993}
1994
; Codegen test: llvm.experimental.constrained.exp2.v3f64 applied to the constant
; vector <42.0, 42.1, 42.2> with "round.dynamic" / "fpexcept.strict" metadata.
; Both RUN configurations are expected to emit three separate `callq exp2`
; calls.
; Without +fma: the first two results are packed with unpcklpd, the high lane is
; later moved into xmm1 with movhlps, and the third result is stored to the
; stack and reloaded with fldl.
; NOTE(review): presumably the third lane is returned on the x87 stack in this
; configuration -- confirm against the calling convention.
; With +fma: the packed pair is spilled as a 32-byte ymm value, vzeroupper is
; emitted before the third call, and vinsertf128 combines the third result with
; the reloaded pair into ymm0.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
1995define <3 x double> @constrained_vector_exp2_v3f64() {
1996; NO-FMA-LABEL: constrained_vector_exp2_v3f64:
1997; NO-FMA: # %bb.0: # %entry
1998; NO-FMA-NEXT: subq $24, %rsp
1999; NO-FMA-NEXT: .cfi_def_cfa_offset 32
2000; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2001; NO-FMA-NEXT: callq exp2
2002; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2003; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2004; NO-FMA-NEXT: callq exp2
2005; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2006; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2007; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2008; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2009; NO-FMA-NEXT: callq exp2
2010; NO-FMA-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
2011; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
2012; NO-FMA-NEXT: movaps %xmm0, %xmm1
2013; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
2014; NO-FMA-NEXT: fldl {{[0-9]+}}(%rsp)
2015; NO-FMA-NEXT: addq $24, %rsp
2016; NO-FMA-NEXT: .cfi_def_cfa_offset 8
2017; NO-FMA-NEXT: retq
2018;
2019; HAS-FMA-LABEL: constrained_vector_exp2_v3f64:
2020; HAS-FMA: # %bb.0: # %entry
2021; HAS-FMA-NEXT: subq $56, %rsp
2022; HAS-FMA-NEXT: .cfi_def_cfa_offset 64
2023; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2024; HAS-FMA-NEXT: callq exp2
2025; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
2026; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2027; HAS-FMA-NEXT: callq exp2
2028; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2029; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2030; HAS-FMA-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
2031; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2032; HAS-FMA-NEXT: vzeroupper
2033; HAS-FMA-NEXT: callq exp2
2034; HAS-FMA-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
2035; HAS-FMA-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2036; HAS-FMA-NEXT: addq $56, %rsp
2037; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
2038; HAS-FMA-NEXT: retq
2039entry:
2040 %exp2 = call <3 x double> @llvm.experimental.constrained.exp2.v3f64(
2041 <3 x double> <double 42.0, double 42.1, double 42.2>,
2042 metadata !"round.dynamic",
2043 metadata !"fpexcept.strict")
2044 ret <3 x double> %exp2
2045}
2046
; Codegen test: llvm.experimental.constrained.exp2.v4f64 applied to the constant
; vector <42.1, 42.2, 42.3, 42.4> with "round.dynamic" / "fpexcept.strict"
; metadata.
; Both RUN configurations are expected to emit four separate `callq exp2` calls,
; spilling earlier results to the stack across the later calls.
; Without +fma: the results are paired with unpcklpd and returned in xmm0
; (reloaded from the stack) and xmm1.
; With +fma: the pairs are built with vunpcklpd and merged into ymm0 by a
; vinsertf128 with a folded reload.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
Cameron McInally2c9bcff2018-07-23 14:40:17 +00002047define <4 x double> @constrained_vector_exp2_v4f64() {
2048; NO-FMA-LABEL: constrained_vector_exp2_v4f64:
2049; NO-FMA: # %bb.0: # %entry
2050; NO-FMA-NEXT: subq $40, %rsp
2051; NO-FMA-NEXT: .cfi_def_cfa_offset 48
2052; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2053; NO-FMA-NEXT: callq exp2
2054; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2055; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2056; NO-FMA-NEXT: callq exp2
2057; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2058; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2059; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2060; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2061; NO-FMA-NEXT: callq exp2
2062; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2063; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2064; NO-FMA-NEXT: callq exp2
2065; NO-FMA-NEXT: movaps %xmm0, %xmm1
2066; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
2067; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
2068; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
2069; NO-FMA-NEXT: addq $40, %rsp
2070; NO-FMA-NEXT: .cfi_def_cfa_offset 8
2071; NO-FMA-NEXT: retq
2072;
2073; HAS-FMA-LABEL: constrained_vector_exp2_v4f64:
2074; HAS-FMA: # %bb.0: # %entry
2075; HAS-FMA-NEXT: subq $40, %rsp
2076; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
2077; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2078; HAS-FMA-NEXT: callq exp2
2079; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
2080; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2081; HAS-FMA-NEXT: callq exp2
2082; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2083; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2084; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
2085; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2086; HAS-FMA-NEXT: callq exp2
2087; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2088; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2089; HAS-FMA-NEXT: callq exp2
2090; HAS-FMA-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
2091; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2092; HAS-FMA-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
2093; HAS-FMA-NEXT: addq $40, %rsp
2094; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
2095; HAS-FMA-NEXT: retq
2096entry:
2097 %exp2 = call <4 x double> @llvm.experimental.constrained.exp2.v4f64(
2098 <4 x double> <double 42.1, double 42.2,
2099 double 42.3, double 42.4>,
2100 metadata !"round.dynamic",
2101 metadata !"fpexcept.strict")
2102 ret <4 x double> %exp2
2103}
2104
; Codegen test: llvm.experimental.constrained.log.v2f64 applied to the constant
; vector <42.0, 42.1> with "round.dynamic" / "fpexcept.strict" metadata.
; Both RUN configurations are expected to emit two separate `callq log` calls;
; the first result is spilled to the stack and merged with the second through
; a folded reload (unpcklpd without +fma, vunpcklpd with +fma).
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
2105define <2 x double> @constrained_vector_log_v2f64() {
2106; NO-FMA-LABEL: constrained_vector_log_v2f64:
Cameron McInally7caac672018-06-15 20:57:55 +00002107; NO-FMA: # %bb.0: # %entry
2108; NO-FMA-NEXT: subq $24, %rsp
2109; NO-FMA-NEXT: .cfi_def_cfa_offset 32
2110; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2111; NO-FMA-NEXT: callq log
2112; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2113; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2114; NO-FMA-NEXT: callq log
2115; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2116; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2117; NO-FMA-NEXT: addq $24, %rsp
2118; NO-FMA-NEXT: .cfi_def_cfa_offset 8
2119; NO-FMA-NEXT: retq
2120;
Cameron McInally2c9bcff2018-07-23 14:40:17 +00002121; HAS-FMA-LABEL: constrained_vector_log_v2f64:
Cameron McInally7caac672018-06-15 20:57:55 +00002122; HAS-FMA: # %bb.0: # %entry
2123; HAS-FMA-NEXT: subq $24, %rsp
2124; HAS-FMA-NEXT: .cfi_def_cfa_offset 32
2125; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2126; HAS-FMA-NEXT: callq log
2127; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
2128; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2129; HAS-FMA-NEXT: callq log
2130; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2131; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2132; HAS-FMA-NEXT: addq $24, %rsp
2133; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
2134; HAS-FMA-NEXT: retq
Cameron McInallyf37bd012018-06-13 14:32:12 +00002135entry:
2136 %log = call <2 x double> @llvm.experimental.constrained.log.v2f64(
2137 <2 x double> <double 42.0, double 42.1>,
2138 metadata !"round.dynamic",
2139 metadata !"fpexcept.strict")
2140 ret <2 x double> %log
2141}
2142
; Constrained log on a constant <3 x float> (round.dynamic, fpexcept.strict).
; Both prefixes expect three scalar calls to `logf`, with two intermediate
; results spilled to the stack. The NO-FMA checks rebuild the vector with
; unpcklps followed by a folded-reload unpcklpd; the HAS-FMA checks use two
; vinsertps instructions instead.
Cameron McInally04ae8582018-08-01 14:17:19 +00002143define <3 x float> @constrained_vector_log_v3f32() {
2144; NO-FMA-LABEL: constrained_vector_log_v3f32:
2145; NO-FMA: # %bb.0: # %entry
2146; NO-FMA-NEXT: subq $40, %rsp
2147; NO-FMA-NEXT: .cfi_def_cfa_offset 48
2148; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2149; NO-FMA-NEXT: callq logf
2150; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2151; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2152; NO-FMA-NEXT: callq logf
2153; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2154; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2155; NO-FMA-NEXT: callq logf
2156; NO-FMA-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
2157; NO-FMA-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2158; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
2159; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
2160; NO-FMA-NEXT: movaps %xmm1, %xmm0
2161; NO-FMA-NEXT: addq $40, %rsp
2162; NO-FMA-NEXT: .cfi_def_cfa_offset 8
2163; NO-FMA-NEXT: retq
2164;
2165; HAS-FMA-LABEL: constrained_vector_log_v3f32:
2166; HAS-FMA: # %bb.0: # %entry
2167; HAS-FMA-NEXT: subq $40, %rsp
2168; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
2169; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2170; HAS-FMA-NEXT: callq logf
2171; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2172; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2173; HAS-FMA-NEXT: callq logf
2174; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
2175; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2176; HAS-FMA-NEXT: callq logf
2177; HAS-FMA-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
2178; HAS-FMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
2179; HAS-FMA-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
2180; HAS-FMA-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
2181; HAS-FMA-NEXT: addq $40, %rsp
2182; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
2183; HAS-FMA-NEXT: retq
2184entry:
2185 %log = call <3 x float> @llvm.experimental.constrained.log.v3f32(
2186 <3 x float> <float 42.0, float 43.0, float 44.0>,
2187 metadata !"round.dynamic",
2188 metadata !"fpexcept.strict")
2189 ret <3 x float> %log
2190}
2191
; Constrained log on a constant <3 x double> (round.dynamic, fpexcept.strict).
; Both prefixes expect three scalar calls to `log`.
; NO-FMA checks: the first two results are packed with unpcklpd and then
; split across xmm0/xmm1 (movhlps); the third result is stored to the stack
; and reloaded with fldl.
; HAS-FMA checks: the packed pair is spilled as a 32-byte ymm0 slot, a
; vzeroupper precedes the third call, and the third result is placed in the
; upper 128 bits with vinsertf128.
2192define <3 x double> @constrained_vector_log_v3f64() {
2193; NO-FMA-LABEL: constrained_vector_log_v3f64:
2194; NO-FMA: # %bb.0: # %entry
2195; NO-FMA-NEXT: subq $24, %rsp
2196; NO-FMA-NEXT: .cfi_def_cfa_offset 32
2197; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2198; NO-FMA-NEXT: callq log
2199; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2200; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2201; NO-FMA-NEXT: callq log
2202; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2203; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2204; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2205; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2206; NO-FMA-NEXT: callq log
2207; NO-FMA-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
2208; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
2209; NO-FMA-NEXT: movaps %xmm0, %xmm1
2210; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
2211; NO-FMA-NEXT: fldl {{[0-9]+}}(%rsp)
2212; NO-FMA-NEXT: addq $24, %rsp
2213; NO-FMA-NEXT: .cfi_def_cfa_offset 8
2214; NO-FMA-NEXT: retq
2215;
2216; HAS-FMA-LABEL: constrained_vector_log_v3f64:
2217; HAS-FMA: # %bb.0: # %entry
2218; HAS-FMA-NEXT: subq $56, %rsp
2219; HAS-FMA-NEXT: .cfi_def_cfa_offset 64
2220; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2221; HAS-FMA-NEXT: callq log
2222; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
2223; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2224; HAS-FMA-NEXT: callq log
2225; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2226; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2227; HAS-FMA-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
2228; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2229; HAS-FMA-NEXT: vzeroupper
2230; HAS-FMA-NEXT: callq log
2231; HAS-FMA-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
2232; HAS-FMA-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2233; HAS-FMA-NEXT: addq $56, %rsp
2234; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
2235; HAS-FMA-NEXT: retq
2236entry:
2237 %log = call <3 x double> @llvm.experimental.constrained.log.v3f64(
2238 <3 x double> <double 42.0, double 42.1, double 42.2>,
2239 metadata !"round.dynamic",
2240 metadata !"fpexcept.strict")
2241 ret <3 x double> %log
2242}
2243
; Constrained log on a constant <4 x double> (round.dynamic, fpexcept.strict).
; Both prefixes expect four scalar calls to `log`, paired up with
; folded-reload (v)unpcklpd. The NO-FMA checks leave the two 128-bit halves
; in xmm0 and xmm1; the HAS-FMA checks join them into ymm0 with a
; folded-reload vinsertf128.
Cameron McInally2c9bcff2018-07-23 14:40:17 +00002244define <4 x double> @constrained_vector_log_v4f64() {
2245; NO-FMA-LABEL: constrained_vector_log_v4f64:
2246; NO-FMA: # %bb.0: # %entry
2247; NO-FMA-NEXT: subq $40, %rsp
2248; NO-FMA-NEXT: .cfi_def_cfa_offset 48
2249; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2250; NO-FMA-NEXT: callq log
2251; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2252; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2253; NO-FMA-NEXT: callq log
2254; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2255; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2256; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2257; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2258; NO-FMA-NEXT: callq log
2259; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2260; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2261; NO-FMA-NEXT: callq log
2262; NO-FMA-NEXT: movaps %xmm0, %xmm1
2263; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
2264; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
2265; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
2266; NO-FMA-NEXT: addq $40, %rsp
2267; NO-FMA-NEXT: .cfi_def_cfa_offset 8
2268; NO-FMA-NEXT: retq
2269;
2270; HAS-FMA-LABEL: constrained_vector_log_v4f64:
2271; HAS-FMA: # %bb.0: # %entry
2272; HAS-FMA-NEXT: subq $40, %rsp
2273; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
2274; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2275; HAS-FMA-NEXT: callq log
2276; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
2277; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2278; HAS-FMA-NEXT: callq log
2279; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2280; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2281; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
2282; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2283; HAS-FMA-NEXT: callq log
2284; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2285; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2286; HAS-FMA-NEXT: callq log
2287; HAS-FMA-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
2288; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2289; HAS-FMA-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
2290; HAS-FMA-NEXT: addq $40, %rsp
2291; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
2292; HAS-FMA-NEXT: retq
2293entry:
2294 %log = call <4 x double> @llvm.experimental.constrained.log.v4f64(
2295 <4 x double> <double 42.0, double 42.1,
2296 double 42.2, double 42.3>,
2297 metadata !"round.dynamic",
2298 metadata !"fpexcept.strict")
2299 ret <4 x double> %log
2300}
2301
; Constrained log10 on a constant <2 x double> (round.dynamic,
; fpexcept.strict). Under both the NO-FMA and HAS-FMA prefixes the checks
; expect two scalar calls to `log10`: the first result is spilled to the stack
; and then merged with the second via a folded-reload (v)unpcklpd.
2302define <2 x double> @constrained_vector_log10_v2f64() {
2303; NO-FMA-LABEL: constrained_vector_log10_v2f64:
Cameron McInally7caac672018-06-15 20:57:55 +00002304; NO-FMA: # %bb.0: # %entry
2305; NO-FMA-NEXT: subq $24, %rsp
2306; NO-FMA-NEXT: .cfi_def_cfa_offset 32
2307; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2308; NO-FMA-NEXT: callq log10
2309; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2310; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2311; NO-FMA-NEXT: callq log10
2312; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2313; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2314; NO-FMA-NEXT: addq $24, %rsp
2315; NO-FMA-NEXT: .cfi_def_cfa_offset 8
2316; NO-FMA-NEXT: retq
2317;
Cameron McInally2c9bcff2018-07-23 14:40:17 +00002318; HAS-FMA-LABEL: constrained_vector_log10_v2f64:
Cameron McInally7caac672018-06-15 20:57:55 +00002319; HAS-FMA: # %bb.0: # %entry
2320; HAS-FMA-NEXT: subq $24, %rsp
2321; HAS-FMA-NEXT: .cfi_def_cfa_offset 32
2322; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2323; HAS-FMA-NEXT: callq log10
2324; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
2325; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2326; HAS-FMA-NEXT: callq log10
2327; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2328; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2329; HAS-FMA-NEXT: addq $24, %rsp
2330; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
2331; HAS-FMA-NEXT: retq
Cameron McInallyf37bd012018-06-13 14:32:12 +00002332entry:
2333 %log10 = call <2 x double> @llvm.experimental.constrained.log10.v2f64(
2334 <2 x double> <double 42.0, double 42.1>,
2335 metadata !"round.dynamic",
2336 metadata !"fpexcept.strict")
2337 ret <2 x double> %log10
2338}
2339
; Constrained log10 on a constant <3 x float> (round.dynamic,
; fpexcept.strict). Both prefixes expect three scalar calls to `log10f`, with
; two intermediate results spilled to the stack. The NO-FMA checks rebuild the
; vector with unpcklps followed by a folded-reload unpcklpd; the HAS-FMA
; checks use two vinsertps instructions instead.
Cameron McInally04ae8582018-08-01 14:17:19 +00002340define <3 x float> @constrained_vector_log10_v3f32() {
2341; NO-FMA-LABEL: constrained_vector_log10_v3f32:
2342; NO-FMA: # %bb.0: # %entry
2343; NO-FMA-NEXT: subq $40, %rsp
2344; NO-FMA-NEXT: .cfi_def_cfa_offset 48
2345; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2346; NO-FMA-NEXT: callq log10f
2347; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2348; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2349; NO-FMA-NEXT: callq log10f
2350; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2351; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2352; NO-FMA-NEXT: callq log10f
2353; NO-FMA-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
2354; NO-FMA-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2355; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
2356; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
2357; NO-FMA-NEXT: movaps %xmm1, %xmm0
2358; NO-FMA-NEXT: addq $40, %rsp
2359; NO-FMA-NEXT: .cfi_def_cfa_offset 8
2360; NO-FMA-NEXT: retq
2361;
2362; HAS-FMA-LABEL: constrained_vector_log10_v3f32:
2363; HAS-FMA: # %bb.0: # %entry
2364; HAS-FMA-NEXT: subq $40, %rsp
2365; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
2366; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2367; HAS-FMA-NEXT: callq log10f
2368; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2369; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2370; HAS-FMA-NEXT: callq log10f
2371; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
2372; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2373; HAS-FMA-NEXT: callq log10f
2374; HAS-FMA-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
2375; HAS-FMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
2376; HAS-FMA-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
2377; HAS-FMA-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
2378; HAS-FMA-NEXT: addq $40, %rsp
2379; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
2380; HAS-FMA-NEXT: retq
2381entry:
2382 %log10 = call <3 x float> @llvm.experimental.constrained.log10.v3f32(
2383 <3 x float> <float 42.0, float 43.0, float 44.0>,
2384 metadata !"round.dynamic",
2385 metadata !"fpexcept.strict")
2386 ret <3 x float> %log10
2387}
2388
; Constrained log10 on a constant <3 x double> (round.dynamic,
; fpexcept.strict). Both prefixes expect three scalar calls to `log10`.
; NO-FMA checks: the first two results are packed with unpcklpd and then
; split across xmm0/xmm1 (movhlps); the third result is stored to the stack
; and reloaded with fldl.
; HAS-FMA checks: the packed pair is spilled as a 32-byte ymm0 slot, a
; vzeroupper precedes the third call, and the third result is placed in the
; upper 128 bits with vinsertf128.
2389define <3 x double> @constrained_vector_log10_v3f64() {
2390; NO-FMA-LABEL: constrained_vector_log10_v3f64:
2391; NO-FMA: # %bb.0: # %entry
2392; NO-FMA-NEXT: subq $24, %rsp
2393; NO-FMA-NEXT: .cfi_def_cfa_offset 32
2394; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2395; NO-FMA-NEXT: callq log10
2396; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2397; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2398; NO-FMA-NEXT: callq log10
2399; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2400; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2401; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2402; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2403; NO-FMA-NEXT: callq log10
2404; NO-FMA-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
2405; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
2406; NO-FMA-NEXT: movaps %xmm0, %xmm1
2407; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
2408; NO-FMA-NEXT: fldl {{[0-9]+}}(%rsp)
2409; NO-FMA-NEXT: addq $24, %rsp
2410; NO-FMA-NEXT: .cfi_def_cfa_offset 8
2411; NO-FMA-NEXT: retq
2412;
2413; HAS-FMA-LABEL: constrained_vector_log10_v3f64:
2414; HAS-FMA: # %bb.0: # %entry
2415; HAS-FMA-NEXT: subq $56, %rsp
2416; HAS-FMA-NEXT: .cfi_def_cfa_offset 64
2417; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2418; HAS-FMA-NEXT: callq log10
2419; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
2420; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2421; HAS-FMA-NEXT: callq log10
2422; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2423; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2424; HAS-FMA-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
2425; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2426; HAS-FMA-NEXT: vzeroupper
2427; HAS-FMA-NEXT: callq log10
2428; HAS-FMA-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
2429; HAS-FMA-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2430; HAS-FMA-NEXT: addq $56, %rsp
2431; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
2432; HAS-FMA-NEXT: retq
2433entry:
2434 %log10 = call <3 x double> @llvm.experimental.constrained.log10.v3f64(
2435 <3 x double> <double 42.0, double 42.1, double 42.2>,
2436 metadata !"round.dynamic",
2437 metadata !"fpexcept.strict")
2438 ret <3 x double> %log10
2439}
2440
; Constrained log10 on a constant <4 x double> (round.dynamic,
; fpexcept.strict). Both prefixes expect four scalar calls to `log10`, paired
; up with folded-reload (v)unpcklpd. The NO-FMA checks leave the two 128-bit
; halves in xmm0 and xmm1; the HAS-FMA checks join them into ymm0 with a
; folded-reload vinsertf128.
Cameron McInally2c9bcff2018-07-23 14:40:17 +00002441define <4 x double> @constrained_vector_log10_v4f64() {
2442; NO-FMA-LABEL: constrained_vector_log10_v4f64:
2443; NO-FMA: # %bb.0: # %entry
2444; NO-FMA-NEXT: subq $40, %rsp
2445; NO-FMA-NEXT: .cfi_def_cfa_offset 48
2446; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2447; NO-FMA-NEXT: callq log10
2448; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2449; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2450; NO-FMA-NEXT: callq log10
2451; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2452; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2453; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2454; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2455; NO-FMA-NEXT: callq log10
2456; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2457; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2458; NO-FMA-NEXT: callq log10
2459; NO-FMA-NEXT: movaps %xmm0, %xmm1
2460; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
2461; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
2462; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
2463; NO-FMA-NEXT: addq $40, %rsp
2464; NO-FMA-NEXT: .cfi_def_cfa_offset 8
2465; NO-FMA-NEXT: retq
2466;
2467; HAS-FMA-LABEL: constrained_vector_log10_v4f64:
2468; HAS-FMA: # %bb.0: # %entry
2469; HAS-FMA-NEXT: subq $40, %rsp
2470; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
2471; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2472; HAS-FMA-NEXT: callq log10
2473; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
2474; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2475; HAS-FMA-NEXT: callq log10
2476; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2477; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2478; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
2479; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2480; HAS-FMA-NEXT: callq log10
2481; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2482; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2483; HAS-FMA-NEXT: callq log10
2484; HAS-FMA-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
2485; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2486; HAS-FMA-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
2487; HAS-FMA-NEXT: addq $40, %rsp
2488; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
2489; HAS-FMA-NEXT: retq
2490entry:
2491 %log10 = call <4 x double> @llvm.experimental.constrained.log10.v4f64(
2492 <4 x double> <double 42.0, double 42.1,
2493 double 42.2, double 42.3>,
2494 metadata !"round.dynamic",
2495 metadata !"fpexcept.strict")
2496 ret <4 x double> %log10
2497}
2498
; Constrained log2 on a constant <2 x double> (round.dynamic,
; fpexcept.strict). Under both the NO-FMA and HAS-FMA prefixes the checks
; expect two scalar calls to `log2`: the first result is spilled to the stack
; and then merged with the second via a folded-reload (v)unpcklpd.
2499define <2 x double> @constrained_vector_log2_v2f64() {
2500; NO-FMA-LABEL: constrained_vector_log2_v2f64:
Cameron McInally7caac672018-06-15 20:57:55 +00002501; NO-FMA: # %bb.0: # %entry
2502; NO-FMA-NEXT: subq $24, %rsp
2503; NO-FMA-NEXT: .cfi_def_cfa_offset 32
2504; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2505; NO-FMA-NEXT: callq log2
2506; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2507; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2508; NO-FMA-NEXT: callq log2
2509; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2510; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2511; NO-FMA-NEXT: addq $24, %rsp
2512; NO-FMA-NEXT: .cfi_def_cfa_offset 8
2513; NO-FMA-NEXT: retq
2514;
Cameron McInally2c9bcff2018-07-23 14:40:17 +00002515; HAS-FMA-LABEL: constrained_vector_log2_v2f64:
Cameron McInally7caac672018-06-15 20:57:55 +00002516; HAS-FMA: # %bb.0: # %entry
2517; HAS-FMA-NEXT: subq $24, %rsp
2518; HAS-FMA-NEXT: .cfi_def_cfa_offset 32
2519; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2520; HAS-FMA-NEXT: callq log2
2521; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
2522; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2523; HAS-FMA-NEXT: callq log2
2524; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2525; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2526; HAS-FMA-NEXT: addq $24, %rsp
2527; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
2528; HAS-FMA-NEXT: retq
Cameron McInallyf37bd012018-06-13 14:32:12 +00002529entry:
2530 %log2 = call <2 x double> @llvm.experimental.constrained.log2.v2f64(
2531 <2 x double> <double 42.0, double 42.1>,
2532 metadata !"round.dynamic",
2533 metadata !"fpexcept.strict")
2534 ret <2 x double> %log2
2535}
2536
; Constrained log2 on a constant <3 x float> (round.dynamic, fpexcept.strict).
; Both prefixes expect three scalar calls to `log2f`, with two intermediate
; results spilled to the stack. The NO-FMA checks rebuild the vector with
; unpcklps followed by a folded-reload unpcklpd; the HAS-FMA checks use two
; vinsertps instructions instead.
Cameron McInally04ae8582018-08-01 14:17:19 +00002537define <3 x float> @constrained_vector_log2_v3f32() {
2538; NO-FMA-LABEL: constrained_vector_log2_v3f32:
2539; NO-FMA: # %bb.0: # %entry
2540; NO-FMA-NEXT: subq $40, %rsp
2541; NO-FMA-NEXT: .cfi_def_cfa_offset 48
2542; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2543; NO-FMA-NEXT: callq log2f
2544; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2545; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2546; NO-FMA-NEXT: callq log2f
2547; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2548; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2549; NO-FMA-NEXT: callq log2f
2550; NO-FMA-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
2551; NO-FMA-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2552; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
2553; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
2554; NO-FMA-NEXT: movaps %xmm1, %xmm0
2555; NO-FMA-NEXT: addq $40, %rsp
2556; NO-FMA-NEXT: .cfi_def_cfa_offset 8
2557; NO-FMA-NEXT: retq
2558;
2559; HAS-FMA-LABEL: constrained_vector_log2_v3f32:
2560; HAS-FMA: # %bb.0: # %entry
2561; HAS-FMA-NEXT: subq $40, %rsp
2562; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
2563; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2564; HAS-FMA-NEXT: callq log2f
2565; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2566; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2567; HAS-FMA-NEXT: callq log2f
2568; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
2569; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2570; HAS-FMA-NEXT: callq log2f
2571; HAS-FMA-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
2572; HAS-FMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
2573; HAS-FMA-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
2574; HAS-FMA-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
2575; HAS-FMA-NEXT: addq $40, %rsp
2576; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
2577; HAS-FMA-NEXT: retq
2578entry:
2579 %log2 = call <3 x float> @llvm.experimental.constrained.log2.v3f32(
2580 <3 x float> <float 42.0, float 43.0, float 44.0>,
2581 metadata !"round.dynamic",
2582 metadata !"fpexcept.strict")
2583 ret <3 x float> %log2
2584}
2585
; Constrained log2 on a constant <3 x double> (round.dynamic,
; fpexcept.strict). Both prefixes expect three scalar calls to `log2`.
; NO-FMA checks: the first two results are packed with unpcklpd and then
; split across xmm0/xmm1 (movhlps); the third result is stored to the stack
; and reloaded with fldl.
; HAS-FMA checks: the packed pair is spilled as a 32-byte ymm0 slot, a
; vzeroupper precedes the third call, and the third result is placed in the
; upper 128 bits with vinsertf128.
2586define <3 x double> @constrained_vector_log2_v3f64() {
2587; NO-FMA-LABEL: constrained_vector_log2_v3f64:
2588; NO-FMA: # %bb.0: # %entry
2589; NO-FMA-NEXT: subq $24, %rsp
2590; NO-FMA-NEXT: .cfi_def_cfa_offset 32
2591; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2592; NO-FMA-NEXT: callq log2
2593; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2594; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2595; NO-FMA-NEXT: callq log2
2596; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2597; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2598; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2599; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2600; NO-FMA-NEXT: callq log2
2601; NO-FMA-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
2602; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
2603; NO-FMA-NEXT: movaps %xmm0, %xmm1
2604; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
2605; NO-FMA-NEXT: fldl {{[0-9]+}}(%rsp)
2606; NO-FMA-NEXT: addq $24, %rsp
2607; NO-FMA-NEXT: .cfi_def_cfa_offset 8
2608; NO-FMA-NEXT: retq
2609;
2610; HAS-FMA-LABEL: constrained_vector_log2_v3f64:
2611; HAS-FMA: # %bb.0: # %entry
2612; HAS-FMA-NEXT: subq $56, %rsp
2613; HAS-FMA-NEXT: .cfi_def_cfa_offset 64
2614; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2615; HAS-FMA-NEXT: callq log2
2616; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
2617; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2618; HAS-FMA-NEXT: callq log2
2619; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2620; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2621; HAS-FMA-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
2622; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2623; HAS-FMA-NEXT: vzeroupper
2624; HAS-FMA-NEXT: callq log2
2625; HAS-FMA-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
2626; HAS-FMA-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2627; HAS-FMA-NEXT: addq $56, %rsp
2628; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
2629; HAS-FMA-NEXT: retq
2630entry:
2631 %log2 = call <3 x double> @llvm.experimental.constrained.log2.v3f64(
2632 <3 x double> <double 42.0, double 42.1, double 42.2>,
2633 metadata !"round.dynamic",
2634 metadata !"fpexcept.strict")
2635 ret <3 x double> %log2
2636}
2637
; Constrained log2 on a constant <4 x double> (round.dynamic,
; fpexcept.strict). Both prefixes expect four scalar calls to `log2`, paired
; up with folded-reload (v)unpcklpd. The NO-FMA checks leave the two 128-bit
; halves in xmm0 and xmm1; the HAS-FMA checks join them into ymm0 with a
; folded-reload vinsertf128.
Cameron McInally2c9bcff2018-07-23 14:40:17 +00002638define <4 x double> @constrained_vector_log2_v4f64() {
2639; NO-FMA-LABEL: constrained_vector_log2_v4f64:
2640; NO-FMA: # %bb.0: # %entry
2641; NO-FMA-NEXT: subq $40, %rsp
2642; NO-FMA-NEXT: .cfi_def_cfa_offset 48
2643; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2644; NO-FMA-NEXT: callq log2
2645; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2646; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2647; NO-FMA-NEXT: callq log2
2648; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2649; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2650; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2651; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2652; NO-FMA-NEXT: callq log2
2653; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2654; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2655; NO-FMA-NEXT: callq log2
2656; NO-FMA-NEXT: movaps %xmm0, %xmm1
2657; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
2658; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
2659; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
2660; NO-FMA-NEXT: addq $40, %rsp
2661; NO-FMA-NEXT: .cfi_def_cfa_offset 8
2662; NO-FMA-NEXT: retq
2663;
2664; HAS-FMA-LABEL: constrained_vector_log2_v4f64:
2665; HAS-FMA: # %bb.0: # %entry
2666; HAS-FMA-NEXT: subq $40, %rsp
2667; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
2668; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2669; HAS-FMA-NEXT: callq log2
2670; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
2671; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2672; HAS-FMA-NEXT: callq log2
2673; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2674; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2675; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
2676; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2677; HAS-FMA-NEXT: callq log2
2678; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2679; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2680; HAS-FMA-NEXT: callq log2
2681; HAS-FMA-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
2682; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2683; HAS-FMA-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
2684; HAS-FMA-NEXT: addq $40, %rsp
2685; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
2686; HAS-FMA-NEXT: retq
2687entry:
2688 %log2 = call <4 x double> @llvm.experimental.constrained.log2.v4f64(
2689 <4 x double> <double 42.0, double 42.1,
2690 double 42.2, double 42.3>,
2691 metadata !"round.dynamic",
2692 metadata !"fpexcept.strict")
2693 ret <4 x double> %log2
2694}
2695
; Constrained rint on a constant <2 x double> (round.dynamic, fpexcept.strict).
; The two prefixes expect different lowerings. The NO-FMA checks expect two
; scalar calls to `rint` merged with a folded-reload unpcklpd. The HAS-FMA
; checks expect no calls and no stack frame, only a single
; `vroundpd $4` on a RIP-relative memory operand.
2696define <2 x double> @constrained_vector_rint_v2f64() {
2697; NO-FMA-LABEL: constrained_vector_rint_v2f64:
Cameron McInally7caac672018-06-15 20:57:55 +00002698; NO-FMA: # %bb.0: # %entry
2699; NO-FMA-NEXT: subq $24, %rsp
2700; NO-FMA-NEXT: .cfi_def_cfa_offset 32
2701; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2702; NO-FMA-NEXT: callq rint
2703; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2704; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2705; NO-FMA-NEXT: callq rint
2706; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2707; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2708; NO-FMA-NEXT: addq $24, %rsp
2709; NO-FMA-NEXT: .cfi_def_cfa_offset 8
2710; NO-FMA-NEXT: retq
2711;
Cameron McInally2c9bcff2018-07-23 14:40:17 +00002712; HAS-FMA-LABEL: constrained_vector_rint_v2f64:
Cameron McInally7caac672018-06-15 20:57:55 +00002713; HAS-FMA: # %bb.0: # %entry
2714; HAS-FMA-NEXT: vroundpd $4, {{.*}}(%rip), %xmm0
2715; HAS-FMA-NEXT: retq
Cameron McInallyf37bd012018-06-13 14:32:12 +00002716entry:
2717 %rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
2718 <2 x double> <double 42.1, double 42.0>,
2719 metadata !"round.dynamic",
2720 metadata !"fpexcept.strict")
2721 ret <2 x double> %rint
2722}
2723
; Constrained rint on a constant <3 x float> (round.dynamic, fpexcept.strict).
; The NO-FMA checks expect three scalar calls to `rintf`, reassembled with
; unpcklps and a folded-reload unpcklpd. The HAS-FMA checks expect no calls:
; three scalar `vroundss $4` operations whose results are merged with two
; vinsertps instructions.
Cameron McInally04ae8582018-08-01 14:17:19 +00002724define <3 x float> @constrained_vector_rint_v3f32() {
2725; NO-FMA-LABEL: constrained_vector_rint_v3f32:
2726; NO-FMA: # %bb.0: # %entry
2727; NO-FMA-NEXT: subq $40, %rsp
2728; NO-FMA-NEXT: .cfi_def_cfa_offset 48
2729; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2730; NO-FMA-NEXT: callq rintf
2731; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2732; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2733; NO-FMA-NEXT: callq rintf
2734; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2735; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2736; NO-FMA-NEXT: callq rintf
2737; NO-FMA-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
2738; NO-FMA-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2739; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
2740; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
2741; NO-FMA-NEXT: movaps %xmm1, %xmm0
2742; NO-FMA-NEXT: addq $40, %rsp
2743; NO-FMA-NEXT: .cfi_def_cfa_offset 8
2744; NO-FMA-NEXT: retq
2745;
2746; HAS-FMA-LABEL: constrained_vector_rint_v3f32:
2747; HAS-FMA: # %bb.0: # %entry
2748; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2749; HAS-FMA-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
2750; HAS-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
2751; HAS-FMA-NEXT: vroundss $4, %xmm1, %xmm1, %xmm1
2752; HAS-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
2753; HAS-FMA-NEXT: vroundss $4, %xmm2, %xmm2, %xmm2
2754; HAS-FMA-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
2755; HAS-FMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
2756; HAS-FMA-NEXT: retq
2757entry:
2758 %rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32(
2759 <3 x float> <float 42.0, float 43.0, float 44.0>,
2760 metadata !"round.dynamic",
2761 metadata !"fpexcept.strict")
2762 ret <3 x float> %rint
2763}
2764
; Constrained rint on a constant <3 x double> (round.dynamic, fpexcept.strict).
; NO-FMA checks: three scalar calls to `rint`; the first two results are
; packed with unpcklpd and split across xmm0/xmm1 (movhlps), and the third is
; stored to the stack and reloaded with fldl.
; HAS-FMA checks: no calls. One scalar `vroundsd $4` and one packed
; `vroundpd $4` on a RIP-relative operand are joined with vinsertf128.
2765define <3 x double> @constrained_vector_rint_v3f64() {
2766; NO-FMA-LABEL: constrained_vector_rint_v3f64:
2767; NO-FMA: # %bb.0: # %entry
2768; NO-FMA-NEXT: subq $24, %rsp
2769; NO-FMA-NEXT: .cfi_def_cfa_offset 32
2770; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2771; NO-FMA-NEXT: callq rint
2772; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2773; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2774; NO-FMA-NEXT: callq rint
2775; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2776; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2777; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2778; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2779; NO-FMA-NEXT: callq rint
2780; NO-FMA-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
2781; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
2782; NO-FMA-NEXT: movaps %xmm0, %xmm1
2783; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
2784; NO-FMA-NEXT: fldl {{[0-9]+}}(%rsp)
2785; NO-FMA-NEXT: addq $24, %rsp
2786; NO-FMA-NEXT: .cfi_def_cfa_offset 8
2787; NO-FMA-NEXT: retq
2788;
2789; HAS-FMA-LABEL: constrained_vector_rint_v3f64:
2790; HAS-FMA: # %bb.0: # %entry
2791; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2792; HAS-FMA-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0
2793; HAS-FMA-NEXT: vroundpd $4, {{.*}}(%rip), %xmm1
2794; HAS-FMA-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2795; HAS-FMA-NEXT: retq
2796entry:
2797 %rint = call <3 x double> @llvm.experimental.constrained.rint.v3f64(
2798 <3 x double> <double 42.0, double 42.1, double 42.2>,
2799 metadata !"round.dynamic",
2800 metadata !"fpexcept.strict")
2801 ret <3 x double> %rint
2802}
2803
; Constrained rint on a constant <4 x double> (round.dynamic, fpexcept.strict).
; The NO-FMA checks expect four scalar calls to `rint`, paired with
; folded-reload unpcklpd and left in xmm0/xmm1. The HAS-FMA checks expect no
; calls and no stack frame, only a single 256-bit `vroundpd $4` on a
; RIP-relative memory operand into ymm0.
Cameron McInally2c9bcff2018-07-23 14:40:17 +00002804define <4 x double> @constrained_vector_rint_v4f64() {
2805; NO-FMA-LABEL: constrained_vector_rint_v4f64:
2806; NO-FMA: # %bb.0: # %entry
2807; NO-FMA-NEXT: subq $40, %rsp
2808; NO-FMA-NEXT: .cfi_def_cfa_offset 48
2809; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2810; NO-FMA-NEXT: callq rint
2811; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2812; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2813; NO-FMA-NEXT: callq rint
2814; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2815; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2816; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2817; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2818; NO-FMA-NEXT: callq rint
2819; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2820; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2821; NO-FMA-NEXT: callq rint
2822; NO-FMA-NEXT: movaps %xmm0, %xmm1
2823; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
2824; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
2825; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
2826; NO-FMA-NEXT: addq $40, %rsp
2827; NO-FMA-NEXT: .cfi_def_cfa_offset 8
2828; NO-FMA-NEXT: retq
2829;
2830; HAS-FMA-LABEL: constrained_vector_rint_v4f64:
2831; HAS-FMA: # %bb.0: # %entry
2832; HAS-FMA-NEXT: vroundpd $4, {{.*}}(%rip), %ymm0
2833; HAS-FMA-NEXT: retq
2834entry:
2835 %rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64(
2836 <4 x double> <double 42.1, double 42.2,
2837 double 42.3, double 42.4>,
2838 metadata !"round.dynamic",
2839 metadata !"fpexcept.strict")
2840 ret <4 x double> %rint
2841}
2842
; Test: constrained nearbyint on the constant <2 x double> vector <42.1, 42.0>,
; called with "round.dynamic" and "fpexcept.strict" metadata; the result is
; returned directly.
; Expected code, per the autogenerated assertions below:
;   * NO-FMA run (no -mattr=+fma): two scalar `callq nearbyint` libcalls whose
;     results are combined into xmm0 with unpcklpd.
;   * HAS-FMA run (-mattr=+fma): one `vroundpd $12` from a rip-relative
;     constant into xmm0.
2843define <2 x double> @constrained_vector_nearbyint_v2f64() {
2844; NO-FMA-LABEL: constrained_vector_nearbyint_v2f64:
Cameron McInally7caac672018-06-15 20:57:55 +00002845; NO-FMA: # %bb.0: # %entry
2846; NO-FMA-NEXT: subq $24, %rsp
2847; NO-FMA-NEXT: .cfi_def_cfa_offset 32
2848; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2849; NO-FMA-NEXT: callq nearbyint
2850; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2851; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2852; NO-FMA-NEXT: callq nearbyint
2853; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2854; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2855; NO-FMA-NEXT: addq $24, %rsp
2856; NO-FMA-NEXT: .cfi_def_cfa_offset 8
2857; NO-FMA-NEXT: retq
2858;
Cameron McInally2c9bcff2018-07-23 14:40:17 +00002859; HAS-FMA-LABEL: constrained_vector_nearbyint_v2f64:
Cameron McInally7caac672018-06-15 20:57:55 +00002860; HAS-FMA: # %bb.0: # %entry
2861; HAS-FMA-NEXT: vroundpd $12, {{.*}}(%rip), %xmm0
2862; HAS-FMA-NEXT: retq
Cameron McInallyf37bd012018-06-13 14:32:12 +00002863entry:
2864 %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
2865 <2 x double> <double 42.1, double 42.0>,
2866 metadata !"round.dynamic",
2867 metadata !"fpexcept.strict")
2868 ret <2 x double> %nearby
2869}
2870
; Test: constrained nearbyint on the constant <3 x float> vector
; <42.0, 43.0, 44.0>, called with "round.dynamic" and "fpexcept.strict"
; metadata; the result is returned directly.
; Expected code, per the autogenerated assertions below:
;   * NO-FMA run (no -mattr=+fma): three scalar `callq nearbyintf` libcalls;
;     the results are merged with unpcklps and unpcklpd in xmm1 and then copied
;     to xmm0.
;   * HAS-FMA run (-mattr=+fma): three `vroundss $12` instructions, one per
;     element, assembled into xmm0 with two vinsertps.
Cameron McInally04ae8582018-08-01 14:17:19 +00002871define <3 x float> @constrained_vector_nearbyint_v3f32() {
2872; NO-FMA-LABEL: constrained_vector_nearbyint_v3f32:
2873; NO-FMA: # %bb.0: # %entry
2874; NO-FMA-NEXT: subq $40, %rsp
2875; NO-FMA-NEXT: .cfi_def_cfa_offset 48
2876; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2877; NO-FMA-NEXT: callq nearbyintf
2878; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2879; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2880; NO-FMA-NEXT: callq nearbyintf
2881; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2882; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2883; NO-FMA-NEXT: callq nearbyintf
2884; NO-FMA-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
2885; NO-FMA-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2886; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
2887; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
2888; NO-FMA-NEXT: movaps %xmm1, %xmm0
2889; NO-FMA-NEXT: addq $40, %rsp
2890; NO-FMA-NEXT: .cfi_def_cfa_offset 8
2891; NO-FMA-NEXT: retq
2892;
2893; HAS-FMA-LABEL: constrained_vector_nearbyint_v3f32:
2894; HAS-FMA: # %bb.0: # %entry
2895; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2896; HAS-FMA-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
2897; HAS-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
2898; HAS-FMA-NEXT: vroundss $12, %xmm1, %xmm1, %xmm1
2899; HAS-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
2900; HAS-FMA-NEXT: vroundss $12, %xmm2, %xmm2, %xmm2
2901; HAS-FMA-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
2902; HAS-FMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
2903; HAS-FMA-NEXT: retq
2904entry:
2905 %nearby = call <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(
2906 <3 x float> <float 42.0, float 43.0, float 44.0>,
2907 metadata !"round.dynamic",
2908 metadata !"fpexcept.strict")
2909 ret <3 x float> %nearby
2910}
2911
; Test: constrained nearbyint on the constant <3 x double> vector
; <42.0, 42.1, 42.2>, called with "round.dynamic" and "fpexcept.strict"
; metadata; the result is returned directly.
; Expected code, per the autogenerated assertions below:
;   * NO-FMA run (no -mattr=+fma): three scalar `callq nearbyint` libcalls. Two
;     results are packed with unpcklpd and then split across xmm0/xmm1 via
;     movhlps; the third is stored to the stack and reloaded with fldl.
;   * HAS-FMA run (-mattr=+fma): one `vroundsd $12` plus one `vroundpd $12` on
;     xmm1, joined into ymm0 with vinsertf128.
; NOTE(review): this function is spelled `nearby` while the other tests of this
; intrinsic in the file are spelled `nearbyint`. The name is left untouched
; because the LABEL assertions below match on it.
2912define <3 x double> @constrained_vector_nearby_v3f64() {
2913; NO-FMA-LABEL: constrained_vector_nearby_v3f64:
2914; NO-FMA: # %bb.0: # %entry
2915; NO-FMA-NEXT: subq $24, %rsp
2916; NO-FMA-NEXT: .cfi_def_cfa_offset 32
2917; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2918; NO-FMA-NEXT: callq nearbyint
2919; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2920; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2921; NO-FMA-NEXT: callq nearbyint
2922; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2923; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2924; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2925; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2926; NO-FMA-NEXT: callq nearbyint
2927; NO-FMA-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
2928; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
2929; NO-FMA-NEXT: movaps %xmm0, %xmm1
2930; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
2931; NO-FMA-NEXT: fldl {{[0-9]+}}(%rsp)
2932; NO-FMA-NEXT: addq $24, %rsp
2933; NO-FMA-NEXT: .cfi_def_cfa_offset 8
2934; NO-FMA-NEXT: retq
2935;
2936; HAS-FMA-LABEL: constrained_vector_nearby_v3f64:
2937; HAS-FMA: # %bb.0: # %entry
2938; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2939; HAS-FMA-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0
2940; HAS-FMA-NEXT: vroundpd $12, {{.*}}(%rip), %xmm1
2941; HAS-FMA-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2942; HAS-FMA-NEXT: retq
2943entry:
2944 %nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(
2945 <3 x double> <double 42.0, double 42.1, double 42.2>,
2946 metadata !"round.dynamic",
2947 metadata !"fpexcept.strict")
2948 ret <3 x double> %nearby
2949}
2950
; Test: constrained nearbyint on the constant <4 x double> vector
; <42.1, 42.2, 42.3, 42.4>, called with "round.dynamic" and "fpexcept.strict"
; metadata; the result is returned directly.
; Expected code, per the autogenerated assertions below:
;   * NO-FMA run (no -mattr=+fma): four scalar `callq nearbyint` libcalls. The
;     first two results are paired with unpcklpd and end up in xmm0; the last
;     two are paired the same way into xmm1.
;   * HAS-FMA run (-mattr=+fma): one `vroundpd $12` from a rip-relative
;     constant into ymm0.
Cameron McInally2c9bcff2018-07-23 14:40:17 +00002951define <4 x double> @constrained_vector_nearbyint_v4f64() {
2952; NO-FMA-LABEL: constrained_vector_nearbyint_v4f64:
2953; NO-FMA: # %bb.0: # %entry
2954; NO-FMA-NEXT: subq $40, %rsp
2955; NO-FMA-NEXT: .cfi_def_cfa_offset 48
2956; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2957; NO-FMA-NEXT: callq nearbyint
2958; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2959; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2960; NO-FMA-NEXT: callq nearbyint
2961; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2962; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
2963; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
2964; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2965; NO-FMA-NEXT: callq nearbyint
2966; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2967; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2968; NO-FMA-NEXT: callq nearbyint
2969; NO-FMA-NEXT: movaps %xmm0, %xmm1
2970; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
2971; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
2972; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
2973; NO-FMA-NEXT: addq $40, %rsp
2974; NO-FMA-NEXT: .cfi_def_cfa_offset 8
2975; NO-FMA-NEXT: retq
2976;
2977; HAS-FMA-LABEL: constrained_vector_nearbyint_v4f64:
2978; HAS-FMA: # %bb.0: # %entry
2979; HAS-FMA-NEXT: vroundpd $12, {{.*}}(%rip), %ymm0
2980; HAS-FMA-NEXT: retq
2981entry:
2982 %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(
2983 <4 x double> <double 42.1, double 42.2,
2984 double 42.3, double 42.4>,
2985 metadata !"round.dynamic",
2986 metadata !"fpexcept.strict")
2987 ret <4 x double> %nearby
2988}
Cameron McInallyf37bd012018-06-13 14:32:12 +00002989
Cameron McInally2c9bcff2018-07-23 14:40:17 +00002990; Single width declarations
; Declarations of the constrained intrinsics at <2 x double>, plus a
; <4 x float> variant for fma only; both types are 128 bits wide. Unary
; operations take (value, rounding metadata, exception metadata); binary
; operations and fma take two or three vector operands before the metadata
; pair; powi takes an i32 exponent as its second operand.
Cameron McInallyf37bd012018-06-13 14:32:12 +00002991declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
2992declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata)
2993declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
2994declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
Cameron McInally7caac672018-06-15 20:57:55 +00002995declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
2996declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
Cameron McInallyf37bd012018-06-13 14:32:12 +00002997declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
Cameron McInallyf37bd012018-06-13 14:32:12 +00002998declare <2 x double> @llvm.experimental.constrained.pow.v2f64(<2 x double>, <2 x double>, metadata, metadata)
Cameron McInally7caac672018-06-15 20:57:55 +00002999declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32, metadata, metadata)
Cameron McInallyf37bd012018-06-13 14:32:12 +00003000declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata)
3001declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata)
3002declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata)
3003declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata)
3004declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata)
3005declare <2 x double> @llvm.experimental.constrained.log10.v2f64(<2 x double>, metadata, metadata)
3006declare <2 x double> @llvm.experimental.constrained.log2.v2f64(<2 x double>, metadata, metadata)
3007declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata)
3008declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
Cameron McInally2c9bcff2018-07-23 14:40:17 +00003009
Cameron McInally04ae8582018-08-01 14:17:19 +00003010; Illegal width declarations
; Declarations of the constrained intrinsics at the three-element types
; <3 x float> and <3 x double>; every operation is declared for both types.
; NOTE(review): "illegal" presumably means these three-element vectors are not
; legal vector types for the target and must be legalized during lowering --
; confirm against the type legalizer.
3011declare <3 x float> @llvm.experimental.constrained.fdiv.v3f32(<3 x float>, <3 x float>, metadata, metadata)
3012declare <3 x double> @llvm.experimental.constrained.fdiv.v3f64(<3 x double>, <3 x double>, metadata, metadata)
3013declare <3 x float> @llvm.experimental.constrained.fmul.v3f32(<3 x float>, <3 x float>, metadata, metadata)
3014declare <3 x double> @llvm.experimental.constrained.fmul.v3f64(<3 x double>, <3 x double>, metadata, metadata)
3015declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata)
3016declare <3 x double> @llvm.experimental.constrained.fadd.v3f64(<3 x double>, <3 x double>, metadata, metadata)
3017declare <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float>, <3 x float>, metadata, metadata)
3018declare <3 x double> @llvm.experimental.constrained.fsub.v3f64(<3 x double>, <3 x double>, metadata, metadata)
3019declare <3 x float> @llvm.experimental.constrained.fma.v3f32(<3 x float>, <3 x float>, <3 x float>, metadata, metadata)
3020declare <3 x double> @llvm.experimental.constrained.fma.v3f64(<3 x double>, <3 x double>, <3 x double>, metadata, metadata)
3021declare <3 x float> @llvm.experimental.constrained.sqrt.v3f32(<3 x float>, metadata, metadata)
3022declare <3 x double> @llvm.experimental.constrained.sqrt.v3f64(<3 x double>, metadata, metadata)
3023declare <3 x float> @llvm.experimental.constrained.pow.v3f32(<3 x float>, <3 x float>, metadata, metadata)
3024declare <3 x double> @llvm.experimental.constrained.pow.v3f64(<3 x double>, <3 x double>, metadata, metadata)
3025declare <3 x float> @llvm.experimental.constrained.powi.v3f32(<3 x float>, i32, metadata, metadata)
3026declare <3 x double> @llvm.experimental.constrained.powi.v3f64(<3 x double>, i32, metadata, metadata)
3027declare <3 x float> @llvm.experimental.constrained.sin.v3f32(<3 x float>, metadata, metadata)
3028declare <3 x double> @llvm.experimental.constrained.sin.v3f64(<3 x double>, metadata, metadata)
3029declare <3 x float> @llvm.experimental.constrained.cos.v3f32(<3 x float>, metadata, metadata)
3030declare <3 x double> @llvm.experimental.constrained.cos.v3f64(<3 x double>, metadata, metadata)
3031declare <3 x float> @llvm.experimental.constrained.exp.v3f32(<3 x float>, metadata, metadata)
3032declare <3 x double> @llvm.experimental.constrained.exp.v3f64(<3 x double>, metadata, metadata)
3033declare <3 x float> @llvm.experimental.constrained.exp2.v3f32(<3 x float>, metadata, metadata)
3034declare <3 x double> @llvm.experimental.constrained.exp2.v3f64(<3 x double>, metadata, metadata)
3035declare <3 x float> @llvm.experimental.constrained.log.v3f32(<3 x float>, metadata, metadata)
3036declare <3 x double> @llvm.experimental.constrained.log.v3f64(<3 x double>, metadata, metadata)
3037declare <3 x float> @llvm.experimental.constrained.log10.v3f32(<3 x float>, metadata, metadata)
3038declare <3 x double> @llvm.experimental.constrained.log10.v3f64(<3 x double>, metadata, metadata)
3039declare <3 x float> @llvm.experimental.constrained.log2.v3f32(<3 x float>, metadata, metadata)
3040declare <3 x double> @llvm.experimental.constrained.log2.v3f64(<3 x double>, metadata, metadata)
3041declare <3 x float> @llvm.experimental.constrained.rint.v3f32(<3 x float>, metadata, metadata)
3042declare <3 x double> @llvm.experimental.constrained.rint.v3f64(<3 x double>, metadata, metadata)
3043declare <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(<3 x float>, metadata, metadata)
3044declare <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(<3 x double>, metadata, metadata)
3045
Cameron McInally2c9bcff2018-07-23 14:40:17 +00003046; Double width declarations
; Declarations of the constrained intrinsics at <4 x double>, plus an
; <8 x float> variant for fma only; both types are 256 bits wide, i.e. twice
; the width of the <2 x double> / <4 x float> declarations earlier in the file.
3047declare <4 x double> @llvm.experimental.constrained.fdiv.v4f64(<4 x double>, <4 x double>, metadata, metadata)
3048declare <4 x double> @llvm.experimental.constrained.fmul.v4f64(<4 x double>, <4 x double>, metadata, metadata)
3049declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata)
3050declare <4 x double> @llvm.experimental.constrained.fsub.v4f64(<4 x double>, <4 x double>, metadata, metadata)
3051declare <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double>, <4 x double>, <4 x double>, metadata, metadata)
3052declare <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, metadata, metadata)
3053declare <4 x double> @llvm.experimental.constrained.sqrt.v4f64(<4 x double>, metadata, metadata)
3054declare <4 x double> @llvm.experimental.constrained.pow.v4f64(<4 x double>, <4 x double>, metadata, metadata)
3055declare <4 x double> @llvm.experimental.constrained.powi.v4f64(<4 x double>, i32, metadata, metadata)
3056declare <4 x double> @llvm.experimental.constrained.sin.v4f64(<4 x double>, metadata, metadata)
3057declare <4 x double> @llvm.experimental.constrained.cos.v4f64(<4 x double>, metadata, metadata)
3058declare <4 x double> @llvm.experimental.constrained.exp.v4f64(<4 x double>, metadata, metadata)
3059declare <4 x double> @llvm.experimental.constrained.exp2.v4f64(<4 x double>, metadata, metadata)
3060declare <4 x double> @llvm.experimental.constrained.log.v4f64(<4 x double>, metadata, metadata)
3061declare <4 x double> @llvm.experimental.constrained.log10.v4f64(<4 x double>, metadata, metadata)
3062declare <4 x double> @llvm.experimental.constrained.log2.v4f64(<4 x double>, metadata, metadata)
3063declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, metadata, metadata)
3064declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)