; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX

;
; NOTE: this is generated by utils/update_llc_test_checks.py but we can't check NAN types (PR30443),
; so we need to edit it to remove the NAN constant comments
;

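; When a fold does not fire, FCOPYSIGN is expanded into plain bit arithmetic on the
; IEEE-754 encoding, roughly (mag & 0x7fffffff) | (sgn & 0x80000000) per f32 lane
; (analogous masks for f64); that expansion is the and/and/or pattern against
; RIP-relative mask constants seen in the checks below.
;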
; copysign(x, c1) -> fabs(x) iff ispos(c1)
define <4 x float> @combine_vec_fcopysign_pos_constant0(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_pos_constant0:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm1 = [2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00]
; SSE-NEXT: andps {{.*}}(%rip), %xmm1
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_pos_constant0:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm1, %xmm2, %xmm1
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>)
  ret <4 x float> %1
}

define <4 x float> @combine_vec_fcopysign_pos_constant1(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_pos_constant1:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm1 = [0.000000e+00,2.000000e+00,4.000000e+00,8.000000e+00]
; SSE-NEXT: andps {{.*}}(%rip), %xmm1
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_pos_constant1:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float 0.0, float 2.0, float 4.0, float 8.0>)
  ret <4 x float> %1
}

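; copysign(x, fabs(y)) -> fabs(x)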
define <4 x float> @combine_vec_fcopysign_fabs_sgn(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fabs_sgn:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_fabs_sgn:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %y)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %1)
  ret <4 x float> %2
}

; copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
define <4 x float> @combine_vec_fcopysign_neg_constant0(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_neg_constant0:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm1 = [-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00]
; SSE-NEXT: andps {{.*}}(%rip), %xmm1
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_neg_constant0:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm1, %xmm2, %xmm1
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float -2.0, float -2.0, float -2.0, float -2.0>)
  ret <4 x float> %1
}

define <4 x float> @combine_vec_fcopysign_neg_constant1(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_neg_constant1:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm1 = [-0.000000e+00,-2.000000e+00,-4.000000e+00,-8.000000e+00]
; SSE-NEXT: andps {{.*}}(%rip), %xmm1
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_neg_constant1:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float -0.0, float -2.0, float -4.0, float -8.0>)
  ret <4 x float> %1
}

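; copysign(x, fneg(fabs(y))) -> fneg(fabs(x))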
define <4 x float> @combine_vec_fcopysign_fneg_fabs_sgn(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fneg_fabs_sgn:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
; SSE-NEXT: orps %xmm2, %xmm1
; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_fneg_fabs_sgn:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vorps %xmm2, %xmm1, %xmm1
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm3
; AVX-NEXT: vandps %xmm3, %xmm0, %xmm0
; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %y)
  %2 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %1
  %3 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %2)
  ret <4 x float> %3
}

; copysign(fabs(x), y) -> copysign(x, y)
define <4 x float> @combine_vec_fcopysign_fabs_mag(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fabs_mag:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm1
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_fabs_mag:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %1, <4 x float> %y)
  ret <4 x float> %2
}

; copysign(fneg(x), y) -> copysign(x, y)
define <4 x float> @combine_vec_fcopysign_fneg_mag(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fneg_mag:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm1
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_fneg_mag:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %x
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %1, <4 x float> %y)
  ret <4 x float> %2
}

; copysign(copysign(x,z), y) -> copysign(x, y)
define <4 x float> @combine_vec_fcopysign_fcopysign_mag(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; SSE-LABEL: combine_vec_fcopysign_fcopysign_mag:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm1
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_fcopysign_mag:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %z)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %1, <4 x float> %y)
  ret <4 x float> %2
}

; copysign(x, copysign(y,z)) -> copysign(x, z)
define <4 x float> @combine_vec_fcopysign_fcopysign_sgn(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; SSE-LABEL: combine_vec_fcopysign_fcopysign_sgn:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm2
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: orps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_fcopysign_sgn:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT: vandps %xmm1, %xmm2, %xmm1
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %y, <4 x float> %z)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %1)
  ret <4 x float> %2
}

; copysign(x, fp_extend(y)) -> copysign(x, y)
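; (the SSE sequence below is long because the f32->f64 extension and the sign/magnitude
; masking are scalarized element by element; AVX keeps everything in packed ymm ops)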
define <4 x double> @combine_vec_fcopysign_fpext_sgn(<4 x double> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fpext_sgn:
; SSE: # BB#0:
; SSE-NEXT: movaps %xmm2, %xmm3
; SSE-NEXT: cvtss2sd %xmm2, %xmm4
; SSE-NEXT: movshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
; SSE-NEXT: movaps %xmm2, %xmm6
; SSE-NEXT: movhlps {{.*#+}} xmm6 = xmm6[1,1]
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT: movaps {{.*#+}} xmm7
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: andps %xmm7, %xmm2
; SSE-NEXT: movaps {{.*#+}} xmm8 = [-0.000000e+00,-0.000000e+00]
; SSE-NEXT: andps %xmm8, %xmm4
; SSE-NEXT: orps %xmm4, %xmm2
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: andps %xmm7, %xmm0
; SSE-NEXT: xorps %xmm4, %xmm4
; SSE-NEXT: cvtss2sd %xmm5, %xmm4
; SSE-NEXT: andps %xmm8, %xmm4
; SSE-NEXT: orps %xmm0, %xmm4
; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm4[0]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: andps %xmm7, %xmm0
; SSE-NEXT: cvtss2sd %xmm3, %xmm3
; SSE-NEXT: andps %xmm8, %xmm3
; SSE-NEXT: orps %xmm0, %xmm3
; SSE-NEXT: andps %xmm7, %xmm1
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtss2sd %xmm6, %xmm0
; SSE-NEXT: andps %xmm8, %xmm0
; SSE-NEXT: orps %xmm0, %xmm1
; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_fpext_sgn:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vcvtps2pd %xmm1, %ymm1
; AVX-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %1 = fpext <4 x float> %y to <4 x double>
  %2 = call <4 x double> @llvm.copysign.v4f64(<4 x double> %x, <4 x double> %1)
  ret <4 x double> %2
}

; copysign(x, fp_round(y)) -> copysign(x, y)
define <4 x float> @combine_vec_fcopysign_fptrunc_sgn(<4 x float> %x, <4 x double> %y) {
; SSE-LABEL: combine_vec_fcopysign_fptrunc_sgn:
; SSE: # BB#0:
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: movaps {{.*#+}} xmm5
; SSE-NEXT: andps %xmm5, %xmm0
; SSE-NEXT: cvtsd2ss %xmm1, %xmm6
; SSE-NEXT: movaps {{.*#+}} xmm4 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
; SSE-NEXT: andps %xmm4, %xmm6
; SSE-NEXT: orps %xmm6, %xmm0
; SSE-NEXT: movshdup {{.*#+}} xmm6 = xmm3[1,1,3,3]
; SSE-NEXT: andps %xmm5, %xmm6
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: cvtsd2ss %xmm1, %xmm1
; SSE-NEXT: andps %xmm4, %xmm1
; SSE-NEXT: orps %xmm6, %xmm1
; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; SSE-NEXT: movaps %xmm3, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: andps %xmm5, %xmm1
; SSE-NEXT: xorps %xmm6, %xmm6
; SSE-NEXT: cvtsd2ss %xmm2, %xmm6
; SSE-NEXT: andps %xmm4, %xmm6
; SSE-NEXT: orps %xmm1, %xmm6
; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm6[0],xmm0[3]
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT: andps %xmm5, %xmm3
; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsd2ss %xmm2, %xmm1
; SSE-NEXT: andps %xmm4, %xmm1
; SSE-NEXT: orps %xmm3, %xmm1
; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_fptrunc_sgn:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm0
; AVX-NEXT: vcvtpd2ps %ymm1, %xmm1
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandpd %xmm2, %xmm1, %xmm1
; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
  %1 = fptrunc <4 x double> %y to <4 x float>
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %1)
  ret <4 x float> %2
}

declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
declare <4 x float> @llvm.copysign.v4f32(<4 x float> %Mag, <4 x float> %Sgn)
declare <4 x double> @llvm.copysign.v4f64(<4 x double> %Mag, <4 x double> %Sgn)