blob: e0619251f9204544d4d5a909689adbcd6434b3f4 [file] [log] [blame]
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; fadd(select(c == 0, fabs(x), fabs(y)), z)
; Both select arms are fabs, so the checks expect a single v_cndmask on the
; raw loaded values, with the abs applied as |...| on the v_add_f32_e64 operand.
; GCN-LABEL: {{^}}add_select_fabs_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
define amdgpu_kernel void @add_select_fabs_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %select = select i1 %cmp, float %fabs.x, float %fabs.y
  %add = fadd float %select, %z
  ; Volatile, matching the result stores of the sibling tests that follow.
  store volatile float %add, float addrspace(1)* undef
  ret void
}
23
; Same select-of-fabs pattern, but fabs(x) has a second use in another fadd.
; The checks expect the select on the raw values and both adds to carry the
; abs as an operand modifier (|SELECT| and |X|).
; GCN-LABEL: {{^}}add_select_multi_use_lhs_fabs_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_dword [[W:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[X]]|, [[W]]
define amdgpu_kernel void @add_select_multi_use_lhs_fabs_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %w = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %select = select i1 %cmp, float %fabs.x, float %fabs.y
  %add0 = fadd float %select, %z
  ; Second use of the left-hand fabs.
  %add1 = fadd float %fabs.x, %w
  store volatile float %add0, float addrspace(1)* undef
  store volatile float %add1, float addrspace(1)* undef
  ret void
}
48
; Select-of-fabs where fabs(x) is additionally stored to memory.
; The checks expect the stored value to be produced by an explicit
; v_and_b32 with 0x7fffffff, while the select/add pair still uses |SELECT|.
; GCN-LABEL: {{^}}add_select_multi_store_use_lhs_fabs_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN-DAG: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_add_f32_e64 [[ADD:v[0-9]+]], |[[SELECT]]|, [[Z]]
; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]

; GCN: buffer_store_dword [[ADD]]
; GCN: buffer_store_dword [[X_ABS]]
define amdgpu_kernel void @add_select_multi_store_use_lhs_fabs_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %select = select i1 %cmp, float %fabs.x, float %fabs.y
  %add0 = fadd float %select, %z
  store volatile float %add0, float addrspace(1)* undef
  ; Non-arithmetic use of the left-hand fabs: it is written out directly.
  store volatile float %fabs.x, float addrspace(1)* undef
  ret void
}
73
; Select-of-fabs where the right-hand fabs(y) has a second use in another
; fadd. The checks expect the select on the raw values and both adds to use
; an abs operand modifier (|SELECT| and |Y|).
; GCN-LABEL: {{^}}add_select_multi_use_rhs_fabs_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_dword [[W:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[Y]]|, [[W]]
define amdgpu_kernel void @add_select_multi_use_rhs_fabs_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %w = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %select = select i1 %cmp, float %fabs.x, float %fabs.y
  %add0 = fadd float %select, %z
  ; Second use of the right-hand fabs.
  %add1 = fadd float %fabs.y, %w
  store volatile float %add0, float addrspace(1)* undef
  store volatile float %add1, float addrspace(1)* undef
  ret void
}
98
; Only the true arm of the select is a fabs; the false arm is a plain value.
; The checks expect the fabs to stay an explicit v_and_b32 feeding the
; v_cndmask, followed by an unmodified v_add_f32_e32.
; GCN-LABEL: {{^}}add_select_fabs_var_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X_ABS]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_fabs_var_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %select = select i1 %cmp, float %fabs.x, float %y
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}
118
; select(c == 0, fabs(x), -1.0) + y.
; The checks expect an explicit v_and_b32 for the fabs, a v_cndmask that
; takes -1.0 as an inline operand, and an unmodified v_add_f32_e32.
; GCN-LABEL: {{^}}add_select_fabs_negk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_and_b32_e32 [[FABS_X:v[0-9]+]], 0x7fffffff, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[FABS_X]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @add_select_fabs_negk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs = call float @llvm.fabs.f32(float %x)
  %select = select i1 %cmp, float %fabs, float -1.0
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}
136
; fabs applied to a select between two negative constants (-2.0 / -1.0).
; The checks record the current output: the select keeps the negative
; constants and the abs shows up as |SELECT| on the add.
; FIXME: fabs should fold away
; GCN-LABEL: {{^}}add_select_fabs_negk_negk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[X]]
define amdgpu_kernel void @add_select_fabs_negk_negk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, float -2.0, float -1.0
  %fabs = call float @llvm.fabs.f32(float %select)
  %add = fadd float %fabs, %x
  store volatile float %add, float addrspace(1)* undef
  ret void
}
152
; Select between two positive constants (2.0 / 1.0) with no fabs or fneg
; involved. The checks expect a v_cndmask with both constants inline and an
; unmodified v_add_f32_e32.
; GCN-LABEL: {{^}}add_select_posk_posk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, 2.0, s
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
define amdgpu_kernel void @add_select_posk_posk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, float 2.0, float 1.0
  %add = fadd float %select, %x
  store volatile float %add, float addrspace(1)* undef
  ret void
}
166
; select(c == 0, -1.0, fabs(x)) + y: the constant sits in the true arm.
; The checks expect a v_cmp_ne_u32 against 0 (the IR compare is eq), an
; explicit v_and_b32 for the fabs, and a v_cndmask with -1.0 inline.
; GCN-LABEL: {{^}}add_select_negk_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN-DAG: v_and_b32_e32 [[FABS_X:v[0-9]+]], 0x7fffffff, [[X]]
; GCN-DAG: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[FABS_X]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @add_select_negk_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs = call float @llvm.fabs.f32(float %x)
  %select = select i1 %cmp, float -1.0, float %fabs
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}
185
; Like add_select_negk_fabs_f32, but the constant is -1024.0. The checks
; expect it to be materialized with a v_mov_b32 of 0xc4800000 instead of
; appearing inline in the v_cndmask.
; GCN-LABEL: {{^}}add_select_negliteralk_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xc4800000

; GCN-DAG: v_and_b32_e32 [[FABS_X:v[0-9]+]], 0x7fffffff, [[X]]
; GCN-DAG: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[FABS_X]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @add_select_negliteralk_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs = call float @llvm.fabs.f32(float %x)
  %select = select i1 %cmp, float -1024.0, float %fabs
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}
205
; select(c == 0, fabs(x), 1.0) + y.
; With a positive constant in the other arm, the checks expect the select to
; operate on the raw X and 1.0, with the abs applied as |SELECT| on the add.
; GCN-LABEL: {{^}}add_select_fabs_posk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Y]]
define amdgpu_kernel void @add_select_fabs_posk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef

  %cmp = icmp eq i32 %c, 0
  %fabs = call float @llvm.fabs.f32(float %x)
  %select = select i1 %cmp, float %fabs, float 1.0
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}
223
; select(c == 0, 1.0, fabs(x)) + y: the positive constant is in the true arm.
; The checks expect a v_cmp_ne_u32 against 0, a select on raw X and 1.0, and
; the abs applied as |SELECT| on the add.
; GCN-LABEL: {{^}}add_select_posk_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Y]]
define amdgpu_kernel void @add_select_posk_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs = call float @llvm.fabs.f32(float %x)
  %select = select i1 %cmp, float 1.0, float %fabs
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}
241
; fadd(select(c == 0, -x, -y), z), with each negation written as
; fsub -0.0, v. The checks expect a select on the raw values and the add
; turned into a v_sub_f32 of Z minus the selected value.
; GCN-LABEL: {{^}}add_select_fneg_fneg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_fneg_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %fneg.y = fsub float -0.0, %y
  %select = select i1 %cmp, float %fneg.x, float %fneg.y
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}
261
; Select-of-fneg where the left-hand -x has a second use in another fadd.
; The checks expect the select on the raw values and both adds emitted as
; v_sub_f32 (Z - SELECT and W - X).
; GCN-LABEL: {{^}}add_select_multi_use_lhs_fneg_fneg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_dword [[W:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[W]], [[X]]
define amdgpu_kernel void @add_select_multi_use_lhs_fneg_fneg_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %w = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %fneg.y = fsub float -0.0, %y
  %select = select i1 %cmp, float %fneg.x, float %fneg.y
  %add0 = fadd float %select, %z
  ; Second use of the left-hand negation.
  %add1 = fadd float %fneg.x, %w
  store volatile float %add0, float addrspace(1)* undef
  store volatile float %add1, float addrspace(1)* undef
  ret void
}
286
; Select-of-fneg where -x is additionally stored to memory.
; The checks expect the stored value to come from an explicit v_xor_b32 with
; 0x80000000, while the select/add pair becomes cndmask + v_sub_f32.
; GCN-LABEL: {{^}}add_select_multi_store_use_lhs_fneg_fneg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN-DAG: v_xor_b32_e32 [[NEG_X:v[0-9]+]], 0x80000000, [[X]]
; GCN-DAG: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[Z]], [[SELECT]]

; GCN: buffer_store_dword [[ADD]]
; GCN: buffer_store_dword [[NEG_X]]
define amdgpu_kernel void @add_select_multi_store_use_lhs_fneg_fneg_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %fneg.y = fsub float -0.0, %y
  %select = select i1 %cmp, float %fneg.x, float %fneg.y
  %add0 = fadd float %select, %z
  store volatile float %add0, float addrspace(1)* undef
  ; Non-arithmetic use of the left-hand negation: it is written out directly.
  store volatile float %fneg.x, float addrspace(1)* undef
  ret void
}
311
; Select-of-fneg where the right-hand -y has a second use in another fadd.
; The checks expect the select on the raw values and both adds emitted as
; v_sub_f32 (Z - SELECT and W - Y).
; GCN-LABEL: {{^}}add_select_multi_use_rhs_fneg_fneg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_dword [[W:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[W]], [[Y]]
define amdgpu_kernel void @add_select_multi_use_rhs_fneg_fneg_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %w = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %fneg.y = fsub float -0.0, %y
  %select = select i1 %cmp, float %fneg.x, float %fneg.y
  %add0 = fadd float %select, %z
  ; Second use of the right-hand negation.
  %add1 = fadd float %fneg.y, %w
  store volatile float %add0, float addrspace(1)* undef
  store volatile float %add1, float addrspace(1)* undef
  ret void
}
336
; Only the true arm of the select is negated; the false arm is a plain value.
; The checks expect the negation to stay an explicit v_xor_b32 feeding the
; v_cndmask, followed by an ordinary v_add_f32_e32.
; GCN-LABEL: {{^}}add_select_fneg_var_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_xor_b32_e32 [[X_NEG:v[0-9]+]], 0x80000000, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X_NEG]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_fneg_var_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float %fneg.x, float %y
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}
356
; select(c == 0, -x, -1.0) + y.
; The checks expect the select to operate on raw X and the sign-flipped
; constant 1.0, with the add emitted as v_sub_f32 of Y minus the select.
; GCN-LABEL: {{^}}add_select_fneg_negk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_negk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float %fneg.x, float -1.0
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}
373
; select(c == 0, -x, K) + y, where K is the float constant written as
; 0x3FC45F3060000000. The checks expect a v_mov_b32 of 0xbe22f983 (a value
; with the sign bit set) feeding the select, and the add emitted as
; v_sub_f32 of Y minus the select.
; GCN-LABEL: {{^}}add_select_fneg_inv2pi_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_inv2pi_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float %fneg.x, float 0x3FC45F3060000000
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}
391
; select(c == 0, -x, K) + y, where K is the float constant written as
; 0xBFC45F3060000000. The two runs are checked differently: the SI run
; expects a v_mov_b32 of 0x3e22f983 feeding the select, while the VI run
; expects 0.15915494 as an inline operand of the v_cndmask. Both expect the
; add emitted as v_sub_f32 of Y minus the select.
; GCN-LABEL: {{^}}add_select_fneg_neginv2pi_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; SI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983

; SI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
; VI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 0.15915494, [[X]], vcc

; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_neginv2pi_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float %fneg.x, float 0xBFC45F3060000000
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}
411
; Select between two negative constants (-2.0 / -1.0) with no fneg in the IR.
; The checks expect the constants to stay negative in the v_cndmask and the
; add to remain a plain v_add_f32_e32.
; GCN-LABEL: {{^}}add_select_negk_negk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_cmp_eq_u32_e64
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
define amdgpu_kernel void @add_select_negk_negk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, float -2.0, float -1.0
  %add = fadd float %select, %x
  store volatile float %add, float addrspace(1)* undef
  ret void
}
426
; Select between -2048.0 and -4096.0. The checks expect both constants to be
; materialized with v_mov_b32 (0xc5000000 and 0xc5800000), selected with a
; v_cndmask, and added with a plain v_add_f32_e32.
; GCN-LABEL: {{^}}add_select_negliteralk_negliteralk_f32:
; GCN-DAG: v_mov_b32_e32 [[K0:v[0-9]+]], 0xc5000000
; GCN-DAG: v_mov_b32_e32 [[K1:v[0-9]+]], 0xc5800000
; GCN-DAG: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_cmp_eq_u32_e64
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K1]], [[K0]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
define amdgpu_kernel void @add_select_negliteralk_negliteralk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, float -2048.0, float -4096.0
  %add = fadd float %select, %x
  store volatile float %add, float addrspace(1)* undef
  ret void
}
443
; Negation applied to a select between two negative constants (-2.0 / -1.0).
; The checks expect the select to keep the negative constants and the
; negated add to be emitted as v_sub_f32 of X minus the select.
; GCN-LABEL: {{^}}add_select_fneg_negk_negk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[X]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_negk_negk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, float -2.0, float -1.0
  %fneg.x = fsub float -0.0, %select
  %add = fadd float %fneg.x, %x
  store volatile float %add, float addrspace(1)* undef
  ret void
}
458
; select(c == 0, -1.0, -x) + y: the constant sits in the true arm.
; The checks expect a v_cmp_ne_u32 against 0, a select on raw X and the
; sign-flipped constant 1.0, and the add emitted as v_sub_f32.
; GCN-LABEL: {{^}}add_select_negk_fneg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_negk_fneg_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float -1.0, float %fneg.x
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}
476
; select(c == 0, -x, 1.0) + y.
; The checks expect the select to operate on raw X and the sign-flipped
; constant -1.0, with the add emitted as v_sub_f32 of Y minus the select.
; GCN-LABEL: {{^}}add_select_fneg_posk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_posk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float %fneg.x, float 1.0
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}
493
; select(c == 0, 1.0, -x) + y: the positive constant is in the true arm.
; The checks expect a v_cmp_ne_u32 against 0, a select on raw X and the
; sign-flipped constant -1.0, and the add emitted as v_sub_f32.
; GCN-LABEL: {{^}}add_select_posk_fneg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_posk_fneg_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float 1.0, float %fneg.x
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}
511
; select(c == 0, -fabs(x), fabs(y)) + z: the arms carry different modifiers.
; The checks expect each arm to be computed explicitly (v_or_b32 with
; 0x80000000 for -|x|, v_and_b32 with 0x7fffffff for |y|) before the select,
; followed by a plain v_add_f32_e32.
; GCN-LABEL: {{^}}add_select_negfabs_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN-DAG: v_or_b32_e32 [[X_NEG_ABS:v[0-9]+]], 0x80000000, [[X]]
; GCN-DAG: v_and_b32_e32 [[Y_ABS:v[0-9]+]], 0x7fffffff, [[Y]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_ABS]], [[X_NEG_ABS]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_negfabs_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %select = select i1 %cmp, float %fneg.fabs.x, float %fabs.y
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}
534
; select(c == 0, fabs(x), -fabs(y)) + z: mirror of the negfabs/fabs case.
; The checks expect each arm to be computed explicitly (v_and_b32 for |x|,
; v_or_b32 with 0x80000000 for -|y|) before the select, followed by a plain
; v_add_f32_e32.
; GCN-LABEL: {{^}}add_select_fabs_negfabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN-DAG: v_or_b32_e32 [[Y_NEG_ABS:v[0-9]+]], 0x80000000, [[Y]]
; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_NEG_ABS]], [[X_ABS]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_fabs_negfabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %fneg.fabs.y = fsub float -0.000000e+00, %fabs.y
  %select = select i1 %cmp, float %fabs.x, float %fneg.fabs.y
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}
557
; select(c == 0, -x, fabs(y)) + z: one arm negated, the other abs'd.
; The checks expect each arm to be computed explicitly (v_xor_b32 for -x,
; v_and_b32 for |y|) before the select, followed by a plain v_add_f32_e32.
; GCN-LABEL: {{^}}add_select_neg_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN-DAG: v_xor_b32_e32 [[X_NEG:v[0-9]+]], 0x80000000, [[X]]
; GCN-DAG: v_and_b32_e32 [[Y_ABS:v[0-9]+]], 0x7fffffff, [[Y]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_ABS]], [[X_NEG]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_neg_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.000000e+00, %x
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %select = select i1 %cmp, float %fneg.x, float %fabs.y
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}
579
; select(c == 0, fabs(x), -y) + z: mirror of the neg/fabs case.
; The checks expect each arm to be computed explicitly (v_and_b32 for |x|,
; v_xor_b32 for -y) before the select, followed by a plain v_add_f32_e32.
; GCN-LABEL: {{^}}add_select_fabs_neg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
; GCN-DAG: v_xor_b32_e32 [[Y_NEG:v[0-9]+]], 0x80000000, [[Y]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_NEG]], [[X_ABS]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_fabs_neg_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.y = fsub float -0.000000e+00, %y
  %select = select i1 %cmp, float %fabs.x, float %fneg.y
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}
601
; select(c == 0, -x, -fabs(y)) + z: both arms are negated, one also abs'd.
; The checks expect only the abs to remain explicit (v_and_b32 on Y), the
; select to take raw X and |y|, and the add emitted as v_sub_f32 of Z minus
; the select.
; GCN-LABEL: {{^}}add_select_neg_negfabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN-DAG: v_and_b32_e32 [[Y_ABS:v[0-9]+]], 0x7fffffff, [[Y]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_ABS]], [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
define amdgpu_kernel void @add_select_neg_negfabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.000000e+00, %x
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %fneg.fabs.y = fsub float -0.000000e+00, %fabs.y
  %select = select i1 %cmp, float %fneg.x, float %fneg.fabs.y
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}
623
; select(c == 0, -y, -fabs(x)) + z: both arms are negated, one also abs'd.
; The checks expect only the abs to remain explicit (v_and_b32 on X), the
; select to take |x| and raw Y, and the add emitted as v_sub_f32 of Z minus
; the select.
; GCN-LABEL: {{^}}add_select_negfabs_neg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[X_ABS]], [[Y]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
define amdgpu_kernel void @add_select_negfabs_neg_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
  %fneg.y = fsub float -0.000000e+00, %y
  %select = select i1 %cmp, float %fneg.y, float %fneg.fabs.x
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}
645
; fmul(select(c == 0, -fabs(x), 4.0), y).
; The checks expect an explicit v_and_b32 for |x|, a select between |x| and
; the sign-flipped constant -4.0, and the negation applied as -SELECT on the
; v_mul_f32_e64 operand.
; GCN-LABEL: {{^}}mul_select_negfabs_posk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN-DAG: v_cmp_eq_u32_e64 vcc,
; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -4.0, [[X_ABS]], vcc
; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[SELECT]], [[Y]]
define amdgpu_kernel void @mul_select_negfabs_posk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
  %select = select i1 %cmp, float %fneg.fabs.x, float 4.0
  %mul = fmul float %select, %y
  store volatile float %mul, float addrspace(1)* undef
  ret void
}
665
; IR under test: (c == 0 ? 4.0 : -|x|) * y, i.e. the previous test with the
; select arms swapped.
; The checks expect an inverted (ne) compare so that the constant stays in
; the first cndmask source, again with -4.0 / |x| selected and the negation
; applied as a source modifier on the multiply.
; GCN-LABEL: {{^}}mul_select_posk_negfabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN-DAG: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -4.0, [[X_ABS]], vcc
; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[SELECT]], [[Y]]
define amdgpu_kernel void @mul_select_posk_negfabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
  %select = select i1 %cmp, float 4.0, float %fneg.fabs.x
  %mul = fmul float %select, %y
  store volatile float %mul, float addrspace(1)* undef
  ret void
}
686
; IR under test: (c == 0 ? -|x| : -4.0) * y.
; Both arms are negative, so the checks expect the select to operate on the
; raw x and +4.0, with neg and abs both applied as source modifiers on the
; multiply (no separate and-mask instruction).
; GCN-LABEL: {{^}}mul_select_negfabs_negk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 4.0, [[X]], vcc
; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]]
define amdgpu_kernel void @mul_select_negfabs_negk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
  %select = select i1 %cmp, float %fneg.fabs.x, float -4.0
  %mul = fmul float %select, %y
  store volatile float %mul, float addrspace(1)* undef
  ret void
}
704
; IR under test: (c == 0 ? -4.0 : -|x|) * y, i.e. the previous test with the
; select arms swapped.
; The checks expect an inverted (ne) compare, a select between +4.0 and the
; raw x, and neg+abs applied as source modifiers on the multiply.
; GCN-LABEL: {{^}}mul_select_negk_negfabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cmp_ne_u32_e64 vcc
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 4.0, [[X]], vcc
; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]]
define amdgpu_kernel void @mul_select_negk_negfabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
  %select = select i1 %cmp, float -4.0, float %fneg.fabs.x
  %mul = fmul float %select, %y
  store volatile float %mul, float addrspace(1)* undef
  ret void
}
723
; --------------------------------------------------------------------------------
; Don't pull the fneg through the select when it can fold into the
; instruction that produces the selected value
; --------------------------------------------------------------------------------
727
; IR under test: c == 0 ? -(x + 4.0) : 2.0.
; The checks expect the negation to be absorbed by the add, giving
; (-4.0 - x), so the select result is stored directly with no extra
; sign-flip instruction. %y is loaded (and its load is checked) but is not
; otherwise used.
; GCN-LABEL: {{^}}select_fneg_posk_src_add_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], -4.0, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
define amdgpu_kernel void @select_fneg_posk_src_add_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %add = fadd float %x, 4.0
  %fneg = fsub float -0.0, %add
  %select = select i1 %cmp, float %fneg, float 2.0
  store volatile float %select, float addrspace(1)* undef
  ret void
}
745
; IR under test: c == 0 ? -(x - 4.0) : 2.0.
; The checks expect the negation to be absorbed by swapping the subtraction
; operands, giving (4.0 - x), so the select result is stored directly.
; GCN-LABEL: {{^}}select_fneg_posk_src_sub_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_sub_f32_e32 [[SUB:v[0-9]+]], 4.0, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[SUB]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
define amdgpu_kernel void @select_fneg_posk_src_sub_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %sub = fsub float %x, 4.0
  %fneg = fsub float -0.0, %sub
  %select = select i1 %cmp, float %fneg, float 2.0
  store volatile float %select, float addrspace(1)* undef
  ret void
}
761
; IR under test: c == 0 ? -(x * 4.0) : 2.0.
; The checks expect the negation to be absorbed into the multiply constant
; (-4.0 * x), so the select result is stored directly.
; GCN-LABEL: {{^}}select_fneg_posk_src_mul_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[MUL]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
define amdgpu_kernel void @select_fneg_posk_src_mul_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %mul = fmul float %x, 4.0
  %fneg = fsub float -0.0, %mul
  %select = select i1 %cmp, float %fneg, float 2.0
  store volatile float %select, float addrspace(1)* undef
  ret void
}
777
; IR under test: c == 0 ? -fma(x, 4.0, z) : 2.0.
; The checks expect the negation to be absorbed by the fma as a negated
; multiplier constant and a negated addend, so the select result is stored
; directly.
; GCN-LABEL: {{^}}select_fneg_posk_src_fma_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[X]], -4.0, -[[Z]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[FMA]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
define amdgpu_kernel void @select_fneg_posk_src_fma_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fma = call float @llvm.fma.f32(float %x, float 4.0, float %z)
  %fneg = fsub float -0.0, %fma
  %select = select i1 %cmp, float %fneg, float 2.0
  store volatile float %select, float addrspace(1)* undef
  ret void
}
795
; IR under test: c == 0 ? -fmuladd(x, 4.0, z) : 2.0.
; Mirrors the fma test above: the multiply-add itself must be checked, with
; the negation absorbed as a negated multiplier constant and a negated
; addend, and the select must consume the multiply-add result (not the raw
; load of x) before being stored directly.
; GCN-LABEL: {{^}}select_fneg_posk_src_fmad_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_mad_f32 [[MAD:v[0-9]+]], [[X]], -4.0, -[[Z]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[MAD]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
define amdgpu_kernel void @select_fneg_posk_src_fmad_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fmad = call float @llvm.fmuladd.f32(float %x, float 4.0, float %z)
  %fneg = fsub float -0.0, %fmad
  %select = select i1 %cmp, float %fneg, float 2.0
  store volatile float %select, float addrspace(1)* undef
  ret void
}
812
; IR under test: c == 0 ? -rcp(x) : 2.0.
; FIXME: This one should fold to rcp
; As the checks currently stand, the negation is not absorbed by the rcp:
; the select is done between rcp(x) and -2.0, and the sign bit is flipped
; afterwards with a separate xor. %y is loaded but not otherwise used.
; GCN-LABEL: {{^}}select_fneg_posk_src_rcp_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[RCP]], vcc
; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
define amdgpu_kernel void @select_fneg_posk_src_rcp_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %rcp = call float @llvm.amdgcn.rcp.f32(float %x)
  %fneg = fsub float -0.0, %rcp
  %select = select i1 %cmp, float %fneg, float 2.0
  store volatile float %select, float addrspace(1)* undef
  ret void
}
831
; Intrinsics referenced by the tests in this file; all are declared with
; attribute group #1.
declare float @llvm.fabs.f32(float) #1
declare float @llvm.fma.f32(float, float, float) #1
declare float @llvm.fmuladd.f32(float, float, float) #1
declare float @llvm.amdgcn.rcp.f32(float) #1
declare float @llvm.amdgcn.rcp.legacy(float) #1
declare float @llvm.amdgcn.fmul.legacy(float, float) #1

; #0 is attached to the test kernels, #1 to the intrinsic declarations.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }