; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Scalar half multiply-add: r = (a * b) + c. All operands are loaded from
; addrspace(1) pointers and the function uses attribute group #0.
; The SI checks expect the three f16 inputs to be converted to f32, combined
; with v_mac_f32 (accumulating into the register holding c), and converted
; back to f16 before the store. The VI checks expect a native v_mac_f16 that
; accumulates into the register holding c, which is then stored directly.
; GCN-LABEL: {{^}}mac_f16:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI:  v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI:  v_mac_f32_e32 v[[C_F32]], v[[B_F32]], v[[A_F32]]
; SI:  v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[C_F32]]
; SI:  buffer_store_short v[[R_F16]]
; VI:  v_mac_f16_e32 v[[C_F16]], v[[B_F16]], v[[A_F16]]
; VI:  buffer_store_short v[[C_F16]]
; GCN: s_endpgm
define void @mac_f16(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  %t.val = fmul half %a.val, %b.val
  %r.val = fadd half %t.val, %c.val

  store half %r.val, half addrspace(1)* %r
  ret void
}

; Two scalar half multiply-adds that share the same addend:
;   r0 = (a * b) + c   and   r1 = (d * e) + c.
; The checks expect one of them to be emitted as a three-source mad that only
; reads the shared addend register (captured as ADD), and the other as a mac
; that accumulates into that same register afterwards.
; GCN-LABEL: {{^}}mac_f16_same_add:
; SI:  v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD:v[0-9]+]]
; SI:  v_mac_f32_e32 [[ADD]], v{{[0-9]+}}, v{{[0-9]+}}
; VI:  v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD:v[0-9]+]]
; VI:  v_mac_f16_e32 [[ADD]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
define void @mac_f16_same_add(
    half addrspace(1)* %r0,
    half addrspace(1)* %r1,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c,
    half addrspace(1)* %d,
    half addrspace(1)* %e) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c
  %d.val = load half, half addrspace(1)* %d
  %e.val = load half, half addrspace(1)* %e

  %t0.val = fmul half %a.val, %b.val
  %r0.val = fadd half %t0.val, %c.val

  %t1.val = fmul half %d.val, %e.val
  %r1.val = fadd half %t1.val, %c.val

  store half %r0.val, half addrspace(1)* %r0
  store half %r1.val, half addrspace(1)* %r1
  ret void
}

; Scalar half multiply-add with a negated first factor: r = ((-0.0 - a) * b) + c.
; The negation is written as a subtraction from -0.0. The checks expect it to
; be folded into a source-negate modifier on a three-source mad, and expect no
; mac instruction to appear before that mad.
; GCN-LABEL: {{^}}mac_f16_neg_a:
; SI-NOT: v_mac_f32
; SI:     v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN:    s_endpgm
define void @mac_f16_neg_a(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  %a.neg = fsub half -0.0, %a.val
  %t.val = fmul half %a.neg, %b.val
  %r.val = fadd half %t.val, %c.val

  store half %r.val, half addrspace(1)* %r
  ret void
}

; Scalar half multiply-add with a negated second factor: r = (a * (-0.0 - b)) + c.
; The checks expect the negation to be folded into a source-negate modifier on
; a three-source mad (with the negated operand printed in the first source
; position), and expect no mac instruction to appear before that mad.
; GCN-LABEL: {{^}}mac_f16_neg_b:
; SI-NOT: v_mac_f32
; SI:     v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN:    s_endpgm
define void @mac_f16_neg_b(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  %b.neg = fsub half -0.0, %b.val
  %t.val = fmul half %a.val, %b.neg
  %r.val = fadd half %t.val, %c.val

  store half %r.val, half addrspace(1)* %r
  ret void
}

; Scalar half multiply-add with a negated addend: r = (a * b) + (-0.0 - c).
; The checks expect the negation to be folded into a source-negate modifier on
; the third source of a three-source mad, and expect no mac instruction to
; appear before that mad.
; GCN-LABEL: {{^}}mac_f16_neg_c:
; SI-NOT: v_mac_f32
; SI:     v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI:     v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; GCN:    s_endpgm
define void @mac_f16_neg_c(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  %c.neg = fsub half -0.0, %c.val
  %t.val = fmul half %a.val, %b.val
  %r.val = fadd half %t.val, %c.neg

  store half %r.val, half addrspace(1)* %r
  ret void
}

; Scalar half multiply-add where the first factor is computed as (+0.0 - a),
; compiled with attribute group #0 ("unsafe-fp-math"="false").
; The checks expect the subtraction from +0.0 to stay a real subtract
; instruction whose result (captured as NEG_A) feeds a mac as a multiplicand,
; i.e. it is not folded into a source-negate modifier.
; GCN-LABEL: {{^}}mac_f16_neg_a_safe_fp_math:
; SI:  v_cvt_f32_f16_e32 v[[ZERO:[0-9]+]], 0{{$}}
; SI:  v_subrev_f32_e32 v[[NEG_A:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI:  v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A]]
; VI:  v_sub_f16_e32 v[[NEG_A:[0-9]+]], 0, v{{[0-9]+}}
; VI:  v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A]]
; GCN: s_endpgm
define void @mac_f16_neg_a_safe_fp_math(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  %a.neg = fsub half 0.0, %a.val
  %t.val = fmul half %a.neg, %b.val
  %r.val = fadd half %t.val, %c.val

  store half %r.val, half addrspace(1)* %r
  ret void
}

; Scalar half multiply-add where the second factor is computed as (+0.0 - b),
; compiled with attribute group #0 ("unsafe-fp-math"="false").
; The checks expect the subtraction from +0.0 to stay a real subtract
; instruction whose result (captured as NEG_B) feeds a mac as a multiplicand,
; i.e. it is not folded into a source-negate modifier.
; GCN-LABEL: {{^}}mac_f16_neg_b_safe_fp_math:
; SI:  v_cvt_f32_f16_e32 v[[ZERO:[0-9]+]], 0{{$}}
; SI:  v_subrev_f32_e32 v[[NEG_B:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI:  v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_B]], v{{[0-9]+}}
; VI:  v_sub_f16_e32 v[[NEG_B:[0-9]+]], 0, v{{[0-9]+}}
; VI:  v_mac_f16_e32 v{{[0-9]+}}, v[[NEG_B]], v{{[0-9]+}}
; GCN: s_endpgm
define void @mac_f16_neg_b_safe_fp_math(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  %b.neg = fsub half 0.0, %b.val
  %t.val = fmul half %a.val, %b.neg
  %r.val = fadd half %t.val, %c.val

  store half %r.val, half addrspace(1)* %r
  ret void
}

; Scalar half multiply-add where the addend is computed as (+0.0 - c),
; compiled with attribute group #0 ("unsafe-fp-math"="false").
; The checks expect the subtraction from +0.0 to stay a real subtract
; instruction whose result register (captured as NEG_C) is then used as the
; accumulator of a mac, i.e. it is not folded into a source-negate modifier.
; GCN-LABEL: {{^}}mac_f16_neg_c_safe_fp_math:
; SI:  v_cvt_f32_f16_e32 v[[ZERO:[0-9]+]], 0{{$}}
; SI:  v_subrev_f32_e32 v[[NEG_C:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI:  v_mac_f32_e32 v[[NEG_C]], v{{[0-9]+}}, v{{[0-9]+}}
; VI:  v_sub_f16_e32 v[[NEG_C:[0-9]+]], 0, v{{[0-9]+}}
; VI:  v_mac_f16_e32 v[[NEG_C]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
define void @mac_f16_neg_c_safe_fp_math(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  %c.neg = fsub half 0.0, %c.val
  %t.val = fmul half %a.val, %b.val
  %r.val = fadd half %t.val, %c.neg

  store half %r.val, half addrspace(1)* %r
  ret void
}

; Scalar half multiply-add where the first factor is computed as (+0.0 - a),
; compiled with attribute group #1 ("unsafe-fp-math"="true").
; Unlike the safe variant, the checks expect the subtraction to be folded into
; a source-negate modifier on a three-source mad, with no mac instruction
; appearing before that mad.
; GCN-LABEL: {{^}}mac_f16_neg_a_unsafe_fp_math:
; SI-NOT: v_mac_f32
; SI:     v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN:    s_endpgm
define void @mac_f16_neg_a_unsafe_fp_math(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #1 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  %a.neg = fsub half 0.0, %a.val
  %t.val = fmul half %a.neg, %b.val
  %r.val = fadd half %t.val, %c.val

  store half %r.val, half addrspace(1)* %r
  ret void
}

; Scalar half multiply-add where the second factor is computed as (+0.0 - b),
; compiled with attribute group #1 ("unsafe-fp-math"="true").
; Unlike the safe variant, the checks expect the subtraction to be folded into
; a source-negate modifier on a three-source mad (negated operand printed in
; the first source position), with no mac instruction appearing before it.
; GCN-LABEL: {{^}}mac_f16_neg_b_unsafe_fp_math:
; SI-NOT: v_mac_f32
; SI:     v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN:    s_endpgm
define void @mac_f16_neg_b_unsafe_fp_math(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #1 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  %b.neg = fsub half 0.0, %b.val
  %t.val = fmul half %a.val, %b.neg
  %r.val = fadd half %t.val, %c.val

  store half %r.val, half addrspace(1)* %r
  ret void
}

; Scalar half multiply-add where the addend is computed as (+0.0 - c),
; compiled with attribute group #1 ("unsafe-fp-math"="true").
; Unlike the safe variant, the checks expect the subtraction to be folded into
; a source-negate modifier on the third source of a three-source mad, with no
; mac instruction appearing before that mad.
; GCN-LABEL: {{^}}mac_f16_neg_c_unsafe_fp_math:
; SI-NOT: v_mac_f32
; SI:     v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI:     v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; GCN:    s_endpgm
define void @mac_f16_neg_c_unsafe_fp_math(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #1 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  %c.neg = fsub half 0.0, %c.val
  %t.val = fmul half %a.val, %b.val
  %r.val = fadd half %t.val, %c.neg

  store half %r.val, half addrspace(1)* %r
  ret void
}

; <2 x half> multiply-add: r = (a * b) + c, element-wise.
; The checks expect each vector to be loaded as one dword and its high half to
; be extracted with a 16-bit right shift. The SI checks then expect every
; element to be converted to f32, combined with v_mac_f32 and converted back to
; f16; the VI checks expect one v_mac_f16 per element. In both cases the two
; results are expected to be repacked (mask low, shift high, or) into a single
; dword before the store.
; GCN-LABEL: {{^}}mac_v2f16:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[C_V2_F16:[0-9]+]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[C_F32_0:[0-9]+]], v[[C_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI:  v_cvt_f32_f16_e32 v[[C_F32_1:[0-9]+]], v[[C_F16_1]]
; SI:  v_mac_f32_e32 v[[C_F32_0]], v[[B_F32_0]], v[[A_F32_0]]
; SI:  v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[C_F32_0]]
; SI:  v_mac_f32_e32 v[[C_F32_1]], v[[B_F32_1]], v[[A_F32_1]]
; SI:  v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[C_F32_1]]
; SI:  v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
; SI:  v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; VI:  v_mac_f16_e32 v[[C_V2_F16]], v[[B_V2_F16]], v[[A_V2_F16]]
; VI:  v_mac_f16_e32 v[[C_F16_1]], v[[B_F16_1]], v[[A_F16_1]]
; VI:  v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[C_V2_F16]]
; VI:  v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[C_F16_1]]
; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define void @mac_v2f16(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  %t.val = fmul <2 x half> %a.val, %b.val
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; Two <2 x half> multiply-adds that share the same addend vector:
;   r0 = (a * b) + c   and   r1 = (d * e) + c.
; The checks expect, per element, one three-source mad that only reads the
; shared addend register (captured as ADD0 / ADD1) followed by a mac that
; accumulates into that same register.
; GCN-LABEL: {{^}}mac_v2f16_same_add:
; SI:  v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD0:v[0-9]+]]
; SI:  v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD1:v[0-9]+]]
; SI:  v_mac_f32_e32 [[ADD0]], v{{[0-9]+}}, v{{[0-9]+}}
; SI:  v_mac_f32_e32 [[ADD1]], v{{[0-9]+}}, v{{[0-9]+}}
; VI:  v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD0:v[0-9]+]]
; VI:  v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD1:v[0-9]+]]
; VI:  v_mac_f16_e32 [[ADD0]], v{{[0-9]+}}, v{{[0-9]+}}
; VI:  v_mac_f16_e32 [[ADD1]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
define void @mac_v2f16_same_add(
    <2 x half> addrspace(1)* %r0,
    <2 x half> addrspace(1)* %r1,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c,
    <2 x half> addrspace(1)* %d,
    <2 x half> addrspace(1)* %e) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
  %d.val = load <2 x half>, <2 x half> addrspace(1)* %d
  %e.val = load <2 x half>, <2 x half> addrspace(1)* %e

  %t0.val = fmul <2 x half> %a.val, %b.val
  %r0.val = fadd <2 x half> %t0.val, %c.val

  %t1.val = fmul <2 x half> %d.val, %e.val
  %r1.val = fadd <2 x half> %t1.val, %c.val

  store <2 x half> %r0.val, <2 x half> addrspace(1)* %r0
  store <2 x half> %r1.val, <2 x half> addrspace(1)* %r1
  ret void
}

; <2 x half> multiply-add with a negated first factor:
;   r = ((<-0.0, -0.0> - a) * b) + c.
; The checks expect, for each of the two elements, the negation to be folded
; into a source-negate modifier on a three-source mad, and expect no mac
; instruction to appear before the first mad.
; GCN-LABEL: {{^}}mac_v2f16_neg_a:
; SI-NOT: v_mac_f32
; SI:     v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SI:     v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN:    s_endpgm
define void @mac_v2f16_neg_a(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  %a.neg = fsub <2 x half> <half -0.0, half -0.0>, %a.val
  %t.val = fmul <2 x half> %a.neg, %b.val
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; <2 x half> multiply-add with a negated second factor:
;   r = (a * (<-0.0, -0.0> - b)) + c.
; The checks expect, for each of the two elements, the negation to be folded
; into a source-negate modifier on a three-source mad (negated operand printed
; in the first source position), and expect no mac instruction to appear
; before the first mad.
; GCN-LABEL: {{^}}mac_v2f16_neg_b:
; SI-NOT: v_mac_f32
; SI:     v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SI:     v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN:    s_endpgm
define void @mac_v2f16_neg_b(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  %b.neg = fsub <2 x half> <half -0.0, half -0.0>, %b.val
  %t.val = fmul <2 x half> %a.val, %b.neg
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; <2 x half> multiply-add with a negated addend:
;   r = (a * b) + (<-0.0, -0.0> - c).
; The checks expect, for each of the two elements, the negation to be folded
; into a source-negate modifier on the third source of a three-source mad, and
; expect no mac instruction to appear before the first mad.
; GCN-LABEL: {{^}}mac_v2f16_neg_c:
; SI-NOT: v_mac_f32
; SI:     v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; SI:     v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI:     v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; VI:     v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; GCN:    s_endpgm
define void @mac_v2f16_neg_c(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  %c.neg = fsub <2 x half> <half -0.0, half -0.0>, %c.val
  %t.val = fmul <2 x half> %a.val, %b.val
  %r.val = fadd <2 x half> %t.val, %c.neg

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; <2 x half> multiply-add where the first factor is computed as
; (<+0.0, +0.0> - a), compiled with attribute group #0
; ("unsafe-fp-math"="false").
; The checks expect one real subtract per element (results captured as
; NEG_A0 / NEG_A1), each feeding a mac as a multiplicand, i.e. the subtraction
; is not folded into a source-negate modifier.
; GCN-LABEL: {{^}}mac_v2f16_neg_a_safe_fp_math:
; SI:  v_cvt_f32_f16_e32 v[[ZERO:[0-9]+]], 0{{$}}
; SI:  v_subrev_f32_e32 v[[NEG_A0:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI:  v_subrev_f32_e32 v[[NEG_A1:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI:  v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A0]]
; SI:  v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A1]]
; VI:  v_sub_f16_e32 v[[NEG_A0:[0-9]+]], 0, v{{[0-9]+}}
; VI:  v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
; VI:  v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A0]]
; VI:  v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A1]]
; GCN: s_endpgm
define void @mac_v2f16_neg_a_safe_fp_math(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  %a.neg = fsub <2 x half> <half 0.0, half 0.0>, %a.val
  %t.val = fmul <2 x half> %a.neg, %b.val
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; <2 x half> multiply-add where the second factor is computed as
; (<+0.0, +0.0> - b), compiled with attribute group #0
; ("unsafe-fp-math"="false").
; The checks expect one real subtract per element (results captured as
; NEG_B0 / NEG_B1), each feeding a mac as a multiplicand, i.e. the subtraction
; is not folded into a source-negate modifier.
; GCN-LABEL: {{^}}mac_v2f16_neg_b_safe_fp_math:
; SI:  v_cvt_f32_f16_e32 v[[ZERO:[0-9]+]], 0{{$}}
; SI:  v_subrev_f32_e32 v[[NEG_B0:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI:  v_subrev_f32_e32 v[[NEG_B1:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI:  v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_B0]], v{{[0-9]+}}
; SI:  v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_B1]], v{{[0-9]+}}
; VI:  v_sub_f16_e32 v[[NEG_B0:[0-9]+]], 0, v{{[0-9]+}}
; VI:  v_sub_f16_e32 v[[NEG_B1:[0-9]+]], 0, v{{[0-9]+}}
; VI:  v_mac_f16_e32 v{{[0-9]+}}, v[[NEG_B0]], v{{[0-9]+}}
; VI:  v_mac_f16_e32 v{{[0-9]+}}, v[[NEG_B1]], v{{[0-9]+}}
; GCN: s_endpgm
define void @mac_v2f16_neg_b_safe_fp_math(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  %b.neg = fsub <2 x half> <half 0.0, half 0.0>, %b.val
  %t.val = fmul <2 x half> %a.val, %b.neg
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; <2 x half> multiply-add where the addend is computed as
; (<+0.0, +0.0> - c), compiled with attribute group #0
; ("unsafe-fp-math"="false").
; The checks expect one real subtract per element whose result register
; (captured as NEG_C0 / NEG_C1) is then used as the accumulator of a mac,
; i.e. the subtraction is not folded into a source-negate modifier.
; GCN-LABEL: {{^}}mac_v2f16_neg_c_safe_fp_math:
; SI:  v_cvt_f32_f16_e32 v[[ZERO:[0-9]+]], 0{{$}}
; SI:  v_subrev_f32_e32 v[[NEG_C0:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI:  v_subrev_f32_e32 v[[NEG_C1:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI:  v_mac_f32_e32 v[[NEG_C0]], v{{[0-9]+}}, v{{[0-9]+}}
; SI:  v_mac_f32_e32 v[[NEG_C1]], v{{[0-9]+}}, v{{[0-9]+}}
; VI:  v_sub_f16_e32 v[[NEG_C0:[0-9]+]], 0, v{{[0-9]+}}
; VI:  v_sub_f16_e32 v[[NEG_C1:[0-9]+]], 0, v{{[0-9]+}}
; VI:  v_mac_f16_e32 v[[NEG_C0]], v{{[0-9]+}}, v{{[0-9]+}}
; VI:  v_mac_f16_e32 v[[NEG_C1]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
define void @mac_v2f16_neg_c_safe_fp_math(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  %c.neg = fsub <2 x half> <half 0.0, half 0.0>, %c.val
  %t.val = fmul <2 x half> %a.val, %b.val
  %r.val = fadd <2 x half> %t.val, %c.neg

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; <2 x half> multiply-add where the first factor is computed as
; (<+0.0, +0.0> - a), compiled with attribute group #1
; ("unsafe-fp-math"="true").
; Unlike the safe variant, the checks expect each element's subtraction to be
; folded into a source-negate modifier on a three-source mad, with no mac
; instruction appearing before the first mad.
; GCN-LABEL: {{^}}mac_v2f16_neg_a_unsafe_fp_math:
; SI-NOT: v_mac_f32
; SI:     v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SI:     v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN:    s_endpgm
define void @mac_v2f16_neg_a_unsafe_fp_math(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #1 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  %a.neg = fsub <2 x half> <half 0.0, half 0.0>, %a.val
  %t.val = fmul <2 x half> %a.neg, %b.val
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; <2 x half> multiply-add where the second factor is computed as
; (<+0.0, +0.0> - b), compiled with attribute group #1
; ("unsafe-fp-math"="true").
; Unlike the safe variant, the checks expect each element's subtraction to be
; folded into a source-negate modifier on a three-source mad (negated operand
; printed in the first source position), with no mac instruction appearing
; before the first mad.
; GCN-LABEL: {{^}}mac_v2f16_neg_b_unsafe_fp_math:
; SI-NOT: v_mac_f32
; SI:     v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SI:     v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN:    s_endpgm
define void @mac_v2f16_neg_b_unsafe_fp_math(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #1 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  %b.neg = fsub <2 x half> <half 0.0, half 0.0>, %b.val
  %t.val = fmul <2 x half> %a.val, %b.neg
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; <2 x half> multiply-add where the addend is computed as
; (<+0.0, +0.0> - c), compiled with attribute group #1
; ("unsafe-fp-math"="true").
; Unlike the safe variant, the checks expect each element's subtraction to be
; folded into a source-negate modifier on the third source of a three-source
; mad, with no mac instruction appearing before the first mad.
; GCN-LABEL: {{^}}mac_v2f16_neg_c_unsafe_fp_math:
; SI-NOT: v_mac_f32
; SI:     v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; SI:     v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI:     v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; VI:     v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; GCN:    s_endpgm
define void @mac_v2f16_neg_c_unsafe_fp_math(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #1 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  %c.neg = fsub <2 x half> <half 0.0, half 0.0>, %c.val
  %t.val = fmul <2 x half> %a.val, %b.val
  %r.val = fadd <2 x half> %t.val, %c.neg

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; Attribute groups referenced by the test functions in this file:
;   #0 - unsafe FP math disabled; used by the plain, same_add, neg_* and
;        *_safe_fp_math functions.
;   #1 - unsafe FP math enabled; used by the *_unsafe_fp_math functions.
attributes #0 = {"unsafe-fp-math"="false"}
attributes #1 = {"unsafe-fp-math"="true"}