; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,VI %s
; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s

; Baseline case: all three fmuladd operands are half arguments extended to
; float. The gfx900 run expects v_mad_mix_f32 with its exact encoding; the
; fiji and hawaii runs expect v_mac_f32 and v_mad_f32 respectively.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo:
; GFX9: v_mad_mix_f32 v0, v0, v1, v2 ; encoding: [0x00,0x40,0xa0,0xd3,0x00,0x03,0x0a,0x1c]
; VI: v_mac_f32
; CI: v_mad_f32
define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  ret float %result
}

; Each operand is the upper 16 bits of an i32 argument, extracted with
; lshr 16 + trunc + bitcast to half, then extended to float. The gfx900 run
; expects the high halves to be selected through op_sel:[1,1,1].
; GCN-LABEL: {{^}}v_mad_mix_f32_f16hi_f16hi_f16hi_int:
; GFX9: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] ; encoding
; CIVI: v_mac_f32
define float @v_mad_mix_f32_f16hi_f16hi_f16hi_int(i32 %src0, i32 %src1, i32 %src2) #0 {
  %src0.hi = lshr i32 %src0, 16
  %src1.hi = lshr i32 %src1, 16
  %src2.hi = lshr i32 %src2, 16
  %src0.i16 = trunc i32 %src0.hi to i16
  %src1.i16 = trunc i32 %src1.hi to i16
  %src2.i16 = trunc i32 %src2.hi to i16
  %src0.fp16 = bitcast i16 %src0.i16 to half
  %src1.fp16 = bitcast i16 %src1.i16 to half
  %src2.fp16 = bitcast i16 %src2.i16 to half
  %src0.ext = fpext half %src0.fp16 to float
  %src1.ext = fpext half %src1.fp16 to float
  %src2.ext = fpext half %src2.fp16 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  ret float %result
}

; Same high-half selection as the integer variant, but the operands are
; element 1 of <2 x half> arguments obtained with extractelement.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
; GFX9: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] ; encoding
; VI: v_mac_f32
; CI: v_mad_f32
define float @v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
  %src0.hi = extractelement <2 x half> %src0, i32 1
  %src1.hi = extractelement <2 x half> %src1, i32 1
  %src2.hi = extractelement <2 x half> %src2, i32 1
  %src0.ext = fpext half %src0.hi to float
  %src1.ext = fpext half %src1.hi to float
  %src2.ext = fpext half %src2.hi to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  ret float %result
}

; Vector form: <2 x half> operands are extended to <2 x float> and fed to the
; v2f32 fmuladd. The gfx900 run expects one v_mad_mix_f32 per lane, with the
; high lane selected via op_sel:[1,1,1] and the low lane using no op_sel.
; GCN-LABEL: {{^}}v_mad_mix_v2f32:
; GFX9: v_mov_b32_e32 v3, v1
; GFX9-NEXT: v_mad_mix_f32 v1, v0, v3, v2 op_sel:[1,1,1]
; GFX9-NEXT: v_mad_mix_f32 v0, v0, v3, v2

; CIVI: v_mac_f32
define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
  %src0.ext = fpext <2 x half> %src0 to <2 x float>
  %src1.ext = fpext <2 x half> %src1 to <2 x float>
  %src2.ext = fpext <2 x half> %src2 to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
  ret <2 x float> %result
}

; Vector form with per-operand lane shuffles before the extension:
; src0 is swapped <1,0>, src1 is unchanged <0,1>, src2 is a splat of lane 1.
; The expected op_sel masks mirror those shuffles: [0,1,1] for result lane 1
; and [1,0,1] for result lane 0.
; GCN-LABEL: {{^}}v_mad_mix_v2f32_shuffle:
; GCN: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 v3, v1
; GFX9-NEXT: v_mad_mix_f32 v1, v0, v3, v2 op_sel:[0,1,1]
; GFX9-NEXT: v_mad_mix_f32 v0, v0, v3, v2 op_sel:[1,0,1]
; GFX9-NEXT: s_setpc_b64

; CIVI: v_mac_f32
define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
  %src0.shuf = shufflevector <2 x half> %src0, <2 x half> undef, <2 x i32> <i32 1, i32 0>
  %src1.shuf = shufflevector <2 x half> %src1, <2 x half> undef, <2 x i32> <i32 0, i32 1>
  %src2.shuf = shufflevector <2 x half> %src2, <2 x half> undef, <2 x i32> <i32 1, i32 1>
  %src0.ext = fpext <2 x half> %src0.shuf to <2 x float>
  %src1.ext = fpext <2 x half> %src1.shuf to <2 x float>
  %src2.ext = fpext <2 x half> %src2.shuf to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
  ret <2 x float> %result
}

; The first operand is negated (fsub -0.0, x) after being extended from half.
; The gfx900 run expects the negation to appear as a source modifier (-v0) on
; v_mad_mix_f32, with no other instruction between s_waitcnt and the return.
; GCN-LABEL: {{^}}v_mad_mix_f32_negf16lo_f16lo_f16lo:
; GFX9: s_waitcnt
; GFX9-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 ; encoding
; GFX9-NEXT: s_setpc_b64

; CIVI: v_mad_f32
define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %src0.ext.neg = fsub float -0.0, %src0.ext
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg, float %src1.ext, float %src2.ext)
  ret float %result
}

; The first operand goes through llvm.fabs.f32 after being extended from
; half. The gfx900 run expects the abs as a source modifier (|v0|).
; GCN-LABEL: {{^}}v_mad_mix_f32_absf16lo_f16lo_f16lo:
; GFX9: v_mad_mix_f32 v0, |v0|, v1, v2

; CIVI: v_mad_f32
define float @v_mad_mix_f32_absf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext.abs, float %src1.ext, float %src2.ext)
  ret float %result
}

; The first operand is -fabs(x) of the extended half. The gfx900 run expects
; both modifiers combined on the source operand (-|v0|).
; GCN-LABEL: {{^}}v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
; GFX9: s_waitcnt
; GFX9-NEXT: v_mad_mix_f32 v0, -|v0|, v1, v2
; GFX9-NEXT: s_setpc_b64

; CIVI: v_mad_f32
define float @v_mad_mix_f32_negabsf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
  %src0.ext.neg.abs = fsub float -0.0, %src0.ext.abs
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg.abs, float %src1.ext, float %src2.ext)
  ret float %result
}

; Mixed operand types: the two multiplicands are extended from half, the
; addend is a native float argument. The gfx900 run expects
; op_sel_hi:[1,1,0] on v_mad_mix_f32.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
; GFX9-NEXT: s_setpc_b64

; CIVI: v_mad_f32
define float @v_mad_mix_f32_f16lo_f16lo_f32(half %src0, half %src1, float %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
  ret float %result
}

; Mixed operand types with a negated float addend (fsub -0.0, x). The gfx900
; run expects the negation as a source modifier (-v2) on the f32 operand.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_negf32:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0] ; encoding
; GFX9-NEXT: s_setpc_b64

; CIVI: v_mad_f32
define float @v_mad_mix_f32_f16lo_f16lo_negf32(half %src0, half %src1, float %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.neg = fsub float -0.0, %src2
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg)
  ret float %result
}

; Mixed operand types with llvm.fabs.f32 applied to the float addend. The
; gfx900 run expects the abs as a source modifier (|v2|) on the f32 operand.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_absf32:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0] ; encoding
; GFX9-NEXT: s_setpc_b64

; CIVI: v_mad_f32
define float @v_mad_mix_f32_f16lo_f16lo_absf32(half %src0, half %src1, float %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.abs = call float @llvm.fabs.f32(float %src2)
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.abs)
  ret float %result
}

; Mixed operand types with -fabs(x) applied to the float addend. The gfx900
; run expects both modifiers combined on the f32 operand (-|v2|).
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_negabsf32:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0] ; encoding
; GFX9-NEXT: s_setpc_b64

; CIVI: v_mad_f32
define float @v_mad_mix_f32_f16lo_f16lo_negabsf32(half %src0, half %src1, float %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.abs = call float @llvm.fabs.f32(float %src2)
  %src2.neg.abs = fsub float -0.0, %src2.abs
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg.abs)
  ret float %result
}

; TODO: Fold inline immediates. Need to be careful because it is an
; f16 inline immediate that may be converted to f32, not an actual f32
; inline immediate.

; The addend is the float constant 1.0. The gfx900 run currently expects the
; constant to be materialized with v_mov_b32 and passed as a register; the
; fiji and hawaii runs expect it folded directly into v_mad_f32.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32imm1:
; GCN: s_waitcnt
; GFX9: v_mov_b32_e32 v2, 1.0
; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding

; CIVI: v_mad_f32 v0, v0, v1, 1.0
; GCN-NEXT: s_setpc_b64
define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 1.0)
  ret float %result
}

; The addend is the float constant 0x3FC45F3060000000, which the expected
; output prints as 0.15915494. The gfx900 run expects it materialized with
; v_mov_b32; the fiji run expects it as a direct v_mad_f32 operand.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
; GCN: s_waitcnt
; GFX9: v_mov_b32_e32 v2, 0.15915494
; GFX9: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
; VI: v_mad_f32 v0, v0, v1, 0.15915494
define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 0x3FC45F3060000000)
  ret float %result
}

; Attempt to break inline immediate folding. If the operand is
; interpreted as f32, the inline immediate is really the f16 inline
; imm value converted to f32.
;   fpext f16 1/2pi = 0x3e230000
;         f32 1/2pi = 0x3e22f983
; The addend here is the half constant 0xH3118 extended to float, so every
; run must see the literal 0x3e230000 rather than the f32 value of 1/2pi.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
; GFX9: v_mov_b32_e32 v2, 0x3e230000
; GFX9: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding

; CIVI: v_madak_f32 v0, v0, v1, 0x3e230000
define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2 = fpext half 0xH3118 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
  ret float %result
}

; The addend is the half constant 0xH003F extended to float. All runs expect
; the extended value as the 32-bit literal 0x367c0000.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
; GFX9: v_mov_b32_e32 v2, 0x367c0000
; GFX9: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding

; CIVI: v_madak_f32 v0, v0, v1, 0x367c0000
define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2 = fpext half 0xH003F to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
  ret float %result
}

; Vector form with a <1.0, 1.0> float addend. Only the gfx900 run has checks:
; the constant is materialized once with v_mov_b32 and shared by both lanes.
; GCN-LABEL: {{^}}v_mad_mix_v2f32_f32imm1:
; GFX9: v_mov_b32_e32 v2, v1
; GFX9: v_mov_b32_e32 v3, 1.0
; GFX9: v_mad_mix_f32 v1, v0, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
; GFX9: v_mad_mix_f32 v0, v0, v2, v3 op_sel_hi:[1,1,0] ; encoding
define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1) #0 {
  %src0.ext = fpext <2 x half> %src0 to <2 x float>
  %src1.ext = fpext <2 x half> %src1 to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> <float 1.0, float 1.0>)
  ret <2 x float> %result
}

; Vector form whose addend is a splat of half 0xH3118 extended to float.
; The gfx900 run expects the extended literal 0x3e230000 in a register shared
; by both lanes.
; GCN-LABEL: {{^}}v_mad_mix_v2f32_cvtf16imminv2pi:
; GFX9: v_mov_b32_e32 v2, v1
; GFX9: v_mov_b32_e32 v3, 0x3e230000
; GFX9: v_mad_mix_f32 v1, v0, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
; GFX9: v_mad_mix_f32 v0, v0, v2, v3 op_sel_hi:[1,1,0] ; encoding
define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
  %src0.ext = fpext <2 x half> %src0 to <2 x float>
  %src1.ext = fpext <2 x half> %src1 to <2 x float>
  %src2 = fpext <2 x half> <half 0xH3118, half 0xH3118> to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2)
  ret <2 x float> %result
}

; Vector form whose addend is a splat of the float constant
; 0x3FC45F3060000000 (printed as 0.15915494 in the expected output). The
; gfx900 run expects it materialized once and shared by both lanes.
; The addend is passed to fmuladd as a constant; no half-to-float extension
; of a constant is involved in this test.
; GCN-LABEL: {{^}}v_mad_mix_v2f32_f32imminv2pi:
; GFX9: v_mov_b32_e32 v2, v1
; GFX9: v_mov_b32_e32 v3, 0.15915494
; GFX9: v_mad_mix_f32 v1, v0, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
; GFX9: v_mad_mix_f32 v0, v0, v2, v3 op_sel_hi:[1,1,0] ; encoding
define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
  %src0.ext = fpext <2 x half> %src0 to <2 x float>
  %src1.ext = fpext <2 x half> %src1 to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> <float 0x3FC45F3060000000, float 0x3FC45F3060000000>)
  ret <2 x float> %result
}

; High-half operands (element 1 of each <2 x half>) followed by a
; maxnum(x, 0.0) / minnum(x, 1.0) pair on the result. All runs expect the
; min/max pair to be expressed as the clamp modifier on the multiply-add.
; GCN-LABEL: {{^}}v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
; GFX9: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] clamp ; encoding
; VI: v_mac_f32_e64 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}}
; CI: v_mad_f32 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}}
define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
  %src0.hi = extractelement <2 x half> %src0, i32 1
  %src1.hi = extractelement <2 x half> %src1, i32 1
  %src2.hi = extractelement <2 x half> %src2, i32 1
  %src0.ext = fpext half %src0.hi to float
  %src1.ext = fpext half %src1.hi to float
  %src2.ext = fpext half %src2.hi to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %max = call float @llvm.maxnum.f32(float %result, float 0.0)
  %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
  ret float %clamp
}

; Negative test: every operand is already float, so all runs (gfx900
; included) expect a plain v_mad_f32 and nothing else between s_waitcnt and
; the return.
; GCN-LABEL: {{^}}no_mix_simple:
; GCN: s_waitcnt
; GCN-NEXT: v_mad_f32 v0, v0, v1, v2
; GCN-NEXT: s_setpc_b64
define float @no_mix_simple(float %src0, float %src1, float %src2) #0 {
  %result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2)
  ret float %result
}

; Negative test with a source modifier: all operands are float and the first
; goes through llvm.fabs.f32. All runs expect v_mad_f32 with |v0|.
; GCN-LABEL: {{^}}no_mix_simple_fabs:
; GCN: s_waitcnt
; GCN-NEXT: v_mad_f32 v0, |v0|, v1, v2
; GCN-NEXT: s_setpc_b64
define float @no_mix_simple_fabs(float %src0, float %src1, float %src2) #0 {
  %src0.fabs = call float @llvm.fabs.f32(float %src0)
  %result = call float @llvm.fmuladd.f32(float %src0.fabs, float %src1, float %src2)
  ret float %result
}

; FIXME: Should be able to select in this case.
; All sources are converted from f16, so it doesn't matter that
; v_mad_mix_f32 flushes.

; Same IR as the baseline all-half test, but compiled with attribute group #1
; (+fp32-denormals). The gfx900 run currently expects three separate
; v_cvt_f32_f16 conversions feeding v_fma_f32 instead of v_mad_mix_f32.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
; GFX9: v_cvt_f32_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_fma_f32
define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals(half %src0, half %src1, half %src2) #1 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  ret float %result
}

; Mixed half/half/float operands compiled with attribute group #1
; (+fp32-denormals). The gfx900 run expects two v_cvt_f32_f16 conversions and
; a v_fma_f32 rather than v_mad_mix_f32.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32_denormals:
; GFX9: v_cvt_f32_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_fma_f32
define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals(half %src0, half %src1, float %src2) #1 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
  ret float %result
}

; Separate fmul + fadd (no fmuladd intrinsic) on three extended halves,
; compiled with attribute group #1 (+fp32-denormals). The gfx900 run expects
; the operations to stay unfused: three conversions, v_mul_f32, v_add_f32.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
; GFX9: v_cvt_f32_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_mul_f32
; GFX9: v_add_f32
define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, half %src2) #1 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %mul = fmul float %src0.ext, %src1.ext
  %result = fadd float %mul, %src2.ext
  ret float %result
}

; Separate fmul + fadd with a native float addend, compiled with attribute
; group #1 (+fp32-denormals). The gfx900 run expects two conversions followed
; by unfused v_mul_f32 and v_add_f32.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
; GFX9: v_cvt_f32_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_mul_f32
; GFX9: v_add_f32
define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, float %src2) #1 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %mul = fmul float %src0.ext, %src1.ext
  %result = fadd float %mul, %src2
  ret float %result
}

; Separate fmul + fadd on three extended halves, compiled with attribute
; group #0 (-fp32-denormals). The gfx900 run expects the pair to be combined
; into a single v_mad_mix_f32.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 ; encoding
; GFX9-NEXT: s_setpc_b64
define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %mul = fmul float %src0.ext, %src1.ext
  %result = fadd float %mul, %src2.ext
  ret float %result
}

; Separate fmul + fadd with a native float addend, compiled with attribute
; group #0 (-fp32-denormals). The gfx900 run expects a single v_mad_mix_f32
; with op_sel_hi:[1,1,0].
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
; GFX9-NEXT: s_setpc_b64
define float @v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, float %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %mul = fmul float %src0.ext, %src1.ext
  %result = fadd float %mul, %src2
  ret float %result
}

; Intrinsics used by the tests in this file.
declare float @llvm.fabs.f32(float) #2
declare float @llvm.minnum.f32(float, float) #2
declare float @llvm.maxnum.f32(float, float) #2
declare float @llvm.fmuladd.f32(float, float, float) #2
declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #2

; #0: test functions compiled with the fp32-denormals target feature disabled.
; #1: test functions compiled with the fp32-denormals target feature enabled.
; #2: attributes applied to the intrinsic declarations.
attributes #0 = { nounwind "target-features"="-fp32-denormals" }
attributes #1 = { nounwind "target-features"="+fp32-denormals" }
attributes #2 = { nounwind readnone speculatable }