; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -check-prefix=SNAN -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -check-prefix=SNAN -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -check-prefix=SNAN -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s

; The clamp input is an fadd carrying the nnan flag. Every run line expects
; the maxnum(2.0)/minnum(4.0) pair to become one v_med3_f32 on the add result.
; GCN-LABEL: {{^}}v_test_nnan_input_fmed3_r_i_i_f32:
; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], 1.0, v{{[0-9]+}}
; GCN: v_med3_f32 v{{[0-9]+}}, [[ADD]], 2.0, 4.0
define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %a.add = fadd nnan float %a, 1.0
  %max = call float @llvm.maxnum.f32(float %a.add, float 2.0)
  %med = call float @llvm.minnum.f32(float %max, float 4.0)

  store float %med, float addrspace(1)* %outgep
  ret void
}

; Clamp of a loaded value: minnum(maxnum(a, 2.0), 4.0).
; Runs without -mattr=+fp-exceptions expect a single v_med3_f32; runs with it
; expect a separate v_max_f32 and v_min_f32.
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_f32:
; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0

; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
define amdgpu_kernel void @v_test_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0

  %max = call float @llvm.maxnum.f32(float %a, float 2.0)
  %med = call float @llvm.minnum.f32(float %max, float 4.0)

  store float %med, float addrspace(1)* %outgep
  ret void
}

; Same clamp as v_test_fmed3_r_i_i_f32, but the constant is the first operand
; of both the maxnum and the minnum call. Expected output is unchanged.
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_commute0_f32:
; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0

; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
define amdgpu_kernel void @v_test_fmed3_r_i_i_commute0_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0

  %max = call float @llvm.maxnum.f32(float 2.0, float %a)
  %med = call float @llvm.minnum.f32(float 4.0, float %max)

  store float %med, float addrspace(1)* %outgep
  ret void
}

; Same clamp as v_test_fmed3_r_i_i_f32, but only the minnum call has the
; constant as its first operand. Expected output is unchanged.
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_commute1_f32:
; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0

; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
define amdgpu_kernel void @v_test_fmed3_r_i_i_commute1_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0

  %max = call float @llvm.maxnum.f32(float %a, float 2.0)
  %med = call float @llvm.minnum.f32(float 4.0, float %max)

  store float %med, float addrspace(1)* %outgep
  ret void
}

; The constants are swapped relative to the other clamp tests: maxnum uses 4.0
; and minnum uses 2.0. Every run line checks for a separate v_max_f32 and
; v_min_f32.
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_constant_order_f32:
; GCN: v_max_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
; GCN: v_min_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @v_test_fmed3_r_i_i_constant_order_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0

  %max = call float @llvm.maxnum.f32(float %a, float 4.0)
  %med = call float @llvm.minnum.f32(float %max, float 2.0)

  store float %med, float addrspace(1)* %outgep
  ret void
}


; The maxnum result has a second use: it is stored in addition to feeding the
; minnum. Every run line checks for a separate v_max_f32 and v_min_f32.
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_multi_use_f32:
; GCN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
; GCN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
define amdgpu_kernel void @v_test_fmed3_r_i_i_multi_use_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0

  %max = call float @llvm.maxnum.f32(float %a, float 2.0)
  %med = call float @llvm.minnum.f32(float %max, float 4.0)

  ; Both values go to the same address; the stores are volatile so neither is
  ; dropped.
  store volatile float %med, float addrspace(1)* %outgep
  store volatile float %max, float addrspace(1)* %outgep
  ret void
}

; Double-precision version of the clamp. Every run line checks for a separate
; v_max_f64 and v_min_f64.
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_f64:
; GCN: v_max_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, 2.0
; GCN: v_min_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, 4.0
define amdgpu_kernel void @v_test_fmed3_r_i_i_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr double, double addrspace(1)* %out, i32 %tid
  %a = load double, double addrspace(1)* %gep0

  %max = call double @llvm.maxnum.f64(double %a, double 2.0)
  %med = call double @llvm.minnum.f64(double %max, double 4.0)

  store double %med, double addrspace(1)* %outgep
  ret void
}

; Same clamp as v_test_fmed3_r_i_i_f32 but with attribute group #2 instead of
; #1. Every run line expects v_med3_f32.
; NOTE(review): #2 is defined outside this view; presumably it enables
; no-NaNs FP math, going by the function name - confirm.
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_no_nans_f32:
; GCN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
define amdgpu_kernel void @v_test_fmed3_r_i_i_no_nans_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0

  %max = call float @llvm.maxnum.f32(float %a, float 2.0)
  %med = call float @llvm.minnum.f32(float %max, float 4.0)

  store float %med, float addrspace(1)* %outgep
  ret void
}

; The clamp is written with fcmp/select pairs instead of the maxnum/minnum
; intrinsics. Expected output matches v_test_fmed3_r_i_i_f32.
; GCN-LABEL: {{^}}v_test_legacy_fmed3_r_i_i_f32:
; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0

; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0

  ; fmax_legacy
  %cmp0 = fcmp ule float %a, 2.0
  %max = select i1 %cmp0, float 2.0, float %a

  ; fmin_legacy
  %cmp1 = fcmp uge float %max, 4.0
  %med = select i1 %cmp1, float 4.0, float %max

  store float %med, float addrspace(1)* %outgep
  ret void
}

; Median-of-three built from min/max calls with %a negated (fsub -0.0, %a).
; The negation is expected to appear as a source modifier on the first
; v_med3_f32 operand.
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod0:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, -[[A]], [[B]], [[C]]
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2
  %a.fneg = fsub float -0.0, %a
  %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b)
  %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, float addrspace(1)* %outgep
  ret void
}

; Median-of-three built from min/max calls with %b negated (fsub -0.0, %b).
; The negation is expected to appear as a source modifier on the second
; v_med3_f32 operand.
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod1:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], -[[B]], [[C]]
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2
  %b.fneg = fsub float -0.0, %b
  %tmp0 = call float @llvm.minnum.f32(float %a, float %b.fneg)
  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b.fneg)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, float addrspace(1)* %outgep
  ret void
}

; Median-of-three built from min/max calls with %c negated (fsub -0.0, %c).
; The negation is expected to appear as a source modifier on the third
; v_med3_f32 operand.
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod2:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], -[[C]]
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2
  %c.fneg = fsub float -0.0, %c
  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fneg)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, float addrspace(1)* %outgep
  ret void
}

; Median-of-three with a different modifier on each input: -a, |b| and -|c|.
; All three are expected to appear as source modifiers on v_med3_f32.
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod012:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, -[[A]], |[[B]]|, -|[[C]]|
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2

  %a.fneg = fsub float -0.0, %a
  %b.fabs = call float @llvm.fabs.f32(float %b)
  %c.fabs = call float @llvm.fabs.f32(float %c)
  %c.fabs.fneg = fsub float -0.0, %c.fabs

  %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b.fabs)
  %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b.fabs)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)

  store float %med3, float addrspace(1)* %outgep
  ret void
}

; Median-of-three where every input is -|x|. All three neg+abs modifiers are
; expected to appear as source modifiers on v_med3_f32.
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_negabs012:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, -|[[A]]|, -|[[B]]|, -|[[C]]|
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2

  %a.fabs = call float @llvm.fabs.f32(float %a)
  %a.fabs.fneg = fsub float -0.0, %a.fabs
  %b.fabs = call float @llvm.fabs.f32(float %b)
  %b.fabs.fneg = fsub float -0.0, %b.fabs
  %c.fabs = call float @llvm.fabs.f32(float %c)
  %c.fabs.fneg = fsub float -0.0, %c.fabs

  %tmp0 = call float @llvm.minnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
  %tmp1 = call float @llvm.maxnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)

  store float %med3, float addrspace(1)* %outgep
  ret void
}

; Median-of-three whose inputs are all fadd results carrying the nnan flag.
; Uses attribute group #1, and every run line expects v_med3_f32 on the three
; add results.
; GCN-LABEL: {{^}}v_nnan_inputs_med3_f32_pat0:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN-DAG: v_add_f32_e32 [[A_ADD:v[0-9]+]], 1.0, [[A]]
; GCN-DAG: v_add_f32_e32 [[B_ADD:v[0-9]+]], 2.0, [[B]]
; GCN-DAG: v_add_f32_e32 [[C_ADD:v[0-9]+]], 4.0, [[C]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A_ADD]], [[B_ADD]], [[C_ADD]]
define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2

  %a.nnan = fadd nnan float %a, 1.0
  %b.nnan = fadd nnan float %b, 2.0
  %c.nnan = fadd nnan float %c, 4.0

  %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan)
  %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, float addrspace(1)* %outgep
  ret void
}

; 16 combinations

; 0: max(min(x, y), min(max(x, y), z))
; 1: max(min(x, y), min(max(y, x), z))
; 2: max(min(x, y), min(z, max(x, y)))
; 3: max(min(x, y), min(z, max(y, x)))
; 4: max(min(y, x), min(max(x, y), z))
; 5: max(min(y, x), min(max(y, x), z))
; 6: max(min(y, x), min(z, max(x, y)))
; 7: max(min(y, x), min(z, max(y, x)))
;
; + commute outermost max

; Combination 0 from the table: max(min(a, b), min(max(a, b), c)).
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2
  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, float addrspace(1)* %outgep
  ret void
}

; Combination 1 from the table: max(min(a, b), min(max(b, a), c)).
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat1:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2
  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, float addrspace(1)* %outgep
  ret void
}

; Combination 2 from the table: max(min(a, b), min(c, max(a, b))).
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat2:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2
  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, float addrspace(1)* %outgep
  ret void
}

; Combination 3 from the table: max(min(a, b), min(c, max(b, a))).
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat3:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat3(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2
  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, float addrspace(1)* %outgep
  ret void
}

; Combination 4 from the table: max(min(b, a), min(max(a, b), c)).
; The inner max takes (a, b) and the inner min takes the max result first, so
; this case differs from combinations 5, 6 and 7.
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat4:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat4(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2
  %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, float addrspace(1)* %outgep
  ret void
}

; Combination 5 from the table: max(min(b, a), min(max(b, a), c)).
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat5:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat5(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2
  %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, float addrspace(1)* %outgep
  ret void
}

; Combination 6 from the table: max(min(b, a), min(c, max(a, b))).
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat6:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat6(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2
  %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, float addrspace(1)* %outgep
  ret void
}

; Combination 7 from the table: max(min(b, a), min(c, max(b, a))).
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat7:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat7(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2
  %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, float addrspace(1)* %outgep
  ret void
}

; Combination 0 with the outermost max commuted:
; max(min(max(a, b), c), min(a, b)).
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat8:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat8(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2
  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
  %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
  store float %med3, float addrspace(1)* %outgep
  ret void
}

; Combination 1 with the outermost max commuted:
; max(min(max(b, a), c), min(a, b)).
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat9:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat9(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2
  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
  %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
  store float %med3, float addrspace(1)* %outgep
  ret void
}

; Operand-order variant of the no-NaNs med3 idiom: minnum(a, b),
; maxnum(a, b), minnum(c, tmp1), maxnum(tmp2, tmp0). The checks expect a
; v_med3_f32 whose sources are the A, B and C loads, in that order.
559; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat10:
560; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
561; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
562; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
563; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000564define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat10(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
Matt Arsenaultf84e5d92017-01-31 03:07:46 +0000565 %tid = call i32 @llvm.amdgcn.workitem.id.x()
566 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
567 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
568 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
569 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
570 %a = load volatile float, float addrspace(1)* %gep0
571 %b = load volatile float, float addrspace(1)* %gep1
572 %c = load volatile float, float addrspace(1)* %gep2
  ; %c is the first operand of the second minnum in this variant.
573 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
574 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
575 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
576 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
577 store float %med3, float addrspace(1)* %outgep
578 ret void
579}
580
; Operand-order variant of the no-NaNs med3 idiom: minnum(a, b),
; maxnum(b, a), minnum(c, tmp1), maxnum(tmp2, tmp0). The checks expect a
; v_med3_f32 whose sources are the B, A and C loads, in that order.
581; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat11:
582; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
583; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
584; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
585; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000586define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat11(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
Matt Arsenaultf84e5d92017-01-31 03:07:46 +0000587 %tid = call i32 @llvm.amdgcn.workitem.id.x()
588 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
589 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
590 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
591 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
592 %a = load volatile float, float addrspace(1)* %gep0
593 %b = load volatile float, float addrspace(1)* %gep1
594 %c = load volatile float, float addrspace(1)* %gep2
  ; Both the inner maxnum and the second minnum are commuted here.
595 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
596 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
597 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
598 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
599 store float %med3, float addrspace(1)* %outgep
600 ret void
601}
602
; Operand-order variant of the no-NaNs med3 idiom: minnum(b, a),
; maxnum(b, a), minnum(c, tmp1), maxnum(tmp2, tmp0). The checks expect a
; v_med3_f32 whose sources are the B, A and C loads, in that order.
603; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat12:
604; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
605; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
606; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
607; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000608define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat12(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
Matt Arsenaultf84e5d92017-01-31 03:07:46 +0000609 %tid = call i32 @llvm.amdgcn.workitem.id.x()
610 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
611 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
612 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
613 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
614 %a = load volatile float, float addrspace(1)* %gep0
615 %b = load volatile float, float addrspace(1)* %gep1
616 %c = load volatile float, float addrspace(1)* %gep2
  ; Inner minnum and maxnum both take (b, a); %c leads the second minnum.
617 %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
618 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
619 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
620 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
621 store float %med3, float addrspace(1)* %outgep
622 ret void
623}
624
; Operand-order variant of the no-NaNs med3 idiom: minnum(b, a),
; maxnum(b, a), minnum(tmp1, c), maxnum(tmp2, tmp0). The checks expect a
; v_med3_f32 whose sources are the B, A and C loads, in that order.
625; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat13:
626; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
627; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
628; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
629; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000630define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat13(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
Matt Arsenaultf84e5d92017-01-31 03:07:46 +0000631 %tid = call i32 @llvm.amdgcn.workitem.id.x()
632 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
633 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
634 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
635 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
636 %a = load volatile float, float addrspace(1)* %gep0
637 %b = load volatile float, float addrspace(1)* %gep1
638 %c = load volatile float, float addrspace(1)* %gep2
  ; Inner minnum and maxnum both take (b, a); tmp1 leads the second minnum.
639 %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
640 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
641 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
642 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
643 store float %med3, float addrspace(1)* %outgep
644 ret void
645}
646
; Operand-order variant of the no-NaNs med3 idiom: minnum(b, a),
; maxnum(a, b), minnum(c, tmp1), maxnum(tmp2, tmp0). The checks expect a
; v_med3_f32 whose sources are the A, B and C loads, in that order.
647; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat14:
648; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
649; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
650; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
651; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000652define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat14(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
Matt Arsenaultf84e5d92017-01-31 03:07:46 +0000653 %tid = call i32 @llvm.amdgcn.workitem.id.x()
654 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
655 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
656 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
657 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
658 %a = load volatile float, float addrspace(1)* %gep0
659 %b = load volatile float, float addrspace(1)* %gep1
660 %c = load volatile float, float addrspace(1)* %gep2
  ; The inner minnum takes (b, a) while the inner maxnum takes (a, b).
661 %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
662 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
663 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
664 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
665 store float %med3, float addrspace(1)* %outgep
666 ret void
667}
668
; Operand-order variant of the no-NaNs med3 idiom: minnum(b, a),
; maxnum(a, b), minnum(tmp1, c), maxnum(tmp2, tmp0).
;
; The previous body repeated pat12 verbatim, which left this commutation as
; the only one of the eight (inner min order x inner max order x second min
; order) without a test. It is pat9 with %a and %b exchanged, so the expected
; source order is the A, B and C loads.
; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat15:
; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat15(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2
  ; The inner minnum takes (b, a), the inner maxnum takes (a, b), and tmp1
  ; leads the second minnum.
  %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
  %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
  store float %med3, float addrspace(1)* %outgep
  ret void
}
690
691; ---------------------------------------------------------------------
692; Negative patterns
693; ---------------------------------------------------------------------
694
; Negative test: the med3 idiom where %tmp0 (the inner minnum) has a second
; use through a volatile store. The checks expect separate v_min_f32 and
; v_max_f32 instructions.
; NOTE(review): attribute group #1 leaves no-nans-fp-math false, so NaN
; safety alone may already block med3 here; confirm that the extra use is
; what this test is meant to exercise.
695; GCN-LABEL: {{^}}v_test_safe_med3_f32_pat0_multi_use0:
Matt Arsenault10268f92017-02-27 22:40:39 +0000696; GCN-DAG: v_min_f32
697; GCN-DAG: v_max_f32
Matt Arsenaultf84e5d92017-01-31 03:07:46 +0000698; GCN: v_min_f32
699; GCN: v_max_f32
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000700define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
Matt Arsenaultf84e5d92017-01-31 03:07:46 +0000701 %tid = call i32 @llvm.amdgcn.workitem.id.x()
702 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
703 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
704 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
705 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
706 %a = load volatile float, float addrspace(1)* %gep0
707 %b = load volatile float, float addrspace(1)* %gep1
708 %c = load volatile float, float addrspace(1)* %gep2
709 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
  ; Second use of %tmp0: the value is also stored, so it must stay live.
710 store volatile float %tmp0, float addrspace(1)* undef
711 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
712 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
713 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
714 store float %med3, float addrspace(1)* %outgep
715 ret void
716}
717
; Negative test: the med3 idiom where %tmp1 (the inner maxnum) has a second
; use through a volatile store. The function is compiled with attribute
; group #1 (no-nans-fp-math false) and its inputs carry no nnan flag.
; Previously only the label was matched, so the test verified nothing about
; the selected code; assert that no med3 is emitted.
; GCN-LABEL: {{^}}v_test_safe_med3_f32_pat0_multi_use1:
; GCN-NOT: v_med3_f32
define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2
  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
  ; Second use of %tmp1: the value is also stored, so it must stay live.
  store volatile float %tmp1, float addrspace(1)* undef
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, float addrspace(1)* %outgep
  ret void
}
736
; Negative test: the med3 idiom where %tmp2 (the second minnum) has a second
; use through a volatile store. The function is compiled with attribute
; group #1 (no-nans-fp-math false) and its inputs carry no nnan flag.
; Previously only the label was matched, so the test verified nothing about
; the selected code; assert that no med3 is emitted.
; GCN-LABEL: {{^}}v_test_safe_med3_f32_pat0_multi_use2:
; GCN-NOT: v_med3_f32
define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2
  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
  ; Second use of %tmp2: the value is also stored, so it must stay live.
  store volatile float %tmp2, float addrspace(1)* undef
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, float addrspace(1)* %outgep
  ret void
}
755
756
; Negative test: the plain med3 idiom (minnum(a, b), maxnum(a, b),
; minnum(tmp1, c), maxnum(tmp0, tmp2)) compiled with attribute group #1,
; which leaves no-nans-fp-math false, and with inputs that carry no nnan
; flag. Previously only the label was matched, so the test verified nothing
; about the selected code; assert that no med3 is emitted.
; GCN-LABEL: {{^}}v_test_safe_med3_f32_pat0:
; GCN-NOT: v_med3_f32
define amdgpu_kernel void @v_test_safe_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2
  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, float addrspace(1)* %outgep
  ret void
}
774
; Negative test: the med3 idiom where the first input lacks the nnan flag.
; %b.nnan and %c.nnan come from nnan fadds, but %a.nnan (despite its name)
; comes from a plain fadd, and attribute group #1 leaves no-nans-fp-math
; false. Previously only the label was matched, so the test verified nothing
; about the selected code; assert that no med3 is emitted.
; GCN-LABEL: {{^}}v_nnan_inputs_missing0_med3_f32_pat0:
; GCN-NOT: v_med3_f32
define amdgpu_kernel void @v_nnan_inputs_missing0_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2

  ; Only the first fadd is missing the nnan flag.
  %a.nnan = fadd float %a, 1.0
  %b.nnan = fadd nnan float %b, 2.0
  %c.nnan = fadd nnan float %c, 4.0

  %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan)
  %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, float addrspace(1)* %outgep
  ret void
}
797
; Negative test: the med3 idiom where the second input lacks the nnan flag.
; %a.nnan and %c.nnan come from nnan fadds, but %b.nnan (despite its name)
; comes from a plain fadd, and attribute group #1 leaves no-nans-fp-math
; false. Previously only the label was matched, so the test verified nothing
; about the selected code; assert that no med3 is emitted.
; GCN-LABEL: {{^}}v_nnan_inputs_missing1_med3_f32_pat0:
; GCN-NOT: v_med3_f32
define amdgpu_kernel void @v_nnan_inputs_missing1_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2

  ; Only the second fadd is missing the nnan flag.
  %a.nnan = fadd nnan float %a, 1.0
  %b.nnan = fadd float %b, 2.0
  %c.nnan = fadd nnan float %c, 4.0

  %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan)
  %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, float addrspace(1)* %outgep
  ret void
}
820
; Negative test: the med3 idiom where the third input lacks the nnan flag.
; %a.nnan and %b.nnan come from nnan fadds, but %c.nnan (despite its name)
; comes from a plain fadd, and attribute group #1 leaves no-nans-fp-math
; false. Previously only the label was matched, so the test verified nothing
; about the selected code; assert that no med3 is emitted.
; GCN-LABEL: {{^}}v_nnan_inputs_missing2_med3_f32_pat0:
; GCN-NOT: v_med3_f32
define amdgpu_kernel void @v_nnan_inputs_missing2_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
  %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load volatile float, float addrspace(1)* %gep0
  %b = load volatile float, float addrspace(1)* %gep1
  %c = load volatile float, float addrspace(1)* %gep2

  ; Only the third fadd is missing the nnan flag.
  %a.nnan = fadd nnan float %a, 1.0
  %b.nnan = fadd nnan float %b, 2.0
  %c.nnan = fadd float %c, 4.0

  %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan)
  %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, float addrspace(1)* %outgep
  ret void
}
843
; Negative test under attribute group #2 (no-nans-fp-math set to true): the
; inner minnum uses the negated %a while the inner maxnum uses %a itself, so
; the two do not share the same first source. The checks expect separate
; v_min_f32 / v_max_f32 instructions after the three loads.
844; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch:
845; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
846; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
847; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
848; GCN: v_min_f32
849; GCN: v_max_f32
850; GCN: v_min_f32
851; GCN: v_max_f32
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000852define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
Matt Arsenaultf84e5d92017-01-31 03:07:46 +0000853 %tid = call i32 @llvm.amdgcn.workitem.id.x()
854 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
855 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
856 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
857 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
858 %a = load volatile float, float addrspace(1)* %gep0
859 %b = load volatile float, float addrspace(1)* %gep1
860 %c = load volatile float, float addrspace(1)* %gep2
  ; Negate %a by subtracting it from -0.0; only the minnum sees the negation.
861 %a.fneg = fsub float -0.0, %a
862 %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b)
863 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
864 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
865 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
866 store float %med3, float addrspace(1)* %outgep
867 ret void
868}
869
; Negative test under attribute group #2: only minnum(maxnum(a, b), c) is
; computed, without the outer maxnum against minnum(a, b). The checks expect
; a v_max_f32 of the B and A loads followed by a v_min_f32 with the C load.
870; A simple min and max is not sufficient
871; GCN-LABEL: {{^}}v_test_global_nnans_min_max_f32:
872; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
873; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
874; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
875; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], [[B]], [[A]]
876; GCN: v_min_f32_e32 v{{[0-9]+}}, [[C]], [[MAX]]
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000877define amdgpu_kernel void @v_test_global_nnans_min_max_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
Matt Arsenaultf84e5d92017-01-31 03:07:46 +0000878 %tid = call i32 @llvm.amdgcn.workitem.id.x()
879 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
880 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
881 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
882 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
883 %a = load volatile float, float addrspace(1)* %gep0
884 %b = load volatile float, float addrspace(1)* %gep1
885 %c = load volatile float, float addrspace(1)* %gep2
886 %max = call float @llvm.maxnum.f32(float %a, float %b)
887 %minmax = call float @llvm.minnum.f32(float %max, float %c)
888 store float %minmax, float addrspace(1)* %outgep
889 ret void
890}
891
; f16 clamp-style idiom with immediates, minnum(maxnum(a + 1.0, 2.0), 4.0),
; where the fadd carries the nnan flag. The checks expect a v_med3_f32 around
; conversions on the first target, separate f16 max/min on the second, and a
; v_med3_f16 on the third.
;
; The v_med3_f16 check used [[ADD]] without any capture in this function, so
; it silently reused a value bound while checking an earlier function. The
; add result is now captured on the line directly above its use.
; GCN-LABEL: {{^}}v_test_nnan_input_fmed3_r_i_i_f16:
; SI: v_cvt_f32_f16
; SI: v_add_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
; SI: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
; SI: v_cvt_f16_f32

; VI: v_add_f16_e32 v{{[0-9]+}}, 1.0
; VI: v_max_f16_e32 v{{[0-9]+}}, 2.0
; VI: v_min_f16_e32 v{{[0-9]+}}, 4.0

; GFX9: v_add_f16_e32 [[ADD:v[0-9]+]], 1.0
; GFX9: v_med3_f16 v{{[0-9]+}}, [[ADD]], 2.0, 4.0
define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr half, half addrspace(1)* %out, i32 %tid
  %a = load half, half addrspace(1)* %gep0
  ; The nnan flag on this fadd is the only no-NaN information on the input.
  %a.add = fadd nnan half %a, 1.0
  %max = call half @llvm.maxnum.f16(half %a.add, half 2.0)
  %med = call half @llvm.minnum.f16(half %max, half 4.0)

  store half %med, half addrspace(1)* %outgep
  ret void
}
916
; f16 version of the three-input med3 idiom where every input comes from an
; nnan fadd. The checks expect a v_med3_f32 between conversions on the first
; target, separate f16 min/max instructions on the second, and a v_med3_f16
; of the three add results on the third.
917; GCN-LABEL: {{^}}v_nnan_inputs_med3_f16_pat0:
918; GCN: {{buffer_|flat_}}load_ushort [[A:v[0-9]+]]
919; GCN: {{buffer_|flat_}}load_ushort [[B:v[0-9]+]]
920; GCN: {{buffer_|flat_}}load_ushort [[C:v[0-9]+]]
921
922; SI: v_cvt_f32_f16
923; SI: v_cvt_f32_f16
924; SI: v_add_f32_e32
925; SI: v_add_f32_e32
926; SI: v_add_f32_e32
927; SI: v_med3_f32
928; SI: v_cvt_f16_f32_e32
929
930
931; GFX89-DAG: v_add_f16_e32 [[A_ADD:v[0-9]+]], 1.0, [[A]]
932; GFX89-DAG: v_add_f16_e32 [[B_ADD:v[0-9]+]], 2.0, [[B]]
933; GFX89-DAG: v_add_f16_e32 [[C_ADD:v[0-9]+]], 4.0, [[C]]
934
935; VI-DAG: v_min_f16
936; VI-DAG: v_max_f16
937; VI: v_min_f16
938; VI: v_max_f16
939
940; GFX9: v_med3_f16 v{{[0-9]+}}, [[A_ADD]], [[B_ADD]], [[C_ADD]]
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000941define amdgpu_kernel void @v_nnan_inputs_med3_f16_pat0(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #1 {
Matt Arsenault10268f92017-02-27 22:40:39 +0000942 %tid = call i32 @llvm.amdgcn.workitem.id.x()
943 %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
944 %gep1 = getelementptr half, half addrspace(1)* %bptr, i32 %tid
945 %gep2 = getelementptr half, half addrspace(1)* %cptr, i32 %tid
946 %outgep = getelementptr half, half addrspace(1)* %out, i32 %tid
947 %a = load volatile half, half addrspace(1)* %gep0
948 %b = load volatile half, half addrspace(1)* %gep1
949 %c = load volatile half, half addrspace(1)* %gep2
950
  ; All three inputs carry the nnan flag through their fadd.
951 %a.nnan = fadd nnan half %a, 1.0
952 %b.nnan = fadd nnan half %b, 2.0
953 %c.nnan = fadd nnan half %c, 4.0
954
955 %tmp0 = call half @llvm.minnum.f16(half %a.nnan, half %b.nnan)
956 %tmp1 = call half @llvm.maxnum.f16(half %a.nnan, half %b.nnan)
957 %tmp2 = call half @llvm.minnum.f16(half %tmp1, half %c.nnan)
958 %med3 = call half @llvm.maxnum.f16(half %tmp0, half %tmp2)
959 store half %med3, half addrspace(1)* %outgep
960 ret void
961}
962
; Intrinsic declarations; all of them use attribute group #0.
Matt Arsenaultf84e5d92017-01-31 03:07:46 +0000963declare i32 @llvm.amdgcn.workitem.id.x() #0
964declare float @llvm.fabs.f32(float) #0
965declare float @llvm.minnum.f32(float, float) #0
966declare float @llvm.maxnum.f32(float, float) #0
967declare double @llvm.minnum.f64(double, double) #0
968declare double @llvm.maxnum.f64(double, double) #0
Matt Arsenault10268f92017-02-27 22:40:39 +0000969declare half @llvm.fabs.f16(half) #0
970declare half @llvm.minnum.f16(half, half) #0
971declare half @llvm.maxnum.f16(half, half) #0
Matt Arsenaultf84e5d92017-01-31 03:07:46 +0000972
; Attribute groups:
;   #0 - nounwind readnone, used by the intrinsic declarations.
;   #1 - kernels with no-nans-fp-math set to false.
;   #2 - kernels with no-nans-fp-math set to true.
Matt Arsenaultf639c322016-01-28 20:53:42 +0000973attributes #0 = { nounwind readnone }
974attributes #1 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" }
975attributes #2 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }