; blob: 6a78290f9a82d91bd5d25dfb12b629241e20e26b
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; f32 clamp to the range 0.0 .. 1.0 written as minnum(maxnum(a, 0.0), 1.0).
; The checks expect one v_max_f32 of the loaded value with itself, carrying
; the clamp modifier.
; GCN-LABEL: {{^}}v_clamp_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %max = call float @llvm.maxnum.f32(float %a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}

; f32 clamp of a negated input (fsub -0.0, a). The checks expect the negation
; to appear as a source modifier on both operands of the clamping v_max_f32.
; GCN-LABEL: {{^}}v_clamp_neg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, -[[A]], -[[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_neg_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %fneg.a = fsub float -0.0, %a
  %max = call float @llvm.maxnum.f32(float %fneg.a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}

; f32 clamp of -|a| (fabs followed by fsub -0.0). The checks expect both the
; abs and neg source modifiers on the operands of the clamping v_max_f32.
; GCN-LABEL: {{^}}v_clamp_negabs_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, -|[[A]]|, -|[[A]]| clamp{{$}}
define amdgpu_kernel void @v_clamp_negabs_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %fabs.a = call float @llvm.fabs.f32(float %a)
  %fneg.fabs.a = fsub float -0.0, %fabs.a

  %max = call float @llvm.maxnum.f32(float %fneg.fabs.a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}

; Lower bound is -0.0 rather than +0.0. The checks expect no clamp modifier:
; a register is set up with v_bfrev_b32 of 1 (captured as SIGNBIT) and used as
; the middle operand of a v_med3_f32 together with 1.0.
; GCN-LABEL: {{^}}v_clamp_negzero_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[SIGNBIT]], 1.0
define amdgpu_kernel void @v_clamp_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %max = call float @llvm.maxnum.f32(float %a, float -0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}

; The intermediate maxnum result has a second use (the volatile store), so the
; checks expect a separate v_max_f32 and v_min_f32 instead of a single
; clamping instruction.
; GCN-LABEL: {{^}}v_clamp_multi_use_max_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
define amdgpu_kernel void @v_clamp_multi_use_max_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %max = call float @llvm.maxnum.f32(float %a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ; Extra use of %max; volatile so the store cannot be dropped.
  store volatile float %max, float addrspace(1)* undef
  ret void
}

; f16 clamp to the range 0.0 .. 1.0. Checks under the VI prefix expect a
; clamping v_max_f16; checks under the SI prefix expect the clamp modifier on
; the f16 to f32 conversion, followed by a conversion back to f16.
; GCN-LABEL: {{^}}v_clamp_f16:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; VI: v_max_f16_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}

; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], [[A]] clamp{{$}}
; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
define amdgpu_kernel void @v_clamp_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
  %a = load half, half addrspace(1)* %gep0
  %max = call half @llvm.maxnum.f16(half %a, half 0.0)
  %med = call half @llvm.minnum.f16(half %max, half 1.0)

  store half %med, half addrspace(1)* %out.gep
  ret void
}

; f16 clamp of a negated input. Checks under the VI prefix expect the neg
; modifier on a clamping v_max_f16; checks under the SI prefix expect the neg
; modifier and clamp on the f16 to f32 conversion.
; GCN-LABEL: {{^}}v_clamp_neg_f16:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; VI: v_max_f16_e64 v{{[0-9]+}}, -[[A]], -[[A]] clamp{{$}}

; FIXME: Better to fold neg into max
; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]] clamp{{$}}
; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
define amdgpu_kernel void @v_clamp_neg_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
  %a = load half, half addrspace(1)* %gep0
  %fneg.a = fsub half -0.0, %a
  %max = call half @llvm.maxnum.f16(half %fneg.a, half 0.0)
  %med = call half @llvm.minnum.f16(half %max, half 1.0)

  store half %med, half addrspace(1)* %out.gep
  ret void
}

; f16 clamp of -|a|. Checks under the VI prefix expect neg and abs modifiers
; on a clamping v_max_f16; checks under the SI prefix expect the modifiers and
; clamp on the f16 to f32 conversion.
; GCN-LABEL: {{^}}v_clamp_negabs_f16:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; VI: v_max_f16_e64 v{{[0-9]+}}, -|[[A]]|, -|[[A]]| clamp{{$}}

; FIXME: Better to fold neg/abs into max

; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -|[[A]]| clamp{{$}}
; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
define amdgpu_kernel void @v_clamp_negabs_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
  %a = load half, half addrspace(1)* %gep0
  %fabs.a = call half @llvm.fabs.f16(half %a)
  %fneg.fabs.a = fsub half -0.0, %fabs.a

  %max = call half @llvm.maxnum.f16(half %fneg.fabs.a, half 0.0)
  %med = call half @llvm.minnum.f16(half %max, half 1.0)

  store half %med, half addrspace(1)* %out.gep
  ret void
}

; f64 clamp to the range 0.0 .. 1.0. The checks expect a v_max_f64 on the
; loaded 64-bit register pair with the clamp modifier.
; FIXME: Do f64 instructions support clamp?
; GCN-LABEL: {{^}}v_clamp_f64:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
  %a = load double, double addrspace(1)* %gep0
  %max = call double @llvm.maxnum.f64(double %a, double 0.0)
  %med = call double @llvm.minnum.f64(double %max, double 1.0)

  store double %med, double addrspace(1)* %out.gep
  ret void
}

; f64 clamp of a negated input. The checks expect the neg modifier on both
; operands of the clamping v_max_f64.
; GCN-LABEL: {{^}}v_clamp_neg_f64:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, -[[A]], -[[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_neg_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
  %a = load double, double addrspace(1)* %gep0
  %fneg.a = fsub double -0.0, %a
  %max = call double @llvm.maxnum.f64(double %fneg.a, double 0.0)
  %med = call double @llvm.minnum.f64(double %max, double 1.0)

  store double %med, double addrspace(1)* %out.gep
  ret void
}

; f64 clamp of -|a|. The checks expect neg and abs modifiers on both operands
; of the clamping v_max_f64.
; GCN-LABEL: {{^}}v_clamp_negabs_f64:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, -|[[A]]|, -|[[A]]| clamp{{$}}
define amdgpu_kernel void @v_clamp_negabs_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
  %a = load double, double addrspace(1)* %gep0
  %fabs.a = call double @llvm.fabs.f64(double %a)
  %fneg.fabs.a = fsub double -0.0, %fabs.a

  %max = call double @llvm.maxnum.f64(double %fneg.fabs.a, double 0.0)
  %med = call double @llvm.minnum.f64(double %max, double 1.0)

  store double %med, double addrspace(1)* %out.gep
  ret void
}

; fmed3(-0.0, 1.0, a): with -0.0 as a bound the checks only require that a
; v_med3_f32 is still emitted.
; GCN-LABEL: {{^}}v_clamp_med3_aby_negzero_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32
define amdgpu_kernel void @v_clamp_med3_aby_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float -0.0, float 1.0, float %a)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(0.0, 1.0, a): the checks expect the med3 to become a clamping
; v_max_f32 of the loaded value.
; GCN-LABEL: {{^}}v_clamp_med3_aby_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_aby_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(1.0, 0.0, a): the checks expect the med3 to become a clamping
; v_max_f32 of the loaded value.
; GCN-LABEL: {{^}}v_clamp_med3_bay_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_bay_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(a, 0.0, 1.0): the checks expect the med3 to become a clamping
; v_max_f32 of the loaded value.
; GCN-LABEL: {{^}}v_clamp_med3_yab_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_yab_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(a, 1.0, 0.0): the checks expect the med3 to become a clamping
; v_max_f32 of the loaded value.
; GCN-LABEL: {{^}}v_clamp_med3_yba_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_yba_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(0.0, a, 1.0): the checks expect the med3 to become a clamping
; v_max_f32 of the loaded value.
; GCN-LABEL: {{^}}v_clamp_med3_ayb_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_ayb_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(1.0, a, 0.0): the checks expect the med3 to become a clamping
; v_max_f32 of the loaded value.
; GCN-LABEL: {{^}}v_clamp_med3_bya_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_bya_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3 of three constants (0.0, 1.0, 4.0): the checks expect the result to
; be emitted as a move of the constant 1.0.
; GCN-LABEL: {{^}}v_clamp_constants_to_one_f32:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 1.0
define amdgpu_kernel void @v_clamp_constants_to_one_f32(float addrspace(1)* %out) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 4.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3 of three constants (0.0, 1.0, -4.0): the checks expect the result to
; be emitted as a move of 0.
; GCN-LABEL: {{^}}v_clamp_constants_to_zero_f32:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
define amdgpu_kernel void @v_clamp_constants_to_zero_f32(float addrspace(1)* %out) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float -4.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3 of three constants (0.0, 1.0, 0.5): the in-range value is expected to
; survive as a move of 0.5.
; GCN-LABEL: {{^}}v_clamp_constant_preserve_f32:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0.5
define amdgpu_kernel void @v_clamp_constant_preserve_f32(float addrspace(1)* %out) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0.5)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3 of (0.0, 1.0, bit pattern 8388607 = 0x7fffff): the checks expect the
; same bit pattern to be moved unchanged, i.e. not flushed during folding.
; GCN-LABEL: {{^}}v_clamp_constant_preserve_denorm_f32:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7fffff{{$}}
define amdgpu_kernel void @v_clamp_constant_preserve_denorm_f32(float addrspace(1)* %out) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 8388607 to float))
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3 of (0.0, 1.0, quiet NaN constant) with the default attribute group:
; the checks expect the result to be emitted as a move of 0.
; GCN-LABEL: {{^}}v_clamp_constant_qnan_f32:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
define amdgpu_kernel void @v_clamp_constant_qnan_f32(float addrspace(1)* %out) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3 of (0.0, 1.0, bit pattern 2139095041 = 0x7f800001, a signaling NaN
; encoding) with the default attribute group: the checks expect a move of 0.
; GCN-LABEL: {{^}}v_clamp_constant_snan_f32:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
define amdgpu_kernel void @v_clamp_constant_snan_f32(float addrspace(1)* %out) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float))
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; ---------------------------------------------------------------------
; Test non-default behaviors enabling snans and disabling dx10_clamp
; ---------------------------------------------------------------------

; minnum(maxnum(a, 0.0), 1.0) compiled with attribute group #2
; (-dx10-clamp, -fp-exceptions): the checks expect a v_med3_f32 with 0 and
; 1.0 instead of the clamp modifier.
; GCN-LABEL: {{^}}v_clamp_f32_no_dx10_clamp:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
define amdgpu_kernel void @v_clamp_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %max = call float @llvm.maxnum.f32(float %a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}

; minnum(maxnum(a, 0.0), 1.0) compiled with attribute group #3
; (+dx10-clamp, +fp-exceptions): the checks still expect a single clamping
; v_max_f32.
; GCN-LABEL: {{^}}v_clamp_f32_snan_dx10clamp:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_f32_snan_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #3 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %max = call float @llvm.maxnum.f32(float %a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}

; minnum(maxnum(a, 0.0), 1.0) compiled with attribute group #4
; (-dx10-clamp, +fp-exceptions): the checks expect neither clamp nor med3,
; only a separate v_max_f32 followed by v_min_f32.
; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %max = call float @llvm.maxnum.f32(float %a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}

; Attribute group #4 (-dx10-clamp, +fp-exceptions), but the clamped value is
; produced by an fadd carrying the nnan flag. The checks expect a v_med3_f32
; whose first source is the result of that add. The add is checked explicitly
; so the med3 operand is tied to the add result rather than to whichever
; register the load happened to use.
; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp_nnan_src:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], 1.0, [[A]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[ADD]], 0, 1.0
define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp_nnan_src(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %add = fadd nnan float %a, 1.0
  %max = call float @llvm.maxnum.f32(float %add, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(0.0, 1.0, a) compiled with attribute group #2 (-dx10-clamp,
; -fp-exceptions): the checks expect a clamping v_max_f32 for this operand
; order.
; GCN-LABEL: {{^}}v_clamp_med3_aby_f32_no_dx10_clamp:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_aby_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(1.0, 0.0, a) compiled with attribute group #2 (-dx10-clamp,
; -fp-exceptions): the checks expect a clamping v_max_f32 for this operand
; order.
; GCN-LABEL: {{^}}v_clamp_med3_bay_f32_no_dx10_clamp:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_bay_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(a, 0.0, 1.0) compiled with attribute group #2 (-dx10-clamp,
; -fp-exceptions): the checks expect the v_med3_f32 to be kept with its
; operands in the same order.
; GCN-LABEL: {{^}}v_clamp_med3_yab_f32_no_dx10_clamp:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
define amdgpu_kernel void @v_clamp_med3_yab_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(a, 1.0, 0.0) compiled with attribute group #2 (-dx10-clamp,
; -fp-exceptions): the checks expect the v_med3_f32 to be kept with its
; operands in the same order.
; GCN-LABEL: {{^}}v_clamp_med3_yba_f32_no_dx10_clamp:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 1.0, 0
define amdgpu_kernel void @v_clamp_med3_yba_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(0.0, a, 1.0) compiled with attribute group #2 (-dx10-clamp,
; -fp-exceptions): the checks expect the v_med3_f32 to be kept with its
; operands in the same order.
; GCN-LABEL: {{^}}v_clamp_med3_ayb_f32_no_dx10_clamp:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, 0, [[A]], 1.0
define amdgpu_kernel void @v_clamp_med3_ayb_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(1.0, a, 0.0) compiled with attribute group #2 (-dx10-clamp,
; -fp-exceptions): the checks expect the v_med3_f32 to be kept with its
; operands in the same order.
; GCN-LABEL: {{^}}v_clamp_med3_bya_f32_no_dx10_clamp:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, 1.0, [[A]], 0
define amdgpu_kernel void @v_clamp_med3_bya_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3 of (0.0, 1.0, quiet NaN constant) compiled with attribute group #2
; (-dx10-clamp, -fp-exceptions): the checks expect the NaN bit pattern
; 0x7fc00000 to be moved rather than 0.
; GCN-LABEL: {{^}}v_clamp_constant_qnan_f32_no_dx10_clamp:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7fc00000
define amdgpu_kernel void @v_clamp_constant_qnan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3 of (0.0, 1.0, bit pattern 2139095041 = 0x7f800001) compiled with
; attribute group #2 (-dx10-clamp, -fp-exceptions): the checks expect the
; same bit pattern 0x7f800001 to be moved rather than 0.
; GCN-LABEL: {{^}}v_clamp_constant_snan_f32_no_dx10_clamp:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7f800001
define amdgpu_kernel void @v_clamp_constant_snan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float))
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; Intrinsics called by the kernels in this file; all share attribute group #1.
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @llvm.fabs.f32(float) #1
declare float @llvm.minnum.f32(float, float) #1
declare float @llvm.maxnum.f32(float, float) #1
declare float @llvm.amdgcn.fmed3.f32(float, float, float) #1
declare double @llvm.fabs.f64(double) #1
declare double @llvm.minnum.f64(double, double) #1
declare double @llvm.maxnum.f64(double, double) #1
declare half @llvm.fabs.f16(half) #1
declare half @llvm.minnum.f16(half, half) #1
declare half @llvm.maxnum.f16(half, half) #1


; Attribute groups:
;   #0  kernels using the target's default settings
;   #1  intrinsic declarations
;   #2  dx10-clamp disabled, fp-exceptions disabled
;   #3  dx10-clamp enabled,  fp-exceptions enabled
;   #4  dx10-clamp disabled, fp-exceptions enabled
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "target-features"="-dx10-clamp,-fp-exceptions" "no-nans-fp-math"="false" }
attributes #3 = { nounwind "target-features"="+dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" }
attributes #4 = { nounwind "target-features"="-dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" }