blob: a0075066f68f6f6db4bc6731d5a9a264e7fe4b4a [file] [log] [blame]
Matt Arsenault2fdf2a12017-02-21 23:35:48 +00001; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
2; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
3
; Basic f32 clamp idiom: minnum(maxnum(%a, 0.0), 1.0) on a value loaded per
; workitem. The checks expect the min/max pair to be selected as a single
; v_max_f32_e64 of the loaded register with itself, carrying the clamp modifier.
4; GCN-LABEL: {{^}}v_clamp_f32:
5; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
6; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
7define amdgpu_kernel void @v_clamp_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
8 %tid = call i32 @llvm.amdgcn.workitem.id.x()
9 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
10 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
11 %a = load float, float addrspace(1)* %gep0
12 %max = call float @llvm.maxnum.f32(float %a, float 0.0)
13 %med = call float @llvm.minnum.f32(float %max, float 1.0)
14
15 store float %med, float addrspace(1)* %out.gep
16 ret void
17}
18
; Clamp idiom applied to the negated input (fsub -0.0, %a). The checks expect
; the negation to show up as a "-" source modifier on both operands of the
; clamped v_max_f32_e64 rather than as a separate instruction.
19; GCN-LABEL: {{^}}v_clamp_neg_f32:
20; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
21; GCN: v_max_f32_e64 v{{[0-9]+}}, -[[A]], -[[A]] clamp{{$}}
22define amdgpu_kernel void @v_clamp_neg_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
23 %tid = call i32 @llvm.amdgcn.workitem.id.x()
24 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
25 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
26 %a = load float, float addrspace(1)* %gep0
27 %fneg.a = fsub float -0.0, %a
28 %max = call float @llvm.maxnum.f32(float %fneg.a, float 0.0)
29 %med = call float @llvm.minnum.f32(float %max, float 1.0)
30
31 store float %med, float addrspace(1)* %out.gep
32 ret void
33}
34
; Clamp idiom applied to -fabs(%a). The checks expect both the fabs and the
; negation to be folded as "-|...|" source modifiers on the clamped
; v_max_f32_e64.
35; GCN-LABEL: {{^}}v_clamp_negabs_f32:
36; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
37; GCN: v_max_f32_e64 v{{[0-9]+}}, -|[[A]]|, -|[[A]]| clamp{{$}}
38define amdgpu_kernel void @v_clamp_negabs_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
39 %tid = call i32 @llvm.amdgcn.workitem.id.x()
40 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
41 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
42 %a = load float, float addrspace(1)* %gep0
43 %fabs.a = call float @llvm.fabs.f32(float %a)
44 %fneg.fabs.a = fsub float -0.0, %fabs.a
45
46 %max = call float @llvm.maxnum.f32(float %fneg.fabs.a, float 0.0)
47 %med = call float @llvm.minnum.f32(float %max, float 1.0)
48
49 store float %med, float addrspace(1)* %out.gep
50 ret void
51}
52
; Negative test: the lower bound is -0.0 instead of +0.0, so the checks expect
; no clamp modifier. Instead they expect v_bfrev_b32 of 1 (bit-reversed 1, i.e.
; 0x80000000) into a register the check names SIGNBIT, followed by a
; v_med3_f32 of the loaded value, that register, and 1.0.
53; GCN-LABEL: {{^}}v_clamp_negzero_f32:
54; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
55; GCN: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1
56; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[SIGNBIT]], 1.0
57define amdgpu_kernel void @v_clamp_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
58 %tid = call i32 @llvm.amdgcn.workitem.id.x()
59 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
60 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
61 %a = load float, float addrspace(1)* %gep0
62 %max = call float @llvm.maxnum.f32(float %a, float -0.0)
63 %med = call float @llvm.minnum.f32(float %max, float 1.0)
64
65 store float %med, float addrspace(1)* %out.gep
66 ret void
67}
68
; Negative test: the intermediate %max has a second use (the volatile store to
; an undef pointer), so the checks expect the max and min to remain separate
; v_max_f32 / v_min_f32 instructions instead of being merged into a clamp.
69; GCN-LABEL: {{^}}v_clamp_multi_use_max_f32:
70; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
71; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
72; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
73define amdgpu_kernel void @v_clamp_multi_use_max_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
74 %tid = call i32 @llvm.amdgcn.workitem.id.x()
75 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
76 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
77 %a = load float, float addrspace(1)* %gep0
78 %max = call float @llvm.maxnum.f32(float %a, float 0.0)
79 %med = call float @llvm.minnum.f32(float %max, float 1.0)
80
81 store float %med, float addrspace(1)* %out.gep
82 store volatile float %max, float addrspace(1)* undef
83 ret void
84}
85
; Half-precision clamp idiom. The two runs expect different code: the VI
; (-mcpu=fiji) run expects a clamped v_max_f16_e64, while the SI run expects the
; clamp modifier on the f16->f32 conversion followed by a conversion back to f16.
86; GCN-LABEL: {{^}}v_clamp_f16:
87; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
88; VI: v_max_f16_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
89
Matt Arsenaultd5c65152017-02-22 23:27:53 +000090; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], [[A]] clamp{{$}}
91; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
Matt Arsenault2fdf2a12017-02-21 23:35:48 +000092define amdgpu_kernel void @v_clamp_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
93 %tid = call i32 @llvm.amdgcn.workitem.id.x()
94 %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
95 %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
96 %a = load half, half addrspace(1)* %gep0
97 %max = call half @llvm.maxnum.f16(half %a, half 0.0)
98 %med = call half @llvm.minnum.f16(half %max, half 1.0)
99
100 store half %med, half addrspace(1)* %out.gep
101 ret void
102}
103
; Half-precision clamp of the negated input. The VI run expects the negation as
; a source modifier on a clamped v_max_f16_e64; the SI run expects the negation
; and the clamp modifier both on the f16->f32 conversion, then a conversion
; back to f16.
104; GCN-LABEL: {{^}}v_clamp_neg_f16:
105; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
106; VI: v_max_f16_e64 v{{[0-9]+}}, -[[A]], -[[A]] clamp{{$}}
107
108; FIXME: Better to fold neg into max
Matt Arsenaultd5c65152017-02-22 23:27:53 +0000109; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]] clamp{{$}}
110; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
Matt Arsenault2fdf2a12017-02-21 23:35:48 +0000111define amdgpu_kernel void @v_clamp_neg_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
112 %tid = call i32 @llvm.amdgcn.workitem.id.x()
113 %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
114 %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
115 %a = load half, half addrspace(1)* %gep0
116 %fneg.a = fsub half -0.0, %a
117 %max = call half @llvm.maxnum.f16(half %fneg.a, half 0.0)
118 %med = call half @llvm.minnum.f16(half %max, half 1.0)
119
120 store half %med, half addrspace(1)* %out.gep
121 ret void
122}
123
; Half-precision clamp of -fabs(%a). The VI run expects "-|...|" source
; modifiers on a clamped v_max_f16_e64; the SI run expects the same modifiers
; plus clamp on the f16->f32 conversion, then a conversion back to f16.
124; GCN-LABEL: {{^}}v_clamp_negabs_f16:
125; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
126; VI: v_max_f16_e64 v{{[0-9]+}}, -|[[A]]|, -|[[A]]| clamp{{$}}
127
128; FIXME: Better to fold neg/abs into max
129
Matt Arsenaultd5c65152017-02-22 23:27:53 +0000130; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -|[[A]]| clamp{{$}}
131; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
Matt Arsenault2fdf2a12017-02-21 23:35:48 +0000132define amdgpu_kernel void @v_clamp_negabs_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
133 %tid = call i32 @llvm.amdgcn.workitem.id.x()
134 %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
135 %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
136 %a = load half, half addrspace(1)* %gep0
137 %fabs.a = call half @llvm.fabs.f16(half %a)
138 %fneg.fabs.a = fsub half -0.0, %fabs.a
139
140 %max = call half @llvm.maxnum.f16(half %fneg.fabs.a, half 0.0)
141 %med = call half @llvm.minnum.f16(half %max, half 1.0)
142
143 store half %med, half addrspace(1)* %out.gep
144 ret void
145}
146
; Double-precision clamp idiom. The checks only expect a v_max_f64 followed by a
; v_min_f64 after the 64-bit load; no clamp modifier is checked for f64 (see
; the open question in the FIXME below).
147; FIXME: Do f64 instructions support clamp?
148; GCN-LABEL: {{^}}v_clamp_f64:
149; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
150; GCN: v_max_f64
151; GCN: v_min_f64
152define amdgpu_kernel void @v_clamp_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
153 %tid = call i32 @llvm.amdgcn.workitem.id.x()
154 %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
155 %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
156 %a = load double, double addrspace(1)* %gep0
157 %max = call double @llvm.maxnum.f64(double %a, double 0.0)
158 %med = call double @llvm.minnum.f64(double %max, double 1.0)
159
160 store double %med, double addrspace(1)* %out.gep
161 ret void
162}
163
; Double-precision clamp idiom on the negated input. The checks only expect a
; v_max_f64 followed by a v_min_f64; operands and modifiers are not checked.
164; GCN-LABEL: {{^}}v_clamp_neg_f64:
165; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
166; GCN: v_max_f64
167; GCN: v_min_f64
168define amdgpu_kernel void @v_clamp_neg_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
169 %tid = call i32 @llvm.amdgcn.workitem.id.x()
170 %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
171 %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
172 %a = load double, double addrspace(1)* %gep0
173 %fneg.a = fsub double -0.0, %a
174 %max = call double @llvm.maxnum.f64(double %fneg.a, double 0.0)
175 %med = call double @llvm.minnum.f64(double %max, double 1.0)
176
177 store double %med, double addrspace(1)* %out.gep
178 ret void
179}
180
; Double-precision clamp idiom on -fabs(%a). The checks only expect a v_max_f64
; followed by a v_min_f64; operands and modifiers are not checked.
181; GCN-LABEL: {{^}}v_clamp_negabs_f64:
182; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
183; GCN: v_max_f64
184; GCN: v_min_f64
185define amdgpu_kernel void @v_clamp_negabs_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
186 %tid = call i32 @llvm.amdgcn.workitem.id.x()
187 %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
188 %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
189 %a = load double, double addrspace(1)* %gep0
190 %fabs.a = call double @llvm.fabs.f64(double %a)
191 %fneg.fabs.a = fsub double -0.0, %fabs.a
192
193 %max = call double @llvm.maxnum.f64(double %fneg.fabs.a, double 0.0)
194 %med = call double @llvm.minnum.f64(double %max, double 1.0)
195
196 store double %med, double addrspace(1)* %out.gep
197 ret void
198}
199
; Negative test for the fmed3 intrinsic: the bounds are -0.0 and 1.0 (not +0.0
; and 1.0), and the checks expect a v_med3_f32 to be emitted.
200; GCN-LABEL: {{^}}v_clamp_med3_aby_negzero_f32:
201; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
202; GCN: v_med3_f32
203define amdgpu_kernel void @v_clamp_med3_aby_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
204 %tid = call i32 @llvm.amdgcn.workitem.id.x()
205 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
206 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
207 %a = load float, float addrspace(1)* %gep0
208 %med = call float @llvm.amdgcn.fmed3.f32(float -0.0, float 1.0, float %a)
209 store float %med, float addrspace(1)* %out.gep
210 ret void
211}
212
; fmed3 operand permutation (0.0, 1.0, %a). The checks expect the intrinsic to
; be selected as a clamped v_max_f32_e64 of the loaded value with itself.
213; GCN-LABEL: {{^}}v_clamp_med3_aby_f32:
214; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
215; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
216define amdgpu_kernel void @v_clamp_med3_aby_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
217 %tid = call i32 @llvm.amdgcn.workitem.id.x()
218 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
219 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
220 %a = load float, float addrspace(1)* %gep0
221 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a)
222 store float %med, float addrspace(1)* %out.gep
223 ret void
224}
225
; fmed3 operand permutation (1.0, 0.0, %a). The checks expect the intrinsic to
; be selected as a clamped v_max_f32_e64 of the loaded value with itself.
226; GCN-LABEL: {{^}}v_clamp_med3_bay_f32:
227; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
228; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
229define amdgpu_kernel void @v_clamp_med3_bay_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
230 %tid = call i32 @llvm.amdgcn.workitem.id.x()
231 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
232 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
233 %a = load float, float addrspace(1)* %gep0
234 %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a)
235 store float %med, float addrspace(1)* %out.gep
236 ret void
237}
238
; fmed3 operand permutation (%a, 0.0, 1.0). The checks expect the intrinsic to
; be selected as a clamped v_max_f32_e64 of the loaded value with itself.
239; GCN-LABEL: {{^}}v_clamp_med3_yab_f32:
240; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
241; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
242define amdgpu_kernel void @v_clamp_med3_yab_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
243 %tid = call i32 @llvm.amdgcn.workitem.id.x()
244 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
245 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
246 %a = load float, float addrspace(1)* %gep0
247 %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0)
248 store float %med, float addrspace(1)* %out.gep
249 ret void
250}
251
; fmed3 operand permutation (%a, 1.0, 0.0). The checks expect the intrinsic to
; be selected as a clamped v_max_f32_e64 of the loaded value with itself.
252; GCN-LABEL: {{^}}v_clamp_med3_yba_f32:
253; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
254; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
255define amdgpu_kernel void @v_clamp_med3_yba_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
256 %tid = call i32 @llvm.amdgcn.workitem.id.x()
257 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
258 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
259 %a = load float, float addrspace(1)* %gep0
260 %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0)
261 store float %med, float addrspace(1)* %out.gep
262 ret void
263}
264
; fmed3 operand permutation (0.0, %a, 1.0). The checks expect the intrinsic to
; be selected as a clamped v_max_f32_e64 of the loaded value with itself.
265; GCN-LABEL: {{^}}v_clamp_med3_ayb_f32:
266; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
267; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
268define amdgpu_kernel void @v_clamp_med3_ayb_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
269 %tid = call i32 @llvm.amdgcn.workitem.id.x()
270 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
271 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
272 %a = load float, float addrspace(1)* %gep0
273 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0)
274 store float %med, float addrspace(1)* %out.gep
275 ret void
276}
277
; fmed3 operand permutation (1.0, %a, 0.0). The checks expect the intrinsic to
; be selected as a clamped v_max_f32_e64 of the loaded value with itself.
278; GCN-LABEL: {{^}}v_clamp_med3_bya_f32:
279; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
280; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
281define amdgpu_kernel void @v_clamp_med3_bya_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
282 %tid = call i32 @llvm.amdgcn.workitem.id.x()
283 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
284 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
285 %a = load float, float addrspace(1)* %gep0
286 %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0)
287 store float %med, float addrspace(1)* %out.gep
288 ret void
289}
290
; Constant folding: fmed3(0.0, 1.0, 4.0) has all-constant operands, and the
; checks expect the result to be materialized directly as a move of 1.0.
291; GCN-LABEL: {{^}}v_clamp_constants_to_one_f32:
292; GCN: v_mov_b32_e32 v{{[0-9]+}}, 1.0
293define amdgpu_kernel void @v_clamp_constants_to_one_f32(float addrspace(1)* %out) #0 {
294 %tid = call i32 @llvm.amdgcn.workitem.id.x()
295 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
296 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 4.0)
297 store float %med, float addrspace(1)* %out.gep
298 ret void
299}
300
; Constant folding: fmed3(0.0, 1.0, -4.0) has all-constant operands, and the
; checks expect the result to be materialized directly as a move of 0.
301; GCN-LABEL: {{^}}v_clamp_constants_to_zero_f32:
302; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
303define amdgpu_kernel void @v_clamp_constants_to_zero_f32(float addrspace(1)* %out) #0 {
304 %tid = call i32 @llvm.amdgcn.workitem.id.x()
305 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
306 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float -4.0)
307 store float %med, float addrspace(1)* %out.gep
308 ret void
309}
310
; Constant folding: fmed3(0.0, 1.0, 0.5) -- the third operand already lies
; between the bounds, and the checks expect a move of 0.5 (value preserved).
311; GCN-LABEL: {{^}}v_clamp_constant_preserve_f32:
312; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0.5
313define amdgpu_kernel void @v_clamp_constant_preserve_f32(float addrspace(1)* %out) #0 {
314 %tid = call i32 @llvm.amdgcn.workitem.id.x()
315 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
316 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0.5)
317 store float %med, float addrspace(1)* %out.gep
318 ret void
319}
320
; Constant folding with a denormal input: the third operand is the bit pattern
; 8388607 (0x7fffff) reinterpreted as float. The checks expect that exact bit
; pattern to be moved, i.e. the value is not flushed or altered by the fold.
321; GCN-LABEL: {{^}}v_clamp_constant_preserve_denorm_f32:
322; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7fffff{{$}}
323define amdgpu_kernel void @v_clamp_constant_preserve_denorm_f32(float addrspace(1)* %out) #0 {
324 %tid = call i32 @llvm.amdgcn.workitem.id.x()
325 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
326 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 8388607 to float))
327 store float %med, float addrspace(1)* %out.gep
328 ret void
329}
330
; Constant folding with a quiet NaN as the third operand (0x7FF8000000000000 in
; LLVM's hex float notation). Under attribute group #0 the checks expect the
; result to be folded to a move of 0.
331; GCN-LABEL: {{^}}v_clamp_constant_qnan_f32:
332; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
333define amdgpu_kernel void @v_clamp_constant_qnan_f32(float addrspace(1)* %out) #0 {
334 %tid = call i32 @llvm.amdgcn.workitem.id.x()
335 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
336 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000)
337 store float %med, float addrspace(1)* %out.gep
338 ret void
339}
340
; Constant folding with a signaling NaN as the third operand (bit pattern
; 2139095041 = 0x7f800001). Under attribute group #0 the checks expect the
; result to be folded to a move of 0.
341; GCN-LABEL: {{^}}v_clamp_constant_snan_f32:
342; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
343define amdgpu_kernel void @v_clamp_constant_snan_f32(float addrspace(1)* %out) #0 {
344 %tid = call i32 @llvm.amdgcn.workitem.id.x()
345 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
346 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float))
347 store float %med, float addrspace(1)* %out.gep
348 ret void
349}
350
351; ---------------------------------------------------------------------
352; Test non-default behaviors enabling snans and disabling dx10_clamp
353; ---------------------------------------------------------------------
354
; min/max clamp idiom compiled with attribute group #2 (dx10-clamp and
; fp-exceptions both disabled). The checks expect a v_med3_f32 of the loaded
; value with 0 and 1.0 rather than a clamp modifier.
355; GCN-LABEL: {{^}}v_clamp_f32_no_dx10_clamp:
356; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
357; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
358define amdgpu_kernel void @v_clamp_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
359 %tid = call i32 @llvm.amdgcn.workitem.id.x()
360 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
361 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
362 %a = load float, float addrspace(1)* %gep0
363 %max = call float @llvm.maxnum.f32(float %a, float 0.0)
364 %med = call float @llvm.minnum.f32(float %max, float 1.0)
365
366 store float %med, float addrspace(1)* %out.gep
367 ret void
368}
369
; min/max clamp idiom compiled with attribute group #3 (dx10-clamp and
; fp-exceptions both enabled). The checks still expect a single clamped
; v_max_f32_e64 of the loaded value with itself.
370; GCN-LABEL: {{^}}v_clamp_f32_snan_dx10clamp:
371; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
372; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
373define amdgpu_kernel void @v_clamp_f32_snan_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #3 {
374 %tid = call i32 @llvm.amdgcn.workitem.id.x()
375 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
376 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
377 %a = load float, float addrspace(1)* %gep0
378 %max = call float @llvm.maxnum.f32(float %a, float 0.0)
379 %med = call float @llvm.minnum.f32(float %max, float 1.0)
380
381 store float %med, float addrspace(1)* %out.gep
382 ret void
383}
384
; min/max clamp idiom compiled with attribute group #4 (dx10-clamp disabled,
; fp-exceptions enabled). The checks expect neither clamp nor med3: the max and
; the min stay as separate v_max_f32 / v_min_f32 instructions.
385; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp:
386; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
387; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
388; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
389define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
390 %tid = call i32 @llvm.amdgcn.workitem.id.x()
391 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
392 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
393 %a = load float, float addrspace(1)* %gep0
394 %max = call float @llvm.maxnum.f32(float %a, float 0.0)
395 %med = call float @llvm.minnum.f32(float %max, float 1.0)
396
397 store float %med, float addrspace(1)* %out.gep
398 ret void
399}
400
; Same configuration as the plain snan/no-dx10clamp case (attribute group #4),
; but the clamped value is produced by an fadd carrying the nnan flag. With
; that flag on the source the checks expect a v_med3_f32 with 0 and 1.0.
; NOTE(review): the check reuses [[A]] (the loaded register) as the med3 source
; although the IR clamps %add, not %a -- presumably the add result lands in the
; same register; confirm against actual llc output.
401; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp_nnan_src:
402; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
403; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
404define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp_nnan_src(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
405 %tid = call i32 @llvm.amdgcn.workitem.id.x()
406 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
407 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
408 %a = load float, float addrspace(1)* %gep0
409 %add = fadd nnan float %a, 1.0
410 %max = call float @llvm.maxnum.f32(float %add, float 0.0)
411 %med = call float @llvm.minnum.f32(float %max, float 1.0)
412
413 store float %med, float addrspace(1)* %out.gep
414 ret void
415}
416
; fmed3 operand permutation (0.0, 1.0, %a) compiled with attribute group #2
; (dx10-clamp disabled). The checks expect a clamped v_max_f32_e64 here.
; NOTE(review): the permutations in this file that put %a in operand 0 or 1
; expect v_med3_f32 under the same attributes -- confirm the asymmetry is
; intended.
417; GCN-LABEL: {{^}}v_clamp_med3_aby_f32_no_dx10_clamp:
418; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
419; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
420define amdgpu_kernel void @v_clamp_med3_aby_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
421 %tid = call i32 @llvm.amdgcn.workitem.id.x()
422 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
423 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
424 %a = load float, float addrspace(1)* %gep0
425 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a)
426 store float %med, float addrspace(1)* %out.gep
427 ret void
428}
429
; fmed3 operand permutation (1.0, 0.0, %a) compiled with attribute group #2
; (dx10-clamp disabled). The checks expect a clamped v_max_f32_e64 here.
; NOTE(review): the permutations in this file that put %a in operand 0 or 1
; expect v_med3_f32 under the same attributes -- confirm the asymmetry is
; intended.
430; GCN-LABEL: {{^}}v_clamp_med3_bay_f32_no_dx10_clamp:
431; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
432; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
433define amdgpu_kernel void @v_clamp_med3_bay_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
434 %tid = call i32 @llvm.amdgcn.workitem.id.x()
435 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
436 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
437 %a = load float, float addrspace(1)* %gep0
438 %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a)
439 store float %med, float addrspace(1)* %out.gep
440 ret void
441}
442
; fmed3 operand permutation (%a, 0.0, 1.0) compiled with attribute group #2
; (dx10-clamp disabled). The checks expect the intrinsic to stay a v_med3_f32
; with operands in the same order as the IR call.
443; GCN-LABEL: {{^}}v_clamp_med3_yab_f32_no_dx10_clamp:
444; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
445; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
446define amdgpu_kernel void @v_clamp_med3_yab_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
447 %tid = call i32 @llvm.amdgcn.workitem.id.x()
448 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
449 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
450 %a = load float, float addrspace(1)* %gep0
451 %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0)
452 store float %med, float addrspace(1)* %out.gep
453 ret void
454}
455
; fmed3 operand permutation (%a, 1.0, 0.0) compiled with attribute group #2
; (dx10-clamp disabled). The checks expect the intrinsic to stay a v_med3_f32
; with operands in the same order as the IR call.
456; GCN-LABEL: {{^}}v_clamp_med3_yba_f32_no_dx10_clamp:
457; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
458; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 1.0, 0
459define amdgpu_kernel void @v_clamp_med3_yba_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
460 %tid = call i32 @llvm.amdgcn.workitem.id.x()
461 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
462 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
463 %a = load float, float addrspace(1)* %gep0
464 %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0)
465 store float %med, float addrspace(1)* %out.gep
466 ret void
467}
468
; fmed3 operand permutation (0.0, %a, 1.0) compiled with attribute group #2
; (dx10-clamp disabled). The checks expect the intrinsic to stay a v_med3_f32
; with operands in the same order as the IR call.
469; GCN-LABEL: {{^}}v_clamp_med3_ayb_f32_no_dx10_clamp:
470; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
471; GCN: v_med3_f32 v{{[0-9]+}}, 0, [[A]], 1.0
472define amdgpu_kernel void @v_clamp_med3_ayb_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
473 %tid = call i32 @llvm.amdgcn.workitem.id.x()
474 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
475 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
476 %a = load float, float addrspace(1)* %gep0
477 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0)
478 store float %med, float addrspace(1)* %out.gep
479 ret void
480}
481
; fmed3 operand permutation (1.0, %a, 0.0) compiled with attribute group #2
; (dx10-clamp disabled). The checks expect the intrinsic to stay a v_med3_f32
; with operands in the same order as the IR call.
482; GCN-LABEL: {{^}}v_clamp_med3_bya_f32_no_dx10_clamp:
483; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
484; GCN: v_med3_f32 v{{[0-9]+}}, 1.0, [[A]], 0
485define amdgpu_kernel void @v_clamp_med3_bya_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
486 %tid = call i32 @llvm.amdgcn.workitem.id.x()
487 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
488 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
489 %a = load float, float addrspace(1)* %gep0
490 %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0)
491 store float %med, float addrspace(1)* %out.gep
492 ret void
493}
494
; Constant folding with a quiet NaN third operand under attribute group #2
; (dx10-clamp disabled). Unlike the #0 variant, which expects 0, the checks here
; expect the NaN bit pattern 0x7fc00000 to be moved.
495; GCN-LABEL: {{^}}v_clamp_constant_qnan_f32_no_dx10_clamp:
496; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7fc00000
497define amdgpu_kernel void @v_clamp_constant_qnan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
498 %tid = call i32 @llvm.amdgcn.workitem.id.x()
499 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
500 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000)
501 store float %med, float addrspace(1)* %out.gep
502 ret void
503}
504
; Constant folding with a signaling NaN third operand (bit pattern 2139095041 =
; 0x7f800001) under attribute group #2 (dx10-clamp disabled). The checks expect
; that same bit pattern to be moved unchanged.
505; GCN-LABEL: {{^}}v_clamp_constant_snan_f32_no_dx10_clamp:
506; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7f800001
507define amdgpu_kernel void @v_clamp_constant_snan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
508 %tid = call i32 @llvm.amdgcn.workitem.id.x()
509 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
510 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float))
511 store float %med, float addrspace(1)* %out.gep
512 ret void
513}
514
; Declarations of the intrinsics called by the tests in this file: the workitem
; id query, fabs/minnum/maxnum for f32, f64 and f16, and the AMDGPU f32 fmed3
; intrinsic. All carry attribute group #1.
515declare i32 @llvm.amdgcn.workitem.id.x() #1
516declare float @llvm.fabs.f32(float) #1
517declare float @llvm.minnum.f32(float, float) #1
518declare float @llvm.maxnum.f32(float, float) #1
519declare float @llvm.amdgcn.fmed3.f32(float, float, float) #1
520declare double @llvm.fabs.f64(double) #1
521declare double @llvm.minnum.f64(double, double) #1
522declare double @llvm.maxnum.f64(double, double) #1
523declare half @llvm.fabs.f16(half) #1
524declare half @llvm.minnum.f16(half, half) #1
525declare half @llvm.maxnum.f16(half, half) #1
526
527
; Attribute groups referenced by the definitions and declarations in this file:
;   #0 - plain kernels, no target-feature overrides
;   #1 - intrinsic declarations (nounwind readnone)
;   #2 - dx10-clamp disabled, fp-exceptions disabled
;   #3 - dx10-clamp enabled,  fp-exceptions enabled
;   #4 - dx10-clamp disabled, fp-exceptions enabled
; The functions using #3 and #4 are the ones named *_snan_*; presumably the
; fp-exceptions feature is what makes signaling NaNs matter -- confirm against
; the AMDGPU backend documentation.
528attributes #0 = { nounwind }
529attributes #1 = { nounwind readnone }
530attributes #2 = { nounwind "target-features"="-dx10-clamp,-fp-exceptions" "no-nans-fp-math"="false" }
531attributes #3 = { nounwind "target-features"="+dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" }
532attributes #4 = { nounwind "target-features"="-dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" }