; Provenance (from the blame view this text was extracted from):
;   blob 5f9884e74e9a1036893329bb92d6433547a7ea83
;   blame entry for the first line: 2fdf2a1, Matt Arsenault, 2017-02-21 23:35:48 +0000
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; maxnum(a, 0.0) followed by minnum(.., 1.0) on a loaded f32 is expected to
; select a single v_max_f32 of the value with itself, carrying the clamp
; modifier.
; GCN-LABEL: {{^}}v_clamp_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %max = call float @llvm.maxnum.f32(float %a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}

; Same clamp pattern with a negated input (fsub -0.0, a). The negation is
; expected to fold into both source modifiers of the clamped v_max_f32.
; GCN-LABEL: {{^}}v_clamp_neg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, -[[A]], -[[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_neg_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %fneg.a = fsub float -0.0, %a
  %max = call float @llvm.maxnum.f32(float %fneg.a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}

; Clamp of -|a|. Both the fabs and the negation are expected to fold into the
; source modifiers of the clamped v_max_f32.
; GCN-LABEL: {{^}}v_clamp_negabs_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, -|[[A]]|, -|[[A]]| clamp{{$}}
define amdgpu_kernel void @v_clamp_negabs_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %fabs.a = call float @llvm.fabs.f32(float %a)
  %fneg.fabs.a = fsub float -0.0, %fabs.a

  %max = call float @llvm.maxnum.f32(float %fneg.fabs.a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}

; The lower bound is -0.0 instead of +0.0. The expected output is a
; v_med3_f32 against the sign-bit constant (v_bfrev_b32 of 1) rather than a
; clamp modifier.
; GCN-LABEL: {{^}}v_clamp_negzero_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[SIGNBIT]], 1.0
define amdgpu_kernel void @v_clamp_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %max = call float @llvm.maxnum.f32(float %a, float -0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}

; The maxnum result has a second use (a volatile store), so separate
; v_max_f32 and v_min_f32 instructions are expected instead of a clamp.
; GCN-LABEL: {{^}}v_clamp_multi_use_max_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
define amdgpu_kernel void @v_clamp_multi_use_max_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %max = call float @llvm.maxnum.f32(float %a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  store volatile float %max, float addrspace(1)* undef
  ret void
}

; f16 version of the basic clamp. The fiji run expects a native clamped
; v_max_f16; the SI checks expect a convert to f32, a clamped v_max_f32, and a
; convert back to f16.
; GCN-LABEL: {{^}}v_clamp_f16:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; VI: v_max_f16_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}

; SI: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[A]]
; SI: v_max_f32_e64 v{{[0-9]+}}, [[CVT]], [[CVT]] clamp{{$}}
; SI: v_cvt_f16_f32_e32
define amdgpu_kernel void @v_clamp_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
  %a = load half, half addrspace(1)* %gep0
  %max = call half @llvm.maxnum.f16(half %a, half 0.0)
  %med = call half @llvm.minnum.f16(half %max, half 1.0)

  store half %med, half addrspace(1)* %out.gep
  ret void
}

; f16 clamp of a negated input. The fiji run expects the negation folded into
; the clamped v_max_f16; the SI checks expect it folded into the f16->f32
; conversion instead.
; GCN-LABEL: {{^}}v_clamp_neg_f16:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; VI: v_max_f16_e64 v{{[0-9]+}}, -[[A]], -[[A]] clamp{{$}}

; FIXME: Better to fold neg into max
; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]]
; SI: v_max_f32_e64 v{{[0-9]+}}, [[CVT]], [[CVT]] clamp{{$}}
; SI: v_cvt_f16_f32
define amdgpu_kernel void @v_clamp_neg_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
  %a = load half, half addrspace(1)* %gep0
  %fneg.a = fsub half -0.0, %a
  %max = call half @llvm.maxnum.f16(half %fneg.a, half 0.0)
  %med = call half @llvm.minnum.f16(half %max, half 1.0)

  store half %med, half addrspace(1)* %out.gep
  ret void
}

; f16 clamp of -|a|. The fiji run expects neg and abs folded into the clamped
; v_max_f16; the SI checks expect them folded into the f16->f32 conversion.
; GCN-LABEL: {{^}}v_clamp_negabs_f16:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; VI: v_max_f16_e64 v{{[0-9]+}}, -|[[A]]|, -|[[A]]| clamp{{$}}

; FIXME: Better to fold neg/abs into max

; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -|[[A]]|
; SI: v_max_f32_e64 v{{[0-9]+}}, [[CVT]], [[CVT]] clamp{{$}}
; SI: v_cvt_f16_f32_e32
define amdgpu_kernel void @v_clamp_negabs_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
  %a = load half, half addrspace(1)* %gep0
  %fabs.a = call half @llvm.fabs.f16(half %a)
  %fneg.fabs.a = fsub half -0.0, %fabs.a

  %max = call half @llvm.maxnum.f16(half %fneg.fabs.a, half 0.0)
  %med = call half @llvm.minnum.f16(half %max, half 1.0)

  store half %med, half addrspace(1)* %out.gep
  ret void
}

; f64 version of the basic clamp. The checks currently expect separate
; v_max_f64 and v_min_f64 instructions (no clamp modifier).
; FIXME: Do f64 instructions support clamp?
; GCN-LABEL: {{^}}v_clamp_f64:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: v_max_f64
; GCN: v_min_f64
define amdgpu_kernel void @v_clamp_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
  %a = load double, double addrspace(1)* %gep0
  %max = call double @llvm.maxnum.f64(double %a, double 0.0)
  %med = call double @llvm.minnum.f64(double %max, double 1.0)

  store double %med, double addrspace(1)* %out.gep
  ret void
}

; f64 clamp pattern with a negated input. Separate v_max_f64 and v_min_f64
; instructions are expected.
; GCN-LABEL: {{^}}v_clamp_neg_f64:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: v_max_f64
; GCN: v_min_f64
define amdgpu_kernel void @v_clamp_neg_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
  %a = load double, double addrspace(1)* %gep0
  %fneg.a = fsub double -0.0, %a
  %max = call double @llvm.maxnum.f64(double %fneg.a, double 0.0)
  %med = call double @llvm.minnum.f64(double %max, double 1.0)

  store double %med, double addrspace(1)* %out.gep
  ret void
}

; f64 clamp pattern applied to -|a|. Separate v_max_f64 and v_min_f64
; instructions are expected.
; GCN-LABEL: {{^}}v_clamp_negabs_f64:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: v_max_f64
; GCN: v_min_f64
define amdgpu_kernel void @v_clamp_negabs_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
  %a = load double, double addrspace(1)* %gep0
  %fabs.a = call double @llvm.fabs.f64(double %a)
  %fneg.fabs.a = fsub double -0.0, %fabs.a

  %max = call double @llvm.maxnum.f64(double %fneg.fabs.a, double 0.0)
  %med = call double @llvm.minnum.f64(double %max, double 1.0)

  store double %med, double addrspace(1)* %out.gep
  ret void
}

; fmed3(-0.0, 1.0, a): with a -0.0 bound the intrinsic is expected to stay a
; v_med3_f32 rather than become a clamp.
; GCN-LABEL: {{^}}v_clamp_med3_aby_negzero_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32
define amdgpu_kernel void @v_clamp_med3_aby_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float -0.0, float 1.0, float %a)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3 with operand order (0.0, 1.0, a) is expected to select a clamped
; v_max_f32 of the loaded value.
; GCN-LABEL: {{^}}v_clamp_med3_aby_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_aby_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3 with operand order (1.0, 0.0, a) is expected to select a clamped
; v_max_f32 of the loaded value.
; GCN-LABEL: {{^}}v_clamp_med3_bay_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_bay_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3 with operand order (a, 0.0, 1.0) is expected to select a clamped
; v_max_f32 of the loaded value.
; GCN-LABEL: {{^}}v_clamp_med3_yab_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_yab_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3 with operand order (a, 1.0, 0.0) is expected to select a clamped
; v_max_f32 of the loaded value.
; GCN-LABEL: {{^}}v_clamp_med3_yba_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_yba_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3 with operand order (0.0, a, 1.0) is expected to select a clamped
; v_max_f32 of the loaded value.
; GCN-LABEL: {{^}}v_clamp_med3_ayb_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_ayb_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3 with operand order (1.0, a, 0.0) is expected to select a clamped
; v_max_f32 of the loaded value.
; GCN-LABEL: {{^}}v_clamp_med3_bya_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_bya_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(0.0, 1.0, 4.0) on constants is expected to fold to a move of 1.0.
; GCN-LABEL: {{^}}v_clamp_constants_to_one_f32:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 1.0
define amdgpu_kernel void @v_clamp_constants_to_one_f32(float addrspace(1)* %out) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 4.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(0.0, 1.0, -4.0) on constants is expected to fold to a move of 0.
; GCN-LABEL: {{^}}v_clamp_constants_to_zero_f32:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
define amdgpu_kernel void @v_clamp_constants_to_zero_f32(float addrspace(1)* %out) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float -4.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(0.0, 1.0, 0.5): a constant already inside [0.0, 1.0] is expected to
; fold to a move of that same constant.
; GCN-LABEL: {{^}}v_clamp_constant_preserve_f32:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0.5
define amdgpu_kernel void @v_clamp_constant_preserve_f32(float addrspace(1)* %out) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0.5)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; The third operand is the bit pattern 8388607 (0x7fffff), a denormal f32.
; The fold is expected to keep that bit pattern rather than flush it.
; GCN-LABEL: {{^}}v_clamp_constant_preserve_denorm_f32:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7fffff{{$}}
define amdgpu_kernel void @v_clamp_constant_preserve_denorm_f32(float addrspace(1)* %out) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 8388607 to float))
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; With attribute group #0, clamping a constant quiet NaN is expected to fold
; to a move of 0.
; GCN-LABEL: {{^}}v_clamp_constant_qnan_f32:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
define amdgpu_kernel void @v_clamp_constant_qnan_f32(float addrspace(1)* %out) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; With attribute group #0, clamping a constant signaling NaN (bit pattern
; 2139095041 = 0x7f800001) is expected to fold to a move of 0.
; GCN-LABEL: {{^}}v_clamp_constant_snan_f32:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
define amdgpu_kernel void @v_clamp_constant_snan_f32(float addrspace(1)* %out) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float))
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; ---------------------------------------------------------------------
; Test non-default behaviors enabling snans and disabling dx10_clamp
; ---------------------------------------------------------------------

; Basic maxnum/minnum clamp pattern under attribute group #2 (-dx10-clamp,
; -fp-exceptions). A v_med3_f32 against 0 and 1.0 is expected instead of a
; clamp modifier.
; GCN-LABEL: {{^}}v_clamp_f32_no_dx10_clamp:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
define amdgpu_kernel void @v_clamp_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %max = call float @llvm.maxnum.f32(float %a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}

; Basic clamp pattern under attribute group #3 (+dx10-clamp, +fp-exceptions).
; A clamped v_max_f32 is still expected.
; GCN-LABEL: {{^}}v_clamp_f32_snan_dx10clamp:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_f32_snan_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #3 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %max = call float @llvm.maxnum.f32(float %a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}

; Basic clamp pattern under attribute group #4 (-dx10-clamp, +fp-exceptions).
; Neither a clamp modifier nor a med3 is expected; the checks require separate
; v_max_f32 and v_min_f32 instructions.
; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %max = call float @llvm.maxnum.f32(float %a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}

; Same attribute group #4 (-dx10-clamp, +fp-exceptions), but the clamped value
; comes from an fadd marked nnan. A v_med3_f32 against 0 and 1.0 is expected.
; NOTE(review): the med3 check reuses [[A]] (the load result) although the
; clamped value is the fadd result, so it appears to rely on the add landing in
; the same register - confirm this is intended.
; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp_nnan_src:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp_nnan_src(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %add = fadd nnan float %a, 1.0
  %max = call float @llvm.maxnum.f32(float %add, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(0.0, 1.0, a) under attribute group #2 (-dx10-clamp, -fp-exceptions).
; This operand order is still expected to select a clamped v_max_f32.
; GCN-LABEL: {{^}}v_clamp_med3_aby_f32_no_dx10_clamp:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_aby_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(1.0, 0.0, a) under attribute group #2 (-dx10-clamp, -fp-exceptions).
; This operand order is still expected to select a clamped v_max_f32.
; GCN-LABEL: {{^}}v_clamp_med3_bay_f32_no_dx10_clamp:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_bay_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(a, 0.0, 1.0) under attribute group #2 (-dx10-clamp, -fp-exceptions).
; The intrinsic is expected to stay a v_med3_f32 with operands in IR order.
; GCN-LABEL: {{^}}v_clamp_med3_yab_f32_no_dx10_clamp:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
define amdgpu_kernel void @v_clamp_med3_yab_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(a, 1.0, 0.0) under attribute group #2 (-dx10-clamp, -fp-exceptions).
; The intrinsic is expected to stay a v_med3_f32 with operands in IR order.
; GCN-LABEL: {{^}}v_clamp_med3_yba_f32_no_dx10_clamp:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 1.0, 0
define amdgpu_kernel void @v_clamp_med3_yba_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(0.0, a, 1.0) under attribute group #2 (-dx10-clamp, -fp-exceptions).
; The intrinsic is expected to stay a v_med3_f32 with operands in IR order.
; GCN-LABEL: {{^}}v_clamp_med3_ayb_f32_no_dx10_clamp:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, 0, [[A]], 1.0
define amdgpu_kernel void @v_clamp_med3_ayb_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(1.0, a, 0.0) under attribute group #2 (-dx10-clamp, -fp-exceptions).
; The intrinsic is expected to stay a v_med3_f32 with operands in IR order.
; GCN-LABEL: {{^}}v_clamp_med3_bya_f32_no_dx10_clamp:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, 1.0, [[A]], 0
define amdgpu_kernel void @v_clamp_med3_bya_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; Under attribute group #2 (-dx10-clamp, -fp-exceptions), clamping a constant
; quiet NaN is expected to keep the NaN (0x7fc00000) instead of folding to 0.
; GCN-LABEL: {{^}}v_clamp_constant_qnan_f32_no_dx10_clamp:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7fc00000
define amdgpu_kernel void @v_clamp_constant_qnan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; Under attribute group #2 (-dx10-clamp, -fp-exceptions), clamping a constant
; signaling NaN (bit pattern 2139095041) is expected to keep the same bit
; pattern (0x7f800001) instead of folding to 0.
; GCN-LABEL: {{^}}v_clamp_constant_snan_f32_no_dx10_clamp:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7f800001
define amdgpu_kernel void @v_clamp_constant_snan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float))
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; Intrinsics used by the kernels in this file, grouped by type.
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @llvm.fabs.f32(float) #1
declare float @llvm.minnum.f32(float, float) #1
declare float @llvm.maxnum.f32(float, float) #1
declare float @llvm.amdgcn.fmed3.f32(float, float, float) #1
declare double @llvm.fabs.f64(double) #1
declare double @llvm.minnum.f64(double, double) #1
declare double @llvm.maxnum.f64(double, double) #1
declare half @llvm.fabs.f16(half) #1
declare half @llvm.minnum.f16(half, half) #1
declare half @llvm.maxnum.f16(half, half) #1


; #0 - kernels with no explicit target-features string.
; #1 - the intrinsic declarations.
; #2 - dx10-clamp and fp-exceptions both disabled.
; #3 - dx10-clamp and fp-exceptions both enabled.
; #4 - dx10-clamp disabled, fp-exceptions enabled.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "target-features"="-dx10-clamp,-fp-exceptions" "no-nans-fp-math"="false" }
attributes #3 = { nounwind "target-features"="+dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" }
attributes #4 = { nounwind "target-features"="-dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" }
535attributes #4 = { nounwind "target-features"="-dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" }