blob: 216ecf76345661f0e93ad3864d2e92390816a0ac [file] [log] [blame]
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s
Matt Arsenault2fdf2a12017-02-21 23:35:48 +00004
; minnum(maxnum(a, 0.0), 1.0) on a loaded float; the checks expect a single
; v_max_f32 of the value with itself carrying the clamp modifier.
; GCN-LABEL: {{^}}v_clamp_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %max = call float @llvm.maxnum.f32(float %a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}
19
; Same 0.0/1.0 clamp pattern applied to the negated input (fsub -0.0, a); the
; checks expect the negation to appear as a source modifier on both operands.
; GCN-LABEL: {{^}}v_clamp_neg_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, -[[A]], -[[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_neg_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %fneg.a = fsub float -0.0, %a
  %max = call float @llvm.maxnum.f32(float %fneg.a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}
35
; Clamp of -fabs(a); the checks expect both the abs and neg source modifiers
; on the operands of the clamped v_max_f32.
; GCN-LABEL: {{^}}v_clamp_negabs_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, -|[[A]]|, -|[[A]]| clamp{{$}}
define amdgpu_kernel void @v_clamp_negabs_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %fabs.a = call float @llvm.fabs.f32(float %a)
  %fneg.fabs.a = fsub float -0.0, %fabs.a

  %max = call float @llvm.maxnum.f32(float %fneg.fabs.a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}
53
; Lower bound is -0.0 rather than +0.0, so the checks expect a v_med3_f32
; against a sign-bit constant (built with v_bfrev_b32 of 1) and 1.0 instead of
; the clamp modifier.
; GCN-LABEL: {{^}}v_clamp_negzero_f32:
; GCN-DAG: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[SIGNBIT]], 1.0
define amdgpu_kernel void @v_clamp_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %max = call float @llvm.maxnum.f32(float %a, float -0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}
69
; The intermediate maxnum result has a second use (a volatile store), so the
; checks expect separate v_max_f32 and v_min_f32 instructions.
; GCN-LABEL: {{^}}v_clamp_multi_use_max_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
define amdgpu_kernel void @v_clamp_multi_use_max_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %max = call float @llvm.maxnum.f32(float %a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  store volatile float %max, float addrspace(1)* undef
  ret void
}
86
; Half-precision 0.0/1.0 clamp. The fiji/gfx900 runs expect a clamped
; v_max_f16; the default-target run expects the clamp on the f16-to-f32
; conversion, followed by a conversion back to f16.
; GCN-LABEL: {{^}}v_clamp_f16:
; GCN: {{buffer|flat|global}}_load_ushort [[A:v[0-9]+]]
; GFX89: v_max_f16_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}

; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], [[A]] clamp{{$}}
; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
define amdgpu_kernel void @v_clamp_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
  %a = load half, half addrspace(1)* %gep0
  %max = call half @llvm.maxnum.f16(half %a, half 0.0)
  %med = call half @llvm.minnum.f16(half %max, half 1.0)

  store half %med, half addrspace(1)* %out.gep
  ret void
}
104
; Half-precision clamp of the negated input; the neg is expected as a source
; modifier on the clamped instruction for every run line.
; GCN-LABEL: {{^}}v_clamp_neg_f16:
; GCN: {{buffer|flat|global}}_load_ushort [[A:v[0-9]+]]
; GFX89: v_max_f16_e64 v{{[0-9]+}}, -[[A]], -[[A]] clamp{{$}}

; FIXME: Better to fold neg into max
; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]] clamp{{$}}
; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
define amdgpu_kernel void @v_clamp_neg_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
  %a = load half, half addrspace(1)* %gep0
  %fneg.a = fsub half -0.0, %a
  %max = call half @llvm.maxnum.f16(half %fneg.a, half 0.0)
  %med = call half @llvm.minnum.f16(half %max, half 1.0)

  store half %med, half addrspace(1)* %out.gep
  ret void
}
124
; Half-precision clamp of -fabs(a); both source modifiers are expected on the
; clamped instruction for every run line.
; GCN-LABEL: {{^}}v_clamp_negabs_f16:
; GCN: {{buffer|flat|global}}_load_ushort [[A:v[0-9]+]]
; GFX89: v_max_f16_e64 v{{[0-9]+}}, -|[[A]]|, -|[[A]]| clamp{{$}}

; FIXME: Better to fold neg/abs into max

; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -|[[A]]| clamp{{$}}
; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
define amdgpu_kernel void @v_clamp_negabs_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
  %a = load half, half addrspace(1)* %gep0
  %fabs.a = call half @llvm.fabs.f16(half %a)
  %fneg.fabs.a = fsub half -0.0, %fabs.a

  %max = call half @llvm.maxnum.f16(half %fneg.fabs.a, half 0.0)
  %med = call half @llvm.minnum.f16(half %max, half 1.0)

  store half %med, half addrspace(1)* %out.gep
  ret void
}
147
; FIXME: Do f64 instructions support clamp?
; Double-precision 0.0/1.0 clamp; the checks expect a clamped v_max_f64 on the
; loaded 64-bit register pair.
; GCN-LABEL: {{^}}v_clamp_f64:
; GCN: {{buffer|flat|global}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
  %a = load double, double addrspace(1)* %gep0
  %max = call double @llvm.maxnum.f64(double %a, double 0.0)
  %med = call double @llvm.minnum.f64(double %max, double 1.0)

  store double %med, double addrspace(1)* %out.gep
  ret void
}
163
; Double-precision clamp of the negated input; the neg is expected as a source
; modifier on the clamped v_max_f64.
; GCN-LABEL: {{^}}v_clamp_neg_f64:
; GCN: {{buffer|flat|global}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, -[[A]], -[[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_neg_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
  %a = load double, double addrspace(1)* %gep0
  %fneg.a = fsub double -0.0, %a
  %max = call double @llvm.maxnum.f64(double %fneg.a, double 0.0)
  %med = call double @llvm.minnum.f64(double %max, double 1.0)

  store double %med, double addrspace(1)* %out.gep
  ret void
}
179
; Double-precision clamp of -fabs(a); both source modifiers are expected on
; the clamped v_max_f64.
; GCN-LABEL: {{^}}v_clamp_negabs_f64:
; GCN: {{buffer|flat|global}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, -|[[A]]|, -|[[A]]| clamp{{$}}
define amdgpu_kernel void @v_clamp_negabs_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
  %a = load double, double addrspace(1)* %gep0
  %fabs.a = call double @llvm.fabs.f64(double %a)
  %fneg.fabs.a = fsub double -0.0, %fabs.a

  %max = call double @llvm.maxnum.f64(double %fneg.fabs.a, double 0.0)
  %med = call double @llvm.minnum.f64(double %max, double 1.0)

  store double %med, double addrspace(1)* %out.gep
  ret void
}
197
; fmed3(-0.0, 1.0, a): with a -0.0 bound the checks expect the v_med3_f32 to
; remain rather than becoming a clamp.
; GCN-LABEL: {{^}}v_clamp_med3_aby_negzero_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32
define amdgpu_kernel void @v_clamp_med3_aby_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float -0.0, float 1.0, float %a)
  store float %med, float addrspace(1)* %out.gep
  ret void
}
210
; fmed3 with operand order (0.0, 1.0, a); expected to select a clamped
; v_max_f32.
; GCN-LABEL: {{^}}v_clamp_med3_aby_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_aby_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a)
  store float %med, float addrspace(1)* %out.gep
  ret void
}
223
; fmed3 with operand order (1.0, 0.0, a); expected to select a clamped
; v_max_f32.
; GCN-LABEL: {{^}}v_clamp_med3_bay_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_bay_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a)
  store float %med, float addrspace(1)* %out.gep
  ret void
}
236
; fmed3 with operand order (a, 0.0, 1.0); expected to select a clamped
; v_max_f32.
; GCN-LABEL: {{^}}v_clamp_med3_yab_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_yab_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}
249
; fmed3 with operand order (a, 1.0, 0.0); expected to select a clamped
; v_max_f32.
; GCN-LABEL: {{^}}v_clamp_med3_yba_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_yba_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}
262
; fmed3 with operand order (0.0, a, 1.0); expected to select a clamped
; v_max_f32.
; GCN-LABEL: {{^}}v_clamp_med3_ayb_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_ayb_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}
275
; fmed3 with operand order (1.0, a, 0.0); expected to select a clamped
; v_max_f32.
; GCN-LABEL: {{^}}v_clamp_med3_bya_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_bya_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}
288
; All-constant fmed3(0.0, 1.0, 4.0); expected to fold to the constant 1.0.
; GCN-LABEL: {{^}}v_clamp_constants_to_one_f32:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 1.0
define amdgpu_kernel void @v_clamp_constants_to_one_f32(float addrspace(1)* %out) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 4.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}
298
; All-constant fmed3(0.0, 1.0, -4.0); expected to fold to the constant 0.
; GCN-LABEL: {{^}}v_clamp_constants_to_zero_f32:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
define amdgpu_kernel void @v_clamp_constants_to_zero_f32(float addrspace(1)* %out) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float -4.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}
308
; All-constant fmed3(0.0, 1.0, 0.5); the in-range value 0.5 is expected to be
; kept as the folded result.
; GCN-LABEL: {{^}}v_clamp_constant_preserve_f32:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0.5
define amdgpu_kernel void @v_clamp_constant_preserve_f32(float addrspace(1)* %out) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0.5)
  store float %med, float addrspace(1)* %out.gep
  ret void
}
318
; Third operand is the bit pattern 0x7fffff (i32 8388607, a denormal float);
; the fold is expected to keep that exact bit pattern.
; GCN-LABEL: {{^}}v_clamp_constant_preserve_denorm_f32:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7fffff{{$}}
define amdgpu_kernel void @v_clamp_constant_preserve_denorm_f32(float addrspace(1)* %out) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 8388607 to float))
  store float %med, float addrspace(1)* %out.gep
  ret void
}
328
; Third operand is a quiet-NaN constant; under attribute group #0 the fold is
; expected to produce 0.
; GCN-LABEL: {{^}}v_clamp_constant_qnan_f32:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
define amdgpu_kernel void @v_clamp_constant_qnan_f32(float addrspace(1)* %out) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000)
  store float %med, float addrspace(1)* %out.gep
  ret void
}
338
; Third operand is the bit pattern 0x7f800001 (i32 2139095041, a signaling
; NaN); under attribute group #0 the fold is expected to produce 0.
; GCN-LABEL: {{^}}v_clamp_constant_snan_f32:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
define amdgpu_kernel void @v_clamp_constant_snan_f32(float addrspace(1)* %out) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float))
  store float %med, float addrspace(1)* %out.gep
  ret void
}
348
; ---------------------------------------------------------------------
; Test non-default behaviors enabling snans and disabling dx10_clamp
; ---------------------------------------------------------------------
352
; min/max clamp pattern under attribute group #2 (defined outside this view;
; presumably disables dx10_clamp, per the function name -- confirm). The
; checks expect v_med3_f32 instead of the clamp modifier.
; GCN-LABEL: {{^}}v_clamp_f32_no_dx10_clamp:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
define amdgpu_kernel void @v_clamp_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %max = call float @llvm.maxnum.f32(float %a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}
367
; min/max clamp pattern under attribute group #3 (defined outside this view;
; presumably enables snan handling with dx10_clamp on, per the function name
; -- confirm). The checks still expect a clamped v_max_f32.
; GCN-LABEL: {{^}}v_clamp_f32_snan_dx10clamp:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_f32_snan_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #3 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %max = call float @llvm.maxnum.f32(float %a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}
382
; min/max clamp pattern under attribute group #4 (defined outside this view;
; presumably snan handling with dx10_clamp off, per the function name --
; confirm). The checks expect separate v_max_f32 and v_min_f32 instructions.
; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %max = call float @llvm.maxnum.f32(float %a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}
398
; Same attribute group #4 as the plain snan/no-dx10clamp case, but the clamped
; value comes from an fadd flagged nnan; the checks then expect v_med3_f32.
; NOTE(review): the med3 check reuses [[A]] (the load result) as its first
; operand although the IR clamps %add -- confirm this is intended.
; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp_nnan_src:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp_nnan_src(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %add = fadd nnan float %a, 1.0
  %max = call float @llvm.maxnum.f32(float %add, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}
414
; fmed3(0.0, 1.0, a) under attribute group #2 (definition not visible here);
; with the variable as the last operand a clamped v_max_f32 is still expected.
; GCN-LABEL: {{^}}v_clamp_med3_aby_f32_no_dx10_clamp:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_aby_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a)
  store float %med, float addrspace(1)* %out.gep
  ret void
}
427
; fmed3(1.0, 0.0, a) under attribute group #2 (definition not visible here);
; with the variable as the last operand a clamped v_max_f32 is still expected.
; GCN-LABEL: {{^}}v_clamp_med3_bay_f32_no_dx10_clamp:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_bay_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a)
  store float %med, float addrspace(1)* %out.gep
  ret void
}
440
; fmed3(a, 0.0, 1.0) under attribute group #2 (definition not visible here);
; the v_med3_f32 is expected to be kept with the same operand order.
; GCN-LABEL: {{^}}v_clamp_med3_yab_f32_no_dx10_clamp:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
define amdgpu_kernel void @v_clamp_med3_yab_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}
453
; fmed3(a, 1.0, 0.0) under attribute group #2 (definition not visible here);
; the v_med3_f32 is expected to be kept with the same operand order.
; GCN-LABEL: {{^}}v_clamp_med3_yba_f32_no_dx10_clamp:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 1.0, 0
define amdgpu_kernel void @v_clamp_med3_yba_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}
466
; fmed3(0.0, a, 1.0) under attribute group #2 (definition not visible here);
; the v_med3_f32 is expected to be kept with the same operand order.
; GCN-LABEL: {{^}}v_clamp_med3_ayb_f32_no_dx10_clamp:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, 0, [[A]], 1.0
define amdgpu_kernel void @v_clamp_med3_ayb_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}
479
; fmed3(1.0, a, 0.0) under attribute group #2 (definition not visible here);
; the v_med3_f32 is expected to be kept with the same operand order.
; GCN-LABEL: {{^}}v_clamp_med3_bya_f32_no_dx10_clamp:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, 1.0, [[A]], 0
define amdgpu_kernel void @v_clamp_med3_bya_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}
492
; Quiet-NaN constant as the third fmed3 operand under attribute group #2
; (definition not visible here); the fold is expected to yield the NaN bit
; pattern 0x7fc00000 rather than 0.
; GCN-LABEL: {{^}}v_clamp_constant_qnan_f32_no_dx10_clamp:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7fc00000
define amdgpu_kernel void @v_clamp_constant_qnan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000)
  store float %med, float addrspace(1)* %out.gep
  ret void
}
502
; Signaling-NaN bit pattern (i32 2139095041 == 0x7f800001) as the third fmed3
; operand under attribute group #2 (definition not visible here); the fold is
; expected to preserve that bit pattern.
; GCN-LABEL: {{^}}v_clamp_constant_snan_f32_no_dx10_clamp:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7f800001
define amdgpu_kernel void @v_clamp_constant_snan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float))
  store float %med, float addrspace(1)* %out.gep
  ret void
}
512
; Packed <2 x half> 0.0/1.0 clamp; on the gfx900 run the checks expect a
; single clamped v_pk_max_f16 with no other use of the loaded register first.
; GCN-LABEL: {{^}}v_clamp_v2f16:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GFX9-NOT: [[A]]
; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
  %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> zeroinitializer)
  %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)

  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
  ret void
}
528
; Packed clamp where one lane of each bound vector is undef; the gfx900 run
; still expects a single clamped v_pk_max_f16.
; GCN-LABEL: {{^}}v_clamp_v2f16_undef_elt:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GFX9-NOT: [[A]]
; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_v2f16_undef_elt(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
  %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half undef, half 0.0>)
  %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half undef>)

  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
  ret void
}
544
; Lower-bound vector is <2.0, 0.0>, not all zeros, so the gfx900 run expects
; separate v_pk_max_f16 and v_pk_min_f16 instructions.
; GCN-LABEL: {{^}}v_clamp_v2f16_not_zero:
; GFX9: v_pk_max_f16
; GFX9: v_pk_min_f16
define amdgpu_kernel void @v_clamp_v2f16_not_zero(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
  %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half 2.0, half 0.0>)
  %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)

  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
  ret void
}
559
; Upper-bound vector is <0.0, 1.0>, not all ones, so the gfx900 run expects
; separate v_pk_max_f16 and v_pk_min_f16 instructions.
; GCN-LABEL: {{^}}v_clamp_v2f16_not_one:
; GFX9: v_pk_max_f16
; GFX9: v_pk_min_f16
define amdgpu_kernel void @v_clamp_v2f16_not_one(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
  %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half 0.0, half 0.0>)
  %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 0.0, half 1.0>)

  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
  ret void
}
574
; Packed clamp of the negated vector; the gfx900 run expects the negation as
; neg_lo/neg_hi modifiers on both operands of the clamped v_pk_max_f16.
; GCN-LABEL: {{^}}v_clamp_neg_v2f16:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GFX9-NOT: [[A]]
; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] neg_lo:[1,1] neg_hi:[1,1] clamp{{$}}
define amdgpu_kernel void @v_clamp_neg_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
  %fneg.a = fsub <2 x half> <half -0.0, half -0.0>, %a
  %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %fneg.a, <2 x half> zeroinitializer)
  %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)

  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
  ret void
}
591
; Clamp of -|a| for a v2f16 value to [0.0, 1.0] in both lanes. The gfx900 run
; expects the absolute value to be a separate v_and_b32 with mask 0x7fff7fff,
; and the negation plus clamp to be carried by a single packed max on that
; result.
; GCN-LABEL: {{^}}v_clamp_negabs_v2f16:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GFX9: v_and_b32_e32 [[ABS:v[0-9]+]], 0x7fff7fff, [[A]]
; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[ABS]], [[ABS]] neg_lo:[1,1] neg_hi:[1,1] clamp{{$}}
define amdgpu_kernel void @v_clamp_negabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
  ; Index both the input and the output buffer by the workitem id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
  ; -|a| in both lanes: fabs followed by a subtraction from -0.0.
  %fabs.a = call <2 x half> @llvm.fabs.v2f16(<2 x half> %a)
  %fneg.fabs.a = fsub <2 x half> <half -0.0, half -0.0>, %fabs.a

  ; min(max(-|a|, 0.0), 1.0) in each lane.
  %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %fneg.fabs.a, <2 x half> zeroinitializer)
  %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)

  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
  ret void
}
610
; Clamp to [0.0, 1.0] of a v2f16 value whose element 0 alone has been negated.
; The gfx900 run expects a single packed max on the loaded register carrying
; only neg_lo plus the clamp bit; the -NOT line rejects any other mention of the
; loaded register between the load and the max.
; GCN-LABEL: {{^}}v_clamp_neglo_v2f16:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GFX9-NOT: [[A]]
; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] neg_lo:[1,1] clamp{{$}}
define amdgpu_kernel void @v_clamp_neglo_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
  ; Index both the input and the output buffer by the workitem id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
  ; Replace element 0 with its negation; element 1 is left as loaded.
  %lo = extractelement <2 x half> %a, i32 0
  %neg.lo = fsub half -0.0, %lo
  %neg.lo.vec = insertelement <2 x half> %a, half %neg.lo, i32 0
  ; min(max(x, 0.0), 1.0) in each lane.
  %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %neg.lo.vec, <2 x half> zeroinitializer)
  %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)

  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
  ret void
}
629
; Clamp to [0.0, 1.0] of a v2f16 value whose element 1 alone has been negated.
; The gfx900 run expects a single packed max on the loaded register carrying
; only neg_hi plus the clamp bit; the -NOT line rejects any other mention of the
; loaded register between the load and the max.
; GCN-LABEL: {{^}}v_clamp_neghi_v2f16:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GFX9-NOT: [[A]]
; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] neg_hi:[1,1] clamp{{$}}
define amdgpu_kernel void @v_clamp_neghi_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
  ; Index both the input and the output buffer by the workitem id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
  ; Replace element 1 with its negation; element 0 is left as loaded.
  %hi = extractelement <2 x half> %a, i32 1
  %neg.hi = fsub half -0.0, %hi
  %neg.hi.vec = insertelement <2 x half> %a, half %neg.hi, i32 1
  ; min(max(x, 0.0), 1.0) in each lane.
  %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %neg.hi.vec, <2 x half> zeroinitializer)
  %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)

  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
  ret void
}
648
; Clamp to [0.0, 1.0] of a v2f16 value whose two elements have been swapped.
; The gfx900 run expects a single packed max on the loaded register with the
; swap expressed through op_sel/op_sel_hi and the clamp bit set; the -NOT line
; rejects any other mention of the loaded register between the load and the max.
; GCN-LABEL: {{^}}v_clamp_v2f16_shuffle:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GFX9-NOT: [[A]]
; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] op_sel:[1,1] op_sel_hi:[0,0] clamp{{$}}
define amdgpu_kernel void @v_clamp_v2f16_shuffle(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
  ; Index both the input and the output buffer by the workitem id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
  ; Mask <1, 0> exchanges the two elements of %a.
  %shuf = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 0>
  ; min(max(x, 0.0), 1.0) in each lane.
  %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %shuf, <2 x half> zeroinitializer)
  %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)

  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
  ret void
}
665
; f32 clamp applied to the maxnum of two different values. Every run expects
; two v_add_f32 results to feed one v_max_f32_e64 that carries the clamp bit,
; with the two distinct registers as its source operands.
; GCN-LABEL: {{^}}v_clamp_diff_source_f32:
; GCN: v_add_f32_e32 [[A:v[0-9]+]]
; GCN: v_add_f32_e32 [[B:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[B]] clamp{{$}}
define amdgpu_kernel void @v_clamp_diff_source_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  ; Load three consecutive floats from fixed offsets 0, 1 and 2 of %aptr.
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 0
  %gep1 = getelementptr float, float addrspace(1)* %aptr, i32 1
  %gep2 = getelementptr float, float addrspace(1)* %aptr, i32 2
  %l0 = load float, float addrspace(1)* %gep0
  %l1 = load float, float addrspace(1)* %gep1
  %l2 = load float, float addrspace(1)* %gep2
  ; Two sums that share %l0 but are otherwise distinct values.
  %a = fadd nsz float %l0, %l1
  %b = fadd nsz float %l0, %l2
  ; min(max(maxnum(a, b), 0.0), 1.0), all carrying the nsz flag.
  %res = call nsz float @llvm.maxnum.f32(float %a, float %b)
  %max = call nsz float @llvm.maxnum.f32(float %res, float 0.0)
  %min = call nsz float @llvm.minnum.f32(float %max, float 1.0)
  ; The result is written to fixed offset 3 of %out.
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 3
  store float %min, float addrspace(1)* %out.gep
  ret void
}
687
; Declarations of the intrinsics called by the test functions in this file.
; All of them use attribute group #1.
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @llvm.fabs.f32(float) #1
declare float @llvm.minnum.f32(float, float) #1
declare float @llvm.maxnum.f32(float, float) #1
declare float @llvm.amdgcn.fmed3.f32(float, float, float) #1
declare double @llvm.fabs.f64(double) #1
declare double @llvm.minnum.f64(double, double) #1
declare double @llvm.maxnum.f64(double, double) #1
declare half @llvm.fabs.f16(half) #1
declare half @llvm.minnum.f16(half, half) #1
declare half @llvm.maxnum.f16(half, half) #1
declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1
Matt Arsenault2fdf2a12017-02-21 23:35:48 +0000702
; Attribute groups referenced by the definitions and declarations in this file.
;   #0  plain nounwind, used by the kernels.
;   #1  nounwind readnone, used by the intrinsic declarations.
;   #2-#4  nounwind plus different dx10-clamp / fp-exceptions target-feature
;          combinations, with "no-nans-fp-math" set to "false".
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "target-features"="-dx10-clamp,-fp-exceptions" "no-nans-fp-math"="false" }
attributes #3 = { nounwind "target-features"="+dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" }
attributes #4 = { nounwind "target-features"="-dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" }