; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GCN-FLUSH %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals,+fp-exceptions < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-EXCEPT -check-prefix=VI -check-prefix=GCN-FLUSH %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs -mattr=+fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX9-DENORM %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GCN-FLUSH %s
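
; These tests check elimination of redundant llvm.canonicalize calls. As the
; checks below show, a canonicalize is selected as "v_mul_f32 dst, 1.0, src";
; when the source is already known canonical (e.g. it is the result of an FP
; arithmetic or conversion instruction), the multiply is expected to fold
; away. Functions named test_fold_* expect the 1.0 multiply to be eliminated
; (enforced by GCN-NOT: 1.0), while test_no_fold_* expect it to remain.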

; GCN-LABEL: {{^}}test_no_fold_canonicalize_loaded_value_f32:
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
define amdgpu_kernel void @test_no_fold_canonicalize_loaded_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %v = load float, float addrspace(1)* %gep, align 4
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_fmul_value_f32:
; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fmul_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fmul float %load, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_sub_value_f32:
; GCN: v_sub_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_sub_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fsub float 15.0, %load
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_add_value_f32:
; GCN: v_add_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_add_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fadd float %load, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_sqrt_value_f32:
; GCN: v_sqrt_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_sqrt_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.sqrt.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fceil_value_f32:
; GCN: v_ceil_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fceil_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.ceil.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_floor_value_f32:
; GCN: v_floor_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_floor_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.floor.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fma_value_f32:
; GCN: v_fma_f32 [[V:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fma_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.fma.f32(float %load, float 15.0, float 15.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

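; fmuladd lowering differs by subtarget: with flushed f32 denormals it selects
; v_mac_f32, while gfx9 with denormals enabled selects v_fma_f32. Either way
; the result is canonical, so the canonicalize is expected to fold.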
; GCN-LABEL: test_fold_canonicalize_fmuladd_value_f32:
; GCN-FLUSH: v_mac_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GFX9-DENORM: v_fma_f32 [[V:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fmuladd_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.fmuladd.f32(float %load, float 15.0, float 15.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_canonicalize_value_f32:
; GCN: flat_load_dword [[LOAD:v[0-9]+]],
; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 1.0, [[LOAD]]
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_canonicalize_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.canonicalize.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

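; FP conversions (fpext/fptrunc) produce canonical results, so the
; canonicalize following each conversion below is expected to fold away.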
; GCN-LABEL: test_fold_canonicalize_fpextend_value_f64_f32:
; GCN: v_cvt_f64_f32_e32 [[V:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
; GCN: flat_store_dwordx2 v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f64_f32(float addrspace(1)* %arg, double addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fpext float %load to double
  %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
  %gep2 = getelementptr inbounds double, double addrspace(1)* %out, i32 %id
  store double %canonicalized, double addrspace(1)* %gep2, align 8
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fpextend_value_f32_f16:
; GCN: v_cvt_f32_f16_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16(half addrspace(1)* %arg, float addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = fpext half %load to float
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fpround_value_f32_f64:
; GCN: v_cvt_f32_f64_e32 [[V:v[0-9]+]], v[{{[0-9:]+}}]
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f32_f64(double addrspace(1)* %arg, float addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
  %load = load double, double addrspace(1)* %gep, align 8
  %v = fptrunc double %load to float
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fpround_value_f16_f32:
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: flat_store_short v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32(float addrspace(1)* %arg, half addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fptrunc float %load to half
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
  store half %canonicalized, half addrspace(1)* %gep2, align 2
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fpround_value_v2f16_v2f32:
; GCN-DAG: v_cvt_f16_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
; VI-DAG: v_cvt_f16_f32_sdwa [[V1:v[0-9]+]], v{{[0-9]+}}
; VI: v_or_b32_e32 [[V:v[0-9]+]], [[V0]], [[V1]]
; GFX9: v_cvt_f16_f32_e32 [[V1:v[0-9]+]], v{{[0-9]+}}
; GFX9: v_and_b32_e32 [[V0_16:v[0-9]+]], 0xffff, [[V0]]
; GFX9: v_lshl_or_b32 [[V:v[0-9]+]], [[V1]], 16, [[V0_16]]
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_v2f16_v2f32(<2 x float> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %arg, i32 %id
  %load = load <2 x float>, <2 x float> addrspace(1)* %gep, align 8
  %v = fptrunc <2 x float> %load to <2 x half>
  %canonicalized = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %v)
  %gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %id
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %gep2, align 4
  ret void
}

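; fneg and fabs are sign-bit operations that pass NaNs and denormals through
; unchanged, so a canonicalize of their result only folds when the operand is
; itself known canonical (made so below by first adding +0.0).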
; GCN-LABEL: test_no_fold_canonicalize_fneg_value_f32:
; GCN: v_mul_f32_e64 v{{[0-9]+}}, 1.0, -v{{[0-9]+}}
define amdgpu_kernel void @test_no_fold_canonicalize_fneg_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fsub float -0.0, %load
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fneg_value_f32:
; GCN: v_xor_b32_e32 [[V:v[0-9]+]], 0x80000000, v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fneg_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = fsub float -0.0, %v0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_no_fold_canonicalize_fabs_value_f32:
; GCN: v_mul_f32_e64 v{{[0-9]+}}, 1.0, |v{{[0-9]+}}|
define amdgpu_kernel void @test_no_fold_canonicalize_fabs_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.fabs.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fabs_value_f32:
; GCN: v_and_b32_e32 [[V:v[0-9]+]], 0x7fffffff, v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fabs_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = tail call float @llvm.fabs.f32(float %v0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_sin_value_f32:
; GCN: v_sin_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_sin_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.sin.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_cos_value_f32:
; GCN: v_cos_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_cos_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.cos.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_sin_value_f16:
; GCN: v_sin_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], [[V0]]
; GCN: flat_store_short v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_sin_value_f16(half addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = tail call half @llvm.sin.f16(half %load)
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  store half %canonicalized, half addrspace(1)* %gep, align 2
  ret void
}

; GCN-LABEL: test_fold_canonicalize_cos_value_f16:
; GCN: v_cos_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], [[V0]]
; GCN: flat_store_short v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_cos_value_f16(half addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = tail call half @llvm.cos.f16(half %load)
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  store half %canonicalized, half addrspace(1)* %gep, align 2
  ret void
}

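; Canonicalizing a quiet NaN constant folds to the canonical f32 qNaN bit
; pattern 0x7fc00000 (0x7FF8000000000000 is its IEEE double spelling).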
; GCN-LABEL: test_fold_canonicalize_qNaN_value_f32:
; GCN: v_mov_b32_e32 [[V:v[0-9]+]], 0x7fc00000
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_qNaN_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %canonicalized = tail call float @llvm.canonicalize.f32(float 0x7FF8000000000000)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

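; minnum/maxnum are only treated as canonicalizing when their inputs already
; are: a raw loaded operand, a signaling-NaN constant, or a denormal constant
; keeps the 1.0 multiply in place, while known-canonical inputs let it fold.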
; GCN-LABEL: test_fold_canonicalize_minnum_value_from_load_f32:
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
define amdgpu_kernel void @test_fold_canonicalize_minnum_value_from_load_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.minnum.f32(float %load, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_minnum_value_f32:
; GCN: v_min_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_minnum_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = tail call float @llvm.minnum.f32(float %v0, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_sNaN_value_f32:
; GCN: v_min_f32_e32 [[V0:v[0-9]+]], 0x7f800001, v{{[0-9]+}}
; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 1.0, [[V0]]
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.minnum.f32(float %load, float bitcast (i32 2139095041 to float))
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_denorm_value_f32:
; GCN: v_min_f32_e32 [[V0:v[0-9]+]], 0x7fffff, v{{[0-9]+}}
; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 1.0, [[V0]]
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_denorm_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.minnum.f32(float %load, float bitcast (i32 8388607 to float))
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_maxnum_value_from_load_f32:
; GCN: v_max_f32_e32 [[V0:v[0-9]+]], 0, v{{[0-9]+}}
; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 1.0, [[V0]]
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_from_load_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.maxnum.f32(float %load, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_maxnum_value_f32:
; GCN: v_max_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = tail call float @llvm.maxnum.f32(float %v0, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_maxnum_value_f64:
; GCN: v_max_f64 [[V:v\[[0-9]+:[0-9]+\]]], v[{{[0-9:]+}}], 0
; GCN: flat_store_dwordx2 v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f64(double addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
  %load = load double, double addrspace(1)* %gep, align 8
  %v0 = fadd double %load, 0.0
  %v = tail call double @llvm.maxnum.f64(double %v0, double 0.0)
  %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
  store double %canonicalized, double addrspace(1)* %gep, align 8
  ret void
}

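; amdgpu_ps functions run with IEEE mode off (hence the _no_ieee suffix). The
; canonicalize of the plain fmul is only expected to survive when FP
; exceptions are enabled (GCN-EXCEPT run line); with nnan on the fmul it
; folds on every run line.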
; GCN-LABEL: test_no_fold_canonicalize_fmul_value_f32_no_ieee:
; GCN-EXCEPT: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
define amdgpu_ps float @test_no_fold_canonicalize_fmul_value_f32_no_ieee(float %arg) {
entry:
  %v = fmul float %arg, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  ret float %canonicalized
}

; GCN-LABEL: test_fold_canonicalize_fmul_nnan_value_f32_no_ieee:
; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN-NEXT: ; return
; GCN-NOT: 1.0
define amdgpu_ps float @test_fold_canonicalize_fmul_nnan_value_f32_no_ieee(float %arg) {
entry:
  %v = fmul nnan float %arg, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  ret float %canonicalized
}

declare float @llvm.canonicalize.f32(float) #0
declare double @llvm.canonicalize.f64(double) #0
declare half @llvm.canonicalize.f16(half) #0
declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare float @llvm.sqrt.f32(float) #0
declare float @llvm.ceil.f32(float) #0
declare float @llvm.floor.f32(float) #0
declare float @llvm.fma.f32(float, float, float) #0
declare float @llvm.fmuladd.f32(float, float, float) #0
declare float @llvm.fabs.f32(float) #0
declare float @llvm.sin.f32(float) #0
declare float @llvm.cos.f32(float) #0
declare half @llvm.sin.f16(half) #0
declare half @llvm.cos.f16(half) #0
declare float @llvm.minnum.f32(float, float) #0
declare float @llvm.maxnum.f32(float, float) #0
declare double @llvm.maxnum.f64(double, double) #0

attributes #0 = { nounwind readnone }