; LLVM IR codegen test: folding/elimination of llvm.canonicalize.* on AMDGPU.
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GCN-FLUSH %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals,+fp-exceptions < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-EXCEPT -check-prefix=VI -check-prefix=GCN-FLUSH %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs -mattr=+fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX9-DENORM %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GCN-FLUSH %s

; Negative test: canonicalize of a raw loaded value is NOT folded away;
; codegen keeps the canonicalizing v_mul by 1.0.
; GCN-LABEL: {{^}}test_no_fold_canonicalize_loaded_value_f32:
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
define amdgpu_kernel void @test_no_fold_canonicalize_loaded_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %v = load float, float addrspace(1)* %gep, align 4
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; The result of an fmul is already canonical: the canonicalize call folds
; away and the mul result (by 15.0 = 0x41700000) is stored directly.
; GCN-LABEL: {{^}}test_fold_canonicalize_fmul_value_f32:
; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fmul_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fmul float %load, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; An fsub result is canonical; the canonicalize folds away.
; GCN-LABEL: {{^}}test_fold_canonicalize_sub_value_f32:
; GCN: v_sub_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_sub_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fsub float 15.0, %load
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; An fadd result is canonical; the canonicalize folds away.
; GCN-LABEL: {{^}}test_fold_canonicalize_add_value_f32:
; GCN: v_add_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_add_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fadd float %load, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; llvm.sqrt lowers to v_sqrt_f32, whose result is canonical; the
; canonicalize folds away.
; GCN-LABEL: {{^}}test_fold_canonicalize_sqrt_value_f32:
; GCN: v_sqrt_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_sqrt_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.sqrt.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; llvm.ceil lowers to v_ceil_f32; its result is canonical and the
; canonicalize folds away.
; GCN-LABEL: test_fold_canonicalize_fceil_value_f32:
; GCN: v_ceil_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fceil_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.ceil.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; llvm.floor lowers to v_floor_f32; its result is canonical and the
; canonicalize folds away.
; GCN-LABEL: test_fold_canonicalize_floor_value_f32:
; GCN: v_floor_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_floor_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.floor.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; llvm.fma lowers to v_fma_f32; its result is canonical and the
; canonicalize folds away.
; GCN-LABEL: test_fold_canonicalize_fma_value_f32:
; GCN: v_fma_f32 [[V:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fma_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.fma.f32(float %load, float 15.0, float 15.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; llvm.fmuladd lowers to v_mac on flush-denorm targets and v_fma on
; denorm-enabled gfx9; either way the result is canonical and the
; canonicalize folds away.
; GCN-LABEL: test_fold_canonicalize_fmuladd_value_f32:
; GCN-FLUSH: v_mac_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GFX9-DENORM: v_fma_f32 [[V:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fmuladd_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.fmuladd.f32(float %load, float 15.0, float 15.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; canonicalize(canonicalize(x)) collapses to a single canonicalizing
; multiply of the loaded value.
; GCN-LABEL: test_fold_canonicalize_canonicalize_value_f32:
; GCN: flat_load_dword [[LOAD:v[0-9]+]],
; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 1.0, [[LOAD]]
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_canonicalize_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.canonicalize.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; fpext f32->f64 (v_cvt_f64_f32) produces a canonical result; the f64
; canonicalize folds away.
; GCN-LABEL: test_fold_canonicalize_fpextend_value_f64_f32:
; GCN: v_cvt_f64_f32_e32 [[V:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
; GCN: flat_store_dwordx2 v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f64_f32(float addrspace(1)* %arg, double addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fpext float %load to double
  %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
  %gep2 = getelementptr inbounds double, double addrspace(1)* %out, i32 %id
  store double %canonicalized, double addrspace(1)* %gep2, align 8
  ret void
}

; fpext f16->f32 (v_cvt_f32_f16) produces a canonical result; the f32
; canonicalize folds away.
; GCN-LABEL: test_fold_canonicalize_fpextend_value_f32_f16:
; GCN: v_cvt_f32_f16_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16(half addrspace(1)* %arg, float addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = fpext half %load to float
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}

; fptrunc f64->f32 (v_cvt_f32_f64) produces a canonical result; the f32
; canonicalize folds away.
; GCN-LABEL: test_fold_canonicalize_fpround_value_f32_f64:
; GCN: v_cvt_f32_f64_e32 [[V:v[0-9]+]], v[{{[0-9:]+}}]
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f32_f64(double addrspace(1)* %arg, float addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
  %load = load double, double addrspace(1)* %gep, align 8
  %v = fptrunc double %load to float
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}

; fptrunc f32->f16 (v_cvt_f16_f32) produces a canonical result; the f16
; canonicalize folds away.
; GCN-LABEL: test_fold_canonicalize_fpround_value_f16_f32:
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: flat_store_short v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32(float addrspace(1)* %arg, half addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fptrunc float %load to half
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
  store half %canonicalized, half addrspace(1)* %gep2, align 2
  ret void
}

; Vector case: fptrunc <2 x float> -> <2 x half> conversions produce
; canonical halves; the v2f16 canonicalize folds away. VI packs the two
; halves with sdwa + v_or, gfx9 with v_and + v_lshl_or.
; GCN-LABEL: test_fold_canonicalize_fpround_value_v2f16_v2f32:
; GCN-DAG: v_cvt_f16_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
; VI-DAG: v_cvt_f16_f32_sdwa [[V1:v[0-9]+]], v{{[0-9]+}}
; VI: v_or_b32_e32 [[V:v[0-9]+]], [[V0]], [[V1]]
; GFX9: v_cvt_f16_f32_e32 [[V1:v[0-9]+]], v{{[0-9]+}}
; GFX9: v_and_b32_e32 [[V0_16:v[0-9]+]], 0xffff, [[V0]]
; GFX9: v_lshl_or_b32 [[V:v[0-9]+]], [[V1]], 16, [[V0_16]]
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_v2f16_v2f32(<2 x float> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %arg, i32 %id
  %load = load <2 x float>, <2 x float> addrspace(1)* %gep, align 8
  %v = fptrunc <2 x float> %load to <2 x half>
  %canonicalized = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %v)
  %gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %id
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %gep2, align 4
  ret void
}

; Negative test: fneg of a raw loaded value keeps the canonicalize; the
; negate is folded as a source modifier into the 1.0 multiply instead.
; GCN-LABEL: test_no_fold_canonicalize_fneg_value_f32:
; GCN: v_mul_f32_e64 v{{[0-9]+}}, 1.0, -v{{[0-9]+}}
define amdgpu_kernel void @test_no_fold_canonicalize_fneg_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fsub float -0.0, %load
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; fneg of an already-canonical value (an fadd result): the canonicalize
; folds away and the negate becomes a plain sign-bit xor.
; GCN-LABEL: test_fold_canonicalize_fneg_value_f32:
; GCN: v_xor_b32_e32 [[V:v[0-9]+]], 0x80000000, v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fneg_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = fsub float -0.0, %v0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Negative test: fabs of a raw loaded value keeps the canonicalize; the
; abs is folded as a source modifier into the 1.0 multiply.
; GCN-LABEL: test_no_fold_canonicalize_fabs_value_f32:
; GCN: v_mul_f32_e64 v{{[0-9]+}}, 1.0, |v{{[0-9]+}}|
define amdgpu_kernel void @test_no_fold_canonicalize_fabs_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.fabs.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; fabs of an already-canonical value (an fadd result): the canonicalize
; folds away and the abs becomes a plain sign-bit-clearing and.
; GCN-LABEL: test_fold_canonicalize_fabs_value_f32:
; GCN: v_and_b32_e32 [[V:v[0-9]+]], 0x7fffffff, v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fabs_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = tail call float @llvm.fabs.f32(float %v0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; llvm.sin lowers to v_sin_f32; its result is canonical and the
; canonicalize folds away.
; GCN-LABEL: test_fold_canonicalize_sin_value_f32:
; GCN: v_sin_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_sin_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.sin.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; llvm.cos lowers to v_cos_f32; its result is canonical and the
; canonicalize folds away.
; GCN-LABEL: test_fold_canonicalize_cos_value_f32:
; GCN: v_cos_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_cos_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.cos.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; f16 sin is expanded via f32 (v_sin_f32 then v_cvt_f16_f32); the
; conversion result is canonical so the f16 canonicalize folds away.
; GCN-LABEL: test_fold_canonicalize_sin_value_f16:
; GCN: v_sin_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], [[V0]]
; GCN: flat_store_short v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_sin_value_f16(half addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = tail call half @llvm.sin.f16(half %load)
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  store half %canonicalized, half addrspace(1)* %gep, align 2
  ret void
}

; f16 cos is expanded via f32 (v_cos_f32 then v_cvt_f16_f32); the
; conversion result is canonical so the f16 canonicalize folds away.
; GCN-LABEL: test_fold_canonicalize_cos_value_f16:
; GCN: v_cos_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], [[V0]]
; GCN: flat_store_short v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_cos_value_f16(half addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = tail call half @llvm.cos.f16(half %load)
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  store half %canonicalized, half addrspace(1)* %gep, align 2
  ret void
}

; canonicalize of a constant qNaN folds to the constant itself
; (0x7fc00000) materialized with a plain move.
; GCN-LABEL: test_fold_canonicalize_qNaN_value_f32:
; GCN: v_mov_b32_e32 [[V:v[0-9]+]], 0x7fc00000
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_qNaN_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %canonicalized = tail call float @llvm.canonicalize.f32(float 0x7FF8000000000000)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; minnum of a raw loaded value: VI still emits the canonicalizing 1.0
; multiply, while gfx9 folds the canonicalize and stores the min result.
; GCN-LABEL: test_fold_canonicalize_minnum_value_from_load_f32:
; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
; GFX9: v_min_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
; GFX9: flat_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_minnum_value_from_load_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.minnum.f32(float %load, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; minnum of an already-canonical value (an fadd result): the canonicalize
; folds away on all targets.
; GCN-LABEL: test_fold_canonicalize_minnum_value_f32:
; GCN: v_min_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_minnum_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = tail call float @llvm.minnum.f32(float %v0, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; minnum with an sNaN constant operand (0x7f800001): the canonicalize is
; NOT folded, so a 1.0 multiply of the min result is emitted and its
; destination is what gets stored.
; Fix: the store line used FileCheck variable [[V]] which was never bound
; under any prefix (FileCheck rejects uses of undefined variables); bind
; it to the v_mul destination.
; GCN-LABEL: test_fold_canonicalize_sNaN_value_f32:
; GCN: v_min_f32_e32 [[V0:v[0-9]+]], 0x7f800001, v{{[0-9]+}}
; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 1.0, [[V0]]
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.minnum.f32(float %load, float bitcast (i32 2139095041 to float))
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; minnum with a denormal constant operand (0x7fffff): gfx9 folds the
; canonicalize into the min; VI keeps the 1.0 multiply.
; Fix: the shared GCN store line uses [[V]], which on the VI runs was
; never bound (only [[V0]] was); bind [[V]] to the VI v_mul destination
; so the pattern is defined under every prefix.
; GCN-LABEL: test_fold_canonicalize_denorm_value_f32:
; GFX9: v_min_f32_e32 [[V:v[0-9]+]], 0x7fffff, v{{[0-9]+}}
; VI: v_min_f32_e32 [[V0:v[0-9]+]], 0x7fffff, v{{[0-9]+}}
; VI: v_mul_f32_e32 [[V:v[0-9]+]], 1.0, [[V0]]
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GFX9-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_denorm_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.minnum.f32(float %load, float bitcast (i32 8388607 to float))
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; maxnum of a raw loaded value: gfx9 folds the canonicalize into the max;
; VI keeps the 1.0 multiply.
; Fix: the shared GCN store line uses [[V]], which on the VI runs was
; never bound (only [[V0]] was); bind [[V]] to the VI v_mul destination.
; GCN-LABEL: test_fold_canonicalize_maxnum_value_from_load_f32:
; GFX9: v_max_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
; VI: v_max_f32_e32 [[V0:v[0-9]+]], 0, v{{[0-9]+}}
; VI: v_mul_f32_e32 [[V:v[0-9]+]], 1.0, [[V0]]
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GFX9-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_from_load_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.maxnum.f32(float %load, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; maxnum of an already-canonical value (an fadd result): the canonicalize
; folds away on all targets.
; GCN-LABEL: test_fold_canonicalize_maxnum_value_f32:
; GCN: v_max_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = tail call float @llvm.maxnum.f32(float %v0, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; f64 variant: maxnum of a canonical value folds the canonicalize away;
; the v_max_f64 result is stored directly.
; GCN-LABEL: test_fold_canonicalize_maxnum_value_f64:
; GCN: v_max_f64 [[V:v\[[0-9]+:[0-9]+\]]], v[{{[0-9:]+}}], 0
; GCN: flat_store_dwordx2 v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f64(double addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
  %load = load double, double addrspace(1)* %gep, align 8
  %v0 = fadd double %load, 0.0
  %v = tail call double @llvm.maxnum.f64(double %v0, double 0.0)
  %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
  store double %canonicalized, double addrspace(1)* %gep, align 8
  ret void
}

; Non-IEEE (amdgpu_ps) function with fp exceptions enabled: the
; canonicalize multiply must be kept (checked only under GCN-EXCEPT).
; GCN-LABEL: test_no_fold_canonicalize_fmul_value_f32_no_ieee:
; GCN-EXCEPT: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
define amdgpu_ps float @test_no_fold_canonicalize_fmul_value_f32_no_ieee(float %arg) {
entry:
  %v = fmul float %arg, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  ret float %canonicalized
}

; Non-IEEE function, but the fmul is marked nnan: the canonicalize folds
; away and the function returns the mul result immediately.
; GCN-LABEL: test_fold_canonicalize_fmul_nnan_value_f32_no_ieee:
; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN-NEXT: ; return
; GCN-NOT: 1.0
define amdgpu_ps float @test_fold_canonicalize_fmul_nnan_value_f32_no_ieee(float %arg) {
entry:
  %v = fmul nnan float %arg, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  ret float %canonicalized
}

Stanislav Mekhanoshindc2890a2017-07-13 23:59:15 +0000474; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f32
475; GFX9-DENORM: flat_load_dword [[V:v[0-9]+]],
476; GFX9-DENORM: flat_store_dword v[{{[0-9:]+}}], [[V]]
477; GFX9-DENORM-NOT: 1.0
478; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
479define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f32(float addrspace(1)* %arg, float addrspace(1)* %out) #1 {
480 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
481 %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
482 %v = load float, float addrspace(1)* %gep, align 4
483 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
484 %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
485 store float %canonicalized, float addrspace(1)* %gep2, align 4
486 ret void
487}
488
; With "no-nans-fp-math", canonicalize of a raw f64 load folds away on
; all tested targets: the loaded pair is stored back untouched.
; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f64
; GCN: flat_load_dwordx2 [[V:v\[[0-9:]+\]]],
; GCN: flat_store_dwordx2 v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f64(double addrspace(1)* %arg, double addrspace(1)* %out) #1 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
  %v = load double, double addrspace(1)* %gep, align 8
  %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
  %gep2 = getelementptr inbounds double, double addrspace(1)* %out, i32 %id
  store double %canonicalized, double addrspace(1)* %gep2, align 8
  ret void
}

; With "no-nans-fp-math", canonicalize of a raw f16 load folds away on
; all tested targets: the loaded short is stored back untouched.
; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f16
; GCN: flat_load_ushort [[V:v[0-9]+]],
; GCN: flat_store_short v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f16(half addrspace(1)* %arg, half addrspace(1)* %out) #1 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %v = load half, half addrspace(1)* %gep, align 2
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
  store half %canonicalized, half addrspace(1)* %gep2, align 2
  ret void
}

; Intrinsic declarations and attribute groups used by the tests above.
; #1 carries "no-nans-fp-math" for the load_nnan_* tests.
declare float @llvm.canonicalize.f32(float) #0
declare double @llvm.canonicalize.f64(double) #0
declare half @llvm.canonicalize.f16(half) #0
declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare float @llvm.sqrt.f32(float) #0
declare float @llvm.ceil.f32(float) #0
declare float @llvm.floor.f32(float) #0
declare float @llvm.fma.f32(float, float, float) #0
declare float @llvm.fmuladd.f32(float, float, float) #0
declare float @llvm.fabs.f32(float) #0
declare float @llvm.sin.f32(float) #0
declare float @llvm.cos.f32(float) #0
declare half @llvm.sin.f16(half) #0
declare half @llvm.cos.f16(half) #0
declare float @llvm.minnum.f32(float, float) #0
declare float @llvm.maxnum.f32(float, float) #0
declare double @llvm.maxnum.f64(double, double) #0

attributes #0 = { nounwind readnone }
attributes #1 = { "no-nans-fp-math"="true" }