; Checks which producers let llvm.canonicalize be folded away on amdgcn, for
; tonga and gfx900, with fp32 denormals flushed or enabled.
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GCN-FLUSH %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals,+fp-exceptions < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-EXCEPT,VI,GCN-FLUSH %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-DENORM,GCN-DENORM %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-FLUSH,GCN-FLUSH %s

; Canonicalize of a plain loaded value is expected to survive. The flushing
; runs look for a multiply by 1.0, the gfx900 denormal run for a v_max_f32.
; GCN-LABEL: {{^}}test_no_fold_canonicalize_loaded_value_f32:
; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
; GFX9-DENORM: v_max_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @test_no_fold_canonicalize_loaded_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %v = load float, float addrspace(1)* %gep, align 4
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of an fmul result is expected to fold. The v_mul result is
; stored directly and no 1.0 operand may appear afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_fmul_value_f32:
; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fmul_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fmul float %load, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of an fsub result is expected to fold. The v_sub result is
; stored directly and no 1.0 operand may appear afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_sub_value_f32:
; GCN: v_sub_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_sub_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fsub float 15.0, %load
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of an fadd result is expected to fold. The v_add result is
; stored directly and no 1.0 operand may appear afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_add_value_f32:
; GCN: v_add_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_add_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fadd float %load, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of an llvm.sqrt result is expected to fold. The v_sqrt result
; is stored directly and no 1.0 operand may appear afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_sqrt_value_f32:
; GCN: v_sqrt_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_sqrt_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.sqrt.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of an llvm.ceil result is expected to fold. The v_ceil result
; is stored directly and no 1.0 operand may appear afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_fceil_value_f32:
; GCN: v_ceil_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fceil_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.ceil.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of an llvm.floor result is expected to fold. The v_floor result
; is stored directly and no 1.0 operand may appear afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_floor_value_f32:
; GCN: v_floor_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_floor_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.floor.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of an llvm.fma result is expected to fold. The v_fma result is
; stored directly and no 1.0 operand may appear afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_fma_value_f32:
; GCN: v_fma_f32 [[V:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fma_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.fma.f32(float %load, float 15.0, float 15.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of an llvm.fmuladd result is expected to fold. Flushing runs
; expect a v_mac, the gfx900 denormal run a v_fma, and either result is stored
; directly with no 1.0 operand afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_fmuladd_value_f32:
; GCN-FLUSH: v_mac_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GFX9-DENORM: v_fma_f32 [[V:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fmuladd_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.fmuladd.f32(float %load, float 15.0, float 15.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Two chained canonicalize calls are expected to collapse into one. Exactly
; one canonicalizing instruction consumes the load and its result is stored,
; with no further 1.0 operand afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_canonicalize_value_f32:
; GCN: {{flat|global}}_load_dword [[LOAD:v[0-9]+]],
; GCN-FLUSH: v_mul_f32_e32 [[V:v[0-9]+]], 1.0, [[LOAD]]
; GCN-DENORM: v_max_f32_e32 [[V:v[0-9]+]], [[LOAD]], [[LOAD]]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_canonicalize_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.canonicalize.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of a float to double fpext is expected to fold. The v_cvt
; result pair is stored directly and no 1.0 operand may appear afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_fpextend_value_f64_f32:
; GCN: v_cvt_f64_f32_e32 [[V:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dwordx2 v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f64_f32(float addrspace(1)* %arg, double addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fpext float %load to double
  %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
  %gep2 = getelementptr inbounds double, double addrspace(1)* %out, i32 %id
  store double %canonicalized, double addrspace(1)* %gep2, align 8
  ret void
}

; Canonicalize of a half to float fpext is expected to fold. The v_cvt result
; is stored directly and no 1.0 operand may appear afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_fpextend_value_f32_f16:
; GCN: v_cvt_f32_f16_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16(half addrspace(1)* %arg, float addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = fpext half %load to float
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}

; Canonicalize of a double to float fptrunc is expected to fold. The v_cvt
; result is stored directly and no 1.0 operand may appear afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_fpround_value_f32_f64:
; GCN: v_cvt_f32_f64_e32 [[V:v[0-9]+]], v[{{[0-9:]+}}]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f32_f64(double addrspace(1)* %arg, float addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
  %load = load double, double addrspace(1)* %gep, align 8
  %v = fptrunc double %load to float
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}

; Canonicalize of a float to half fptrunc is expected to fold. The v_cvt
; result is stored directly and no 1.0 operand may appear afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_fpround_value_f16_f32:
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32(float addrspace(1)* %arg, half addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fptrunc float %load to half
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
  store half %canonicalized, half addrspace(1)* %gep2, align 2
  ret void
}

; Vector variant of the float to half fptrunc fold. The two converted halves
; are packed (v_or on tonga, v_and plus v_lshl_or on gfx900) and the packed
; register is stored directly, with no 1.0 operand afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_fpround_value_v2f16_v2f32:
; GCN-DAG: v_cvt_f16_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
; VI-DAG: v_cvt_f16_f32_sdwa [[V1:v[0-9]+]], v{{[0-9]+}}
; VI: v_or_b32_e32 [[V:v[0-9]+]], [[V0]], [[V1]]
; GFX9: v_cvt_f16_f32_e32 [[V1:v[0-9]+]], v{{[0-9]+}}
; GFX9: v_and_b32_e32 [[V0_16:v[0-9]+]], 0xffff, [[V0]]
; GFX9: v_lshl_or_b32 [[V:v[0-9]+]], [[V1]], 16, [[V0_16]]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_v2f16_v2f32(<2 x float> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %arg, i32 %id
  %load = load <2 x float>, <2 x float> addrspace(1)* %gep, align 8
  %v = fptrunc <2 x float> %load to <2 x half>
  %canonicalized = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %v)
  %gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %id
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %gep2, align 4
  ret void
}

; Negation of a plain loaded value. The canonicalize is expected to survive,
; with the negation carried as a source modifier on the canonicalizing
; instruction.
; GCN-LABEL: {{^}}test_no_fold_canonicalize_fneg_value_f32:
; GCN-FLUSH: v_mul_f32_e64 v{{[0-9]+}}, 1.0, -v{{[0-9]+}}
; GCN-DENORM: v_max_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
define amdgpu_kernel void @test_no_fold_canonicalize_fneg_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fsub float -0.0, %load
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Negation of an fadd result. The canonicalize is expected to fold, leaving a
; sign-bit xor whose result is stored directly with no 1.0 operand afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_fneg_value_f32:
; GCN: v_xor_b32_e32 [[V:v[0-9]+]], 0x80000000, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fneg_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = fsub float -0.0, %v0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; llvm.fabs of a plain loaded value. The canonicalize is expected to survive,
; with the absolute value carried as a source modifier on the canonicalizing
; instruction.
; GCN-LABEL: {{^}}test_no_fold_canonicalize_fabs_value_f32:
; GCN-FLUSH: v_mul_f32_e64 v{{[0-9]+}}, 1.0, |v{{[0-9]+}}|
; GCN-DENORM: v_max_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |v{{[0-9]+}}|
define amdgpu_kernel void @test_no_fold_canonicalize_fabs_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.fabs.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; llvm.fabs of an fadd result. The canonicalize is expected to fold, leaving a
; sign-bit mask whose result is stored directly with no 1.0 operand afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_fabs_value_f32:
; GCN: v_and_b32_e32 [[V:v[0-9]+]], 0x7fffffff, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fabs_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = tail call float @llvm.fabs.f32(float %v0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of an llvm.sin result is expected to fold. The v_sin result is
; stored directly and no 1.0 operand may appear afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_sin_value_f32:
; GCN: v_sin_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_sin_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.sin.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of an llvm.cos result is expected to fold. The v_cos result is
; stored directly and no 1.0 operand may appear afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_cos_value_f32:
; GCN: v_cos_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_cos_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.cos.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Half precision llvm.sin. The checks expect an f32 v_sin followed by a
; conversion to f16 whose result is stored directly, with no 1.0 operand
; afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_sin_value_f16:
; GCN: v_sin_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], [[V0]]
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_sin_value_f16(half addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = tail call half @llvm.sin.f16(half %load)
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  store half %canonicalized, half addrspace(1)* %gep, align 2
  ret void
}

; Half precision llvm.cos. The checks expect an f32 v_cos followed by a
; conversion to f16 whose result is stored directly, with no 1.0 operand
; afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_cos_value_f16:
; GCN: v_cos_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], [[V0]]
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_cos_value_f16(half addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = tail call half @llvm.cos.f16(half %load)
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  store half %canonicalized, half addrspace(1)* %gep, align 2
  ret void
}

; Canonicalize of a constant quiet NaN is expected to fold to a move of the
; bit pattern 0x7fc00000, which is stored with no 1.0 operand afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_qNaN_value_f32:
; GCN: v_mov_b32_e32 [[V:v[0-9]+]], 0x7fc00000
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_qNaN_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %canonicalized = tail call float @llvm.canonicalize.f32(float 0x7FF8000000000000)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; llvm.minnum of a plain loaded value and zero. The tonga runs expect a
; multiply by 1.0 to be present, the gfx900 runs expect the v_min result to be
; stored directly.
; GCN-LABEL: {{^}}test_fold_canonicalize_minnum_value_from_load_f32:
; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
; GFX9: v_min_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
; GFX9: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_minnum_value_from_load_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.minnum.f32(float %load, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; llvm.minnum of an fadd result and zero. The canonicalize is expected to
; fold on every run, so the v_min result is stored directly with no 1.0
; operand afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_minnum_value_f32:
; GCN: v_min_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_minnum_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = tail call float @llvm.minnum.f32(float %v0, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; llvm.minnum of a loaded value and the constant bit pattern 0x7f800001
; (2139095041). The checks expect a canonicalizing instruction to follow the
; v_min on every run, and its result to be what gets stored.
; GCN-LABEL: {{^}}test_fold_canonicalize_sNaN_value_f32:
; GCN: v_min_f32_e32 [[V0:v[0-9]+]], 0x7f800001, v{{[0-9]+}}
; GCN-FLUSH: v_mul_f32_e32 [[RESULT:v[0-9]+]], 1.0, [[V0]]
; GCN-DENORM: v_max_f32_e32 [[RESULT:v[0-9]+]], [[V0]], [[V0]]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[RESULT]]
define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.minnum.f32(float %load, float bitcast (i32 2139095041 to float))
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

397; GCN-LABEL: test_fold_canonicalize_denorm_value_f32:
Matt Arsenault70b92822017-11-12 23:53:44 +0000398; GFX9: v_min_f32_e32 [[RESULT:v[0-9]+]], 0x7fffff, v{{[0-9]+}}
Stanislav Mekhanoshindc2890a2017-07-13 23:59:15 +0000399; VI: v_min_f32_e32 [[V0:v[0-9]+]], 0x7fffff, v{{[0-9]+}}
Matt Arsenault70b92822017-11-12 23:53:44 +0000400; VI: v_mul_f32_e32 [[RESULT:v[0-9]+]], 1.0, [[V0]]
401; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[RESULT]]
Stanislav Mekhanoshindc2890a2017-07-13 23:59:15 +0000402; GFX9-NOT: 1.0
Stanislav Mekhanoshin5680b0c2017-07-12 21:20:28 +0000403define amdgpu_kernel void @test_fold_canonicalize_denorm_value_f32(float addrspace(1)* %arg) {
404 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
405 %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
406 %load = load float, float addrspace(1)* %gep, align 4
407 %v = tail call float @llvm.minnum.f32(float %load, float bitcast (i32 8388607 to float))
408 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
409 store float %canonicalized, float addrspace(1)* %gep, align 4
410 ret void
411}
412
413; GCN-LABEL: test_fold_canonicalize_maxnum_value_from_load_f32:
Matt Arsenault70b92822017-11-12 23:53:44 +0000414; GFX9: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, v{{[0-9]+}}
Stanislav Mekhanoshindc2890a2017-07-13 23:59:15 +0000415; VI: v_max_f32_e32 [[V0:v[0-9]+]], 0, v{{[0-9]+}}
Matt Arsenault70b92822017-11-12 23:53:44 +0000416; VI: v_mul_f32_e32 [[RESULT:v[0-9]+]], 1.0, [[V0]]
417; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[RESULT]]
Stanislav Mekhanoshindc2890a2017-07-13 23:59:15 +0000418; GFX9-NOT: 1.0
Stanislav Mekhanoshin5680b0c2017-07-12 21:20:28 +0000419define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_from_load_f32(float addrspace(1)* %arg) {
420 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
421 %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
422 %load = load float, float addrspace(1)* %gep, align 4
423 %v = tail call float @llvm.maxnum.f32(float %load, float 0.0)
424 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
425 store float %canonicalized, float addrspace(1)* %gep, align 4
426 ret void
427}
428
; llvm.maxnum of an fadd result and zero. The canonicalize is expected to
; fold on every run, so the v_max result is stored directly with no 1.0
; operand afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_maxnum_value_f32:
; GCN: v_max_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = tail call float @llvm.maxnum.f32(float %v0, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Double precision llvm.maxnum of an fadd result and zero. The canonicalize
; is expected to fold, so the v_max_f64 result pair is stored directly with no
; 1.0 operand afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_maxnum_value_f64:
; GCN: v_max_f64 [[V:v\[[0-9]+:[0-9]+\]]], v[{{[0-9:]+}}], 0
; GCN: {{flat|global}}_store_dwordx2 v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f64(double addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
  %load = load double, double addrspace(1)* %gep, align 8
  %v0 = fadd double %load, 0.0
  %v = tail call double @llvm.maxnum.f64(double %v0, double 0.0)
  %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
  store double %canonicalized, double addrspace(1)* %gep, align 8
  ret void
}

; amdgpu_ps variant of the fmul case. Only the run that enables fp-exceptions
; has a check here, and it expects the multiply by 1.0 to remain.
; GCN-LABEL: {{^}}test_no_fold_canonicalize_fmul_value_f32_no_ieee:
; GCN-EXCEPT: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
define amdgpu_ps float @test_no_fold_canonicalize_fmul_value_f32_no_ieee(float %arg) {
entry:
  %v = fmul float %arg, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  ret float %canonicalized
}

; amdgpu_ps variant with nnan on the fmul. The canonicalize is expected to
; fold on every run, so the v_mul is immediately followed by the return marker
; and no 1.0 operand appears afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_fmul_nnan_value_f32_no_ieee:
; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN-NEXT: ; return
; GCN-NOT: 1.0
define amdgpu_ps float @test_fold_canonicalize_fmul_nnan_value_f32_no_ieee(float %arg) {
entry:
  %v = fmul nnan float %arg, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  ret float %canonicalized
}

; Loaded value in a function carrying the no-nans-fp-math attribute. The
; gfx900 denormal run expects the loaded register to be stored unchanged with
; no 1.0 operand afterwards, the flushing runs still expect a multiply by 1.0.
; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f32:
; GFX9-DENORM: global_load_dword [[V:v[0-9]+]],
; GFX9-DENORM: global_store_dword v[{{[0-9:]+}}], [[V]]
; GFX9-DENORM-NOT: 1.0
; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f32(float addrspace(1)* %arg, float addrspace(1)* %out) #1 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %v = load float, float addrspace(1)* %gep, align 4
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}

; Double precision loaded value in a function carrying the no-nans-fp-math
; attribute. Every run expects the loaded register pair to be stored unchanged
; with no 1.0 operand afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f64:
; GCN: {{flat|global}}_load_dwordx2 [[V:v\[[0-9:]+\]]],
; GCN: {{flat|global}}_store_dwordx2 v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f64(double addrspace(1)* %arg, double addrspace(1)* %out) #1 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
  %v = load double, double addrspace(1)* %gep, align 8
  %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
  %gep2 = getelementptr inbounds double, double addrspace(1)* %out, i32 %id
  store double %canonicalized, double addrspace(1)* %gep2, align 8
  ret void
}

; Half precision loaded value in a function carrying the no-nans-fp-math
; attribute. Every run expects the loaded register to be stored unchanged with
; no 1.0 operand afterwards.
; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f16:
; GCN: {{flat|global}}_load_ushort [[V:v[0-9]+]],
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f16(half addrspace(1)* %arg, half addrspace(1)* %out) #1 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %v = load half, half addrspace(1)* %gep, align 2
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
  store half %canonicalized, half addrspace(1)* %gep2, align 2
  ret void
}

; Avoid failing the test on FreeBSD11.0, where the trailing negative check for
; 1.0 would otherwise match inside the
; .amd_amdgpu_isa "amdgcn-unknown-freebsd11.0--gfx802" directive. The positive
; match below ends the region that the negative check scans. It has to use a
; prefix that the FileCheck invocations actually enable, and none of them
; enables the default prefix.
; GCN: .amd_amdgpu_isa

; Intrinsics called by the test functions in this file.
declare float @llvm.canonicalize.f32(float) #0
declare double @llvm.canonicalize.f64(double) #0
declare half @llvm.canonicalize.f16(half) #0
declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare float @llvm.sqrt.f32(float) #0
declare float @llvm.ceil.f32(float) #0
declare float @llvm.floor.f32(float) #0
declare float @llvm.fma.f32(float, float, float) #0
declare float @llvm.fmuladd.f32(float, float, float) #0
declare float @llvm.fabs.f32(float) #0
declare float @llvm.sin.f32(float) #0
declare float @llvm.cos.f32(float) #0
declare half @llvm.sin.f16(half) #0
declare half @llvm.cos.f16(half) #0
declare float @llvm.minnum.f32(float, float) #0
declare float @llvm.maxnum.f32(float, float) #0
declare double @llvm.maxnum.f64(double, double) #0

; #0 is attached to every intrinsic declaration above.
; #1 is attached to the three load_nnan test functions.
attributes #0 = { nounwind readnone }
attributes #1 = { "no-nans-fp-math"="true" }