; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Code generation checks for the llvm.canonicalize intrinsic on amdgcn:
;  - variable operands, optionally wrapped in fabs and/or fneg;
;  - constant operands, which are expected to become plain register moves of
;    the folded bit pattern;
;  - per-function denormal-mode and target-cpu variations, selected through the
;    attribute groups at the end of the file.

declare float @llvm.fabs.f32(float) #0
declare float @llvm.canonicalize.f32(float) #0
declare double @llvm.fabs.f64(double) #0
declare double @llvm.canonicalize.f64(double) #0
declare half @llvm.canonicalize.f16(half) #0
declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare <2 x double> @llvm.canonicalize.v2f64(<2 x double>) #0

; f32 loaded from memory: expects a v_mul_f32 by 1.0 whose destination
; register is the one stored back.
; GCN-LABEL: {{^}}v_test_canonicalize_var_f32:
; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @v_test_canonicalize_var_f32(float addrspace(1)* %out) #1 {
  %val = load float, float addrspace(1)* %out
  %canonicalized = call float @llvm.canonicalize.f32(float %val)
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; f32 passed as a kernel argument: expects the e64 form of the multiply by 1.0
; reading an s-register operand.
; GCN-LABEL: {{^}}s_test_canonicalize_var_f32:
; GCN: v_mul_f32_e64 [[REG:v[0-9]+]], 1.0, {{s[0-9]+}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @s_test_canonicalize_var_f32(float addrspace(1)* %out, float %val) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float %val)
  store float %canonicalized, float addrspace(1)* %out
  ret void
}
30
Matt Arsenaultd5d78512017-01-31 17:28:40 +000031; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_f32:
32; GCN: v_mul_f32_e64 [[REG:v[0-9]+]], 1.0, |{{v[0-9]+}}|
33; GCN: buffer_store_dword [[REG]]
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000034define amdgpu_kernel void @v_test_canonicalize_fabs_var_f32(float addrspace(1)* %out) #1 {
Matt Arsenaultd5d78512017-01-31 17:28:40 +000035 %val = load float, float addrspace(1)* %out
36 %val.fabs = call float @llvm.fabs.f32(float %val)
37 %canonicalized = call float @llvm.canonicalize.f32(float %val.fabs)
38 store float %canonicalized, float addrspace(1)* %out
39 ret void
40}

; canonicalize(fneg(fabs(x))) for f32: the fabs is expected as a |src|
; modifier and the negation as a multiply by -1.0.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f32:
; GCN: v_mul_f32_e64 [[REG:v[0-9]+]], -1.0, |{{v[0-9]+}}|
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(float addrspace(1)* %out) #1 {
  %val = load float, float addrspace(1)* %out
  %val.fabs = call float @llvm.fabs.f32(float %val)
  ; fneg spelled as a subtraction from -0.0.
  %val.fabs.fneg = fsub float -0.0, %val.fabs
  %canonicalized = call float @llvm.canonicalize.f32(float %val.fabs.fneg)
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; canonicalize(fneg(x)) for f32: the negation is expected to be absorbed into
; the multiply constant (-1.0).
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f32:
; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], -1.0, {{v[0-9]+}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(float addrspace(1)* %out) #1 {
  %val = load float, float addrspace(1)* %out
  ; fneg spelled as a subtraction from -0.0.
  %val.fneg = fsub float -0.0, %val
  %canonicalized = call float @llvm.canonicalize.f32(float %val.fneg)
  store float %canonicalized, float addrspace(1)* %out
  ret void
}
64
Matt Arsenault9cd90712016-04-14 01:42:16 +000065; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f32:
66; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
67; GCN: buffer_store_dword [[REG]]
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000068define amdgpu_kernel void @test_fold_canonicalize_p0_f32(float addrspace(1)* %out) #1 {
Matt Arsenault9cd90712016-04-14 01:42:16 +000069 %canonicalized = call float @llvm.canonicalize.f32(float 0.0)
70 store float %canonicalized, float addrspace(1)* %out
71 ret void
72}

; Constant -0.0 (f32): expected to fold to a constant materialized with
; v_bfrev_b32 of 1.
; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f32:
; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n0_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float -0.0)
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; Constant 1.0 (f32): expected to fold to a move of the 1.0 operand.
; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p1_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float 1.0)
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; Constant -1.0 (f32): expected to fold to a move of the -1.0 operand.
; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], -1.0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n1_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float -1.0)
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; Constant 16.0 (f32): expected to fold to a move of its bit pattern,
; 0x41800000.
; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x41800000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_literal_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float 16.0)
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; Positive f32 denormal constant (8388607 = 0x007fffff) in a kernel using
; attribute group #1, which sets no denormal target features: the expected
; folded value is 0.
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
  store float %canonicalized, float addrspace(1)* %out
  ret void
}
118
Matt Arsenaultb2cb5f82016-07-09 07:55:03 +0000119; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f32:
120; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fffff{{$}}
Matt Arsenault9cd90712016-04-14 01:42:16 +0000121; GCN: buffer_store_dword [[REG]]
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000122define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f32(float addrspace(1)* %out) #3 {
Matt Arsenaultb2cb5f82016-07-09 07:55:03 +0000123 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
124 store float %canonicalized, float addrspace(1)* %out
125 ret void
126}

; Negative f32 denormal constant (2155872255 = 0x807fffff) in a kernel using
; attribute group #1, which sets no denormal target features: the expected
; folded value is 0.
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; Same negative f32 denormal constant (0x807fffff) with attribute group #3
; (denormal features enabled): the bit pattern is expected to be kept.
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x807fffff{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #3 {
  %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; Quiet NaN constant (f32): expected to fold to 0x7fc00000.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float 0x7FF8000000000000)
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; NaN with all bits set (i32 -1): expected to fold to 0x7fc00000.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 -1 to float))
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; NaN with bit pattern i32 -2 (0xfffffffe): expected to fold to 0x7fc00000.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 -2 to float))
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; Signaling NaN 0x7f800001 (2139095041): expected to fold to 0x7fc00000.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2139095041 to float))
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; Signaling NaN 0x7fbfffff (2143289343): expected to fold to 0x7fc00000.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2143289343 to float))
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; Signaling NaN with the sign bit set, 0xff800001 (4286578689): expected to
; fold to 0x7fc00000.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 4286578689 to float))
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; Signaling NaN with the sign bit set, 0xffbfffff (4290772991): expected to
; fold to 0x7fc00000.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 4290772991 to float))
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; f64 loaded from memory: expects a v_max_f64 with two v-register-pair
; operands whose result pair is the one stored back.
; GCN-LABEL: {{^}}v_test_canonicalize_var_f64:
; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 {
  %val = load double, double addrspace(1)* %out
  %canonicalized = call double @llvm.canonicalize.f64(double %val)
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; f64 passed as a kernel argument: expects a v_max_f64 reading s-register
; pairs for both sources.
; GCN-LABEL: {{^}}s_test_canonicalize_var_f64:
; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @s_test_canonicalize_var_f64(double addrspace(1)* %out, double %val) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double %val)
  store double %canonicalized, double addrspace(1)* %out
  ret void
}
227
Matt Arsenaultd5d78512017-01-31 17:28:40 +0000228; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_f64:
Stanislav Mekhanoshin06cab792017-08-30 03:03:38 +0000229; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], |{{v\[[0-9]+:[0-9]+\]}}|, |{{v\[[0-9]+:[0-9]+\]}}|
Matt Arsenaultd5d78512017-01-31 17:28:40 +0000230; GCN: buffer_store_dwordx2 [[REG]]
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000231define amdgpu_kernel void @v_test_canonicalize_fabs_var_f64(double addrspace(1)* %out) #1 {
Matt Arsenaultd5d78512017-01-31 17:28:40 +0000232 %val = load double, double addrspace(1)* %out
233 %val.fabs = call double @llvm.fabs.f64(double %val)
234 %canonicalized = call double @llvm.canonicalize.f64(double %val.fabs)
235 store double %canonicalized, double addrspace(1)* %out
236 ret void
237}

; canonicalize(fneg(fabs(x))) for f64: both v_max_f64 sources are expected to
; carry the -|src| modifiers.
; The destination pattern accepts a multi-digit high register index, matching
; the other f64 variable-operand tests.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f64:
; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -|{{v\[[0-9]+:[0-9]+\]}}|, -|{{v\[[0-9]+:[0-9]+\]}}|
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f64(double addrspace(1)* %out) #1 {
  %val = load double, double addrspace(1)* %out
  %val.fabs = call double @llvm.fabs.f64(double %val)
  ; fneg spelled as a subtraction from -0.0.
  %val.fabs.fneg = fsub double -0.0, %val.fabs
  %canonicalized = call double @llvm.canonicalize.f64(double %val.fabs.fneg)
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; canonicalize(fneg(x)) for f64: both v_max_f64 sources are expected to carry
; the negate modifier.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f64:
; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -{{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_var_f64(double addrspace(1)* %out) #1 {
  %val = load double, double addrspace(1)* %out
  ; fneg spelled as a subtraction from -0.0.
  %val.fneg = fsub double -0.0, %val
  %canonicalized = call double @llvm.canonicalize.f64(double %val.fneg)
  store double %canonicalized, double addrspace(1)* %out
  ret void
}
261
Matt Arsenault9cd90712016-04-14 01:42:16 +0000262; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f64:
263; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
Matt Arsenault2b957b52016-05-02 20:07:26 +0000264; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
Matt Arsenault9cd90712016-04-14 01:42:16 +0000265; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000266define amdgpu_kernel void @test_fold_canonicalize_p0_f64(double addrspace(1)* %out) #1 {
Matt Arsenault9cd90712016-04-14 01:42:16 +0000267 %canonicalized = call double @llvm.canonicalize.f64(double 0.0)
268 store double %canonicalized, double addrspace(1)* %out
269 ret void
270}

; Constant -0.0 (f64): low half 0, high half materialized with v_bfrev_b32
; of 1 (the two moves may appear in either order).
; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_bfrev_b32_e32 v[[HI:[0-9]+]], 1{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_n0_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double -0.0)
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; Constant 1.0 (f64): low half 0, high half 0x3ff00000.
; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x3ff00000{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_p1_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double 1.0)
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; Constant -1.0 (f64): low half 0, high half 0xbff00000.
; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xbff00000{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_n1_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double -1.0)
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; Constant 16.0 (f64): low half 0, high half 0x40300000.
; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x40300000{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_literal_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double 16.0)
  store double %canonicalized, double addrspace(1)* %out
  ret void
}
311
Matt Arsenaultb2cb5f82016-07-09 07:55:03 +0000312; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f64:
313; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
314; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
Matt Arsenault9cd90712016-04-14 01:42:16 +0000315; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000316define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #2 {
Matt Arsenault9cd90712016-04-14 01:42:16 +0000317 %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
318 store double %canonicalized, double addrspace(1)* %out
319 ret void
320}
321
Matt Arsenaultb2cb5f82016-07-09 07:55:03 +0000322; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f64:
323; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
324; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xfffff{{$}}
Matt Arsenault9cd90712016-04-14 01:42:16 +0000325; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000326define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #3 {
Matt Arsenaultb2cb5f82016-07-09 07:55:03 +0000327 %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
328 store double %canonicalized, double addrspace(1)* %out
329 ret void
330}

; Negative f64 denormal constant (9227875636482146303 = 0x800fffffffffffff)
; with attribute group #2 (denormal features disabled): expected to fold to
; zero in both halves.
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f64:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #2 {
  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; Same negative f64 denormal constant with attribute group #3 (denormal
; features enabled): the bit pattern is expected to be kept (low half -1,
; high half 0x800fffff).
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x800fffff{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #3 {
  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; Quiet NaN constant 0x7FF8000000000000 (f64): expected to fold to high half
; 0x7ff80000, low half 0.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_qnan_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double 0x7FF8000000000000)
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; NaN with all bits set (i64 -1): expected to fold to high half 0x7ff80000,
; low half 0.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -1 to double))
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; NaN with bit pattern i64 -2: expected to fold to high half 0x7ff80000,
; low half 0.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -2 to double))
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; Signaling NaN 0x7ff0000000000001 (9218868437227405313): expected to fold to
; high half 0x7ff80000, low half 0.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9218868437227405313 to double))
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; Signaling NaN 0x7ff7ffffffffffff (9221120237041090559), i.e. the largest
; positive payload with the quiet bit clear; this mirrors the f32 snan1 input
; 0x7fbfffff. Expected to fold to high half 0x7ff80000, low half 0.
; NOTE(review): the input used to be 0x7fffffffffffffff, which has the quiet
; bit set and therefore did not exercise the signaling path.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9221120237041090559 to double))
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; Signaling NaN with the sign bit set, 0xfff0000000000001
; (18442240474082181121): expected to fold to high half 0x7ff80000, low
; half 0.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 18442240474082181121 to double))
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; Signaling NaN with the sign bit set, 0xfff7ffffffffffff
; (18444492273895866367); this mirrors the f32 snan3 input 0xffbfffff.
; Expected to fold to high half 0x7ff80000, low half 0.
; NOTE(review): the input used to be all-ones, which has the quiet bit set and
; is the same value as the qnan_value_neg1_f64 test.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 18444492273895866367 to double))
  store double %canonicalized, double addrspace(1)* %out
  ret void
}
421
Stanislav Mekhanoshin06cab792017-08-30 03:03:38 +0000422; GCN-LABEL: {{^}}test_canonicalize_value_f64_flush:
423; GCN: v_mul_f64 v[{{[0-9:]+}}], 1.0, v[{{[0-9:]+}}]
424define amdgpu_kernel void @test_canonicalize_value_f64_flush(double addrspace(1)* %arg, double addrspace(1)* %out) #4 {
425 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
426 %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
427 %v = load double, double addrspace(1)* %gep, align 8
428 %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
429 %gep2 = getelementptr inbounds double, double addrspace(1)* %out, i32 %id
430 store double %canonicalized, double addrspace(1)* %gep2, align 8
431 ret void
432}

; f32 with attribute group #4 (denormal features disabled, target-cpu tonga):
; expects a v_mul_f32 by 1.0.
; GCN-LABEL: {{^}}test_canonicalize_value_f32_flush:
; GCN: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_f32_flush(float addrspace(1)* %arg, float addrspace(1)* %out) #4 {
  ; Each work item reads and writes the element at its own x id.
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %v = load float, float addrspace(1)* %gep, align 4
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}

; f16 with attribute group #4 (denormal features disabled, target-cpu tonga):
; expects a v_mul_f16 by 1.0.
; GCN-LABEL: {{^}}test_canonicalize_value_f16_flush:
; GCN: v_mul_f16_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_f16_flush(half addrspace(1)* %arg, half addrspace(1)* %out) #4 {
  ; Each work item reads and writes the element at its own x id.
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %v = load half, half addrspace(1)* %gep, align 2
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
  store half %canonicalized, half addrspace(1)* %gep2, align 2
  ret void
}
457
Stanislav Mekhanoshin442e28d2017-09-06 22:27:29 +0000458; GCN-LABEL: {{^}}test_canonicalize_value_v2f16_flush_gfx8:
459; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 0x3c00
460; GCN-DAG: v_mul_f16_sdwa v{{[0-9]+}}, [[ONE]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
461; GCN-DAG: v_mul_f16_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
462define amdgpu_kernel void @test_canonicalize_value_v2f16_flush_gfx8(<2 x half> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) #4 {
463 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
464 %gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i32 %id
465 %v = load <2 x half>, <2 x half> addrspace(1)* %gep, align 4
466 %canonicalized = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %v)
467 %gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %id
468 store <2 x half> %canonicalized, <2 x half> addrspace(1)* %gep2, align 2
469 ret void
470}

; <2 x half> with attribute group #6 (denormal features disabled, target-cpu
; gfx900): expects a single packed multiply by 1.0.
; GCN-LABEL: {{^}}test_canonicalize_value_v2f16_flush_gfx9:
; GCN-DAG: v_pk_mul_f16 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_v2f16_flush_gfx9(<2 x half> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) #6 {
  ; Each work item reads and writes the element at its own x id.
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i32 %id
  %v = load <2 x half>, <2 x half> addrspace(1)* %gep, align 4
  %canonicalized = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %v)
  %gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %id
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %gep2, align 2
  ret void
}
483
Stanislav Mekhanoshin06cab792017-08-30 03:03:38 +0000484; GCN-LABEL: {{^}}test_canonicalize_value_f64_denorm:
485; GCN: v_max_f64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
486define amdgpu_kernel void @test_canonicalize_value_f64_denorm(double addrspace(1)* %arg, double addrspace(1)* %out) #5 {
487 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
488 %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
489 %v = load double, double addrspace(1)* %gep, align 8
490 %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
491 %gep2 = getelementptr inbounds double, double addrspace(1)* %out, i32 %id
492 store double %canonicalized, double addrspace(1)* %gep2, align 8
493 ret void
494}

; f32 with attribute group #5 (denormal features enabled, target-cpu gfx900):
; expects a v_max_f32 rather than a multiply.
; GCN-LABEL: {{^}}test_canonicalize_value_f32_denorm:
; GCN: v_max_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_f32_denorm(float addrspace(1)* %arg, float addrspace(1)* %out) #5 {
  ; Each work item reads and writes the element at its own x id.
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %v = load float, float addrspace(1)* %gep, align 4
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}

; f16 with attribute group #5 (denormal features enabled, target-cpu gfx900):
; expects a v_max_f16 rather than a multiply.
; GCN-LABEL: {{^}}test_canonicalize_value_f16_denorm:
; GCN: v_max_f16_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_f16_denorm(half addrspace(1)* %arg, half addrspace(1)* %out) #5 {
  ; Each work item reads and writes the element at its own x id.
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %v = load half, half addrspace(1)* %gep, align 2
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
  store half %canonicalized, half addrspace(1)* %gep2, align 2
  ret void
}
519
Stanislav Mekhanoshin442e28d2017-09-06 22:27:29 +0000520; GCN-LABEL: {{^}}test_canonicalize_value_v2f16_denorm:
521; GCN: v_pk_max_f16 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
522define amdgpu_kernel void @test_canonicalize_value_v2f16_denorm(<2 x half> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) #5 {
523 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
524 %gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i32 %id
525 %v = load <2 x half>, <2 x half> addrspace(1)* %gep, align 4
526 %canonicalized = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %v)
527 %gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %id
528 store <2 x half> %canonicalized, <2 x half> addrspace(1)* %gep2, align 2
529 ret void
530}
531
Matt Arsenault540512c2018-04-26 19:21:37 +0000532; GCN-LABEL: {{^}}v_test_canonicalize_var_v2f64:
533; GCN: v_max_f64
534; GCN: v_max_f64
535define amdgpu_kernel void @v_test_canonicalize_var_v2f64(<2 x double> addrspace(1)* %out) #1 {
536 %tid = call i32 @llvm.amdgcn.workitem.id.x()
537 %gep = getelementptr <2 x double>, <2 x double> addrspace(1)* %out, i32 %tid
538 %val = load <2 x double>, <2 x double> addrspace(1)* %gep
539 %canonicalized = call <2 x double> @llvm.canonicalize.v2f64(<2 x double> %val)
540 store <2 x double> %canonicalized, <2 x double> addrspace(1)* %out
541 ret void
542}

; Attribute groups referenced by the declarations and kernels in this file:
;   #0     - intrinsic declarations
;   #1     - kernels that set no denormal target features
;   #2, #3 - fp32 and fp64/fp16 denormal features explicitly off / on
;   #4     - denormal features off, target-cpu tonga
;   #5     - denormal features on, target-cpu gfx900
;   #6     - denormal features off, target-cpu gfx900
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
attributes #3 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }
attributes #4 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" "target-cpu"="tonga" }
attributes #5 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" "target-cpu"="gfx900" }
attributes #6 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" "target-cpu"="gfx900" }