; blob: c0ac6599e4a0e62ae845deb154150662d214b876
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Canonicalize intrinsics, one per tested type.
declare half @llvm.canonicalize.f16(half) #0
declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
declare float @llvm.canonicalize.f32(float) #0
declare double @llvm.canonicalize.f64(double) #0

; Helpers used to build source modifiers and per-workitem addresses.
declare float @llvm.fabs.f32(float) #0
declare double @llvm.fabs.f64(double) #0
declare i32 @llvm.amdgcn.workitem.id.x() #0

; Loads a float through %out, canonicalizes it and stores it back. The checks
; expect a v_mul_f32 by 1.0 on a v-register whose result feeds the store.
; GCN-LABEL: {{^}}v_test_canonicalize_var_f32:
; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @v_test_canonicalize_var_f32(float addrspace(1)* %out) #1 {
  %loaded = load float, float addrspace(1)* %out
  %canon = call float @llvm.canonicalize.f32(float %loaded)
  store float %canon, float addrspace(1)* %out
  ret void
}

; Canonicalizes the float kernel argument %val and stores it to %out. The
; checks expect the e64 multiply by 1.0 reading an s-register operand.
; GCN-LABEL: {{^}}s_test_canonicalize_var_f32:
; GCN: v_mul_f32_e64 [[REG:v[0-9]+]], 1.0, {{s[0-9]+}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @s_test_canonicalize_var_f32(float addrspace(1)* %out, float %val) #1 {
  %canon = call float @llvm.canonicalize.f32(float %val)
  store float %canon, float addrspace(1)* %out
  ret void
}

; Canonicalizes fabs of a loaded float. The checks expect the absolute value
; to appear as a |...| source modifier on the multiply by 1.0.
; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_f32:
; GCN: v_mul_f32_e64 [[REG:v[0-9]+]], 1.0, |{{v[0-9]+}}|
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fabs_var_f32(float addrspace(1)* %out) #1 {
  %loaded = load float, float addrspace(1)* %out
  %abs = call float @llvm.fabs.f32(float %loaded)
  %canon = call float @llvm.canonicalize.f32(float %abs)
  store float %canon, float addrspace(1)* %out
  ret void
}

; Canonicalizes -fabs of a loaded float (negation written as fsub from -0.0).
; The checks expect a combined -|...| source modifier on the multiply by 1.0.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f32:
; GCN: v_mul_f32_e64 [[REG:v[0-9]+]], 1.0, -|{{v[0-9]+}}|
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(float addrspace(1)* %out) #1 {
  %loaded = load float, float addrspace(1)* %out
  %abs = call float @llvm.fabs.f32(float %loaded)
  %neg.abs = fsub float -0.0, %abs
  %canon = call float @llvm.canonicalize.f32(float %neg.abs)
  store float %canon, float addrspace(1)* %out
  ret void
}

; Canonicalizes the negation of a loaded float (fsub from -0.0). The checks
; expect the negation as a "-" source modifier on the multiply by 1.0.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f32:
; GCN: v_mul_f32_e64 [[REG:v[0-9]+]], 1.0, -{{v[0-9]+}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(float addrspace(1)* %out) #1 {
  %loaded = load float, float addrspace(1)* %out
  %neg = fsub float -0.0, %loaded
  %canon = call float @llvm.canonicalize.f32(float %neg)
  store float %canon, float addrspace(1)* %out
  ret void
}

; Constant fold: canonicalize(+0.0). The checks expect a plain move of 0
; feeding the store.
; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p0_f32(float addrspace(1)* %out) #1 {
  %canon = call float @llvm.canonicalize.f32(float 0.0)
  store float %canon, float addrspace(1)* %out
  ret void
}

; Constant fold: canonicalize(-0.0). The checks expect the constant to be
; materialized with v_bfrev_b32 of 1 and then stored.
; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f32:
; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n0_f32(float addrspace(1)* %out) #1 {
  %canon = call float @llvm.canonicalize.f32(float -0.0)
  store float %canon, float addrspace(1)* %out
  ret void
}

; Constant fold: canonicalize(1.0). The checks expect a move of the inline
; constant 1.0 feeding the store.
; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p1_f32(float addrspace(1)* %out) #1 {
  %canon = call float @llvm.canonicalize.f32(float 1.0)
  store float %canon, float addrspace(1)* %out
  ret void
}

; Constant fold: canonicalize(-1.0). The checks expect a move of the inline
; constant -1.0 feeding the store.
; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], -1.0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n1_f32(float addrspace(1)* %out) #1 {
  %canon = call float @llvm.canonicalize.f32(float -1.0)
  store float %canon, float addrspace(1)* %out
  ret void
}

; Constant fold: canonicalize(16.0). The checks expect a move of the literal
; 0x41800000 (the f32 bit pattern of 16.0) feeding the store.
; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x41800000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_literal_f32(float addrspace(1)* %out) #1 {
  %canon = call float @llvm.canonicalize.f32(float 16.0)
  store float %canon, float addrspace(1)* %out
  ret void
}

; Constant fold of the positive f32 denormal 0x007fffff (8388607). The checks
; expect it to become 0.
; NOTE(review): uses attribute group #1, which sets no denormal features, so
; this presumably relies on the subtarget default for f32 denormals - confirm.
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32(float addrspace(1)* %out) #1 {
  %canon = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
  store float %canon, float addrspace(1)* %out
  ret void
}

; Constant fold of the positive f32 denormal 0x007fffff (8388607) under
; attribute group #3. The checks expect the value to be kept as 0x7fffff.
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fffff{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f32(float addrspace(1)* %out) #3 {
  %canon = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
  store float %canon, float addrspace(1)* %out
  ret void
}

; Constant fold of the negative f32 denormal 0x807fffff (2155872255). The
; checks expect a move of 0.
; NOTE(review): uses attribute group #1, which sets no denormal features, so
; this presumably relies on the subtarget default for f32 denormals - confirm.
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #1 {
  %canon = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
  store float %canon, float addrspace(1)* %out
  ret void
}

; Constant fold of the negative f32 denormal 0x807fffff (2155872255) under
; attribute group #3. The checks expect the value to be kept unchanged.
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x807fffff{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #3 {
  %canon = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
  store float %canon, float addrspace(1)* %out
  ret void
}

; Constant fold of a quiet NaN constant. The checks expect the f32 pattern
; 0x7fc00000 to be stored.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_f32(float addrspace(1)* %out) #1 {
  %canon = call float @llvm.canonicalize.f32(float 0x7FF8000000000000)
  store float %canon, float addrspace(1)* %out
  ret void
}

; Constant fold of the NaN with all bits set (i32 -1). The checks expect it
; to be rewritten to 0x7fc00000.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f32(float addrspace(1)* %out) #1 {
  %canon = call float @llvm.canonicalize.f32(float bitcast (i32 -1 to float))
  store float %canon, float addrspace(1)* %out
  ret void
}

; Constant fold of the NaN 0xfffffffe (i32 -2). The checks expect it to be
; rewritten to 0x7fc00000.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f32(float addrspace(1)* %out) #1 {
  %canon = call float @llvm.canonicalize.f32(float bitcast (i32 -2 to float))
  store float %canon, float addrspace(1)* %out
  ret void
}

; Constant fold of the signaling NaN 0x7f800001 (2139095041). The checks
; expect the quiet pattern 0x7fc00000.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f32(float addrspace(1)* %out) #1 {
  %canon = call float @llvm.canonicalize.f32(float bitcast (i32 2139095041 to float))
  store float %canon, float addrspace(1)* %out
  ret void
}

; Constant fold of the signaling NaN 0x7fbfffff (2143289343). The checks
; expect the quiet pattern 0x7fc00000.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f32(float addrspace(1)* %out) #1 {
  %canon = call float @llvm.canonicalize.f32(float bitcast (i32 2143289343 to float))
  store float %canon, float addrspace(1)* %out
  ret void
}

; Constant fold of the sign-bit-set signaling NaN 0xff800001 (4286578689).
; The checks expect the quiet pattern 0x7fc00000.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f32(float addrspace(1)* %out) #1 {
  %canon = call float @llvm.canonicalize.f32(float bitcast (i32 4286578689 to float))
  store float %canon, float addrspace(1)* %out
  ret void
}

; Constant fold of the sign-bit-set signaling NaN 0xffbfffff (4290772991).
; The checks expect the quiet pattern 0x7fc00000.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f32(float addrspace(1)* %out) #1 {
  %canon = call float @llvm.canonicalize.f32(float bitcast (i32 4290772991 to float))
  store float %canon, float addrspace(1)* %out
  ret void
}

; Loads a double through %out, canonicalizes it and stores it back. The checks
; expect a v_max_f64 with two v-register-pair sources feeding the store.
; GCN-LABEL: {{^}}v_test_canonicalize_var_f64:
; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 {
  %loaded = load double, double addrspace(1)* %out
  %canon = call double @llvm.canonicalize.f64(double %loaded)
  store double %canon, double addrspace(1)* %out
  ret void
}

; Canonicalizes the double kernel argument %val and stores it to %out. The
; checks expect a v_max_f64 whose two sources are s-register pairs.
; GCN-LABEL: {{^}}s_test_canonicalize_var_f64:
; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @s_test_canonicalize_var_f64(double addrspace(1)* %out, double %val) #1 {
  %canon = call double @llvm.canonicalize.f64(double %val)
  store double %canon, double addrspace(1)* %out
  ret void
}

; Canonicalizes fabs of a loaded double. The checks expect |...| source
; modifiers on both v_max_f64 operands.
; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_f64:
; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], |{{v\[[0-9]+:[0-9]+\]}}|, |{{v\[[0-9]+:[0-9]+\]}}|
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fabs_var_f64(double addrspace(1)* %out) #1 {
  %loaded = load double, double addrspace(1)* %out
  %abs = call double @llvm.fabs.f64(double %loaded)
  %canon = call double @llvm.canonicalize.f64(double %abs)
  store double %canon, double addrspace(1)* %out
  ret void
}

; Canonicalizes -fabs of a loaded double (negation written as fsub from -0.0).
; The checks expect -|...| source modifiers on both v_max_f64 operands.
; The destination pattern uses [0-9]+ for both indices of the register pair,
; matching the sibling f64 tests; a bare [0-9] for the high index would only
; accept a single-digit register number.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f64:
; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -|{{v\[[0-9]+:[0-9]+\]}}|, -|{{v\[[0-9]+:[0-9]+\]}}|
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f64(double addrspace(1)* %out) #1 {
  %val = load double, double addrspace(1)* %out
  %val.fabs = call double @llvm.fabs.f64(double %val)
  %val.fabs.fneg = fsub double -0.0, %val.fabs
  %canonicalized = call double @llvm.canonicalize.f64(double %val.fabs.fneg)
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; Canonicalizes the negation of a loaded double (fsub from -0.0). The checks
; expect "-" source modifiers on both v_max_f64 operands.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f64:
; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -{{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_var_f64(double addrspace(1)* %out) #1 {
  %loaded = load double, double addrspace(1)* %out
  %neg = fsub double -0.0, %loaded
  %canon = call double @llvm.canonicalize.f64(double %neg)
  store double %canon, double addrspace(1)* %out
  ret void
}

; Constant fold: canonicalize(+0.0) as double. The checks expect the low
; dword moved as 0 and the high dword copied from the low register.
; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f64:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_p0_f64(double addrspace(1)* %out) #1 {
  %canon = call double @llvm.canonicalize.f64(double 0.0)
  store double %canon, double addrspace(1)* %out
  ret void
}

; Constant fold: canonicalize(-0.0) as double. The checks expect a low dword
; of 0 and a high dword built with v_bfrev_b32 of 1, in either order.
; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_bfrev_b32_e32 v[[HI:[0-9]+]], 1{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_n0_f64(double addrspace(1)* %out) #1 {
  %canon = call double @llvm.canonicalize.f64(double -0.0)
  store double %canon, double addrspace(1)* %out
  ret void
}

; Constant fold: canonicalize(1.0) as double. The checks expect a low dword
; of 0 and a high dword of 0x3ff00000.
; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x3ff00000{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_p1_f64(double addrspace(1)* %out) #1 {
  %canon = call double @llvm.canonicalize.f64(double 1.0)
  store double %canon, double addrspace(1)* %out
  ret void
}

; Constant fold: canonicalize(-1.0) as double. The checks expect a low dword
; of 0 and a high dword of 0xbff00000.
; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xbff00000{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_n1_f64(double addrspace(1)* %out) #1 {
  %canon = call double @llvm.canonicalize.f64(double -1.0)
  store double %canon, double addrspace(1)* %out
  ret void
}

; Constant fold: canonicalize(16.0) as double. The checks expect a low dword
; of 0 and a high dword of 0x40300000.
; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x40300000{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_literal_f64(double addrspace(1)* %out) #1 {
  %canon = call double @llvm.canonicalize.f64(double 16.0)
  store double %canon, double addrspace(1)* %out
  ret void
}

; Constant fold of the positive f64 denormal 0x000fffffffffffff under
; attribute group #2. The checks expect both dwords of the result to be 0.
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f64:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #2 {
  %canon = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
  store double %canon, double addrspace(1)* %out
  ret void
}

; Constant fold of the positive f64 denormal 0x000fffffffffffff under
; attribute group #3. The checks expect the bits kept: low -1, high 0xfffff.
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xfffff{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #3 {
  %canon = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
  store double %canon, double addrspace(1)* %out
  ret void
}

; Constant fold of the negative f64 denormal 0x800fffffffffffff under
; attribute group #2. The checks expect both dwords of the result to be 0.
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f64:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #2 {
  %canon = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
  store double %canon, double addrspace(1)* %out
  ret void
}

; Constant fold of the negative f64 denormal 0x800fffffffffffff under
; attribute group #3. The checks expect the bits kept: low -1, high 0x800fffff.
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x800fffff{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #3 {
  %canon = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
  store double %canon, double addrspace(1)* %out
  ret void
}

; Constant fold of the quiet NaN 0x7FF8000000000000. The checks expect a
; high dword of 0x7ff80000 and a low dword of 0.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_qnan_f64(double addrspace(1)* %out) #1 {
  %canon = call double @llvm.canonicalize.f64(double 0x7FF8000000000000)
  store double %canon, double addrspace(1)* %out
  ret void
}

; Constant fold of the NaN with all 64 bits set (i64 -1). The checks expect
; it to be rewritten to high 0x7ff80000, low 0.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f64(double addrspace(1)* %out) #1 {
  %canon = call double @llvm.canonicalize.f64(double bitcast (i64 -1 to double))
  store double %canon, double addrspace(1)* %out
  ret void
}

; Constant fold of the NaN 0xfffffffffffffffe (i64 -2). The checks expect it
; to be rewritten to high 0x7ff80000, low 0.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f64(double addrspace(1)* %out) #1 {
  %canon = call double @llvm.canonicalize.f64(double bitcast (i64 -2 to double))
  store double %canon, double addrspace(1)* %out
  ret void
}

; Constant fold of the signaling NaN 0x7ff0000000000001. The checks expect
; the quiet pattern: high 0x7ff80000, low 0.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f64(double addrspace(1)* %out) #1 {
  %canon = call double @llvm.canonicalize.f64(double bitcast (i64 9218868437227405313 to double))
  store double %canon, double addrspace(1)* %out
  ret void
}

; Constant fold of a positive signaling NaN with every payload bit below the
; quiet bit set: 0x7ff7ffffffffffff (9221120237041090559), the f64 analogue
; of the f32 snan1 input 0x7fbfffff. The checks expect the quiet pattern:
; high 0x7ff80000, low 0.
; The input was previously 0x7fffffffffffffff, which has the quiet bit set
; and therefore did not exercise a signaling NaN.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9221120237041090559 to double))
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; Constant fold of the sign-bit-set signaling NaN 0xfff0000000000001. The
; checks expect the quiet pattern: high 0x7ff80000, low 0.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f64(double addrspace(1)* %out) #1 {
  %canon = call double @llvm.canonicalize.f64(double bitcast (i64 18442240474082181121 to double))
  store double %canon, double addrspace(1)* %out
  ret void
}

; Constant fold of a sign-bit-set signaling NaN with every payload bit below
; the quiet bit set: 0xfff7ffffffffffff (18444492273895866367), the f64
; analogue of the f32 snan3 input 0xffbfffff. The checks expect the quiet
; pattern: high 0x7ff80000, low 0.
; The input was previously 0xffffffffffffffff, a quiet NaN identical to the
; one already covered by test_fold_canonicalize_qnan_value_neg1_f64.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 18444492273895866367 to double))
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; Per-workitem load, canonicalize and store of a double under attribute
; group #4. The checks expect a v_mul_f64 by 1.0.
; GCN-LABEL: {{^}}test_canonicalize_value_f64_flush:
; GCN: v_mul_f64 v[{{[0-9:]+}}], 1.0, v[{{[0-9:]+}}]
define amdgpu_kernel void @test_canonicalize_value_f64_flush(double addrspace(1)* %arg, double addrspace(1)* %out) #4 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr inbounds double, double addrspace(1)* %arg, i32 %tid
  %x = load double, double addrspace(1)* %src, align 8
  %canon = tail call double @llvm.canonicalize.f64(double %x)
  %dst = getelementptr inbounds double, double addrspace(1)* %out, i32 %tid
  store double %canon, double addrspace(1)* %dst, align 8
  ret void
}

; Per-workitem load, canonicalize and store of a float under attribute
; group #4. The checks expect a v_mul_f32 by 1.0.
; GCN-LABEL: {{^}}test_canonicalize_value_f32_flush:
; GCN: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_f32_flush(float addrspace(1)* %arg, float addrspace(1)* %out) #4 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr inbounds float, float addrspace(1)* %arg, i32 %tid
  %x = load float, float addrspace(1)* %src, align 4
  %canon = tail call float @llvm.canonicalize.f32(float %x)
  %dst = getelementptr inbounds float, float addrspace(1)* %out, i32 %tid
  store float %canon, float addrspace(1)* %dst, align 4
  ret void
}

; Per-workitem load, canonicalize and store of a half under attribute
; group #4. The checks expect a v_mul_f16 by 1.0.
; GCN-LABEL: {{^}}test_canonicalize_value_f16_flush:
; GCN: v_mul_f16_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_f16_flush(half addrspace(1)* %arg, half addrspace(1)* %out) #4 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr inbounds half, half addrspace(1)* %arg, i32 %tid
  %x = load half, half addrspace(1)* %src, align 2
  %canon = tail call half @llvm.canonicalize.f16(half %x)
  %dst = getelementptr inbounds half, half addrspace(1)* %out, i32 %tid
  store half %canon, half addrspace(1)* %dst, align 2
  ret void
}

; Per-workitem canonicalize of a <2 x half> under attribute group #4. The
; checks expect 0x3c00 materialized in a register, then an SDWA multiply
; reading WORD_1 of the source plus a plain v_mul_f16 by 1.0, in either order.
; GCN-LABEL: {{^}}test_canonicalize_value_v2f16_flush_gfx8:
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 0x3c00
; GCN-DAG: v_mul_f16_sdwa v{{[0-9]+}}, [[ONE]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GCN-DAG: v_mul_f16_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_v2f16_flush_gfx8(<2 x half> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) #4 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i32 %tid
  %x = load <2 x half>, <2 x half> addrspace(1)* %src, align 4
  %canon = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %x)
  %dst = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  store <2 x half> %canon, <2 x half> addrspace(1)* %dst, align 2
  ret void
}

; Per-workitem canonicalize of a <2 x half> under attribute group #6. The
; checks expect a single packed v_pk_mul_f16 by 1.0.
; GCN-LABEL: {{^}}test_canonicalize_value_v2f16_flush_gfx9:
; GCN-DAG: v_pk_mul_f16 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_v2f16_flush_gfx9(<2 x half> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) #6 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i32 %tid
  %x = load <2 x half>, <2 x half> addrspace(1)* %src, align 4
  %canon = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %x)
  %dst = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  store <2 x half> %canon, <2 x half> addrspace(1)* %dst, align 2
  ret void
}

; Per-workitem load, canonicalize and store of a double under attribute
; group #5. The checks expect a v_max_f64 on register operands.
; GCN-LABEL: {{^}}test_canonicalize_value_f64_denorm:
; GCN: v_max_f64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
define amdgpu_kernel void @test_canonicalize_value_f64_denorm(double addrspace(1)* %arg, double addrspace(1)* %out) #5 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr inbounds double, double addrspace(1)* %arg, i32 %tid
  %x = load double, double addrspace(1)* %src, align 8
  %canon = tail call double @llvm.canonicalize.f64(double %x)
  %dst = getelementptr inbounds double, double addrspace(1)* %out, i32 %tid
  store double %canon, double addrspace(1)* %dst, align 8
  ret void
}

; Per-workitem load, canonicalize and store of a float under attribute
; group #5. The checks expect a v_max_f32 on register operands.
; GCN-LABEL: {{^}}test_canonicalize_value_f32_denorm:
; GCN: v_max_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_f32_denorm(float addrspace(1)* %arg, float addrspace(1)* %out) #5 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr inbounds float, float addrspace(1)* %arg, i32 %tid
  %x = load float, float addrspace(1)* %src, align 4
  %canon = tail call float @llvm.canonicalize.f32(float %x)
  %dst = getelementptr inbounds float, float addrspace(1)* %out, i32 %tid
  store float %canon, float addrspace(1)* %dst, align 4
  ret void
}

; Per-workitem load, canonicalize and store of a half under attribute
; group #5. The checks expect a v_max_f16 on register operands.
; GCN-LABEL: {{^}}test_canonicalize_value_f16_denorm:
; GCN: v_max_f16_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_f16_denorm(half addrspace(1)* %arg, half addrspace(1)* %out) #5 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr inbounds half, half addrspace(1)* %arg, i32 %tid
  %x = load half, half addrspace(1)* %src, align 2
  %canon = tail call half @llvm.canonicalize.f16(half %x)
  %dst = getelementptr inbounds half, half addrspace(1)* %out, i32 %tid
  store half %canon, half addrspace(1)* %dst, align 2
  ret void
}

; Per-workitem canonicalize of a <2 x half> under attribute group #5. The
; checks expect a packed v_pk_max_f16 on register operands.
; GCN-LABEL: {{^}}test_canonicalize_value_v2f16_denorm:
; GCN: v_pk_max_f16 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_v2f16_denorm(<2 x half> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) #5 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i32 %tid
  %x = load <2 x half>, <2 x half> addrspace(1)* %src, align 4
  %canon = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %x)
  %dst = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  store <2 x half> %canon, <2 x half> addrspace(1)* %dst, align 2
  ret void
}

; #0: attached to the intrinsic declarations.
attributes #0 = { nounwind readnone }
; #1: kernels with no explicit denormal features or target CPU.
attributes #1 = { nounwind }
; #2 / #3: fp32 and fp64/fp16 denormal features both removed / both added.
attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
attributes #3 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }
; #4: denormal features removed, target CPU tonga.
attributes #4 = { nounwind "target-cpu"="tonga" "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
; #5: denormal features added, target CPU gfx900.
attributes #5 = { nounwind "target-cpu"="gfx900" "target-features"="+fp32-denormals,+fp64-fp16-denormals" }
; #6: denormal features removed, target CPU gfx900.
attributes #6 = { nounwind "target-cpu"="gfx900" "target-features"="-fp32-denormals,-fp64-fp16-denormals" }