; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s

; Code generation tests for llvm.canonicalize on half and <2 x half>.
; The tonga run is checked with the GCN and VI prefixes, the gfx901 run with
; the GCN and GFX9 prefixes.

declare half @llvm.fabs.f16(half) #0
declare half @llvm.canonicalize.f16(half) #0
declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0
declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
declare i32 @llvm.amdgcn.workitem.id.x() #0


; A half loaded from memory is expected to be canonicalized by a single
; multiply by 1.0 whose result is stored back.
; GCN-LABEL: {{^}}v_test_canonicalize_var_f16:
; GCN: v_mul_f16_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @v_test_canonicalize_var_f16(half addrspace(1)* %out) #1 {
  %val = load half, half addrspace(1)* %out
  %canonicalized = call half @llvm.canonicalize.f16(half %val)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; The input comes from a kernel argument (bitcast from i16), so the multiply
; by 1.0 is expected to read a scalar register operand.
; GCN-LABEL: {{^}}s_test_canonicalize_var_f16:
; GCN: v_mul_f16_e64 [[REG:v[0-9]+]], 1.0, {{s[0-9]+}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @s_test_canonicalize_var_f16(half addrspace(1)* %out, i16 zeroext %val.arg) #1 {
  %val = bitcast i16 %val.arg to half
  %canonicalized = call half @llvm.canonicalize.f16(half %val)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; fabs of the loaded value is expected to fold into the multiply as an
; absolute-value source modifier.
; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_f16:
; GCN: v_mul_f16_e64 [[REG:v[0-9]+]], 1.0, |{{v[0-9]+}}|
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fabs_var_f16(half addrspace(1)* %out) #1 {
  %val = load half, half addrspace(1)* %out
  %val.fabs = call half @llvm.fabs.f16(half %val)
  %canonicalized = call half @llvm.canonicalize.f16(half %val.fabs)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; fneg(fabs(x)) is expected to fold into the multiply as a combined
; negate + absolute-value source modifier.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f16:
; GCN: v_mul_f16_e64 [[REG:v[0-9]+]], 1.0, -|{{v[0-9]+}}|
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f16(half addrspace(1)* %out) #1 {
  %val = load half, half addrspace(1)* %out
  %val.fabs = call half @llvm.fabs.f16(half %val)
  ; fneg written as (-0.0 - x)
  %val.fabs.fneg = fsub half -0.0, %val.fabs
  %canonicalized = call half @llvm.canonicalize.f16(half %val.fabs.fneg)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; fneg of the loaded value is expected to fold into the multiply as a
; negate source modifier.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f16:
; GCN: v_mul_f16_e64 [[REG:v[0-9]+]], 1.0, -{{v[0-9]+}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_var_f16(half addrspace(1)* %out) #1 {
  %val = load half, half addrspace(1)* %out
  ; fneg written as (-0.0 - x)
  %val.fneg = fsub half -0.0, %val
  %canonicalized = call half @llvm.canonicalize.f16(half %val.fneg)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Canonicalizing the constant +0.0 is expected to fold to a move of the
; immediate 0; no multiply is checked for.
; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p0_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0.0)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Canonicalizing the constant -0.0 is expected to fold to a constant move.
; The half bit pattern 0x8000 shows up sign-extended in the 32-bit immediate.
; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff8000{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n0_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half -0.0)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Canonicalizing the constant 1.0 is expected to fold to a move of its half
; bit pattern, 0x3c00.
; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p1_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 1.0)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Canonicalizing the constant -1.0 is expected to fold to a constant move.
; The half bit pattern 0xbc00 shows up sign-extended in the 32-bit immediate.
; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffbc00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n1_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half -1.0)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Canonicalizing the constant 16.0 is expected to fold to a move of its half
; bit pattern, 0x4c00.
; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4c00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_literal_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 16.0)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; With the default attribute group (#1), the positive denormal constant
; 0xH03FF is expected to fold unchanged (0x3ff), i.e. it is not flushed.
; GCN-LABEL: {{^}}test_default_denormals_fold_canonicalize_denormal0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; With fp64/fp16 denormals explicitly enabled (attribute group #3), the
; positive denormal constant 0xH03FF is expected to fold unchanged (0x3ff).
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #3 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; With the default attribute group (#1), the negative denormal constant
; 0xH83FF is expected to fold unchanged; the bit pattern shows up
; sign-extended in the 32-bit immediate.
; GCN-LABEL: {{^}}test_default_denormals_fold_canonicalize_denormal1_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff83ff{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; With fp64/fp16 denormals explicitly enabled (attribute group #3), the
; negative denormal constant 0xH83FF is expected to fold unchanged; the bit
; pattern shows up sign-extended in the 32-bit immediate.
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff83ff{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #3 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; The constant 0xH7C00 is expected to fold to itself.
; NOTE(review): despite the "qnan" in the test name, 0xH7C00 (exponent all
; ones, zero mantissa) is the half encoding of +infinity; a quiet NaN would
; be 0xH7E00. The name is kept so the label stays stable.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7c00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH7C00)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; The all-ones bit pattern (i16 -1, a NaN with sign and payload bits set) is
; expected to fold to the canonical quiet NaN 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 -1 to half))
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; The bit pattern of i16 -2 (0xFFFE, a NaN with sign and payload bits set) is
; expected to fold to the canonical quiet NaN 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 -2 to half))
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; The signaling NaN 0xH7C01 (positive, smallest payload) is expected to fold
; to the canonical quiet NaN 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH7C01)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; The signaling NaN 0xH7DFF (positive, all payload bits below the quiet bit
; set) is expected to fold to the canonical quiet NaN 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH7DFF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; The signaling NaN 0xHFDFF (sign bit set, all payload bits below the quiet
; bit set) is expected to fold to the canonical quiet NaN 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xHFDFF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; The signaling NaN 0xHFC01 (sign bit set, smallest payload) is expected to
; fold to the canonical quiet NaN 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xHFC01)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Canonicalize of a loaded <2 x half>.
; tonga: the two halves are expected to be multiplied separately (an SDWA
; multiply using a materialized 0x3c00 for the high word, a plain multiply by
; 1.0 for the low word) with no v_and_b32.
; gfx901: a single packed multiply by 1.0 is expected.
; The value is loaded from the per-workitem address %gep but stored to %out.
; GCN-LABEL: {{^}}v_test_canonicalize_var_v2f16:
; VI: v_mov_b32_e32 v[[CONST1:[0-9]+]], 0x3c00
; VI-DAG: v_mul_f16_sdwa [[REG0:v[0-9]+]], v[[CONST1]], {{v[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_mul_f16_e32 [[REG1:v[0-9]+]], 1.0, {{v[0-9]+}}
; VI-NOT: v_and_b32

; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{v[0-9]+$}}
; GFX9: buffer_store_dword [[REG]]
define amdgpu_kernel void @v_test_canonicalize_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %val = load <2 x half>, <2 x half> addrspace(1)* %gep
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Canonicalize of fabs of a loaded <2 x half>.
; On both targets the fabs is currently expected as a separate v_and_b32 with
; 0x7fff7fff rather than as a source modifier on the multiply.
; FIXME: Fold modifier
; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_v2f16:
; VI-DAG: v_bfe_u32
; VI-DAG: v_and_b32_e32 v{{[0-9]+}}, 0x7fff7fff, v{{[0-9]+}}
; VI-DAG: v_mov_b32_e32 v[[CONST1:[0-9]+]], 0x3c00
; VI: v_mul_f16_sdwa [[REG0:v[0-9]+]], v[[CONST1]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_mul_f16_e32 [[REG1:v[0-9]+]], 1.0, v{{[0-9]+}}
; VI-NOT: 0xffff
; VI: v_or_b32

; GFX9: v_and_b32_e32 [[ABS:v[0-9]+]], 0x7fff7fff, v{{[0-9]+}}
; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, [[ABS]]{{$}}
; GCN: buffer_store_dword
define amdgpu_kernel void @v_test_canonicalize_fabs_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %val = load <2 x half>, <2 x half> addrspace(1)* %gep
  %val.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val.fabs)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Canonicalize of fneg(fabs(x)) of a loaded <2 x half>.
; tonga: the sign bits are expected to be set with a v_or_b32 of 0x80008000
; before the two per-half multiplies.
; gfx901: the fabs stays a separate v_and_b32 while the negate is expected as
; neg_lo/neg_hi modifiers on the packed multiply.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_v2f16:
; VI-DAG: v_mov_b32_e32 v[[CONST1:[0-9]+]], 0x3c00
; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, 0x80008000, v{{[0-9]+}}
; VI-DAG: v_mul_f16_sdwa [[REG0:v[0-9]+]], v[[CONST1]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_mul_f16_e32 [[REG1:v[0-9]+]], 1.0, v{{[0-9]+}}
; VI: v_or_b32

; GFX9: v_and_b32_e32 [[ABS:v[0-9]+]], 0x7fff7fff, v{{[0-9]+}}
; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, [[ABS]] neg_lo:[0,1] neg_hi:[0,1]{{$}}
; GCN: buffer_store_dword
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %val = load <2 x half>, <2 x half> addrspace(1)* %gep
  %val.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  ; fneg written as (-0.0 - x) on both lanes
  %val.fabs.fneg = fsub <2 x half> <half -0.0, half -0.0>, %val.fabs
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val.fabs.fneg)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Canonicalize of fneg of a loaded <2 x half>.
; tonga: the negate is currently expected as a separate v_xor_b32 with
; 0x80008000 feeding both per-half multiplies.
; gfx901: the negate is expected as neg_lo/neg_hi modifiers on the packed
; multiply.
; FIXME: Fold modifier
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_v2f16:
; VI-DAG: v_mov_b32_e32 v[[CONST1:[0-9]+]], 0x3c00
; VI-DAG: v_xor_b32_e32 [[FNEG:v[0-9]+]], 0x80008000, v{{[0-9]+}}
; VI-DAG: v_mul_f16_sdwa [[REG1:v[0-9]+]], v[[CONST1]], [[FNEG]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_mul_f16_e32 [[REG0:v[0-9]+]], 1.0, [[FNEG]]
; VI-NOT: 0xffff

; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} neg_lo:[0,1] neg_hi:[0,1]{{$}}
; GFX9: buffer_store_dword [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %val = load <2 x half>, <2 x half> addrspace(1)* %gep
  ; fneg written as (-0.0 - x) on both lanes
  %fneg.val = fsub <2 x half> <half -0.0, half -0.0>, %val
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %fneg.val)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Canonicalize of a <2 x half> that comes from a kernel argument (bitcast
; from i32).
; tonga: two per-half multiplies are expected, one of them reading a scalar
; register, with no v_and_b32.
; gfx901: a single packed multiply reading a scalar register is expected.
; GCN-LABEL: {{^}}s_test_canonicalize_var_v2f16:
; VI: v_mov_b32_e32 [[ONE:v[0-9]+]], 0x3c00
; VI: v_mul_f16_sdwa [[REG0:v[0-9]+]], [[ONE]], {{v[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_mul_f16_e64 [[REG1:v[0-9]+]], 1.0, {{s[0-9]+}}
; VI-NOT: v_and_b32

; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{s[0-9]+$}}
; GFX9: buffer_store_dword [[REG]]
define amdgpu_kernel void @s_test_canonicalize_var_v2f16(<2 x half> addrspace(1)* %out, i32 zeroext %val.arg) #1 {
  %val = bitcast i32 %val.arg to <2 x half>
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Canonicalizing the constant vector <+0.0, +0.0> is expected to fold to a
; move of the immediate 0.
; GCN-LABEL: {{^}}test_fold_canonicalize_p0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p0_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> zeroinitializer)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Canonicalizing the constant vector <-0.0, -0.0> is expected to fold to a
; move of the packed bit pattern 0x80008000.
; GCN-LABEL: {{^}}test_fold_canonicalize_n0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n0_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half -0.0, half -0.0>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Canonicalizing the constant vector <1.0, 1.0> is expected to fold to a
; move of the packed bit pattern 0x3c003c00.
; GCN-LABEL: {{^}}test_fold_canonicalize_p1_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c003c00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p1_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 1.0, half 1.0>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Canonicalizing the constant vector <-1.0, -1.0> is expected to fold to a
; move of the packed bit pattern 0xbc00bc00.
; GCN-LABEL: {{^}}test_fold_canonicalize_n1_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbc00bc00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n1_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half -1.0, half -1.0>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Canonicalizing the constant vector <16.0, 16.0> is expected to fold to a
; move of the packed bit pattern 0x4c004c00.
; GCN-LABEL: {{^}}test_fold_canonicalize_literal_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4c004c00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_literal_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 16.0, half 16.0>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; A vector of the positive denormal 0xH03FF is expected to fold unchanged
; (0x3ff03ff), i.e. it is not flushed.
; NOTE(review): despite "no_denormals" in the name, this kernel uses the
; default attribute group #1, not the denormals-disabled group #2. The name
; is kept so the label stays stable.
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff03ff{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH03FF, half 0xH03FF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; With fp64/fp16 denormals explicitly enabled (attribute group #3), a vector
; of the positive denormal 0xH03FF is expected to fold unchanged (0x3ff03ff).
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff03ff{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_v2f16(<2 x half> addrspace(1)* %out) #3 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH03FF, half 0xH03FF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; A vector of the negative denormal 0xH83FF is expected to fold unchanged
; (0x83ff83ff), i.e. it is not flushed.
; NOTE(review): despite "no_denormals" in the name, this kernel uses the
; default attribute group #1, not the denormals-disabled group #2. The name
; is kept so the label stays stable.
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x83ff83ff{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH83FF, half 0xH83FF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; With fp64/fp16 denormals explicitly enabled (attribute group #3), a vector
; of the negative denormal 0xH83FF is expected to fold unchanged
; (0x83ff83ff).
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x83ff83ff{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_v2f16(<2 x half> addrspace(1)* %out) #3 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH83FF, half 0xH83FF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; A vector of 0xH7C00 is expected to fold to itself (0x7c007c00).
; NOTE(review): despite the "qnan" in the test name, 0xH7C00 (exponent all
; ones, zero mantissa) is the half encoding of +infinity; a quiet NaN would
; be 0xH7E00. The name is kept so the label stays stable.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7c007c00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7C00, half 0xH7C00>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; The all-ones 32-bit pattern (i32 -1, i.e. both halves 0xFFFF) is expected
; to fold to the canonical quiet NaN in both lanes, 0x7e007e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> bitcast (i32 -1 to <2 x half>))
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; A vector whose lanes are both the bit pattern of i16 -2 (0xFFFE) is
; expected to fold to the canonical quiet NaN in both lanes, 0x7e007e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half bitcast (i16 -2 to half), half bitcast (i16 -2 to half)>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; A vector of the signaling NaN 0xH7C01 is expected to fold to the canonical
; quiet NaN in both lanes, 0x7e007e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan0_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7C01, half 0xH7C01>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; A vector of the signaling NaN 0xH7DFF is expected to fold to the canonical
; quiet NaN in both lanes, 0x7e007e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan1_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7DFF, half 0xH7DFF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; A vector of the signaling NaN 0xHFDFF (sign bit set) is expected to fold to
; the canonical quiet NaN in both lanes, 0x7e007e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan2_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xHFDFF, half 0xHFDFF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; A vector of the signaling NaN 0xHFC01 (sign bit set) is expected to fold to
; the canonical quiet NaN in both lanes, 0x7e007e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan3_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xHFC01, half 0xHFC01>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Attribute groups:
;   #0 - intrinsic declarations
;   #1 - kernels using the default denormal mode
;   #2 - fp64/fp16 denormals disabled (not referenced by anything visible in
;        this file)
;   #3 - kernels with fp64/fp16 denormals explicitly enabled
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
attributes #2 = { nounwind "target-features"="-fp64-fp16-denormals" }
attributes #3 = { nounwind "target-features"="+fp64-fp16-denormals" }