; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s

; Intrinsics called by the test kernels in this file.
declare half @llvm.fabs.f16(half) #0
declare half @llvm.canonicalize.f16(half) #0
declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0
declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
declare i32 @llvm.amdgcn.workitem.id.x() #0


; Canonicalize a half loaded from %out and store the result back.
; The checks expect a v_max_f16_e32 on VGPR operands whose result register
; is the one written by the 16-bit store.
; GCN-LABEL: {{^}}v_test_canonicalize_var_f16:
; GCN: v_max_f16_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @v_test_canonicalize_var_f16(half addrspace(1)* %out) #1 {
  %val = load half, half addrspace(1)* %out
  %canonicalized = call half @llvm.canonicalize.f16(half %val)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Canonicalize a half that arrives as an i16 kernel argument (bitcast to half).
; The checks expect a v_max_f16_e64 reading SGPR operands, with the result
; register written by the 16-bit store.
; GCN-LABEL: {{^}}s_test_canonicalize_var_f16:
; GCN: v_max_f16_e64 [[REG:v[0-9]+]], {{s[0-9]+}}, {{s[0-9]+}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @s_test_canonicalize_var_f16(half addrspace(1)* %out, i16 zeroext %val.arg) #1 {
  %val = bitcast i16 %val.arg to half
  %canonicalized = call half @llvm.canonicalize.f16(half %val)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; fabs feeding canonicalize: the checks expect the |x| source modifier on both
; operands of a single v_max_f16_e64.
; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_f16:
; GCN: v_max_f16_e64 [[REG:v[0-9]+]], |{{v[0-9]+}}|, |{{v[0-9]+}}|
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fabs_var_f16(half addrspace(1)* %out) #1 {
  %val = load half, half addrspace(1)* %out
  %val.fabs = call half @llvm.fabs.f16(half %val)
  %canonicalized = call half @llvm.canonicalize.f16(half %val.fabs)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; fneg(fabs(x)) feeding canonicalize (the negate is written as fsub -0.0, x):
; the checks expect the -|x| source modifier on both v_max_f16_e64 operands.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f16:
; GCN: v_max_f16_e64 [[REG:v[0-9]+]], -|{{v[0-9]+}}|, -|{{v[0-9]+}}|
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f16(half addrspace(1)* %out) #1 {
  %val = load half, half addrspace(1)* %out
  %val.fabs = call half @llvm.fabs.f16(half %val)
  %val.fabs.fneg = fsub half -0.0, %val.fabs
  %canonicalized = call half @llvm.canonicalize.f16(half %val.fabs.fneg)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; fneg(x) feeding canonicalize (the negate is written as fsub -0.0, x):
; the checks expect the -x source modifier on both v_max_f16_e64 operands.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f16:
; GCN: v_max_f16_e64 [[REG:v[0-9]+]], -{{v[0-9]+}}, -{{v[0-9]+}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_var_f16(half addrspace(1)* %out) #1 {
  %val = load half, half addrspace(1)* %out
  %val.fneg = fsub half -0.0, %val
  %canonicalized = call half @llvm.canonicalize.f16(half %val.fneg)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; canonicalize(+0.0): the stored register is expected to be produced by a
; v_mov_b32 of the immediate 0.
; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p0_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0.0)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; canonicalize(-0.0): the stored register is expected to be produced by a
; v_mov_b32 of the immediate 0xffff8000.
; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff8000{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n0_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half -0.0)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; canonicalize(1.0): the stored register is expected to be produced by a
; v_mov_b32 of the immediate 0x3c00.
; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p1_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 1.0)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; canonicalize(-1.0): the stored register is expected to be produced by a
; v_mov_b32 of the immediate 0xffffbc00.
; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffbc00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n1_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half -1.0)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; canonicalize(16.0): the stored register is expected to be produced by a
; v_mov_b32 of the immediate 0x4c00.
; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4c00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_literal_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 16.0)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; canonicalize(0xH03FF) under attribute group #1, which sets no denormal
; target feature: the checks expect the bit pattern to be kept (0x3ff).
; GCN-LABEL: {{^}}test_default_denormals_fold_canonicalize_denormal0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; canonicalize(0xH03FF) under attribute group #3 (+fp64-fp16-denormals):
; the checks expect the bit pattern to be kept (0x3ff).
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #3 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; canonicalize(0xH83FF) under attribute group #1, which sets no denormal
; target feature: the checks expect the immediate 0xffff83ff.
; GCN-LABEL: {{^}}test_default_denormals_fold_canonicalize_denormal1_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff83ff{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; canonicalize(0xH83FF) under attribute group #3 (+fp64-fp16-denormals):
; the checks expect the immediate 0xffff83ff.
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff83ff{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #3 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; canonicalize(0xH7C00): the checks expect the value to be kept as 0x7c00.
; NOTE(review): 0xH7C00 (all-ones exponent, zero mantissa) is the IEEE half
; +infinity bit pattern rather than a NaN, despite the "qnan" in the name.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7c00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH7C00)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; canonicalize of the half whose bits are i16 -1: the checks expect the
; stored register to be produced by a v_mov_b32 of 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 -1 to half))
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; canonicalize of the half whose bits are i16 -2: the checks expect the
; stored register to be produced by a v_mov_b32 of 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 -2 to half))
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; canonicalize(0xH7C01): the checks expect the stored register to be
; produced by a v_mov_b32 of 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH7C01)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; canonicalize(0xH7DFF): the checks expect the stored register to be
; produced by a v_mov_b32 of 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH7DFF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; canonicalize(0xHFDFF): the checks expect the stored register to be
; produced by a v_mov_b32 of 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xHFDFF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; canonicalize(0xHFC01): the checks expect the stored register to be
; produced by a v_mov_b32 of 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xHFC01)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Canonicalize a <2 x half> loaded from %out indexed by the workitem id; the
; result is stored to %out itself.
; On the tonga run the checks expect one v_max_f16_sdwa writing WORD_1 plus one
; v_max_f16_e32, with no v_and_b32 after them; on the gfx901 run they expect
; a single v_pk_mul_f16 by 1.0 whose result is stored.
; GCN-LABEL: {{^}}v_test_canonicalize_var_v2f16:
; VI-DAG: v_max_f16_sdwa [[REG0:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_max_f16_e32 [[REG1:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; VI-NOT: v_and_b32

; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{v[0-9]+$}}
; GFX9: buffer_store_dword [[REG]]
define amdgpu_kernel void @v_test_canonicalize_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %val = load <2 x half>, <2 x half> addrspace(1)* %gep
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; fabs on a <2 x half> feeding canonicalize.
; The checks currently expect the abs to stay a separate v_and_b32 with
; 0x7fff7fff on both targets instead of being folded as a source modifier.
; FIXME: Fold modifier
; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_v2f16:
; VI-DAG: v_bfe_u32
; VI-DAG: v_and_b32_e32 v{{[0-9]+}}, 0x7fff7fff, v{{[0-9]+}}
; VI: v_max_f16_sdwa [[REG0:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_max_f16_e32 [[REG1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; VI-NOT: 0xffff
; VI: v_or_b32

; GFX9: v_and_b32_e32 [[ABS:v[0-9]+]], 0x7fff7fff, v{{[0-9]+}}
; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, [[ABS]]{{$}}
; GCN: buffer_store_dword
define amdgpu_kernel void @v_test_canonicalize_fabs_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %val = load <2 x half>, <2 x half> addrspace(1)* %gep
  %val.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val.fabs)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; fneg(fabs(x)) on a <2 x half> feeding canonicalize.
; On the tonga run the checks expect a v_or_b32 with 0x80008000 alongside the
; two per-half v_max_f16 instructions; on the gfx901 run they expect a
; v_and_b32 with 0x7fff7fff followed by a v_pk_mul_f16 by 1.0 carrying neg_lo /
; neg_hi on the second operand.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_v2f16:
; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, 0x80008000, v{{[0-9]+}}
; VI-DAG: v_max_f16_sdwa [[REG0:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_max_f16_e32 [[REG1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_or_b32

; GFX9: v_and_b32_e32 [[ABS:v[0-9]+]], 0x7fff7fff, v{{[0-9]+}}
; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, [[ABS]] neg_lo:[0,1] neg_hi:[0,1]{{$}}
; GCN: buffer_store_dword
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %val = load <2 x half>, <2 x half> addrspace(1)* %gep
  %val.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  %val.fabs.fneg = fsub <2 x half> <half -0.0, half -0.0>, %val.fabs
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val.fabs.fneg)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; fneg(x) on a <2 x half> feeding canonicalize.
; On the tonga run the checks expect the negate as a separate v_xor_b32 with
; 0x80008000, a 16-bit right shift to extract the high half, then the two
; per-half v_max_f16 instructions; on the gfx901 run they expect a v_pk_mul_f16
; by 1.0 carrying neg_lo / neg_hi on the second operand.
; FIXME: Fold modifier
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_v2f16:
; VI: v_xor_b32_e32 [[FNEG:v[0-9]+]], 0x80008000, v{{[0-9]+}}
; VI: v_lshrrev_b32_e32 [[FNEGHI:v[0-9]+]], 16, [[FNEG]]
; VI-DAG: v_max_f16_sdwa [[REG1:v[0-9]+]], [[FNEG]], [[FNEGHI]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_max_f16_e32 [[REG0:v[0-9]+]], [[FNEG]], [[FNEG]]
; VI-NOT: 0xffff

; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} neg_lo:[0,1] neg_hi:[0,1]{{$}}
; GFX9: buffer_store_dword [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %val = load <2 x half>, <2 x half> addrspace(1)* %gep
  %fneg.val = fsub <2 x half> <half -0.0, half -0.0>, %val
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %fneg.val)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Canonicalize a <2 x half> that arrives as an i32 kernel argument.
; On the tonga run the checks expect a v_max_f16_sdwa writing WORD_1 followed
; by a v_max_f16_e64 on SGPR operands, with no v_and_b32 after them; on the
; gfx901 run they expect a v_pk_mul_f16 of 1.0 with an SGPR operand.
; GCN-LABEL: {{^}}s_test_canonicalize_var_v2f16:
; VI: v_max_f16_sdwa [[REG0:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_max_f16_e64 [[REG1:v[0-9]+]], {{s[0-9]+}}, {{s[0-9]+}}
; VI-NOT: v_and_b32

; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{s[0-9]+$}}
; GFX9: buffer_store_dword [[REG]]
define amdgpu_kernel void @s_test_canonicalize_var_v2f16(<2 x half> addrspace(1)* %out, i32 zeroext %val.arg) #1 {
  %val = bitcast i32 %val.arg to <2 x half>
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; canonicalize(<+0.0, +0.0>): the stored register is expected to be produced
; by a v_mov_b32 of the immediate 0.
; GCN-LABEL: {{^}}test_fold_canonicalize_p0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p0_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> zeroinitializer)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; canonicalize(<-0.0, -0.0>): the stored register is expected to be produced
; by a v_mov_b32 of the packed immediate 0x80008000.
; GCN-LABEL: {{^}}test_fold_canonicalize_n0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n0_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half -0.0, half -0.0>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; canonicalize(<1.0, 1.0>): the stored register is expected to be produced
; by a v_mov_b32 of the packed immediate 0x3c003c00.
; GCN-LABEL: {{^}}test_fold_canonicalize_p1_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c003c00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p1_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 1.0, half 1.0>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; canonicalize(<-1.0, -1.0>): the stored register is expected to be produced
; by a v_mov_b32 of the packed immediate 0xbc00bc00.
; GCN-LABEL: {{^}}test_fold_canonicalize_n1_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbc00bc00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n1_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half -1.0, half -1.0>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; canonicalize(<16.0, 16.0>): the stored register is expected to be produced
; by a v_mov_b32 of the packed immediate 0x4c004c00.
; GCN-LABEL: {{^}}test_fold_canonicalize_literal_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4c004c00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_literal_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 16.0, half 16.0>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; canonicalize(<0xH03FF, 0xH03FF>) under attribute group #1: the checks
; expect the packed bit pattern to be kept (0x3ff03ff).
; NOTE(review): the name says "no_denormals", but the function uses #1 (no
; denormal target feature set) and the expected value keeps the denormal; the
; scalar counterpart is named test_default_denormals_*. Consider renaming.
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff03ff{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH03FF, half 0xH03FF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; canonicalize(<0xH03FF, 0xH03FF>) under attribute group #3
; (+fp64-fp16-denormals): the checks expect the packed bit pattern to be kept
; (0x3ff03ff).
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff03ff{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_v2f16(<2 x half> addrspace(1)* %out) #3 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH03FF, half 0xH03FF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; canonicalize(<0xH83FF, 0xH83FF>) under attribute group #1: the checks
; expect the packed bit pattern to be kept (0x83ff83ff).
; NOTE(review): the name says "no_denormals", but the function uses #1 (no
; denormal target feature set) and the expected value keeps the denormal; the
; scalar counterpart is named test_default_denormals_*. Consider renaming.
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x83ff83ff{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH83FF, half 0xH83FF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; canonicalize(<0xH83FF, 0xH83FF>) under attribute group #3
; (+fp64-fp16-denormals): the checks expect the packed bit pattern to be kept
; (0x83ff83ff).
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x83ff83ff{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_v2f16(<2 x half> addrspace(1)* %out) #3 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH83FF, half 0xH83FF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; canonicalize(<0xH7C00, 0xH7C00>): the checks expect the packed value to be
; kept as 0x7c007c00.
; NOTE(review): 0xH7C00 (all-ones exponent, zero mantissa) is the IEEE half
; +infinity bit pattern rather than a NaN, despite the "qnan" in the name.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7c007c00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7C00, half 0xH7C00>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; canonicalize of the <2 x half> whose bits are i32 -1: the checks expect the
; stored register to be produced by a v_mov_b32 of 0x7e007e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> bitcast (i32 -1 to <2 x half>))
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; canonicalize of a <2 x half> whose elements each have the bits of i16 -2:
; the checks expect the stored register to be produced by a v_mov_b32 of
; 0x7e007e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half bitcast (i16 -2 to half), half bitcast (i16 -2 to half)>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; canonicalize(<0xH7C01, 0xH7C01>): the checks expect the stored register to
; be produced by a v_mov_b32 of 0x7e007e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan0_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7C01, half 0xH7C01>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; canonicalize(<0xH7DFF, 0xH7DFF>): the checks expect the stored register to
; be produced by a v_mov_b32 of 0x7e007e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan1_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7DFF, half 0xH7DFF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; canonicalize(<0xHFDFF, 0xHFDFF>): the checks expect the stored register to
; be produced by a v_mov_b32 of 0x7e007e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan2_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xHFDFF, half 0xHFDFF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; canonicalize(<0xHFC01, 0xHFC01>): the checks expect the stored register to
; be produced by a v_mov_b32 of 0x7e007e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan3_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xHFC01, half 0xHFC01>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Attribute groups used by the declarations (#0) and the test kernels (#1, #3).
; NOTE(review): #2 is not referenced by any function visible in this file.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
attributes #2 = { nounwind "target-features"="-fp64-fp16-denormals" }
attributes #3 = { nounwind "target-features"="+fp64-fp16-denormals" }