; Codegen test for llvm.canonicalize on half and <2 x half> values (AMDGPU, tonga and gfx900).
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s

; The tonga output is verified with the GCN and VI check prefixes, the gfx900
; output with the GCN and GFX9 check prefixes.
; Intrinsics used by the test functions in this file.
declare half @llvm.fabs.f16(half) #0
declare half @llvm.canonicalize.f16(half) #0
declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0
declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
declare i32 @llvm.amdgcn.workitem.id.x() #0
9
Matt Arsenaultce841302016-12-22 03:05:37 +000010
; Canonicalize a half loaded from memory. The checks expect one v_max_f16_e32
; on VGPR sources, and that its result register is the one stored.
; GCN-LABEL: {{^}}v_test_canonicalize_var_f16:
; GCN: v_max_f16_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @v_test_canonicalize_var_f16(half addrspace(1)* %out) #1 {
  %val = load half, half addrspace(1)* %out
  %canonicalized = call half @llvm.canonicalize.f16(half %val)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}
20
; Canonicalize a half that arrives as an i16 kernel argument and is bitcast.
; The checks expect a v_max_f16_e64 reading SGPR sources, with its result stored.
; GCN-LABEL: {{^}}s_test_canonicalize_var_f16:
; GCN: v_max_f16_e64 [[REG:v[0-9]+]], {{s[0-9]+}}, {{s[0-9]+}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @s_test_canonicalize_var_f16(half addrspace(1)* %out, i16 zeroext %val.arg) #1 {
  %val = bitcast i16 %val.arg to half
  %canonicalized = call half @llvm.canonicalize.f16(half %val)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}
30
; fabs feeding canonicalize. The checks expect a v_max_f16_e64 whose two
; sources both carry the |...| (absolute value) source modifier.
; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_f16:
; GCN: v_max_f16_e64 [[REG:v[0-9]+]], |{{v[0-9]+}}|, |{{v[0-9]+}}|
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fabs_var_f16(half addrspace(1)* %out) #1 {
  %val = load half, half addrspace(1)* %out
  %val.fabs = call half @llvm.fabs.f16(half %val)
  %canonicalized = call half @llvm.canonicalize.f16(half %val.fabs)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}
41
; fneg(fabs(x)) feeding canonicalize; the negation is written as fsub -0.0, x.
; The checks expect a v_max_f16_e64 whose two sources both carry -|...|.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f16:
; GCN: v_max_f16_e64 [[REG:v[0-9]+]], -|{{v[0-9]+}}|, -|{{v[0-9]+}}|
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f16(half addrspace(1)* %out) #1 {
  %val = load half, half addrspace(1)* %out
  %val.fabs = call half @llvm.fabs.f16(half %val)
  %val.fabs.fneg = fsub half -0.0, %val.fabs
  %canonicalized = call half @llvm.canonicalize.f16(half %val.fabs.fneg)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}
53
; fneg (written as fsub -0.0, x) feeding canonicalize. The checks expect a
; v_max_f16_e64 whose two sources both carry the negate modifier.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f16:
; GCN: v_max_f16_e64 [[REG:v[0-9]+]], -{{v[0-9]+}}, -{{v[0-9]+}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_var_f16(half addrspace(1)* %out) #1 {
  %val = load half, half addrspace(1)* %out
  %val.fneg = fsub half -0.0, %val
  %canonicalized = call half @llvm.canonicalize.f16(half %val.fneg)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}
64
; canonicalize(0.0) is expected to fold to a move of the immediate 0.
; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p0_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0.0)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}
73
; canonicalize(-0.0) is expected to fold to the immediate 0xffff8000
; (low 16 bits 0x8000, upper 16 bits all ones).
; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff8000{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n0_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half -0.0)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}
82
; canonicalize(1.0) is expected to fold to the immediate 0x3c00.
; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p1_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 1.0)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}
91
; canonicalize(-1.0) is expected to fold to the immediate 0xffffbc00
; (low 16 bits 0xbc00, upper 16 bits all ones).
; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffbc00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n1_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half -1.0)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}
100
; canonicalize(16.0) is expected to fold to the immediate 0x4c00.
; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4c00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_literal_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 16.0)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}
109
; Constant 0xH03FF under attribute group #1 (plain nounwind): the fold is
; expected to keep the same bit pattern, 0x3ff.
; GCN-LABEL: {{^}}test_default_denormals_fold_canonicalize_denormal0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}
118
; Constant 0xH03FF under attribute group #3 (+fp64-fp16-denormals): the fold
; is expected to keep the same bit pattern, 0x3ff.
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #3 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}
127
; Constant 0xH83FF under attribute group #1 (plain nounwind): the fold is
; expected to produce 0xffff83ff (low 16 bits unchanged, upper 16 bits all ones).
; GCN-LABEL: {{^}}test_default_denormals_fold_canonicalize_denormal1_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff83ff{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}
136
; Constant 0xH83FF under attribute group #3 (+fp64-fp16-denormals): the fold
; is expected to produce 0xffff83ff (low 16 bits unchanged, upper 16 bits all ones).
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff83ff{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #3 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}
145
; Constant 0xH7C00 is expected to fold to the same bit pattern, 0x7c00.
; NOTE(review): 0xH7C00 has an all-ones exponent and a zero mantissa, i.e. it
; encodes +infinity rather than a NaN, despite the "qnan" in the test name.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7c00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH7C00)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}
154
; The all-ones i16 pattern (-1) reinterpreted as half is expected to fold to
; the immediate 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 -1 to half))
  store half %canonicalized, half addrspace(1)* %out
  ret void
}
163
; The i16 pattern -2 (0xfffe) reinterpreted as half is expected to fold to
; the immediate 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 -2 to half))
  store half %canonicalized, half addrspace(1)* %out
  ret void
}
172
; NaN constant 0xH7C01 is expected to fold to the immediate 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH7C01)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}
181
; NaN constant 0xH7DFF is expected to fold to the immediate 0x7e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH7DFF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}
190
; NaN constant 0xHFDFF (sign bit set) is expected to fold to the immediate
; 0x7e00, i.e. the expected result has the sign bit clear.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xHFDFF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}
199
; NaN constant 0xHFC01 (sign bit set) is expected to fold to the immediate
; 0x7e00, i.e. the expected result has the sign bit clear.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xHFC01)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}
208
; Canonicalize a <2 x half> loaded from %out indexed by the work-item id; the
; result is stored to %out itself (not to %gep).
; On tonga the checks expect, in either order, an SDWA v_max_f16 operating on
; WORD_1 and a v_max_f16_e32, with no v_and_b32 after them. On gfx900 they
; expect a single v_pk_max_f16 without modifiers whose result is stored.
; GCN-LABEL: {{^}}v_test_canonicalize_var_v2f16:
; VI-DAG: v_max_f16_sdwa [[REG0:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_max_f16_e32 [[REG1:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; VI-NOT: v_and_b32

; GFX9: v_pk_max_f16 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+$}}
; GFX9: buffer_store_dword [[REG]]
define amdgpu_kernel void @v_test_canonicalize_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %val = load <2 x half>, <2 x half> addrspace(1)* %gep
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}
224
; fabs on <2 x half> feeding canonicalize.
; On tonga the checks expect a v_bfe_u32 and a v_and_b32 with mask 0x7fff7fff
; (either order), then an SDWA v_max_f16 writing WORD_1, then a v_max_f16_e32,
; then a v_or_b32 with no 0xffff literal in between. On gfx900 they expect the
; same mask applied by v_and_b32 and its result used for both sources of a
; v_pk_max_f16 without modifiers.
; FIXME: Fold modifier
; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_v2f16:
; VI-DAG: v_bfe_u32
; VI-DAG: v_and_b32_e32 v{{[0-9]+}}, 0x7fff7fff, v{{[0-9]+}}
; VI: v_max_f16_sdwa [[REG0:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_max_f16_e32 [[REG1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; VI-NOT: 0xffff
; VI: v_or_b32

; GFX9: v_and_b32_e32 [[ABS:v[0-9]+]], 0x7fff7fff, v{{[0-9]+}}
; GFX9: v_pk_max_f16 [[REG:v[0-9]+]], [[ABS]], [[ABS]]{{$}}
; GCN: buffer_store_dword
define amdgpu_kernel void @v_test_canonicalize_fabs_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %val = load <2 x half>, <2 x half> addrspace(1)* %gep
  %val.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val.fabs)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}
246
; fneg(fabs(x)) on <2 x half> feeding canonicalize; the negation is written as
; fsub <-0.0, -0.0>, x.
; On tonga the checks expect a v_or_b32 with 0x80008000, an SDWA v_max_f16 on
; WORD_1 and a v_max_f16_e32 (any order), followed by a v_or_b32. On gfx900
; they expect a v_and_b32 with 0x7fff7fff whose result feeds both sources of a
; v_pk_max_f16 carrying neg_lo and neg_hi on both operands.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_v2f16:
; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, 0x80008000, v{{[0-9]+}}
; VI-DAG: v_max_f16_sdwa [[REG0:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_max_f16_e32 [[REG1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_or_b32

; GFX9: v_and_b32_e32 [[ABS:v[0-9]+]], 0x7fff7fff, v{{[0-9]+}}
; GFX9: v_pk_max_f16 [[REG:v[0-9]+]], [[ABS]], [[ABS]] neg_lo:[1,1] neg_hi:[1,1]{{$}}
; GCN: buffer_store_dword
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %val = load <2 x half>, <2 x half> addrspace(1)* %gep
  %val.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  %val.fabs.fneg = fsub <2 x half> <half -0.0, half -0.0>, %val.fabs
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val.fabs.fneg)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}
266
; fneg on <2 x half> (written as fsub <-0.0, -0.0>, x) feeding canonicalize.
; On tonga the checks expect a v_xor_b32 with 0x80008000 whose result register
; is used for both sources of an SDWA v_max_f16 on WORD_1 and of a
; v_max_f16_e32, with no 0xffff literal afterwards. On gfx900 they expect a
; v_pk_max_f16 carrying neg_lo and neg_hi on both operands, result stored.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_v2f16:
; VI: v_xor_b32_e32 [[FNEG:v[0-9]+]], 0x80008000, v{{[0-9]+}}
; VI-DAG: v_max_f16_sdwa [[REG1:v[0-9]+]], [[FNEG]], [[FNEG]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_max_f16_e32 [[REG0:v[0-9]+]], [[FNEG]], [[FNEG]]
; VI-NOT: 0xffff

; GFX9: v_pk_max_f16 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} neg_lo:[1,1] neg_hi:[1,1]{{$}}
; GFX9: buffer_store_dword [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %val = load <2 x half>, <2 x half> addrspace(1)* %gep
  %fneg.val = fsub <2 x half> <half -0.0, half -0.0>, %val
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %fneg.val)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}
284
; Canonicalize a <2 x half> that arrives as an i32 kernel argument and is bitcast.
; On tonga the checks expect an SDWA v_max_f16 on VGPR sources writing WORD_1,
; then a v_max_f16_e64 reading SGPR sources, with no v_and_b32 afterwards. On
; gfx900 they expect a v_pk_max_f16 reading SGPR sources, result stored.
; GCN-LABEL: {{^}}s_test_canonicalize_var_v2f16:
; VI: v_max_f16_sdwa [[REG0:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_max_f16_e64 [[REG1:v[0-9]+]], {{s[0-9]+}}, {{s[0-9]+}}
; VI-NOT: v_and_b32

; GFX9: v_pk_max_f16 [[REG:v[0-9]+]], {{s[0-9]+}}, {{s[0-9]+$}}
; GFX9: buffer_store_dword [[REG]]
define amdgpu_kernel void @s_test_canonicalize_var_v2f16(<2 x half> addrspace(1)* %out, i32 zeroext %val.arg) #1 {
  %val = bitcast i32 %val.arg to <2 x half>
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}
298
; canonicalize(<0.0, 0.0>) is expected to fold to a move of the immediate 0.
; GCN-LABEL: {{^}}test_fold_canonicalize_p0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p0_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> zeroinitializer)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}
307
; canonicalize(<-0.0, -0.0>) is expected to fold to the packed immediate
; 0x80008000.
; GCN-LABEL: {{^}}test_fold_canonicalize_n0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n0_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half -0.0, half -0.0>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}
316
; canonicalize(<1.0, 1.0>) is expected to fold to the packed immediate
; 0x3c003c00.
; GCN-LABEL: {{^}}test_fold_canonicalize_p1_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c003c00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p1_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 1.0, half 1.0>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}
325
; canonicalize(<-1.0, -1.0>) is expected to fold to the packed immediate
; 0xbc00bc00.
; GCN-LABEL: {{^}}test_fold_canonicalize_n1_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbc00bc00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n1_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half -1.0, half -1.0>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}
334
; canonicalize(<16.0, 16.0>) is expected to fold to the packed immediate
; 0x4c004c00.
; GCN-LABEL: {{^}}test_fold_canonicalize_literal_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4c004c00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_literal_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 16.0, half 16.0>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}
343
; Packed constant <0xH03FF, 0xH03FF> under attribute group #1: the fold is
; expected to keep the same bit pattern, 0x3ff03ff.
; NOTE(review): the name says "no_denormals", but the function uses attribute
; group #1 (plain nounwind), not #2 ("-fp64-fp16-denormals"), and the expected
; value is not flushed. Confirm whether the name or the attribute is intended.
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff03ff{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH03FF, half 0xH03FF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}
352
; Packed constant <0xH03FF, 0xH03FF> under attribute group #3
; (+fp64-fp16-denormals): the fold is expected to keep the same bit pattern,
; 0x3ff03ff.
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff03ff{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_v2f16(<2 x half> addrspace(1)* %out) #3 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH03FF, half 0xH03FF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}
361
; Packed constant <0xH83FF, 0xH83FF> under attribute group #1: the fold is
; expected to keep the same bit pattern, 0x83ff83ff.
; NOTE(review): the name says "no_denormals", but the function uses attribute
; group #1 (plain nounwind), not #2 ("-fp64-fp16-denormals"), and the expected
; value is not flushed. Confirm whether the name or the attribute is intended.
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x83ff83ff{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH83FF, half 0xH83FF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}
370
; Packed constant <0xH83FF, 0xH83FF> under attribute group #3
; (+fp64-fp16-denormals): the fold is expected to keep the same bit pattern,
; 0x83ff83ff.
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x83ff83ff{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_v2f16(<2 x half> addrspace(1)* %out) #3 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH83FF, half 0xH83FF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}
379
; Packed constant <0xH7C00, 0xH7C00> is expected to fold to the same bit
; pattern, 0x7c007c00.
; NOTE(review): 0xH7C00 has an all-ones exponent and a zero mantissa, i.e. it
; encodes +infinity rather than a NaN, despite the "qnan" in the test name.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7c007c00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7C00, half 0xH7C00>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}
388
; The all-ones i32 pattern (-1) reinterpreted as <2 x half> is expected to
; fold to the packed immediate 0x7e007e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> bitcast (i32 -1 to <2 x half>))
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}
397
; Both lanes hold the i16 pattern -2 (0xfffe) reinterpreted as half; the fold
; is expected to produce the packed immediate 0x7e007e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half bitcast (i16 -2 to half), half bitcast (i16 -2 to half)>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}
406
; Packed NaN constant <0xH7C01, 0xH7C01> is expected to fold to the packed
; immediate 0x7e007e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan0_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7C01, half 0xH7C01>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}
415
; Packed NaN constant <0xH7DFF, 0xH7DFF> is expected to fold to the packed
; immediate 0x7e007e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan1_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7DFF, half 0xH7DFF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}
424
; Packed NaN constant <0xHFDFF, 0xHFDFF> (sign bits set) is expected to fold
; to the packed immediate 0x7e007e00, i.e. with both sign bits clear.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan2_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xHFDFF, half 0xHFDFF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}
433
; Packed NaN constant <0xHFC01, 0xHFC01> (sign bits set) is expected to fold
; to the packed immediate 0x7e007e00, i.e. with both sign bits clear.
; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan3_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xHFC01, half 0xHFC01>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}
442
; Attribute groups: #0 is attached to the intrinsic declarations, #1 to most
; kernels, #3 to the kernels that explicitly enable fp64/fp16 denormals.
; NOTE(review): no function visible in this file references #2.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
attributes #2 = { nounwind "target-features"="-fp64-fp16-denormals" }
attributes #3 = { nounwind "target-features"="+fp64-fp16-denormals" }