; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI -check-prefix=SIGFX9 %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=SIGFX9 %s

; Scalar case: loads one half from %a, widens it to float with fpext and
; stores the result to %r. The checks expect a 16-bit load feeding a single
; v_cvt_f32_f16 whose destination register is the one stored as a dword.
; GCN-LABEL: {{^}}fpext_f16_to_f32:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: v_cvt_f32_f16_e32 v[[R_F32:[0-9]+]], v[[A_F16]]
; GCN: buffer_store_dword v[[R_F32]]
; GCN: s_endpgm
define amdgpu_kernel void @fpext_f16_to_f32(
    float addrspace(1)* %r,
    half addrspace(1)* %a) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %r.val = fpext half %a.val to float
  store float %r.val, float addrspace(1)* %r
  ret void
}

; Scalar case: loads one half from %a, widens it to double with fpext and
; stores the result to %r. The checks expect the conversion to go through
; f32 (v_cvt_f32_f16 followed by v_cvt_f64_f32) and the resulting register
; pair to be stored with a dwordx2 store.
; GCN-LABEL: {{^}}fpext_f16_to_f64:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; GCN: v_cvt_f64_f32_e32 v{{\[}}[[R_F64_0:[0-9]+]]:[[R_F64_1:[0-9]+]]{{\]}}, v[[A_F32]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_F64_0]]:[[R_F64_1]]{{\]}}
; GCN: s_endpgm
define amdgpu_kernel void @fpext_f16_to_f64(
    double addrspace(1)* %r,
    half addrspace(1)* %a) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %r.val = fpext half %a.val to double
  store double %r.val, double addrspace(1)* %r
  ret void
}

; Vector case: loads a <2 x half> from %a, widens it to <2 x float> with
; fpext and stores the result to %r. The checks expect one dword load. The
; low element is converted directly from the loaded register on every
; target. For the high element, the SI and gfx901 runs expect a 16-bit
; right shift followed by a plain convert, while the fiji run expects a
; single SDWA convert that selects WORD_1 of the loaded register.
; GCN-LABEL: {{^}}fpext_v2f16_to_v2f32:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GFX9-DAG: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; GCN-DAG: v_cvt_f32_f16_e32 v[[R_F32_0:[0-9]+]], v[[A_V2_F16]]
; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SIGFX9: v_cvt_f32_f16_e32 v[[R_F32_1:[0-9]+]], v[[A_F16_1]]
; VI: v_cvt_f32_f16_sdwa v[[R_F32_1:[0-9]+]], v[[A_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GCN: buffer_store_dwordx2 v{{\[}}[[R_F32_0]]:[[R_F32_1]]{{\]}}
; GCN: s_endpgm

define amdgpu_kernel void @fpext_v2f16_to_v2f32(
    <2 x float> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %r.val = fpext <2 x half> %a.val to <2 x float>
  store <2 x float> %r.val, <2 x float> addrspace(1)* %r
  ret void
}

; Vector case: loads a <2 x half> from %a, widens it to <2 x double> with
; fpext and stores the result to %r. The checks only pin the instruction
; sequence, not registers: one dword load, the per-target split of the two
; halves into f32 values, two v_cvt_f64_f32 conversions, and a dwordx4 store.
; NOTE(review): unlike the three definitions before it, this one carries no
; #0 attribute reference - confirm whether that is intentional.
; GCN-LABEL: {{^}}fpext_v2f16_to_v2f64:
; GCN: buffer_load_dword
; SIGFX9-DAG: v_lshrrev_b32_e32
; SIGFX9-DAG: v_cvt_f32_f16_e32
; VI: v_cvt_f32_f16_sdwa
; GCN: v_cvt_f32_f16_e32

; GCN: v_cvt_f64_f32_e32
; GCN: v_cvt_f64_f32_e32
; GCN: buffer_store_dwordx4
; GCN: s_endpgm

define amdgpu_kernel void @fpext_v2f16_to_v2f64(
    <2 x double> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %r.val = fpext <2 x half> %a.val to <2 x double>
  store <2 x double> %r.val, <2 x double> addrspace(1)* %r
  ret void
}

; Scalar-operand case: the half value is produced by truncating the i32
; kernel argument %a and bitcasting it, rather than by a load. The check
; expects the convert to read its source straight from an SGPR.
; NOTE(review): despite the "fneg" in the name, the IR below contains no
; negation - confirm whether an fsub from -0.0 was meant to be here.
; GCN-LABEL: {{^}}s_fneg_fpext_f16_to_f32:
; GCN: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}
define amdgpu_kernel void @s_fneg_fpext_f16_to_f32(float addrspace(1)* %r, i32 %a) {
entry:
  %a.trunc = trunc i32 %a to i16
  %a.val = bitcast i16 %a.trunc to half
  %r.val = fpext half %a.val to float
  store float %r.val, float addrspace(1)* %r
  ret void
}

; Negation folded into the convert: the loaded half is negated (written as
; fsub from -0.0) and then extended to float. The check expects the negation
; to appear as a source modifier on the e64 form of v_cvt_f32_f16.
; GCN-LABEL: {{^}}fneg_fpext_f16_to_f32:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; GCN: v_cvt_f32_f16_e64 v{{[0-9]+}}, -[[A]]
define amdgpu_kernel void @fneg_fpext_f16_to_f32(
    float addrspace(1)* %r,
    half addrspace(1)* %a) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %a.neg = fsub half -0.0, %a.val
  %r.val = fpext half %a.neg to float
  store float %r.val, float addrspace(1)* %r
  ret void
}

; Absolute value folded into the convert: llvm.fabs.f16 is applied to the
; loaded half before it is extended to float. The check expects the fabs to
; appear as a |...| source modifier on the e64 form of v_cvt_f32_f16.
; GCN-LABEL: {{^}}fabs_fpext_f16_to_f32:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; GCN: v_cvt_f32_f16_e64 v{{[0-9]+}}, |[[A]]|
define amdgpu_kernel void @fabs_fpext_f16_to_f32(
    float addrspace(1)* %r,
    half addrspace(1)* %a) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %a.fabs = call half @llvm.fabs.f16(half %a.val)
  %r.val = fpext half %a.fabs to float
  store float %r.val, float addrspace(1)* %r
  ret void
}

; Combined modifiers: the loaded half goes through llvm.fabs.f16, is then
; negated (fsub from -0.0) and finally extended to float. The check expects
; both operations to be folded into a single -|...| source modifier on the
; e64 form of v_cvt_f32_f16.
; GCN-LABEL: {{^}}fneg_fabs_fpext_f16_to_f32:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; GCN: v_cvt_f32_f16_e64 v{{[0-9]+}}, -|[[A]]|
define amdgpu_kernel void @fneg_fabs_fpext_f16_to_f32(
    float addrspace(1)* %r,
    half addrspace(1)* %a) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %a.fabs = call half @llvm.fabs.f16(half %a.val)
  %a.fneg.fabs = fsub half -0.0, %a.fabs
  %r.val = fpext half %a.fneg.fabs to float
  store float %r.val, float addrspace(1)* %r
  ret void
}

; Negated value with two uses: %a.neg feeds the fpext and is also stored
; directly as a half. Both stores are volatile, and the half store targets an
; undef pointer. The checks expect the negated half to be materialised with
; an xor of the sign bit (0x8000) for the short store, plus a convert whose
; result is stored as a dword.
; GCN-LABEL: {{^}}fneg_multi_use_fpext_f16_to_f32:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; GCN-DAG: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x8000, [[A]]

; FIXME: Using the source modifier here only wastes code size
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[A]]
; GFX89-DAG: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]]

; GCN: store_dword [[CVT]]
; GCN: store_short [[XOR]]
define amdgpu_kernel void @fneg_multi_use_fpext_f16_to_f32(
    float addrspace(1)* %r,
    half addrspace(1)* %a) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %a.neg = fsub half -0.0, %a.val
  %r.val = fpext half %a.neg to float
  store volatile float %r.val, float addrspace(1)* %r
  store volatile half %a.neg, half addrspace(1)* undef
  ret void
}

; Negated value with two uses that can both take a source modifier: %a.neg
; feeds the fpext and an fmul with the original value. All runs expect the
; convert to carry the negation as a modifier. For the multiply, the SI run
; expects it to be done in f32 on converted values and narrowed back to f16,
; while the fiji and gfx901 runs expect a v_mul_f16 with a negated operand.
; NOTE(review): the CVT_NEGA capture below is not referenced by any later
; check in this function, and it matches the same instruction as CVTA_NEG.
; GCN-LABEL: {{^}}fneg_multi_foldable_use_fpext_f16_to_f32:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f32_f16_e64 [[CVTA_NEG:v[0-9]+]], -[[A]]
; SI-DAG: v_cvt_f32_f16_e32 [[CVTA:v[0-9]+]], [[A]]
; SI: v_mul_f32_e32 [[MUL_F32:v[0-9]+]], [[CVTA]], [[CVTA_NEG]]
; SI: v_cvt_f16_f32_e32 [[MUL:v[0-9]+]], [[MUL_F32]]

; GFX89-DAG: v_cvt_f32_f16_e64 [[CVT_NEGA:v[0-9]+]], -[[A]]
; GFX89: v_mul_f16_e64 [[MUL:v[0-9]+]], -[[A]], [[A]]

; GCN: buffer_store_dword [[CVTA_NEG]]
; GCN: buffer_store_short [[MUL]]
define amdgpu_kernel void @fneg_multi_foldable_use_fpext_f16_to_f32(
    float addrspace(1)* %r,
    half addrspace(1)* %a) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %a.neg = fsub half -0.0, %a.val
  %r.val = fpext half %a.neg to float
  %mul = fmul half %a.neg, %a.val
  store volatile float %r.val, float addrspace(1)* %r
  store volatile half %mul, half addrspace(1)* undef
  ret void
}

; Absolute value with two uses: %a.fabs feeds the fpext and is also stored
; directly as a half. Both stores are volatile, and the half store targets an
; undef pointer. The checks expect the fabs to be materialised by masking
; with 0x7fff for the short store (the capture is named XOR although the
; instruction is an and), plus a convert whose result is stored as a dword.
; NOTE(review): CVT is defined here only under the SI and VI prefixes, and
; the gfx901 RUN line enables neither. That run therefore appears to rely on
; a CVT capture left over from an earlier function - confirm whether
; GFX89-DAG was intended for the e64 check.
; GCN-LABEL: {{^}}fabs_multi_use_fpext_f16_to_f32:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; GCN-DAG: v_and_b32_e32 [[XOR:v[0-9]+]], 0x7fff, [[A]]

; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[A]]
; VI-DAG: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], |[[A]]|

; GCN: store_dword [[CVT]]
; GCN: store_short [[XOR]]
define amdgpu_kernel void @fabs_multi_use_fpext_f16_to_f32(
    float addrspace(1)* %r,
    half addrspace(1)* %a) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %a.fabs = call half @llvm.fabs.f16(half %a.val)
  %r.val = fpext half %a.fabs to float
  store volatile float %r.val, float addrspace(1)* %r
  store volatile half %a.fabs, half addrspace(1)* undef
  ret void
}

; Absolute value with two uses that can both take a source modifier: %a.fabs
; feeds the fpext and an fmul with the original value. The SI run expects one
; plain convert, an f32 multiply using |...| on that converted value, a
; narrowing convert for the product, and an and with 0x7fffffff to form the
; stored float. The fiji and gfx901 runs expect the |...| modifier on both
; the convert and a v_mul_f16.
; GCN-LABEL: {{^}}fabs_multi_foldable_use_fpext_f16_to_f32:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; SI: v_cvt_f32_f16_e32 [[CVTA:v[0-9]+]], [[A]]
; SI: v_mul_f32_e64 [[MUL_F32:v[0-9]+]], |[[CVTA]]|, [[CVTA]]
; SI: v_cvt_f16_f32_e32 [[MUL:v[0-9]+]], [[MUL_F32]]
; SI: v_and_b32_e32 [[ABS_A:v[0-9]+]], 0x7fffffff, [[CVTA]]

; GFX89-DAG: v_cvt_f32_f16_e64 [[ABS_A:v[0-9]+]], |[[A]]|
; GFX89: v_mul_f16_e64 [[MUL:v[0-9]+]], |[[A]]|, [[A]]

; GCN: buffer_store_dword [[ABS_A]]
; GCN: buffer_store_short [[MUL]]
define amdgpu_kernel void @fabs_multi_foldable_use_fpext_f16_to_f32(
    float addrspace(1)* %r,
    half addrspace(1)* %a) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %a.fabs = call half @llvm.fabs.f16(half %a.val)
  %r.val = fpext half %a.fabs to float
  %mul = fmul half %a.fabs, %a.val
  store volatile float %r.val, float addrspace(1)* %r
  store volatile half %mul, half addrspace(1)* undef
  ret void
}

; Negated absolute value with two uses: %a.fneg.fabs feeds the fpext and is
; also stored directly as a half. Both stores are volatile, and the half
; store targets an undef pointer. The checks expect the value to be
; materialised by or-ing in the sign bit (0x8000) for the short store, plus
; a convert whose result is stored as a dword.
; NOTE(review): CVT is defined here only under the SI and VI prefixes, and
; the gfx901 RUN line enables neither. That run therefore appears to rely on
; a CVT capture left over from an earlier function - confirm whether
; GFX89-DAG was intended for the e64 check.
; GCN-LABEL: {{^}}fabs_fneg_multi_use_fpext_f16_to_f32:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; GCN-DAG: v_or_b32_e32 [[OR:v[0-9]+]], 0x8000, [[A]]

; SI: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[OR]]
; VI-DAG: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -|[[OR]]|

; GCN: buffer_store_dword [[CVT]]
; GCN: buffer_store_short [[OR]]
define amdgpu_kernel void @fabs_fneg_multi_use_fpext_f16_to_f32(
    float addrspace(1)* %r,
    half addrspace(1)* %a) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %a.fabs = call half @llvm.fabs.f16(half %a.val)
  %a.fneg.fabs = fsub half -0.0, %a.fabs
  %r.val = fpext half %a.fneg.fabs to float
  store volatile float %r.val, float addrspace(1)* %r
  store volatile half %a.fneg.fabs, half addrspace(1)* undef
  ret void
}

; Negated absolute value with two uses that can both take a source modifier:
; %a.fneg.fabs feeds the fpext and an fmul with the original value. The SI
; run expects one plain convert, an f32 multiply using -|...| on that
; converted value, a narrowing convert for the product, and an or with
; 0x80000000 to form the stored float. The fiji and gfx901 runs expect the
; -|...| modifier on both the convert and a v_mul_f16.
; GCN-LABEL: {{^}}fabs_fneg_multi_foldable_use_fpext_f16_to_f32:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; SI: v_cvt_f32_f16_e32 [[CVTA:v[0-9]+]], [[A]]
; SI: v_mul_f32_e64 [[MUL_F32:v[0-9]+]], -|[[CVTA]]|, [[CVTA]]
; SI: v_cvt_f16_f32_e32 [[MUL:v[0-9]+]], [[MUL_F32]]
; SI: v_or_b32_e32 [[FABS_FNEG:v[0-9]+]], 0x80000000, [[CVTA]]

; GFX89-DAG: v_cvt_f32_f16_e64 [[FABS_FNEG:v[0-9]+]], -|[[A]]|
; GFX89-DAG: v_mul_f16_e64 [[MUL:v[0-9]+]], -|[[A]]|, [[A]]

; GCN: buffer_store_dword [[FABS_FNEG]]
; GCN: buffer_store_short [[MUL]]
define amdgpu_kernel void @fabs_fneg_multi_foldable_use_fpext_f16_to_f32(
    float addrspace(1)* %r,
    half addrspace(1)* %a) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %a.fabs = call half @llvm.fabs.f16(half %a.val)
  %a.fneg.fabs = fsub half -0.0, %a.fabs
  %r.val = fpext half %a.fneg.fabs to float
  %mul = fmul half %a.fneg.fabs, %a.val
  store volatile float %r.val, float addrspace(1)* %r
  store volatile half %mul, half addrspace(1)* undef
  ret void
}

; Declaration of the half-precision fabs intrinsic called by the fabs tests
; in this file, together with the attribute group it references.
; NOTE(review): several kernels in this file reference attribute group #0,
; but no definition of #0 is visible in this text - confirm whether one
; exists elsewhere or should be added.
declare half @llvm.fabs.f16(half) #1

attributes #1 = { nounwind readnone }