; Source blob 0df58519a6217d108e7e0df09b995c0eeb3039f5
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
Tim Northover20bd0ce2014-07-18 12:41:46 +00003
; half args should be promoted to float for SI and lower.
;
; Scalar half kernel argument stored unchanged to global memory.
; The checks expect one scalar dword load of the argument, a conversion
; (v_cvt_f16_f32 on SI, v_trunc_f16 on VI) and a 16-bit store of the result.

; GCN-LABEL: {{^}}load_f16_arg:
; GCN: s_load_dword [[ARG:s[0-9]+]]
; SI: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[ARG]]
; VI: v_trunc_f16_e32 [[CVT:v[0-9]+]], [[ARG]]
; GCN: buffer_store_short [[CVT]]
define amdgpu_kernel void @load_f16_arg(half addrspace(1)* %out, half %arg) #0 {
  store half %arg, half addrspace(1)* %out
  ret void
}
15
; <2 x half> kernel argument stored to global memory.
; The checks expect two 16-bit loads (offset:44 and offset:46), the second
; shifted left by 16 and OR-ed with the first, then a single dword store.

; GCN-LABEL: {{^}}load_v2f16_arg:
; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
; GCN: v_lshlrev_b32_e32 [[HI:v[0-9]+]], 16, [[V1]]
; GCN: v_or_b32_e32 [[PACKED:v[0-9]+]], [[V0]], [[HI]]
; GCN: buffer_store_dword [[PACKED]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x half> %arg) #0 {
  store <2 x half> %arg, <2 x half> addrspace(1)* %out
  ret void
}
27
; <3 x half> kernel argument stored to global memory.
; The checks expect exactly three 16-bit loads and exactly two stores
; (one dword, one short); the -NOT lines reject any additional ones.

; GCN-LABEL: {{^}}load_v3f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_short
; GCN-NOT: buffer_store
; GCN: s_endpgm
define amdgpu_kernel void @load_v3f16_arg(<3 x half> addrspace(1)* %out, <3 x half> %arg) #0 {
  store <3 x half> %arg, <3 x half> addrspace(1)* %out
  ret void
}
41
; <4 x half> kernel argument stored to global memory.
; The checks expect four 16-bit loads followed by one 64-bit store.

; GCN-LABEL: {{^}}load_v4f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_store_dwordx2
; GCN: s_endpgm
define amdgpu_kernel void @load_v4f16_arg(<4 x half> addrspace(1)* %out, <4 x half> %arg) #0 {
  store <4 x half> %arg, <4 x half> addrspace(1)* %out
  ret void
}
53
; <8 x half> kernel argument stored to global memory.
; Only the function label is checked; no instruction sequence is pinned.

; GCN-LABEL: {{^}}load_v8f16_arg:
define amdgpu_kernel void @load_v8f16_arg(<8 x half> addrspace(1)* %out, <8 x half> %arg) #0 {
  store <8 x half> %arg, <8 x half> addrspace(1)* %out
  ret void
}
59
; fpext of a <2 x half> kernel argument to <2 x float>, then store.
; Only the function label is checked.

; GCN-LABEL: {{^}}extload_v2f16_arg:
define amdgpu_kernel void @extload_v2f16_arg(<2 x float> addrspace(1)* %out, <2 x half> %in) #0 {
  %widened = fpext <2 x half> %in to <2 x float>
  store <2 x float> %widened, <2 x float> addrspace(1)* %out
  ret void
}
66
; fpext of a scalar half kernel argument to float, then store.
; Only the function label is checked.

; GCN-LABEL: {{^}}extload_f16_to_f32_arg:
define amdgpu_kernel void @extload_f16_to_f32_arg(float addrspace(1)* %out, half %arg) #0 {
  %as_f32 = fpext half %arg to float
  store float %as_f32, float addrspace(1)* %out
  ret void
}
73
; fpext of a <2 x half> kernel argument to <2 x float>, then store.
; Only the function label is checked.

; GCN-LABEL: {{^}}extload_v2f16_to_v2f32_arg:
define amdgpu_kernel void @extload_v2f16_to_v2f32_arg(<2 x float> addrspace(1)* %out, <2 x half> %arg) #0 {
  %as_v2f32 = fpext <2 x half> %arg to <2 x float>
  store <2 x float> %as_v2f32, <2 x float> addrspace(1)* %out
  ret void
}
80
; fpext of a <3 x half> kernel argument to <3 x float>, then store.
; The checks expect exactly three 16-bit loads and exactly three
; v_cvt_f32_f16 conversions, followed by a dword and a dwordx2 store.

; GCN-LABEL: {{^}}extload_v3f16_to_v3f32_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN-NOT: v_cvt_f32_f16
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_dwordx2
; GCN: s_endpgm
define amdgpu_kernel void @extload_v3f16_to_v3f32_arg(<3 x float> addrspace(1)* %out, <3 x half> %arg) #0 {
  %as_v3f32 = fpext <3 x half> %arg to <3 x float>
  store <3 x float> %as_v3f32, <3 x float> addrspace(1)* %out
  ret void
}
98
; fpext of a <4 x half> kernel argument to <4 x float>, then store.
; Only the function label is checked.

; GCN-LABEL: {{^}}extload_v4f16_to_v4f32_arg:
define amdgpu_kernel void @extload_v4f16_to_v4f32_arg(<4 x float> addrspace(1)* %out, <4 x half> %arg) #0 {
  %as_v4f32 = fpext <4 x half> %arg to <4 x float>
  store <4 x float> %as_v4f32, <4 x float> addrspace(1)* %out
  ret void
}
105
; fpext of an <8 x half> kernel argument to <8 x float>, then store.
; The checks expect eight 16-bit loads, eight v_cvt_f32_f16 conversions
; and two 128-bit stores, in that order.

; GCN-LABEL: {{^}}extload_v8f16_to_v8f32_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort

; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32

; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
define amdgpu_kernel void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x half> %arg) #0 {
  %as_v8f32 = fpext <8 x half> %arg to <8 x float>
  store <8 x float> %as_v8f32, <8 x float> addrspace(1)* %out
  ret void
}
132
; fpext of a scalar half kernel argument to double, then store.
; On SI the checks expect the loaded dword to feed v_cvt_f64_f32 directly;
; on VI they expect v_trunc_f16, then v_cvt_f32_f16, then v_cvt_f64_f32.
; The scalar load offset also differs per target (0xb vs 0x2c).

; GCN-LABEL: {{^}}extload_f16_to_f64_arg:
; SI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb{{$}}
; SI: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[ARG]]
; VI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c{{$}}
; VI: v_trunc_f16_e32 v[[VARG:[0-9]+]], [[ARG]]
; VI: v_cvt_f32_f16_e32 v[[VARG_F32:[0-9]+]], v[[VARG]]
; VI: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], v[[VARG_F32]]
; GCN: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @extload_f16_to_f64_arg(double addrspace(1)* %out, half %arg) #0 {
  %as_f64 = fpext half %arg to double
  store double %as_f64, double addrspace(1)* %out
  ret void
}
Matt Arsenaultacd68b52015-09-09 01:12:27 +0000146
; fpext of a <2 x half> kernel argument to <2 x double>, then store.
; The checks expect two 16-bit loads and, per element, a half-to-float
; followed by a float-to-double conversion (any order, via -DAG).

; GCN-LABEL: {{^}}extload_v2f16_to_v2f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define amdgpu_kernel void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x half> %arg) #0 {
  %as_v2f64 = fpext <2 x half> %arg to <2 x double>
  store <2 x double> %as_v2f64, <2 x double> addrspace(1)* %out
  ret void
}
160
; fpext of a <3 x half> kernel argument to <3 x double>, then store.
; The checks expect three 16-bit loads, three half-to-float and three
; float-to-double conversions (any order, via -DAG).

; GCN-LABEL: {{^}}extload_v3f16_to_v3f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define amdgpu_kernel void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x half> %arg) #0 {
  %as_v3f64 = fpext <3 x half> %arg to <3 x double>
  store <3 x double> %as_v3f64, <3 x double> addrspace(1)* %out
  ret void
}
177
; fpext of a <4 x half> kernel argument to <4 x double>, then store.
; The checks expect four 16-bit loads, four half-to-float and four
; float-to-double conversions (any order, via -DAG).

; GCN-LABEL: {{^}}extload_v4f16_to_v4f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define amdgpu_kernel void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x half> %arg) #0 {
  %as_v4f64 = fpext <4 x half> %arg to <4 x double>
  store <4 x double> %as_v4f64, <4 x double> addrspace(1)* %out
  ret void
}
197
; fpext of an <8 x half> kernel argument to <8 x double>, then store.
; The checks expect eight 16-bit loads, eight half-to-float and eight
; float-to-double conversions (any order, via -DAG).

; GCN-LABEL: {{^}}extload_v8f16_to_v8f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v

; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v

; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32

; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32

; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32

; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32

; GCN: s_endpgm
define amdgpu_kernel void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)* %out, <8 x half> %arg) #0 {
  %as_v8f64 = fpext <8 x half> %arg to <8 x double>
  store <8 x double> %as_v8f64, <8 x double> addrspace(1)* %out
  ret void
}
235
; Copy one half from %in to %out through global memory.
; The checks expect the register written by the 16-bit load to be the
; register read by the 16-bit store.

; GCN-LABEL: {{^}}global_load_store_f16:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define amdgpu_kernel void @global_load_store_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %loaded = load half, half addrspace(1)* %in
  store half %loaded, half addrspace(1)* %out
  ret void
}
244
; Copy a <2 x half> from %in to %out through global memory.
; The checks expect a single dword load whose result register is stored
; back with a single dword store.

; GCN-LABEL: {{^}}global_load_store_v2f16:
; GCN: buffer_load_dword [[TMP:v[0-9]+]]
; GCN: buffer_store_dword [[TMP]]
define amdgpu_kernel void @global_load_store_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %loaded = load <2 x half>, <2 x half> addrspace(1)* %in
  store <2 x half> %loaded, <2 x half> addrspace(1)* %out
  ret void
}
253
; Copy a <4 x half> from %in to %out through global memory.
; The checks expect a 64-bit load whose result registers are stored back
; with a 64-bit store. Note that this kernel declares its pointer
; parameters in (%in, %out) order.

; GCN-LABEL: {{^}}global_load_store_v4f16:
; GCN: buffer_load_dwordx2 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx2 [[TMP]]
define amdgpu_kernel void @global_load_store_v4f16(<4 x half> addrspace(1)* %in, <4 x half> addrspace(1)* %out) #0 {
  %loaded = load <4 x half>, <4 x half> addrspace(1)* %in
  store <4 x half> %loaded, <4 x half> addrspace(1)* %out
  ret void
}
262
; Copy an <8 x half> from %in to %out through global memory.
; The checks expect a 128-bit load whose result registers are the ones
; written back by the 128-bit store. The store line uses the captured
; TMP variable (rather than re-defining it) so the register tuple is
; actually compared, matching the f16/v2f16/v4f16 copy tests.

; GCN-LABEL: {{^}}global_load_store_v8f16:
; GCN: buffer_load_dwordx4 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx4 [[TMP]]
; GCN: s_endpgm
define amdgpu_kernel void @global_load_store_v8f16(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  store <8 x half> %val, <8 x half> addrspace(1)* %out
  ret void
}
272
; Load a half from global memory, extend it to float and store it.
; The checks expect a 16-bit load feeding v_cvt_f32_f16, whose result is
; stored as a dword.

; GCN-LABEL: {{^}}global_extload_f16_to_f32:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_dword [[CVT]]
define amdgpu_kernel void @global_extload_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %loaded = load half, half addrspace(1)* %in
  %widened = fpext half %loaded to float
  store float %widened, float addrspace(1)* %out
  ret void
}
283
; Load a <2 x half> from global memory, extend to <2 x float> and store.
; The low element is converted straight from the loaded dword. For the
; high element the SI checks expect a 16-bit right shift followed by a
; conversion, while the VI checks expect one SDWA conversion selecting
; WORD_1. Both results are stored as one consecutive register pair.

; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32:
; GCN: buffer_load_dword [[LOAD:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD]]
; SI: v_lshrrev_b32_e32 [[HI:v[0-9]+]], 16, [[LOAD]]
; SI: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[HI]]
; VI: v_cvt_f32_f16_sdwa v[[CVT1:[0-9]+]], [[LOAD]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GCN: buffer_store_dwordx2 v{{\[}}[[CVT0]]:[[CVT1]]{{\]}}
; GCN: s_endpgm
define amdgpu_kernel void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %loaded = load <2 x half>, <2 x half> addrspace(1)* %in
  %widened = fpext <2 x half> %loaded to <2 x float>
  store <2 x float> %widened, <2 x float> addrspace(1)* %out
  ret void
}
298
; Load a <3 x half> from global memory, extend to <3 x float> and store.
; Only the function label is checked.

; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f32:
define amdgpu_kernel void @global_extload_v3f16_to_v3f32(<3 x float> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
  %loaded = load <3 x half>, <3 x half> addrspace(1)* %in
  %widened = fpext <3 x half> %loaded to <3 x float>
  store <3 x float> %widened, <3 x float> addrspace(1)* %out
  ret void
}
306
; Load a <4 x half> from global memory, extend to <4 x float> and store.
; Only the function label is checked.

; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f32:
define amdgpu_kernel void @global_extload_v4f16_to_v4f32(<4 x float> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %loaded = load <4 x half>, <4 x half> addrspace(1)* %in
  %widened = fpext <4 x half> %loaded to <4 x float>
  store <4 x float> %widened, <4 x float> addrspace(1)* %out
  ret void
}
314
; Load an <8 x half> from global memory, extend to <8 x float> and store.
; Only the function label is checked.

; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f32:
define amdgpu_kernel void @global_extload_v8f16_to_v8f32(<8 x float> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %loaded = load <8 x half>, <8 x half> addrspace(1)* %in
  %widened = fpext <8 x half> %loaded to <8 x float>
  store <8 x float> %widened, <8 x float> addrspace(1)* %out
  ret void
}
322
; Load a <16 x half> from global memory, extend to <16 x float> and store.
; The checks expect two 128-bit loads and four 128-bit stores. On SI all
; sixteen v_cvt_f32_f16 conversions are listed; on VI only the first
; e32/sdwa pair is listed.

; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f32:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4

; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32

; VI: v_cvt_f32_f16_e32
; VI: v_cvt_f32_f16_sdwa
; (the remaining conversions on VI are not enumerated)

; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4

; GCN: s_endpgm
define amdgpu_kernel void @global_extload_v16f16_to_v16f32(<16 x float> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
  %loaded = load <16 x half>, <16 x half> addrspace(1)* %in
  %widened = fpext <16 x half> %loaded to <16 x float>
  store <16 x float> %widened, <16 x float> addrspace(1)* %out
  ret void
}
360
; Load a half from global memory, extend it to double and store it.
; The checks expect a chained conversion: half to float, float to double,
; then a 64-bit store of the final register pair.

; GCN-LABEL: {{^}}global_extload_f16_to_f64:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[LOAD]]
; GCN: v_cvt_f64_f32_e32 [[CVT1:v\[[0-9]+:[0-9]+\]]], [[CVT0]]
; GCN: buffer_store_dwordx2 [[CVT1]]
define amdgpu_kernel void @global_extload_f16_to_f64(double addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %loaded = load half, half addrspace(1)* %in
  %widened = fpext half %loaded to double
  store double %widened, double addrspace(1)* %out
  ret void
}
372
; Load a <2 x half> from global memory, extend to <2 x double> and store.
; Each element goes half to float to double. The high element is obtained
; with a shift on SI and with an SDWA conversion (WORD_1) on VI. The
; final store must cover the range from CVT2_LO through CVT3_HI.

; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64:
; GCN-DAG: buffer_load_dword [[LOAD:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}

; SI-DAG: v_lshrrev_b32_e32 [[HI:v[0-9]+]], 16, [[LOAD]]
; SI-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD]]
; SI-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[HI]]
; SI-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT2_LO:[0-9]+]]:[[CVT2_HI:[0-9]+]]{{\]}}, v[[CVT0]]
; SI-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT3_LO:[0-9]+]]:[[CVT3_HI:[0-9]+]]{{\]}}, v[[CVT1]]

; VI-DAG: v_cvt_f32_f16_sdwa v[[CVT0:[0-9]+]], [[LOAD]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD]]
; VI-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT3_LO:[0-9]+]]:[[CVT3_HI:[0-9]+]]{{\]}}, v[[CVT0]]
; VI-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT2_LO:[0-9]+]]:[[CVT2_HI:[0-9]+]]{{\]}}, v[[CVT1]]

; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[CVT2_LO]]:[[CVT3_HI]]{{\]}}
; GCN: s_endpgm
define amdgpu_kernel void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %loaded = load <2 x half>, <2 x half> addrspace(1)* %in
  %widened = fpext <2 x half> %loaded to <2 x double>
  store <2 x double> %widened, <2 x double> addrspace(1)* %out
  ret void
}
395
; Load a <3 x half> from global memory, extend to <3 x double> and store.
; The XSI/XVI lines below use prefixes that no FileCheck invocation in
; this file enables, so they are inactive. The active checks expect one
; 64-bit load, three half-to-float conversions (the Y element via a shift
; on SI or an SDWA WORD_1 select on VI), exactly three float-to-double
; conversions, and a dwordx4 store plus a dwordx2 store at offset:16.

; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f64:

; XSI: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
; XSI: v_cvt_f32_f16_e32
; XSI: v_cvt_f32_f16_e32
; XSI-DAG: v_lshrrev_b32_e32 {{v[0-9]+}}, 16, {{v[0-9]+}}
; XSI: v_cvt_f32_f16_e32
; XSI-NOT: v_cvt_f32_f16

; XVI: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
; XVI: v_cvt_f32_f16_e32
; XVI: v_cvt_f32_f16_e32
; XVI: v_cvt_f32_f16_sdwa
; XVI-NOT: v_cvt_f32_f16

; GCN: buffer_load_dwordx2 v{{\[}}[[IN_LO:[0-9]+]]:[[IN_HI:[0-9]+]]
; GCN-DAG: v_cvt_f32_f16_e32 [[Z32:v[0-9]+]], v[[IN_HI]]
; GCN-DAG: v_cvt_f32_f16_e32 [[X32:v[0-9]+]], v[[IN_LO]]
; SI: v_lshrrev_b32_e32 [[Y16:v[0-9]+]], 16, v[[IN_LO]]
; SI-DAG: v_cvt_f32_f16_e32 [[Y32:v[0-9]+]], [[Y16]]
; VI-DAG: v_cvt_f32_f16_sdwa [[Y32:v[0-9]+]], v[[IN_LO]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1

; GCN-DAG: v_cvt_f64_f32_e32 [[Z:v\[[0-9]+:[0-9]+\]]], [[Z32]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[XLO:[0-9]+]]:{{[0-9]+}}], [[X32]]
; GCN-DAG: v_cvt_f64_f32_e32 v[{{[0-9]+}}:[[YHI:[0-9]+]]{{\]}}, [[Y32]]
; GCN-NOT: v_cvt_f64_f32_e32

; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[XLO]]:[[YHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_dwordx2 [[Z]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16
; GCN: s_endpgm
define amdgpu_kernel void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
  %loaded = load <3 x half>, <3 x half> addrspace(1)* %in
  %widened = fpext <3 x half> %loaded to <3 x double>
  store <3 x double> %widened, <3 x double> addrspace(1)* %out
  ret void
}
432
; Load a <4 x half> from global memory, extend to <4 x double> and store.
; Only the function label is checked.

; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f64:
define amdgpu_kernel void @global_extload_v4f16_to_v4f64(<4 x double> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %loaded = load <4 x half>, <4 x half> addrspace(1)* %in
  %widened = fpext <4 x half> %loaded to <4 x double>
  store <4 x double> %widened, <4 x double> addrspace(1)* %out
  ret void
}
440
; Load an <8 x half> from global memory, extend to <8 x double> and store.
; Only the function label is checked.

; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f64:
define amdgpu_kernel void @global_extload_v8f16_to_v8f64(<8 x double> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %loaded = load <8 x half>, <8 x half> addrspace(1)* %in
  %widened = fpext <8 x half> %loaded to <8 x double>
  store <8 x double> %widened, <8 x double> addrspace(1)* %out
  ret void
}
448
; Load a <16 x half> from global memory, extend to <16 x double> and store.
; Only the function label is checked.

; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f64:
define amdgpu_kernel void @global_extload_v16f16_to_v16f64(<16 x double> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
  %loaded = load <16 x half>, <16 x half> addrspace(1)* %in
  %widened = fpext <16 x half> %loaded to <16 x double>
  store <16 x double> %widened, <16 x double> addrspace(1)* %out
  ret void
}
456
; Load a float from global memory, truncate it to half and store it.
; The checks expect a dword load feeding v_cvt_f16_f32, whose result is
; written with a 16-bit store.

; GCN-LABEL: {{^}}global_truncstore_f32_to_f16:
; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_short [[CVT]]
define amdgpu_kernel void @global_truncstore_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %loaded = load float, float addrspace(1)* %in
  %narrowed = fptrunc float %loaded to half
  store half %narrowed, half addrspace(1)* %out
  ret void
}
467
; Load a <2 x float>, truncate to <2 x half> and store the packed result.
; The low element is converted with v_cvt_f16_f32 on both targets. For the
; high element the SI checks expect a conversion plus a 16-bit left shift,
; while the VI checks expect one SDWA conversion writing WORD_1. The two
; halves are OR-ed into one dword that is stored.

; GCN-LABEL: {{^}}global_truncstore_v2f32_to_v2f16:
; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT0:v[0-9]+]], v[[LO]]

; SI-DAG: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], v[[HI]]
; SI-DAG: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 16, [[CVT1]]
; SI: v_or_b32_e32 [[PACKED:v[0-9]+]], [[CVT0]], [[SHL]]

; VI-DAG: v_cvt_f16_f32_sdwa [[CVT1:v[0-9]+]], v[[HI]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; VI: v_or_b32_e32 [[PACKED:v[0-9]+]], [[CVT0]], [[CVT1]]

; GCN-DAG: buffer_store_dword [[PACKED]]
; GCN: s_endpgm
define amdgpu_kernel void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
  %loaded = load <2 x float>, <2 x float> addrspace(1)* %in
  %narrowed = fptrunc <2 x float> %loaded to <2 x half>
  store <2 x half> %narrowed, <2 x half> addrspace(1)* %out
  ret void
}
487
; Load a <3 x float>, truncate to <3 x half> and store.
; The checks expect a 128-bit load, three conversions (one of them SDWA
; on VI), then a 16-bit store followed by a dword store.

; GCN-LABEL: {{^}}global_truncstore_v3f32_to_v3f16:
; GCN: buffer_load_dwordx4
; GCN-DAG: v_cvt_f16_f32_e32
; SI-DAG: v_cvt_f16_f32_e32
; VI-DAG: v_cvt_f16_f32_sdwa
; GCN-DAG: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_dword
; GCN: s_endpgm
define amdgpu_kernel void @global_truncstore_v3f32_to_v3f16(<3 x half> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
  %loaded = load <3 x float>, <3 x float> addrspace(1)* %in
  %narrowed = fptrunc <3 x float> %loaded to <3 x half>
  store <3 x half> %narrowed, <3 x half> addrspace(1)* %out
  ret void
}
503
; Load a <4 x float>, truncate to <4 x half> and store.
; The checks expect a 128-bit load, four conversions (two of them SDWA
; on VI) and a single 64-bit store.

; GCN-LABEL: {{^}}global_truncstore_v4f32_to_v4f16:
; GCN: buffer_load_dwordx4
; GCN-DAG: v_cvt_f16_f32_e32
; SI-DAG: v_cvt_f16_f32_e32
; SI-DAG: v_cvt_f16_f32_e32
; VI-DAG: v_cvt_f16_f32_sdwa
; VI-DAG: v_cvt_f16_f32_sdwa
; GCN-DAG: v_cvt_f16_f32_e32
; GCN: buffer_store_dwordx2
; GCN: s_endpgm
define amdgpu_kernel void @global_truncstore_v4f32_to_v4f16(<4 x half> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
  %loaded = load <4 x float>, <4 x float> addrspace(1)* %in
  %narrowed = fptrunc <4 x float> %loaded to <4 x half>
  store <4 x half> %narrowed, <4 x half> addrspace(1)* %out
  ret void
}
520
; Load an <8 x float>, truncate to <8 x half> and store.
; The checks expect two 128-bit loads and one 128-bit store. In between,
; SI must emit eight e32 conversions; VI must emit four e32 and four SDWA
; conversions in any order.

; GCN-LABEL: {{^}}global_truncstore_v8f32_to_v8f16:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; VI-DAG: v_cvt_f16_f32_e32
; VI-DAG: v_cvt_f16_f32_e32
; VI-DAG: v_cvt_f16_f32_e32
; VI-DAG: v_cvt_f16_f32_e32
; VI-DAG: v_cvt_f16_f32_sdwa
; VI-DAG: v_cvt_f16_f32_sdwa
; VI-DAG: v_cvt_f16_f32_sdwa
; VI-DAG: v_cvt_f16_f32_sdwa
; GCN: buffer_store_dwordx4
; GCN: s_endpgm
define amdgpu_kernel void @global_truncstore_v8f32_to_v8f16(<8 x half> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {
  %loaded = load <8 x float>, <8 x float> addrspace(1)* %in
  %narrowed = fptrunc <8 x float> %loaded to <8 x half>
  store <8 x half> %narrowed, <8 x half> addrspace(1)* %out
  ret void
}
548
; Load a <16 x float>, truncate to <16 x half> and store.
; The checks expect four 128-bit loads, sixteen e32 conversions and two
; 128-bit stores; conversions and stores may appear in any order (-DAG).

; GCN-LABEL: {{^}}global_truncstore_v16f32_to_v16f16:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: buffer_store_dwordx4
; GCN-DAG: buffer_store_dwordx4
; GCN: s_endpgm
define amdgpu_kernel void @global_truncstore_v16f32_to_v16f16(<16 x half> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 {
  %loaded = load <16 x float>, <16 x float> addrspace(1)* %in
  %narrowed = fptrunc <16 x float> %loaded to <16 x half>
  store <16 x half> %narrowed, <16 x half> addrspace(1)* %out
  ret void
}
579
; FIXME: Unsafe math should fold conversions away
;
; Scalar half addition of two kernel arguments.
; Only SI has instruction checks: four v_cvt_f32_f16 conversions followed
; by a v_add_f32. VI is checked for the label and s_endpgm only.

; GCN-LABEL: {{^}}fadd_f16:
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI: v_add_f32
; GCN: s_endpgm
define amdgpu_kernel void @fadd_f16(half addrspace(1)* %out, half %a, half %b) #0 {
  %sum = fadd half %a, %b
  store half %sum, half addrspace(1)* %out, align 4
  ret void
}
593
; <2 x half> addition of two kernel arguments.
; On SI the checks expect two v_add_f32 instructions; VI is checked for
; the label and s_endpgm only.

; GCN-LABEL: {{^}}fadd_v2f16:
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define amdgpu_kernel void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %a, <2 x half> %b) #0 {
  %sum = fadd <2 x half> %a, %b
  store <2 x half> %sum, <2 x half> addrspace(1)* %out, align 8
  ret void
}
603
; <4 x half> addition of two consecutive vectors read from %in
; (element 0 and element 1 of the <4 x half> array at %in).
; On SI the checks expect four v_add_f32 instructions; VI is checked for
; the label and s_endpgm only.

; GCN-LABEL: {{^}}fadd_v4f16:
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define amdgpu_kernel void @fadd_v4f16(<4 x half> addrspace(1)* %in_unused_name_guard, <4 x half> addrspace(1)* %in) #0 {
  %rhs_ptr = getelementptr <4 x half>, <4 x half> addrspace(1)* %in, i32 1
  %lhs = load <4 x half>, <4 x half> addrspace(1)* %in, align 16
  %rhs = load <4 x half>, <4 x half> addrspace(1)* %rhs_ptr, align 16
  %sum = fadd <4 x half> %lhs, %rhs
  store <4 x half> %sum, <4 x half> addrspace(1)* %in_unused_name_guard, align 16
  ret void
}
618
; <8 x half> addition of two kernel arguments.
; On SI the checks expect eight v_add_f32 instructions; VI is checked for
; the label and s_endpgm only.

; GCN-LABEL: {{^}}fadd_v8f16:
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define amdgpu_kernel void @fadd_v8f16(<8 x half> addrspace(1)* %out, <8 x half> %a, <8 x half> %b) #0 {
  %sum = fadd <8 x half> %a, %b
  store <8 x half> %sum, <8 x half> addrspace(1)* %out, align 32
  ret void
}
634
; Load a half, bitcast it to i16 and store the integer.
; The checks expect the loaded register to be stored unchanged, i.e. no
; instruction between the 16-bit load and the 16-bit store touches it.
; This kernel declares its pointer parameters in (%in, %out) order.

; GCN-LABEL: {{^}}test_bitcast_from_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define amdgpu_kernel void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) #0 {
  %as_half = load half, half addrspace(1)* %in
  %as_bits = bitcast half %as_half to i16
  store i16 %as_bits, i16 addrspace(1)* %out
  ret void
}
644
; Load an i16, bitcast it to half and store the half.
; The checks expect the loaded register to be stored unchanged, i.e. no
; instruction between the 16-bit load and the 16-bit store touches it.

; GCN-LABEL: {{^}}test_bitcast_to_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define amdgpu_kernel void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %as_bits = load i16, i16 addrspace(1)* %in
  %as_half = bitcast i16 %as_bits to half
  store half %as_half, half addrspace(1)* %out
  ret void
}
Tim Northover00fdbbb2014-07-18 13:01:37 +0000654
; Attribute group referenced as #0 by the kernels above.
attributes #0 = { nounwind }