; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; half args should be promoted to float for SI and lower.

; Store a half argument to %out. The checks expect the loaded argument to go
; through v_cvt_f16_f32 on SI and v_trunc_f16 on VI before the 16-bit store.
; GCN-LABEL: {{^}}load_f16_arg:
; GCN: s_load_dword [[ARG:s[0-9]+]]
; SI: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[ARG]]
; VI: v_trunc_f16_e32 [[CVT:v[0-9]+]], [[ARG]]
; GCN: buffer_store_short [[CVT]]
define void @load_f16_arg(half addrspace(1)* %out, half %arg) #0 {
  store half %arg, half addrspace(1)* %out
  ret void
}

; Store a <2 x half> argument to %out. The two halves are expected to be
; loaded separately (offsets 44 and 46), packed with a shift and an or, and
; written with a single dword store.
; GCN-LABEL: {{^}}load_v2f16_arg:
; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
; GCN: v_lshlrev_b32_e32 [[HI:v[0-9]+]], 16, [[V1]]
; GCN: v_or_b32_e32 [[PACKED:v[0-9]+]], [[HI]], [[V0]]
; GCN: buffer_store_dword [[PACKED]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN: s_endpgm
define void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x half> %arg) #0 {
  store <2 x half> %arg, <2 x half> addrspace(1)* %out
  ret void
}

; Store a <3 x half> argument to %out. Exactly three 16-bit loads are expected,
; followed by one dword store and one short store (in either order) and no
; further buffer stores.
; GCN-LABEL: {{^}}load_v3f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_short
; GCN-NOT: buffer_store
; GCN: s_endpgm
define void @load_v3f16_arg(<3 x half> addrspace(1)* %out, <3 x half> %arg) #0 {
  store <3 x half> %arg, <3 x half> addrspace(1)* %out
  ret void
}

; Store a <4 x half> argument to %out. Four 16-bit loads are expected,
; followed by a single 64-bit store.
; GCN-LABEL: {{^}}load_v4f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_store_dwordx2
; GCN: s_endpgm
define void @load_v4f16_arg(<4 x half> addrspace(1)* %out, <4 x half> %arg) #0 {
  store <4 x half> %arg, <4 x half> addrspace(1)* %out
  ret void
}

; Store a <8 x half> argument to %out. Only the label is checked, so this
; case only verifies that the function is emitted.
; GCN-LABEL: {{^}}load_v8f16_arg:
define void @load_v8f16_arg(<8 x half> addrspace(1)* %out, <8 x half> %arg) #0 {
  store <8 x half> %arg, <8 x half> addrspace(1)* %out
  ret void
}

; Extend a <2 x half> argument to <2 x float> and store it to %out.
; Only the label is checked.
; GCN-LABEL: {{^}}extload_v2f16_arg:
define void @extload_v2f16_arg(<2 x float> addrspace(1)* %out, <2 x half> %in) #0 {
  %fpext = fpext <2 x half> %in to <2 x float>
  store <2 x float> %fpext, <2 x float> addrspace(1)* %out
  ret void
}

; Extend a half argument to float and store it to %out.
; Only the label is checked.
; GCN-LABEL: {{^}}extload_f16_to_f32_arg:
define void @extload_f16_to_f32_arg(float addrspace(1)* %out, half %arg) #0 {
  %ext = fpext half %arg to float
  store float %ext, float addrspace(1)* %out
  ret void
}

; Extend a <2 x half> argument to <2 x float> and store it to %out.
; Only the label is checked.
; GCN-LABEL: {{^}}extload_v2f16_to_v2f32_arg:
define void @extload_v2f16_to_v2f32_arg(<2 x float> addrspace(1)* %out, <2 x half> %arg) #0 {
  %ext = fpext <2 x half> %arg to <2 x float>
  store <2 x float> %ext, <2 x float> addrspace(1)* %out
  ret void
}

; Extend a <3 x half> argument to <3 x float> and store it to %out.
; Exactly three 16-bit loads and three f16-to-f32 conversions are expected;
; the result is written with one dword and one dwordx2 store in either order.
; GCN-LABEL: {{^}}extload_v3f16_to_v3f32_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN-NOT: v_cvt_f32_f16
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_dwordx2
; GCN: s_endpgm
define void @extload_v3f16_to_v3f32_arg(<3 x float> addrspace(1)* %out, <3 x half> %arg) #0 {
  %ext = fpext <3 x half> %arg to <3 x float>
  store <3 x float> %ext, <3 x float> addrspace(1)* %out
  ret void
}

; Extend a <4 x half> argument to <4 x float> and store it to %out.
; Only the label is checked.
; GCN-LABEL: {{^}}extload_v4f16_to_v4f32_arg:
define void @extload_v4f16_to_v4f32_arg(<4 x float> addrspace(1)* %out, <4 x half> %arg) #0 {
  %ext = fpext <4 x half> %arg to <4 x float>
  store <4 x float> %ext, <4 x float> addrspace(1)* %out
  ret void
}

; Extend a <8 x half> argument to <8 x float> and store it to %out.
; Eight 16-bit loads, eight f16-to-f32 conversions and two dwordx4 stores
; are expected, in that order.
; GCN-LABEL: {{^}}extload_v8f16_to_v8f32_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort

; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32

; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
define void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x half> %arg) #0 {
  %ext = fpext <8 x half> %arg to <8 x float>
  store <8 x float> %ext, <8 x float> addrspace(1)* %out
  ret void
}

; Extend a half argument to double and store it to %out.
; The SI checks convert the loaded argument straight from f32 to f64; the VI
; checks go through v_trunc_f16 and an f16-to-f32 conversion first.
; GCN-LABEL: {{^}}extload_f16_to_f64_arg:
; SI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb{{$}}
; SI: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[ARG]]
; VI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c{{$}}
; VI: v_trunc_f16_e32 v[[VARG:[0-9]+]], [[ARG]]
; VI: v_cvt_f32_f16_e32 v[[VARG_F32:[0-9]+]], v[[VARG]]
; VI: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], v[[VARG_F32]]
; GCN: buffer_store_dwordx2 [[RESULT]]
define void @extload_f16_to_f64_arg(double addrspace(1)* %out, half %arg) #0 {
  %ext = fpext half %arg to double
  store double %ext, double addrspace(1)* %out
  ret void
}

; Extend a <2 x half> argument to <2 x double> and store it to %out.
; Two loads, two f16-to-f32 and two f32-to-f64 conversions are expected in
; any order.
; GCN-LABEL: {{^}}extload_v2f16_to_v2f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x half> %arg) #0 {
  %ext = fpext <2 x half> %arg to <2 x double>
  store <2 x double> %ext, <2 x double> addrspace(1)* %out
  ret void
}

; Extend a <3 x half> argument to <3 x double> and store it to %out.
; Three loads, three f16-to-f32 and three f32-to-f64 conversions are expected
; in any order.
; GCN-LABEL: {{^}}extload_v3f16_to_v3f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x half> %arg) #0 {
  %ext = fpext <3 x half> %arg to <3 x double>
  store <3 x double> %ext, <3 x double> addrspace(1)* %out
  ret void
}

; Extend a <4 x half> argument to <4 x double> and store it to %out.
; Four loads, four f16-to-f32 and four f32-to-f64 conversions are expected
; in any order.
; GCN-LABEL: {{^}}extload_v4f16_to_v4f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x half> %arg) #0 {
  %ext = fpext <4 x half> %arg to <4 x double>
  store <4 x double> %ext, <4 x double> addrspace(1)* %out
  ret void
}

; Extend a <8 x half> argument to <8 x double> and store it to %out.
; Eight loads, eight f16-to-f32 and eight f32-to-f64 conversions are expected
; in any order.
; GCN-LABEL: {{^}}extload_v8f16_to_v8f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v

; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v

; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32

; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32

; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32

; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32

; GCN: s_endpgm
define void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)* %out, <8 x half> %arg) #0 {
  %ext = fpext <8 x half> %arg to <8 x double>
  store <8 x double> %ext, <8 x double> addrspace(1)* %out
  ret void
}

; Copy one half from %in to %out. The value loaded by the 16-bit load must be
; the one written by the 16-bit store, with no conversion in between.
; GCN-LABEL: {{^}}global_load_store_f16:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @global_load_store_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  store half %val, half addrspace(1)* %out
  ret void
}

; Copy a <2 x half> from %in to %out. A single dword load feeds a single
; dword store of the same register.
; GCN-LABEL: {{^}}global_load_store_v2f16:
; GCN: buffer_load_dword [[TMP:v[0-9]+]]
; GCN: buffer_store_dword [[TMP]]
define void @global_load_store_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  store <2 x half> %val, <2 x half> addrspace(1)* %out
  ret void
}

; Copy a <4 x half> from %in to %out. A single dwordx2 load feeds a single
; dwordx2 store of the same register pair.
; NOTE(review): the parameter order here is (%in, %out), unlike the sibling
; global_load_store_* tests which take (%out, %in). The body is
; self-consistent, so the signature is left as is.
; GCN-LABEL: {{^}}global_load_store_v4f16:
; GCN: buffer_load_dwordx2 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx2 [[TMP]]
define void @global_load_store_v4f16(<4 x half> addrspace(1)* %in, <4 x half> addrspace(1)* %out) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  store <4 x half> %val, <4 x half> addrspace(1)* %out
  ret void
}

; Copy a <8 x half> from %in to %out. A single dwordx4 load feeds a single
; dwordx4 store. The store check uses the TMP variable captured by the load
; check (rather than re-defining it), so the test verifies that the loaded
; registers are the ones stored, like the v2f16 and v4f16 variants do.
; GCN-LABEL: {{^}}global_load_store_v8f16:
; GCN: buffer_load_dwordx4 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx4 [[TMP]]
; GCN: s_endpgm
define void @global_load_store_v8f16(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  store <8 x half> %val, <8 x half> addrspace(1)* %out
  ret void
}

; Load a half from %in, extend it to float and store it to %out.
; The loaded register must feed the conversion, whose result is stored.
; GCN-LABEL: {{^}}global_extload_f16_to_f32:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_dword [[CVT]]
define void @global_extload_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  %cvt = fpext half %val to float
  store float %cvt, float addrspace(1)* %out
  ret void
}

; Load a <2 x half> from %in, extend it to <2 x float> and store it to %out.
; Both targets check the same 16-bit right shift for the high half; only its
; position relative to the first conversion differs between the two targets.
; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32:
; GCN: buffer_load_dword [[LOAD:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; VI: v_lshrrev_b32_e32 [[HI:v[0-9]+]], 16, [[LOAD]]
; GCN: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD]]
; SI: v_lshrrev_b32_e32 [[HI:v[0-9]+]], 16, [[LOAD]]
; GCN: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[HI]]
; GCN: buffer_store_dwordx2 v{{\[}}[[CVT0]]:[[CVT1]]{{\]}}
; GCN: s_endpgm
define void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  %cvt = fpext <2 x half> %val to <2 x float>
  store <2 x float> %cvt, <2 x float> addrspace(1)* %out
  ret void
}

; Load a <3 x half> from %in, extend it to <3 x float> and store it to %out.
; Only the label is checked.
; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f32:
define void @global_extload_v3f16_to_v3f32(<3 x float> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
  %val = load <3 x half>, <3 x half> addrspace(1)* %in
  %cvt = fpext <3 x half> %val to <3 x float>
  store <3 x float> %cvt, <3 x float> addrspace(1)* %out
  ret void
}

; Load a <4 x half> from %in, extend it to <4 x float> and store it to %out.
; Only the label is checked.
; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f32:
define void @global_extload_v4f16_to_v4f32(<4 x float> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  %cvt = fpext <4 x half> %val to <4 x float>
  store <4 x float> %cvt, <4 x float> addrspace(1)* %out
  ret void
}

; Load a <8 x half> from %in, extend it to <8 x float> and store it to %out.
; Only the label is checked.
; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f32:
define void @global_extload_v8f16_to_v8f32(<8 x float> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  %cvt = fpext <8 x half> %val to <8 x float>
  store <8 x float> %cvt, <8 x float> addrspace(1)* %out
  ret void
}

; Load a <16 x half> from %in, extend it to <16 x float> and store it to %out.
; Two dwordx4 loads, sixteen f16-to-f32 conversions and four dwordx4 stores
; are expected, in that order.
; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f32:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4

; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32

; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4

; GCN: s_endpgm
define void @global_extload_v16f16_to_v16f32(<16 x float> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
  %val = load <16 x half>, <16 x half> addrspace(1)* %in
  %cvt = fpext <16 x half> %val to <16 x float>
  store <16 x float> %cvt, <16 x float> addrspace(1)* %out
  ret void
}

; Load a half from %in, extend it to double and store it to %out.
; The extension is expected as two chained conversions (f16 to f32, then
; f32 to f64) whose final result is stored.
; GCN-LABEL: {{^}}global_extload_f16_to_f64:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[LOAD]]
; GCN: v_cvt_f64_f32_e32 [[CVT1:v\[[0-9]+:[0-9]+\]]], [[CVT0]]
; GCN: buffer_store_dwordx2 [[CVT1]]
define void @global_extload_f16_to_f64(double addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  %cvt = fpext half %val to double
  store double %cvt, double addrspace(1)* %out
  ret void
}

; Load a <2 x half> from %in, extend it to <2 x double> and store it to %out.
; The low and high halves are each converted f16 to f32 to f64; the two
; 64-bit results must form the register range written by the dwordx4 store.
; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64:
; GCN-DAG: buffer_load_dword [[LOAD:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: v_lshrrev_b32_e32 [[HI:v[0-9]+]], 16, [[LOAD]]
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD]]
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[HI]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT2_LO:[0-9]+]]:[[CVT2_HI:[0-9]+]]{{\]}}, v[[CVT0]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT3_LO:[0-9]+]]:[[CVT3_HI:[0-9]+]]{{\]}}, v[[CVT1]]
; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[CVT2_LO]]:[[CVT3_HI]]{{\]}}
; GCN: s_endpgm
define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  %cvt = fpext <2 x half> %val to <2 x double>
  store <2 x double> %cvt, <2 x double> addrspace(1)* %out
  ret void
}

; Load a <3 x half> from %in, extend it to <3 x double> and store it to %out.
; The lines using the XSI and XVI prefixes are inactive: neither prefix is
; enabled by the FileCheck invocations at the top of this file. The active
; checks expect one dwordx2 load, three f16-to-f32 and exactly three
; f32-to-f64 conversions, then a dwordx4 store plus a dwordx2 store at
; offset 16.
; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f64:

; XSI: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
; XSI: v_cvt_f32_f16_e32
; XSI: v_cvt_f32_f16_e32
; XSI-DAG: v_lshrrev_b32_e32 {{v[0-9]+}}, 16, {{v[0-9]+}}
; XSI: v_cvt_f32_f16_e32
; XSI-NOT: v_cvt_f32_f16

; XVI: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
; XVI-DAG: v_lshrrev_b32_e32 {{v[0-9]+}}, 16, {{v[0-9]+}}
; XVI: v_cvt_f32_f16_e32
; XVI: v_cvt_f32_f16_e32
; XVI: v_cvt_f32_f16_e32
; XVI-NOT: v_cvt_f32_f16

; GCN: buffer_load_dwordx2 v{{\[}}[[IN_LO:[0-9]+]]:[[IN_HI:[0-9]+]]
; VI: v_lshrrev_b32_e32 [[Y16:v[0-9]+]], 16, v[[IN_LO]]
; GCN: v_cvt_f32_f16_e32 [[Z32:v[0-9]+]], v[[IN_HI]]
; GCN: v_cvt_f32_f16_e32 [[X32:v[0-9]+]], v[[IN_LO]]
; SI: v_lshrrev_b32_e32 [[Y16:v[0-9]+]], 16, v[[IN_LO]]
; GCN: v_cvt_f32_f16_e32 [[Y32:v[0-9]+]], [[Y16]]

; GCN: v_cvt_f64_f32_e32 [[Z:v\[[0-9]+:[0-9]+\]]], [[Z32]]
; GCN: v_cvt_f64_f32_e32 v{{\[}}[[XLO:[0-9]+]]:{{[0-9]+}}], [[X32]]
; GCN: v_cvt_f64_f32_e32 v[{{[0-9]+}}:[[YHI:[0-9]+]]{{\]}}, [[Y32]]
; GCN-NOT: v_cvt_f64_f32_e32

; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[XLO]]:[[YHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_dwordx2 [[Z]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16
; GCN: s_endpgm
define void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
  %val = load <3 x half>, <3 x half> addrspace(1)* %in
  %cvt = fpext <3 x half> %val to <3 x double>
  store <3 x double> %cvt, <3 x double> addrspace(1)* %out
  ret void
}

; Load a <4 x half> from %in, extend it to <4 x double> and store it to %out.
; Only the label is checked.
; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f64:
define void @global_extload_v4f16_to_v4f64(<4 x double> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  %cvt = fpext <4 x half> %val to <4 x double>
  store <4 x double> %cvt, <4 x double> addrspace(1)* %out
  ret void
}

; Load a <8 x half> from %in, extend it to <8 x double> and store it to %out.
; Only the label is checked.
; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f64:
define void @global_extload_v8f16_to_v8f64(<8 x double> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  %cvt = fpext <8 x half> %val to <8 x double>
  store <8 x double> %cvt, <8 x double> addrspace(1)* %out
  ret void
}

; Load a <16 x half> from %in, extend it to <16 x double> and store it to
; %out. Only the label is checked.
; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f64:
define void @global_extload_v16f16_to_v16f64(<16 x double> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
  %val = load <16 x half>, <16 x half> addrspace(1)* %in
  %cvt = fpext <16 x half> %val to <16 x double>
  store <16 x double> %cvt, <16 x double> addrspace(1)* %out
  ret void
}

; Load a float from %in, truncate it to half and store it to %out.
; The loaded register must feed the conversion, whose result is stored with
; a 16-bit store.
; GCN-LABEL: {{^}}global_truncstore_f32_to_f16:
; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_short [[CVT]]
define void @global_truncstore_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %val = load float, float addrspace(1)* %in
  %cvt = fptrunc float %val to half
  store half %cvt, half addrspace(1)* %out
  ret void
}

; Load a <2 x float> from %in, truncate it to <2 x half> and store it to %out.
; Each lane is converted separately; the high result is shifted left by 16
; and or-ed with the low result to form the dword that is stored.
; GCN-LABEL: {{^}}global_truncstore_v2f32_to_v2f16:
; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT0:v[0-9]+]], v[[LO]]
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], v[[HI]]
; GCN-DAG: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 16, [[CVT1]]
; GCN-DAG: v_or_b32_e32 [[PACKED:v[0-9]+]], [[SHL]], [[CVT0]]
; GCN-DAG: buffer_store_dword [[PACKED]]
; GCN: s_endpgm
define void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
  %val = load <2 x float>, <2 x float> addrspace(1)* %in
  %cvt = fptrunc <2 x float> %val to <2 x half>
  store <2 x half> %cvt, <2 x half> addrspace(1)* %out
  ret void
}

; Load a <3 x float> from %in, truncate it to <3 x half> and store it to %out.
; One dwordx4 load and exactly three f32-to-f16 conversions are expected,
; followed by a short store and then a dword store.
; GCN-LABEL: {{^}}global_truncstore_v3f32_to_v3f16:
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN-NOT: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_dword
; GCN: s_endpgm
define void @global_truncstore_v3f32_to_v3f16(<3 x half> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
  %val = load <3 x float>, <3 x float> addrspace(1)* %in
  %cvt = fptrunc <3 x float> %val to <3 x half>
  store <3 x half> %cvt, <3 x half> addrspace(1)* %out
  ret void
}

; Load a <4 x float> from %in, truncate it to <4 x half> and store it to %out.
; One dwordx4 load, four f32-to-f16 conversions and one dwordx2 store are
; expected, in that order.
; GCN-LABEL: {{^}}global_truncstore_v4f32_to_v4f16:
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_dwordx2
; GCN: s_endpgm
define void @global_truncstore_v4f32_to_v4f16(<4 x half> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
  %val = load <4 x float>, <4 x float> addrspace(1)* %in
  %cvt = fptrunc <4 x float> %val to <4 x half>
  store <4 x half> %cvt, <4 x half> addrspace(1)* %out
  ret void
}

; Load a <8 x float> from %in, truncate it to <8 x half> and store it to %out.
; Two dwordx4 loads, eight f32-to-f16 conversions and one dwordx4 store are
; expected, in that order.
; GCN-LABEL: {{^}}global_truncstore_v8f32_to_v8f16:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_dwordx4
; GCN: s_endpgm
define void @global_truncstore_v8f32_to_v8f16(<8 x half> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {
  %val = load <8 x float>, <8 x float> addrspace(1)* %in
  %cvt = fptrunc <8 x float> %val to <8 x half>
  store <8 x half> %cvt, <8 x half> addrspace(1)* %out
  ret void
}

; Load a <16 x float> from %in, truncate it to <16 x half> and store it to
; %out. Four dwordx4 loads are expected first; the sixteen f32-to-f16
; conversions and the two dwordx4 stores may then appear in any order.
; GCN-LABEL: {{^}}global_truncstore_v16f32_to_v16f16:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: buffer_store_dwordx4
; GCN-DAG: buffer_store_dwordx4
; GCN: s_endpgm
define void @global_truncstore_v16f32_to_v16f16(<16 x half> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 {
  %val = load <16 x float>, <16 x float> addrspace(1)* %in
  %cvt = fptrunc <16 x float> %val to <16 x half>
  store <16 x half> %cvt, <16 x half> addrspace(1)* %out
  ret void
}

; Add two half arguments and store the sum to %out. The target-specific
; checks cover SI only, where f16-to-f32 conversions are expected before an
; f32 add; the other target is only checked for reaching s_endpgm.
; FIXME: Unsafe math should fold conversions away
; GCN-LABEL: {{^}}fadd_f16:
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_f16(half addrspace(1)* %out, half %a, half %b) #0 {
  %add = fadd half %a, %b
  store half %add, half addrspace(1)* %out, align 4
  ret void
}

; Add two <2 x half> arguments and store the sum to %out. On SI the add is
; expected to be done as two f32 adds.
; GCN-LABEL: {{^}}fadd_v2f16:
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %a, <2 x half> %b) #0 {
  %add = fadd <2 x half> %a, %b
  store <2 x half> %add, <2 x half> addrspace(1)* %out, align 8
  ret void
}

; Add the <4 x half> at %in to the next <4 x half> element after it and store
; the sum to %out. On SI the add is expected to be done as four f32 adds.
; NOTE(review): both loads are marked align 16 although %b_ptr is %in
; advanced by one <4 x half> element; if that element is 8 bytes the two
; addresses cannot both be 16-byte aligned. Confirm whether this is intended.
; GCN-LABEL: {{^}}fadd_v4f16:
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %b_ptr = getelementptr <4 x half>, <4 x half> addrspace(1)* %in, i32 1
  %a = load <4 x half>, <4 x half> addrspace(1)* %in, align 16
  %b = load <4 x half>, <4 x half> addrspace(1)* %b_ptr, align 16
  %result = fadd <4 x half> %a, %b
  store <4 x half> %result, <4 x half> addrspace(1)* %out, align 16
  ret void
}

; Add two <8 x half> arguments and store the sum to %out. On SI the add is
; expected to be done as eight f32 adds.
; GCN-LABEL: {{^}}fadd_v8f16:
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v8f16(<8 x half> addrspace(1)* %out, <8 x half> %a, <8 x half> %b) #0 {
  %add = fadd <8 x half> %a, %b
  store <8 x half> %add, <8 x half> addrspace(1)* %out, align 32
  ret void
}

; Load a half from %in, bitcast it to i16 and store it to %out. The bitcast
; must be free: the loaded register is stored directly.
; GCN-LABEL: {{^}}test_bitcast_from_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) #0 {
  %val = load half, half addrspace(1)* %in
  %val_int = bitcast half %val to i16
  store i16 %val_int, i16 addrspace(1)* %out
  ret void
}

; Load an i16 from %in, bitcast it to half and store it to %out. The bitcast
; must be free: the loaded register is stored directly.
; GCN-LABEL: {{^}}test_bitcast_to_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %val = load i16, i16 addrspace(1)* %in
  %val_fp = bitcast i16 %val to half
  store half %val_fp, half addrspace(1)* %out
  ret void
}

; Attribute group referenced as #0 by the function definitions above.
attributes #0 = { nounwind }