; Source blob: b63ba8e3632646cc56f1e9e76decd9b279103604
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; half args should be promoted to float

; Store a scalar half argument through the addrspace(1) pointer %out.
; GCN-LABEL: {{^}}load_f16_arg:
; GCN: s_load_dword [[ARG:s[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[ARG]]
; GCN: buffer_store_short [[CVT]]
define void @load_f16_arg(half addrspace(1)* %out, half %arg) #0 {
  store half %arg, half addrspace(1)* %out
  ret void
}

; Store a <2 x half> argument. The checks expect the two halves to be loaded
; separately and packed into one dword before the store.
; GCN-LABEL: {{^}}load_v2f16_arg:
; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
; GCN: v_lshlrev_b32_e32 [[HI:v[0-9]+]], 16, [[V1]]
; GCN: v_or_b32_e32 [[PACKED:v[0-9]+]], [[HI]], [[V0]]
; GCN: buffer_store_dword [[PACKED]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN: s_endpgm
define void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x half> %arg) #0 {
  store <2 x half> %arg, <2 x half> addrspace(1)* %out
  ret void
}

; Store a <3 x half> argument. The checks expect exactly three ushort loads
; and one dword store plus one short store (in either order).
; GCN-LABEL: {{^}}load_v3f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_short
; GCN-NOT: buffer_store
; GCN: s_endpgm
define void @load_v3f16_arg(<3 x half> addrspace(1)* %out, <3 x half> %arg) #0 {
  store <3 x half> %arg, <3 x half> addrspace(1)* %out
  ret void
}

; Store a <4 x half> argument. The checks expect four ushort loads followed
; by a single dwordx2 store.
; GCN-LABEL: {{^}}load_v4f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_store_dwordx2
; GCN: s_endpgm
define void @load_v4f16_arg(<4 x half> addrspace(1)* %out, <4 x half> %arg) #0 {
  store <4 x half> %arg, <4 x half> addrspace(1)* %out
  ret void
}

; Store an <8 x half> argument. Only the function label is checked; there
; are no instruction checks for this case.
; GCN-LABEL: {{^}}load_v8f16_arg:
define void @load_v8f16_arg(<8 x half> addrspace(1)* %out, <8 x half> %arg) #0 {
  store <8 x half> %arg, <8 x half> addrspace(1)* %out
  ret void
}

; Extend a <2 x half> argument to <2 x float> and store it. Only the
; function label is checked.
; GCN-LABEL: {{^}}extload_v2f16_arg:
define void @extload_v2f16_arg(<2 x float> addrspace(1)* %out, <2 x half> %in) #0 {
  %fpext = fpext <2 x half> %in to <2 x float>
  store <2 x float> %fpext, <2 x float> addrspace(1)* %out
  ret void
}

; Extend a scalar half argument to float and store it. Only the function
; label is checked.
; GCN-LABEL: {{^}}extload_f16_to_f32_arg:
define void @extload_f16_to_f32_arg(float addrspace(1)* %out, half %arg) #0 {
  %ext = fpext half %arg to float
  store float %ext, float addrspace(1)* %out
  ret void
}

; Extend a <2 x half> argument to <2 x float> and store it. Only the
; function label is checked.
; GCN-LABEL: {{^}}extload_v2f16_to_v2f32_arg:
define void @extload_v2f16_to_v2f32_arg(<2 x float> addrspace(1)* %out, <2 x half> %arg) #0 {
  %ext = fpext <2 x half> %arg to <2 x float>
  store <2 x float> %ext, <2 x float> addrspace(1)* %out
  ret void
}

; Extend a <3 x half> argument to <3 x float> and store it. The checks
; expect exactly three ushort loads and exactly three f16->f32 conversions.
; GCN-LABEL: {{^}}extload_v3f16_to_v3f32_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN-NOT: v_cvt_f32_f16
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_dwordx2
; GCN: s_endpgm
define void @extload_v3f16_to_v3f32_arg(<3 x float> addrspace(1)* %out, <3 x half> %arg) #0 {
  %ext = fpext <3 x half> %arg to <3 x float>
  store <3 x float> %ext, <3 x float> addrspace(1)* %out
  ret void
}

; Extend a <4 x half> argument to <4 x float> and store it. Only the
; function label is checked.
; GCN-LABEL: {{^}}extload_v4f16_to_v4f32_arg:
define void @extload_v4f16_to_v4f32_arg(<4 x float> addrspace(1)* %out, <4 x half> %arg) #0 {
  %ext = fpext <4 x half> %arg to <4 x float>
  store <4 x float> %ext, <4 x float> addrspace(1)* %out
  ret void
}

; Extend an <8 x half> argument to <8 x float> and store it. The checks
; expect eight ushort loads, eight f16->f32 conversions and two dwordx4
; stores.
; GCN-LABEL: {{^}}extload_v8f16_to_v8f32_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort

; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32

; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
define void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x half> %arg) #0 {
  %ext = fpext <8 x half> %arg to <8 x float>
  store <8 x float> %ext, <8 x float> addrspace(1)* %out
  ret void
}

; Extend a scalar half argument to double and store it. The argument load
; offset differs per subtarget, so it is checked with separate prefixes.
; GCN-LABEL: {{^}}extload_f16_to_f64_arg:
; SI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb{{$}}
; VI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c{{$}}
; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[ARG]]
; GCN: buffer_store_dwordx2 [[RESULT]]
define void @extload_f16_to_f64_arg(double addrspace(1)* %out, half %arg) #0 {
  %ext = fpext half %arg to double
  store double %ext, double addrspace(1)* %out
  ret void
}

; Extend a <2 x half> argument to <2 x double> and store it. The checks
; expect each element to go through f16->f32 and then f32->f64.
; GCN-LABEL: {{^}}extload_v2f16_to_v2f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x half> %arg) #0 {
  %ext = fpext <2 x half> %arg to <2 x double>
  store <2 x double> %ext, <2 x double> addrspace(1)* %out
  ret void
}

; Extend a <3 x half> argument to <3 x double> and store it. The checks
; expect three loads and three conversions of each kind, in any order.
; GCN-LABEL: {{^}}extload_v3f16_to_v3f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x half> %arg) #0 {
  %ext = fpext <3 x half> %arg to <3 x double>
  store <3 x double> %ext, <3 x double> addrspace(1)* %out
  ret void
}

; Extend a <4 x half> argument to <4 x double> and store it. The checks
; expect four loads and four conversions of each kind, in any order.
; GCN-LABEL: {{^}}extload_v4f16_to_v4f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x half> %arg) #0 {
  %ext = fpext <4 x half> %arg to <4 x double>
  store <4 x double> %ext, <4 x double> addrspace(1)* %out
  ret void
}

; Extend an <8 x half> argument to <8 x double> and store it. The checks
; expect eight loads and eight conversions of each kind, in any order.
; GCN-LABEL: {{^}}extload_v8f16_to_v8f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v

; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v

; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32

; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32

; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32

; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32

; GCN: s_endpgm
define void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)* %out, <8 x half> %arg) #0 {
  %ext = fpext <8 x half> %arg to <8 x double>
  store <8 x double> %ext, <8 x double> addrspace(1)* %out
  ret void
}

; Copy one half from %in to %out. The checks require the stored register
; to be the one the load produced, i.e. no conversion in between.
; GCN-LABEL: {{^}}global_load_store_f16:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @global_load_store_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  store half %val, half addrspace(1)* %out
  ret void
}

; Copy a <2 x half> from %in to %out. The checks require a single dword
; load whose result register is stored unchanged.
; GCN-LABEL: {{^}}global_load_store_v2f16:
; GCN: buffer_load_dword [[TMP:v[0-9]+]]
; GCN: buffer_store_dword [[TMP]]
define void @global_load_store_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  store <2 x half> %val, <2 x half> addrspace(1)* %out
  ret void
}

; Copy a <4 x half> from %in to %out. The checks require a single dwordx2
; load whose result registers are stored unchanged.
; Note that this function declares %in before %out, unlike its siblings.
; GCN-LABEL: {{^}}global_load_store_v4f16:
; GCN: buffer_load_dwordx2 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx2 [[TMP]]
define void @global_load_store_v4f16(<4 x half> addrspace(1)* %in, <4 x half> addrspace(1)* %out) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  store <4 x half> %val, <4 x half> addrspace(1)* %out
  ret void
}

; Copy an <8 x half> from %in to %out. The store check reuses the TMP
; variable captured by the load (instead of re-defining it), so the test
; verifies that the loaded register tuple is stored unchanged.
; GCN-LABEL: {{^}}global_load_store_v8f16:
; GCN: buffer_load_dwordx4 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx4 [[TMP]]
; GCN: s_endpgm
define void @global_load_store_v8f16(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  store <8 x half> %val, <8 x half> addrspace(1)* %out
  ret void
}

; Load a half from %in, extend it to float and store it to %out.
; GCN-LABEL: {{^}}global_extload_f16_to_f32:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_dword [[CVT]]
define void @global_extload_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  %cvt = fpext half %val to float
  store float %cvt, float addrspace(1)* %out
  ret void
}

; Load a <2 x half> from %in, extend it to <2 x float> and store it. The
; checks expect one dword load; the high half is extracted with a 16-bit
; right shift before its conversion.
; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32:
; GCN: buffer_load_dword [[LOAD:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD]]
; GCN: v_lshrrev_b32_e32 [[HI:v[0-9]+]], 16, [[LOAD]]
; GCN: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[HI]]
; GCN: buffer_store_dwordx2 v{{\[}}[[CVT0]]:[[CVT1]]{{\]}}
; GCN: s_endpgm
define void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  %cvt = fpext <2 x half> %val to <2 x float>
  store <2 x float> %cvt, <2 x float> addrspace(1)* %out
  ret void
}

; Load a <3 x half> from %in, extend it to <3 x float> and store it. Only
; the function label is checked.
; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f32:
define void @global_extload_v3f16_to_v3f32(<3 x float> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
  %val = load <3 x half>, <3 x half> addrspace(1)* %in
  %cvt = fpext <3 x half> %val to <3 x float>
  store <3 x float> %cvt, <3 x float> addrspace(1)* %out
  ret void
}

; Load a <4 x half> from %in, extend it to <4 x float> and store it. Only
; the function label is checked.
; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f32:
define void @global_extload_v4f16_to_v4f32(<4 x float> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  %cvt = fpext <4 x half> %val to <4 x float>
  store <4 x float> %cvt, <4 x float> addrspace(1)* %out
  ret void
}

; Load an <8 x half> from %in, extend it to <8 x float> and store it. Only
; the function label is checked.
; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f32:
define void @global_extload_v8f16_to_v8f32(<8 x float> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  %cvt = fpext <8 x half> %val to <8 x float>
  store <8 x float> %cvt, <8 x float> addrspace(1)* %out
  ret void
}

; Load a <16 x half> from %in, extend it to <16 x float> and store it. The
; checks expect two dwordx4 loads, sixteen f16->f32 conversions and four
; dwordx4 stores.
; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f32:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4

; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32

; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4

; GCN: s_endpgm
define void @global_extload_v16f16_to_v16f32(<16 x float> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
  %val = load <16 x half>, <16 x half> addrspace(1)* %in
  %cvt = fpext <16 x half> %val to <16 x float>
  store <16 x float> %cvt, <16 x float> addrspace(1)* %out
  ret void
}

; Load a half from %in, extend it to double and store it. The checks expect
; the extension to go through f32 (f16->f32, then f32->f64).
; GCN-LABEL: {{^}}global_extload_f16_to_f64:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[LOAD]]
; GCN: v_cvt_f64_f32_e32 [[CVT1:v\[[0-9]+:[0-9]+\]]], [[CVT0]]
; GCN: buffer_store_dwordx2 [[CVT1]]
define void @global_extload_f16_to_f64(double addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  %cvt = fpext half %val to double
  store double %cvt, double addrspace(1)* %out
  ret void
}

; Load a <2 x half> from %in, extend it to <2 x double> and store it. The
; checks tie each f32->f64 conversion to its f16->f32 input and require one
; dwordx4 store spanning both results.
; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64:
; GCN-DAG: buffer_load_dword [[LOAD:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: v_lshrrev_b32_e32 [[HI:v[0-9]+]], 16, [[LOAD]]
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD]]
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[HI]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT2_LO:[0-9]+]]:[[CVT2_HI:[0-9]+]]{{\]}}, v[[CVT0]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT3_LO:[0-9]+]]:[[CVT3_HI:[0-9]+]]{{\]}}, v[[CVT1]]
; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[CVT2_LO]]:[[CVT3_HI]]{{\]}}
; GCN: s_endpgm
define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  %cvt = fpext <2 x half> %val to <2 x double>
  store <2 x double> %cvt, <2 x double> addrspace(1)* %out
  ret void
}

; Load a <3 x half> from %in, extend it to <3 x double> and store it.
; The XSI and XVI lines below use prefixes that the lit run lines at the top
; of this file do not pass to FileCheck (only GCN, SI and VI are enabled),
; so they are inactive; only the GCN checks take effect.
; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f64:

; XSI: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
; XSI: v_cvt_f32_f16_e32
; XSI: v_cvt_f32_f16_e32
; XSI-DAG: v_lshrrev_b32_e32 {{v[0-9]+}}, 16, {{v[0-9]+}}
; XSI: v_cvt_f32_f16_e32
; XSI-NOT: v_cvt_f32_f16

; XVI: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
; XVI: v_cvt_f32_f16_e32
; XVI: v_cvt_f32_f16_e32
; XVI-DAG: v_lshrrev_b32_e32 {{v[0-9]+}}, 16, {{v[0-9]+}}
; XVI: v_cvt_f32_f16_e32
; XVI-NOT: v_cvt_f32_f16

; GCN: buffer_load_dwordx2 v{{\[}}[[IN_LO:[0-9]+]]:[[IN_HI:[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[Z32:v[0-9]+]], v[[IN_HI]]
; GCN: v_cvt_f32_f16_e32 [[X32:v[0-9]+]], v[[IN_LO]]
; GCN: v_lshrrev_b32_e32 [[Y16:v[0-9]+]], 16, v[[IN_LO]]
; GCN: v_cvt_f32_f16_e32 [[Y32:v[0-9]+]], [[Y16]]

; GCN: v_cvt_f64_f32_e32 [[Z:v\[[0-9]+:[0-9]+\]]], [[Z32]]
; GCN: v_cvt_f64_f32_e32 v{{\[}}[[XLO:[0-9]+]]:{{[0-9]+}}], [[X32]]
; GCN: v_cvt_f64_f32_e32 v[{{[0-9]+}}:[[YHI:[0-9]+]]{{\]}}, [[Y32]]
; GCN-NOT: v_cvt_f64_f32_e32

; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[XLO]]:[[YHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_dwordx2 [[Z]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16
; GCN: s_endpgm
define void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
  %val = load <3 x half>, <3 x half> addrspace(1)* %in
  %cvt = fpext <3 x half> %val to <3 x double>
  store <3 x double> %cvt, <3 x double> addrspace(1)* %out
  ret void
}

; Load a <4 x half> from %in, extend it to <4 x double> and store it. Only
; the function label is checked.
; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f64:
define void @global_extload_v4f16_to_v4f64(<4 x double> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  %cvt = fpext <4 x half> %val to <4 x double>
  store <4 x double> %cvt, <4 x double> addrspace(1)* %out
  ret void
}

; Load an <8 x half> from %in, extend it to <8 x double> and store it. Only
; the function label is checked.
; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f64:
define void @global_extload_v8f16_to_v8f64(<8 x double> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  %cvt = fpext <8 x half> %val to <8 x double>
  store <8 x double> %cvt, <8 x double> addrspace(1)* %out
  ret void
}

; Load a <16 x half> from %in, extend it to <16 x double> and store it.
; Only the function label is checked.
; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f64:
define void @global_extload_v16f16_to_v16f64(<16 x double> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
  %val = load <16 x half>, <16 x half> addrspace(1)* %in
  %cvt = fpext <16 x half> %val to <16 x double>
  store <16 x double> %cvt, <16 x double> addrspace(1)* %out
  ret void
}

; Load a float from %in, truncate it to half and store it to %out.
; GCN-LABEL: {{^}}global_truncstore_f32_to_f16:
; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_short [[CVT]]
define void @global_truncstore_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %val = load float, float addrspace(1)* %in
  %cvt = fptrunc float %val to half
  store half %cvt, half addrspace(1)* %out
  ret void
}

; Load a <2 x float> from %in, truncate it to <2 x half> and store it. The
; checks expect the two converted halves to be packed (shift + or) into a
; single dword store.
; GCN-LABEL: {{^}}global_truncstore_v2f32_to_v2f16:
; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT0:v[0-9]+]], v[[LO]]
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], v[[HI]]
; GCN-DAG: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 16, [[CVT1]]
; GCN-DAG: v_or_b32_e32 [[PACKED:v[0-9]+]], [[SHL]], [[CVT0]]
; GCN-DAG: buffer_store_dword [[PACKED]]
; GCN: s_endpgm
define void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
  %val = load <2 x float>, <2 x float> addrspace(1)* %in
  %cvt = fptrunc <2 x float> %val to <2 x half>
  store <2 x half> %cvt, <2 x half> addrspace(1)* %out
  ret void
}

; Load a <3 x float> from %in, truncate it to <3 x half> and store it. The
; checks expect exactly three f32->f16 conversions, then a short store
; followed by a dword store.
; GCN-LABEL: {{^}}global_truncstore_v3f32_to_v3f16:
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN-NOT: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_dword
; GCN: s_endpgm
define void @global_truncstore_v3f32_to_v3f16(<3 x half> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
  %val = load <3 x float>, <3 x float> addrspace(1)* %in
  %cvt = fptrunc <3 x float> %val to <3 x half>
  store <3 x half> %cvt, <3 x half> addrspace(1)* %out
  ret void
}

; Load a <4 x float> from %in, truncate it to <4 x half> and store it. The
; checks expect four f32->f16 conversions and a single dwordx2 store.
; GCN-LABEL: {{^}}global_truncstore_v4f32_to_v4f16:
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_dwordx2
; GCN: s_endpgm
define void @global_truncstore_v4f32_to_v4f16(<4 x half> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
  %val = load <4 x float>, <4 x float> addrspace(1)* %in
  %cvt = fptrunc <4 x float> %val to <4 x half>
  store <4 x half> %cvt, <4 x half> addrspace(1)* %out
  ret void
}

; Load an <8 x float> from %in, truncate it to <8 x half> and store it. The
; checks expect two dwordx4 loads, eight f32->f16 conversions and a single
; dwordx4 store.
; GCN-LABEL: {{^}}global_truncstore_v8f32_to_v8f16:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_dwordx4
; GCN: s_endpgm
define void @global_truncstore_v8f32_to_v8f16(<8 x half> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {
  %val = load <8 x float>, <8 x float> addrspace(1)* %in
  %cvt = fptrunc <8 x float> %val to <8 x half>
  store <8 x half> %cvt, <8 x half> addrspace(1)* %out
  ret void
}

; Load a <16 x float> from %in, truncate it to <16 x half> and store it.
; The checks expect four dwordx4 loads, sixteen f32->f16 conversions and
; two dwordx4 stores.
; GCN-LABEL: {{^}}global_truncstore_v16f32_to_v16f16:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: buffer_store_dwordx4
; GCN-DAG: buffer_store_dwordx4
; GCN: s_endpgm
define void @global_truncstore_v16f32_to_v16f16(<16 x half> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 {
  %val = load <16 x float>, <16 x float> addrspace(1)* %in
  %cvt = fptrunc <16 x float> %val to <16 x half>
  store <16 x half> %cvt, <16 x half> addrspace(1)* %out
  ret void
}

; Add two scalar half arguments and store the result. Instruction checks
; are applied for the SI prefix only; both targets must reach s_endpgm.
; FIXME: Unsafe math should fold conversions away
; GCN-LABEL: {{^}}fadd_f16:
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_f16(half addrspace(1)* %out, half %a, half %b) #0 {
  %add = fadd half %a, %b
  store half %add, half addrspace(1)* %out, align 4
  ret void
}

; Add two <2 x half> arguments and store the result. The SI checks expect
; two f32 adds.
; GCN-LABEL: {{^}}fadd_v2f16:
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %a, <2 x half> %b) #0 {
  %add = fadd <2 x half> %a, %b
  store <2 x half> %add, <2 x half> addrspace(1)* %out, align 8
  ret void
}

; Load two consecutive <4 x half> values from %in (elements 0 and 1), add
; them and store the result. The SI checks expect four f32 adds.
; GCN-LABEL: {{^}}fadd_v4f16:
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %b_ptr = getelementptr <4 x half>, <4 x half> addrspace(1)* %in, i32 1
  %a = load <4 x half>, <4 x half> addrspace(1)* %in, align 16
  %b = load <4 x half>, <4 x half> addrspace(1)* %b_ptr, align 16
  %result = fadd <4 x half> %a, %b
  store <4 x half> %result, <4 x half> addrspace(1)* %out, align 16
  ret void
}

; Add two <8 x half> arguments and store the result. The SI checks expect
; eight f32 adds.
; GCN-LABEL: {{^}}fadd_v8f16:
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v8f16(<8 x half> addrspace(1)* %out, <8 x half> %a, <8 x half> %b) #0 {
  %add = fadd <8 x half> %a, %b
  store <8 x half> %add, <8 x half> addrspace(1)* %out, align 32
  ret void
}

; Load two consecutive halves from %in (elements 0 and 1), subtract the
; second from the first and store the result.
; GCN-LABEL: {{^}}fsub_f16:
; GCN: v_subrev_f32_e32
; GCN: s_endpgm
define void @fsub_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %b_ptr = getelementptr half, half addrspace(1)* %in, i32 1
  %a = load half, half addrspace(1)* %in
  %b = load half, half addrspace(1)* %b_ptr
  %sub = fsub half %a, %b
  store half %sub, half addrspace(1)* %out
  ret void
}

; Load a half, bitcast it to i16 and store it. The checks require the
; loaded register to be stored unchanged, i.e. the bitcast emits no code.
; GCN-LABEL: {{^}}test_bitcast_from_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) #0 {
  %val = load half, half addrspace(1)* %in
  %val_int = bitcast half %val to i16
  store i16 %val_int, i16 addrspace(1)* %out
  ret void
}

; Load an i16, bitcast it to half and store it. The checks require the
; loaded register to be stored unchanged, i.e. the bitcast emits no code.
; GCN-LABEL: {{^}}test_bitcast_to_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %val = load i16, i16 addrspace(1)* %in
  %val_fp = bitcast i16 %val to half
  store half %val_fp, half addrspace(1)* %out
  ret void
}

; Attribute group referenced as #0 by the function definitions in this file.
attributes #0 = { nounwind }