; Source blob: d21d66176a1493e30c457982c3136eb0e8c6cb3b
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; half args should be promoted to float
;
; Stores a scalar half argument through %out. The checks expect the argument
; to be loaded into an SGPR, converted with v_cvt_f16_f32 and written with a
; 16-bit store.

; GCN-LABEL: {{^}}load_f16_arg:
; GCN: s_load_dword [[ARG:s[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[ARG]]
; GCN: buffer_store_short [[CVT]]
define void @load_f16_arg(half addrspace(1)* %out, half %arg) #0 {
  store half %arg, half addrspace(1)* %out
  ret void
}

; Stores a <2 x half> argument through %out. The checks expect two 16-bit
; loads (offsets 44 and 46) that are shifted/or'ed into one dword and written
; with a single dword store.

; GCN-LABEL: {{^}}load_v2f16_arg:
; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
; GCN: v_lshlrev_b32_e32 [[HI:v[0-9]+]], 16, [[V1]]
; GCN: v_or_b32_e32 [[PACKED:v[0-9]+]], [[V0]], [[HI]]
; GCN: buffer_store_dword [[PACKED]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN: s_endpgm
define void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x half> %arg) #0 {
  store <2 x half> %arg, <2 x half> addrspace(1)* %out
  ret void
}

; Stores a <3 x half> argument through %out. The checks expect exactly three
; 16-bit loads and exactly two stores (one dword, one short).

; GCN-LABEL: {{^}}load_v3f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_short
; GCN-NOT: buffer_store
; GCN: s_endpgm
define void @load_v3f16_arg(<3 x half> addrspace(1)* %out, <3 x half> %arg) #0 {
  store <3 x half> %arg, <3 x half> addrspace(1)* %out
  ret void
}

; Stores a <4 x half> argument through %out. The checks expect four 16-bit
; loads followed by one 64-bit store.

; GCN-LABEL: {{^}}load_v4f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_store_dwordx2
; GCN: s_endpgm
define void @load_v4f16_arg(<4 x half> addrspace(1)* %out, <4 x half> %arg) #0 {
  store <4 x half> %arg, <4 x half> addrspace(1)* %out
  ret void
}

; Stores a <8 x half> argument through %out. Only the function label is
; checked, so this test just requires that code generation succeeds.

; GCN-LABEL: {{^}}load_v8f16_arg:
define void @load_v8f16_arg(<8 x half> addrspace(1)* %out, <8 x half> %arg) #0 {
  store <8 x half> %arg, <8 x half> addrspace(1)* %out
  ret void
}

; Extends a <2 x half> argument to <2 x float> and stores it. Only the
; function label is checked.

; GCN-LABEL: {{^}}extload_v2f16_arg:
define void @extload_v2f16_arg(<2 x float> addrspace(1)* %out, <2 x half> %in) #0 {
  %fpext = fpext <2 x half> %in to <2 x float>
  store <2 x float> %fpext, <2 x float> addrspace(1)* %out
  ret void
}

; Extends a scalar half argument to float and stores it. Only the function
; label is checked.

; GCN-LABEL: {{^}}extload_f16_to_f32_arg:
define void @extload_f16_to_f32_arg(float addrspace(1)* %out, half %arg) #0 {
  %ext = fpext half %arg to float
  store float %ext, float addrspace(1)* %out
  ret void
}

; Extends a <2 x half> argument to <2 x float> and stores it. Only the
; function label is checked.

; GCN-LABEL: {{^}}extload_v2f16_to_v2f32_arg:
define void @extload_v2f16_to_v2f32_arg(<2 x float> addrspace(1)* %out, <2 x half> %arg) #0 {
  %ext = fpext <2 x half> %arg to <2 x float>
  store <2 x float> %ext, <2 x float> addrspace(1)* %out
  ret void
}

; Extends a <3 x half> argument to <3 x float> and stores it. The checks
; expect exactly three 16-bit loads, exactly three half-to-float conversions,
; and the result written as one dword store plus one 64-bit store.

; GCN-LABEL: {{^}}extload_v3f16_to_v3f32_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN-NOT: v_cvt_f32_f16
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_dwordx2
; GCN: s_endpgm
define void @extload_v3f16_to_v3f32_arg(<3 x float> addrspace(1)* %out, <3 x half> %arg) #0 {
  %ext = fpext <3 x half> %arg to <3 x float>
  store <3 x float> %ext, <3 x float> addrspace(1)* %out
  ret void
}

; Extends a <4 x half> argument to <4 x float> and stores it. Only the
; function label is checked.

; GCN-LABEL: {{^}}extload_v4f16_to_v4f32_arg:
define void @extload_v4f16_to_v4f32_arg(<4 x float> addrspace(1)* %out, <4 x half> %arg) #0 {
  %ext = fpext <4 x half> %arg to <4 x float>
  store <4 x float> %ext, <4 x float> addrspace(1)* %out
  ret void
}

; Extends a <8 x half> argument to <8 x float> and stores it. The checks
; expect eight 16-bit loads, eight half-to-float conversions and two 128-bit
; stores, in that order.

; GCN-LABEL: {{^}}extload_v8f16_to_v8f32_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort

; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32

; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
define void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x half> %arg) #0 {
  %ext = fpext <8 x half> %arg to <8 x float>
  store <8 x float> %ext, <8 x float> addrspace(1)* %out
  ret void
}

; Extends a scalar half argument to double and stores it. The expected
; argument-load offset differs between the two RUN configurations (0xb vs
; 0x2c); both then expect a single f32-to-f64 conversion of the loaded SGPR.

; GCN-LABEL: {{^}}extload_f16_to_f64_arg:
; SI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb{{$}}
; VI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c{{$}}
; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[ARG]]
; GCN: buffer_store_dwordx2 [[RESULT]]
define void @extload_f16_to_f64_arg(double addrspace(1)* %out, half %arg) #0 {
  %ext = fpext half %arg to double
  store double %ext, double addrspace(1)* %out
  ret void
}

; Extends a <2 x half> argument to <2 x double> and stores it. The checks
; expect, in any order, two 16-bit loads, two half-to-float conversions and
; two float-to-double conversions.

; GCN-LABEL: {{^}}extload_v2f16_to_v2f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x half> %arg) #0 {
  %ext = fpext <2 x half> %arg to <2 x double>
  store <2 x double> %ext, <2 x double> addrspace(1)* %out
  ret void
}

; Extends a <3 x half> argument to <3 x double> and stores it. The checks
; expect, in any order, three 16-bit loads, three half-to-float conversions
; and three float-to-double conversions.

; GCN-LABEL: {{^}}extload_v3f16_to_v3f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x half> %arg) #0 {
  %ext = fpext <3 x half> %arg to <3 x double>
  store <3 x double> %ext, <3 x double> addrspace(1)* %out
  ret void
}

; Extends a <4 x half> argument to <4 x double> and stores it. The checks
; expect, in any order, four 16-bit loads, four half-to-float conversions and
; four float-to-double conversions.

; GCN-LABEL: {{^}}extload_v4f16_to_v4f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x half> %arg) #0 {
  %ext = fpext <4 x half> %arg to <4 x double>
  store <4 x double> %ext, <4 x double> addrspace(1)* %out
  ret void
}

; Extends a <8 x half> argument to <8 x double> and stores it. The checks
; expect, in any order, eight 16-bit loads, eight half-to-float conversions
; and eight float-to-double conversions.

; GCN-LABEL: {{^}}extload_v8f16_to_v8f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v

; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v

; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32

; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32

; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32

; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32

; GCN: s_endpgm
define void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)* %out, <8 x half> %arg) #0 {
  %ext = fpext <8 x half> %arg to <8 x double>
  store <8 x double> %ext, <8 x double> addrspace(1)* %out
  ret void
}

; Copies one half from %in to %out. The checks expect a 16-bit load whose
; result register is stored directly with a 16-bit store.

; GCN-LABEL: {{^}}global_load_store_f16:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @global_load_store_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  store half %val, half addrspace(1)* %out
  ret void
}

; Copies one <2 x half> from %in to %out. The checks expect a dword load
; whose result register is stored directly with a dword store.

; GCN-LABEL: {{^}}global_load_store_v2f16:
; GCN: buffer_load_dword [[TMP:v[0-9]+]]
; GCN: buffer_store_dword [[TMP]]
define void @global_load_store_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  store <2 x half> %val, <2 x half> addrspace(1)* %out
  ret void
}

; Copies one <4 x half> from %in to %out. The checks expect a 64-bit load
; whose result register pair is stored directly with a 64-bit store.
; Note: this function declares its parameters as (%in, %out), the reverse of
; the other load/store tests in this file.

; GCN-LABEL: {{^}}global_load_store_v4f16:
; GCN: buffer_load_dwordx2 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx2 [[TMP]]
define void @global_load_store_v4f16(<4 x half> addrspace(1)* %in, <4 x half> addrspace(1)* %out) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  store <4 x half> %val, <4 x half> addrspace(1)* %out
  ret void
}

; Copies one <8 x half> from %in to %out. The checks expect a 128-bit load
; whose result register range is stored directly with a 128-bit store.
; The store check reuses the TMP capture (rather than redefining it) so that
; it actually verifies the loaded registers are the ones being stored, the
; same way the f16, v2f16 and v4f16 variants do.

; GCN-LABEL: {{^}}global_load_store_v8f16:
; GCN: buffer_load_dwordx4 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx4 [[TMP]]
; GCN: s_endpgm
define void @global_load_store_v8f16(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  store <8 x half> %val, <8 x half> addrspace(1)* %out
  ret void
}

; Loads a half from %in, extends it to float and stores it to %out. The
; checks expect a 16-bit load feeding one half-to-float conversion whose
; result is stored as a dword.

; GCN-LABEL: {{^}}global_extload_f16_to_f32:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_dword [[CVT]]
define void @global_extload_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  %cvt = fpext half %val to float
  store float %cvt, float addrspace(1)* %out
  ret void
}

; Loads a <2 x half> from %in, extends it to <2 x float> and stores it. The
; checks expect one dword load; the low half is converted directly, the high
; half after a 16-bit right shift, and both results are stored as one 64-bit
; register pair.

; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32:
; GCN: buffer_load_dword [[LOAD:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD]]
; GCN: v_lshrrev_b32_e32 [[HI:v[0-9]+]], 16, [[LOAD]]
; GCN: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[HI]]
; GCN: buffer_store_dwordx2 v{{\[}}[[CVT0]]:[[CVT1]]{{\]}}
; GCN: s_endpgm
define void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  %cvt = fpext <2 x half> %val to <2 x float>
  store <2 x float> %cvt, <2 x float> addrspace(1)* %out
  ret void
}

; Loads a <3 x half> from %in, extends it to <3 x float> and stores it. Only
; the function label is checked.

; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f32:
define void @global_extload_v3f16_to_v3f32(<3 x float> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
  %val = load <3 x half>, <3 x half> addrspace(1)* %in
  %cvt = fpext <3 x half> %val to <3 x float>
  store <3 x float> %cvt, <3 x float> addrspace(1)* %out
  ret void
}

; Loads a <4 x half> from %in, extends it to <4 x float> and stores it. Only
; the function label is checked.

; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f32:
define void @global_extload_v4f16_to_v4f32(<4 x float> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  %cvt = fpext <4 x half> %val to <4 x float>
  store <4 x float> %cvt, <4 x float> addrspace(1)* %out
  ret void
}

; Loads a <8 x half> from %in, extends it to <8 x float> and stores it. Only
; the function label is checked.

; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f32:
define void @global_extload_v8f16_to_v8f32(<8 x float> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  %cvt = fpext <8 x half> %val to <8 x float>
  store <8 x float> %cvt, <8 x float> addrspace(1)* %out
  ret void
}

; Loads a <16 x half> from %in, extends it to <16 x float> and stores it. The
; checks expect two 128-bit loads, sixteen half-to-float conversions and four
; 128-bit stores, in that order.

; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f32:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4

; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32

; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4

; GCN: s_endpgm
define void @global_extload_v16f16_to_v16f32(<16 x float> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
  %val = load <16 x half>, <16 x half> addrspace(1)* %in
  %cvt = fpext <16 x half> %val to <16 x float>
  store <16 x float> %cvt, <16 x float> addrspace(1)* %out
  ret void
}

; Loads a half from %in, extends it to double and stores it. The checks
; expect the extension to go through float: a 16-bit load, a half-to-float
; conversion, then a float-to-double conversion whose result is stored.

; GCN-LABEL: {{^}}global_extload_f16_to_f64:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[LOAD]]
; GCN: v_cvt_f64_f32_e32 [[CVT1:v\[[0-9]+:[0-9]+\]]], [[CVT0]]
; GCN: buffer_store_dwordx2 [[CVT1]]
define void @global_extload_f16_to_f64(double addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  %cvt = fpext half %val to double
  store double %cvt, double addrspace(1)* %out
  ret void
}

; Loads a <2 x half> from %in, extends it to <2 x double> and stores it. The
; checks expect one dword load, a shift to extract the high half, two
; half-to-float and two float-to-double conversions, and a single 128-bit
; store spanning from the first result's low register to the second result's
; high register.

; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64:
; GCN-DAG: buffer_load_dword [[LOAD:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: v_lshrrev_b32_e32 [[HI:v[0-9]+]], 16, [[LOAD]]
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD]]
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[HI]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT2_LO:[0-9]+]]:[[CVT2_HI:[0-9]+]]{{\]}}, v[[CVT0]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT3_LO:[0-9]+]]:[[CVT3_HI:[0-9]+]]{{\]}}, v[[CVT1]]
; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[CVT2_LO]]:[[CVT3_HI]]{{\]}}
; GCN: s_endpgm
define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  %cvt = fpext <2 x half> %val to <2 x double>
  store <2 x double> %cvt, <2 x double> addrspace(1)* %out
  ret void
}

; Loads a <3 x half> from %in, extends it to <3 x double> and stores it. The
; checks expect one 64-bit load, three half-to-float conversions (with a
; 16-bit shift among them), exactly three float-to-double conversions, and
; the result written as a 128-bit store at offset 0 plus a 64-bit store at
; offset 16.

; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f64:

; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_lshrrev_b32_e32 {{v[0-9]+}}, 16, {{v[0-9]+}}
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32

; GCN: v_cvt_f64_f32_e32
; GCN: v_cvt_f64_f32_e32
; GCN: v_cvt_f64_f32_e32
; GCN-NOT: v_cvt_f64_f32_e32

; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16
; GCN: s_endpgm
define void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
  %val = load <3 x half>, <3 x half> addrspace(1)* %in
  %cvt = fpext <3 x half> %val to <3 x double>
  store <3 x double> %cvt, <3 x double> addrspace(1)* %out
  ret void
}

; Loads a <4 x half> from %in, extends it to <4 x double> and stores it. Only
; the function label is checked.

; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f64:
define void @global_extload_v4f16_to_v4f64(<4 x double> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  %cvt = fpext <4 x half> %val to <4 x double>
  store <4 x double> %cvt, <4 x double> addrspace(1)* %out
  ret void
}

; Loads a <8 x half> from %in, extends it to <8 x double> and stores it. Only
; the function label is checked.

; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f64:
define void @global_extload_v8f16_to_v8f64(<8 x double> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  %cvt = fpext <8 x half> %val to <8 x double>
  store <8 x double> %cvt, <8 x double> addrspace(1)* %out
  ret void
}

; Loads a <16 x half> from %in, extends it to <16 x double> and stores it.
; Only the function label is checked.

; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f64:
define void @global_extload_v16f16_to_v16f64(<16 x double> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
  %val = load <16 x half>, <16 x half> addrspace(1)* %in
  %cvt = fpext <16 x half> %val to <16 x double>
  store <16 x double> %cvt, <16 x double> addrspace(1)* %out
  ret void
}

; Loads a float from %in, truncates it to half and stores it to %out. The
; checks expect a dword load feeding one float-to-half conversion whose
; result is written with a 16-bit store.

; GCN-LABEL: {{^}}global_truncstore_f32_to_f16:
; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_short [[CVT]]
define void @global_truncstore_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %val = load float, float addrspace(1)* %in
  %cvt = fptrunc float %val to half
  store half %cvt, half addrspace(1)* %out
  ret void
}

; Loads a <2 x float> from %in, truncates it to <2 x half> and stores it. The
; checks expect a 64-bit load, one float-to-half conversion per element, the
; second result shifted left by 16 and or'ed with the first, and the packed
; value written with a single dword store.

; GCN-LABEL: {{^}}global_truncstore_v2f32_to_v2f16:
; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT0:v[0-9]+]], v[[LO]]
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], v[[HI]]
; GCN-DAG: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 16, [[CVT1]]
; GCN-DAG: v_or_b32_e32 [[PACKED:v[0-9]+]], [[CVT0]], [[SHL]]
; GCN-DAG: buffer_store_dword [[PACKED]]
; GCN: s_endpgm
define void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
  %val = load <2 x float>, <2 x float> addrspace(1)* %in
  %cvt = fptrunc <2 x float> %val to <2 x half>
  store <2 x half> %cvt, <2 x half> addrspace(1)* %out
  ret void
}

; Loads a <3 x float> from %in, truncates it to <3 x half> and stores it. The
; checks expect a 128-bit load, exactly three float-to-half conversions, and
; the result written as a 16-bit store followed by a dword store.

; GCN-LABEL: {{^}}global_truncstore_v3f32_to_v3f16:
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN-NOT: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_dword
; GCN: s_endpgm
define void @global_truncstore_v3f32_to_v3f16(<3 x half> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
  %val = load <3 x float>, <3 x float> addrspace(1)* %in
  %cvt = fptrunc <3 x float> %val to <3 x half>
  store <3 x half> %cvt, <3 x half> addrspace(1)* %out
  ret void
}

; Loads a <4 x float> from %in, truncates it to <4 x half> and stores it. The
; checks expect a 128-bit load, four float-to-half conversions and one 64-bit
; store.

; GCN-LABEL: {{^}}global_truncstore_v4f32_to_v4f16:
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_dwordx2
; GCN: s_endpgm
define void @global_truncstore_v4f32_to_v4f16(<4 x half> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
  %val = load <4 x float>, <4 x float> addrspace(1)* %in
  %cvt = fptrunc <4 x float> %val to <4 x half>
  store <4 x half> %cvt, <4 x half> addrspace(1)* %out
  ret void
}

; Loads a <8 x float> from %in, truncates it to <8 x half> and stores it. The
; checks expect two 128-bit loads, eight float-to-half conversions and one
; 128-bit store.

; GCN-LABEL: {{^}}global_truncstore_v8f32_to_v8f16:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_dwordx4
; GCN: s_endpgm
define void @global_truncstore_v8f32_to_v8f16(<8 x half> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {
  %val = load <8 x float>, <8 x float> addrspace(1)* %in
  %cvt = fptrunc <8 x float> %val to <8 x half>
  store <8 x half> %cvt, <8 x half> addrspace(1)* %out
  ret void
}

; Loads a <16 x float> from %in, truncates it to <16 x half> and stores it.
; The checks expect four 128-bit loads, then (in any order) sixteen
; float-to-half conversions and two 128-bit stores.

; GCN-LABEL: {{^}}global_truncstore_v16f32_to_v16f16:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: buffer_store_dwordx4
; GCN-DAG: buffer_store_dwordx4
; GCN: s_endpgm
define void @global_truncstore_v16f32_to_v16f16(<16 x half> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 {
  %val = load <16 x float>, <16 x float> addrspace(1)* %in
  %cvt = fptrunc <16 x float> %val to <16 x half>
  store <16 x half> %cvt, <16 x half> addrspace(1)* %out
  ret void
}

; FIXME: Unsafe math should fold conversions away
;
; Adds two scalar half arguments and stores the sum. Instruction checks are
; only given for the SI prefix, which expects four half-to-float conversions
; and an f32 add; the other prefix only requires the function to reach
; s_endpgm.

; GCN-LABEL: {{^}}fadd_f16:
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_f16(half addrspace(1)* %out, half %a, half %b) #0 {
  %add = fadd half %a, %b
  store half %add, half addrspace(1)* %out, align 4
  ret void
}

; Adds two <2 x half> arguments and stores the sum. The SI prefix expects one
; f32 add per element (two in total).

; GCN-LABEL: {{^}}fadd_v2f16:
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %a, <2 x half> %b) #0 {
  %add = fadd <2 x half> %a, %b
  store <2 x half> %add, <2 x half> addrspace(1)* %out, align 8
  ret void
}

; Loads two consecutive <4 x half> vectors from %in (element 0 and element
; 1), adds them and stores the sum to %out. The SI prefix expects one f32 add
; per element (four in total).

; GCN-LABEL: {{^}}fadd_v4f16:
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %b_ptr = getelementptr <4 x half>, <4 x half> addrspace(1)* %in, i32 1
  %a = load <4 x half>, <4 x half> addrspace(1)* %in, align 16
  %b = load <4 x half>, <4 x half> addrspace(1)* %b_ptr, align 16
  %result = fadd <4 x half> %a, %b
  store <4 x half> %result, <4 x half> addrspace(1)* %out, align 16
  ret void
}

; Adds two <8 x half> arguments and stores the sum. The SI prefix expects one
; f32 add per element (eight in total).

; GCN-LABEL: {{^}}fadd_v8f16:
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v8f16(<8 x half> addrspace(1)* %out, <8 x half> %a, <8 x half> %b) #0 {
  %add = fadd <8 x half> %a, %b
  store <8 x half> %add, <8 x half> addrspace(1)* %out, align 32
  ret void
}

; Loads two consecutive halves from %in, subtracts the second from the first
; and stores the difference to %out. The checks expect the subtraction to be
; emitted as an f32 v_subrev.

; GCN-LABEL: {{^}}fsub_f16:
; GCN: v_subrev_f32_e32
; GCN: s_endpgm
define void @fsub_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %b_ptr = getelementptr half, half addrspace(1)* %in, i32 1
  %a = load half, half addrspace(1)* %in
  %b = load half, half addrspace(1)* %b_ptr
  %sub = fsub half %a, %b
  store half %sub, half addrspace(1)* %out
  ret void
}

; Loads a half from %in, bitcasts it to i16 and stores it to %out. The checks
; expect the bitcast to be free: a 16-bit load whose result register is
; stored directly.

; GCN-LABEL: {{^}}test_bitcast_from_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) #0 {
  %val = load half, half addrspace(1)* %in
  %val_int = bitcast half %val to i16
  store i16 %val_int, i16 addrspace(1)* %out
  ret void
}

; Loads an i16 from %in, bitcasts it to half and stores it to %out. The
; checks expect the bitcast to be free: a 16-bit load whose result register
; is stored directly.

; GCN-LABEL: {{^}}test_bitcast_to_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %val = load i16, i16 addrspace(1)* %in
  %val_fp = bitcast i16 %val to half
  store half %val_fp, half addrspace(1)* %out
  ret void
}

; Attribute group #0, referenced by the function definitions above.
attributes #0 = { nounwind }