; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; half args should be promoted to float

; Stores a scalar half argument to %out. The checks expect the argument to be
; fetched with a 32-bit scalar load, converted from f32 to f16, and written
; with a 16-bit store of the converted value.
; GCN-LABEL: {{^}}load_f16_arg:
; GCN: s_load_dword [[ARG:s[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[ARG]]
; GCN: buffer_store_short [[CVT]]
define void @load_f16_arg(half addrspace(1)* %out, half %arg) #0 {
  store half %arg, half addrspace(1)* %out
  ret void
}

; Stores a <2 x half> argument to %out. The checks expect two 16-bit loads
; (offsets 44 and 46) whose results feed two 16-bit stores (offsets 0 and 2),
; in any relative order.
; GCN-LABEL: {{^}}load_v2f16_arg:
; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
; GCN-DAG: buffer_store_short [[V0]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_short [[V1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN: s_endpgm
define void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x half> %arg) #0 {
  store <2 x half> %arg, <2 x half> addrspace(1)* %out
  ret void
}

; Stores a <3 x half> argument to %out. The checks expect exactly three 16-bit
; loads, then one dword store and one short store (either order) and no
; further buffer stores.
; GCN-LABEL: {{^}}load_v3f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_short
; GCN-NOT: buffer_store
; GCN: s_endpgm
define void @load_v3f16_arg(<3 x half> addrspace(1)* %out, <3 x half> %arg) #0 {
  store <3 x half> %arg, <3 x half> addrspace(1)* %out
  ret void
}

; Stores a <4 x half> argument to %out. The checks expect four 16-bit loads
; followed by four 16-bit stores.
; GCN-LABEL: {{^}}load_v4f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
define void @load_v4f16_arg(<4 x half> addrspace(1)* %out, <4 x half> %arg) #0 {
  store <4 x half> %arg, <4 x half> addrspace(1)* %out
  ret void
}

; Stores a <8 x half> argument to %out. No instruction checks: this only
; verifies that the function compiles and its label is emitted.
; GCN-LABEL: {{^}}load_v8f16_arg:
define void @load_v8f16_arg(<8 x half> addrspace(1)* %out, <8 x half> %arg) #0 {
  store <8 x half> %arg, <8 x half> addrspace(1)* %out
  ret void
}

; Extends a <2 x half> argument to <2 x float> and stores it. No instruction
; checks: compile-only coverage.
; GCN-LABEL: {{^}}extload_v2f16_arg:
define void @extload_v2f16_arg(<2 x float> addrspace(1)* %out, <2 x half> %in) #0 {
  %fpext = fpext <2 x half> %in to <2 x float>
  store <2 x float> %fpext, <2 x float> addrspace(1)* %out
  ret void
}

; Extends a scalar half argument to float and stores it. No instruction
; checks: compile-only coverage.
; GCN-LABEL: {{^}}extload_f16_to_f32_arg:
define void @extload_f16_to_f32_arg(float addrspace(1)* %out, half %arg) #0 {
  %ext = fpext half %arg to float
  store float %ext, float addrspace(1)* %out
  ret void
}

; Extends a <2 x half> argument to <2 x float> and stores it. No instruction
; checks: compile-only coverage.
; GCN-LABEL: {{^}}extload_v2f16_to_v2f32_arg:
define void @extload_v2f16_to_v2f32_arg(<2 x float> addrspace(1)* %out, <2 x half> %arg) #0 {
  %ext = fpext <2 x half> %arg to <2 x float>
  store <2 x float> %ext, <2 x float> addrspace(1)* %out
  ret void
}

; Extends a <3 x half> argument to <3 x float> and stores it. The checks
; expect exactly three 16-bit loads and exactly three f16-to-f32 conversions,
; then a dword store and a dwordx2 store in either order.
; GCN-LABEL: {{^}}extload_v3f16_to_v3f32_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN-NOT: v_cvt_f32_f16
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_dwordx2
; GCN: s_endpgm
define void @extload_v3f16_to_v3f32_arg(<3 x float> addrspace(1)* %out, <3 x half> %arg) #0 {
  %ext = fpext <3 x half> %arg to <3 x float>
  store <3 x float> %ext, <3 x float> addrspace(1)* %out
  ret void
}

; Extends a <4 x half> argument to <4 x float> and stores it. No instruction
; checks: compile-only coverage.
; GCN-LABEL: {{^}}extload_v4f16_to_v4f32_arg:
define void @extload_v4f16_to_v4f32_arg(<4 x float> addrspace(1)* %out, <4 x half> %arg) #0 {
  %ext = fpext <4 x half> %arg to <4 x float>
  store <4 x float> %ext, <4 x float> addrspace(1)* %out
  ret void
}

; Extends a <8 x half> argument to <8 x float> and stores it. No instruction
; checks: compile-only coverage.
; GCN-LABEL: {{^}}extload_v8f16_to_v8f32_arg:
define void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x half> %arg) #0 {
  %ext = fpext <8 x half> %arg to <8 x float>
  store <8 x float> %ext, <8 x float> addrspace(1)* %out
  ret void
}

; Extends a scalar half argument to double and stores it. No instruction
; checks: compile-only coverage.
; GCN-LABEL: {{^}}extload_f16_to_f64_arg:
define void @extload_f16_to_f64_arg(double addrspace(1)* %out, half %arg) #0 {
  %ext = fpext half %arg to double
  store double %ext, double addrspace(1)* %out
  ret void
}

; Extends a <2 x half> argument to <2 x double> and stores it. No instruction
; checks: compile-only coverage.
; GCN-LABEL: {{^}}extload_v2f16_to_v2f64_arg:
define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x half> %arg) #0 {
  %ext = fpext <2 x half> %arg to <2 x double>
  store <2 x double> %ext, <2 x double> addrspace(1)* %out
  ret void
}

; Extends a <3 x half> argument to <3 x double> and stores it. No instruction
; checks: compile-only coverage.
; GCN-LABEL: {{^}}extload_v3f16_to_v3f64_arg:
define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x half> %arg) #0 {
  %ext = fpext <3 x half> %arg to <3 x double>
  store <3 x double> %ext, <3 x double> addrspace(1)* %out
  ret void
}

; Extends a <4 x half> argument to <4 x double> and stores it. No instruction
; checks: compile-only coverage.
; GCN-LABEL: {{^}}extload_v4f16_to_v4f64_arg:
define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x half> %arg) #0 {
  %ext = fpext <4 x half> %arg to <4 x double>
  store <4 x double> %ext, <4 x double> addrspace(1)* %out
  ret void
}

; Extends a <8 x half> argument to <8 x double> and stores it. No instruction
; checks: compile-only coverage.
; GCN-LABEL: {{^}}extload_v8f16_to_v8f64_arg:
define void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)* %out, <8 x half> %arg) #0 {
  %ext = fpext <8 x half> %arg to <8 x double>
  store <8 x double> %ext, <8 x double> addrspace(1)* %out
  ret void
}

; Copies one half from %in to %out. The checks expect a 16-bit load whose
; result register is written back by a 16-bit store, with no conversion.
; GCN-LABEL: {{^}}global_load_store_f16:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @global_load_store_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  store half %val, half addrspace(1)* %out
  ret void
}

; Copies a <2 x half> from %in to %out. The checks expect a single dword load
; whose result register is written back by a single dword store.
; GCN-LABEL: {{^}}global_load_store_v2f16:
; GCN: buffer_load_dword [[TMP:v[0-9]+]]
; GCN: buffer_store_dword [[TMP]]
define void @global_load_store_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  store <2 x half> %val, <2 x half> addrspace(1)* %out
  ret void
}

; Copies a <4 x half> from %in to %out. The checks expect a dwordx2 load whose
; result register pair is written back by a dwordx2 store.
; Note: unlike the sibling tests, %in is the first parameter here.
; GCN-LABEL: {{^}}global_load_store_v4f16:
; GCN: buffer_load_dwordx2 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx2 [[TMP]]
define void @global_load_store_v4f16(<4 x half> addrspace(1)* %in, <4 x half> addrspace(1)* %out) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  store <4 x half> %val, <4 x half> addrspace(1)* %out
  ret void
}

; Copies a <8 x half> from %in to %out. The checks expect a dwordx4 load whose
; result register tuple is written back by a dwordx4 store. The store check
; reuses the captured TMP variable (rather than re-defining it) so that it
; actually ties the stored registers to the loaded ones.
; GCN-LABEL: {{^}}global_load_store_v8f16:
; GCN: buffer_load_dwordx4 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx4 [[TMP]]
; GCN: s_endpgm
define void @global_load_store_v8f16(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  store <8 x half> %val, <8 x half> addrspace(1)* %out
  ret void
}

; Loads a half from %in, extends it to float, and stores it. The checks
; expect a 16-bit load feeding one f16-to-f32 conversion whose result is
; written with a dword store.
; GCN-LABEL: {{^}}global_extload_f16_to_f32:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_dword [[CVT]]
define void @global_extload_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  %cvt = fpext half %val to float
  store float %cvt, float addrspace(1)* %out
  ret void
}

; Loads a <2 x half>, extends it to <2 x float>, and stores it. No
; instruction checks: compile-only coverage.
; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32:
define void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  %cvt = fpext <2 x half> %val to <2 x float>
  store <2 x float> %cvt, <2 x float> addrspace(1)* %out
  ret void
}

; Loads a <3 x half>, extends it to <3 x float>, and stores it. No
; instruction checks: compile-only coverage.
; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f32:
define void @global_extload_v3f16_to_v3f32(<3 x float> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
  %val = load <3 x half>, <3 x half> addrspace(1)* %in
  %cvt = fpext <3 x half> %val to <3 x float>
  store <3 x float> %cvt, <3 x float> addrspace(1)* %out
  ret void
}

; Loads a <4 x half>, extends it to <4 x float>, and stores it. No
; instruction checks: compile-only coverage.
; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f32:
define void @global_extload_v4f16_to_v4f32(<4 x float> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  %cvt = fpext <4 x half> %val to <4 x float>
  store <4 x float> %cvt, <4 x float> addrspace(1)* %out
  ret void
}

; Loads a <8 x half>, extends it to <8 x float>, and stores it. No
; instruction checks: compile-only coverage.
; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f32:
define void @global_extload_v8f16_to_v8f32(<8 x float> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  %cvt = fpext <8 x half> %val to <8 x float>
  store <8 x float> %cvt, <8 x float> addrspace(1)* %out
  ret void
}

; Loads a <16 x half>, extends it to <16 x float>, and stores it. No
; instruction checks: compile-only coverage.
; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f32:
define void @global_extload_v16f16_to_v16f32(<16 x float> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
  %val = load <16 x half>, <16 x half> addrspace(1)* %in
  %cvt = fpext <16 x half> %val to <16 x float>
  store <16 x float> %cvt, <16 x float> addrspace(1)* %out
  ret void
}

; Loads a half from %in, extends it to double, and stores it. The checks
; expect a two-step conversion (f16 to f32, then f32 to f64) chained from a
; 16-bit load, with the f64 result written by a dwordx2 store.
; GCN-LABEL: {{^}}global_extload_f16_to_f64:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[LOAD]]
; GCN: v_cvt_f64_f32_e32 [[CVT1:v\[[0-9]+:[0-9]+\]]], [[CVT0]]
; GCN: buffer_store_dwordx2 [[CVT1]]
define void @global_extload_f16_to_f64(double addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  %cvt = fpext half %val to double
  store double %cvt, double addrspace(1)* %out
  ret void
}

; Loads a <2 x half>, extends it to <2 x double>, and stores it. No
; instruction checks: compile-only coverage.
; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64:
define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  %cvt = fpext <2 x half> %val to <2 x double>
  store <2 x double> %cvt, <2 x double> addrspace(1)* %out
  ret void
}

; Loads a <3 x half>, extends it to <3 x double>, and stores it. No
; instruction checks: compile-only coverage.
; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f64:
define void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
  %val = load <3 x half>, <3 x half> addrspace(1)* %in
  %cvt = fpext <3 x half> %val to <3 x double>
  store <3 x double> %cvt, <3 x double> addrspace(1)* %out
  ret void
}

; Loads a <4 x half>, extends it to <4 x double>, and stores it. No
; instruction checks: compile-only coverage.
; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f64:
define void @global_extload_v4f16_to_v4f64(<4 x double> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  %cvt = fpext <4 x half> %val to <4 x double>
  store <4 x double> %cvt, <4 x double> addrspace(1)* %out
  ret void
}

; Loads a <8 x half>, extends it to <8 x double>, and stores it. No
; instruction checks: compile-only coverage.
; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f64:
define void @global_extload_v8f16_to_v8f64(<8 x double> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  %cvt = fpext <8 x half> %val to <8 x double>
  store <8 x double> %cvt, <8 x double> addrspace(1)* %out
  ret void
}

; Loads a <16 x half>, extends it to <16 x double>, and stores it. No
; instruction checks: compile-only coverage.
; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f64:
define void @global_extload_v16f16_to_v16f64(<16 x double> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
  %val = load <16 x half>, <16 x half> addrspace(1)* %in
  %cvt = fpext <16 x half> %val to <16 x double>
  store <16 x double> %cvt, <16 x double> addrspace(1)* %out
  ret void
}

; Loads a float, truncates it to half, and stores it. The checks expect a
; dword load feeding one f32-to-f16 conversion whose result is written with a
; 16-bit store.
; GCN-LABEL: {{^}}global_truncstore_f32_to_f16:
; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_short [[CVT]]
define void @global_truncstore_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %val = load float, float addrspace(1)* %in
  %cvt = fptrunc float %val to half
  store half %cvt, half addrspace(1)* %out
  ret void
}

; Loads a <2 x float>, truncates it to <2 x half>, and stores it. The checks
; capture the low and high registers of the dwordx2 load and expect each to be
; converted to f16 and written with its own 16-bit store, in any order.
; GCN-LABEL: {{^}}global_truncstore_v2f32_to_v2f16:
; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT0:v[0-9]+]], v[[LO]]
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], v[[HI]]
; GCN-DAG: buffer_store_short [[CVT0]]
; GCN-DAG: buffer_store_short [[CVT1]]
; GCN: s_endpgm
define void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
  %val = load <2 x float>, <2 x float> addrspace(1)* %in
  %cvt = fptrunc <2 x float> %val to <2 x half>
  store <2 x half> %cvt, <2 x half> addrspace(1)* %out
  ret void
}

; Loads a <3 x float>, truncates it to <3 x half>, and stores it. The checks
; currently accept four conversions after a dwordx4 load (see FIXME), then a
; short store followed by a dword store.
; FIXME: Shouldn't do 4th conversion
; GCN-LABEL: {{^}}global_truncstore_v3f32_to_v3f16:
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_dword
; GCN: s_endpgm
define void @global_truncstore_v3f32_to_v3f16(<3 x half> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
  %val = load <3 x float>, <3 x float> addrspace(1)* %in
  %cvt = fptrunc <3 x float> %val to <3 x half>
  store <3 x half> %cvt, <3 x half> addrspace(1)* %out
  ret void
}

; Loads a <4 x float>, truncates it to <4 x half>, and stores it. The checks
; expect a dwordx4 load, four f32-to-f16 conversions, and four 16-bit stores.
; GCN-LABEL: {{^}}global_truncstore_v4f32_to_v4f16:
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
define void @global_truncstore_v4f32_to_v4f16(<4 x half> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
  %val = load <4 x float>, <4 x float> addrspace(1)* %in
  %cvt = fptrunc <4 x float> %val to <4 x half>
  store <4 x half> %cvt, <4 x half> addrspace(1)* %out
  ret void
}

; Loads a <8 x float>, truncates it to <8 x half>, and stores it. The checks
; expect eight buffer_load_dword matches, then eight f32-to-f16 conversions,
; then eight 16-bit stores, in that order.
; GCN-LABEL: {{^}}global_truncstore_v8f32_to_v8f16:
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
define void @global_truncstore_v8f32_to_v8f16(<8 x half> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {
  %val = load <8 x float>, <8 x float> addrspace(1)* %in
  %cvt = fptrunc <8 x float> %val to <8 x half>
  store <8 x half> %cvt, <8 x half> addrspace(1)* %out
  ret void
}

; Loads a <16 x float>, truncates it to <16 x half>, and stores it. The checks
; expect sixteen buffer_load_dword matches, then sixteen f32-to-f16
; conversions, then sixteen 16-bit stores, in that order.
; GCN-LABEL: {{^}}global_truncstore_v16f32_to_v16f16:
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
define void @global_truncstore_v16f32_to_v16f16(<16 x half> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 {
  %val = load <16 x float>, <16 x float> addrspace(1)* %in
  %cvt = fptrunc <16 x float> %val to <16 x half>
  store <16 x half> %cvt, <16 x half> addrspace(1)* %out
  ret void
}

; Adds two scalar half arguments and stores the sum. Instruction checks apply
; to the SI run only: f16-to-f32 conversions followed by an f32 add. Both runs
; check that the program end is reached.
; FIXME: Unsafe math should fold conversions away
; GCN-LABEL: {{^}}fadd_f16:
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_f16(half addrspace(1)* %out, half %a, half %b) #0 {
  %add = fadd half %a, %b
  store half %add, half addrspace(1)* %out, align 4
  ret void
}

; Adds two <2 x half> arguments and stores the sum. The SI run expects two
; f32 adds; both runs check that the program end is reached.
; GCN-LABEL: {{^}}fadd_v2f16:
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %a, <2 x half> %b) #0 {
  %add = fadd <2 x half> %a, %b
  store <2 x half> %add, <2 x half> addrspace(1)* %out, align 8
  ret void
}

; Loads two adjacent <4 x half> vectors from %in (elements 0 and 1), adds
; them, and stores the sum to %out. The SI run expects four f32 adds; both
; runs check that the program end is reached.
; GCN-LABEL: {{^}}fadd_v4f16:
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %b_ptr = getelementptr <4 x half>, <4 x half> addrspace(1)* %in, i32 1
  %a = load <4 x half>, <4 x half> addrspace(1)* %in, align 16
  %b = load <4 x half>, <4 x half> addrspace(1)* %b_ptr, align 16
  %result = fadd <4 x half> %a, %b
  store <4 x half> %result, <4 x half> addrspace(1)* %out, align 16
  ret void
}

; Adds two <8 x half> arguments and stores the sum. The SI run expects eight
; f32 adds; both runs check that the program end is reached.
; GCN-LABEL: {{^}}fadd_v8f16:
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v8f16(<8 x half> addrspace(1)* %out, <8 x half> %a, <8 x half> %b) #0 {
  %add = fadd <8 x half> %a, %b
  store <8 x half> %add, <8 x half> addrspace(1)* %out, align 32
  ret void
}

; Loads two adjacent halves from %in (elements 0 and 1), subtracts the second
; from the first, and stores the result. Both runs expect a reversed-operand
; f32 subtract.
; GCN-LABEL: {{^}}fsub_f16:
; GCN: v_subrev_f32_e32
; GCN: s_endpgm
define void @fsub_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %b_ptr = getelementptr half, half addrspace(1)* %in, i32 1
  %a = load half, half addrspace(1)* %in
  %b = load half, half addrspace(1)* %b_ptr
  %sub = fsub half %a, %b
  store half %sub, half addrspace(1)* %out
  ret void
}

; Loads a half, bitcasts it to i16, and stores it. The checks expect the
; 16-bit load result to be stored unchanged, i.e. the bitcast costs nothing.
; GCN-LABEL: {{^}}test_bitcast_from_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) #0 {
  %val = load half, half addrspace(1)* %in
  %val_int = bitcast half %val to i16
  store i16 %val_int, i16 addrspace(1)* %out
  ret void
}

; Loads an i16, bitcasts it to half, and stores it. The checks expect the
; 16-bit load result to be stored unchanged, i.e. the bitcast costs nothing.
; GCN-LABEL: {{^}}test_bitcast_to_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %val = load i16, i16 addrspace(1)* %in
  %val_fp = bitcast i16 %val to half
  store half %val_fp, half addrspace(1)* %out
  ret void
}

; Attribute group referenced as #0 by the function definitions above.
attributes #0 = { nounwind }