; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; half args should be promoted to float

; Stores a scalar half kernel argument to global memory. The checks expect
; the argument to arrive through s_load_dword and to be narrowed with
; v_cvt_f16_f32 before the 16-bit store.
; GCN-LABEL: {{^}}load_f16_arg:
; GCN: s_load_dword [[ARG:s[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[ARG]]
; GCN: buffer_store_short [[CVT]]
define void @load_f16_arg(half addrspace(1)* %out, half %arg) #0 {
  store half %arg, half addrspace(1)* %out
  ret void
}

; Stores a <2 x half> kernel argument. Each element is expected to be read
; with its own buffer_load_ushort (offsets 44 and 46) and written back with
; its own buffer_store_short (offsets 0 and 2).
; GCN-LABEL: {{^}}load_v2f16_arg:
; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
; GCN-DAG: buffer_store_short [[V0]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_short [[V1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN: s_endpgm
define void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x half> %arg) #0 {
  store <2 x half> %arg, <2 x half> addrspace(1)* %out
  ret void
}

; Stores a <3 x half> kernel argument. Exactly three 16-bit loads are
; expected, and the value is written with one dword store plus one short
; store and no further buffer stores.
; GCN-LABEL: {{^}}load_v3f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_short
; GCN-NOT: buffer_store
; GCN: s_endpgm
define void @load_v3f16_arg(<3 x half> addrspace(1)* %out, <3 x half> %arg) #0 {
  store <3 x half> %arg, <3 x half> addrspace(1)* %out
  ret void
}

; Stores a <4 x half> kernel argument. Four 16-bit loads followed by four
; 16-bit stores are expected.
; GCN-LABEL: {{^}}load_v4f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
define void @load_v4f16_arg(<4 x half> addrspace(1)* %out, <4 x half> %arg) #0 {
  store <4 x half> %arg, <4 x half> addrspace(1)* %out
  ret void
}

; Stores a <8 x half> kernel argument. Only the label is checked, so this
; kernel just has to compile cleanly under -verify-machineinstrs.
; GCN-LABEL: {{^}}load_v8f16_arg:
define void @load_v8f16_arg(<8 x half> addrspace(1)* %out, <8 x half> %arg) #0 {
  store <8 x half> %arg, <8 x half> addrspace(1)* %out
  ret void
}

; Extends a <2 x half> kernel argument to <2 x float> and stores it.
; Only the label is checked.
; GCN-LABEL: {{^}}extload_v2f16_arg:
define void @extload_v2f16_arg(<2 x float> addrspace(1)* %out, <2 x half> %in) #0 {
  %fpext = fpext <2 x half> %in to <2 x float>
  store <2 x float> %fpext, <2 x float> addrspace(1)* %out
  ret void
}

; Extends a scalar half kernel argument to float and stores it.
; Only the label is checked.
; GCN-LABEL: {{^}}extload_f16_to_f32_arg:
define void @extload_f16_to_f32_arg(float addrspace(1)* %out, half %arg) #0 {
  %ext = fpext half %arg to float
  store float %ext, float addrspace(1)* %out
  ret void
}

; Extends a <2 x half> kernel argument to <2 x float> and stores it.
; Only the label is checked.
; GCN-LABEL: {{^}}extload_v2f16_to_v2f32_arg:
define void @extload_v2f16_to_v2f32_arg(<2 x float> addrspace(1)* %out, <2 x half> %arg) #0 {
  %ext = fpext <2 x half> %arg to <2 x float>
  store <2 x float> %ext, <2 x float> addrspace(1)* %out
  ret void
}

; Extends a <3 x half> kernel argument to <3 x float>. Exactly three 16-bit
; loads and exactly three v_cvt_f32_f16 conversions are expected; the result
; is written with a dword store and a dwordx2 store.
; GCN-LABEL: {{^}}extload_v3f16_to_v3f32_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN-NOT: v_cvt_f32_f16
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_dwordx2
; GCN: s_endpgm
define void @extload_v3f16_to_v3f32_arg(<3 x float> addrspace(1)* %out, <3 x half> %arg) #0 {
  %ext = fpext <3 x half> %arg to <3 x float>
  store <3 x float> %ext, <3 x float> addrspace(1)* %out
  ret void
}

; Extends a <4 x half> kernel argument to <4 x float> and stores it.
; Only the label is checked.
; GCN-LABEL: {{^}}extload_v4f16_to_v4f32_arg:
define void @extload_v4f16_to_v4f32_arg(<4 x float> addrspace(1)* %out, <4 x half> %arg) #0 {
  %ext = fpext <4 x half> %arg to <4 x float>
  store <4 x float> %ext, <4 x float> addrspace(1)* %out
  ret void
}

; Extends a <8 x half> kernel argument to <8 x float> and stores it.
; Only the label is checked.
; GCN-LABEL: {{^}}extload_v8f16_to_v8f32_arg:
define void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x half> %arg) #0 {
  %ext = fpext <8 x half> %arg to <8 x float>
  store <8 x float> %ext, <8 x float> addrspace(1)* %out
  ret void
}

; Extends a scalar half kernel argument to double. The argument is expected
; to be loaded as a dword (offset 0xb in the first run, 0x2c in the tonga
; run) and converted with a single v_cvt_f64_f32 before the dwordx2 store.
; GCN-LABEL: {{^}}extload_f16_to_f64_arg:
; SI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb{{$}}
; VI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c{{$}}
; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[ARG]]
; GCN: buffer_store_dwordx2 [[RESULT]]
define void @extload_f16_to_f64_arg(double addrspace(1)* %out, half %arg) #0 {
  %ext = fpext half %arg to double
  store double %ext, double addrspace(1)* %out
  ret void
}

; Extends a <2 x half> kernel argument to <2 x double>. Per element the
; checks expect one 16-bit load, one half-to-float conversion and one
; float-to-double conversion, in any order.
; GCN-LABEL: {{^}}extload_v2f16_to_v2f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x half> %arg) #0 {
  %ext = fpext <2 x half> %arg to <2 x double>
  store <2 x double> %ext, <2 x double> addrspace(1)* %out
  ret void
}

; Extends a <3 x half> kernel argument to <3 x double>. Per element the
; checks expect one 16-bit load, one half-to-float conversion and one
; float-to-double conversion, in any order.
; GCN-LABEL: {{^}}extload_v3f16_to_v3f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x half> %arg) #0 {
  %ext = fpext <3 x half> %arg to <3 x double>
  store <3 x double> %ext, <3 x double> addrspace(1)* %out
  ret void
}

; Extends a <4 x half> kernel argument to <4 x double>. Per element the
; checks expect one 16-bit load, one half-to-float conversion and one
; float-to-double conversion, in any order.
; GCN-LABEL: {{^}}extload_v4f16_to_v4f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x half> %arg) #0 {
  %ext = fpext <4 x half> %arg to <4 x double>
  store <4 x double> %ext, <4 x double> addrspace(1)* %out
  ret void
}

; Extends a <8 x half> kernel argument to <8 x double>. Eight 16-bit loads,
; eight half-to-float conversions and eight float-to-double conversions are
; expected, in any order.
; GCN-LABEL: {{^}}extload_v8f16_to_v8f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v

; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v

; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32

; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32

; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32

; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32

; GCN: s_endpgm
define void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)* %out, <8 x half> %arg) #0 {
  %ext = fpext <8 x half> %arg to <8 x double>
  store <8 x double> %ext, <8 x double> addrspace(1)* %out
  ret void
}

; Copies one half from %in to %out. The register written by the 16-bit load
; must be the one consumed by the 16-bit store.
; GCN-LABEL: {{^}}global_load_store_f16:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @global_load_store_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  store half %val, half addrspace(1)* %out
  ret void
}

; Copies a <2 x half> from %in to %out. A single dword load feeding a single
; dword store through the same register is expected.
; GCN-LABEL: {{^}}global_load_store_v2f16:
; GCN: buffer_load_dword [[TMP:v[0-9]+]]
; GCN: buffer_store_dword [[TMP]]
define void @global_load_store_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  store <2 x half> %val, <2 x half> addrspace(1)* %out
  ret void
}

; Copies a <4 x half> from %in to %out. A single dwordx2 load feeding a
; single dwordx2 store through the same register pair is expected.
; Note that this kernel declares %in before %out, unlike its siblings.
; GCN-LABEL: {{^}}global_load_store_v4f16:
; GCN: buffer_load_dwordx2 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx2 [[TMP]]
define void @global_load_store_v4f16(<4 x half> addrspace(1)* %in, <4 x half> addrspace(1)* %out) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  store <4 x half> %val, <4 x half> addrspace(1)* %out
  ret void
}

; Copies a <8 x half> from %in to %out. A single dwordx4 load is expected to
; feed a single dwordx4 store; the store check reuses the captured register
; tuple so that the stored value is tied to the loaded one.
; GCN-LABEL: {{^}}global_load_store_v8f16:
; GCN: buffer_load_dwordx4 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx4 [[TMP]]
; GCN: s_endpgm
define void @global_load_store_v8f16(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  store <8 x half> %val, <8 x half> addrspace(1)* %out
  ret void
}

; Loads a half from global memory, extends it to float and stores it.
; The checks chain the 16-bit load into v_cvt_f32_f16 and then into the
; dword store.
; GCN-LABEL: {{^}}global_extload_f16_to_f32:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_dword [[CVT]]
define void @global_extload_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  %cvt = fpext half %val to float
  store float %cvt, float addrspace(1)* %out
  ret void
}

; Loads a <2 x half>, extends it to <2 x float> and stores it. Each element
; is expected to be loaded separately (offsets 0 and 2) and converted, with
; the two results forming the register pair of one dwordx2 store.
; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32:
; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[CVT0]]:[[CVT1]]{{\]}}
; GCN: s_endpgm
define void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  %cvt = fpext <2 x half> %val to <2 x float>
  store <2 x float> %cvt, <2 x float> addrspace(1)* %out
  ret void
}

; Loads a <3 x half>, extends it to <3 x float> and stores it.
; Only the label is checked.
; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f32:
define void @global_extload_v3f16_to_v3f32(<3 x float> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
  %val = load <3 x half>, <3 x half> addrspace(1)* %in
  %cvt = fpext <3 x half> %val to <3 x float>
  store <3 x float> %cvt, <3 x float> addrspace(1)* %out
  ret void
}

; Loads a <4 x half>, extends it to <4 x float> and stores it.
; Only the label is checked.
; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f32:
define void @global_extload_v4f16_to_v4f32(<4 x float> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  %cvt = fpext <4 x half> %val to <4 x float>
  store <4 x float> %cvt, <4 x float> addrspace(1)* %out
  ret void
}

; Loads a <8 x half>, extends it to <8 x float> and stores it.
; Only the label is checked.
; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f32:
define void @global_extload_v8f16_to_v8f32(<8 x float> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  %cvt = fpext <8 x half> %val to <8 x float>
  store <8 x float> %cvt, <8 x float> addrspace(1)* %out
  ret void
}

; Loads a <16 x half>, extends it to <16 x float> and stores it.
; Only the label is checked.
; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f32:
define void @global_extload_v16f16_to_v16f32(<16 x float> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
  %val = load <16 x half>, <16 x half> addrspace(1)* %in
  %cvt = fpext <16 x half> %val to <16 x float>
  store <16 x float> %cvt, <16 x float> addrspace(1)* %out
  ret void
}

; Loads a half, extends it to double and stores it. The checks expect the
; extension to go through float: v_cvt_f32_f16 followed by v_cvt_f64_f32.
; GCN-LABEL: {{^}}global_extload_f16_to_f64:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[LOAD]]
; GCN: v_cvt_f64_f32_e32 [[CVT1:v\[[0-9]+:[0-9]+\]]], [[CVT0]]
; GCN: buffer_store_dwordx2 [[CVT1]]
define void @global_extload_f16_to_f64(double addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  %cvt = fpext half %val to double
  store double %cvt, double addrspace(1)* %out
  ret void
}

; Loads a <2 x half>, extends it to <2 x double> and stores it. Each element
; is expected to be loaded separately (offsets 0 and 2), widened to float,
; widened again to double and written with its own dwordx2 store.
; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64:
; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
; GCN-DAG: v_cvt_f64_f32_e32 [[CVT2:v\[[0-9]+:[0-9]+\]]], v[[CVT0]]
; GCN-DAG: v_cvt_f64_f32_e32 [[CVT3:v\[[0-9]+:[0-9]+\]]], v[[CVT1]]
; GCN-DAG: buffer_store_dwordx2 [[CVT2]]
; GCN-DAG: buffer_store_dwordx2 [[CVT3]]
; GCN: s_endpgm
define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  %cvt = fpext <2 x half> %val to <2 x double>
  store <2 x double> %cvt, <2 x double> addrspace(1)* %out
  ret void
}

; Loads a <3 x half>, extends it to <3 x double> and stores it.
; Only the label is checked.
; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f64:
define void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
  %val = load <3 x half>, <3 x half> addrspace(1)* %in
  %cvt = fpext <3 x half> %val to <3 x double>
  store <3 x double> %cvt, <3 x double> addrspace(1)* %out
  ret void
}

; Loads a <4 x half>, extends it to <4 x double> and stores it.
; Only the label is checked.
; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f64:
define void @global_extload_v4f16_to_v4f64(<4 x double> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  %cvt = fpext <4 x half> %val to <4 x double>
  store <4 x double> %cvt, <4 x double> addrspace(1)* %out
  ret void
}

; Loads a <8 x half>, extends it to <8 x double> and stores it.
; Only the label is checked.
; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f64:
define void @global_extload_v8f16_to_v8f64(<8 x double> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  %cvt = fpext <8 x half> %val to <8 x double>
  store <8 x double> %cvt, <8 x double> addrspace(1)* %out
  ret void
}

; Loads a <16 x half>, extends it to <16 x double> and stores it.
; Only the label is checked.
; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f64:
define void @global_extload_v16f16_to_v16f64(<16 x double> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
  %val = load <16 x half>, <16 x half> addrspace(1)* %in
  %cvt = fpext <16 x half> %val to <16 x double>
  store <16 x double> %cvt, <16 x double> addrspace(1)* %out
  ret void
}

; Loads a float, truncates it to half and stores it. The checks chain the
; dword load into v_cvt_f16_f32 and then into the 16-bit store.
; GCN-LABEL: {{^}}global_truncstore_f32_to_f16:
; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_short [[CVT]]
define void @global_truncstore_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %val = load float, float addrspace(1)* %in
  %cvt = fptrunc float %val to half
  store half %cvt, half addrspace(1)* %out
  ret void
}

; Loads a <2 x float>, truncates it to <2 x half> and stores it. Both halves
; of the dwordx2 load are expected to be converted individually and written
; with separate 16-bit stores.
; GCN-LABEL: {{^}}global_truncstore_v2f32_to_v2f16:
; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT0:v[0-9]+]], v[[LO]]
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], v[[HI]]
; GCN-DAG: buffer_store_short [[CVT0]]
; GCN-DAG: buffer_store_short [[CVT1]]
; GCN: s_endpgm
define void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
  %val = load <2 x float>, <2 x float> addrspace(1)* %in
  %cvt = fptrunc <2 x float> %val to <2 x half>
  store <2 x half> %cvt, <2 x half> addrspace(1)* %out
  ret void
}

; Loads a <3 x float>, truncates it to <3 x half> and stores it. A dwordx4
; load is expected, followed by exactly three float-to-half conversions, a
; short store and a dword store.
; GCN-LABEL: {{^}}global_truncstore_v3f32_to_v3f16:
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN-NOT: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_dword
; GCN: s_endpgm
define void @global_truncstore_v3f32_to_v3f16(<3 x half> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
  %val = load <3 x float>, <3 x float> addrspace(1)* %in
  %cvt = fptrunc <3 x float> %val to <3 x half>
  store <3 x half> %cvt, <3 x half> addrspace(1)* %out
  ret void
}

; Loads a <4 x float>, truncates it to <4 x half> and stores it. One dwordx4
; load, four float-to-half conversions and four 16-bit stores are expected.
; GCN-LABEL: {{^}}global_truncstore_v4f32_to_v4f16:
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
define void @global_truncstore_v4f32_to_v4f16(<4 x half> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
  %val = load <4 x float>, <4 x float> addrspace(1)* %in
  %cvt = fptrunc <4 x float> %val to <4 x half>
  store <4 x half> %cvt, <4 x half> addrspace(1)* %out
  ret void
}

; Loads a <8 x float>, truncates it to <8 x half> and stores it. Eight
; buffer_load_dword matches, eight float-to-half conversions and eight
; 16-bit stores are expected, in that order.
; GCN-LABEL: {{^}}global_truncstore_v8f32_to_v8f16:
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
define void @global_truncstore_v8f32_to_v8f16(<8 x half> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {
  %val = load <8 x float>, <8 x float> addrspace(1)* %in
  %cvt = fptrunc <8 x float> %val to <8 x half>
  store <8 x half> %cvt, <8 x half> addrspace(1)* %out
  ret void
}

; Loads a <16 x float>, truncates it to <16 x half> and stores it. Sixteen
; buffer_load_dword matches are expected first; the sixteen conversions and
; sixteen 16-bit stores that follow may be interleaved in any order.
; GCN-LABEL: {{^}}global_truncstore_v16f32_to_v16f16:
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN: s_endpgm
define void @global_truncstore_v16f32_to_v16f16(<16 x half> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 {
  %val = load <16 x float>, <16 x float> addrspace(1)* %in
  %cvt = fptrunc <16 x float> %val to <16 x half>
  store <16 x half> %cvt, <16 x half> addrspace(1)* %out
  ret void
}

; Adds two half kernel arguments and stores the half result. Instruction
; checks apply to the first run only, where four half-to-float conversions
; and a float add are expected.
; FIXME: Unsafe math should fold conversions away
; GCN-LABEL: {{^}}fadd_f16:
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_f16(half addrspace(1)* %out, half %a, half %b) #0 {
  %add = fadd half %a, %b
  store half %add, half addrspace(1)* %out, align 4
  ret void
}

; Adds two <2 x half> kernel arguments. On the first run one float add per
; element (two in total) is expected.
; GCN-LABEL: {{^}}fadd_v2f16:
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %a, <2 x half> %b) #0 {
  %add = fadd <2 x half> %a, %b
  store <2 x half> %add, <2 x half> addrspace(1)* %out, align 8
  ret void
}

; Adds two consecutive <4 x half> values read from %in (elements 0 and 1 of
; the pointed-to array) and stores the sum. On the first run four float adds
; are expected.
; GCN-LABEL: {{^}}fadd_v4f16:
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %b_ptr = getelementptr <4 x half>, <4 x half> addrspace(1)* %in, i32 1
  %a = load <4 x half>, <4 x half> addrspace(1)* %in, align 16
  %b = load <4 x half>, <4 x half> addrspace(1)* %b_ptr, align 16
  %result = fadd <4 x half> %a, %b
  store <4 x half> %result, <4 x half> addrspace(1)* %out, align 16
  ret void
}

; Adds two <8 x half> kernel arguments. On the first run eight float adds
; are expected.
; GCN-LABEL: {{^}}fadd_v8f16:
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v8f16(<8 x half> addrspace(1)* %out, <8 x half> %a, <8 x half> %b) #0 {
  %add = fadd <8 x half> %a, %b
  store <8 x half> %add, <8 x half> addrspace(1)* %out, align 32
  ret void
}

; Subtracts two consecutive halves read from %in and stores the result.
; Both runs are expected to emit v_subrev_f32 for the subtraction.
; GCN-LABEL: {{^}}fsub_f16:
; GCN: v_subrev_f32_e32
; GCN: s_endpgm
define void @fsub_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %b_ptr = getelementptr half, half addrspace(1)* %in, i32 1
  %a = load half, half addrspace(1)* %in
  %b = load half, half addrspace(1)* %b_ptr
  %sub = fsub half %a, %b
  store half %sub, half addrspace(1)* %out
  ret void
}

; Bitcasts a loaded half to i16 and stores it. The cast is expected to be
; free: the register written by the 16-bit load feeds the 16-bit store.
; GCN-LABEL: {{^}}test_bitcast_from_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) #0 {
  %val = load half, half addrspace(1)* %in
  %val_int = bitcast half %val to i16
  store i16 %val_int, i16 addrspace(1)* %out
  ret void
}

; Bitcasts a loaded i16 to half and stores it. The cast is expected to be
; free: the register written by the 16-bit load feeds the 16-bit store.
; GCN-LABEL: {{^}}test_bitcast_to_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %val = load i16, i16 addrspace(1)* %in
  %val_fp = bitcast i16 %val to half
  store half %val_fp, half addrspace(1)* %out
  ret void
}

; Attribute group referenced as #0 by the kernels in this file.
attributes #0 = { nounwind }