; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Half arguments should be promoted to float.

; Stores a scalar half argument through %out.
; The checks expect the argument to arrive via s_load_dword, be converted
; with v_cvt_f16_f32 and be written back with buffer_store_short.
; GCN-LABEL: {{^}}load_f16_arg:
; GCN: s_load_dword [[ARG:s[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[ARG]]
; GCN: buffer_store_short [[CVT]]
define void @load_f16_arg(half addrspace(1)* %out, half %arg) #0 {
  store half %arg, half addrspace(1)* %out
  ret void
}

; Stores a <2 x half> argument through %out.
; The checks expect two ushort loads (offsets 44 and 46) whose results feed
; two short stores (offsets 0 and 2), in any order.
; GCN-LABEL: {{^}}load_v2f16_arg:
; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
; GCN-DAG: buffer_store_short [[V0]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_short [[V1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN: s_endpgm
define void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x half> %arg) #0 {
  store <2 x half> %arg, <2 x half> addrspace(1)* %out
  ret void
}

; Stores a <3 x half> argument through %out.
; The checks expect exactly three ushort loads, then one dword store and one
; short store, with negative checks ruling out extra buffer loads or stores.
; GCN-LABEL: {{^}}load_v3f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_short
; GCN-NOT: buffer_store
; GCN: s_endpgm
define void @load_v3f16_arg(<3 x half> addrspace(1)* %out, <3 x half> %arg) #0 {
  store <3 x half> %arg, <3 x half> addrspace(1)* %out
  ret void
}

; Stores a <4 x half> argument through %out.
; The checks expect four ushort loads followed by four short stores.
; GCN-LABEL: {{^}}load_v4f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
define void @load_v4f16_arg(<4 x half> addrspace(1)* %out, <4 x half> %arg) #0 {
  store <4 x half> %arg, <4 x half> addrspace(1)* %out
  ret void
}

; Stores an <8 x half> argument through %out.
; Only the function label is checked; no instruction patterns are verified.
; GCN-LABEL: {{^}}load_v8f16_arg:
define void @load_v8f16_arg(<8 x half> addrspace(1)* %out, <8 x half> %arg) #0 {
  store <8 x half> %arg, <8 x half> addrspace(1)* %out
  ret void
}

; Extends a <2 x half> argument to <2 x float> and stores it through %out.
; Only the function label is checked; no instruction patterns are verified.
; GCN-LABEL: {{^}}extload_v2f16_arg:
define void @extload_v2f16_arg(<2 x float> addrspace(1)* %out, <2 x half> %in) #0 {
  %fpext = fpext <2 x half> %in to <2 x float>
  store <2 x float> %fpext, <2 x float> addrspace(1)* %out
  ret void
}

; Extends a scalar half argument to float and stores it through %out.
; Only the function label is checked; no instruction patterns are verified.
; GCN-LABEL: {{^}}extload_f16_to_f32_arg:
define void @extload_f16_to_f32_arg(float addrspace(1)* %out, half %arg) #0 {
  %ext = fpext half %arg to float
  store float %ext, float addrspace(1)* %out
  ret void
}

; Extends a <2 x half> argument to <2 x float> and stores it through %out.
; Only the function label is checked; no instruction patterns are verified.
; GCN-LABEL: {{^}}extload_v2f16_to_v2f32_arg:
define void @extload_v2f16_to_v2f32_arg(<2 x float> addrspace(1)* %out, <2 x half> %arg) #0 {
  %ext = fpext <2 x half> %arg to <2 x float>
  store <2 x float> %ext, <2 x float> addrspace(1)* %out
  ret void
}

; Extends a <3 x half> argument to <3 x float> and stores it through %out.
; The checks expect exactly three ushort loads and exactly three
; v_cvt_f32_f16 conversions, then a dword store and a dwordx2 store.
; GCN-LABEL: {{^}}extload_v3f16_to_v3f32_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN-NOT: v_cvt_f32_f16
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_dwordx2
; GCN: s_endpgm
define void @extload_v3f16_to_v3f32_arg(<3 x float> addrspace(1)* %out, <3 x half> %arg) #0 {
  %ext = fpext <3 x half> %arg to <3 x float>
  store <3 x float> %ext, <3 x float> addrspace(1)* %out
  ret void
}

; Extends a <4 x half> argument to <4 x float> and stores it through %out.
; Only the function label is checked; no instruction patterns are verified.
; GCN-LABEL: {{^}}extload_v4f16_to_v4f32_arg:
define void @extload_v4f16_to_v4f32_arg(<4 x float> addrspace(1)* %out, <4 x half> %arg) #0 {
  %ext = fpext <4 x half> %arg to <4 x float>
  store <4 x float> %ext, <4 x float> addrspace(1)* %out
  ret void
}

; Extends an <8 x half> argument to <8 x float> and stores it through %out.
; The checks expect eight ushort loads, eight v_cvt_f32_f16 conversions and
; two dwordx4 stores, in that order.
; GCN-LABEL: {{^}}extload_v8f16_to_v8f32_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort

; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32

; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
define void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x half> %arg) #0 {
  %ext = fpext <8 x half> %arg to <8 x float>
  store <8 x float> %ext, <8 x float> addrspace(1)* %out
  ret void
}

; Extends a scalar half argument to double and stores it through %out.
; The argument load offset differs per run line (0xb versus 0x2c); both runs
; then expect a single v_cvt_f64_f32 of the loaded value and a dwordx2 store.
; GCN-LABEL: {{^}}extload_f16_to_f64_arg:
; SI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb{{$}}
; VI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c{{$}}
; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[ARG]]
; GCN: buffer_store_dwordx2 [[RESULT]]
define void @extload_f16_to_f64_arg(double addrspace(1)* %out, half %arg) #0 {
  %ext = fpext half %arg to double
  store double %ext, double addrspace(1)* %out
  ret void
}

; Extends a <2 x half> argument to <2 x double> and stores it through %out.
; The checks expect two ushort loads, two half-to-float conversions and two
; float-to-double conversions, in any order.
; GCN-LABEL: {{^}}extload_v2f16_to_v2f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x half> %arg) #0 {
  %ext = fpext <2 x half> %arg to <2 x double>
  store <2 x double> %ext, <2 x double> addrspace(1)* %out
  ret void
}

; Extends a <3 x half> argument to <3 x double> and stores it through %out.
; The checks expect three ushort loads, three half-to-float conversions and
; three float-to-double conversions, in any order.
; GCN-LABEL: {{^}}extload_v3f16_to_v3f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x half> %arg) #0 {
  %ext = fpext <3 x half> %arg to <3 x double>
  store <3 x double> %ext, <3 x double> addrspace(1)* %out
  ret void
}

; Extends a <4 x half> argument to <4 x double> and stores it through %out.
; The checks expect four ushort loads, four half-to-float conversions and
; four float-to-double conversions, in any order.
; GCN-LABEL: {{^}}extload_v4f16_to_v4f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x half> %arg) #0 {
  %ext = fpext <4 x half> %arg to <4 x double>
  store <4 x double> %ext, <4 x double> addrspace(1)* %out
  ret void
}

; Extends an <8 x half> argument to <8 x double> and stores it through %out.
; The checks expect eight ushort loads, eight half-to-float conversions and
; eight float-to-double conversions, in any order.
; GCN-LABEL: {{^}}extload_v8f16_to_v8f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v

; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v

; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32

; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32

; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32

; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32

; GCN: s_endpgm
define void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)* %out, <8 x half> %arg) #0 {
  %ext = fpext <8 x half> %arg to <8 x double>
  store <8 x double> %ext, <8 x double> addrspace(1)* %out
  ret void
}

; Copies one half value from %in to %out (both addrspace(1) pointers).
; The checks expect a ushort load whose result register is stored directly
; with buffer_store_short.
; GCN-LABEL: {{^}}global_load_store_f16:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @global_load_store_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  store half %val, half addrspace(1)* %out
  ret void
}

; Copies one <2 x half> value from %in to %out.
; The checks expect a single dword load whose result register is stored
; directly with a dword store.
; GCN-LABEL: {{^}}global_load_store_v2f16:
; GCN: buffer_load_dword [[TMP:v[0-9]+]]
; GCN: buffer_store_dword [[TMP]]
define void @global_load_store_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  store <2 x half> %val, <2 x half> addrspace(1)* %out
  ret void
}

; Copies one <4 x half> value from %in to %out.
; The checks expect a single dwordx2 load whose result register pair is
; stored directly with a dwordx2 store.
; NOTE(review): the parameters are declared as (%in, %out) here, whereas the
; other load/store tests in this file declare (%out, %in); the patterns below
; do not depend on the order. Confirm whether this is intentional.
; GCN-LABEL: {{^}}global_load_store_v4f16:
; GCN: buffer_load_dwordx2 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx2 [[TMP]]
define void @global_load_store_v4f16(<4 x half> addrspace(1)* %in, <4 x half> addrspace(1)* %out) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  store <4 x half> %val, <4 x half> addrspace(1)* %out
  ret void
}

; Copies one <8 x half> value from %in to %out.
; The checks expect a dwordx4 load followed by a dwordx4 store.
; NOTE(review): the store pattern defines TMP again instead of reusing the
; value captured by the load pattern, so the two register ranges are not
; required to match. Confirm whether a plain [[TMP]] use was intended.
; GCN-LABEL: {{^}}global_load_store_v8f16:
; GCN: buffer_load_dwordx4 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx4 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: s_endpgm
define void @global_load_store_v8f16(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  store <8 x half> %val, <8 x half> addrspace(1)* %out
  ret void
}

; Loads a half from %in, extends it to float and stores it through %out.
; The checks expect a ushort load, a v_cvt_f32_f16 of the loaded register and
; a dword store of the converted register.
; GCN-LABEL: {{^}}global_extload_f16_to_f32:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_dword [[CVT]]
define void @global_extload_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  %cvt = fpext half %val to float
  store float %cvt, float addrspace(1)* %out
  ret void
}

; Loads a <2 x half> from %in, extends it to <2 x float> and stores it.
; The checks expect two ushort loads (offsets 0 and 2), one conversion per
; loaded register, and a dwordx2 store of the two converted registers.
; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32:
; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[CVT0]]:[[CVT1]]{{\]}}
; GCN: s_endpgm
define void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  %cvt = fpext <2 x half> %val to <2 x float>
  store <2 x float> %cvt, <2 x float> addrspace(1)* %out
  ret void
}

; Loads a <3 x half> from %in, extends it to <3 x float> and stores it.
; Only the function label is checked; no instruction patterns are verified.
; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f32:
define void @global_extload_v3f16_to_v3f32(<3 x float> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
  %val = load <3 x half>, <3 x half> addrspace(1)* %in
  %cvt = fpext <3 x half> %val to <3 x float>
  store <3 x float> %cvt, <3 x float> addrspace(1)* %out
  ret void
}

; Loads a <4 x half> from %in, extends it to <4 x float> and stores it.
; Only the function label is checked; no instruction patterns are verified.
; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f32:
define void @global_extload_v4f16_to_v4f32(<4 x float> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  %cvt = fpext <4 x half> %val to <4 x float>
  store <4 x float> %cvt, <4 x float> addrspace(1)* %out
  ret void
}

; Loads an <8 x half> from %in, extends it to <8 x float> and stores it.
; Only the function label is checked; no instruction patterns are verified.
; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f32:
define void @global_extload_v8f16_to_v8f32(<8 x float> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  %cvt = fpext <8 x half> %val to <8 x float>
  store <8 x float> %cvt, <8 x float> addrspace(1)* %out
  ret void
}

; Loads a <16 x half> from %in, extends it to <16 x float> and stores it.
; The checks expect sixteen ushort loads, sixteen v_cvt_f32_f16 conversions
; and four dwordx4 stores, in that order.
; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f32:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort

; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32

; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4

; GCN: s_endpgm
define void @global_extload_v16f16_to_v16f32(<16 x float> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
  %val = load <16 x half>, <16 x half> addrspace(1)* %in
  %cvt = fpext <16 x half> %val to <16 x float>
  store <16 x float> %cvt, <16 x float> addrspace(1)* %out
  ret void
}

; Loads a half from %in, extends it to double and stores it through %out.
; The checks expect the extension to go through float: a ushort load, then
; v_cvt_f32_f16, then v_cvt_f64_f32, then a dwordx2 store of the result.
; GCN-LABEL: {{^}}global_extload_f16_to_f64:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[LOAD]]
; GCN: v_cvt_f64_f32_e32 [[CVT1:v\[[0-9]+:[0-9]+\]]], [[CVT0]]
; GCN: buffer_store_dwordx2 [[CVT1]]
define void @global_extload_f16_to_f64(double addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  %cvt = fpext half %val to double
  store double %cvt, double addrspace(1)* %out
  ret void
}

; Loads a <2 x half> from %in, extends it to <2 x double> and stores it.
; The checks expect two ushort loads (offsets 0 and 2), each converted to
; float and then to double, and one dwordx4 store spanning from the low
; register of the first double to the high register of the second.
; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64:
; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT2_LO:[0-9]+]]:[[CVT2_HI:[0-9]+]]{{\]}}, v[[CVT0]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT3_LO:[0-9]+]]:[[CVT3_HI:[0-9]+]]{{\]}}, v[[CVT1]]
; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[CVT2_LO]]:[[CVT3_HI]]{{\]}}
; GCN: s_endpgm
define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  %cvt = fpext <2 x half> %val to <2 x double>
  store <2 x double> %cvt, <2 x double> addrspace(1)* %out
  ret void
}

; Loads a <3 x half> from %in, extends it to <3 x double> and stores it.
; The checks expect a single dwordx2 load, a 16-bit right shift, exactly
; three half-to-float and exactly three float-to-double conversions, and
; the result written as a dwordx4 store plus a dwordx2 store at offset 16.
; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f64:

; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_lshrrev_b32_e32 {{v[0-9]+}}, 16, {{v[0-9]+}}
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN-NOT: v_cvt_f32_f16

; GCN: v_cvt_f64_f32_e32
; GCN: v_cvt_f64_f32_e32
; GCN: v_cvt_f64_f32_e32
; GCN-NOT: v_cvt_f64_f32_e32

; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16
; GCN: s_endpgm
define void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
  %val = load <3 x half>, <3 x half> addrspace(1)* %in
  %cvt = fpext <3 x half> %val to <3 x double>
  store <3 x double> %cvt, <3 x double> addrspace(1)* %out
  ret void
}

; Loads a <4 x half> from %in, extends it to <4 x double> and stores it.
; Only the function label is checked; no instruction patterns are verified.
; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f64:
define void @global_extload_v4f16_to_v4f64(<4 x double> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  %cvt = fpext <4 x half> %val to <4 x double>
  store <4 x double> %cvt, <4 x double> addrspace(1)* %out
  ret void
}

; Loads an <8 x half> from %in, extends it to <8 x double> and stores it.
; Only the function label is checked; no instruction patterns are verified.
; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f64:
define void @global_extload_v8f16_to_v8f64(<8 x double> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  %cvt = fpext <8 x half> %val to <8 x double>
  store <8 x double> %cvt, <8 x double> addrspace(1)* %out
  ret void
}

; Loads a <16 x half> from %in, extends it to <16 x double> and stores it.
; Only the function label is checked; no instruction patterns are verified.
; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f64:
define void @global_extload_v16f16_to_v16f64(<16 x double> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
  %val = load <16 x half>, <16 x half> addrspace(1)* %in
  %cvt = fpext <16 x half> %val to <16 x double>
  store <16 x double> %cvt, <16 x double> addrspace(1)* %out
  ret void
}

; Loads a float from %in, truncates it to half and stores it through %out.
; The checks expect a dword load, a v_cvt_f16_f32 of the loaded register and
; a short store of the converted register.
; GCN-LABEL: {{^}}global_truncstore_f32_to_f16:
; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_short [[CVT]]
define void @global_truncstore_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %val = load float, float addrspace(1)* %in
  %cvt = fptrunc float %val to half
  store half %cvt, half addrspace(1)* %out
  ret void
}

; Loads a <2 x float> from %in, truncates it to <2 x half> and stores it.
; The checks expect one dwordx2 load, a v_cvt_f16_f32 of each half of the
; loaded register pair, and a short store of each converted register.
; GCN-LABEL: {{^}}global_truncstore_v2f32_to_v2f16:
; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT0:v[0-9]+]], v[[LO]]
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], v[[HI]]
; GCN-DAG: buffer_store_short [[CVT0]]
; GCN-DAG: buffer_store_short [[CVT1]]
; GCN: s_endpgm
define void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
  %val = load <2 x float>, <2 x float> addrspace(1)* %in
  %cvt = fptrunc <2 x float> %val to <2 x half>
  store <2 x half> %cvt, <2 x half> addrspace(1)* %out
  ret void
}

; Loads a <3 x float> from %in, truncates it to <3 x half> and stores it.
; The checks expect a dwordx4 load, exactly three v_cvt_f16_f32 conversions,
; then a short store followed by a dword store.
; GCN-LABEL: {{^}}global_truncstore_v3f32_to_v3f16:
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN-NOT: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_dword
; GCN: s_endpgm
define void @global_truncstore_v3f32_to_v3f16(<3 x half> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
  %val = load <3 x float>, <3 x float> addrspace(1)* %in
  %cvt = fptrunc <3 x float> %val to <3 x half>
  store <3 x half> %cvt, <3 x half> addrspace(1)* %out
  ret void
}

; Loads a <4 x float> from %in, truncates it to <4 x half> and stores it.
; The checks expect a dwordx4 load, four v_cvt_f16_f32 conversions and four
; short stores, in that order.
; GCN-LABEL: {{^}}global_truncstore_v4f32_to_v4f16:
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
define void @global_truncstore_v4f32_to_v4f16(<4 x half> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
  %val = load <4 x float>, <4 x float> addrspace(1)* %in
  %cvt = fptrunc <4 x float> %val to <4 x half>
  store <4 x half> %cvt, <4 x half> addrspace(1)* %out
  ret void
}

; Loads an <8 x float> from %in, truncates it to <8 x half> and stores it.
; The checks expect two dwordx4 loads, eight v_cvt_f16_f32 conversions and
; eight short stores, in that order.
; GCN-LABEL: {{^}}global_truncstore_v8f32_to_v8f16:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
define void @global_truncstore_v8f32_to_v8f16(<8 x half> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {
  %val = load <8 x float>, <8 x float> addrspace(1)* %in
  %cvt = fptrunc <8 x float> %val to <8 x half>
  store <8 x half> %cvt, <8 x half> addrspace(1)* %out
  ret void
}

; Loads a <16 x float> from %in, truncates it to <16 x half> and stores it.
; The checks expect four dwordx4 loads, then sixteen v_cvt_f16_f32
; conversions and sixteen short stores that may be interleaved in any order.
; GCN-LABEL: {{^}}global_truncstore_v16f32_to_v16f16:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN: s_endpgm
define void @global_truncstore_v16f32_to_v16f16(<16 x half> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 {
  %val = load <16 x float>, <16 x float> addrspace(1)* %in
  %cvt = fptrunc <16 x float> %val to <16 x half>
  store <16 x half> %cvt, <16 x half> addrspace(1)* %out
  ret void
}

; Adds two scalar half arguments and stores the sum through %out.
; Instruction patterns are only checked for the -mcpu=SI run, which expects
; v_cvt_f32_f16 conversions and a v_add_f32; the tonga run only checks the
; label and s_endpgm.
; FIXME: Unsafe math should fold conversions away
; GCN-LABEL: {{^}}fadd_f16:
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_f16(half addrspace(1)* %out, half %a, half %b) #0 {
  %add = fadd half %a, %b
  store half %add, half addrspace(1)* %out, align 4
  ret void
}

; Adds two <2 x half> arguments and stores the sum through %out.
; The -mcpu=SI run expects two v_add_f32 instructions; the tonga run only
; checks the label and s_endpgm.
; GCN-LABEL: {{^}}fadd_v2f16:
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %a, <2 x half> %b) #0 {
  %add = fadd <2 x half> %a, %b
  store <2 x half> %add, <2 x half> addrspace(1)* %out, align 8
  ret void
}

; Loads two consecutive <4 x half> vectors from %in (elements 0 and 1 of the
; pointer), adds them and stores the sum through %out.
; The -mcpu=SI run expects four v_add_f32 instructions; the tonga run only
; checks the label and s_endpgm.
; GCN-LABEL: {{^}}fadd_v4f16:
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %b_ptr = getelementptr <4 x half>, <4 x half> addrspace(1)* %in, i32 1
  %a = load <4 x half>, <4 x half> addrspace(1)* %in, align 16
  %b = load <4 x half>, <4 x half> addrspace(1)* %b_ptr, align 16
  %result = fadd <4 x half> %a, %b
  store <4 x half> %result, <4 x half> addrspace(1)* %out, align 16
  ret void
}

; Adds two <8 x half> arguments and stores the sum through %out.
; The -mcpu=SI run expects eight v_add_f32 instructions; the tonga run only
; checks the label and s_endpgm.
; GCN-LABEL: {{^}}fadd_v8f16:
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v8f16(<8 x half> addrspace(1)* %out, <8 x half> %a, <8 x half> %b) #0 {
  %add = fadd <8 x half> %a, %b
  store <8 x half> %add, <8 x half> addrspace(1)* %out, align 32
  ret void
}

; Loads two consecutive half values from %in (elements 0 and 1 of the
; pointer), subtracts the second from the first and stores the result.
; Both runs expect a v_subrev_f32 instruction.
; GCN-LABEL: {{^}}fsub_f16:
; GCN: v_subrev_f32_e32
; GCN: s_endpgm
define void @fsub_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %b_ptr = getelementptr half, half addrspace(1)* %in, i32 1
  %a = load half, half addrspace(1)* %in
  %b = load half, half addrspace(1)* %b_ptr
  %sub = fsub half %a, %b
  store half %sub, half addrspace(1)* %out
  ret void
}

; Loads a half from %in, bitcasts it to i16 and stores it through %out.
; The checks expect the loaded register to be stored unchanged, i.e. a
; ushort load followed by a short store of the same register.
; GCN-LABEL: {{^}}test_bitcast_from_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) #0 {
  %val = load half, half addrspace(1)* %in
  %val_int = bitcast half %val to i16
  store i16 %val_int, i16 addrspace(1)* %out
  ret void
}

; Loads an i16 from %in, bitcasts it to half and stores it through %out.
; The checks expect the loaded register to be stored unchanged, i.e. a
; ushort load followed by a short store of the same register.
; GCN-LABEL: {{^}}test_bitcast_to_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %val = load i16, i16 addrspace(1)* %in
  %val_fp = bitcast i16 %val to half
  store half %val_fp, half addrspace(1)* %out
  ret void
}

; Attribute group #0, referenced by every function defined in this file.
attributes #0 = { nounwind }