; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
3
; Scalar f16 select with both compare operands loaded from memory.
; SI has no native f16 arithmetic: operands are promoted to f32
; (v_cvt_f32_f16), compared and selected in f32, and the result is
; truncated back to f16.  VI compares and selects directly in f16.
; GCN-LABEL: {{^}}select_f16
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[D_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; VI: v_cmp_lt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @select_f16(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c,
    half addrspace(1)* %d) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c
  %d.val = load half, half addrspace(1)* %d
  ; *r = (a < b) ? c : d
  %fcmp = fcmp olt half %a.val, %b.val
  %r.val = select i1 %fcmp, half %c.val, half %d.val
  store half %r.val, half addrspace(1)* %r
  ret void
}
36
; Like select_f16 but the first compare operand is the immediate
; 0xH3800 (half 0.5).  On SI the compare is commuted (lt -> gt) after
; promotion so the materialized constant becomes the second use; on VI
; 0.5 is encodable as an inline constant in src0 of the f16 compare.
; GCN-LABEL: {{^}}select_f16_imm_a
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[D_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], 0x3800{{$}}
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_gt_f32_e32 vcc, v[[B_F32]], v[[A_F32]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; VI: v_cmp_lt_f16_e32 vcc, 0.5, v[[B_F16]]
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @select_f16_imm_a(
    half addrspace(1)* %r,
    half addrspace(1)* %b,
    half addrspace(1)* %c,
    half addrspace(1)* %d) {
entry:
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c
  %d.val = load half, half addrspace(1)* %d
  ; *r = (0.5 < b) ? c : d
  %fcmp = fcmp olt half 0xH3800, %b.val
  %r.val = select i1 %fcmp, half %c.val, half %d.val
  store half %r.val, half addrspace(1)* %r
  ret void
}
66
; Like select_f16 but the second compare operand is the immediate
; 0xH3800 (half 0.5).  SI keeps the lt ordering after f32 promotion;
; VI commutes to gt so the inline constant 0.5 can sit in src0.
; GCN-LABEL: {{^}}select_f16_imm_b
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[D_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], 0x3800{{$}}
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; VI: v_cmp_gt_f16_e32 vcc, 0.5, v[[A_F16]]
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @select_f16_imm_b(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %c,
    half addrspace(1)* %d) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %c.val = load half, half addrspace(1)* %c
  %d.val = load half, half addrspace(1)* %d
  ; *r = (a < 0.5) ? c : d
  %fcmp = fcmp olt half %a.val, 0xH3800
  %r.val = select i1 %fcmp, half %c.val, half %d.val
  store half %r.val, half addrspace(1)* %r
  ret void
}
96
; Immediate 0xH3800 (half 0.5) as the select's true value.  v_cndmask
; cannot take a literal operand, so on VI the constant is first
; materialized into a VGPR with v_mov_b32; on SI it is converted to f32
; like the other operands.
; GCN-LABEL: {{^}}select_f16_imm_c
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[D_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], 0x3800{{$}}
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; VI: v_cmp_lt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; VI: v_mov_b32_e32 v[[C_F16:[0-9]+]], 0x3800{{$}}
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @select_f16_imm_c(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %d) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %d.val = load half, half addrspace(1)* %d
  ; *r = (a < b) ? 0.5 : d
  %fcmp = fcmp olt half %a.val, %b.val
  %r.val = select i1 %fcmp, half 0xH3800, half %d.val
  store half %r.val, half addrspace(1)* %r
  ret void
}
127
; Immediate 0xH3800 (half 0.5) as the select's false value.  As in
; select_f16_imm_c, the constant must live in a VGPR for v_cndmask:
; v_mov_b32 on VI, f32 conversion on SI.
; GCN-LABEL: {{^}}select_f16_imm_d
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], 0x3800{{$}}
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; VI: v_cmp_lt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; VI: v_mov_b32_e32 v[[D_F16:[0-9]+]], 0x3800{{$}}
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @select_f16_imm_d(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c
  ; *r = (a < b) ? c : 0.5
  %fcmp = fcmp olt half %a.val, %b.val
  %r.val = select i1 %fcmp, half %c.val, half 0xH3800
  store half %r.val, half addrspace(1)* %r
  ret void
}
158
; <2 x half> select is scalarized into two compare/select pairs: one
; lane uses the e32 (implicit vcc) forms, the other the e64 forms with
; an explicit condition register pair.  SI additionally promotes each
; lane to f32 and truncates the results back.
; GCN-LABEL: {{^}}select_v2f16
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cmp_lt_f32_e64
; SI: v_cmp_lt_f32_e32
; VI: v_cmp_lt_f16_e32
; VI: v_cmp_lt_f16_e64
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e64
; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
define void @select_v2f16(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c,
    <2 x half> addrspace(1)* %d) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
  %d.val = load <2 x half>, <2 x half> addrspace(1)* %d
  ; per lane: r = (a < b) ? c : d
  %fcmp = fcmp olt <2 x half> %a.val, %b.val
  %r.val = select <2 x i1> %fcmp, <2 x half> %c.val, <2 x half> %d.val
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}
189
; Vector variant with the first compare operand a constant vector
; <0xH3800, 0xH3900> (0.5, 0.625).  On SI both scalarized compares are
; commuted to gt (constant in the second position); on VI the f16
; compares keep lt with the constant lanes as src0.
; GCN-LABEL: {{^}}select_v2f16_imm_a
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cmp_gt_f32_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cmp_gt_f32_e64
; VI: v_cmp_lt_f16_e32
; VI: v_cmp_lt_f16_e64
; GCN: v_cndmask_b32_e32
; SI: v_cvt_f16_f32_e32
; GCN: v_cndmask_b32_e64
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
define void @select_v2f16_imm_a(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c,
    <2 x half> addrspace(1)* %d) {
entry:
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
  %d.val = load <2 x half>, <2 x half> addrspace(1)* %d
  ; per lane: r = (const < b) ? c : d
  %fcmp = fcmp olt <2 x half> <half 0xH3800, half 0xH3900>, %b.val
  %r.val = select <2 x i1> %fcmp, <2 x half> %c.val, <2 x half> %d.val
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}
222
; Vector variant with the second compare operand a constant vector
; <0xH3800, 0xH3900> (0.5, 0.625).  SI keeps lt ordering after f32
; promotion; VI commutes each scalarized f16 compare to gt so the
; constant lane sits in src0.
; GCN-LABEL: {{^}}select_v2f16_imm_b
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cmp_lt_f32_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cmp_lt_f32_e64
; VI: v_cmp_gt_f16_e32
; VI: v_cmp_gt_f16_e64
; GCN: v_cndmask_b32_e32
; SI: v_cvt_f16_f32_e32
; GCN: v_cndmask_b32_e64
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
define void @select_v2f16_imm_b(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %c,
    <2 x half> addrspace(1)* %d) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
  %d.val = load <2 x half>, <2 x half> addrspace(1)* %d
  ; per lane: r = (a < const) ? c : d
  %fcmp = fcmp olt <2 x half> %a.val, <half 0xH3800, half 0xH3900>
  %r.val = select <2 x i1> %fcmp, <2 x half> %c.val, <2 x half> %d.val
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}
255
; Vector variant with a constant vector <0xH3800, 0xH3900> (0.5, 0.625)
; as the select's true value; the compare operands are both loaded, so
; both scalarized compares keep the lt ordering on SI and VI.
; GCN-LABEL: {{^}}select_v2f16_imm_c
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cmp_lt_f32_e32
; SI: v_cmp_lt_f32_e64
; VI: v_cmp_lt_f16_e32
; VI: v_cmp_lt_f16_e64
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e64
; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
define void @select_v2f16_imm_c(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %d) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %d.val = load <2 x half>, <2 x half> addrspace(1)* %d
  ; per lane: r = (a < b) ? const : d
  %fcmp = fcmp olt <2 x half> %a.val, %b.val
  %r.val = select <2 x i1> %fcmp, <2 x half> <half 0xH3800, half 0xH3900>, <2 x half> %d.val
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}
288
; Vector variant with a constant vector <0xH3800, 0xH3900> (0.5, 0.625)
; as the select's false value; mirrors select_v2f16_imm_c with the
; constant feeding the other v_cndmask input.
; GCN-LABEL: {{^}}select_v2f16_imm_d
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cmp_lt_f32_e32
; SI: v_cmp_lt_f32_e64
; VI: v_cmp_lt_f16_e32
; VI: v_cmp_lt_f16_e64
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e64
; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
define void @select_v2f16_imm_d(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
  ; per lane: r = (a < b) ? c : const
  %fcmp = fcmp olt <2 x half> %a.val, %b.val
  %r.val = select <2 x i1> %fcmp, <2 x half> %c.val, <2 x half> <half 0xH3800, half 0xH3900>
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}