; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Scalar f16 select: r = (a < b) ? c : d, all operands loaded from memory.
; SI has no f16 compare, so the checks expect promotion to f32 and a
; truncating convert of the result; VI compares directly in f16.
; GCN-LABEL: {{^}}select_f16
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[D_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; VI: v_cmp_lt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @select_f16(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c,
    half addrspace(1)* %d) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c
  %d.val = load half, half addrspace(1)* %d
  %fcmp = fcmp olt half %a.val, %b.val
  %r.val = select i1 %fcmp, half %c.val, half %d.val
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Same select, but the first compare operand is the f16 immediate 0xH3800.
; The compare is expected to be commuted (lt -> gt on SI) so the literal
; can sit in the constant operand slot.
; GCN-LABEL: {{^}}select_f16_imm_a
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[D_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], 0x3800{{$}}
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_gt_f32_e32 vcc, v[[B_F32]], v[[A_F32]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; VI: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x3800{{$}}
; VI: v_cmp_lt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @select_f16_imm_a(
    half addrspace(1)* %r,
    half addrspace(1)* %b,
    half addrspace(1)* %c,
    half addrspace(1)* %d) {
entry:
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c
  %d.val = load half, half addrspace(1)* %d
  %fcmp = fcmp olt half 0xH3800, %b.val
  %r.val = select i1 %fcmp, half %c.val, half %d.val
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Immediate as the second compare operand (a < 0xH3800). On VI the
; compare is commuted to gt so the immediate is materialized first.
; GCN-LABEL: {{^}}select_f16_imm_b
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[D_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], 0x3800{{$}}
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; VI: v_mov_b32_e32 v[[B_F16:[0-9]+]], 0x3800{{$}}
; VI: v_cmp_gt_f16_e32 vcc, v[[B_F16]], v[[A_F16]]
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @select_f16_imm_b(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %c,
    half addrspace(1)* %d) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %c.val = load half, half addrspace(1)* %c
  %d.val = load half, half addrspace(1)* %d
  %fcmp = fcmp olt half %a.val, 0xH3800
  %r.val = select i1 %fcmp, half %c.val, half %d.val
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Immediate as the true-value operand of the select (c = 0xH3800); the
; compare itself still uses two loaded values.
; GCN-LABEL: {{^}}select_f16_imm_c
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[D_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], 0x3800{{$}}
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; VI: v_cmp_lt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; VI: v_mov_b32_e32 v[[C_F16:[0-9]+]], 0x3800{{$}}
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @select_f16_imm_c(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %d) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %d.val = load half, half addrspace(1)* %d
  %fcmp = fcmp olt half %a.val, %b.val
  %r.val = select i1 %fcmp, half 0xH3800, half %d.val
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Immediate as the false-value operand of the select (d = 0xH3800).
; GCN-LABEL: {{^}}select_f16_imm_d
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], 0x3800{{$}}
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; VI: v_cmp_lt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; VI: v_mov_b32_e32 v[[D_F16:[0-9]+]], 0x3800{{$}}
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @select_f16_imm_d(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c
  %fcmp = fcmp olt half %a.val, %b.val
  %r.val = select i1 %fcmp, half %c.val, half 0xH3800
  store half %r.val, half addrspace(1)* %r
  ret void
}

; <2 x half> select: the vector is scalarized, so each lane gets its own
; compare and cndmask (one _e32 using vcc, one _e64 with an SGPR pair).
; GCN-LABEL: {{^}}select_v2f16
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cmp_lt_f32_e64
; SI: v_cmp_lt_f32_e32
; VI: v_cmp_lt_f16_e32
; VI: v_cmp_lt_f16_e64
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e64
; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
define void @select_v2f16(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c,
    <2 x half> addrspace(1)* %d) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
  %d.val = load <2 x half>, <2 x half> addrspace(1)* %d
  %fcmp = fcmp olt <2 x half> %a.val, %b.val
  %r.val = select <2 x i1> %fcmp, <2 x half> %c.val, <2 x half> %d.val
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; Vector select with a constant-vector first compare operand
; (<0xH3800, 0xH3900>); per-lane compares are commuted to gt on SI.
; GCN-LABEL: {{^}}select_v2f16_imm_a
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cmp_gt_f32_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cmp_gt_f32_e64
; VI: v_cmp_lt_f16_e32
; VI: v_cmp_lt_f16_e64
; GCN: v_cndmask_b32_e32
; SI: v_cvt_f16_f32_e32
; GCN: v_cndmask_b32_e64
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
define void @select_v2f16_imm_a(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c,
    <2 x half> addrspace(1)* %d) {
entry:
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
  %d.val = load <2 x half>, <2 x half> addrspace(1)* %d
  %fcmp = fcmp olt <2 x half> <half 0xH3800, half 0xH3900>, %b.val
  %r.val = select <2 x i1> %fcmp, <2 x half> %c.val, <2 x half> %d.val
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; Vector select with a constant-vector second compare operand; on VI the
; per-lane compares are commuted to gt so the immediates come first.
; GCN-LABEL: {{^}}select_v2f16_imm_b
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cmp_lt_f32_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cmp_lt_f32_e64
; VI: v_cmp_gt_f16_e32
; VI: v_cmp_gt_f16_e64
; GCN: v_cndmask_b32_e32
; SI: v_cvt_f16_f32_e32
; GCN: v_cndmask_b32_e64
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
define void @select_v2f16_imm_b(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %c,
    <2 x half> addrspace(1)* %d) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
  %d.val = load <2 x half>, <2 x half> addrspace(1)* %d
  %fcmp = fcmp olt <2 x half> %a.val, <half 0xH3800, half 0xH3900>
  %r.val = select <2 x i1> %fcmp, <2 x half> %c.val, <2 x half> %d.val
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; Vector select with a constant vector as the true-value operand.
; GCN-LABEL: {{^}}select_v2f16_imm_c
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cmp_lt_f32_e32
; SI: v_cmp_lt_f32_e64
; VI: v_cmp_lt_f16_e32
; VI: v_cmp_lt_f16_e64
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e64
; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
define void @select_v2f16_imm_c(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %d) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %d.val = load <2 x half>, <2 x half> addrspace(1)* %d
  %fcmp = fcmp olt <2 x half> %a.val, %b.val
  %r.val = select <2 x i1> %fcmp, <2 x half> <half 0xH3800, half 0xH3900>, <2 x half> %d.val
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; Vector select with a constant vector as the false-value operand.
; GCN-LABEL: {{^}}select_v2f16_imm_d
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cmp_lt_f32_e32
; SI: v_cmp_lt_f32_e64
; VI: v_cmp_lt_f16_e32
; VI: v_cmp_lt_f16_e64
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e64
; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
define void @select_v2f16_imm_d(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
  %fcmp = fcmp olt <2 x half> %a.val, %b.val
  %r.val = select <2 x i1> %fcmp, <2 x half> %c.val, <2 x half> <half 0xH3800, half 0xH3900>
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}