; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Scalar f16 select with an f16 compare: SI has no f16 VALU compare, so the
; operands are promoted to f32; VI compares in f16 directly.
; GCN-LABEL: {{^}}select_f16:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[D_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; VI: v_cmp_lt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @select_f16(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c,
    half addrspace(1)* %d) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c
  %d.val = load half, half addrspace(1)* %d
  %fcmp = fcmp olt half %a.val, %b.val
  %r.val = select i1 %fcmp, half %c.val, half %d.val
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Immediate (0xH3800 = 0.5) as the first compare operand: folds into the
; compare as an inline constant on both SI (as f32) and VI (as f16).
; GCN-LABEL: {{^}}select_f16_imm_a:
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[D_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_lt_f32_e32 vcc, 0.5, v[[B_F32]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; VI: v_cmp_lt_f16_e32 vcc, 0.5, v[[B_F16]]
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @select_f16_imm_a(
    half addrspace(1)* %r,
    half addrspace(1)* %b,
    half addrspace(1)* %c,
    half addrspace(1)* %d) {
entry:
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c
  %d.val = load half, half addrspace(1)* %d
  %fcmp = fcmp olt half 0xH3800, %b.val
  %r.val = select i1 %fcmp, half %c.val, half %d.val
  store half %r.val, half addrspace(1)* %r
  ret void
}

Matt Arsenault0c687392017-01-30 16:57:41 +000066; GCN-LABEL: {{^}}select_f16_imm_b:
Konstantin Zhuravlyov2a87a422016-11-16 03:16:26 +000067; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
68; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
69; GCN: buffer_load_ushort v[[D_F16:[0-9]+]]
Konstantin Zhuravlyov2a87a422016-11-16 03:16:26 +000070; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
Matt Arsenault0c687392017-01-30 16:57:41 +000071; SI: v_cmp_gt_f32_e32 vcc, 0.5, v[[A_F32]]
Konstantin Zhuravlyov2a87a422016-11-16 03:16:26 +000072; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
73; SI: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
74; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
75; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
Matt Arsenault0c687392017-01-30 16:57:41 +000076
Matt Arsenault4bd72362016-12-10 00:39:12 +000077; VI: v_cmp_gt_f16_e32 vcc, 0.5, v[[A_F16]]
Konstantin Zhuravlyov2a87a422016-11-16 03:16:26 +000078; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
79; GCN: buffer_store_short v[[R_F16]]
80; GCN: s_endpgm
81define void @select_f16_imm_b(
82 half addrspace(1)* %r,
83 half addrspace(1)* %a,
84 half addrspace(1)* %c,
85 half addrspace(1)* %d) {
86entry:
87 %a.val = load half, half addrspace(1)* %a
88 %c.val = load half, half addrspace(1)* %c
89 %d.val = load half, half addrspace(1)* %d
90 %fcmp = fcmp olt half %a.val, 0xH3800
91 %r.val = select i1 %fcmp, half %c.val, half %d.val
92 store half %r.val, half addrspace(1)* %r
93 ret void
94}
95
; Immediate as the true-value of the select: the select is inverted
; (lt -> nlt) so the constant lands in the cndmask's first source; VI needs a
; v_mov to materialize the f16 constant 0x3800.
; GCN-LABEL: {{^}}select_f16_imm_c:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[D_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
; SI: v_cmp_nlt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], 0.5, v[[D_F32]], vcc
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]

; VI: v_cmp_nlt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; VI: v_mov_b32_e32 v[[C_F16:[0-9]+]], 0x3800{{$}}
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[C_F16]], v[[D_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @select_f16_imm_c(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %d) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %d.val = load half, half addrspace(1)* %d
  %fcmp = fcmp olt half %a.val, %b.val
  %r.val = select i1 %fcmp, half 0xH3800, half %d.val
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Immediate as the false-value of the select: no compare inversion needed; the
; constant becomes the cndmask's first source (inline 0.5 on SI, v_mov on VI).
; GCN-LABEL: {{^}}select_f16_imm_d:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], 0.5, v[[C_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; VI: v_cmp_lt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; VI: v_mov_b32_e32 v[[D_F16:[0-9]+]], 0x3800{{$}}
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @select_f16_imm_d(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c
  %fcmp = fcmp olt half %a.val, %b.val
  %r.val = select i1 %fcmp, half %c.val, half 0xH3800
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Vector <2 x half> select: scalarized into two compares and two cndmasks
; (one using vcc via _e32, one using an SGPR pair via _e64).
; GCN-LABEL: {{^}}select_v2f16:
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cmp_lt_f32_e64
; SI: v_cmp_lt_f32_e32
; VI: v_cmp_lt_f16_e32
; VI: v_cmp_lt_f16_e64
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e64
; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
define void @select_v2f16(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c,
    <2 x half> addrspace(1)* %d) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
  %d.val = load <2 x half>, <2 x half> addrspace(1)* %d
  %fcmp = fcmp olt <2 x half> %a.val, %b.val
  %r.val = select <2 x i1> %fcmp, <2 x half> %c.val, <2 x half> %d.val
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; Vector select with constant first compare operand <0.5, 0.5625>: only the
; 0.5 lane can use an inline constant in the compare.
; GCN-LABEL: {{^}}select_v2f16_imm_a:
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cmp_lt_f32_e64
; SI: v_cmp_lt_f32_e32 vcc, 0.5

; VI: v_cmp_lt_f16_e32
; VI: v_cmp_lt_f16_e64
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e64
; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
define void @select_v2f16_imm_a(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c,
    <2 x half> addrspace(1)* %d) {
entry:
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
  %d.val = load <2 x half>, <2 x half> addrspace(1)* %d
  %fcmp = fcmp olt <2 x half> <half 0xH3800, half 0xH3900>, %b.val
  %r.val = select <2 x i1> %fcmp, <2 x half> %c.val, <2 x half> %d.val
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; Vector select with constant second compare operand: canonicalization flips
; lt -> gt per lane, with the 0.5 lane folding as an inline constant.
; GCN-LABEL: {{^}}select_v2f16_imm_b:
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cmp_gt_f32_e64
; SI: v_cmp_gt_f32_e32 vcc, 0.5

; VI: v_cmp_gt_f16_e32
; VI: v_cmp_gt_f16_e64
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e64

; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
define void @select_v2f16_imm_b(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %c,
    <2 x half> addrspace(1)* %d) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
  %d.val = load <2 x half>, <2 x half> addrspace(1)* %d
  %fcmp = fcmp olt <2 x half> %a.val, <half 0xH3800, half 0xH3900>
  %r.val = select <2 x i1> %fcmp, <2 x half> %c.val, <2 x half> %d.val
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; Vector select with a constant true-value: each lane's select is inverted
; (lt -> nlt) so the constant feeds the cndmask's first source.
; GCN-LABEL: {{^}}select_v2f16_imm_c:
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32

; SI: v_cmp_nlt_f32_e32
; SI: v_cmp_nlt_f32_e64
; SI: v_cndmask_b32_e64
; SI: v_cndmask_b32_e32

; VI: v_cmp_nlt_f16_e32
; VI: v_cndmask_b32_e32

; VI: v_cmp_nlt_f16_e32
; VI: v_cndmask_b32_e32

; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
define void @select_v2f16_imm_c(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %d) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %d.val = load <2 x half>, <2 x half> addrspace(1)* %d
  %fcmp = fcmp olt <2 x half> %a.val, %b.val
  %r.val = select <2 x i1> %fcmp, <2 x half> <half 0xH3800, half 0xH3900>, <2 x half> %d.val
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; Vector select with a constant false-value: no compare inversion; the
; constant lanes become the cndmasks' first sources.
; GCN-LABEL: {{^}}select_v2f16_imm_d:
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cmp_lt_f32_e64
; SI: v_cmp_lt_f32_e32

; VI: v_cmp_lt_f16_e32
; VI: v_cmp_lt_f16_e64
; GCN: v_cndmask_b32
; GCN: v_cndmask_b32
; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
define void @select_v2f16_imm_d(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
  %fcmp = fcmp olt <2 x half> %a.val, %b.val
  %r.val = select <2 x i1> %fcmp, <2 x half> %c.val, <2 x half> <half 0xH3800, half 0xH3900>
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}