; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

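; f16 compare legalization differs by subtarget: SI has no 16-bit VALU
; compares, so each half operand is first extended to f32 with
; v_cvt_f32_f16 and compared as f32, while VI (gfx8) compares the f16
; values directly with v_cmp_*_f16. In both cases the i1 result is
; materialized as a sign-extended i32 through v_cndmask_b32.
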
; GCN-LABEL: {{^}}fcmp_f16_lt:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_lt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_lt(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp olt half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

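; The fabs calls on both compare operands should fold into the compare
; itself as VOP3 (_e64) source modifiers, shown as |...| below, rather
; than being lowered as separate sign-bit-clearing instructions.
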
; GCN-LABEL: {{^}}fcmp_f16_lt_abs:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]

; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]

; SI: v_cmp_lt_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, |v[[A_F32]]|, |v[[B_F32]]|
; VI: v_cmp_lt_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, |v[[A_F16]]|, |v[[B_F16]]|

; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_lt_abs(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %a.abs = call half @llvm.fabs.f16(half %a.val)
  %b.abs = call half @llvm.fabs.f16(half %b.val)
  %r.val = fcmp olt half %a.abs, %b.abs
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_eq:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_eq_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_eq_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_eq(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp oeq half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_le:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_le_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_le_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_le(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ole half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_gt:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_gt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_gt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_gt(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ogt half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_lg:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_lg_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_lg_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_lg(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp one half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_ge:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_ge_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_ge_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_ge(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp oge half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_o:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_o_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_o_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_o(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ord half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_u:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_u_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_u_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_u(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp uno half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

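; The following tests use the IR predicates with unordered semantics
; (ult, ueq, ule, ugt, une, uge), which select the inverted hardware
; compares (nge, nlg, ngt, nle, neq, nlt); unlike the ordered forms,
; these also evaluate to true when either operand is NaN.
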
; GCN-LABEL: {{^}}fcmp_f16_nge:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_nge_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_nge_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_nge(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ult half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_nlg:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_nlg_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_nlg_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_nlg(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ueq half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_ngt:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_ngt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_ngt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_ngt(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ule half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_nle:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_nle_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_nle_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_nle(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ugt half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_neq:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_neq_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_neq_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_neq(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp une half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_nlt:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_nlt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_nlt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_nlt(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp uge half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

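; The <2 x half> variants load each packed pair as a single dword and
; split out the high element with v_lshrrev_b32. On SI both elements are
; extended to f32 before comparing; on VI the low element is compared
; directly from the packed register. The two sign-extended results are
; stored together with buffer_store_dwordx2.
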
; GCN-LABEL: {{^}}fcmp_v2f16_lt:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_lt_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_lt_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_lt_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_lt(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp olt <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_eq:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_eq_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_eq_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_eq_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_eq_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_eq(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp oeq <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_le:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_le_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_le_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_le_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_le_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_le(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ole <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_gt:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_gt_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_gt_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_gt_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_gt_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_gt(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ogt <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_lg:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_lg_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_lg_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_lg_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_lg_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_lg(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp one <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_ge:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_ge_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_ge_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_ge_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_ge_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_ge(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp oge <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_o:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_o_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_o_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_o_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_o_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_o(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ord <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_u:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_u_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_u_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_u_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_u_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_u(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp uno <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_nge:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_nge_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_nge_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_nge_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_nge_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_nge(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ult <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_nlg:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_nlg_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_nlg_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_nlg_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_nlg_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_nlg(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ueq <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_ngt:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_ngt_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_ngt_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_ngt_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_ngt_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_ngt(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ule <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_nle:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_nle_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_nle_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_nle_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_nle_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_nle(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ugt <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_neq:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_neq_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_neq_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_neq_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_neq_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_neq(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp une <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_nlt:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_nlt_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_nlt_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_nlt_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_nlt_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_nlt(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp uge <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

declare half @llvm.fabs.f16(half) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }