; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
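
; Covers every f16 fcmp condition code, scalar and <2 x half>. Function
; names follow the hardware condition code rather than the IR predicate,
; e.g. fcmp_f16_lg checks "fcmp one" and fcmp_f16_nge checks "fcmp ult".
; SI has no f16 compares, so operands are extended to f32 first; VI
; compares in half precision directly.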

; GCN-LABEL: {{^}}fcmp_f16_lt
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_lt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_lt(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp olt half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_eq
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_eq_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_eq_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_eq(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp oeq half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_le
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_le_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_le_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_le(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ole half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_gt
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_gt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_gt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_gt(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ogt half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_lg
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_lg_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_lg_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_lg(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp one half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_ge
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_ge_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_ge_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_ge(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp oge half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_o
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_o_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_o_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_o(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ord half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_u
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_u_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_u_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_u(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp uno half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_nge
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_nge_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_nge_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_nge(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ult half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_nlg
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_nlg_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_nlg_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_nlg(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ueq half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_ngt
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_ngt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_ngt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_ngt(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ule half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_nle
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_nle_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_nle_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_nle(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ugt half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_neq
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_neq_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_neq_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_neq(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp une half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_nlt
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_nlt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_nlt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_nlt(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp uge half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

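; For the <2 x half> cases both lanes are compared: the high halves are
; extracted with v_lshrrev_b32, lane 0's result goes through vcc and
; lane 1's through an SGPR pair, and the two sign-extended results are
; stored with a single dwordx2.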
; GCN-LABEL: {{^}}fcmp_v2f16_lt
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_lt_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_lt_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_lt_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_lt(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp olt <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_eq
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_eq_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_eq_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_eq_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_eq_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_eq(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp oeq <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_le
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_le_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_le_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_le_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_le_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_le(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ole <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_gt
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_gt_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_gt_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_gt_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_gt_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_gt(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ogt <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_lg
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_lg_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_lg_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_lg_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_lg_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_lg(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp one <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_ge
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_ge_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_ge_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_ge_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_ge_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_ge(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp oge <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_o
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_o_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_o_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_o_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_o_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_o(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ord <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_u
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_u_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_u_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_u_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_u_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_u(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp uno <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_nge
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_nge_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_nge_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_nge_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_nge_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_nge(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ult <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_nlg
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_nlg_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_nlg_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_nlg_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_nlg_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_nlg(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ueq <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_ngt
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_ngt_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_ngt_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_ngt_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_ngt_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_ngt(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ule <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_nle
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_nle_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_nle_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_nle_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_nle_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_nle(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ugt <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_neq
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_neq_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_neq_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_neq_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_neq_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_neq(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp une <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_nlt
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_nlt_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_nlt_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_nlt_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_nlt_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_nlt(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp uge <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}