; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
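
; A quick map of what is being checked below: SI has no native f16 compare
; instructions, so each half compare is legalized by extending both operands
; with v_cvt_f32_f16 and comparing as f32, while VI (fiji, gfx8) compares the
; halves directly with the f16 VOPC instructions. In every test the i1
; compare result is sign-extended to i32 through a v_cndmask_b32.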

; GCN-LABEL: {{^}}fcmp_f16_lt
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_lt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @fcmp_f16_lt(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp olt half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

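; Same as above, but with fabs on both operands. The fabs calls should fold
; into source modifiers (the |...| operands below) rather than being emitted
; as separate v_and_b32 instructions; on VI this pushes the compare into the
; e64 encoding, since the 32-bit VOPC encoding has no input modifiers.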
; GCN-LABEL: {{^}}fcmp_f16_lt_abs:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]

; SI: v_cvt_f32_f16_e64 v[[A_F32:[0-9]+]], |v[[A_F16]]|
; SI: v_cvt_f32_f16_e64 v[[B_F32:[0-9]+]], |v[[B_F16]]|

; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_lt_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, |v[[A_F16]]|, |v[[B_F16]]|

; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @fcmp_f16_lt_abs(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %a.abs = call half @llvm.fabs.f16(half %a.val)
  %b.abs = call half @llvm.fabs.f16(half %b.val)
  %r.val = fcmp olt half %a.abs, %b.abs
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_eq
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_eq_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_eq_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @fcmp_f16_eq(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp oeq half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_le
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_le_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_le_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @fcmp_f16_le(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ole half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_gt
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_gt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_gt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @fcmp_f16_gt(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ogt half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_lg
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_lg_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_lg_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @fcmp_f16_lg(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp one half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_ge
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_ge_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_ge_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @fcmp_f16_ge(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp oge half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_o
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_o_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_o_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @fcmp_f16_o(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ord half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_u
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_u_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_u_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @fcmp_f16_u(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp uno half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

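; The tests below are named after the hardware compare opcodes. Each
; unordered LLVM predicate selects the corresponding inverted compare:
; ult -> nge, ueq -> nlg, ule -> ngt, ugt -> nle, une -> neq, uge -> nlt.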
; GCN-LABEL: {{^}}fcmp_f16_nge
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_nge_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_nge_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @fcmp_f16_nge(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ult half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_nlg
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_nlg_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_nlg_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @fcmp_f16_nlg(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ueq half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_ngt
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_ngt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_ngt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @fcmp_f16_ngt(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ule half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_nle
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_nle_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_nle_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @fcmp_f16_nle(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ugt half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_neq
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_neq_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_neq_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @fcmp_f16_neq(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp une half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_nlt
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_nlt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_nlt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @fcmp_f16_nlt(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp uge half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

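; The <2 x half> variants repeat the same predicates on vectors. These
; targets have no packed f16 compares, so each kernel is expected to be
; scalarized into two compares: f32 compares on SI and f16 compares on VI.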
; GCN-LABEL: {{^}}fcmp_v2f16_lt:
; SI: v_cmp_lt_f32_e32 vcc,
; SI: v_cmp_lt_f32_e32 vcc,

; VI: v_cmp_lt_f16_e32 vcc,
; VI: v_cmp_lt_f16_e32 vcc,
define amdgpu_kernel void @fcmp_v2f16_lt(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp olt <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_eq
; SI: v_cmp_eq_f32_e32 vcc,
; SI: v_cmp_eq_f32_e32 vcc,

; VI: v_cmp_eq_f16_e32 vcc,
; VI: v_cmp_eq_f16_e32 vcc,
define amdgpu_kernel void @fcmp_v2f16_eq(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp oeq <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_le:
; SI: v_cmp_le_f32_e32 vcc
; SI: v_cmp_le_f32_e32 vcc
; VI: v_cmp_le_f16_e32 vcc
; VI: v_cmp_le_f16_e32 vcc
define amdgpu_kernel void @fcmp_v2f16_le(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ole <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_gt:
; SI: v_cmp_gt_f32_e32 vcc,
; SI: v_cmp_gt_f32_e32 vcc,

; VI: v_cmp_gt_f16_e32 vcc,
; VI: v_cmp_gt_f16_e32 vcc,
define amdgpu_kernel void @fcmp_v2f16_gt(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ogt <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_lg:
; SI: v_cmp_lg_f32_e32 vcc,
; SI: v_cmp_lg_f32_e32 vcc,

; VI: v_cmp_lg_f16_e32 vcc,
; VI: v_cmp_lg_f16_e32 vcc,
define amdgpu_kernel void @fcmp_v2f16_lg(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp one <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_ge:
; SI: v_cmp_ge_f32_e32 vcc,
; SI: v_cmp_ge_f32_e32 vcc,

; VI: v_cmp_ge_f16_e32 vcc,
; VI: v_cmp_ge_f16_e32 vcc,
define amdgpu_kernel void @fcmp_v2f16_ge(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp oge <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_o:
; SI: v_cmp_o_f32_e32 vcc,
; SI: v_cmp_o_f32_e32 vcc,

; VI: v_cmp_o_f16_e32 vcc,
; VI: v_cmp_o_f16_e32 vcc,
define amdgpu_kernel void @fcmp_v2f16_o(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ord <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_u:
; SI: v_cmp_u_f32_e32 vcc,
; SI: v_cmp_u_f32_e32 vcc,

; VI: v_cmp_u_f16_e32 vcc,
; VI: v_cmp_u_f16_e32 vcc,
define amdgpu_kernel void @fcmp_v2f16_u(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp uno <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_nge
; SI: v_cmp_nge_f32_e32 vcc,
; SI: v_cmp_nge_f32_e32 vcc,

; VI: v_cmp_nge_f16_e32 vcc,
; VI: v_cmp_nge_f16_e32 vcc,
define amdgpu_kernel void @fcmp_v2f16_nge(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ult <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_nlg
; SI: v_cmp_nlg_f32_e32 vcc
; SI: v_cmp_nlg_f32_e32 vcc

; VI: v_cmp_nlg_f16_e32 vcc
; VI: v_cmp_nlg_f16_e32 vcc
define amdgpu_kernel void @fcmp_v2f16_nlg(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ueq <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_ngt
; SI: v_cmp_ngt_f32_e32 vcc,
; SI: v_cmp_ngt_f32_e32 vcc,

; VI: v_cmp_ngt_f16_e32 vcc,
; VI: v_cmp_ngt_f16_e32 vcc,
define amdgpu_kernel void @fcmp_v2f16_ngt(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ule <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_nle
; SI: v_cmp_nle_f32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_nle_f32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}

; VI: v_cmp_nle_f16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_cmp_nle_f16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @fcmp_v2f16_nle(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ugt <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_neq
; SI: v_cmp_neq_f32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_neq_f32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}

; VI: v_cmp_neq_f16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_cmp_neq_f16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @fcmp_v2f16_neq(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp une <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

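; For the last case the expected scalarization is spelled out in full: the
; high halves are extracted with a 16-bit v_lshrrev_b32, compared separately,
; and the two sign-extended results are stored with buffer_store_dwordx2.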
; GCN-LABEL: {{^}}fcmp_v2f16_nlt
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN-DAG: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI-DAG: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; SI-DAG: v_cmp_nlt_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]

; GCN-DAG: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI-DAG: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI-DAG: v_cmp_nlt_f32_e32 vcc, v[[A_F32_1]], v[[B_F32_1]]
; VI-DAG: v_cmp_nlt_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]

; VI: v_cmp_nlt_f16_e32 vcc, v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define amdgpu_kernel void @fcmp_v2f16_nlt(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp uge <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

declare half @llvm.fabs.f16(half) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }