blob: f08d4b6c791567e19e31245a5e8c376a187944c8 [file] [log] [blame]
Matt Arsenault7aad8fd2017-01-24 22:02:15 +00001; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00002
3declare half @llvm.fabs.f16(half %a)
4declare i1 @llvm.amdgcn.class.f16(half %a, i32 %b)
5
6; GCN-LABEL: {{^}}class_f16
7; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
8; GCN: buffer_load_dword v[[B_I32:[0-9]+]]
9; VI: v_cmp_class_f16_e32 vcc, v[[A_F16]], v[[B_I32]]
10; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
11; GCN: buffer_store_dword v[[R_I32]]
12; GCN: s_endpgm
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000013define amdgpu_kernel void @class_f16(
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +000014 i32 addrspace(1)* %r,
15 half addrspace(1)* %a,
16 i32 addrspace(1)* %b) {
17entry:
18 %a.val = load half, half addrspace(1)* %a
19 %b.val = load i32, i32 addrspace(1)* %b
20 %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 %b.val)
21 %r.val.sext = sext i1 %r.val to i32
22 store i32 %r.val.sext, i32 addrspace(1)* %r
23 ret void
24}
25
26; GCN-LABEL: {{^}}class_f16_fabs
27; GCN: s_load_dword s[[SA_F16:[0-9]+]]
28; GCN: s_load_dword s[[SB_I32:[0-9]+]]
29; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]]
30; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |v[[VA_F16]]|, s[[SB_I32]]
31; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
32; GCN: buffer_store_dword v[[VR_I32]]
33; GCN: s_endpgm
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000034define amdgpu_kernel void @class_f16_fabs(
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +000035 i32 addrspace(1)* %r,
36 half %a.val,
37 i32 %b.val) {
38entry:
39 %a.val.fabs = call half @llvm.fabs.f16(half %a.val)
40 %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val.fabs, i32 %b.val)
41 %r.val.sext = sext i1 %r.val to i32
42 store i32 %r.val.sext, i32 addrspace(1)* %r
43 ret void
44}
45
46; GCN-LABEL: {{^}}class_f16_fneg
47; GCN: s_load_dword s[[SA_F16:[0-9]+]]
48; GCN: s_load_dword s[[SB_I32:[0-9]+]]
Matt Arsenault53f0cc22017-01-26 01:25:36 +000049; VI: v_trunc_f16_e64 v[[VA_F16:[0-9]+]], -s[[SA_F16]]
50; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], v[[VA_F16]], s[[SB_I32]]
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +000051; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
52; GCN: buffer_store_dword v[[VR_I32]]
53; GCN: s_endpgm
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000054define amdgpu_kernel void @class_f16_fneg(
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +000055 i32 addrspace(1)* %r,
56 half %a.val,
57 i32 %b.val) {
58entry:
59 %a.val.fneg = fsub half -0.0, %a.val
60 %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val.fneg, i32 %b.val)
61 %r.val.sext = sext i1 %r.val to i32
62 store i32 %r.val.sext, i32 addrspace(1)* %r
63 ret void
64}
65
66; GCN-LABEL: {{^}}class_f16_fabs_fneg
67; GCN: s_load_dword s[[SA_F16:[0-9]+]]
68; GCN: s_load_dword s[[SB_I32:[0-9]+]]
69; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]]
70; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -|v[[VA_F16]]|, s[[SB_I32]]
71; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
72; GCN: buffer_store_dword v[[VR_I32]]
73; GCN: s_endpgm
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000074define amdgpu_kernel void @class_f16_fabs_fneg(
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +000075 i32 addrspace(1)* %r,
76 half %a.val,
77 i32 %b.val) {
78entry:
79 %a.val.fabs = call half @llvm.fabs.f16(half %a.val)
80 %a.val.fabs.fneg = fsub half -0.0, %a.val.fabs
81 %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val.fabs.fneg, i32 %b.val)
82 %r.val.sext = sext i1 %r.val to i32
83 store i32 %r.val.sext, i32 addrspace(1)* %r
84 ret void
85}
86
87; GCN-LABEL: {{^}}class_f16_1
88; GCN: s_load_dword s[[SA_F16:[0-9]+]]
89; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]]
90; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], v[[VA_F16]], 1{{$}}
91; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
92; GCN: buffer_store_dword v[[VR_I32]]
93; GCN: s_endpgm
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000094define amdgpu_kernel void @class_f16_1(
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +000095 i32 addrspace(1)* %r,
96 half %a.val) {
97entry:
98 %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 1)
99 %r.val.sext = sext i1 %r.val to i32
100 store i32 %r.val.sext, i32 addrspace(1)* %r
101 ret void
102}
103
104; GCN-LABEL: {{^}}class_f16_64
105; GCN: s_load_dword s[[SA_F16:[0-9]+]]
106; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]]
107; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], v[[VA_F16]], 64{{$}}
108; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
109; GCN: buffer_store_dword v[[VR_I32]]
110; GCN: s_endpgm
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000111define amdgpu_kernel void @class_f16_64(
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +0000112 i32 addrspace(1)* %r,
113 half %a.val) {
114entry:
115 %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 64)
116 %r.val.sext = sext i1 %r.val to i32
117 store i32 %r.val.sext, i32 addrspace(1)* %r
118 ret void
119}
120
121; GCN-LABEL: {{^}}class_f16_full_mask
122; GCN: s_load_dword s[[SA_F16:[0-9]+]]
123; VI: v_mov_b32_e32 v[[MASK:[0-9]+]], 0x3ff{{$}}
124; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]]
125; VI: v_cmp_class_f16_e32 vcc, v[[VA_F16]], v[[MASK]]
126; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, vcc
127; GCN: buffer_store_dword v[[VR_I32]]
128; GCN: s_endpgm
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000129define amdgpu_kernel void @class_f16_full_mask(
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +0000130 i32 addrspace(1)* %r,
131 half %a.val) {
132entry:
133 %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 1023)
134 %r.val.sext = sext i1 %r.val to i32
135 store i32 %r.val.sext, i32 addrspace(1)* %r
136 ret void
137}
138
139; GCN-LABEL: {{^}}class_f16_nine_bit_mask
140; GCN: s_load_dword s[[SA_F16:[0-9]+]]
141; VI: v_mov_b32_e32 v[[MASK:[0-9]+]], 0x1ff{{$}}
142; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]]
143; VI: v_cmp_class_f16_e32 vcc, v[[VA_F16]], v[[MASK]]
144; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, vcc
145; GCN: buffer_store_dword v[[VR_I32]]
146; GCN: s_endpgm
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000147define amdgpu_kernel void @class_f16_nine_bit_mask(
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +0000148 i32 addrspace(1)* %r,
149 half %a.val) {
150entry:
151 %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 511)
152 %r.val.sext = sext i1 %r.val to i32
153 store i32 %r.val.sext, i32 addrspace(1)* %r
154 ret void
155}