; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s

; Intrinsic declarations used by the kernels below.
declare half @llvm.fabs.f16(half %a)
declare i1 @llvm.amdgcn.class.f16(half %a, i32 %b)

; Both operands loaded from memory: the class mask ends up in a VGPR, so the
; VOP2 (_e32) form of v_cmp_class writing vcc is expected.
; GCN-LABEL: {{^}}class_f16:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_I32:[0-9]+]]
; VI: v_cmp_class_f16_e32 vcc, v[[A_F16]], v[[B_I32]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @class_f16(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    i32 addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load i32, i32 addrspace(1)* %b
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 %b.val)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; fabs should fold into the source modifier of v_cmp_class (VOP3 _e64 form,
; |...| on the first operand) rather than emitting a separate and/fabs.
; GCN-LABEL: {{^}}class_f16_fabs:
; GCN-DAG: buffer_load_ushort v[[SA_F16:[0-9]+]]
; GCN-DAG: s_load_dword s[[SB_I32:[0-9]+]]
; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |v[[SA_F16]]|, s[[SB_I32]]
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
; GCN: buffer_store_dword v[[VR_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @class_f16_fabs(
    i32 addrspace(1)* %r,
    half %a.val,
    i32 %b.val) {
entry:
  %a.val.fabs = call half @llvm.fabs.f16(half %a.val)
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val.fabs, i32 %b.val)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; fneg should fold into the source modifier of v_cmp_class (VOP3 _e64 form,
; leading '-' on the first operand).
; GCN-LABEL: {{^}}class_f16_fneg
; GCN: buffer_load_ushort v[[SA_F16:[0-9]+]]
; GCN: s_load_dword s[[SB_I32:[0-9]+]]
; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -v[[SA_F16]], s[[SB_I32]]
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
; GCN: buffer_store_dword v[[VR_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @class_f16_fneg(
    i32 addrspace(1)* %r,
    half %a.val,
    i32 %b.val) {
entry:
  %a.val.fneg = fsub half -0.0, %a.val
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val.fneg, i32 %b.val)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; Combined fneg(fabs(x)) should fold into a single -| | source modifier.
; GCN-LABEL: {{^}}class_f16_fabs_fneg
; GCN-DAG: buffer_load_ushort v[[SA_F16:[0-9]+]]
; GCN-DAG: s_load_dword s[[SB_I32:[0-9]+]]
; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -|v[[SA_F16]]|, s[[SB_I32]]
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
; GCN: buffer_store_dword v[[VR_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @class_f16_fabs_fneg(
    i32 addrspace(1)* %r,
    half %a.val,
    i32 %b.val) {
entry:
  %a.val.fabs = call half @llvm.fabs.f16(half %a.val)
  %a.val.fabs.fneg = fsub half -0.0, %a.val.fabs
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val.fabs.fneg, i32 %b.val)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; A small constant mask (1 = signaling NaN) can be encoded as an inline
; immediate in the VOP3 form.
; GCN-LABEL: {{^}}class_f16_1:
; GCN: buffer_load_ushort v[[SA_F16:[0-9]+]]
; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], v[[SA_F16]], 1{{$}}
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
; GCN: buffer_store_dword v[[VR_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @class_f16_1(
    i32 addrspace(1)* %r,
    half %a.val) {
entry:
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 1)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; Mask 64 (positive normal) is still within inline-immediate range.
; GCN-LABEL: {{^}}class_f16_64
; GCN: buffer_load_ushort v[[SA_F16:[0-9]+]]
; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], v[[SA_F16]], 64{{$}}
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
; GCN: buffer_store_dword v[[VR_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @class_f16_64(
    i32 addrspace(1)* %r,
    half %a.val) {
entry:
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 64)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; 0x3ff (all ten class bits set) is not an inline immediate, so it must be
; materialized into a VGPR and the VOP2 (_e32) form used.
; GCN-LABEL: {{^}}class_f16_full_mask:
; GCN: buffer_load_ushort v[[SA_F16:[0-9]+]]
; VI: v_mov_b32_e32 v[[MASK:[0-9]+]], 0x3ff{{$}}
; VI: v_cmp_class_f16_e32 vcc, v[[SA_F16]], v[[MASK]]
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, vcc
; GCN: buffer_store_dword v[[VR_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @class_f16_full_mask(
    i32 addrspace(1)* %r,
    half %a.val) {
entry:
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 1023)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; 0x1ff is likewise out of inline-immediate range and needs a v_mov first.
; GCN-LABEL: {{^}}class_f16_nine_bit_mask
; GCN: buffer_load_ushort v[[SA_F16:[0-9]+]]
; VI: v_mov_b32_e32 v[[MASK:[0-9]+]], 0x1ff{{$}}
; VI: v_cmp_class_f16_e32 vcc, v[[SA_F16]], v[[MASK]]
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, vcc
; GCN: buffer_store_dword v[[VR_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @class_f16_nine_bit_mask(
    i32 addrspace(1)* %r,
    half %a.val) {
entry:
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 511)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}