blob: 4a6743ee98ba2fcafa62af73c0687fcdf6d09e5f [file] [log] [blame]
Matt Arsenault8728c5f2017-08-07 14:58:04 +00001; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s
Matt Arsenault7aad8fd2017-01-24 22:02:15 +00002; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=VI %s
Matt Arsenaultb5b51102014-06-10 19:18:21 +00003; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
4
5declare i32 @llvm.ctpop.i32(i32) nounwind readnone
6declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone
7declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone
8declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone
9declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone
10
Alexander Timofeev982aee62017-07-04 17:32:00 +000011declare i32 @llvm.r600.read.tidig.x() nounwind readnone
12
; Scalar case: the ctpop operand is the kernel argument %val. The GCN checks
; expect an s_load_dword of the value, an s_bcnt1_i32_b32 on it, and a
; v_mov_b32_e32 of the scalar result into the v register that is stored.
; The EG checks expect a BCNT_INT.
Tom Stellard79243d92014-10-01 17:15:17 +000013; FUNC-LABEL: {{^}}s_ctpop_i32:
Marek Olsakfa6607d2015-02-11 14:26:46 +000014; GCN: s_load_dword [[SVAL:s[0-9]+]],
15; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]]
16; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
17; GCN: buffer_store_dword [[VRESULT]],
18; GCN: s_endpgm
Matt Arsenault60425062014-06-10 19:18:28 +000019
20; EG: BCNT_INT
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000021define amdgpu_kernel void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
Matt Arsenaultb5b51102014-06-10 19:18:21 +000022 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
23 store i32 %ctpop, i32 addrspace(1)* %out, align 4
24 ret void
25}
26
27; XXX - Why 0 in register?
; Vector case: the ctpop operand is loaded from %in at an index taken from
; @llvm.r600.read.tidig.x. The GCN checks expect a buffer or flat dword load
; feeding v_bcnt_u32_b32 whose last operand is the constant 0.
; The EG checks expect a BCNT_INT.
Tom Stellard79243d92014-10-01 17:15:17 +000028; FUNC-LABEL: {{^}}v_ctpop_i32:
Alexander Timofeev982aee62017-07-04 17:32:00 +000029; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
Dmitry Preobrazhensky167f8b62017-05-15 14:28:23 +000030; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], 0
Marek Olsakfa6607d2015-02-11 14:26:46 +000031; GCN: buffer_store_dword [[RESULT]],
32; GCN: s_endpgm
Matt Arsenault60425062014-06-10 19:18:28 +000033
34; EG: BCNT_INT
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000035define amdgpu_kernel void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
Alexander Timofeev982aee62017-07-04 17:32:00 +000036 %tid = call i32 @llvm.r600.read.tidig.x()
37 %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
38 %val = load i32, i32 addrspace(1)* %in.gep, align 4
Matt Arsenaultb5b51102014-06-10 19:18:21 +000039 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
40 store i32 %ctpop, i32 addrspace(1)* %out, align 4
41 ret void
42}
43
; Sum of two ctpops of loaded values. The checks expect the first
; v_bcnt_u32_b32 (on the second load, with last operand 0) to produce
; MIDRESULT, and the second v_bcnt_u32_b32 (on the first load) to take
; MIDRESULT as its last operand. SI and VI differ only in the expected
; mnemonic suffix of the second instruction. The EG checks expect two BCNT_INT.
Tom Stellard79243d92014-10-01 17:15:17 +000044; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32:
Alexander Timofeev982aee62017-07-04 17:32:00 +000045; GCN: {{buffer|flat}}_load_dword [[VAL0:v[0-9]+]],
46; GCN: {{buffer|flat}}_load_dword [[VAL1:v[0-9]+]],
Dmitry Preobrazhensky167f8b62017-05-15 14:28:23 +000047; GCN: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT:v[0-9]+]], [[VAL1]], 0
Tom Stellard83f0bce2015-01-29 16:55:25 +000048; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
Dmitry Preobrazhensky167f8b62017-05-15 14:28:23 +000049; VI: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
Marek Olsakfa6607d2015-02-11 14:26:46 +000050; GCN: buffer_store_dword [[RESULT]],
51; GCN: s_endpgm
Matt Arsenault60425062014-06-10 19:18:28 +000052
53; EG: BCNT_INT
54; EG: BCNT_INT
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000055define amdgpu_kernel void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind {
Alexander Timofeev982aee62017-07-04 17:32:00 +000056 %tid = call i32 @llvm.r600.read.tidig.x()
57 %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %tid
58 %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %tid
59 %val0 = load i32, i32 addrspace(1)* %in0.gep, align 4
60 %val1 = load i32, i32 addrspace(1)* %in1.gep, align 4
Matt Arsenaultb5b51102014-06-10 19:18:21 +000061 %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
62 %ctpop1 = call i32 @llvm.ctpop.i32(i32 %val1) nounwind readnone
63 %add = add i32 %ctpop0, %ctpop1
64 store i32 %add, i32 addrspace(1)* %out, align 4
65 ret void
66}
67
; ctpop of a loaded value added to the kernel argument %sval. The GCN checks
; expect v_bcnt_u32_b32 on the line directly after an s_waitcnt, with an
; s register as its last operand. This function has no EG checks.
Tom Stellard79243d92014-10-01 17:15:17 +000068; FUNC-LABEL: {{^}}v_ctpop_add_sgpr_i32:
Alexander Timofeev982aee62017-07-04 17:32:00 +000069; GCN: {{buffer|flat}}_load_dword [[VAL0:v[0-9]+]],
Tom Stellarda76bcc22016-03-28 16:10:13 +000070; GCN: s_waitcnt
Dmitry Preobrazhensky167f8b62017-05-15 14:28:23 +000071; GCN-NEXT: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
Changpeng Fang71369b32016-05-26 19:35:29 +000072; GCN: buffer_store_dword [[RESULT]],
Marek Olsakfa6607d2015-02-11 14:26:46 +000073; GCN: s_endpgm
Alexander Timofeev982aee62017-07-04 17:32:00 +000074define amdgpu_kernel void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %sval) nounwind {
75 %tid = call i32 @llvm.r600.read.tidig.x()
76 %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
77 %val = load i32, i32 addrspace(1)* %in.gep, align 4
78 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
79 %add = add i32 %ctpop, %sval
Matt Arsenault49dd4282014-09-15 17:15:02 +000080 store i32 %add, i32 addrspace(1)* %out, align 4
81 ret void
82}
83
; <2 x i32> ctpop of a loaded vector. The checks expect two v_bcnt_u32_b32
; on GCN and two BCNT_INT on EG; operands are not checked.
Tom Stellard79243d92014-10-01 17:15:17 +000084; FUNC-LABEL: {{^}}v_ctpop_v2i32:
Dmitry Preobrazhensky167f8b62017-05-15 14:28:23 +000085; GCN: v_bcnt_u32_b32{{(_e64)*}}
86; GCN: v_bcnt_u32_b32{{(_e64)*}}
Marek Olsakfa6607d2015-02-11 14:26:46 +000087; GCN: s_endpgm
Matt Arsenault60425062014-06-10 19:18:28 +000088
89; EG: BCNT_INT
90; EG: BCNT_INT
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000091define amdgpu_kernel void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind {
Alexander Timofeev982aee62017-07-04 17:32:00 +000092 %tid = call i32 @llvm.r600.read.tidig.x()
93 %in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 %tid
94 %val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8
Matt Arsenaultb5b51102014-06-10 19:18:21 +000095 %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %val) nounwind readnone
96 store <2 x i32> %ctpop, <2 x i32> addrspace(1)* %out, align 8
97 ret void
98}
99
; <4 x i32> ctpop of a loaded vector. The checks expect four v_bcnt_u32_b32
; on GCN and four BCNT_INT on EG; operands are not checked.
Tom Stellard79243d92014-10-01 17:15:17 +0000100; FUNC-LABEL: {{^}}v_ctpop_v4i32:
Dmitry Preobrazhensky167f8b62017-05-15 14:28:23 +0000101; GCN: v_bcnt_u32_b32{{(_e64)*}}
102; GCN: v_bcnt_u32_b32{{(_e64)*}}
103; GCN: v_bcnt_u32_b32{{(_e64)*}}
104; GCN: v_bcnt_u32_b32{{(_e64)*}}
Marek Olsakfa6607d2015-02-11 14:26:46 +0000105; GCN: s_endpgm
Matt Arsenault60425062014-06-10 19:18:28 +0000106
107; EG: BCNT_INT
108; EG: BCNT_INT
109; EG: BCNT_INT
110; EG: BCNT_INT
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000111define amdgpu_kernel void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind {
Alexander Timofeev982aee62017-07-04 17:32:00 +0000112 %tid = call i32 @llvm.r600.read.tidig.x()
113 %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 %tid
114 %val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16
Matt Arsenaultb5b51102014-06-10 19:18:21 +0000115 %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) nounwind readnone
116 store <4 x i32> %ctpop, <4 x i32> addrspace(1)* %out, align 16
117 ret void
118}
119
; <8 x i32> ctpop of a loaded vector. The checks expect eight v_bcnt_u32_b32
; on GCN and eight BCNT_INT on EG; operands are not checked.
Tom Stellard79243d92014-10-01 17:15:17 +0000120; FUNC-LABEL: {{^}}v_ctpop_v8i32:
Dmitry Preobrazhensky167f8b62017-05-15 14:28:23 +0000121; GCN: v_bcnt_u32_b32{{(_e64)*}}
122; GCN: v_bcnt_u32_b32{{(_e64)*}}
123; GCN: v_bcnt_u32_b32{{(_e64)*}}
124; GCN: v_bcnt_u32_b32{{(_e64)*}}
125; GCN: v_bcnt_u32_b32{{(_e64)*}}
126; GCN: v_bcnt_u32_b32{{(_e64)*}}
127; GCN: v_bcnt_u32_b32{{(_e64)*}}
128; GCN: v_bcnt_u32_b32{{(_e64)*}}
Marek Olsakfa6607d2015-02-11 14:26:46 +0000129; GCN: s_endpgm
Matt Arsenault60425062014-06-10 19:18:28 +0000130
131; EG: BCNT_INT
132; EG: BCNT_INT
133; EG: BCNT_INT
134; EG: BCNT_INT
135; EG: BCNT_INT
136; EG: BCNT_INT
137; EG: BCNT_INT
138; EG: BCNT_INT
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000139define amdgpu_kernel void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind {
Alexander Timofeev982aee62017-07-04 17:32:00 +0000140 %tid = call i32 @llvm.r600.read.tidig.x()
141 %in.gep = getelementptr <8 x i32>, <8 x i32> addrspace(1)* %in, i32 %tid
142 %val = load <8 x i32>, <8 x i32> addrspace(1)* %in.gep, align 32
Matt Arsenaultb5b51102014-06-10 19:18:21 +0000143 %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %val) nounwind readnone
144 store <8 x i32> %ctpop, <8 x i32> addrspace(1)* %out, align 32
145 ret void
146}
147
; <16 x i32> ctpop of a loaded vector. The checks expect sixteen
; v_bcnt_u32_b32 on GCN and sixteen BCNT_INT on EG; operands are not checked.
Tom Stellard79243d92014-10-01 17:15:17 +0000148; FUNC-LABEL: {{^}}v_ctpop_v16i32:
Dmitry Preobrazhensky167f8b62017-05-15 14:28:23 +0000149; GCN: v_bcnt_u32_b32{{(_e64)*}}
150; GCN: v_bcnt_u32_b32{{(_e64)*}}
151; GCN: v_bcnt_u32_b32{{(_e64)*}}
152; GCN: v_bcnt_u32_b32{{(_e64)*}}
153; GCN: v_bcnt_u32_b32{{(_e64)*}}
154; GCN: v_bcnt_u32_b32{{(_e64)*}}
155; GCN: v_bcnt_u32_b32{{(_e64)*}}
156; GCN: v_bcnt_u32_b32{{(_e64)*}}
157; GCN: v_bcnt_u32_b32{{(_e64)*}}
158; GCN: v_bcnt_u32_b32{{(_e64)*}}
159; GCN: v_bcnt_u32_b32{{(_e64)*}}
160; GCN: v_bcnt_u32_b32{{(_e64)*}}
161; GCN: v_bcnt_u32_b32{{(_e64)*}}
162; GCN: v_bcnt_u32_b32{{(_e64)*}}
163; GCN: v_bcnt_u32_b32{{(_e64)*}}
164; GCN: v_bcnt_u32_b32{{(_e64)*}}
Marek Olsakfa6607d2015-02-11 14:26:46 +0000165; GCN: s_endpgm
Matt Arsenault60425062014-06-10 19:18:28 +0000166
167; EG: BCNT_INT
168; EG: BCNT_INT
169; EG: BCNT_INT
170; EG: BCNT_INT
171; EG: BCNT_INT
172; EG: BCNT_INT
173; EG: BCNT_INT
174; EG: BCNT_INT
175; EG: BCNT_INT
176; EG: BCNT_INT
177; EG: BCNT_INT
178; EG: BCNT_INT
179; EG: BCNT_INT
180; EG: BCNT_INT
181; EG: BCNT_INT
182; EG: BCNT_INT
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000183define amdgpu_kernel void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind {
Alexander Timofeev982aee62017-07-04 17:32:00 +0000184 %tid = call i32 @llvm.r600.read.tidig.x()
185 %in.gep = getelementptr <16 x i32>, <16 x i32> addrspace(1)* %in, i32 %tid
186 %val = load <16 x i32>, <16 x i32> addrspace(1)* %in.gep, align 32
Matt Arsenaultb5b51102014-06-10 19:18:21 +0000187 %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %val) nounwind readnone
188 store <16 x i32> %ctpop, <16 x i32> addrspace(1)* %out, align 32
189 ret void
190}
191
; ctpop of a loaded value plus the constant 4 (add i32 %ctpop, 4). The GCN
; checks expect the 4 to appear directly as the last operand of
; v_bcnt_u32_b32. The EG checks expect a BCNT_INT.
Tom Stellard79243d92014-10-01 17:15:17 +0000192; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant:
Alexander Timofeev982aee62017-07-04 17:32:00 +0000193; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
Dmitry Preobrazhensky167f8b62017-05-15 14:28:23 +0000194; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], 4
Marek Olsakfa6607d2015-02-11 14:26:46 +0000195; GCN: buffer_store_dword [[RESULT]],
196; GCN: s_endpgm
Matt Arsenault60425062014-06-10 19:18:28 +0000197
198; EG: BCNT_INT
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000199define amdgpu_kernel void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
Alexander Timofeev982aee62017-07-04 17:32:00 +0000200 %tid = call i32 @llvm.r600.read.tidig.x()
201 %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
202 %val = load i32, i32 addrspace(1)* %in.gep, align 4
Matt Arsenaultb5b51102014-06-10 19:18:21 +0000203 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
204 %add = add i32 %ctpop, 4
205 store i32 %add, i32 addrspace(1)* %out, align 4
206 ret void
207}
208
; Commuted form of the constant add: add i32 4, %ctpop. The GCN checks still
; expect the 4 as the last operand of v_bcnt_u32_b32. The EG checks expect a
; BCNT_INT.
Tom Stellard79243d92014-10-01 17:15:17 +0000209; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant_inv:
Alexander Timofeev982aee62017-07-04 17:32:00 +0000210; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
Dmitry Preobrazhensky167f8b62017-05-15 14:28:23 +0000211; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], 4
Marek Olsakfa6607d2015-02-11 14:26:46 +0000212; GCN: buffer_store_dword [[RESULT]],
213; GCN: s_endpgm
Matt Arsenault60425062014-06-10 19:18:28 +0000214
215; EG: BCNT_INT
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000216define amdgpu_kernel void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
Alexander Timofeev982aee62017-07-04 17:32:00 +0000217 %tid = call i32 @llvm.r600.read.tidig.x()
218 %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
219 %val = load i32, i32 addrspace(1)* %in.gep, align 4
Matt Arsenaultb5b51102014-06-10 19:18:21 +0000220 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
221 %add = add i32 4, %ctpop
222 store i32 %add, i32 addrspace(1)* %out, align 4
223 ret void
224}
225
; ctpop of a loaded value plus 99999 (0x1869f). The GCN checks expect the
; constant to be moved into a v register with v_mov_b32_e32 (in either order
; relative to the load) and that register used as the last operand of
; v_bcnt_u32_b32. This function has no EG checks.
Tom Stellard79243d92014-10-01 17:15:17 +0000226; FUNC-LABEL: {{^}}v_ctpop_i32_add_literal:
Alexander Timofeev982aee62017-07-04 17:32:00 +0000227; GCN-DAG: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
Changpeng Fang71369b32016-05-26 19:35:29 +0000228; GCN-DAG: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f
Tom Stellard326d6ec2014-11-05 14:50:53 +0000229; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
Dmitry Preobrazhensky167f8b62017-05-15 14:28:23 +0000230; VI: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
Marek Olsakfa6607d2015-02-11 14:26:46 +0000231; GCN: buffer_store_dword [[RESULT]],
232; GCN: s_endpgm
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000233define amdgpu_kernel void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
Alexander Timofeev982aee62017-07-04 17:32:00 +0000234 %tid = call i32 @llvm.r600.read.tidig.x()
235 %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
236 %val = load i32, i32 addrspace(1)* %in.gep, align 4
Matt Arsenaultb5b51102014-06-10 19:18:21 +0000237 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
238 %add = add i32 %ctpop, 99999
239 store i32 %add, i32 addrspace(1)* %out, align 4
240 ret void
241}
242
; ctpop of a loaded value plus the kernel argument %const
; (add i32 %ctpop, %const). The GCN checks expect an s_load_dword (in either
; order relative to the vector load) whose s register is used directly as the
; last operand of v_bcnt_u32_b32. The EG checks expect a BCNT_INT.
Tom Stellard79243d92014-10-01 17:15:17 +0000243; FUNC-LABEL: {{^}}v_ctpop_i32_add_var:
Alexander Timofeev982aee62017-07-04 17:32:00 +0000244; GCN-DAG: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
Marek Olsakfa6607d2015-02-11 14:26:46 +0000245; GCN-DAG: s_load_dword [[VAR:s[0-9]+]],
Dmitry Preobrazhensky167f8b62017-05-15 14:28:23 +0000246; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
Marek Olsakfa6607d2015-02-11 14:26:46 +0000247; GCN: buffer_store_dword [[RESULT]],
248; GCN: s_endpgm
Matt Arsenault60425062014-06-10 19:18:28 +0000249
250; EG: BCNT_INT
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000251define amdgpu_kernel void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
Alexander Timofeev982aee62017-07-04 17:32:00 +0000252 %tid = call i32 @llvm.r600.read.tidig.x()
253 %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
254 %val = load i32, i32 addrspace(1)* %in.gep, align 4
Matt Arsenaultb5b51102014-06-10 19:18:21 +0000255 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
256 %add = add i32 %ctpop, %const
257 store i32 %add, i32 addrspace(1)* %out, align 4
258 ret void
259}
260
; Commuted form of the kernel-argument add: add i32 %const, %ctpop. The GCN
; checks still expect the loaded value as the first source and the s register
; holding the argument as the last operand of v_bcnt_u32_b32. The EG checks
; expect a BCNT_INT.
Tom Stellard79243d92014-10-01 17:15:17 +0000261; FUNC-LABEL: {{^}}v_ctpop_i32_add_var_inv:
Alexander Timofeev982aee62017-07-04 17:32:00 +0000262; GCN-DAG: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
Marek Olsakfa6607d2015-02-11 14:26:46 +0000263; GCN-DAG: s_load_dword [[VAR:s[0-9]+]],
Dmitry Preobrazhensky167f8b62017-05-15 14:28:23 +0000264; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
Marek Olsakfa6607d2015-02-11 14:26:46 +0000265; GCN: buffer_store_dword [[RESULT]],
266; GCN: s_endpgm
Matt Arsenault60425062014-06-10 19:18:28 +0000267
268; EG: BCNT_INT
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000269define amdgpu_kernel void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
Alexander Timofeev982aee62017-07-04 17:32:00 +0000270 %tid = call i32 @llvm.r600.read.tidig.x()
271 %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
272 %val = load i32, i32 addrspace(1)* %in.gep, align 4
Matt Arsenaultb5b51102014-06-10 19:18:21 +0000273 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
274 %add = add i32 %const, %ctpop
275 store i32 %add, i32 addrspace(1)* %out, align 4
276 ret void
277}
278
; Both add operands come from memory: %val from %in and %const from
; %constptr, each indexed by %tid. The SI checks match two addr64
; buffer_load_dword and the VI checks match two flat_load_dword. In both
; sets the v_bcnt_u32_b32 is expected to take the first-matched load as its
; first source and the second-matched load as its last operand; only the
; capture names (VAL/VAR) are swapped between the two prefixes.
; The EG checks expect a BCNT_INT.
Tom Stellard79243d92014-10-01 17:15:17 +0000279; FUNC-LABEL: {{^}}v_ctpop_i32_add_vvar_inv:
Alexander Timofeev982aee62017-07-04 17:32:00 +0000280; SI: buffer_load_dword [[VAR:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
281; SI: buffer_load_dword [[VAL:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
282; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAR]], [[VAL]]
283; VI: flat_load_dword [[VAL:v[0-9]+]], v[{{[0-9]+:[0-9]+}}]
284; VI: flat_load_dword [[VAR:v[0-9]+]], v[{{[0-9]+:[0-9]+}}]
Dmitry Preobrazhensky167f8b62017-05-15 14:28:23 +0000285; VI: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
Marek Olsakfa6607d2015-02-11 14:26:46 +0000286; GCN: buffer_store_dword [[RESULT]],
287; GCN: s_endpgm
Matt Arsenault60425062014-06-10 19:18:28 +0000288
289; EG: BCNT_INT
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000290define amdgpu_kernel void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
Alexander Timofeev982aee62017-07-04 17:32:00 +0000291 %tid = call i32 @llvm.r600.read.tidig.x()
292 %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
293 %val = load i32, i32 addrspace(1)* %in.gep, align 4
Matt Arsenaultb5b51102014-06-10 19:18:21 +0000294 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
Alexander Timofeev982aee62017-07-04 17:32:00 +0000295 %gep = getelementptr i32, i32 addrspace(1)* %constptr, i32 %tid
David Blaikiea79ac142015-02-27 21:17:42 +0000296 %const = load i32, i32 addrspace(1)* %gep, align 4
Matt Arsenaultb5b51102014-06-10 19:18:21 +0000297 %add = add i32 %const, %ctpop
298 store i32 %add, i32 addrspace(1)* %out, align 4
299 ret void
300}
Tom Stellardae4c9e72014-06-20 17:06:11 +0000301
302; FIXME: We currently disallow SALU instructions in all branches,
303; but there are some cases when they should be allowed.
304
; ctpop of the kernel argument %ctpop_arg computed in the %if arm of an
; if/else; the %else arm loads a value from %in instead, and the phi result is
; stored to %out. The SI and VI checks expect the argument to be read with
; s_load_dword (at a different offset per target), counted with
; s_bcnt1_i32_b32, and moved into a v register that is then stored.
; The EG checks expect a BCNT_INT.
;
; RESULT is captured on the v_mov_b32_e32 line so that the store check binds
; to the register used in this function instead of reusing a value captured
; while matching an earlier function.
Tom Stellard79243d92014-10-01 17:15:17 +0000305; FUNC-LABEL: {{^}}ctpop_i32_in_br:
Tom Stellard326d6ec2014-11-05 14:50:53 +0000306; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xd
Marek Olsakfa6607d2015-02-11 14:26:46 +0000307; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x34
308; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
309; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], [[SRESULT]]
310; GCN: buffer_store_dword [[RESULT]],
311; GCN: s_endpgm
Tom Stellardae4c9e72014-06-20 17:06:11 +0000312; EG: BCNT_INT
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000313define amdgpu_kernel void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %ctpop_arg, i32 %cond) {
Tom Stellardae4c9e72014-06-20 17:06:11 +0000314entry:
Tom Stellard744b99b2014-09-24 01:33:28 +0000315 %tmp0 = icmp eq i32 %cond, 0
316 br i1 %tmp0, label %if, label %else
Tom Stellardae4c9e72014-06-20 17:06:11 +0000317
318if:
Tom Stellard744b99b2014-09-24 01:33:28 +0000319 %tmp2 = call i32 @llvm.ctpop.i32(i32 %ctpop_arg)
Tom Stellardae4c9e72014-06-20 17:06:11 +0000320 br label %endif
321
322else:
David Blaikie79e6c742015-02-27 19:29:02 +0000323 %tmp3 = getelementptr i32, i32 addrspace(1)* %in, i32 1
David Blaikiea79ac142015-02-27 21:17:42 +0000324 %tmp4 = load i32, i32 addrspace(1)* %tmp3
Tom Stellardae4c9e72014-06-20 17:06:11 +0000325 br label %endif
326
327endif:
Tom Stellard744b99b2014-09-24 01:33:28 +0000328 %tmp5 = phi i32 [%tmp2, %if], [%tmp4, %else]
329 store i32 %tmp5, i32 addrspace(1)* %out
Tom Stellardae4c9e72014-06-20 17:06:11 +0000330 ret void
331}