blob: 9758ac96ea9b84357796c94686fde53cd6c3139f [file] [log] [blame]
Tom Stellard49f8bfd2015-01-06 18:00:21 +00001; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
Matt Arsenault8333e432014-06-10 19:18:24 +00002
3declare i64 @llvm.ctpop.i64(i64) nounwind readnone
4declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) nounwind readnone
5declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>) nounwind readnone
6declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>) nounwind readnone
7declare <16 x i64> @llvm.ctpop.v16i64(<16 x i64>) nounwind readnone
8
; Population count of an i64 that is passed directly as a function argument;
; the i64 result is truncated to i32 before being stored to %out.
; The checks below expect, in order: an s_load_dwordx2 of the value into a
; scalar register pair, one s_bcnt1_i32_b64 on that pair, a v_mov_b32_e32 of
; the scalar result into a vector register, and a buffer_store_dword of it.
Tom Stellard79243d92014-10-01 17:15:17 +00009; FUNC-LABEL: {{^}}s_ctpop_i64:
Tom Stellard326d6ec2014-11-05 14:50:53 +000010; SI: s_load_dwordx2 [[SVAL:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
11; SI: s_bcnt1_i32_b64 [[SRESULT:s[0-9]+]], [[SVAL]]
12; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
13; SI: buffer_store_dword [[VRESULT]],
14; SI: s_endpgm
Matt Arsenault8333e432014-06-10 19:18:24 +000015define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
16 %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
 ; Only the low 32 bits of the count are kept, matching the single-dword store
 ; expected above.
17 %truncctpop = trunc i64 %ctpop to i32
18 store i32 %truncctpop, i32 addrspace(1)* %out, align 4
19 ret void
20}
21
; Population count of an i64 that is loaded from an addrspace(1) pointer; the
; i64 result is truncated to i32 before being stored to %out.
; The checks below expect a buffer_load_dwordx2 into a vector register pair
; (LOVAL:HIVAL), then two v_bcnt_u32_b32 instructions back to back: the first
; takes the low half with a literal 0 as its second operand, the second takes
; the high half with the first instruction's result as its second operand
; (presumably chaining the two per-half counts into one total -- confirm
; against the ISA manual). The second result is what gets stored.
Tom Stellard79243d92014-10-01 17:15:17 +000022; FUNC-LABEL: {{^}}v_ctpop_i64:
Tom Stellard326d6ec2014-11-05 14:50:53 +000023; SI: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
Marek Olsakc5368502015-01-15 18:43:01 +000024; SI: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
Tom Stellard326d6ec2014-11-05 14:50:53 +000025; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
26; SI: buffer_store_dword [[RESULT]],
27; SI: s_endpgm
Matt Arsenault8333e432014-06-10 19:18:24 +000028define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
 ; The operand comes from memory rather than from an argument; this is the
 ; only IR-level difference from @s_ctpop_i64.
29 %val = load i64 addrspace(1)* %in, align 8
30 %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
31 %truncctpop = trunc i64 %ctpop to i32
32 store i32 %truncctpop, i32 addrspace(1)* %out, align 4
33 ret void
34}
35
; Population count of a <2 x i64> passed directly as a function argument; the
; result is truncated to <2 x i32> before being stored to %out.
; The checks below only require that two s_bcnt1_i32_b64 instructions appear
; before s_endpgm (one per vector element); operands are not checked.
Tom Stellard79243d92014-10-01 17:15:17 +000036; FUNC-LABEL: {{^}}s_ctpop_v2i64:
Tom Stellard326d6ec2014-11-05 14:50:53 +000037; SI: s_bcnt1_i32_b64
38; SI: s_bcnt1_i32_b64
39; SI: s_endpgm
Matt Arsenault8333e432014-06-10 19:18:24 +000040define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val) nounwind {
41 %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
42 %truncctpop = trunc <2 x i64> %ctpop to <2 x i32>
43 store <2 x i32> %truncctpop, <2 x i32> addrspace(1)* %out, align 8
44 ret void
45}
46
; Population count of a <4 x i64> passed directly as a function argument; the
; result is truncated to <4 x i32> before being stored to %out.
; The checks below only require that four s_bcnt1_i32_b64 instructions appear
; before s_endpgm (one per vector element); operands are not checked.
Tom Stellard79243d92014-10-01 17:15:17 +000047; FUNC-LABEL: {{^}}s_ctpop_v4i64:
Tom Stellard326d6ec2014-11-05 14:50:53 +000048; SI: s_bcnt1_i32_b64
49; SI: s_bcnt1_i32_b64
50; SI: s_bcnt1_i32_b64
51; SI: s_bcnt1_i32_b64
52; SI: s_endpgm
Matt Arsenault8333e432014-06-10 19:18:24 +000053define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val) nounwind {
54 %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
55 %truncctpop = trunc <4 x i64> %ctpop to <4 x i32>
56 store <4 x i32> %truncctpop, <4 x i32> addrspace(1)* %out, align 16
57 ret void
58}
59
; Population count of a <2 x i64> that is loaded from an addrspace(1) pointer;
; the result is truncated to <2 x i32> before being stored to %out.
; The checks below only require that four v_bcnt_u32_b32 instructions appear
; before s_endpgm, i.e. two per 64-bit element, the same pairing that
; v_ctpop_i64 checks for a single element. Operands are not checked.
Tom Stellard79243d92014-10-01 17:15:17 +000060; FUNC-LABEL: {{^}}v_ctpop_v2i64:
Tom Stellard326d6ec2014-11-05 14:50:53 +000061; SI: v_bcnt_u32_b32
62; SI: v_bcnt_u32_b32
63; SI: v_bcnt_u32_b32
64; SI: v_bcnt_u32_b32
65; SI: s_endpgm
Matt Arsenault8333e432014-06-10 19:18:24 +000066define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) nounwind {
67 %val = load <2 x i64> addrspace(1)* %in, align 16
68 %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
69 %truncctpop = trunc <2 x i64> %ctpop to <2 x i32>
70 store <2 x i32> %truncctpop, <2 x i32> addrspace(1)* %out, align 8
71 ret void
72}
73
; Population count of a <4 x i64> that is loaded from an addrspace(1) pointer;
; the result is truncated to <4 x i32> before being stored to %out.
; The checks below only require that eight v_bcnt_u32_b32 instructions appear
; before s_endpgm, i.e. two per 64-bit element, the same pairing that
; v_ctpop_i64 checks for a single element. Operands are not checked.
Tom Stellard79243d92014-10-01 17:15:17 +000074; FUNC-LABEL: {{^}}v_ctpop_v4i64:
Tom Stellard326d6ec2014-11-05 14:50:53 +000075; SI: v_bcnt_u32_b32
76; SI: v_bcnt_u32_b32
77; SI: v_bcnt_u32_b32
78; SI: v_bcnt_u32_b32
79; SI: v_bcnt_u32_b32
80; SI: v_bcnt_u32_b32
81; SI: v_bcnt_u32_b32
82; SI: v_bcnt_u32_b32
83; SI: s_endpgm
Matt Arsenault8333e432014-06-10 19:18:24 +000084define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) nounwind {
85 %val = load <4 x i64> addrspace(1)* %in, align 32
86 %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
87 %truncctpop = trunc <4 x i64> %ctpop to <4 x i32>
88 store <4 x i32> %truncctpop, <4 x i32> addrspace(1)* %out, align 16
89 ret void
90}
Tom Stellardae4c9e72014-06-20 17:06:11 +000091
92; FIXME: We currently disallow SALU instructions in all branches,
93; but there are some cases when they should be allowed.
94
; i64 population count that is computed on only one side of a branch. The
; function branches on %cond == 0: the taken side counts the bits of the
; %ctpop_arg argument, the other side loads an i64 from %in + 1 element, and
; the merged value is stored to %out as a full i64 (no truncation here).
; The checks below expect the argument to be fetched with s_load_dwordx2 into
; a scalar pair (LOVAL:HIVAL), counted with s_bcnt1_i32_b64, and the stored
; 64-bit value to be assembled from two v_mov_b32_e32 copies (VLO, VHI).
; NOTE(review): the VHI check expects the high dword of the stored value to be
; copied from HIVAL, the high half of the *input* pair, rather than from a
; zero. A 64-bit population count never exceeds 64, so a zero high dword would
; be the expected result -- confirm this check is intended and is not pinning
; incorrect codegen.
Tom Stellard79243d92014-10-01 17:15:17 +000095; FUNC-LABEL: {{^}}ctpop_i64_in_br:
Tom Stellard326d6ec2014-11-05 14:50:53 +000096; SI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xd
97; SI: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}}
98; SI: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]]
99; SI: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HIVAL]]
100; SI: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]{{\]}}
101; SI: s_endpgm
Tom Stellard744b99b2014-09-24 01:33:28 +0000102define void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %ctpop_arg, i32 %cond) {
Tom Stellardae4c9e72014-06-20 17:06:11 +0000103entry:
Tom Stellard744b99b2014-09-24 01:33:28 +0000104 %tmp0 = icmp eq i32 %cond, 0
105 br i1 %tmp0, label %if, label %else
Tom Stellardae4c9e72014-06-20 17:06:11 +0000106
; Taken when %cond == 0: the population count happens only on this path.
107if:
Tom Stellard744b99b2014-09-24 01:33:28 +0000108 %tmp2 = call i64 @llvm.ctpop.i64(i64 %ctpop_arg)
Tom Stellardae4c9e72014-06-20 17:06:11 +0000109 br label %endif
110
; Taken when %cond != 0: no ctpop, just a load of the i64 one element past %in.
111else:
Tom Stellard744b99b2014-09-24 01:33:28 +0000112 %tmp3 = getelementptr i64 addrspace(1)* %in, i32 1
113 %tmp4 = load i64 addrspace(1)* %tmp3
Tom Stellardae4c9e72014-06-20 17:06:11 +0000114 br label %endif
115
; Join point: pick whichever value the executed path produced and store it.
116endif:
Tom Stellard744b99b2014-09-24 01:33:28 +0000117 %tmp5 = phi i64 [%tmp2, %if], [%tmp4, %else]
118 store i64 %tmp5, i64 addrspace(1)* %out
Tom Stellardae4c9e72014-06-20 17:06:11 +0000119 ret void
120}