; Source blob e4d11e00369617f3053ac843a0104c19e33e0832
; Population-count (llvm.ctpop) instruction-selection checks for i32 and
; i32 vectors. The first llc invocation is checked with the SI prefix, the
; second (cypress) with the EG prefix; both share the FUNC prefix for labels.
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; ctpop intrinsic overloads used by the tests below.
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone
declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone

; ctpop of an i32 passed directly as an argument. On SI the checks expect
; the value to be loaded into an s register, counted with S_BCNT1_I32_B32,
; and the result copied into a v register before it is stored.
; FUNC-LABEL: @s_ctpop_i32:
; SI: S_LOAD_DWORD [[SVAL:s[0-9]+]],
; SI: S_BCNT1_I32_B32 [[SRESULT:s[0-9]+]], [[SVAL]]
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: BUFFER_STORE_DWORD [[VRESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  store i32 %ctpop, i32 addrspace(1)* %out, align 4
  ret void
}

; ctpop of an i32 loaded through an addrspace(1) pointer. On SI the checks
; expect V_BCNT_U32_B32 (e32 form) whose second operand is a v register
; that was set to 0 by a preceding V_MOV_B32.
; XXX - Why 0 in register?
; FUNC-LABEL: @v_ctpop_i32:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VZERO]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %val = load i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  store i32 %ctpop, i32 addrspace(1)* %out, align 4
  ret void
}

; Sum of two ctpop results. On SI the checks expect the first count to be
; passed as the second operand of the second V_BCNT_U32_B32, with no
; instruction containing "ADD" between the two bit counts.
; FUNC-LABEL: @v_ctpop_add_chain_i32:
; SI: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]],
; SI: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]],
; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
; SI: V_BCNT_U32_B32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]]
; SI-NOT: ADD
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind {
  %val0 = load i32 addrspace(1)* %in0, align 4
  %val1 = load i32 addrspace(1)* %in1, align 4
  %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
  %ctpop1 = call i32 @llvm.ctpop.i32(i32 %val1) nounwind readnone
  %add = add i32 %ctpop0, %ctpop1
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; ctpop of a loaded <2 x i32>. The checks expect one bit-count instruction
; per vector element on both targets (two in total).
; FUNC-LABEL: @v_ctpop_v2i32:
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: S_ENDPGM

; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind {
  %val = load <2 x i32> addrspace(1)* %in, align 8
  %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %val) nounwind readnone
  store <2 x i32> %ctpop, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; ctpop of a loaded <4 x i32>. The checks expect one bit-count instruction
; per vector element on both targets (four in total).
; FUNC-LABEL: @v_ctpop_v4i32:
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: S_ENDPGM

; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind {
  %val = load <4 x i32> addrspace(1)* %in, align 16
  %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) nounwind readnone
  store <4 x i32> %ctpop, <4 x i32> addrspace(1)* %out, align 16
  ret void
}

; ctpop of a loaded <8 x i32>. The checks expect one bit-count instruction
; per vector element on both targets (eight in total).
; FUNC-LABEL: @v_ctpop_v8i32:
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: S_ENDPGM

; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind {
  %val = load <8 x i32> addrspace(1)* %in, align 32
  %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %val) nounwind readnone
  store <8 x i32> %ctpop, <8 x i32> addrspace(1)* %out, align 32
  ret void
}

; ctpop of a loaded <16 x i32>. The checks expect one bit-count instruction
; per vector element on both targets (sixteen in total).
; FUNC-LABEL: @v_ctpop_v16i32:
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: S_ENDPGM

; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind {
  %val = load <16 x i32> addrspace(1)* %in, align 32
  %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %val) nounwind readnone
  store <16 x i32> %ctpop, <16 x i32> addrspace(1)* %out, align 32
  ret void
}

; ctpop result plus the constant 4 (constant on the right-hand side of the
; add). On SI the checks expect the add to disappear into the second operand
; of V_BCNT_U32_B32 (e64 form), with 4 written directly in the instruction.
; FUNC-LABEL: @v_ctpop_i32_add_inline_constant:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %val = load i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %add = add i32 %ctpop, 4
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; Constant 4 plus the ctpop result (constant on the left-hand side of the
; add). The SI checks expect the same V_BCNT_U32_B32 (e64 form) with 4 as
; its second operand, regardless of the add's operand order.
; FUNC-LABEL: @v_ctpop_i32_add_inline_constant_inv:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %val = load i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %add = add i32 4, %ctpop
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; ctpop result plus 99999 (0x1869f). On SI the checks expect the constant
; to be moved into a v register with V_MOV_B32 and then used as the second
; operand of V_BCNT_U32_B32 (e32 form).
; FUNC-LABEL: @v_ctpop_i32_add_literal:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_MOV_B32_e32 [[LIT:v[0-9]+]], 0x1869f
; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %val = load i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %add = add i32 %ctpop, 99999
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; ctpop result plus the i32 argument %const (argument on the right-hand
; side of the add). On SI the checks expect %const to be loaded into an
; s register and used directly as the second operand of V_BCNT_U32_B32
; (e64 form). The two loads may appear in either order.
; FUNC-LABEL: @v_ctpop_i32_add_var:
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI-DAG: S_LOAD_DWORD [[VAR:s[0-9]+]],
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
  %val = load i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %add = add i32 %ctpop, %const
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; The i32 argument %const plus the ctpop result (argument on the left-hand
; side of the add). The SI checks expect the same V_BCNT_U32_B32 (e64 form)
; with the s register holding %const as its second operand.
; FUNC-LABEL: @v_ctpop_i32_add_var_inv:
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI-DAG: S_LOAD_DWORD [[VAR:s[0-9]+]],
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
  %val = load i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %add = add i32 %const, %ctpop
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; A value loaded from element 4 of %constptr (byte offset 0x10) plus the
; ctpop result. On SI the checks expect both inputs to arrive in v registers
; via BUFFER_LOAD_DWORD (in either order) and the loaded addend to be the
; second operand of V_BCNT_U32_B32 (e32 form).
; FUNC-LABEL: @v_ctpop_i32_add_vvar_inv:
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], {{.*}} + 0x0
; SI-DAG: BUFFER_LOAD_DWORD [[VAR:v[0-9]+]], {{.*}} + 0x10
; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
  %val = load i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %gep = getelementptr i32 addrspace(1)* %constptr, i32 4
  %const = load i32 addrspace(1)* %gep, align 4
  %add = add i32 %const, %ctpop
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}