; RUN: llc < %s -mtriple armv7-linux-gnueabihf -mattr=+neon | FileCheck %s

; This test checks the @llvm.cttz.* intrinsics for vectors.

declare <1 x i8> @llvm.cttz.v1i8(<1 x i8>, i1)
declare <2 x i8> @llvm.cttz.v2i8(<2 x i8>, i1)
declare <4 x i8> @llvm.cttz.v4i8(<4 x i8>, i1)
declare <8 x i8> @llvm.cttz.v8i8(<8 x i8>, i1)
declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)

declare <1 x i16> @llvm.cttz.v1i16(<1 x i16>, i1)
declare <2 x i16> @llvm.cttz.v2i16(<2 x i16>, i1)
declare <4 x i16> @llvm.cttz.v4i16(<4 x i16>, i1)
declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)

declare <1 x i32> @llvm.cttz.v1i32(<1 x i32>, i1)
declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1)
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)

declare <1 x i64> @llvm.cttz.v1i64(<1 x i64>, i1)
declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)

;------------------------------------------------------------------------------

; <1 x i8> cttz: no instruction checks; only the function label is matched.
; The label now ends with a colon, consistent with every other CHECK-LABEL in
; this file, so it cannot accidentally match a longer symbol such as
; test_v1i8_zero_undef.
define void @test_v1i8(<1 x i8>* %p) {
; CHECK-LABEL: test_v1i8:
  %a = load <1 x i8>, <1 x i8>* %p
  %tmp = call <1 x i8> @llvm.cttz.v1i8(<1 x i8> %a, i1 false)
  store <1 x i8> %tmp, <1 x i8>* %p
  ret void
}

; <2 x i8> cttz: no instruction checks; only the function label is matched.
define void @test_v2i8(<2 x i8>* %p) {
; CHECK-LABEL: test_v2i8:
  %a = load <2 x i8>, <2 x i8>* %p
  %tmp = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %a, i1 false)
  store <2 x i8> %tmp, <2 x i8>* %p
  ret void
}

; <4 x i8> cttz: no instruction checks; only the function label is matched.
define void @test_v4i8(<4 x i8>* %p) {
; CHECK-LABEL: test_v4i8:
  %a = load <4 x i8>, <4 x i8>* %p
  %tmp = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %a, i1 false)
  store <4 x i8> %tmp, <4 x i8>* %p
  ret void
}

; <8 x i8> cttz, popcount-based lowering (d-register):
; isolate the lowest set bit with x & -x, subtract 1 to get a mask of the
; trailing zeros, then count the ones per byte with vcnt.8.
define void @test_v8i8(<8 x i8>* %p) {
; CHECK-LABEL: test_v8i8:
; CHECK: vldr [[D1:d[0-9]+]], [r0]
; CHECK: vmov.i8 [[D2:d[0-9]+]], #0x1
; CHECK: vneg.s8 [[D3:d[0-9]+]], [[D1]]
; CHECK: vand [[D1]], [[D1]], [[D3]]
; CHECK: vsub.i8 [[D1]], [[D1]], [[D2]]
; CHECK: vcnt.8 [[D1]], [[D1]]
; CHECK: vstr [[D1]], [r0]
  %a = load <8 x i8>, <8 x i8>* %p
  %tmp = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %a, i1 false)
  store <8 x i8> %tmp, <8 x i8>* %p
  ret void
}

; <16 x i8> cttz: same (x & -x) - 1 + vcnt.8 sequence as the v8i8 case, but on
; q registers, loaded/stored as a d-register pair.
define void @test_v16i8(<16 x i8>* %p) {
; CHECK-LABEL: test_v16i8:
; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]
; CHECK: vmov.i8 [[Q2:q[0-9]+]], #0x1
; CHECK: vneg.s8 [[Q3:q[0-9]+]], [[Q1:q[0-9]+]]
; CHECK: vand [[Q1]], [[Q1]], [[Q3]]
; CHECK: vsub.i8 [[Q1]], [[Q1]], [[Q2]]
; CHECK: vcnt.8 [[Q1]], [[Q1]]
; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0]
  %a = load <16 x i8>, <16 x i8>* %p
  %tmp = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
  store <16 x i8> %tmp, <16 x i8>* %p
  ret void
}

; <1 x i16> cttz: no instruction checks; only the function label is matched.
define void @test_v1i16(<1 x i16>* %p) {
; CHECK-LABEL: test_v1i16:
  %a = load <1 x i16>, <1 x i16>* %p
  %tmp = call <1 x i16> @llvm.cttz.v1i16(<1 x i16> %a, i1 false)
  store <1 x i16> %tmp, <1 x i16>* %p
  ret void
}

; <2 x i16> cttz: no instruction checks; only the function label is matched.
define void @test_v2i16(<2 x i16>* %p) {
; CHECK-LABEL: test_v2i16:
  %a = load <2 x i16>, <2 x i16>* %p
  %tmp = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %a, i1 false)
  store <2 x i16> %tmp, <2 x i16>* %p
  ret void
}

; <4 x i16> cttz: (x & -x) - 1, vcnt.8 counts bits per byte, then vpaddl.u8
; sums adjacent byte counts into 16-bit lane results.
define void @test_v4i16(<4 x i16>* %p) {
; CHECK-LABEL: test_v4i16:
; CHECK: vldr [[D1:d[0-9]+]], [r0]
; CHECK: vmov.i16 [[D2:d[0-9]+]], #0x1
; CHECK: vneg.s16 [[D3:d[0-9]+]], [[D1]]
; CHECK: vand [[D1]], [[D1]], [[D3]]
; CHECK: vsub.i16 [[D1]], [[D1]], [[D2]]
; CHECK: vcnt.8 [[D1]], [[D1]]
; CHECK: vpaddl.u8 [[D1]], [[D1]]
; CHECK: vstr [[D1]], [r0]
  %a = load <4 x i16>, <4 x i16>* %p
  %tmp = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %a, i1 false)
  store <4 x i16> %tmp, <4 x i16>* %p
  ret void
}

; <8 x i16> cttz: q-register version of the v4i16 sequence (vcnt.8 followed by
; one vpaddl.u8 widening step).
define void @test_v8i16(<8 x i16>* %p) {
; CHECK-LABEL: test_v8i16:
; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]
; CHECK: vmov.i16 [[Q2:q[0-9]+]], #0x1
; CHECK: vneg.s16 [[Q3:q[0-9]+]], [[Q1:q[0-9]+]]
; CHECK: vand [[Q1]], [[Q1]], [[Q3]]
; CHECK: vsub.i16 [[Q1]], [[Q1]], [[Q2]]
; CHECK: vcnt.8 [[Q1]], [[Q1]]
; CHECK: vpaddl.u8 [[Q1]], [[Q1]]
; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0]
  %a = load <8 x i16>, <8 x i16>* %p
  %tmp = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
  store <8 x i16> %tmp, <8 x i16>* %p
  ret void
}

; <1 x i32> cttz: no instruction checks; only the function label is matched.
define void @test_v1i32(<1 x i32>* %p) {
; CHECK-LABEL: test_v1i32:
  %a = load <1 x i32>, <1 x i32>* %p
  %tmp = call <1 x i32> @llvm.cttz.v1i32(<1 x i32> %a, i1 false)
  store <1 x i32> %tmp, <1 x i32>* %p
  ret void
}

; <2 x i32> cttz: (x & -x) - 1 and vcnt.8, then two widening pairwise adds
; (vpaddl.u8, vpaddl.u16) accumulate the byte counts into 32-bit lanes.
define void @test_v2i32(<2 x i32>* %p) {
; CHECK-LABEL: test_v2i32:
; CHECK: vldr [[D1:d[0-9]+]], [r0]
; CHECK: vmov.i32 [[D2:d[0-9]+]], #0x1
; CHECK: vneg.s32 [[D3:d[0-9]+]], [[D1]]
; CHECK: vand [[D1]], [[D1]], [[D3]]
; CHECK: vsub.i32 [[D1]], [[D1]], [[D2]]
; CHECK: vcnt.8 [[D1]], [[D1]]
; CHECK: vpaddl.u8 [[D1]], [[D1]]
; CHECK: vpaddl.u16 [[D1]], [[D1]]
; CHECK: vstr [[D1]], [r0]
  %a = load <2 x i32>, <2 x i32>* %p
  %tmp = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false)
  store <2 x i32> %tmp, <2 x i32>* %p
  ret void
}

; <4 x i32> cttz: q-register version of the v2i32 sequence (vcnt.8 plus
; vpaddl.u8 and vpaddl.u16 widening steps).
define void @test_v4i32(<4 x i32>* %p) {
; CHECK-LABEL: test_v4i32:
; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]
; CHECK: vmov.i32 [[Q2:q[0-9]+]], #0x1
; CHECK: vneg.s32 [[Q3:q[0-9]+]], [[Q1:q[0-9]+]]
; CHECK: vand [[Q1]], [[Q1]], [[Q3]]
; CHECK: vsub.i32 [[Q1]], [[Q1]], [[Q2]]
; CHECK: vcnt.8 [[Q1]], [[Q1]]
; CHECK: vpaddl.u8 [[Q1]], [[Q1]]
; CHECK: vpaddl.u16 [[Q1]], [[Q1]]
; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0]
  %a = load <4 x i32>, <4 x i32>* %p
  %tmp = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
  store <4 x i32> %tmp, <4 x i32>* %p
  ret void
}

; <1 x i64> cttz: for 64-bit lanes the negation is materialized as 0 - x
; (vsub.i64 from a zero constant) and the decrement as x + (-1) (vadd.i64 of
; all-ones); vcnt.8 plus three widening pairwise adds produce the 64-bit count.
define void @test_v1i64(<1 x i64>* %p) {
; CHECK-LABEL: test_v1i64:
; CHECK: vldr [[D1:d[0-9]+]], [r0]
; CHECK: vmov.i32 [[D2:d[0-9]+]], #0x0
; CHECK: vmov.i64 [[D3:d[0-9]+]], #0xffffffffffffffff
; CHECK: vsub.i64 [[D2]], [[D2]], [[D1]]
; CHECK: vand [[D1]], [[D1]], [[D2]]
; CHECK: vadd.i64 [[D1]], [[D1]], [[D3]]
; CHECK: vcnt.8 [[D1]], [[D1]]
; CHECK: vpaddl.u8 [[D1]], [[D1]]
; CHECK: vpaddl.u16 [[D1]], [[D1]]
; CHECK: vpaddl.u32 [[D1]], [[D1]]
; CHECK: vstr [[D1]], [r0]
  %a = load <1 x i64>, <1 x i64>* %p
  %tmp = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %a, i1 false)
  store <1 x i64> %tmp, <1 x i64>* %p
  ret void
}

; <2 x i64> cttz: q-register version of the v1i64 sequence (0 - x negation,
; x + (-1) decrement, vcnt.8, then vpaddl.u8/u16/u32 widening adds).
define void @test_v2i64(<2 x i64>* %p) {
; CHECK-LABEL: test_v2i64:
; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]
; CHECK: vmov.i32 [[Q2:q[0-9]+]], #0x0
; CHECK: vmov.i64 [[Q3:q[0-9]+]], #0xffffffffffffffff
; CHECK: vsub.i64 [[Q2]], [[Q2]], [[Q1:q[0-9]+]]
; CHECK: vand [[Q1]], [[Q1]], [[Q2]]
; CHECK: vadd.i64 [[Q1]], [[Q1]], [[Q3]]
; CHECK: vcnt.8 [[Q1]], [[Q1]]
; CHECK: vpaddl.u8 [[Q1]], [[Q1]]
; CHECK: vpaddl.u16 [[Q1]], [[Q1]]
; CHECK: vpaddl.u32 [[Q1]], [[Q1]]
; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0]
  %a = load <2 x i64>, <2 x i64>* %p
  %tmp = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
  store <2 x i64> %tmp, <2 x i64>* %p
  ret void
}

;------------------------------------------------------------------------------

; <1 x i8> cttz_zero_undef: no instruction checks; only the label is matched.
; The label now ends with a colon, consistent with every other CHECK-LABEL in
; this file, so it only matches this exact symbol.
define void @test_v1i8_zero_undef(<1 x i8>* %p) {
; CHECK-LABEL: test_v1i8_zero_undef:
  %a = load <1 x i8>, <1 x i8>* %p
  %tmp = call <1 x i8> @llvm.cttz.v1i8(<1 x i8> %a, i1 true)
  store <1 x i8> %tmp, <1 x i8>* %p
  ret void
}

; <2 x i8> cttz_zero_undef: no instruction checks; only the label is matched.
define void @test_v2i8_zero_undef(<2 x i8>* %p) {
; CHECK-LABEL: test_v2i8_zero_undef:
  %a = load <2 x i8>, <2 x i8>* %p
  %tmp = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %a, i1 true)
  store <2 x i8> %tmp, <2 x i8>* %p
  ret void
}

; <4 x i8> cttz_zero_undef: no instruction checks; only the label is matched.
define void @test_v4i8_zero_undef(<4 x i8>* %p) {
; CHECK-LABEL: test_v4i8_zero_undef:
  %a = load <4 x i8>, <4 x i8>* %p
  %tmp = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %a, i1 true)
  store <4 x i8> %tmp, <4 x i8>* %p
  ret void
}

; <8 x i8> cttz_zero_undef: checks the same (x & -x) - 1 + vcnt.8 sequence as
; the plain cttz v8i8 case (the zero-undef flag does not change the i8
; lowering checked here).
define void @test_v8i8_zero_undef(<8 x i8>* %p) {
; CHECK-LABEL: test_v8i8_zero_undef:
; CHECK: vldr [[D1:d[0-9]+]], [r0]
; CHECK: vmov.i8 [[D2:d[0-9]+]], #0x1
; CHECK: vneg.s8 [[D3:d[0-9]+]], [[D1]]
; CHECK: vand [[D1]], [[D1]], [[D3]]
; CHECK: vsub.i8 [[D1]], [[D1]], [[D2]]
; CHECK: vcnt.8 [[D1]], [[D1]]
; CHECK: vstr [[D1]], [r0]
  %a = load <8 x i8>, <8 x i8>* %p
  %tmp = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %a, i1 true)
  store <8 x i8> %tmp, <8 x i8>* %p
  ret void
}

; <16 x i8> cttz_zero_undef: q-register version of the v8i8 zero-undef
; sequence ((x & -x) - 1 followed by vcnt.8).
define void @test_v16i8_zero_undef(<16 x i8>* %p) {
; CHECK-LABEL: test_v16i8_zero_undef:
; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]
; CHECK: vmov.i8 [[Q2:q[0-9]+]], #0x1
; CHECK: vneg.s8 [[Q3:q[0-9]+]], [[Q1:q[0-9]+]]
; CHECK: vand [[Q1]], [[Q1]], [[Q3]]
; CHECK: vsub.i8 [[Q1]], [[Q1]], [[Q2]]
; CHECK: vcnt.8 [[Q1]], [[Q1]]
; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0]
  %a = load <16 x i8>, <16 x i8>* %p
  %tmp = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
  store <16 x i8> %tmp, <16 x i8>* %p
  ret void
}

; <1 x i16> cttz_zero_undef: no instruction checks; only the label is matched.
define void @test_v1i16_zero_undef(<1 x i16>* %p) {
; CHECK-LABEL: test_v1i16_zero_undef:
  %a = load <1 x i16>, <1 x i16>* %p
  %tmp = call <1 x i16> @llvm.cttz.v1i16(<1 x i16> %a, i1 true)
  store <1 x i16> %tmp, <1 x i16>* %p
  ret void
}

; <2 x i16> cttz_zero_undef: no instruction checks; only the label is matched.
define void @test_v2i16_zero_undef(<2 x i16>* %p) {
; CHECK-LABEL: test_v2i16_zero_undef:
  %a = load <2 x i16>, <2 x i16>* %p
  %tmp = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %a, i1 true)
  store <2 x i16> %tmp, <2 x i16>* %p
  ret void
}

; <4 x i16> cttz_zero_undef: with zero-undef the checked lowering uses vclz
; instead of popcount: isolate the lowest set bit (x & -x), then compute
; 15 - clz per lane (vmov #0xf, vclz.i16, vsub.i16).
define void @test_v4i16_zero_undef(<4 x i16>* %p) {
; CHECK-LABEL: test_v4i16_zero_undef:
; CHECK: vldr [[D1:d[0-9]+]], [r0]
; CHECK: vneg.s16 [[D2:d[0-9]+]], [[D1]]
; CHECK: vand [[D1]], [[D1]], [[D2]]
; CHECK: vmov.i16 [[D3:d[0-9]+]], #0xf
; CHECK: vclz.i16 [[D1]], [[D1]]
; CHECK: vsub.i16 [[D1]], [[D3]], [[D1]]
; CHECK: vstr [[D1]], [r0]
  %a = load <4 x i16>, <4 x i16>* %p
  %tmp = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %a, i1 true)
  store <4 x i16> %tmp, <4 x i16>* %p
  ret void
}

; <8 x i16> cttz_zero_undef: q-register version of the vclz-based v4i16
; zero-undef sequence (15 - clz(x & -x) per lane).
define void @test_v8i16_zero_undef(<8 x i16>* %p) {
; CHECK-LABEL: test_v8i16_zero_undef:
; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]
; CHECK: vneg.s16 [[Q2:q[0-9]+]], [[Q1:q[0-9]+]]
; CHECK: vand [[Q1]], [[Q1]], [[Q2]]
; CHECK: vmov.i16 [[Q3:q[0-9]+]], #0xf
; CHECK: vclz.i16 [[Q1]], [[Q1]]
; CHECK: vsub.i16 [[Q1]], [[Q3]], [[Q1]]
; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0]
  %a = load <8 x i16>, <8 x i16>* %p
  %tmp = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
  store <8 x i16> %tmp, <8 x i16>* %p
  ret void
}

; <1 x i32> cttz_zero_undef: no instruction checks; only the label is matched.
define void @test_v1i32_zero_undef(<1 x i32>* %p) {
; CHECK-LABEL: test_v1i32_zero_undef:
  %a = load <1 x i32>, <1 x i32>* %p
  %tmp = call <1 x i32> @llvm.cttz.v1i32(<1 x i32> %a, i1 true)
  store <1 x i32> %tmp, <1 x i32>* %p
  ret void
}

; <2 x i32> cttz_zero_undef: vclz-based lowering, 31 - clz(x & -x) per lane
; (vmov #0x1f, vclz.i32, vsub.i32).
define void @test_v2i32_zero_undef(<2 x i32>* %p) {
; CHECK-LABEL: test_v2i32_zero_undef:
; CHECK: vldr [[D1:d[0-9]+]], [r0]
; CHECK: vneg.s32 [[D2:d[0-9]+]], [[D1]]
; CHECK: vand [[D1]], [[D1]], [[D2]]
; CHECK: vmov.i32 [[D3:d[0-9]+]], #0x1f
; CHECK: vclz.i32 [[D1]], [[D1]]
; CHECK: vsub.i32 [[D1]], [[D3]], [[D1]]
; CHECK: vstr [[D1]], [r0]
  %a = load <2 x i32>, <2 x i32>* %p
  %tmp = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 true)
  store <2 x i32> %tmp, <2 x i32>* %p
  ret void
}

; <4 x i32> cttz_zero_undef: q-register version of the vclz-based v2i32
; zero-undef sequence (31 - clz(x & -x) per lane).
define void @test_v4i32_zero_undef(<4 x i32>* %p) {
; CHECK-LABEL: test_v4i32_zero_undef:
; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]
; CHECK: vneg.s32 [[Q2:q[0-9]+]], [[Q1:q[0-9]+]]
; CHECK: vand [[Q1]], [[Q1]], [[Q2]]
; CHECK: vmov.i32 [[Q3:q[0-9]+]], #0x1f
; CHECK: vclz.i32 [[Q1]], [[Q1]]
; CHECK: vsub.i32 [[Q1]], [[Q3]], [[Q1]]
; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0]
  %a = load <4 x i32>, <4 x i32>* %p
  %tmp = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
  store <4 x i32> %tmp, <4 x i32>* %p
  ret void
}

; <1 x i64> cttz_zero_undef: same popcount-based sequence as plain <1 x i64>
; cttz (0 - x negation, x + (-1) decrement, vcnt.8, then three widening
; pairwise adds); the zero-undef flag does not change the checked lowering.
define void @test_v1i64_zero_undef(<1 x i64>* %p) {
; CHECK-LABEL: test_v1i64_zero_undef:
; CHECK: vldr [[D1:d[0-9]+]], [r0]
; CHECK: vmov.i32 [[D2:d[0-9]+]], #0x0
; CHECK: vmov.i64 [[D3:d[0-9]+]], #0xffffffffffffffff
; CHECK: vsub.i64 [[D2]], [[D2]], [[D1]]
; CHECK: vand [[D1]], [[D1]], [[D2]]
; CHECK: vadd.i64 [[D1]], [[D1]], [[D3]]
; CHECK: vcnt.8 [[D1]], [[D1]]
; CHECK: vpaddl.u8 [[D1]], [[D1]]
; CHECK: vpaddl.u16 [[D1]], [[D1]]
; CHECK: vpaddl.u32 [[D1]], [[D1]]
; CHECK: vstr [[D1]], [r0]
  %a = load <1 x i64>, <1 x i64>* %p
  %tmp = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %a, i1 true)
  store <1 x i64> %tmp, <1 x i64>* %p
  ret void
}

; <2 x i64> cttz_zero_undef: q-register version of the v1i64 zero-undef
; sequence (0 - x negation, x + (-1) decrement, vcnt.8, vpaddl.u8/u16/u32).
define void @test_v2i64_zero_undef(<2 x i64>* %p) {
; CHECK-LABEL: test_v2i64_zero_undef:
; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]
; CHECK: vmov.i32 [[Q2:q[0-9]+]], #0x0
; CHECK: vmov.i64 [[Q3:q[0-9]+]], #0xffffffffffffffff
; CHECK: vsub.i64 [[Q2]], [[Q2]], [[Q1:q[0-9]+]]
; CHECK: vand [[Q1]], [[Q1]], [[Q2]]
; CHECK: vadd.i64 [[Q1]], [[Q1]], [[Q3]]
; CHECK: vcnt.8 [[Q1]], [[Q1]]
; CHECK: vpaddl.u8 [[Q1]], [[Q1]]
; CHECK: vpaddl.u16 [[Q1]], [[Q1]]
; CHECK: vpaddl.u32 [[Q1]], [[Q1]]
; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0]
  %a = load <2 x i64>, <2 x i64>* %p
  %tmp = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
  store <2 x i64> %tmp, <2 x i64>* %p
  ret void
}