[PowerPC] Implement the vclz instructions for PWR8
Patch by Kit Barton.
Add the vector count leading zeros instruction for byte, halfword,
word, and doubleword sizes. This is a fairly straightforward addition
after the changes made for vpopcnt:
1. Add the correct definitions for the various instructions in
PPCInstrAltivec.td
2. Make the CTLZ operation legal on vector types when using P8Altivec
in PPCISelLowering.cpp
Test Plan
Created new test case in test/CodeGen/PowerPC/vec_clz.ll to check the
instructions are being generated when the CTLZ operation is used in
LLVM.
Check the encoding and decoding in test/MC/PowerPC/ppc_encoding_vmx.s
and test/Disassembler/PowerPC/ppc_encoding_vmx.txt respectively.
llvm-svn: 228301
diff --git a/llvm/test/CodeGen/PowerPC/vec_clz.ll b/llvm/test/CodeGen/PowerPC/vec_clz.ll
new file mode 100644
index 0000000..01cdecd
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vec_clz.ll
@@ -0,0 +1,40 @@
+; Check the vctlz* instructions that were added in P8
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>) nounwind readnone
+
+define <16 x i8> @test_v16i8(<16 x i8> %x) nounwind readnone {
+ %vcnt = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %x)
+ ret <16 x i8> %vcnt
+; CHECK: @test_v16i8
+; CHECK: vclzb 2, 2
+; CHECK: blr
+}
+
+define <8 x i16> @test_v8i16(<8 x i16> %x) nounwind readnone {
+ %vcnt = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %x)
+ ret <8 x i16> %vcnt
+; CHECK: @test_v8i16
+; CHECK: vclzh 2, 2
+; CHECK: blr
+}
+
+define <4 x i32> @test_v4i32(<4 x i32> %x) nounwind readnone {
+ %vcnt = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x)
+ ret <4 x i32> %vcnt
+; CHECK: @test_v4i32
+; CHECK: vclzw 2, 2
+; CHECK: blr
+}
+
+define <2 x i64> @test_v2i64(<2 x i64> %x) nounwind readnone {
+ %vcnt = tail call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %x)
+ ret <2 x i64> %vcnt
+; CHECK: @test_v2i64
+; CHECK: vclzd 2, 2
+; CHECK: blr
+}