[PowerPC] Implement the vclz instructions for PWR8

Patch by Kit Barton.

Add the vector count leading zeros instruction for byte, halfword, word, and doubleword sizes. This is a fairly straightforward addition after the changes made for vpopcnt:
1. Add the correct definitions for the various instructions in PPCInstrAltivec.td.
2. Make the CTLZ operation legal on vector types when using P8Altivec in PPCISelLowering.cpp.

Test Plan:
Created a new test case in test/CodeGen/PowerPC/vec_clz.ll to check that the instructions are generated when the CTLZ operation is used in LLVM. Checked the encoding and decoding in test/MC/PowerPC/ppc64-encoding-vmx.s and test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt respectively.

llvm-svn: 228301

commit: 433b1c3aaefc74edcf98bb59007de3e992d22cd6 [log] [tgz]
author: Bill Schmidt <wschmidt@linux.vnet.ibm.com> Thu Feb 05 15:24:47 2015 +0000
committer: Bill Schmidt <wschmidt@linux.vnet.ibm.com> Thu Feb 05 15:24:47 2015 +0000
tree: 32d97e9322873fccdc35724f54e6a433bb9319d2
parent: f8d662aa4b554ba903be9b19620cf9621fd99525 [diff]
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index dd3362f..b98c9e1 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp

@@ -401,11 +401,15 @@
       setOperationAction(ISD::ADD , VT, Legal);
       setOperationAction(ISD::SUB , VT, Legal);
 
-      // Vector popcnt instructions introduced in P8
-      if (Subtarget.hasP8Altivec()) 
+      // Vector instructions introduced in P8
+      if (Subtarget.hasP8Altivec()) {
         setOperationAction(ISD::CTPOP, VT, Legal);
-      else 
+        setOperationAction(ISD::CTLZ, VT, Legal);
+      }
+      else {
         setOperationAction(ISD::CTPOP, VT, Expand);
+        setOperationAction(ISD::CTLZ, VT, Expand);
+      }
 
       // We promote all shuffles to v16i8.
       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
@@ -461,7 +465,6 @@
       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
       setOperationAction(ISD::FPOW, VT, Expand);
       setOperationAction(ISD::BSWAP, VT, Expand);
-      setOperationAction(ISD::CTLZ, VT, Expand);
       setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
       setOperationAction(ISD::CTTZ, VT, Expand);
       setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);

diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index 5641b53..9379bad 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td

@@ -940,6 +940,21 @@
 
 def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">;
 let Predicates = [HasP8Altivec] in {
+
+// Count Leading Zeros
+def VCLZB : VXForm_2<1794, (outs vrrc:$vD), (ins vrrc:$vB),
+                     "vclzb $vD, $vB", IIC_VecGeneral,
+                     [(set v16i8:$vD, (ctlz v16i8:$vB))]>;
+def VCLZH : VXForm_2<1858, (outs vrrc:$vD), (ins vrrc:$vB),
+                     "vclzh $vD, $vB", IIC_VecGeneral,
+                     [(set v8i16:$vD, (ctlz v8i16:$vB))]>;
+def VCLZW : VXForm_2<1922, (outs vrrc:$vD), (ins vrrc:$vB),
+                     "vclzw $vD, $vB", IIC_VecGeneral,
+                     [(set v4i32:$vD, (ctlz v4i32:$vB))]>;
+def VCLZD : VXForm_2<1986, (outs vrrc:$vD), (ins vrrc:$vB),
+                     "vclzd $vD, $vB", IIC_VecGeneral,
+                     [(set v2i64:$vD, (ctlz v2i64:$vB))]>;
+
 // Population Count
 def VPOPCNTB : VXForm_2<1795, (outs vrrc:$vD), (ins vrrc:$vB),
                         "vpopcntb $vD, $vB", IIC_VecGeneral,

diff --git a/llvm/test/CodeGen/PowerPC/vec_clz.ll b/llvm/test/CodeGen/PowerPC/vec_clz.ll
new file mode 100644
index 0000000..01cdecd
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vec_clz.ll

@@ -0,0 +1,40 @@
+; Check the vclz* instructions that were added in P8
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>) nounwind readnone
+
+define <16 x i8> @test_v16i8(<16 x i8> %x) nounwind readnone {
+       %vcnt = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %x)
+       ret <16 x i8> %vcnt
+; CHECK: @test_v16i8
+; CHECK: vclzb 2, 2
+; CHECK: blr
+}
+
+define <8 x i16> @test_v8i16(<8 x i16> %x) nounwind readnone {
+       %vcnt = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %x)
+       ret <8 x i16> %vcnt
+; CHECK: @test_v8i16
+; CHECK: vclzh 2, 2
+; CHECK: blr
+}
+
+define <4 x i32> @test_v4i32(<4 x i32> %x) nounwind readnone {
+       %vcnt = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x)
+       ret <4 x i32> %vcnt
+; CHECK: @test_v4i32
+; CHECK: vclzw 2, 2
+; CHECK: blr
+}
+
+define <2 x i64> @test_v2i64(<2 x i64> %x) nounwind readnone {
+       %vcnt = tail call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %x)
+       ret <2 x i64> %vcnt
+; CHECK: @test_v2i64
+; CHECK: vclzd 2, 2
+; CHECK: blr
+}

diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt
index 72c5e64..4f12a6c 100644
--- a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt
+++ b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt

@@ -501,6 +501,18 @@
 # CHECK: vrsqrtefp 2, 3                  
 0x10 0x40 0x19 0x4a
 
+# CHECK: vclzb 2, 3
+0x10 0x40 0x1f 0x02
+
+# CHECK: vclzh 2, 3
+0x10 0x40 0x1f 0x42
+
+# CHECK: vclzw 2, 3
+0x10 0x40 0x1f 0x82
+
+# CHECK: vclzd 2, 3
+0x10 0x40 0x1f 0xc2
+
 # CHECK: vpopcntb 2, 3
 0x10 0x40 0x1f 0x03
 

diff --git a/llvm/test/MC/PowerPC/ppc64-encoding-vmx.s b/llvm/test/MC/PowerPC/ppc64-encoding-vmx.s
index 3247d40..09e5ecc 100644
--- a/llvm/test/MC/PowerPC/ppc64-encoding-vmx.s
+++ b/llvm/test/MC/PowerPC/ppc64-encoding-vmx.s

@@ -543,6 +543,23 @@
 # CHECK-LE: vrsqrtefp 2, 3                  # encoding: [0x4a,0x19,0x40,0x10]
             vrsqrtefp 2, 3
 
+# Vector count leading zero instructions
+# CHECK-BE: vclzb 2, 3                      # encoding: [0x10,0x40,0x1f,0x02]
+# CHECK-LE: vclzb 2, 3                      # encoding: [0x02,0x1f,0x40,0x10]
+            vclzb 2, 3
+
+# CHECK-BE: vclzh 2, 3                      # encoding: [0x10,0x40,0x1f,0x42]
+# CHECK-LE: vclzh 2, 3                      # encoding: [0x42,0x1f,0x40,0x10]
+            vclzh 2, 3
+
+# CHECK-BE: vclzw 2, 3                      # encoding: [0x10,0x40,0x1f,0x82]
+# CHECK-LE: vclzw 2, 3                      # encoding: [0x82,0x1f,0x40,0x10]
+            vclzw 2, 3
+
+# CHECK-BE: vclzd 2, 3                      # encoding: [0x10,0x40,0x1f,0xc2]
+# CHECK-LE: vclzd 2, 3                      # encoding: [0xc2,0x1f,0x40,0x10]
+            vclzd 2, 3                      
+
 # Vector population count instructions
 # CHECK-BE: vpopcntb 2, 3                   # encoding: [0x10,0x40,0x1f,0x03]
 # CHECK-LE: vpopcntb 2, 3                   # encoding: [0x03,0x1f,0x40,0x10]
commit	433b1c3aaefc74edcf98bb59007de3e992d22cd6	[log] [tgz]
author	Bill Schmidt <wschmidt@linux.vnet.ibm.com>	Thu Feb 05 15:24:47 2015 +0000
committer	Bill Schmidt <wschmidt@linux.vnet.ibm.com>	Thu Feb 05 15:24:47 2015 +0000
tree	32d97e9322873fccdc35724f54e6a433bb9319d2
parent	f8d662aa4b554ba903be9b19620cf9621fd99525 [diff]