Add the PPC popcntw instruction

The popcntw instruction is available whenever the popcntd instruction is
available, and performs a separate popcnt on each of the lower and upper
32-bit words of the register. Ignoring the high-order count, this can be
used for the 32-bit input case (avoiding the explicit zero extension that
would otherwise be required in order to use popcntd).

llvm-svn: 178470
diff --git a/llvm/test/CodeGen/PowerPC/popcnt.ll b/llvm/test/CodeGen/PowerPC/popcnt.ll
index 363f705..b304d72 100644
--- a/llvm/test/CodeGen/PowerPC/popcnt.ll
+++ b/llvm/test/CodeGen/PowerPC/popcnt.ll
@@ -4,8 +4,8 @@
   %cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
   ret i8 %cnt
 ; CHECK: @cnt8
-; CHECK: rldicl
-; CHECK: popcntd
+; CHECK: rlwinm
+; CHECK: popcntw
 ; CHECK: blr
 }
 
@@ -13,8 +13,8 @@
   %cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
   ret i16 %cnt
 ; CHECK: @cnt16
-; CHECK: rldicl
-; CHECK: popcntd
+; CHECK: rlwinm
+; CHECK: popcntw
 ; CHECK: blr
 }
 
@@ -22,8 +22,7 @@
   %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
   ret i32 %cnt
 ; CHECK: @cnt32
-; CHECK: rldicl
-; CHECK: popcntd
+; CHECK: popcntw
 ; CHECK: blr
 }