[PPC] Fix code generation for bswap(int32) followed by store16

This patch fixes pr32063.

Current code in PPCTargetLowering::PerformDAGCombine can transform

bswap
store

into a single PPCISD::STBRX instruction, but it doesn't consider the case where the operand size of the bswap is larger than the store size. When that happens, two modifications are needed:

1. For the last operand of PPCISD::STBRX, we should not use DAG.getValueType(N->getOperand(1).getValueType()); instead, we should use cast<StoreSDNode>(N)->getMemoryVT().

2. Before the PPCISD::STBRX, we need to shift the original operand of the bswap right, so that the bytes to be stored end up in the low-order bits of the value.

Differential Revision: https://reviews.llvm.org/D30362

llvm-svn: 296811
diff --git a/llvm/test/CodeGen/PowerPC/pr32063.ll b/llvm/test/CodeGen/PowerPC/pr32063.ll
new file mode 100644
index 0000000..f031ec8
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr32063.ll
@@ -0,0 +1,16 @@
+; RUN: llc -O2 < %s | FileCheck %s
+target triple = "powerpc64le-linux-gnu"
+
+define void @foo(i32 %v, i16* %p) {
+        %1 = and i32 %v, -65536                     ; -65536 is 0xFFFF0000: keep only the upper 16 bits of %v
+        %2 = tail call i32 @llvm.bswap.i32(i32 %1)  ; 32-bit byte swap: the surviving bytes land in the low half
+        %conv = trunc i32 %2 to i16                 ; keep the low 16 bits of the swapped value
+        store i16 %conv, i16* %p                    ; 16-bit store fed by a 32-bit bswap (store narrower than bswap)
+        ret void
+
+; CHECK:     srwi
+; CHECK:     sthbrx
+; CHECK-NOT: stwbrx
+}
+
+declare i32 @llvm.bswap.i32(i32)                    ; byte-swap intrinsic used by @foo