[Power9] Exploit move and splat instructions for build_vector improvement
This patch corresponds to review:
https://reviews.llvm.org/D21135
This patch exploits the following instructions:
mtvsrws
lxvwsx
mtvsrdd
mfvsrld
in order to improve some build_vector and extractelement patterns.
llvm-svn: 282246
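
As a rough source-level sketch (not part of the commit), the shapes this patch targets can be written with GCC/Clang vector extensions as below. The function names are made up for illustration, and the instruction named for each case is the intended selection on a Power9 (ISA 3.0) target rather than a guarantee for every surrounding context.

// Illustrative only: splats of a non-constant value and a pack of two
// 64-bit values, which reach the backend as BUILD_VECTOR nodes.
typedef int v4si __attribute__((vector_size(16)));
typedef long long v2di __attribute__((vector_size(16)));

// Value already in a GPR: candidate for mtvsrws (move and splat).
v4si splat_from_reg(int x) {
  v4si v = {x, x, x, x};
  return v;
}

// Value loaded from memory with a single use: candidate for lxvwsx
// (load and splat).
v4si splat_from_mem(const int *p) {
  int x = *p;
  v4si v = {x, x, x, x};
  return v;
}

// Two 64-bit GPR values packed into a vector: candidate for mtvsrdd
// on subtargets with direct moves.
v2di pack_two(long long a, long long b) {
  v2di v = {a, b};
  return v;
}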
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index c414a15..5bce336 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -672,6 +672,9 @@
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
}
+
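+ // A v2i64 BUILD_VECTOR of two 64-bit values can be selected directly as
+ // MTVSRDD on ISA 3.0 subtargets that have direct moves.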
+ if (Subtarget.isISA3_0() && Subtarget.hasDirectMove())
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Legal);
}
if (Subtarget.hasQPX()) {
@@ -7079,6 +7082,16 @@
return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
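+// Return true if this BUILD_VECTOR node has the given type and all of its
+// operands are identical, i.e. it is a splat of a possibly non-constant value.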
+static bool isNonConstSplatBV(BuildVectorSDNode *BVN, EVT Type) {
+ if (BVN->getValueType(0) != Type)
+ return false;
+ auto OpZero = BVN->getOperand(0);
+ for (int i = 1, e = BVN->getNumOperands(); i < e; i++)
+ if (BVN->getOperand(i) != OpZero)
+ return false;
+ return true;
+}
+
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
@@ -7200,8 +7213,17 @@
bool HasAnyUndefs;
if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
- SplatBitSize > 32)
+ SplatBitSize > 32) {
+ // We can splat a non-const value on CPUs that implement ISA 3.0
+ // in two ways: LXVWSX (load and splat) and MTVSRWS (move and splat).
+ auto OpZero = BVN->getOperand(0);
+ bool CanLoadAndSplat = OpZero.getOpcode() == ISD::LOAD &&
+ BVN->isOnlyUserOf(OpZero.getNode());
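+ // Prefer LXVWSX (load and splat) when the splatted value is a load with no
+ // other users; in that case fall through to the default expansion.
+ // Otherwise, keep a v4i32 splat BUILD_VECTOR legal so it can be selected
+ // as MTVSRWS (move and splat).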
+ if (Subtarget.isISA3_0() &&
+ isNonConstSplatBV(BVN, MVT::v4i32) && !CanLoadAndSplat)
+ return Op;
return SDValue();
+ }
unsigned SplatBits = APSplatBits.getZExtValue();
unsigned SplatUndef = APSplatUndef.getZExtValue();
@@ -7219,6 +7241,10 @@
return Op;
}
+ // We have XXSPLTIB for constant splats one byte wide
+ if (Subtarget.isISA3_0() && Op.getValueType() == MVT::v16i8)
+ return Op;
+
// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
(32-SplatBitSize));
@@ -7462,6 +7488,18 @@
if (Subtarget.hasVSX()) {
if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
+
+ // If the source for the shuffle is a scalar_to_vector that came from a
+ // 32-bit load, it will have used LXVWSX so we don't need to splat again.
+ if (Subtarget.isISA3_0() &&
+ ((isLittleEndian && SplatIdx == 3) ||
+ (!isLittleEndian && SplatIdx == 0))) {
+ SDValue Src = V1.getOperand(0);
+ if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ Src.getOperand(0).getOpcode() == ISD::LOAD &&
+ Src.getOperand(0).hasOneUse())
+ return V1;
+ }
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
DAG.getConstant(SplatIdx, dl, MVT::i32));