[PowerPC] Generate VSX version of splat word

This patch corresponds to review:
http://reviews.llvm.org/D18592

It allows the PPC back end to generate the xxspltw instruction where we
previously only emitted vspltw.

llvm-svn: 268516
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index d0f9e8d..e5cd3e3 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1014,6 +1014,7 @@
   case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
   case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
   case PPCISD::VPERM:           return "PPCISD::VPERM";
+  case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
   case PPCISD::CMPB:            return "PPCISD::CMPB";
   case PPCISD::Hi:              return "PPCISD::Hi";
   case PPCISD::Lo:              return "PPCISD::Lo";
@@ -7419,6 +7420,16 @@
   EVT VT = Op.getValueType();
   bool isLittleEndian = Subtarget.isLittleEndian();
 
+  if (Subtarget.hasVSX()) {
+    if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
+      int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
+      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
+      SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
+                                  DAG.getConstant(SplatIdx, dl, MVT::i32));
+      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
+    }
+  }
+
   if (Subtarget.hasQPX()) {
     if (VT.getVectorNumElements() != 4)
       return SDValue();
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index c097a62..b60fa53 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -61,6 +61,10 @@
       ///
       VPERM,
 
+      /// XXSPLT - The PPC VSX splat instructions
+      ///
+      XXSPLT,
+
       /// The CMPB instruction (takes two operands of i32 or i64).
       CMPB,
 
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 8388896..ad38234 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -31,6 +31,10 @@
   SDTCisVT<3, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>
 ]>;
 
+def SDT_PPCVecSplat : SDTypeProfile<1, 2, [ SDTCisVec<0>,
+  SDTCisVec<1>, SDTCisInt<2>
+]>;
+
 def SDT_PPCvcmp : SDTypeProfile<1, 3, [
   SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>
 ]>;
@@ -141,6 +145,7 @@
 def PPCaddiDtprelL   : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
 
 def PPCvperm    : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
+def PPCxxsplt    : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>;
 
 def PPCqvfperm   : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>;
 def PPCqvgpci    : SDNode<"PPCISD::QVGPCI", SDT_PPCqvgpci, []>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index d7f64c9..bc91fb6 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -775,7 +775,9 @@
                        "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm, []>;
   def XXSPLTW : XX2Form_2<60, 164,
                        (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
-                       "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
+                       "xxspltw $XT, $XB, $UIM", IIC_VecPerm,
+                       [(set v4i32:$XT,
+                             (PPCxxsplt v4i32:$XB, imm32SExt16:$UIM))]>;
 } // hasSideEffects
 
 // SELECT_CC_* - Used to implement the SELECT_CC DAG operation.  Expanded after
diff --git a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index 10636b8..a1aa841 100644
--- a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -407,9 +407,9 @@
       case PPC::VSPLTB:
       case PPC::VSPLTH:
       case PPC::VSPLTW:
+      case PPC::XXSPLTW:
         // Splats are lane-sensitive, but we can use special handling
-        // to adjust the source lane for the splat.  This is not yet
-        // implemented.  When it is, we need to uncomment the following:
+        // to adjust the source lane for the splat.
         SwapVector[VecIdx].IsSwappable = 1;
         SwapVector[VecIdx].SpecialHandling = SHValues::SH_SPLAT;
         break;
@@ -515,7 +515,6 @@
       // permute control vectors (for shift values 1, 2, 3).  However,
       // VPERM has a more restrictive register class.
       case PPC::XXSLDWI:
-      case PPC::XXSPLTW:
         break;
       }
     }
@@ -806,12 +805,21 @@
       llvm_unreachable("Unexpected splat opcode");
     case PPC::VSPLTB: NElts = 16; break;
     case PPC::VSPLTH: NElts = 8;  break;
-    case PPC::VSPLTW: NElts = 4;  break;
+    case PPC::VSPLTW:
+    case PPC::XXSPLTW: NElts = 4;  break;
     }
 
-    unsigned EltNo = MI->getOperand(1).getImm();
+    unsigned EltNo;
+    if (MI->getOpcode() == PPC::XXSPLTW)
+      EltNo = MI->getOperand(2).getImm();
+    else
+      EltNo = MI->getOperand(1).getImm();
+
     EltNo = (EltNo + NElts / 2) % NElts;
-    MI->getOperand(1).setImm(EltNo);
+    if (MI->getOpcode() == PPC::XXSPLTW)
+      MI->getOperand(2).setImm(EltNo);
+    else
+      MI->getOperand(1).setImm(EltNo);
 
     DEBUG(dbgs() << "  Into: ");
     DEBUG(MI->dump());