[Hexagon] Split HVX operations on vector pairs

Vector pairs are legal types, but not every operation can work on pairs.
For those operations that are legal for single vectors, generate a concat
of their results on pair halves.

llvm-svn: 324350
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 5cee207..3465d7e 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1275,12 +1275,8 @@
 
 SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
   SDLoc dl(Op);
-
   SDValue LHS = Op.getOperand(0);
   SDValue RHS = Op.getOperand(1);
-  if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(ty(LHS)))
-    return LowerHvxSetCC(Op, DAG);
-
   SDValue Cmp = Op.getOperand(2);
   ISD::CondCode CC = cast<CondCodeSDNode>(Cmp)->get();
 
@@ -2151,15 +2147,39 @@
       // independent) handling of it would convert it to a load, which is
       // not always the optimal choice.
       setOperationAction(ISD::BUILD_VECTOR, T, Custom);
-      // Custom-lower SETCC for pairs. Expand it into a concat of SETCCs
-      // for individual vectors.
-      setOperationAction(ISD::SETCC,        T, Custom);
 
-      if (T == ByteW)
-        continue;
-      // Promote all shuffles and concats to operate on vectors of bytes.
-      setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
-      setPromoteTo(ISD::CONCAT_VECTORS, T, ByteW);
+      // Custom-lower these operations for pairs. Expand them into a concat
+      // of the corresponding operations on individual vectors.
+      setOperationAction(ISD::ANY_EXTEND,               T, Custom);
+      setOperationAction(ISD::SIGN_EXTEND,              T, Custom);
+      setOperationAction(ISD::ZERO_EXTEND,              T, Custom);
+      setOperationAction(ISD::SIGN_EXTEND_INREG,        T, Custom);
+      setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG,  T, Custom);
+      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
+      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
+
+      setOperationAction(ISD::ADD,      T, Legal);
+      setOperationAction(ISD::SUB,      T, Legal);
+      setOperationAction(ISD::MUL,      T, Custom);
+      setOperationAction(ISD::MULHS,    T, Custom);
+      setOperationAction(ISD::MULHU,    T, Custom);
+      setOperationAction(ISD::AND,      T, Custom);
+      setOperationAction(ISD::OR,       T, Custom);
+      setOperationAction(ISD::XOR,      T, Custom);
+      setOperationAction(ISD::SETCC,    T, Custom);
+      setOperationAction(ISD::VSELECT,  T, Custom);
+      if (T != ByteW) {
+        setOperationAction(ISD::SRA,      T, Custom);
+        setOperationAction(ISD::SHL,      T, Custom);
+        setOperationAction(ISD::SRL,      T, Custom);
+
+        // Promote all shuffles and concats to operate on vectors of bytes.
+        setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
+        setPromoteTo(ISD::CONCAT_VECTORS, T, ByteW);
+      }
+
+      MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
+      setOperationAction(ISD::SETCC, BoolV, Custom);
     }
   }
 
@@ -2310,6 +2330,7 @@
   case HexagonISD::P2D:           return "HexagonISD::P2D";
   case HexagonISD::V2Q:           return "HexagonISD::V2Q";
   case HexagonISD::Q2V:           return "HexagonISD::Q2V";
+  case HexagonISD::QCAT:          return "HexagonISD::QCAT";
   case HexagonISD::QTRUE:         return "HexagonISD::QTRUE";
   case HexagonISD::QFALSE:        return "HexagonISD::QFALSE";
   case HexagonISD::TYPECAST:      return "HexagonISD::TYPECAST";
@@ -2593,7 +2614,16 @@
 
 SDValue
 HexagonTargetLowering::LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const {
-  return LowerSIGN_EXTEND(Op, DAG);
+  // An any-extend of a boolean (i1) vector to an HVX vector type is lowered
+  // as a sign-extend, since that translates directly to Q2V. A zero-extend
+  // would be equally fast, but Q2V is used/recognized in more places.
+  // Every other any-extend is emitted as a plain ISD::ZERO_EXTEND node.
+  MVT ResTy = ty(Op);
+  SDValue InpV = Op.getOperand(0);
+  MVT ElemTy = ty(InpV).getVectorElementType();
+  if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
+    return LowerSIGN_EXTEND(Op, DAG);
+  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV);
 }
 
 SDValue
@@ -3185,6 +3215,14 @@
 SDValue
 HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   unsigned Opc = Op.getOpcode();
+
+  // Handle INLINEASM first.
+  if (Opc == ISD::INLINEASM)
+    return LowerINLINEASM(Op, DAG);
+
+  if (isHvxOperation(Op))
+    return LowerHvxOperation(Op, DAG);
+
   switch (Opc) {
     default:
 #ifndef NDEBUG
@@ -3200,9 +3238,6 @@
     case ISD::EXTRACT_VECTOR_ELT:   return LowerEXTRACT_VECTOR_ELT(Op, DAG);
     case ISD::BUILD_VECTOR:         return LowerBUILD_VECTOR(Op, DAG);
     case ISD::VECTOR_SHUFFLE:       return LowerVECTOR_SHUFFLE(Op, DAG);
-    case ISD::ANY_EXTEND:           return LowerANY_EXTEND(Op, DAG);
-    case ISD::SIGN_EXTEND:          return LowerSIGN_EXTEND(Op, DAG);
-    case ISD::ZERO_EXTEND:          return LowerZERO_EXTEND(Op, DAG);
     case ISD::BITCAST:              return LowerBITCAST(Op, DAG);
     case ISD::SRA:
     case ISD::SHL:
@@ -3210,7 +3245,6 @@
     case ISD::ConstantPool:         return LowerConstantPool(Op, DAG);
     case ISD::JumpTable:            return LowerJumpTable(Op, DAG);
     case ISD::EH_RETURN:            return LowerEH_RETURN(Op, DAG);
-      // Frame & Return address. Currently unimplemented.
     case ISD::RETURNADDR:           return LowerRETURNADDR(Op, DAG);
     case ISD::FRAMEADDR:            return LowerFRAMEADDR(Op, DAG);
     case ISD::GlobalTLSAddress:     return LowerGlobalTLSAddress(Op, DAG);
@@ -3224,23 +3258,11 @@
     case ISD::VSELECT:              return LowerVSELECT(Op, DAG);
     case ISD::INTRINSIC_WO_CHAIN:   return LowerINTRINSIC_WO_CHAIN(Op, DAG);
     case ISD::INTRINSIC_VOID:       return LowerINTRINSIC_VOID(Op, DAG);
-    case ISD::INLINEASM:            return LowerINLINEASM(Op, DAG);
     case ISD::PREFETCH:             return LowerPREFETCH(Op, DAG);
     case ISD::READCYCLECOUNTER:     return LowerREADCYCLECOUNTER(Op, DAG);
-    case ISD::MUL:
-      if (Subtarget.useHVXOps())
-        return LowerHvxMul(Op, DAG);
-      break;
-    case ISD::MULHS:
-    case ISD::MULHU:
-      if (Subtarget.useHVXOps())
-        return LowerHvxMulh(Op, DAG);
-      break;
-    case ISD::ANY_EXTEND_VECTOR_INREG:
-      if (Subtarget.useHVXOps())
-        return LowerHvxExtend(Op, DAG);
       break;
   }
+
   return SDValue();
 }
 
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 4e7da76..ab85893 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -73,6 +73,7 @@
                    // [*] The equivalence is defined as "Q <=> (V != 0)",
                    //     where the != operation compares bytes.
                    // Note: V != 0 is implemented as V >u 0.
+      QCAT,
       QTRUE,
       QFALSE,
       VZERO,
@@ -408,9 +409,14 @@
     SDValue LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerHvxShift(SDValue Op, SelectionDAG &DAG) const;
 
+    SDValue SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const;
+
     std::pair<const TargetRegisterClass*, uint8_t>
     findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT)
         const override;
+
+    bool isHvxOperation(SDValue Op) const;
+    SDValue LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const;
   };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 9d0e4be..264a2d1 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -75,6 +75,10 @@
 HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
                                SelectionDAG &DAG) const {
   TypePair Tys = typeSplit(ty(Vec));
+  if (Vec.getOpcode() == HexagonISD::QCAT) {
+    // QCAT already carries the two halves as its operands; reuse them.
+    return VectorPair(Vec.getOperand(0), Vec.getOperand(1));
+  }
   return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
 }
 
@@ -799,7 +803,26 @@
   unsigned HwLen = Subtarget.getVectorLength();
   unsigned NumOp = Op.getNumOperands();
   assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);
-  (void)NumOp;
+
+  SDValue Op0 = Op.getOperand(0);
+
+  // If the operands are HVX types (i.e. not scalar predicates), then
+  // defer the concatenation, and create QCAT instead.
+  if (Subtarget.isHVXVectorType(ty(Op0), true)) {
+    if (NumOp == 2)
+      return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1));
+
+    ArrayRef<SDUse> U(Op.getNode()->ops());
+    SmallVector<SDValue,4> SV(U.begin(), U.end());
+    ArrayRef<SDValue> Ops(SV);
+
+    MVT HalfTy = typeSplit(VecTy).first;
+    SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
+                             Ops.take_front(NumOp/2));
+    SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
+                             Ops.take_back(NumOp/2));
+    return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1);
+  }
 
   // Count how many bytes (in a vector register) each bit in VecTy
   // corresponds to.
@@ -889,7 +912,7 @@
 SDValue
 HexagonTargetLowering::LowerHvxMul(SDValue Op, SelectionDAG &DAG) const {
   MVT ResTy = ty(Op);
-  assert(ResTy.isVector());
+  assert(ResTy.isVector() && isHvxSingleTy(ResTy));
   const SDLoc &dl(Op);
   SmallVector<int,256> ShuffMask;
 
@@ -1047,30 +1070,6 @@
 }
 
 SDValue
-HexagonTargetLowering::LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
-  MVT ResTy = ty(Op);
-  MVT VecTy = ty(Op.getOperand(0));
-  assert(VecTy == ty(Op.getOperand(1)));
-  unsigned HwLen = Subtarget.getVectorLength();
-  const SDLoc &dl(Op);
-
-  SDValue Cmp = Op.getOperand(2);
-  ISD::CondCode CC = cast<CondCodeSDNode>(Cmp)->get();
-
-  if (VecTy.getSizeInBits() == 16*HwLen) {
-    VectorPair P0 = opSplit(Op.getOperand(0), dl, DAG);
-    VectorPair P1 = opSplit(Op.getOperand(1), dl, DAG);
-    MVT HalfTy = typeSplit(ResTy).first;
-
-    SDValue V0 = DAG.getSetCC(dl, HalfTy, P0.first, P1.first, CC);
-    SDValue V1 = DAG.getSetCC(dl, HalfTy, P0.second, P1.second, CC);
-    return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, V1, V0);
-  }
-
-  return SDValue();
-}
-
-SDValue
 HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
   // Sign- and zero-extends are legal.
   assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
@@ -1082,3 +1081,103 @@
   return Op;
 }
 
+SDValue
+HexagonTargetLowering::SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const {
+  assert(!Op.isMachineOpcode());
+  SmallVector<SDValue,2> OpsL, OpsH;
+  const SDLoc &dl(Op);
+  // Build a VT operand for the half type; both halves get the same one.
+  auto SplitVTNode = [&DAG,this] (const VTSDNode *N) {
+    MVT Ty = typeSplit(N->getVT().getSimpleVT()).first;
+    SDValue TV = DAG.getValueType(Ty);
+    return std::make_pair(TV, TV);
+  };
+  // HVX vector operands are split in two; all others go to both halves.
+  for (SDValue A : Op.getNode()->ops()) {
+    VectorPair P = Subtarget.isHVXVectorType(ty(A), true)
+                    ? opSplit(A, dl, DAG)
+                    : std::make_pair(A, A);
+    // SIGN_EXTEND_INREG carries a VT operand, which must be halved too.
+    if (Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+      if (const auto *N = dyn_cast<const VTSDNode>(A.getNode()))
+        P = SplitVTNode(N);
+    }
+    OpsL.push_back(P.first);
+    OpsH.push_back(P.second);
+  }
+  // Apply the same opcode to each half, then concatenate the results.
+  MVT ResTy = ty(Op);
+  MVT HalfTy = typeSplit(ResTy).first;
+  SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL);
+  SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH);
+  SDValue S = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, L, H);
+  return S;
+}
+
+SDValue
+HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
+  unsigned Opc = Op.getOpcode();
+  bool IsPairOp = isHvxPairTy(ty(Op)) ||
+                  llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) {
+                    return isHvxPairTy(ty(V));
+                  });
+  // Pair operations listed below are split into two single-vector halves.
+  if (IsPairOp) {
+    switch (Opc) {
+      default:
+        break;
+      case ISD::MUL:
+      case ISD::MULHS:
+      case ISD::MULHU:
+      case ISD::AND:
+      case ISD::OR:
+      case ISD::XOR:
+      case ISD::SRA:
+      case ISD::SHL:
+      case ISD::SRL:
+      case ISD::SETCC:
+      case ISD::VSELECT:
+      case ISD::SIGN_EXTEND_INREG:
+        return SplitHvxPairOp(Op, DAG);
+    }
+  }
+  // Everything else is dispatched to its opcode-specific lowering routine.
+  switch (Opc) {
+    default:
+      break;
+    case ISD::CONCAT_VECTORS:           return LowerCONCAT_VECTORS(Op, DAG);
+    case ISD::INSERT_SUBVECTOR:         return LowerINSERT_SUBVECTOR(Op, DAG);
+    case ISD::INSERT_VECTOR_ELT:        return LowerINSERT_VECTOR_ELT(Op, DAG);
+    case ISD::EXTRACT_SUBVECTOR:        return LowerEXTRACT_SUBVECTOR(Op, DAG);
+    case ISD::EXTRACT_VECTOR_ELT:       return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+    case ISD::BUILD_VECTOR:             return LowerBUILD_VECTOR(Op, DAG);
+    case ISD::VECTOR_SHUFFLE:           return LowerVECTOR_SHUFFLE(Op, DAG);
+    case ISD::ANY_EXTEND:               return LowerANY_EXTEND(Op, DAG);
+    case ISD::SIGN_EXTEND:              return LowerSIGN_EXTEND(Op, DAG);
+    case ISD::ZERO_EXTEND:              return LowerZERO_EXTEND(Op, DAG);
+    case ISD::SRA:
+    case ISD::SHL:
+    case ISD::SRL:                      return LowerVECTOR_SHIFT(Op, DAG);
+    case ISD::MUL:                      return LowerHvxMul(Op, DAG);
+    case ISD::MULHS:
+    case ISD::MULHU:                    return LowerHvxMulh(Op, DAG);
+    case ISD::ANY_EXTEND_VECTOR_INREG:  return LowerHvxExtend(Op, DAG);
+    case ISD::SETCC:
+    case ISD::INTRINSIC_VOID:           return Op;  // Returned unchanged.
+  }
+#ifndef NDEBUG
+  Op.dumpr(&DAG);
+#endif
+  llvm_unreachable("Unhandled HVX operation");
+}
+
+bool
+HexagonTargetLowering::isHvxOperation(SDValue Op) const {
+  // If the type of the result, or the type of any operand, is an HVX vector
+  // type, this is an HVX operation.
+  return Subtarget.isHVXVectorType(ty(Op)) ||
+         llvm::any_of(Op.getNode()->ops(),
+                      [this] (SDValue V) {
+                        return Subtarget.isHVXVectorType(ty(V), true);
+                      });
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index 31aac6c..757e5c6 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -2977,7 +2977,10 @@
 def: Pat<(HexagonREADCYCLE), (A4_tfrcpp UPCYCLE)>;
 
 
-def SDTVecLeaf: SDTypeProfile<1, 0, [SDTCisVec<0>]>;
+def SDTVecLeaf:
+  SDTypeProfile<1, 0, [SDTCisVec<0>]>;
+def SDTVecBinOp:
+  SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>]>;
 
 def SDTHexagonVEXTRACTW: SDTypeProfile<1, 2,
   [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisVT<2, i32>]>;
@@ -2987,18 +2990,36 @@
   [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>;
 def HexagonVINSERTW0 : SDNode<"HexagonISD::VINSERTW0", SDTHexagonVINSERTW0>;
 
-def Combinev: OutPatFrag<(ops node:$Rs, node:$Rt),
-  (REG_SEQUENCE HvxWR, $Rs, vsub_hi, $Rt, vsub_lo)>;
+def HwLen2: SDNodeXForm<imm, [{
+  const auto &ST = static_cast<const HexagonSubtarget&>(CurDAG->getSubtarget());
+  return CurDAG->getTargetConstant(ST.getVectorLength()/2, SDLoc(N), MVT::i32);
+}]>;
+
+def Q2V: OutPatFrag<(ops node:$Qs), (V6_vandqrt $Qs, (A2_tfrsi -1))>;
+
+def Combinev: OutPatFrag<(ops node:$Vs, node:$Vt),
+  (REG_SEQUENCE HvxWR, $Vs, vsub_hi, $Vt, vsub_lo)>;
+
+def Combineq: OutPatFrag<(ops node:$Qs, node:$Qt),
+  (V6_vandvrt
+    (V6_vor
+      (V6_vror (V6_vpackeb (V6_vd0), (Q2V $Qs)),
+               (A2_tfrsi (HwLen2 (i32 0)))),  // Half the vector length
+      (V6_vpackeb (V6_vd0), (Q2V $Qt))),
+    (A2_tfrsi -1))>;
 
 def LoVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_lo)>;
 def HiVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_hi)>;
 
 def HexagonVZERO:  SDNode<"HexagonISD::VZERO",  SDTVecLeaf>;
+def HexagonQCAT:   SDNode<"HexagonISD::QCAT",   SDTVecBinOp>;
 def HexagonQTRUE:  SDNode<"HexagonISD::QTRUE",  SDTVecLeaf>;
 def HexagonQFALSE: SDNode<"HexagonISD::QFALSE", SDTVecLeaf>;
 def vzero:  PatFrag<(ops), (HexagonVZERO)>;
 def qtrue:  PatFrag<(ops), (HexagonQTRUE)>;
 def qfalse: PatFrag<(ops), (HexagonQFALSE)>;
+def qcat:   PatFrag<(ops node:$Qs, node:$Qt),
+                    (HexagonQCAT node:$Qs, node:$Qt)>;
 
 def qnot: PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>;
 
@@ -3021,9 +3042,13 @@
 }]>;
 
 let Predicates = [UseHVX] in {
-  def: Pat<(VecI8  vzero), (V6_vd0)>;
-  def: Pat<(VecI16 vzero), (V6_vd0)>;
-  def: Pat<(VecI32 vzero), (V6_vd0)>;
+  def: Pat<(VecI8   vzero), (V6_vd0)>;
+  def: Pat<(VecI16  vzero), (V6_vd0)>;
+  def: Pat<(VecI32  vzero), (V6_vd0)>;
+  // Use V6_vsubw_dv instead.
+  def: Pat<(VecPI8  vzero), (Combinev (V6_vd0), (V6_vd0))>;
+  def: Pat<(VecPI16 vzero), (Combinev (V6_vd0), (V6_vd0))>;
+  def: Pat<(VecPI32 vzero), (Combinev (V6_vd0), (V6_vd0))>;
 
   def: Pat<(VecPI8 (concat_vectors HVI8:$Vs, HVI8:$Vt)),
            (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
@@ -3032,6 +3057,9 @@
   def: Pat<(VecPI32 (concat_vectors HVI32:$Vs, HVI32:$Vt)),
            (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
 
+  def: Pat<(VecQ8  (qcat HQ16:$Qs, HQ16:$Qt)), (Combineq $Qs, $Qt)>;
+  def: Pat<(VecQ16 (qcat HQ32:$Qs, HQ32:$Qt)), (Combineq $Qs, $Qt)>;
+
   def: Pat<(HexagonVEXTRACTW HVI8:$Vu, I32:$Rs),
            (V6_extractw HvxVR:$Vu, I32:$Rs)>;
   def: Pat<(HexagonVEXTRACTW HVI16:$Vu, I32:$Rs),
@@ -3053,6 +3081,14 @@
              (V6_lvsplatw (ToI32 (SplatH $V)))>;
     def: Pat<(VecI32 (HexagonVSPLAT anyimm:$V)),
              (V6_lvsplatw (ToI32 $V))>;
+    def: Pat<(VecPI8 (HexagonVSPLAT u8_0ImmPred:$V)),
+             (Combinev (V6_lvsplatw (ToI32 (SplatB $V))),
+                       (V6_lvsplatw (ToI32 (SplatB $V))))>;
+    def: Pat<(VecPI16 (HexagonVSPLAT u16_0ImmPred:$V)),
+             (Combinev (V6_lvsplatw (ToI32 (SplatH $V))),
+                       (V6_lvsplatw (ToI32 (SplatH $V))))>;
+    def: Pat<(VecPI32 (HexagonVSPLAT anyimm:$V)),
+             (Combinev (V6_lvsplatw (ToI32 $V)), (V6_lvsplatw (ToI32 $V)))>;
   }
   def: Pat<(VecI8 (HexagonVSPLAT I32:$Rs)),
            (V6_lvsplatw (S2_vsplatrb I32:$Rs))>;
@@ -3060,14 +3096,28 @@
            (V6_lvsplatw (A2_combine_ll I32:$Rs, I32:$Rs))>;
   def: Pat<(VecI32 (HexagonVSPLAT I32:$Rs)),
            (V6_lvsplatw I32:$Rs)>;
+  def: Pat<(VecPI8 (HexagonVSPLAT I32:$Rs)),
+           (Combinev (V6_lvsplatw (S2_vsplatrb I32:$Rs)),
+                     (V6_lvsplatw (S2_vsplatrb I32:$Rs)))>;
+  def: Pat<(VecPI16 (HexagonVSPLAT I32:$Rs)),
+           (Combinev (V6_lvsplatw (A2_combine_ll I32:$Rs, I32:$Rs)),
+                     (V6_lvsplatw (A2_combine_ll I32:$Rs, I32:$Rs)))>;
+  def: Pat<(VecPI32 (HexagonVSPLAT I32:$Rs)),
+           (Combinev (V6_lvsplatw I32:$Rs), (V6_lvsplatw I32:$Rs))>;
 
-  def: Pat<(add HVI8:$Vs,  HVI8:$Vt),   (V6_vaddb HvxVR:$Vs, HvxVR:$Vt)>;
-  def: Pat<(add HVI16:$Vs, HVI16:$Vt),  (V6_vaddh HvxVR:$Vs, HvxVR:$Vt)>;
-  def: Pat<(add HVI32:$Vs, HVI32:$Vt),  (V6_vaddw HvxVR:$Vs, HvxVR:$Vt)>;
+  def: Pat<(add HVI8:$Vs,  HVI8:$Vt),   (V6_vaddb    HvxVR:$Vs, HvxVR:$Vt)>;
+  def: Pat<(add HVI16:$Vs, HVI16:$Vt),  (V6_vaddh    HvxVR:$Vs, HvxVR:$Vt)>;
+  def: Pat<(add HVI32:$Vs, HVI32:$Vt),  (V6_vaddw    HvxVR:$Vs, HvxVR:$Vt)>;
+  def: Pat<(add HWI8:$Vs,  HWI8:$Vt),   (V6_vaddb_dv HvxWR:$Vs, HvxWR:$Vt)>;
+  def: Pat<(add HWI16:$Vs, HWI16:$Vt),  (V6_vaddh_dv HvxWR:$Vs, HvxWR:$Vt)>;
+  def: Pat<(add HWI32:$Vs, HWI32:$Vt),  (V6_vaddw_dv HvxWR:$Vs, HvxWR:$Vt)>;
 
-  def: Pat<(sub HVI8:$Vs,  HVI8:$Vt),   (V6_vsubb HvxVR:$Vs, HvxVR:$Vt)>;
-  def: Pat<(sub HVI16:$Vs, HVI16:$Vt),  (V6_vsubh HvxVR:$Vs, HvxVR:$Vt)>;
-  def: Pat<(sub HVI32:$Vs, HVI32:$Vt),  (V6_vsubw HvxVR:$Vs, HvxVR:$Vt)>;
+  def: Pat<(sub HVI8:$Vs,  HVI8:$Vt),   (V6_vsubb    HvxVR:$Vs, HvxVR:$Vt)>;
+  def: Pat<(sub HVI16:$Vs, HVI16:$Vt),  (V6_vsubh    HvxVR:$Vs, HvxVR:$Vt)>;
+  def: Pat<(sub HVI32:$Vs, HVI32:$Vt),  (V6_vsubw    HvxVR:$Vs, HvxVR:$Vt)>;
+  def: Pat<(sub HWI8:$Vs,  HWI8:$Vt),   (V6_vsubb_dv HvxWR:$Vs, HvxWR:$Vt)>;
+  def: Pat<(sub HWI16:$Vs, HWI16:$Vt),  (V6_vsubh_dv HvxWR:$Vs, HvxWR:$Vt)>;
+  def: Pat<(sub HWI32:$Vs, HWI32:$Vt),  (V6_vsubw_dv HvxWR:$Vs, HvxWR:$Vt)>;
 
   def: Pat<(and HVI8:$Vs,  HVI8:$Vt),   (V6_vand  HvxVR:$Vs, HvxVR:$Vt)>;
   def: Pat<(or  HVI8:$Vs,  HVI8:$Vt),   (V6_vor   HvxVR:$Vs, HvxVR:$Vt)>;
@@ -3096,11 +3146,19 @@
   def: Pat<(VecI32 (sext_invec HVI16:$Vs)), (LoVec (VSxth $Vs))>;
   def: Pat<(VecI32 (sext_invec HVI8:$Vs)),
            (LoVec (VSxth (LoVec (VSxtb $Vs))))>;
+  def: Pat<(VecPI16 (sext_invec HWI8:$Vss)),  (VSxtb (LoVec $Vss))>;
+  def: Pat<(VecPI32 (sext_invec HWI16:$Vss)), (VSxth (LoVec $Vss))>;
+  def: Pat<(VecPI32 (sext_invec HWI8:$Vss)),
+           (VSxth (LoVec (VSxtb (LoVec $Vss))))>;
 
   def: Pat<(VecI16 (zext_invec HVI8:$Vs)),  (LoVec (VZxtb $Vs))>;
   def: Pat<(VecI32 (zext_invec HVI16:$Vs)), (LoVec (VZxth $Vs))>;
   def: Pat<(VecI32 (zext_invec HVI8:$Vs)),
            (LoVec (VZxth (LoVec (VZxtb $Vs))))>;
+  def: Pat<(VecPI16 (zext_invec HWI8:$Vss)),  (VZxtb (LoVec $Vss))>;
+  def: Pat<(VecPI32 (zext_invec HWI16:$Vss)), (VZxth (LoVec $Vss))>;
+  def: Pat<(VecPI32 (zext_invec HWI8:$Vss)),
+           (VZxth (LoVec (VZxtb (LoVec $Vss))))>;
 
   // The "source" types are not legal, and there are no parameterized
   // definitions for them, but they are length-specific.
@@ -3121,6 +3179,16 @@
              (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 16)), (A2_tfrsi 16))>;
   }
 
+  def: Pat<(HexagonVASL HVI8:$Vs, I32:$Rt),
+           (V6_vpackeb (V6_vaslh (HiVec (VZxtb HvxVR:$Vs)), I32:$Rt),
+                       (V6_vaslh (LoVec (VZxtb HvxVR:$Vs)), I32:$Rt))>;
+  def: Pat<(HexagonVASR HVI8:$Vs, I32:$Rt),
+           (V6_vpackeb (V6_vasrh (HiVec (VSxtb HvxVR:$Vs)), I32:$Rt),
+                       (V6_vasrh (LoVec (VSxtb HvxVR:$Vs)), I32:$Rt))>;
+  def: Pat<(HexagonVLSR HVI8:$Vs, I32:$Rt),
+           (V6_vpackeb (V6_vlsrh (HiVec (VZxtb HvxVR:$Vs)), I32:$Rt),
+                       (V6_vlsrh (LoVec (VZxtb HvxVR:$Vs)), I32:$Rt))>;
+
   def: Pat<(HexagonVASL HVI16:$Vs, I32:$Rt), (V6_vaslh HvxVR:$Vs, I32:$Rt)>;
   def: Pat<(HexagonVASL HVI32:$Vs, I32:$Rt), (V6_vaslw HvxVR:$Vs, I32:$Rt)>;
   def: Pat<(HexagonVASR HVI16:$Vs, I32:$Rt), (V6_vasrh HvxVR:$Vs, I32:$Rt)>;