llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp - toolchain/llvm-project - Gitiles

 //===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//

 #include "HexagonISelLowering.h"
 #include "HexagonRegisterInfo.h"
 #include "HexagonSubtarget.h"

 using namespace llvm;

 // Create an INTRINSIC_WO_CHAIN node of type ResTy. The operand list is the
 // intrinsic id IntId (as an i32 constant) followed by the values in Ops.
 SDValue
 HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
                               const SDLoc &dl, SelectionDAG &DAG) const {
   SmallVector<SDValue,4> Operands;
   Operands.reserve(Ops.size() + 1);
   // The intrinsic id always comes first.
   Operands.push_back(DAG.getConstant(IntId, dl, MVT::i32));
   Operands.append(Ops.begin(), Ops.end());
   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, Operands);
 }

 // Return the vector type that has the (common) element type of both members
 // of Tys, and as many elements as the two members have together.
 MVT
 HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
   MVT FirstTy = Tys.first;
   MVT SecondTy = Tys.second;
   assert(FirstTy.getVectorElementType() == SecondTy.getVectorElementType());

   unsigned TotalElems = FirstTy.getVectorNumElements() +
                         SecondTy.getVectorNumElements();
   return MVT::getVectorVT(FirstTy.getVectorElementType(), TotalElems);
 }

 // Split the vector type VecTy into two equal halves: both members of the
 // result have the element type of VecTy and half of its element count.
 HexagonTargetLowering::TypePair
 HexagonTargetLowering::typeSplit(MVT VecTy) const {
   assert(VecTy.isVector());
   unsigned Len = VecTy.getVectorNumElements();
   assert((Len % 2) == 0 && "Expecting even-sized vector type");

   MVT ElemTy = VecTy.getVectorElementType();
   MVT HalfTy = MVT::getVectorVT(ElemTy, Len/2);
   return { HalfTy, HalfTy };
 }

 // Return a vector type with the same number of elements as VecTy, but with
 // integer elements that are Factor times as wide.
 MVT
 HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
   unsigned WideBits = VecTy.getVectorElementType().getSizeInBits() * Factor;
   unsigned NumElems = VecTy.getVectorNumElements();
   return MVT::getVectorVT(MVT::getIntegerVT(WideBits), NumElems);
 }

 // Return a vector type with the same number of elements as VecTy, but with
 // integer elements whose width is divided by Factor.
 MVT
 HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
   unsigned NarrowBits = VecTy.getVectorElementType().getSizeInBits() / Factor;
   unsigned NumElems = VecTy.getVectorNumElements();
   return MVT::getVectorVT(MVT::getIntegerVT(NarrowBits), NumElems);
 }

 // Reinterpret Vec as a vector with element type ElemTy by means of a
 // bitcast. If Vec already has that element type, it is returned unchanged.
 SDValue
 HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
                                   SelectionDAG &DAG) const {
   bool NeedsCast = !(ty(Vec).getVectorElementType() == ElemTy);
   if (!NeedsCast)
     return Vec;

   MVT SrcTy = Vec.getValueType().getSimpleVT();
   MVT CastTy = tyVector(SrcTy, ElemTy);
   return DAG.getBitcast(CastTy, Vec);
 }

 // Concatenate the two vectors in Ops into a single vector of the joined
 // type. Note that the CONCAT_VECTORS operands are passed as
 // (Ops.second, Ops.first).
 SDValue
 HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
                               SelectionDAG &DAG) const {
   MVT JoinedTy = typeJoin(ty(Ops));
   return DAG.getNode(ISD::CONCAT_VECTORS, dl, JoinedTy,
                      Ops.second, Ops.first);
 }

 // Split Vec into two vectors whose types are given by typeSplit applied to
 // the type of Vec.
 HexagonTargetLowering::VectorPair
 HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
                                SelectionDAG &DAG) const {
   TypePair HalfTys = typeSplit(ty(Vec));
   MVT FirstTy = HalfTys.first;
   MVT SecondTy = HalfTys.second;
   return DAG.SplitVector(Vec, dl, FirstTy, SecondTy);
 }

 // Turn an index of an element of type ElemTy into an i32 byte index, i.e.
 // shift it left by log2 of the element size in bytes. For 8-bit elements
 // the (i32) index is returned as is.
 SDValue
 HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
                                           SelectionDAG &DAG) const {
   // The index is always handled as an i32 value.
   MVT IdxTy = ElemIdx.getValueType().getSimpleVT();
   if (IdxTy != MVT::i32)
     ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx);

   unsigned ElemBits = ElemTy.getSizeInBits();
   if (ElemBits == 8)
     return ElemIdx;

   const SDLoc &dl(ElemIdx);
   unsigned ShiftAmt = Log2_32(ElemBits/8);
   SDValue ShiftV = DAG.getConstant(ShiftAmt, dl, MVT::i32);
   return DAG.getNode(ISD::SHL, dl, MVT::i32, {ElemIdx, ShiftV});
 }

 // Given the index Idx of an element of type ElemTy, return the index of that
 // element within the 32-bit word that contains it: Idx & (32/ElemWidth - 1).
 // For 32-bit elements Idx is returned unchanged.
 SDValue
 HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
                                         SelectionDAG &DAG) const {
   unsigned ElemBits = ElemTy.getSizeInBits();
   assert(ElemBits >= 8 && ElemBits <= 32);
   if (ElemBits == 32)
     return Idx;

   if (ty(Idx) != MVT::i32)
     Idx = DAG.getBitcast(MVT::i32, Idx);

   const SDLoc &dl(Idx);
   // Keep only the bits that select an element inside a single word.
   unsigned ElemsPerWord = 32/ElemBits;
   SDValue MaskV = DAG.getConstant(ElemsPerWord - 1, dl, MVT::i32);
   return DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, MaskV});
 }

 // Create a shuffle of Op0 and Op1 that operates on bytes. Mask is expressed
 // in terms of the elements of the operand type. For i8 operands it is used
 // directly. For wider elements both operands are cast to byte vectors, and
 // each mask entry is expanded into one entry per byte of the element
 // (undefined entries expand into undefined bytes).
 SDValue
 HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
                                       SDValue Op1, ArrayRef<int> Mask,
                                       SelectionDAG &DAG) const {
   MVT OpTy = ty(Op0);
   assert(OpTy == ty(Op1));

   MVT ElemTy = OpTy.getVectorElementType();
   if (ElemTy == MVT::i8)
     return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask);
   assert(ElemTy.getSizeInBits() >= 8);

   MVT ByteVecTy = tyVector(OpTy, MVT::i8);
   unsigned BytesPerElem = ElemTy.getSizeInBits() / 8;

   SmallVector<int,128> ByteMask;
   for (int M : Mask) {
     for (unsigned Off = 0; Off != BytesPerElem; ++Off) {
       // Element M occupies bytes [M*BytesPerElem, (M+1)*BytesPerElem).
       int ByteM = (M < 0) ? -1 : static_cast<int>(M*BytesPerElem + Off);
       ByteMask.push_back(ByteM);
     }
   }
   assert(ByteVecTy.getVectorNumElements() == ByteMask.size());
   return DAG.getVectorShuffle(ByteVecTy, dl, opCastElem(Op0, MVT::i8, DAG),
                               opCastElem(Op1, MVT::i8, DAG), ByteMask);
 }

 // Return the vector-of-i1 type that has 8 elements for each unit of the
 // subtarget's vector length.
 MVT
 HexagonTargetLowering::getVecBoolVT() const {
   unsigned NumBits = 8*Subtarget.getVectorLength();
   return MVT::getVectorVT(MVT::i1, NumBits);
 }

 // Build a single HVX vector of type VecTy out of the scalars in Values.
 //  - If all values are constant integers, the result is either a zero
 //    vector, or a load of the vector from the constant pool.
 //  - Otherwise the values are packed into 32-bit words, and the words are
 //    inserted into the vector one by one with VINSERTW0/VROR.
 SDValue
 HexagonTargetLowering::buildHvxVectorSingle(ArrayRef<SDValue> Values,
                                             const SDLoc &dl, MVT VecTy,
                                             SelectionDAG &DAG) const {
   unsigned NumValues = Values.size();
   MachineFunction &MF = DAG.getMachineFunction();
   MVT ElemTy = VecTy.getVectorElementType();
   unsigned ElemBits = ElemTy.getSizeInBits();
   unsigned HwLen = Subtarget.getVectorLength();

   SmallVector<ConstantInt*, 128> Consts(NumValues);
   if (getBuildVectorConstInts(Values, VecTy, DAG, Consts)) {
     auto IsZero = [](ConstantInt *CI) { return CI->isZero(); };
     if (llvm::all_of(Consts, IsZero))
       return getZero(dl, VecTy, DAG);

     // A non-zero constant vector is loaded from the constant pool.
     ArrayRef<Constant*> Elems(reinterpret_cast<Constant**>(Consts.begin()),
                               reinterpret_cast<Constant**>(Consts.end()));
     Constant *CV = ConstantVector::get(Elems);
     unsigned Align = HwLen;
     SDValue PoolAddr =
         LowerConstantPool(DAG.getConstantPool(CV, VecTy, Align), DAG);
     return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), PoolAddr,
                        MachinePointerInfo::getConstantPool(MF), Align);
   }

   unsigned ElemBytes = ElemBits / 8;
   assert(ElemBytes*NumValues == HwLen);
   SmallVector<SDValue,32> Words;

   if (ElemTy == MVT::i32) {
     // The values already are 32-bit words.
     Words.assign(Values.begin(), Values.end());
   } else {
     // Combine each group of 2 or 4 sub-word values into one 32-bit word.
     assert((ElemBytes == 1 || ElemBytes == 2) && "Invalid element size");
     unsigned PerWord = (ElemBytes == 1) ? 4 : 2;
     MVT PartVT = MVT::getVectorVT(ElemTy, PerWord);
     for (unsigned First = 0; First != NumValues; First += PerWord) {
       SDValue Packed = buildVector32(Values.slice(First, PerWord), dl,
                                      PartVT, DAG);
       Words.push_back(DAG.getBitcast(MVT::i32, Packed));
     }
   }

   // Construct two halves in parallel, then or them together. Each step
   // inserts a word with VINSERTW0 and then rotates the vector by 4, so that
   // the next word can be inserted the same way.
   assert(4*Words.size() == HwLen);
   unsigned HalfWords = Words.size() / 2;
   SDValue Half0 = getNode(Hexagon::V6_vd0, dl, VecTy, {}, DAG);
   SDValue Half1 = getNode(Hexagon::V6_vd0, dl, VecTy, {}, DAG);
   SDValue RotWord = DAG.getConstant(4, dl, MVT::i32);
   for (unsigned W = 0; W != HalfWords; ++W) {
     SDValue Ins0 = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
                                {Half0, Words[W]});
     SDValue Ins1 = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
                                {Half1, Words[W+HalfWords]});
     Half0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {Ins0, RotWord});
     Half1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {Ins1, RotWord});
   }

   // Rotate the first half by half of the vector length before combining it
   // with the second one.
   SDValue RotHalf = DAG.getConstant(HwLen/2, dl, MVT::i32);
   Half0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {Half0, RotHalf});
   return DAG.getNode(ISD::OR, dl, VecTy, {Half0, Half1});
 }

 // Build an HVX vector predicate of type VecTy out of the values in Values.
 //
 // The predicate is obtained by building a vector V of HwLen bytes and
 // comparing it with zero (V >u 0). Undefined inputs are treated as 0.
 // Values must have at most HwLen elements (with HwLen divisible by that
 // count), or exactly 8*HwLen elements.
 SDValue
 HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
                                           const SDLoc &dl, MVT VecTy,
                                           SelectionDAG &DAG) const {
   // Construct a vector V of bytes, such that a comparison V >u 0 would
   // produce the required vector predicate.
   unsigned VecLen = Values.size();
   unsigned HwLen = Subtarget.getVectorLength();
   assert(VecLen <= HwLen || VecLen == 8*HwLen);
   SmallVector<SDValue,128> Bytes;

   if (VecLen <= HwLen) {
     // In the hardware, each bit of a vector predicate corresponds to a byte
     // of a vector register. Calculate how many bytes does a bit of VecTy
     // correspond to.
     assert(HwLen % VecLen == 0);
     unsigned BitBytes = HwLen / VecLen;
     for (SDValue V : Values) {
       SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8)
                                  : DAG.getConstant(0, dl, MVT::i8);
       for (unsigned B = 0; B != BitBytes; ++B)
         Bytes.push_back(Ext);
     }
   } else {
     // There are as many i1 values, as there are bits in a vector register.
     // Divide the values into groups of 8 and check that each group consists
     // of the same value (ignoring undefs).
     for (unsigned I = 0; I != VecLen; I += 8) {
       // Find the first non-undef value in this group.
       unsigned B = 0;
       while (B != 8 && Values[I+B].isUndef())
         ++B;

       if (B == 8) {
         // The whole group is undefined. Do not index Values with I+8 here:
         // that element belongs to the next group, and for the last group
         // it is past the end of the array.
         Bytes.push_back(DAG.getConstant(0, dl, MVT::i8));
         continue;
       }

       SDValue F = Values[I+B];
       Bytes.push_back(DAG.getZExtOrTrunc(F, dl, MVT::i8));
       // Verify that the rest of values in the group are the same as the
       // first.
       for (++B; B != 8; ++B)
         assert(Values[I+B].isUndef() || Values[I+B] == F);
     }
   }

   MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
   SDValue ByteVec = buildHvxVectorSingle(Bytes, dl, ByteTy, DAG);
   SDValue Cmp = DAG.getSetCC(dl, VecTy, ByteVec, getZero(dl, ByteTy, DAG),
                              ISD::SETUGT);
   return Cmp;
 }

 // Lower a BUILD_VECTOR of an HVX type. Vectors of i1 are built as vector
 // predicates. A vector that is twice as long as the hardware vector (its
 // size in bits is 16*HwLen) is built as two single vectors that are then
 // concatenated. Anything else is built as a single vector.
 SDValue
 HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
       const {
   const SDLoc &dl(Op);
   MVT VecTy = ty(Op);

   unsigned NumOps = Op.getNumOperands();
   SmallVector<SDValue,128> Ops(Op->op_begin(), Op->op_end());

   if (VecTy.getVectorElementType() == MVT::i1)
     return buildHvxVectorPred(Ops, dl, VecTy, DAG);

   bool IsPair = VecTy.getSizeInBits() == 16*Subtarget.getVectorLength();
   if (!IsPair)
     return buildHvxVectorSingle(Ops, dl, VecTy, DAG);

   // Build each half separately and join them.
   ArrayRef<SDValue> All(Ops);
   MVT SingleTy = typeSplit(VecTy).first;
   SDValue V0 = buildHvxVectorSingle(All.take_front(NumOps/2), dl, SingleTy,
                                     DAG);
   SDValue V1 = buildHvxVectorSingle(All.drop_front(NumOps/2), dl, SingleTy,
                                     DAG);
   return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
 }

 // Lower EXTRACT_VECTOR_ELT from an HVX vector. The 32-bit word containing
 // the element is extracted with VEXTRACTW; for elements narrower than 32
 // bits the element is then extracted from that word. The extracted value
 // is produced as an i32.
 SDValue
 HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
       const {
   SDValue SrcVec = Op.getOperand(0);
   MVT ElemTy = ty(SrcVec).getVectorElementType();
   unsigned ElemBits = ElemTy.getSizeInBits();
   assert(ElemBits >= 8 && ElemBits <= 32);
   (void)ElemBits;

   const SDLoc &dl(Op);
   SDValue Index = Op.getOperand(1);
   if (ty(Index) != MVT::i32)
     Index = DAG.getBitcast(MVT::i32, Index);

   // Fetch the whole word in which the element resides.
   SDValue ByteIdx = convertToByteIndex(Index, ElemTy, DAG);
   SDValue Word = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
                              {SrcVec, ByteIdx});
   if (ElemTy == MVT::i32)
     return Word;

   // The element is smaller than a word. Compute its position within the
   // word from the original element index, then view the word as a short
   // vector of ElemTy and extract the element from it.
   SDValue IdxInWord = getIndexInWord32(Index, ElemTy, DAG);
   MVT WordVecTy = tyVector(ty(Word), ElemTy);
   SDValue WordVec = DAG.getBitcast(WordVecTy, Word);
   return extractVector(WordVec, IdxInWord, dl, ElemTy, MVT::i32, DAG);
 }

 // Lower INSERT_VECTOR_ELT into an HVX vector.
 //
 // Operands of Op: 0 = the vector, 1 = the value to insert, 2 = the element
 // index. A 32-bit element is inserted directly as a word. A narrower
 // element is first merged into the 32-bit word that contains it, and the
 // updated word is then inserted into the vector.
 SDValue
 HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
       const {
   const SDLoc &dl(Op);
   SDValue VecV = Op.getOperand(0);
   SDValue ValV = Op.getOperand(1);
   SDValue IdxV = Op.getOperand(2);
   MVT ElemTy = ty(VecV).getVectorElementType();
   unsigned ElemWidth = ElemTy.getSizeInBits();
   assert(ElemWidth >= 8 && ElemWidth <= 32);
   (void)ElemWidth;

   // Insert the word ValV into VecV at the word containing byte ByteIdxV:
   // rotate the target word into position 0, insert there with VINSERTW0,
   // and rotate back.
   auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
                                      SDValue ByteIdxV) {
     MVT VecTy = ty(VecV);
     unsigned HwLen = Subtarget.getVectorLength();
     // Byte offset of the word that contains ByteIdxV.
     SDValue MaskV = DAG.getNode(ISD::AND, dl, MVT::i32,
                                 {ByteIdxV, DAG.getConstant(-4, dl, MVT::i32)});
     SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV});
     SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV});
     // Undo the first rotation. MaskV is a byte offset and the rotation
     // amount is a byte count (buildHvxVectorSingle rotates by 4 to advance
     // by one word), so the inverse of a rotation by MaskV is a rotation by
     // HwLen - MaskV, not HwLen/4 - MaskV.
     SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                {DAG.getConstant(HwLen, dl, MVT::i32), MaskV});
     SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV});
     return TorV;
   };

   SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
   if (ElemTy == MVT::i32)
     return InsertWord(VecV, ValV, ByteIdx);

   // If this is not inserting a 32-bit word, convert it into such a thing.
   // 1. Extract the existing word from the target vector.
   SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
                                 {ByteIdx, DAG.getConstant(2, dl, MVT::i32)});
   SDValue Ex0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
                             {opCastElem(VecV, MVT::i32, DAG), WordIdx});
   SDValue Ext = LowerHvxExtractElement(Ex0, DAG);

   // 2. Treating the extracted word as a 32-bit vector, insert the given
   //    value into it.
   SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
   MVT SubVecTy = tyVector(ty(Ext), ElemTy);
   SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext),
                              ValV, SubIdx, dl, ElemTy, DAG);

   // 3. Insert the 32-bit word back into the original vector.
   return InsertWord(VecV, Ins, ByteIdx);
 }

 // Lower EXTRACT_SUBVECTOR from an HVX vector. Index 0 selects the vsub_lo
 // subregister of the source, and an index equal to half of the source
 // element count selects vsub_hi. For any other index an empty SDValue is
 // returned.
 SDValue
 HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
       const {
   SDValue SrcV = Op.getOperand(0);
   unsigned SrcElems = ty(SrcV).getVectorNumElements();
   auto *IdxN = cast<ConstantSDNode>(Op.getOperand(1).getNode());
   unsigned Idx = IdxN->getZExtValue();
   MVT DstTy = ty(Op);
   assert(Idx == 0 || DstTy.getVectorNumElements() % Idx == 0);
   const SDLoc &dl(Op);

   int SubReg;
   if (Idx == 0)
     SubReg = Hexagon::vsub_lo;
   else if (Idx == SrcElems/2)
     SubReg = Hexagon::vsub_hi;
   else
     return SDValue();

   return DAG.getTargetExtractSubreg(SubReg, dl, DstTy, SrcV);
 }

 // Lower INSERT_SUBVECTOR into an HVX vector. Only the case of a constant
 // index and a source with exactly half as many elements as the destination
 // is handled: index 0 writes the vsub_lo subregister, and an index equal to
 // the source element count writes vsub_hi. In all other cases an empty
 // SDValue is returned.
 SDValue
 HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
       const {
   // Idx may be variable.
   auto *IdxN = dyn_cast<ConstantSDNode>(Op.getOperand(2).getNode());
   if (!IdxN)
     return SDValue();
   unsigned Idx = IdxN->getZExtValue();

   SDValue DstV = Op.getOperand(0);
   SDValue SrcV = Op.getOperand(1);
   MVT DstTy = ty(DstV);
   unsigned SrcElems = ty(SrcV).getVectorNumElements();
   if (2*SrcElems != DstTy.getVectorNumElements())
     return SDValue();

   int SubReg;
   if (Idx == 0)
     SubReg = Hexagon::vsub_lo;
   else if (Idx == SrcElems)
     SubReg = Hexagon::vsub_hi;
   else
     return SDValue();

   const SDLoc &dl(Op);
   return DAG.getTargetInsertSubreg(SubReg, dl, DstTy, DstV, SrcV);
 }

 // Lower a vector multiplication. Vectors of i8 and i16 use a widening
 // multiply followed by a shuffle that keeps the low halves of the products.
 // Vectors of i32 use a three-instruction sequence. For any other element
 // type an empty SDValue is returned.
 SDValue
 HexagonTargetLowering::LowerHvxMul(SDValue Op, SelectionDAG &DAG) const {
   MVT ResTy = ty(Op);
   assert(ResTy.isVector());
   const SDLoc &dl(Op);

   MVT ElemTy = ResTy.getVectorElementType();
   SDValue Vs = Op.getOperand(0);
   SDValue Vt = Op.getOperand(1);

   if (ElemTy == MVT::i8 || ElemTy == MVT::i16) {
     // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
     // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
     // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
     // For i16, use V6_vmpyhv, which behaves in an analogous way to
     // V6_vmpybv: results Lo and Hi are products of even/odd elements
     // respectively.
     MVT ExtTy = typeExtElem(ResTy, 2);
     unsigned MpyOpc = Hexagon::V6_vmpyhv;
     if (ElemTy == MVT::i8)
       MpyOpc = Hexagon::V6_vmpybv;
     SDValue Prod = getNode(MpyOpc, dl, ExtTy, {Vs, Vt}, DAG);

     // Discard high halves of the resulting values, collect the low halves.
     unsigned VecLen = ResTy.getVectorNumElements();
     SmallVector<int,256> ShuffMask;
     for (unsigned I = 0; I < VecLen; I += 2) {
       ShuffMask.push_back(I);         // Low half of an even-element product.
       ShuffMask.push_back(I+VecLen);  // Low half of an odd-element product.
     }
     VectorPair Halves = opSplit(opCastElem(Prod, ElemTy, DAG), dl, DAG);
     SDValue Shuf = getByteShuffle(dl, Halves.first, Halves.second, ShuffMask,
                                   DAG);
     return DAG.getBitcast(ResTy, Shuf);
   }

   if (ElemTy == MVT::i32) {
     // Use the following sequence for signed word multiply:
     // T0 = V6_vmpyiowh Vs, Vt
     // T1 = V6_vaslw T0, 16
     // T2 = V6_vmpyiewuh_acc T1, Vs, Vt
     SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
     SDValue T0 = getNode(Hexagon::V6_vmpyiowh, dl, ResTy, {Vs, Vt}, DAG);
     SDValue T1 = getNode(Hexagon::V6_vaslw, dl, ResTy, {T0, S16}, DAG);
     return getNode(Hexagon::V6_vmpyiewuh_acc, dl, ResTy, {T1, Vs, Vt}, DAG);
   }

   // Other element types are not handled here.
   return SDValue();
 }

 // Lower a "multiply, return the high half" operation on HVX vectors.
 // The operation is treated as signed when the opcode of Op is ISD::MULHS,
 // and as unsigned otherwise. Vectors of i8 and i16 use a widening multiply
 // followed by a shuffle that keeps the high halves of the products. Vectors
 // of i32 (the only other element type accepted, see the assert) use the
 // instruction sequences described in the comments below.
 SDValue
 HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
   MVT ResTy = ty(Op);
   assert(ResTy.isVector());
   const SDLoc &dl(Op);
   SmallVector<int,256> ShuffMask;

   MVT ElemTy = ResTy.getVectorElementType();
   unsigned VecLen = ResTy.getVectorNumElements();
   SDValue Vs = Op.getOperand(0);
   SDValue Vt = Op.getOperand(1);
   bool IsSigned = Op.getOpcode() == ISD::MULHS;

   if (ElemTy == MVT::i8 || ElemTy == MVT::i16) {
     // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
     // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
     // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
     // For i16, use V6_vmpyhv, which behaves in an analogous way to
     // V6_vmpybv: results Lo and Hi are products of even/odd elements
     // respectively.
     // The unsigned variants (V6_vmpyubv, V6_vmpyuhv) are selected when the
     // operation is not signed.
     MVT ExtTy = typeExtElem(ResTy, 2);
     unsigned MpyOpc = ElemTy == MVT::i8
         ? (IsSigned ? Hexagon::V6_vmpybv : Hexagon::V6_vmpyubv)
         : (IsSigned ? Hexagon::V6_vmpyhv : Hexagon::V6_vmpyuhv);
     SDValue M = getNode(MpyOpc, dl, ExtTy, {Vs, Vt}, DAG);

     // Discard low halves of the resulting values, collect the high halves.
     for (unsigned I = 0; I < VecLen; I += 2) {
       ShuffMask.push_back(I+1);         // High half of an even product.
       ShuffMask.push_back(I+VecLen+1);  // High half of an odd product.
     }
     VectorPair P = opSplit(opCastElem(M, ElemTy, DAG), dl, DAG);
     SDValue BS = getByteShuffle(dl, P.first, P.second, ShuffMask, DAG);
     return DAG.getBitcast(ResTy, BS);
   }

   assert(ElemTy == MVT::i32);
   SDValue S16 = DAG.getConstant(16, dl, MVT::i32);

   if (IsSigned) {
     // mulhs(Vs,Vt) =
     //   = [(Hi(Vs)*2^16 + Lo(Vs)) *s (Hi(Vt)*2^16 + Lo(Vt))] >> 32
     //   = [Hi(Vs)*2^16 *s Hi(Vt)*2^16 + Hi(Vs) *su Lo(Vt)*2^16
     //      + Lo(Vs) *us (Hi(Vt)*2^16 + Lo(Vt))] >> 32
     //   = [Hi(Vs) *s Hi(Vt)*2^32 + Hi(Vs) *su Lo(Vt)*2^16
     //      + Lo(Vs) *us Vt] >> 32
     // The low half of Lo(Vs)*Lo(Vt) will be discarded (it's not added to
     // anything, so it cannot produce any carry over to higher bits),
     // so everything in [] can be shifted by 16 without loss of precision.
     //   = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + Lo(Vs)*Vt >> 16] >> 16
     //   = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + V6_vmpyewuh(Vs,Vt)] >> 16
     // Denote Hi(Vs) = Vs':
     //   = [Vs'*s Hi(Vt)*2^16 + Vs' *su Lo(Vt) + V6_vmpyewuh(Vt,Vs)] >> 16
     //   = Vs'*s Hi(Vt) + (V6_vmpyiewuh(Vs',Vt) + V6_vmpyewuh(Vt,Vs)) >> 16
     SDValue T0 = getNode(Hexagon::V6_vmpyewuh, dl, ResTy, {Vt, Vs}, DAG);
     // Get Vs':
     SDValue S0 = getNode(Hexagon::V6_vasrw, dl, ResTy, {Vs, S16}, DAG);
     SDValue T1 = getNode(Hexagon::V6_vmpyiewuh_acc, dl, ResTy,
                          {T0, S0, Vt}, DAG);
     // Shift by 16:
     SDValue S2 = getNode(Hexagon::V6_vasrw, dl, ResTy, {T1, S16}, DAG);
     // Get Vs'*Hi(Vt):
     SDValue T2 = getNode(Hexagon::V6_vmpyiowh, dl, ResTy, {S0, Vt}, DAG);
     // Add:
     SDValue T3 = DAG.getNode(ISD::ADD, dl, ResTy, {S2, T2});
     return T3;
   }

   // Unsigned mulhw. (Would expansion using signed mulhw be better?)

   // Helpers that extract the vsub_lo/vsub_hi subregister of a vector pair
   // as a value of type ResTy.
   auto LoVec = [&DAG,ResTy,dl] (SDValue Pair) {
     return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, ResTy, Pair);
   };
   auto HiVec = [&DAG,ResTy,dl] (SDValue Pair) {
     return DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, ResTy, Pair);
   };

   MVT PairTy = typeJoin({ResTy, ResTy});
   // Control vector for the V6_vdelta below, which swaps the halfwords of
   // Vt within each word.
   SDValue P = getNode(Hexagon::V6_lvsplatw, dl, ResTy,
                       {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG);
   // Multiply-unsigned halfwords:
   //   LoVec = Vs.uh[2i] * Vt.uh[2i],
   //   HiVec = Vs.uh[2i+1] * Vt.uh[2i+1]
   SDValue T0 = getNode(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, Vt}, DAG);
   // The low halves in the LoVec of the pair can be discarded. They are
   // not added to anything (in the full-precision product), so they cannot
   // produce a carry into the higher bits.
   SDValue T1 = getNode(Hexagon::V6_vlsrw, dl, ResTy, {LoVec(T0), S16}, DAG);
   // Swap low and high halves in Vt, and do the halfword multiplication
   // to get products Vs.uh[2i] * Vt.uh[2i+1] and Vs.uh[2i+1] * Vt.uh[2i].
   SDValue D0 = getNode(Hexagon::V6_vdelta, dl, ResTy, {Vt, P}, DAG);
   SDValue T2 = getNode(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, D0}, DAG);
   // T2 has mixed products of halfwords: Lo(Vt)*Hi(Vs) and Hi(Vt)*Lo(Vs).
   // These products are words, but cannot be added directly because the
   // sums could overflow. Add these products, by halfwords, where each sum
   // of a pair of halfwords gives a word.
   SDValue T3 = getNode(Hexagon::V6_vadduhw, dl, PairTy,
                        {LoVec(T2), HiVec(T2)}, DAG);
   // Add the high halfwords from the products of the low halfwords.
   SDValue T4 = DAG.getNode(ISD::ADD, dl, ResTy, {T1, LoVec(T3)});
   SDValue T5 = getNode(Hexagon::V6_vlsrw, dl, ResTy, {T4, S16}, DAG);
   SDValue T6 = DAG.getNode(ISD::ADD, dl, ResTy, {HiVec(T0), HiVec(T3)});
   SDValue T7 = DAG.getNode(ISD::ADD, dl, ResTy, {T5, T6});
   return T7;
 }

 // Lower a vector SETCC to an HVX compare instruction. Only "equal",
 // "signed greater-than" and "unsigned greater-than" compares are emitted
 // directly; every other condition code is expressed by swapping the
 // operands and/or negating the resulting predicate.
 SDValue
 HexagonTargetLowering::LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
   MVT VecTy = ty(Op.getOperand(0));
   assert(VecTy == ty(Op.getOperand(1)));

   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();

   // HVX has instructions for SETEQ, SETGT, SETUGT. The other comparisons
   // can be arranged as operand-swapped/negated versions of these. Since
   // the generated code will have the original CC expressed as
   //   (negate (swap-op NewCmp)),
   // the condition code for the NewCmp should be calculated from the original
   // CC by applying these operations in the reverse order.
   //
   // This could also be done through setCondCodeAction, but for negation it
   // uses a xor with a vector of -1s, which it obtains from BUILD_VECTOR.
   // That is far too expensive for what can be done with a single instruction.

   // Step 1: ne, le, ge, ule, uge are the negations of eq, gt, lt, ugt, ult.
   const bool Negate = CC == ISD::SETNE || CC == ISD::SETLE ||
                       CC == ISD::SETGE || CC == ISD::SETULE ||
                       CC == ISD::SETUGE;
   if (Negate)
     CC = ISD::getSetCCInverse(CC, true);

   // Step 2: lt and ult are gt and ugt with the operands swapped.
   const bool Swap = CC == ISD::SETLT || CC == ISD::SETULT;
   if (Swap)
     CC = ISD::getSetCCSwappedOperands(CC);

   assert(CC == ISD::SETEQ || CC == ISD::SETGT || CC == ISD::SETUGT);

   MVT ElemTy = VecTy.getVectorElementType();
   unsigned ElemWidth = ElemTy.getSizeInBits();
   assert(isPowerOf2_32(ElemWidth));

   // Rows are selected by the element width (8, 16, 32 bits), columns by
   // the condition code.
   static const unsigned OpcTable[3][3] = {
     //           SETEQ             SETGT,            SETUGT
     /* Byte */ { Hexagon::V6_veqb, Hexagon::V6_vgtb, Hexagon::V6_vgtub },
     /* Half */ { Hexagon::V6_veqh, Hexagon::V6_vgth, Hexagon::V6_vgtuh },
     /* Word */ { Hexagon::V6_veqw, Hexagon::V6_vgtw, Hexagon::V6_vgtuw }
   };

   unsigned Column;
   switch (CC) {
     case ISD::SETEQ:
       Column = 0;
       break;
     case ISD::SETGT:
       Column = 1;
       break;
     case ISD::SETUGT:
       Column = 2;
       break;
     default:
       llvm_unreachable("Unhandled CondCode");
   }
   unsigned Row = Log2_32(ElemWidth)-3;
   unsigned CmpOpc = OpcTable[Row][Column];

   MVT ResTy = ty(Op);
   const SDLoc &dl(Op);
   SDValue Lhs = Op.getOperand(Swap ? 1 : 0);
   SDValue Rhs = Op.getOperand(Swap ? 0 : 1);
   SDValue CmpV = getNode(CmpOpc, dl, ResTy, {Lhs, Rhs}, DAG);
   if (!Negate)
     return CmpV;
   return getNode(Hexagon::V6_pred_not, dl, ResTy, {CmpV}, DAG);
 }

 // Lower ANY_EXTEND_VECTOR_INREG by turning it into a zero-extend-in-register
 // of the same operand to the same result type.
 SDValue
 HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
   // Sign- and zero-extends are legal.
   assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
   SDValue Src = Op.getOperand(0);
   MVT ResTy = ty(Op);
   return DAG.getZeroExtendVectorInReg(Src, SDLoc(Op), ResTy);
 }