[Power9] Exploit move and splat instructions for build_vector improvement
This patch corresponds to review:
https://reviews.llvm.org/D21135
This patch exploits the following instructions:
mtvsrws
lxvwsx
mtvsrdd
mfvsrld
in order to improve some build_vector and extractelement patterns.
llvm-svn: 282246
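
As a rough source-level sketch (not part of the commit), the shapes this patch targets can be written with GCC/Clang vector extensions as below. The function names are made up for illustration, and the instruction named for each case is the intended selection on a Power9 (ISA 3.0) target rather than a guarantee for every surrounding context.

// Illustrative only: splats of a non-constant value and a pack of two
// 64-bit values, which reach the backend as BUILD_VECTOR nodes.
typedef int v4si __attribute__((vector_size(16)));
typedef long long v2di __attribute__((vector_size(16)));

// Value already in a GPR: candidate for mtvsrws (move and splat).
v4si splat_from_reg(int x) {
  v4si v = {x, x, x, x};
  return v;
}

// Value loaded from memory with a single use: candidate for lxvwsx
// (load and splat).
v4si splat_from_mem(const int *p) {
  int x = *p;
  v4si v = {x, x, x, x};
  return v;
}

// Two 64-bit GPR values packed into a vector: candidate for mtvsrdd
// on subtargets with direct moves.
v2di pack_two(long long a, long long b) {
  v2di v = {a, b};
  return v;
}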
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index c414a15..5bce336 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -672,6 +672,9 @@
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
}
+
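+ // A v2i64 BUILD_VECTOR of two 64-bit values can be selected directly as
+ // MTVSRDD on ISA 3.0 subtargets that have direct moves.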
+ if (Subtarget.isISA3_0() && Subtarget.hasDirectMove())
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Legal);
}
if (Subtarget.hasQPX()) {
@@ -7079,6 +7082,16 @@
return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
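+// Return true if this BUILD_VECTOR node has the given type and all of its
+// operands are identical, i.e. it is a splat of a possibly non-constant value.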
+static bool isNonConstSplatBV(BuildVectorSDNode *BVN, EVT Type) {
+ if (BVN->getValueType(0) != Type)
+ return false;
+ auto OpZero = BVN->getOperand(0);
+ for (int i = 1, e = BVN->getNumOperands(); i < e; i++)
+ if (BVN->getOperand(i) != OpZero)
+ return false;
+ return true;
+}
+
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
@@ -7200,8 +7213,17 @@
bool HasAnyUndefs;
if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
- SplatBitSize > 32)
+ SplatBitSize > 32) {
+ // We can splat a non-const value on CPUs that implement ISA 3.0
+ // in two ways: LXVWSX (load and splat) and MTVSRWS (move and splat).
+ auto OpZero = BVN->getOperand(0);
+ bool CanLoadAndSplat = OpZero.getOpcode() == ISD::LOAD &&
+ BVN->isOnlyUserOf(OpZero.getNode());
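+ // Prefer LXVWSX (load and splat) when the splatted value is a load with no
+ // other users; in that case fall through to the default expansion.
+ // Otherwise, keep a v4i32 splat BUILD_VECTOR legal so it can be selected
+ // as MTVSRWS (move and splat).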
+ if (Subtarget.isISA3_0() &&
+ isNonConstSplatBV(BVN, MVT::v4i32) && !CanLoadAndSplat)
+ return Op;
return SDValue();
+ }
unsigned SplatBits = APSplatBits.getZExtValue();
unsigned SplatUndef = APSplatUndef.getZExtValue();
@@ -7219,6 +7241,10 @@
return Op;
}
+ // We have XXSPLTIB for constant splats one byte wide
+ if (Subtarget.isISA3_0() && Op.getValueType() == MVT::v16i8)
+ return Op;
+
// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
(32-SplatBitSize));
@@ -7462,6 +7488,18 @@
if (Subtarget.hasVSX()) {
if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
+
+ // If the source for the shuffle is a scalar_to_vector that came from a
+ // 32-bit load, it will have used LXVWSX so we don't need to splat again.
+ if (Subtarget.isISA3_0() &&
+ ((isLittleEndian && SplatIdx == 3) ||
+ (!isLittleEndian && SplatIdx == 0))) {
+ SDValue Src = V1.getOperand(0);
+ if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ Src.getOperand(0).getOpcode() == ISD::LOAD &&
+ Src.getOperand(0).hasOneUse())
+ return V1;
+ }
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
DAG.getConstant(SplatIdx, dl, MVT::i32));