Additional fixes for bug 15155.
This handles the cases where the 6-bit splat element is odd, converting
to a three-instruction sequence to add or subtract two splats. With this
fix, the XFAIL in test/CodeGen/PowerPC/vec_constants.ll is removed.
llvm-svn: 175663
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 01d731a..1453506 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -1323,34 +1323,75 @@
SDValue(Tmp, 0), GA);
}
case PPCISD::VADD_SPLAT: {
- // Convert: VADD_SPLAT elt, size
- // Into: tmp = VSPLTIS[BHW] elt
- // VADDU[BHW]M tmp, tmp
- // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
+ // This expands into one of three sequences, depending on whether
+ // the first operand is odd or even, positive or negative.
assert(isa<ConstantSDNode>(N->getOperand(0)) &&
isa<ConstantSDNode>(N->getOperand(1)) &&
"Invalid operand on VADD_SPLAT!");
+
+ int Elt = N->getConstantOperandVal(0);
int EltSize = N->getConstantOperandVal(1);
- unsigned Opc1, Opc2;
+ unsigned Opc1, Opc2, Opc3;
EVT VT;
+
if (EltSize == 1) {
Opc1 = PPC::VSPLTISB;
Opc2 = PPC::VADDUBM;
+ Opc3 = PPC::VSUBUBM;
VT = MVT::v16i8;
} else if (EltSize == 2) {
Opc1 = PPC::VSPLTISH;
Opc2 = PPC::VADDUHM;
+ Opc3 = PPC::VSUBUHM;
VT = MVT::v8i16;
} else {
assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
Opc1 = PPC::VSPLTISW;
Opc2 = PPC::VADDUWM;
+ Opc3 = PPC::VSUBUWM;
VT = MVT::v4i32;
}
- SDValue Elt = getI32Imm(N->getConstantOperandVal(0));
- SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, Elt);
- SDValue TmpVal = SDValue(Tmp, 0);
- return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal);
+
+ if ((Elt & 1) == 0) {
+ // Elt is even, in the range [-32,-18] + [16,30].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp = VSPLTIS[BHW] elt
+ // VADDU[BHW]M tmp, tmp
+ // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
+ SDValue EltVal = getI32Imm(Elt >> 1);
+ SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ SDValue TmpVal = SDValue(Tmp, 0);
+ return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal);
+
+ } else if (Elt > 0) {
+ // Elt is odd and positive, in the range [17,31].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp1 = VSPLTIS[BHW] elt-16
+ // tmp2 = VSPLTIS[BHW] -16
+ // VSUBU[BHW]M tmp1, tmp2
+ SDValue EltVal = getI32Imm(Elt - 16);
+ SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ EltVal = getI32Imm(-16);
+ SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ return CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
+ SDValue(Tmp2, 0));
+
+ } else {
+ // Elt is odd and negative, in the range [-31,-17].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp1 = VSPLTIS[BHW] elt+16
+ // tmp2 = VSPLTIS[BHW] -16
+ // VADDU[BHW]M tmp1, tmp2
+ SDValue EltVal = getI32Imm(Elt + 16);
+ SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ EltVal = getI32Imm(-16);
+ SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ return CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
+ SDValue(Tmp2, 0));
+ }
}
}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 338d73f..6d2aacd 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5025,11 +5025,17 @@
// Two instruction sequences.
// If this value is in the range [-32,30] and is even, use:
- // tmp = VSPLTI[bhw], result = add tmp, tmp
- if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
- // To avoid having the optimization undone by constant folding, we
- // convert to a pseudo that will be expanded later.
- SDValue Elt = DAG.getConstant(SextVal >> 1, MVT::i32);
+ // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
+ // If this value is in the range [17,31] and is odd, use:
+ // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
+ // If this value is in the range [-31,-17] and is odd, use:
+ // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
+ // Note the last two are three-instruction sequences.
+ if (SextVal >= -32 && SextVal <= 31) {
+ // To avoid having these optimizations undone by constant folding,
+ // we convert to a pseudo that will be expanded later into one of
+ // the above forms.
+ SDValue Elt = DAG.getConstant(SextVal, MVT::i32);
EVT VT = Op.getValueType();
int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4);
SDValue EltSize = DAG.getConstant(Size, MVT::i32);
@@ -5129,25 +5135,6 @@
}
}
- // Three instruction sequences.
-
- // Odd, in range [17,31]: (vsplti C)-(vsplti -16).
- // FIXME: Disabled because the add gets constant folded.
- if (0 && SextVal >= 0 && SextVal <= 31) {
- SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl);
- SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
- LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS);
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
- }
- // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16).
- // FIXME: Disabled because the add gets constant folded.
- if (0 && SextVal >= -31 && SextVal <= 0) {
- SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl);
- SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
- LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS);
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
- }
-
return SDValue();
}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 7cc2d1a..1fa88f3 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -238,8 +238,9 @@
ADDI_DTPREL_L,
/// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
- /// into an ADD of a VSPLTI with itself during instruction selection.
- /// Necessary to avoid losing this optimization due to constant folds.
+ /// during instruction selection to optimize a BUILD_VECTOR into
+ /// operations on splats. This is necessary to avoid losing these
+ /// optimizations due to constant folding.
VADD_SPLAT,
/// STD_32 - This is the STD instruction for use with "32-bit" registers.