AMDGPU: Use splat vectors for undefs when folding canonicalize
If one of the elements is undef, use the canonicalized constant
from the other element instead of 0.
Splat vectors are more useful for other optimizations, such
as matching vector clamps. This was breaking on clamps
of half3 from the undef 4th component.
llvm-svn: 339512
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 226cfeb..ba8a3a5 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6989,27 +6989,42 @@
// TODO: This could be better with wider vectors that will be split to v2f16,
// and to consider uses since there aren't that many packed operations.
- if (N0.getOpcode() == ISD::BUILD_VECTOR && VT == MVT::v2f16) {
+ if (N0.getOpcode() == ISD::BUILD_VECTOR && VT == MVT::v2f16 &&
+ isTypeLegal(MVT::v2f16)) {
SDLoc SL(N);
SDValue NewElts[2];
SDValue Lo = N0.getOperand(0);
SDValue Hi = N0.getOperand(1);
+ EVT EltVT = Lo.getValueType();
+
if (vectorEltWillFoldAway(Lo) || vectorEltWillFoldAway(Hi)) {
for (unsigned I = 0; I != 2; ++I) {
SDValue Op = N0.getOperand(I);
- EVT EltVT = Op.getValueType();
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
NewElts[I] = getCanonicalConstantFP(DAG, SL, EltVT,
CFP->getValueAPF());
} else if (Op.isUndef()) {
- // This would ordinarily be folded to a qNaN. Since this may be half
- // of a packed operation, it may be cheaper to use a 0.
- NewElts[I] = DAG.getConstantFP(0.0f, SL, EltVT);
+ // Handled below based on what the other operand is.
+ NewElts[I] = Op;
} else {
NewElts[I] = DAG.getNode(ISD::FCANONICALIZE, SL, EltVT, Op);
}
}
+ // If one half is undef, and one is constant, perfer a splat vector rather
+ // than the normal qNaN. If it's a register, prefer 0.0 since that's
+ // cheaper to use and may be free with a packed operation.
+ if (NewElts[0].isUndef()) {
+ if (isa<ConstantFPSDNode>(NewElts[1]))
+ NewElts[0] = isa<ConstantFPSDNode>(NewElts[1]) ?
+ NewElts[1]: DAG.getConstantFP(0.0f, SL, EltVT);
+ }
+
+ if (NewElts[1].isUndef()) {
+ NewElts[1] = isa<ConstantFPSDNode>(NewElts[0]) ?
+ NewElts[0] : DAG.getConstantFP(0.0f, SL, EltVT);
+ }
+
return DAG.getBuildVector(VT, SL, NewElts);
}
}