propagate fast-math-flags on DAG nodes
After D10403, we had FMF in the DAG but disabled by default. Nick reported no crashing errors after some stress testing,
so I enabled them at r243687. However, Escha soon notified us of a bug not covered by any in-tree regression tests:
if we don't propagate the flags, we may fail to CSE DAG nodes because differing FMF causes them to not match. There is
one test case in this patch to prove that point.
This patch aims to fix, or leave a 'TODO' for, all of the in-tree places where we create nodes that are FMF-capable. I
did this by putting an assert in SelectionDAG.getNode() to find any FMF-capable node that was being created without FMF
(D11807). I then ran all regression tests and the test-suite and confirmed that everything passes.
This patch exposes remaining work to get DAG FMF to be fully functional: (1) add the flags to non-binary nodes such as
FCMP, FMA and FNEG; (2) add the flags to intrinsics; (3) use the flags as conditions for transforms rather than the
current global settings.
Differential Revision: http://reviews.llvm.org/D12095
llvm-svn: 247815
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f81da72..c77c1e0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -348,10 +348,12 @@
SDValue BuildSDIV(SDNode *N);
SDValue BuildSDIVPow2(SDNode *N);
SDValue BuildUDIV(SDNode *N);
- SDValue BuildReciprocalEstimate(SDValue Op);
- SDValue BuildRsqrtEstimate(SDValue Op);
- SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations);
- SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations);
+ SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags);
+ SDValue BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
+ SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
+ SDNodeFlags *Flags);
+ SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
+ SDNodeFlags *Flags);
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits = true);
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
@@ -614,6 +616,9 @@
assert(Op.hasOneUse() && "Unknown reuse!");
assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
+
+ const SDNodeFlags *Flags = Op.getNode()->getFlags();
+
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown code");
case ISD::ConstantFP: {
@@ -631,12 +636,12 @@
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
- Op.getOperand(1));
+ Op.getOperand(1), Flags);
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(1), DAG,
LegalOperations, Depth+1),
- Op.getOperand(0));
+ Op.getOperand(0), Flags);
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
assert(Options.UnsafeFPMath);
@@ -648,7 +653,7 @@
// fold (fneg (fsub A, B)) -> (fsub B, A)
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
- Op.getOperand(1), Op.getOperand(0));
+ Op.getOperand(1), Op.getOperand(0), Flags);
case ISD::FMUL:
case ISD::FDIV:
@@ -660,13 +665,13 @@
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
- Op.getOperand(1));
+ Op.getOperand(1), Flags);
// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
Op.getOperand(0),
GetNegatedExpression(Op.getOperand(1), DAG,
- LegalOperations, Depth+1));
+ LegalOperations, Depth+1), Flags);
case ISD::FP_EXTEND:
case ISD::FSIN:
@@ -1482,13 +1487,8 @@
// Constant operands are canonicalized to RHS.
if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
SDValue Ops[] = {N1, N0};
- SDNode *CSENode;
- if (const auto *BinNode = dyn_cast<BinaryWithFlagsSDNode>(N)) {
- CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
- &BinNode->Flags);
- } else {
- CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
- }
+ SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
+ N->getFlags());
if (CSENode)
return SDValue(CSENode, 0);
}
@@ -7931,6 +7931,7 @@
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
+ const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
// fold vector ops
if (VT.isVector())
@@ -7939,23 +7940,23 @@
// fold (fadd c1, c2) -> c1 + c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
// canonicalize constant to RHS
if (N0CFP && !N1CFP)
- return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
+ return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
// fold (fadd A, (fneg B)) -> (fsub A, B)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
return DAG.getNode(ISD::FSUB, DL, VT, N0,
- GetNegatedExpression(N1, DAG, LegalOperations));
+ GetNegatedExpression(N1, DAG, LegalOperations), Flags);
// fold (fadd (fneg A), B) -> (fsub B, A)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
return DAG.getNode(ISD::FSUB, DL, VT, N1,
- GetNegatedExpression(N0, DAG, LegalOperations));
+ GetNegatedExpression(N0, DAG, LegalOperations), Flags);
// If 'unsafe math' is enabled, fold lots of things.
if (Options.UnsafeFPMath) {
@@ -7971,7 +7972,9 @@
if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
isa<ConstantFPSDNode>(N0.getOperand(1)))
return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
- DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1));
+ DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
+ Flags),
+ Flags);
// If allowed, fold (fadd (fneg x), x) -> 0.0
if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
@@ -7992,8 +7995,8 @@
// (fadd (fmul x, c), x) -> (fmul x, c+1)
if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
- DAG.getConstantFP(1.0, DL, VT));
- return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
+ DAG.getConstantFP(1.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
}
// (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
@@ -8001,8 +8004,8 @@
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
- DAG.getConstantFP(2.0, DL, VT));
- return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
+ DAG.getConstantFP(2.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
}
}
@@ -8013,8 +8016,8 @@
// (fadd x, (fmul x, c)) -> (fmul x, c+1)
if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
- DAG.getConstantFP(1.0, DL, VT));
- return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
+ DAG.getConstantFP(1.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
}
// (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
@@ -8022,8 +8025,8 @@
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N0.getOperand(0)) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
- DAG.getConstantFP(2.0, DL, VT));
- return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
+ DAG.getConstantFP(2.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
}
}
@@ -8033,7 +8036,7 @@
if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
(N0.getOperand(0) == N1)) {
return DAG.getNode(ISD::FMUL, DL, VT,
- N1, DAG.getConstantFP(3.0, DL, VT));
+ N1, DAG.getConstantFP(3.0, DL, VT), Flags);
}
}
@@ -8043,7 +8046,7 @@
if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
N1.getOperand(0) == N0) {
return DAG.getNode(ISD::FMUL, DL, VT,
- N0, DAG.getConstantFP(3.0, DL, VT));
+ N0, DAG.getConstantFP(3.0, DL, VT), Flags);
}
}
@@ -8053,8 +8056,8 @@
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
- return DAG.getNode(ISD::FMUL, DL, VT,
- N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
+ DAG.getConstantFP(4.0, DL, VT), Flags);
}
}
} // enable-unsafe-fp-math
@@ -8076,6 +8079,7 @@
EVT VT = N->getValueType(0);
SDLoc dl(N);
const TargetOptions &Options = DAG.getTarget().Options;
+ const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
// fold vector ops
if (VT.isVector())
@@ -8084,12 +8088,12 @@
// fold (fsub c1, c2) -> c1-c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FSUB, dl, VT, N0, N1);
+ return DAG.getNode(ISD::FSUB, dl, VT, N0, N1, Flags);
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return DAG.getNode(ISD::FADD, dl, VT, N0,
- GetNegatedExpression(N1, DAG, LegalOperations));
+ GetNegatedExpression(N1, DAG, LegalOperations), Flags);
// If 'unsafe math' is enabled, fold lots of things.
if (Options.UnsafeFPMath) {
@@ -8140,6 +8144,7 @@
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
+ const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
// fold vector ops
if (VT.isVector()) {
@@ -8150,12 +8155,12 @@
// fold (fmul c1, c2) -> c1*c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
// canonicalize constant to RHS
if (isConstantFPBuildVectorOrConstantFP(N0) &&
!isConstantFPBuildVectorOrConstantFP(N1))
- return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
// fold (fmul A, 1.0) -> A
if (N1CFP && N1CFP->isExactlyValue(1.0))
@@ -8184,8 +8189,8 @@
// the second operand of the outer multiply are constants.
if ((N1CFP && isConstOrConstSplatFP(N01)) ||
(BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
- SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
- return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
}
}
}
@@ -8198,14 +8203,14 @@
(N0.getOperand(0) == N0.getOperand(1)) &&
N0.hasOneUse()) {
const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
- SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
- return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
}
}
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
- return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
// fold (fmul X, -1.0) -> (fneg X)
if (N1CFP && N1CFP->isExactlyValue(-1.0))
@@ -8220,7 +8225,8 @@
if (LHSNeg == 2 || RHSNeg == 2)
return DAG.getNode(ISD::FMUL, DL, VT,
GetNegatedExpression(N0, DAG, LegalOperations),
- GetNegatedExpression(N1, DAG, LegalOperations));
+ GetNegatedExpression(N1, DAG, LegalOperations),
+ Flags);
}
}
@@ -8250,6 +8256,7 @@
if (N1CFP && N1CFP->isZero())
return N2;
}
+ // TODO: The FMA node should have flags that propagate to these nodes.
if (N0CFP && N0CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
if (N1CFP && N1CFP->isExactlyValue(1.0))
@@ -8259,13 +8266,19 @@
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
+ // TODO: FMA nodes should have flags that propagate to the created nodes.
+ // For now, create a Flags object for use with all unsafe math transforms.
+ SDNodeFlags Flags;
+ Flags.setUnsafeAlgebra(true);
+
// (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
if (Options.UnsafeFPMath && N1CFP &&
N2.getOpcode() == ISD::FMUL &&
N0 == N2.getOperand(0) &&
N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
return DAG.getNode(ISD::FMUL, dl, VT, N0,
- DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1)));
+ DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1),
+ &Flags), &Flags);
}
@@ -8275,7 +8288,8 @@
N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
return DAG.getNode(ISD::FMA, dl, VT,
N0.getOperand(0),
- DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)),
+ DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1),
+ &Flags),
N2);
}
@@ -8283,29 +8297,33 @@
// (fma x, -1, y) -> (fadd (fneg x), y)
if (N1CFP) {
if (N1CFP->isExactlyValue(1.0))
+ // TODO: The FMA node should have flags that propagate to this node.
return DAG.getNode(ISD::FADD, dl, VT, N0, N2);
if (N1CFP->isExactlyValue(-1.0) &&
(!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
AddToWorklist(RHSNeg.getNode());
+ // TODO: The FMA node should have flags that propagate to this node.
return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
}
}
// (fma x, c, x) -> (fmul x, (c+1))
- if (Options.UnsafeFPMath && N1CFP && N0 == N2)
+ if (Options.UnsafeFPMath && N1CFP && N0 == N2) {
return DAG.getNode(ISD::FMUL, dl, VT, N0,
DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(1.0, dl, VT)));
-
+ N1, DAG.getConstantFP(1.0, dl, VT),
+ &Flags), &Flags);
+ }
// (fma x, c, (fneg x)) -> (fmul x, (c-1))
if (Options.UnsafeFPMath && N1CFP &&
- N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0)
+ N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
return DAG.getNode(ISD::FMUL, dl, VT, N0,
DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(-1.0, dl, VT)));
-
+ N1, DAG.getConstantFP(-1.0, dl, VT),
+ &Flags), &Flags);
+ }
return SDValue();
}
@@ -8349,17 +8367,15 @@
EVT VT = N->getValueType(0);
SDLoc DL(N);
SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
- // FIXME: This optimization requires some level of fast-math, so the
- // created reciprocal node should at least have the 'allowReciprocal'
- // fast-math-flag set.
- SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1);
+ const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
+ SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
// Dividend / Divisor -> Dividend * Reciprocal
for (auto *U : Users) {
SDValue Dividend = U->getOperand(0);
if (Dividend != FPOne) {
SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
- Reciprocal);
+ Reciprocal, Flags);
CombineTo(U, NewNode);
} else if (U != Reciprocal.getNode()) {
// In the absence of fast-math-flags, this user node is always the
@@ -8378,6 +8394,7 @@
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
+ SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
// fold vector ops
if (VT.isVector())
@@ -8386,7 +8403,7 @@
// fold (fdiv c1, c2) -> c1/c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
if (Options.UnsafeFPMath) {
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
@@ -8405,28 +8422,30 @@
TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
TLI.isFPImmLegal(Recip, VT)))
return DAG.getNode(ISD::FMUL, DL, VT, N0,
- DAG.getConstantFP(Recip, DL, VT));
+ DAG.getConstantFP(Recip, DL, VT), Flags);
}
// If this FDIV is part of a reciprocal square root, it may be folded
// into a target-specific square root estimate instruction.
if (N1.getOpcode() == ISD::FSQRT) {
- if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) {
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0), Flags)) {
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
}
} else if (N1.getOpcode() == ISD::FP_EXTEND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
+ if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0),
+ Flags)) {
RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
}
} else if (N1.getOpcode() == ISD::FP_ROUND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
+ if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0),
+ Flags)) {
RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
}
} else if (N1.getOpcode() == ISD::FMUL) {
// Look through an FMUL. Even though this won't remove the FDIV directly,
@@ -8443,18 +8462,18 @@
if (SqrtOp.getNode()) {
// We found a FSQRT, so try to make this fold:
// x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
- if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) {
- RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp);
+ if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
+ RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
}
}
}
// Fold into a reciprocal estimate and multiply instead of a real divide.
- if (SDValue RV = BuildReciprocalEstimate(N1)) {
+ if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
}
}
@@ -8466,7 +8485,8 @@
if (LHSNeg == 2 || RHSNeg == 2)
return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
GetNegatedExpression(N0, DAG, LegalOperations),
- GetNegatedExpression(N1, DAG, LegalOperations));
+ GetNegatedExpression(N1, DAG, LegalOperations),
+ Flags);
}
}
@@ -8485,7 +8505,8 @@
// fold (frem c1, c2) -> fmod(c1,c2)
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1,
+ &cast<BinaryWithFlagsSDNode>(N)->Flags);
return SDValue();
}
@@ -8494,14 +8515,19 @@
if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap())
return SDValue();
+ // TODO: FSQRT nodes should have flags that propagate to the created nodes.
+ // For now, create a Flags object for use with all unsafe math transforms.
+ SDNodeFlags Flags;
+ Flags.setUnsafeAlgebra(true);
+
// Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
- SDValue RV = BuildRsqrtEstimate(N->getOperand(0));
+ SDValue RV = BuildRsqrtEstimate(N->getOperand(0), &Flags);
if (!RV)
return SDValue();
EVT VT = RV.getValueType();
SDLoc DL(N);
- RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV);
+ RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV, &Flags);
AddToWorklist(RV.getNode());
// Unfortunately, RV is now NaN if the input was exactly 0.
@@ -8916,9 +8942,10 @@
if (Level >= AfterLegalizeDAG &&
(TLI.isFPImmLegal(CVal, N->getValueType(0)) ||
TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0))))
- return DAG.getNode(
- ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)));
+ return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ N0.getOperand(1)),
+ &cast<BinaryWithFlagsSDNode>(N0)->Flags);
}
}
@@ -13346,7 +13373,7 @@
}
SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(LHS), VT,
- LHSOp, RHSOp);
+ LHSOp, RHSOp, N->getFlags());
// We need the resulting constant to be legal if we are in a phase after
// legalization, so zero extend to the smallest operand type if required.
@@ -13383,7 +13410,8 @@
EVT VT = N->getValueType(0);
SDValue UndefVector = LHS.getOperand(1);
SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
- LHS.getOperand(0), RHS.getOperand(0));
+ LHS.getOperand(0), RHS.getOperand(0),
+ N->getFlags());
AddUsersToWorklist(N);
return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
&SVN0->getMask()[0]);
@@ -13895,7 +13923,7 @@
return S;
}
-SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
+SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) {
if (Level >= AfterLegalizeDAG)
return SDValue();
@@ -13919,16 +13947,16 @@
// Newton iterations: Est = Est + Est (1 - Arg * Est)
for (unsigned i = 0; i < Iterations; ++i) {
- SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est);
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
AddToWorklist(NewEst.getNode());
- NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst);
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
AddToWorklist(NewEst.getNode());
- NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
+ NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
AddToWorklist(NewEst.getNode());
- Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst);
+ Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
AddToWorklist(Est.getNode());
}
}
@@ -13945,31 +13973,32 @@
/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
/// As a result, we precompute A/2 prior to the iteration loop.
SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est,
- unsigned Iterations) {
+ unsigned Iterations,
+ SDNodeFlags *Flags) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
// We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
// this entire sequence requires only one FP constant.
- SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg);
+ SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
AddToWorklist(HalfArg.getNode());
- HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg);
+ HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
AddToWorklist(HalfArg.getNode());
// Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
for (unsigned i = 0; i < Iterations; ++i) {
- SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
AddToWorklist(NewEst.getNode());
- NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst);
+ NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
AddToWorklist(NewEst.getNode());
- NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst);
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
AddToWorklist(NewEst.getNode());
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
AddToWorklist(Est.getNode());
}
return Est;
@@ -13981,7 +14010,8 @@
/// =>
/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est,
- unsigned Iterations) {
+ unsigned Iterations,
+ SDNodeFlags *Flags) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
@@ -13989,25 +14019,25 @@
// Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est)
for (unsigned i = 0; i < Iterations; ++i) {
- SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf);
+ SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
AddToWorklist(HalfEst.getNode());
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
AddToWorklist(Est.getNode());
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
AddToWorklist(Est.getNode());
- Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree);
+ Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree, Flags);
AddToWorklist(Est.getNode());
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst, Flags);
AddToWorklist(Est.getNode());
}
return Est;
}
-SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) {
+SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
if (Level >= AfterLegalizeDAG)
return SDValue();
@@ -14019,8 +14049,8 @@
AddToWorklist(Est.getNode());
if (Iterations) {
Est = UseOneConstNR ?
- BuildRsqrtNROneConst(Op, Est, Iterations) :
- BuildRsqrtNRTwoConst(Op, Est, Iterations);
+ BuildRsqrtNROneConst(Op, Est, Iterations, Flags) :
+ BuildRsqrtNRTwoConst(Op, Est, Iterations, Flags);
}
return Est;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index c3c3688..da0afa6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2443,6 +2443,8 @@
SDValue Op0,
EVT DestVT,
SDLoc dl) {
+ // TODO: Should any fast-math-flags be set for the created nodes?
+
if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
// simple 32-bit [signed|unsigned] integer to float/double expansion
@@ -3120,6 +3122,7 @@
Node->getOperand(0),
Tmp1, ISD::SETLT);
True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0));
+ // TODO: Should any fast-math-flags be set for the FSUB?
False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT,
DAG.getNode(ISD::FSUB, dl, VT,
Node->getOperand(0), Tmp1));
@@ -3287,6 +3290,7 @@
case ISD::FNEG:
// Expand Y = FNEG(X) -> Y = SUB -0.0, X
Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0));
+ // TODO: If FNEG has fast-math-flags, propagate them to the FSUB.
Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1,
Node->getOperand(0));
Results.push_back(Tmp1);
@@ -3513,8 +3517,9 @@
EVT VT = Node->getValueType(0);
if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) {
+ const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(Node)->Flags;
Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
- Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1);
+ Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1, Flags);
Results.push_back(Tmp1);
} else {
Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
@@ -4267,7 +4272,8 @@
case ISD::FPOW: {
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
- Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+ Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2,
+ Node->getFlags());
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
Tmp3, DAG.getIntPtrConstant(0, dl)));
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 3c50a41..e8770bb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1341,6 +1341,7 @@
break;
}
+ // TODO: Are there fast-math-flags to propagate to this FADD?
Lo = DAG.getNode(ISD::FADD, dl, VT, Hi,
DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble,
APInt(128, Parts)),
@@ -1511,6 +1512,7 @@
SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
// X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
// FIXME: generated code sucks.
+ // TODO: Are there fast-math-flags to propagate to this FSUB?
return DAG.getSelectCC(dl, N->getOperand(0), Tmp,
DAG.getNode(ISD::ADD, dl, MVT::i32,
DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
@@ -1912,8 +1914,7 @@
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Op0 = GetPromotedFloat(N->getOperand(0));
SDValue Op1 = GetPromotedFloat(N->getOperand(1));
-
- return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, N->getFlags());
}
SDValue DAGTypeLegalizer::PromoteFloatRes_FMAD(SDNode *N) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 8ee98b2..35ccd13 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -415,8 +415,8 @@
else
Operands[j] = Op.getOperand(j);
}
-
- Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands);
+
+ Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags());
if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
(VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
@@ -1001,6 +1001,7 @@
// Convert hi and lo to floats
// Convert the hi part back to the upper values
+ // TODO: Can any fast-math-flags be set on these nodes?
SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
@@ -1014,6 +1015,7 @@
if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
SDLoc DL(Op);
SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType());
+ // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
return DAG.getNode(ISD::FSUB, DL, Op.getValueType(),
Zero, Op.getOperand(0));
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 362a73a..dffcaaa 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -141,7 +141,7 @@
SDValue LHS = GetScalarizedVector(N->getOperand(0));
SDValue RHS = GetScalarizedVector(N->getOperand(1));
return DAG.getNode(N->getOpcode(), SDLoc(N),
- LHS.getValueType(), LHS, RHS);
+ LHS.getValueType(), LHS, RHS, N->getFlags());
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
@@ -704,8 +704,10 @@
GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
SDLoc dl(N);
- Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo);
- Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi);
+ const SDNodeFlags *Flags = N->getFlags();
+ unsigned Opcode = N->getOpcode();
+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);
+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);
}
void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
@@ -2073,7 +2075,7 @@
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags());
}
SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
@@ -2084,6 +2086,7 @@
EVT WidenEltVT = WidenVT.getVectorElementType();
EVT VT = WidenVT;
unsigned NumElts = VT.getVectorNumElements();
+ const SDNodeFlags *Flags = N->getFlags();
while (!TLI.isTypeLegal(VT) && NumElts != 1) {
NumElts = NumElts / 2;
VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
@@ -2093,7 +2096,7 @@
// Operation doesn't trap so just widen as normal.
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags);
}
// No legal vector version so unroll the vector operation and then widen.
@@ -2123,7 +2126,7 @@
SDValue EOp2 = DAG.getNode(
ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2, Flags);
Idx += NumElts;
CurNumElts -= NumElts;
}
@@ -2141,7 +2144,7 @@
ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2,
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
- EOp1, EOp2);
+ EOp1, EOp2, Flags);
}
CurNumElts = 0;
}
@@ -2231,7 +2234,7 @@
unsigned Opcode = N->getOpcode();
unsigned InVTNumElts = InVT.getVectorNumElements();
-
+ const SDNodeFlags *Flags = N->getFlags();
if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
InOp = GetWidenedVector(N->getOperand(0));
InVT = InOp.getValueType();
@@ -2239,7 +2242,7 @@
if (InVTNumElts == WidenNumElts) {
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InOp);
- return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1));
+ return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);
}
}
@@ -2260,7 +2263,7 @@
SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InVec);
- return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1));
+ return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags);
}
if (InVTNumElts % WidenNumElts == 0) {
@@ -2270,7 +2273,7 @@
// Extract the input and convert the shorten input vector.
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InVal);
- return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1));
+ return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1), Flags);
}
}
@@ -2286,7 +2289,7 @@
if (N->getNumOperands() == 1)
Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
else
- Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1));
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags);
}
SDValue UndefVal = DAG.getUNDEF(EltVT);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 3acf749..455fc90 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -397,24 +397,21 @@
ID.AddInteger(Op.getResNo());
}
}
+
/// Add logical or fast math flag values to FoldingSetNodeID value.
static void AddNodeIDFlags(FoldingSetNodeID &ID, unsigned Opcode,
const SDNodeFlags *Flags) {
- if (!Flags || !isBinOpWithFlags(Opcode))
+ if (!isBinOpWithFlags(Opcode)) // Other opcodes never add flag bits to the ID.
return;
- unsigned RawFlags = Flags->getRawFlags();
- // If no flags are set, do not alter the ID. We must match the ID of nodes
- // that were created without explicitly specifying flags. This also saves time
- // and allows a gradual increase in API usage of the optional optimization
- // flags.
- if (RawFlags != 0)
- ID.AddInteger(RawFlags);
+ unsigned RawFlags = 0; // Value hashed when the caller passes a null Flags.
+ if (Flags)
+ RawFlags = Flags->getRawFlags();
+ ID.AddInteger(RawFlags); // Added unconditionally, even when it is 0.
}
static void AddNodeIDFlags(FoldingSetNodeID &ID, const SDNode *N) {
- if (auto *Node = dyn_cast<BinaryWithFlagsSDNode>(N))
- AddNodeIDFlags(ID, Node->getOpcode(), &Node->Flags);
+ AddNodeIDFlags(ID, N->getOpcode(), N->getFlags()); // Delegate using N's own opcode and flags.
}
static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC,
@@ -3191,8 +3188,10 @@
case ISD::FNEG:
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB)
+ // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags?
return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1),
- Operand.getNode()->getOperand(0));
+ Operand.getNode()->getOperand(0),
+ &cast<BinaryWithFlagsSDNode>(Operand.getNode())->Flags);
if (OpOpcode == ISD::FNEG) // --X -> X
return Operand.getNode()->getOperand(0);
break;
@@ -5394,12 +5393,12 @@
}
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
- ArrayRef<SDValue> Ops) {
+ ArrayRef<SDValue> Ops, const SDNodeFlags *Flags) {
unsigned NumOps = Ops.size();
switch (NumOps) {
case 0: return getNode(Opcode, DL, VT);
case 1: return getNode(Opcode, DL, VT, Ops[0]);
- case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+ case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags);
case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
default: break;
}
@@ -6850,6 +6849,12 @@
return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
}
+const SDNodeFlags *SDNode::getFlags() const { // Null unless this is a BinaryWithFlagsSDNode.
+ if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this))
+ return &FlagsNode->Flags; // Address of the node's own Flags member, not a copy.
+ return nullptr;
+}
+
SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
assert(N->getNumValues() == 1 &&
"Can't unroll a vector with multiple results!");
@@ -6886,9 +6891,11 @@
}
switch (N->getOpcode()) {
- default:
- Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands));
+ default: {
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands,
+ N->getFlags()));
break;
+ }
case ISD::VSELECT:
Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands));
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 2885776..36c6b90 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -80,7 +80,7 @@
cl::init(0));
static cl::opt<bool>
-EnableFMFInDAG("enable-fmf-dag", cl::init(false), cl::Hidden,
+EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden,
cl::desc("Enable fast-math-flags for DAG nodes"));
// Limit the width of DAG chains. This is important in general to prevent
@@ -2347,6 +2347,10 @@
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Condition = getFCmpCondCode(predicate);
+
+ // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them.
+ // FIXME: We should propagate the fast-math-flags to the DAG node itself for
+ // further optimization, but currently FMF is only applicable to binary nodes.
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
@@ -3629,6 +3633,8 @@
static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl,
SelectionDAG &DAG) {
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
// IntegerPartOfX = ((int32_t)(t0);
SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
@@ -3727,6 +3733,8 @@
//
// #define LOG2OFe 1.4426950f
// t0 = Op * LOG2OFe
+
+ // TODO: What fast-math-flags should be set here?
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
getF32Constant(DAG, 0x3fb8aa3b, dl));
return getLimitedPrecisionExp2(t0, dl, DAG);
@@ -3740,6 +3748,9 @@
/// limited-precision mode.
static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
+
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
@@ -3836,6 +3847,9 @@
/// limited-precision mode.
static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
+
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
@@ -3931,6 +3945,9 @@
/// limited-precision mode.
static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
+
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
@@ -4040,6 +4057,7 @@
}
}
+ // TODO: What fast-math-flags should be set on the FMUL node?
if (IsExp10) {
// Put the exponent in the right bit position for later addition to the
// final result:
@@ -4083,6 +4101,8 @@
// the benefit of being both really simple and much better than a libcall.
SDValue Res; // Logically starts equal to 1.0
SDValue CurSquare = LHS;
+ // TODO: Intrinsics should have fast-math-flags that propagate to these
+ // nodes.
while (Val) {
if (Val & 1) {
if (Res.getNode())
@@ -4736,6 +4756,7 @@
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2))));
} else {
+ // TODO: Intrinsic calls should have fast-math-flags.
SDValue Mul = DAG.getNode(ISD::FMUL, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 212ae7b..8300431 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1088,6 +1088,7 @@
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
+ // TODO: Should this propagate fast-math-flags?
SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
DAG.getConstantFP(1.0f, DL, MVT::f32),
Op.getOperand(1));
@@ -1602,6 +1603,7 @@
// float fb = (float)ib;
SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib);
+ // TODO: Should this propagate fast-math-flags?
// float fq = native_divide(fa, fb);
SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT,
fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb));
@@ -1912,6 +1914,8 @@
SDValue X = Op.getOperand(0);
SDValue Y = Op.getOperand(1);
+ // TODO: Should this propagate fast-math-flags?
+
SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y);
SDValue Floor = DAG.getNode(ISD::FTRUNC, SL, VT, Div);
SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Floor, Y);
@@ -1940,6 +1944,7 @@
SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero);
+ // TODO: Should this propagate fast-math-flags?
return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
}
@@ -2017,6 +2022,8 @@
SDValue C1 = DAG.getConstantFP(C1Val, SL, MVT::f64);
SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src);
+ // TODO: Should this propagate fast-math-flags?
+
SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign);
SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign);
@@ -2046,6 +2053,8 @@
SDValue T = DAG.getNode(ISD::FTRUNC, SL, MVT::f32, X);
+ // TODO: Should this propagate fast-math-flags?
+
SDValue Diff = DAG.getNode(ISD::FSUB, SL, MVT::f32, X, T);
SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, MVT::f32, Diff);
@@ -2156,6 +2165,7 @@
SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero);
+ // TODO: Should this propagate fast-math-flags?
return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
}
@@ -2178,7 +2188,7 @@
SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi,
DAG.getConstant(32, SL, MVT::i32));
-
+ // TODO: Should this propagate fast-math-flags?
return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo);
}
@@ -2203,6 +2213,7 @@
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
DAG.getConstant(1, DL, MVT::i32));
SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
+ // TODO: Should this propagate fast-math-flags?
FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
DAG.getConstantFP(4294967296.0f, DL, MVT::f32)); // 2^32
return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
@@ -2229,7 +2240,7 @@
MVT::f64);
SDValue K1 = DAG.getConstantFP(BitsToDouble(UINT64_C(0xc1f0000000000000)), SL,
MVT::f64);
-
+ // TODO: Should this propagate fast-math-flags?
SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, Trunc, K0);
SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, MVT::f64, Mul);
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index ecda332..5699941 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -946,6 +946,8 @@
EVT VT = Op.getValueType();
SDValue Arg = Op.getOperand(0);
SDLoc DL(Op);
+
+ // TODO: Should this propagate fast-math-flags?
SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
DAG.getNode(ISD::FADD, DL, VT,
DAG.getNode(ISD::FMUL, DL, VT, Arg,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ef42a29..d0830a9 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1000,6 +1000,8 @@
SDLoc DL(Op);
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ // TODO: Should this propagate fast-math-flags?
+
switch (IntrinsicID) {
case Intrinsic::r600_read_ngroups_x:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
@@ -1248,8 +1250,10 @@
if (Unsafe) {
// Turn into multiply by the reciprocal.
// x / y -> x * (1.0 / y)
+ SDNodeFlags Flags;
+ Flags.setUnsafeAlgebra(true);
SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
- return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip);
+ return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, &Flags);
}
return SDValue();
@@ -1286,6 +1290,8 @@
SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One);
+ // TODO: Should this propagate fast-math-flags?
+
r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3);
SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1);
@@ -1405,6 +1411,7 @@
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue Arg = Op.getOperand(0);
+ // TODO: Should this propagate fast-math-flags?
SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
DAG.getNode(ISD::FMUL, DL, VT, Arg,
DAG.getConstantFP(0.5/M_PI, DL,
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 61705fc..7c9df0e 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -6385,6 +6385,8 @@
static SDValue
LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
+ // TODO: Should this propagate fast-math-flags?
+
// Convert to float
// float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
// float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
@@ -6415,6 +6417,8 @@
static SDValue
LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) {
+ // TODO: Should this propagate fast-math-flags?
+
SDValue N2;
// Convert to float.
// float4 yf = vcvt_f32_s32(vmovl_s16(y));
@@ -6487,6 +6491,7 @@
}
static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
+ // TODO: Should this propagate fast-math-flags?
EVT VT = Op.getValueType();
assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
"unexpected type for custom-lowering ISD::UDIV");
diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index b319fd0..5167b6b 100644
--- a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -1786,9 +1786,11 @@
return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1),
Op->getOperand(2));
case Intrinsic::mips_fadd_w:
- case Intrinsic::mips_fadd_d:
+ case Intrinsic::mips_fadd_d: {
+ // TODO: If intrinsics have fast-math-flags, propagate them.
return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1),
Op->getOperand(2));
+ }
// Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away
case Intrinsic::mips_fceq_w:
case Intrinsic::mips_fceq_d:
@@ -1831,9 +1833,11 @@
return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
Op->getOperand(2), ISD::SETUNE);
case Intrinsic::mips_fdiv_w:
- case Intrinsic::mips_fdiv_d:
+ case Intrinsic::mips_fdiv_d: {
+ // TODO: If intrinsics have fast-math-flags, propagate them.
return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1),
Op->getOperand(2));
+ }
case Intrinsic::mips_ffint_u_w:
case Intrinsic::mips_ffint_u_d:
return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0),
@@ -1856,6 +1860,7 @@
}
case Intrinsic::mips_fexp2_w:
case Intrinsic::mips_fexp2_d: {
+ // TODO: If intrinsics have fast-math-flags, propagate them.
EVT ResTy = Op->getValueType(0);
return DAG.getNode(
ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1),
@@ -1869,11 +1874,14 @@
return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0),
Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
case Intrinsic::mips_fmul_w:
- case Intrinsic::mips_fmul_d:
+ case Intrinsic::mips_fmul_d: {
+ // TODO: If intrinsics have fast-math-flags, propagate them.
return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1),
Op->getOperand(2));
+ }
case Intrinsic::mips_fmsub_w:
case Intrinsic::mips_fmsub_d: {
+ // TODO: If intrinsics have fast-math-flags, propagate them.
EVT ResTy = Op->getValueType(0);
return DAG.getNode(ISD::FSUB, SDLoc(Op), ResTy, Op->getOperand(1),
DAG.getNode(ISD::FMUL, SDLoc(Op), ResTy,
@@ -1886,9 +1894,11 @@
case Intrinsic::mips_fsqrt_d:
return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1));
case Intrinsic::mips_fsub_w:
- case Intrinsic::mips_fsub_d:
+ case Intrinsic::mips_fsub_d: {
+ // TODO: If intrinsics have fast-math-flags, propagate them.
return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1),
Op->getOperand(2));
+ }
case Intrinsic::mips_ftrunc_u_w:
case Intrinsic::mips_ftrunc_u_d:
return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0),
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 804aec9..882af75 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5999,7 +5999,11 @@
if (!DAG.getTarget().Options.NoInfsFPMath ||
!DAG.getTarget().Options.NoNaNsFPMath)
return Op;
-
+ // TODO: Propagate flags from the select rather than global settings.
+ SDNodeFlags Flags;
+ Flags.setNoInfs(true);
+ Flags.setNoNaNs(true);
+
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
EVT ResVT = Op.getValueType();
@@ -6049,7 +6053,7 @@
case ISD::SETNE:
std::swap(TV, FV);
case ISD::SETEQ:
- Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
@@ -6059,25 +6063,25 @@
DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
case ISD::SETULT:
case ISD::SETLT:
- Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
case ISD::SETOGE:
case ISD::SETGE:
- Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
case ISD::SETUGT:
case ISD::SETGT:
- Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
case ISD::SETOLE:
case ISD::SETLE:
- Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ee7c445..00b88c6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -12229,6 +12229,7 @@
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
false, false, false, 16);
SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1);
+ // TODO: Are there any fast-math-flags to propagate here?
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
SDValue Result;
@@ -12278,6 +12279,7 @@
DAG.getBitcast(MVT::v2f64, Or), DAG.getIntPtrConstant(0, dl));
// Subtract the bias.
+ // TODO: Are there any fast-math-flags to propagate here?
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
// Handle final rounding.
@@ -12390,6 +12392,7 @@
// float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
SDValue HighBitcast = DAG.getBitcast(VecFloatVT, High);
+ // TODO: Are there any fast-math-flags to propagate here?
SDValue FHigh =
DAG.getNode(ISD::FADD, DL, VecFloatVT, HighBitcast, VecCstFAdd);
// return (float4) lo + fhi;
@@ -12509,6 +12512,7 @@
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
false, false, false, 4);
// Extend everything to 80 bits to force it to be done on x87.
+ // TODO: Are there any fast-math-flags to propagate here?
SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add,
DAG.getIntPtrConstant(0, dl));
@@ -15847,8 +15851,8 @@
Mask, PassThru, Subtarget, DAG);
}
}
- return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
- Src1,Src2),
+ // TODO: Intrinsics should have fast-math-flags to propagate.
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,Src1,Src2),
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_2OP_MASK_RM: {
@@ -19266,6 +19270,7 @@
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn,
DAG.getBitcast(MVT::v2i64, VBias));
Or = DAG.getBitcast(MVT::v2f64, Or);
+ // TODO: Are there any fast-math-flags to propagate here?
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias);
Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub));
return;