DAG: Change behavior of fminnum/fmaxnum nodes
Introduce new versions that follow the IEEE semantics
to help with legalization that may need quieted inputs.
There are some regressions from inserting unnecessary
canonicalizes when these are matched from fast-math
fcmp + select; these should be fixed in a future commit.
llvm-svn: 344914
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 381efb9..f560f0e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7097,6 +7097,13 @@
case ISD::SETLE:
case ISD::SETULT:
case ISD::SETULE: {
+ // Since it's known never nan to get here already, either fminnum or
+ // fminnum_ieee are OK. Try the ieee version first, since fminnum is
+ // expanded in terms of it.
+ unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
+ if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
+ return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
+
unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
return DAG.getNode(Opcode, DL, VT, LHS, RHS);
@@ -7108,6 +7115,10 @@
case ISD::SETGE:
case ISD::SETUGT:
case ISD::SETUGE: {
+ unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
+ if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
+ return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
+
unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
return DAG.getNode(Opcode, DL, VT, LHS, RHS);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 71d124c..b73fc10 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3247,7 +3247,12 @@
Results.push_back(Tmp1);
break;
}
-
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM: {
+ if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG))
+ Results.push_back(Expanded);
+ break;
+ }
case ISD::FSIN:
case ISD::FCOS: {
EVT VT = Node->getValueType(0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 2c1a494..e7edc0e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -130,6 +130,7 @@
SDValue ExpandBITREVERSE(SDValue Op);
SDValue ExpandCTLZ(SDValue Op);
SDValue ExpandCTTZ(SDValue Op);
+ SDValue ExpandFMINNUM_FMAXNUM(SDValue Op);
SDValue ExpandStrictFPOp(SDValue Op);
/// Implements vector promotion.
@@ -353,6 +354,8 @@
case ISD::FABS:
case ISD::FMINNUM:
case ISD::FMAXNUM:
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
case ISD::FMINNAN:
case ISD::FMAXNAN:
case ISD::FCOPYSIGN:
@@ -721,6 +724,9 @@
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
return ExpandCTTZ(Op);
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ return ExpandFMINNUM_FMAXNUM(Op);
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
@@ -1120,6 +1126,12 @@
return DAG.UnrollVectorOp(Op.getNode());
}
+SDValue VectorLegalizer::ExpandFMINNUM_FMAXNUM(SDValue Op) {
+ if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Op.getNode(), DAG))
+ return Expanded;
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
EVT VT = Op.getValueType();
EVT EltVT = VT.getVectorElementType();
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 8d00b32..2b5fd8d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -113,6 +113,8 @@
case ISD::FMUL:
case ISD::FMINNUM:
case ISD::FMAXNUM:
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
case ISD::FMINNAN:
case ISD::FMAXNAN:
case ISD::SMIN:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 0f8bd08..1f0f732 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3712,9 +3712,31 @@
// TODO: Refine on operand
return false;
}
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM: {
+ // Only one needs to be known not-nan, since it will be returned if the
+ // other ends up being one.
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) ||
+ isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ }
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE: {
+ if (SNaN)
+ return true;
+ // This can return a NaN if either operand is an sNaN, or if both operands
+ // are NaN.
+ return (isKnownNeverNaN(Op.getOperand(0), false, Depth + 1) &&
+ isKnownNeverSNaN(Op.getOperand(1), Depth + 1)) ||
+ (isKnownNeverNaN(Op.getOperand(1), false, Depth + 1) &&
+ isKnownNeverSNaN(Op.getOperand(0), Depth + 1));
+ }
+ case ISD::FMINNAN:
+ case ISD::FMAXNAN: {
+ // TODO: Does this quiet or return the original NaN as-is?
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
- // TODO: Handle FMINNUM/FMAXNUM/FMINNAN/FMAXNAN when there is an agreement on
- // what they should do.
+ }
case ISD::EXTRACT_VECTOR_ELT: {
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 9967f0e..64a9764 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -176,6 +176,9 @@
case ISD::FABS: return "fabs";
case ISD::FMINNUM: return "fminnum";
case ISD::FMAXNUM: return "fmaxnum";
+ case ISD::FMINNUM_IEEE: return "fminnum_ieee";
+ case ISD::FMAXNUM_IEEE: return "fmaxnum_ieee";
+
case ISD::FMINNAN: return "fminnan";
case ISD::FMAXNAN: return "fmaxnan";
case ISD::FNEG: return "fneg";
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b9b99b3..ceedd06 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -4113,6 +4113,35 @@
return true;
}
+SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
+ ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
+ EVT VT = Node->getValueType(0);
+ if (isOperationLegalOrCustom(NewOp, VT)) {
+ SDValue Quiet0 = Node->getOperand(0);
+ SDValue Quiet1 = Node->getOperand(1);
+
+ if (!Node->getFlags().hasNoNaNs()) {
+ // Insert canonicalizes if it's possible we need to quiet to get correct
+ // sNaN behavior.
+ if (!DAG.isKnownNeverSNaN(Quiet0)) {
+ Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
+ Node->getFlags());
+ }
+ if (!DAG.isKnownNeverSNaN(Quiet1)) {
+ Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
+ Node->getFlags());
+ }
+ }
+
+ return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
+ }
+
+ return SDValue();
+}
+
SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
SelectionDAG &DAG) const {
SDLoc SL(LD);
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 03a29a3..ddd5fc1 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -600,6 +600,8 @@
setOperationAction(ISD::CONCAT_VECTORS, VT, Expand);
setOperationAction(ISD::FMINNUM, VT, Expand);
setOperationAction(ISD::FMAXNUM, VT, Expand);
+ setOperationAction(ISD::FMINNUM_IEEE, VT, Expand);
+ setOperationAction(ISD::FMAXNUM_IEEE, VT, Expand);
setOperationAction(ISD::FMINNAN, VT, Expand);
setOperationAction(ISD::FMAXNAN, VT, Expand);
setOperationAction(ISD::FMAD, VT, Expand);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index ae6b925..a1b9198 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -552,6 +552,8 @@
case ISD::FMAD:
case ISD::FMINNUM:
case ISD::FMAXNUM:
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
case ISD::FSIN:
case ISD::FTRUNC:
case ISD::FRINT:
@@ -3512,6 +3514,10 @@
return ISD::FMINNUM;
case ISD::FMINNUM:
return ISD::FMAXNUM;
+ case ISD::FMAXNUM_IEEE:
+ return ISD::FMINNUM_IEEE;
+ case ISD::FMINNUM_IEEE:
+ return ISD::FMAXNUM_IEEE;
case AMDGPUISD::FMAX_LEGACY:
return AMDGPUISD::FMIN_LEGACY;
case AMDGPUISD::FMIN_LEGACY:
@@ -3617,6 +3623,8 @@
}
case ISD::FMAXNUM:
case ISD::FMINNUM:
+ case ISD::FMAXNUM_IEEE:
+ case ISD::FMINNUM_IEEE:
case AMDGPUISD::FMAX_LEGACY:
case AMDGPUISD::FMIN_LEGACY: {
// fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 92d8991..0d22cb2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -360,6 +360,7 @@
SIN_HW,
FMAX_LEGACY,
FMIN_LEGACY,
+
FMAX3,
SMAX3,
UMAX3,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index ab00b1d..b7d1575 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -152,8 +152,14 @@
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;
+
def fminnum_oneuse : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;
+
+def fminnum_ieee_oneuse : HasOneUseBinOp<fminnum_ieee>;
+def fmaxnum_ieee_oneuse : HasOneUseBinOp<fmaxnum_ieee>;
+
+
def and_oneuse : HasOneUseBinOp<and>;
def or_oneuse : HasOneUseBinOp<or>;
def xor_oneuse : HasOneUseBinOp<xor>;
@@ -837,3 +843,25 @@
(AMDGPUrcp (fsqrt vt:$src)),
(RsqInst $src)
>;
+
+// Instructions which select to the same v_min_f*
+def fminnum_like : PatFrags<(ops node:$src0, node:$src1),
+ [(fminnum_ieee node:$src0, node:$src1),
+ (fminnum node:$src0, node:$src1)]
+>;
+
+// Instructions which select to the same v_max_f*
+def fmaxnum_like : PatFrags<(ops node:$src0, node:$src1),
+ [(fmaxnum_ieee node:$src0, node:$src1),
+ (fmaxnum node:$src0, node:$src1)]
+>;
+
+def fminnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
+ [(fminnum_ieee_oneuse node:$src0, node:$src1),
+ (fminnum_oneuse node:$src0, node:$src1)]
+>;
+
+def fmaxnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
+ [(fmaxnum_ieee_oneuse node:$src0, node:$src1),
+ (fmaxnum_oneuse node:$src0, node:$src1)]
+>;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 81ff640..3ba0483 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -384,8 +384,20 @@
if (Subtarget->hasBFE())
setHasExtractBitsInsn(true);
- setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f32, Custom);
+ setOperationAction(ISD::FMAXNUM, MVT::f32, Custom);
+ setOperationAction(ISD::FMINNUM, MVT::f64, Custom);
+ setOperationAction(ISD::FMAXNUM, MVT::f64, Custom);
+
+
+ // These are really only legal for ieee_mode functions. We should be avoiding
+ // them for functions that don't have ieee_mode enabled, so just say they are
+ // legal.
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
+
if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
@@ -474,8 +486,7 @@
// F16 - VOP2 Actions.
setOperationAction(ISD::BR_CC, MVT::f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
- setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
- setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
+
setOperationAction(ISD::FDIV, MVT::f16, Custom);
// F16 - VOP3 Actions.
@@ -558,6 +569,17 @@
// This isn't really legal, but this avoids the legalizer unrolling it (and
// allows matching fneg (fabs x) patterns)
setOperationAction(ISD::FABS, MVT::v2f16, Legal);
+
+ setOperationAction(ISD::FMAXNUM, MVT::f16, Custom);
+ setOperationAction(ISD::FMINNUM, MVT::f16, Custom);
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::f16, Legal);
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::f16, Legal);
+
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::v4f16, Custom);
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::v4f16, Custom);
+
+ setOperationAction(ISD::FMINNUM, MVT::v4f16, Expand);
+ setOperationAction(ISD::FMAXNUM, MVT::v4f16, Expand);
}
if (Subtarget->hasVOP3PInsts()) {
@@ -575,8 +597,10 @@
setOperationAction(ISD::FADD, MVT::v2f16, Legal);
setOperationAction(ISD::FMUL, MVT::v2f16, Legal);
setOperationAction(ISD::FMA, MVT::v2f16, Legal);
- setOperationAction(ISD::FMINNUM, MVT::v2f16, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::v2f16, Legal);
+
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::v2f16, Legal);
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::v2f16, Legal);
+
setOperationAction(ISD::FCANONICALIZE, MVT::v2f16, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i16, Custom);
@@ -596,6 +620,10 @@
setOperationAction(ISD::FADD, MVT::v4f16, Custom);
setOperationAction(ISD::FMUL, MVT::v4f16, Custom);
+
+ setOperationAction(ISD::FMAXNUM, MVT::v2f16, Custom);
+ setOperationAction(ISD::FMINNUM, MVT::v2f16, Custom);
+
setOperationAction(ISD::FMINNUM, MVT::v4f16, Custom);
setOperationAction(ISD::FMAXNUM, MVT::v4f16, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v4f16, Custom);
@@ -634,6 +662,8 @@
setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FMINNUM);
setTargetDAGCombine(ISD::FMAXNUM);
+ setTargetDAGCombine(ISD::FMINNUM_IEEE);
+ setTargetDAGCombine(ISD::FMAXNUM_IEEE);
setTargetDAGCombine(ISD::FMA);
setTargetDAGCombine(ISD::SMIN);
setTargetDAGCombine(ISD::SMAX);
@@ -3580,6 +3610,9 @@
case ISD::FNEG:
case ISD::FCANONICALIZE:
return splitUnaryVectorOp(Op, DAG);
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ return lowerFMINNUM_FMAXNUM(Op, DAG);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
@@ -3590,10 +3623,10 @@
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
- case ISD::FMINNUM:
- case ISD::FMAXNUM:
case ISD::FADD:
case ISD::FMUL:
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
return splitBinaryVectorOp(Op, DAG);
}
return SDValue();
@@ -4048,6 +4081,23 @@
return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);
}
+SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ bool IsIEEEMode = Subtarget->enableIEEEBit(DAG.getMachineFunction());
+
+ // FIXME: Assert during selection that this is only selected for
+ // ieee_mode. Currently a combine can produce the ieee version for non-ieee
+ // mode functions, but this happens to be OK since it's only done in cases
+ // where there is known no sNaN.
+ if (IsIEEEMode)
+ return expandFMINNUM_FMAXNUM(Op.getNode(), DAG);
+
+ if (VT == MVT::v4f16)
+ return splitBinaryVectorOp(Op, DAG);
+ return Op;
+}
+
SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Chain = Op.getOperand(0);
@@ -7521,37 +7571,32 @@
case ISD::FMINNUM:
case ISD::FMAXNUM:
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
case AMDGPUISD::CLAMP:
case AMDGPUISD::FMED3:
case AMDGPUISD::FMAX3:
case AMDGPUISD::FMIN3: {
// FIXME: Shouldn't treat the generic operations different based these.
- bool IsIEEEMode = Subtarget->enableIEEEBit(DAG.getMachineFunction());
- if (IsIEEEMode) {
- // snans will be quieted, so we only need to worry about denormals.
- if (Subtarget->supportsMinMaxDenormModes() ||
- denormalsEnabledForType(Op.getValueType()))
- return true;
+ // However, we aren't really required to flush the result from
+ // minnum/maxnum.
- // Flushing may be required.
- // In pre-GFX9 targets V_MIN_F32 and others do not flush denorms. For such
- // targets need to check their input recursively.
- return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1) &&
- isCanonicalized(DAG, Op.getOperand(1), MaxDepth - 1);
- }
-
+ // snans will be quieted, so we only need to worry about denormals.
if (Subtarget->supportsMinMaxDenormModes() ||
- denormalsEnabledForType(Op.getValueType())) {
- // Only quieting may be necessary.
- return DAG.isKnownNeverSNaN(Op.getOperand(0)) &&
- DAG.isKnownNeverSNaN(Op.getOperand(1));
+ denormalsEnabledForType(Op.getValueType()))
+ return true;
+
+ // Flushing may be required.
+ // In pre-GFX9 targets V_MIN_F32 and others do not flush denorms. For such
+ // targets need to check their input recursively.
+
+ // FIXME: Does this apply with clamp? It's implemented with max.
+ for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) {
+ if (!isCanonicalized(DAG, Op.getOperand(I), MaxDepth - 1))
+ return false;
}
- // Flushing and quieting may be necessary
- // With ieee_mode off, the nan is returned as-is, so if it is an sNaN it
- // needs to be quieted.
- return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1) &&
- isCanonicalized(DAG, Op.getOperand(1), MaxDepth - 1);
+ return true;
}
case ISD::SELECT: {
return isCanonicalized(DAG, Op.getOperand(1), MaxDepth - 1) &&
@@ -7578,6 +7623,21 @@
// Could be anything.
return false;
+ case ISD::BITCAST: {
+ // Hack round the mess we make when legalizing extract_vector_elt
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueType() == MVT::i16 &&
+ Src.getOpcode() == ISD::TRUNCATE) {
+ SDValue TruncSrc = Src.getOperand(0);
+ if (TruncSrc.getValueType() == MVT::i32 &&
+ TruncSrc.getOpcode() == ISD::BITCAST &&
+ TruncSrc.getOperand(0).getValueType() == MVT::v2f16) {
+ return isCanonicalized(DAG, TruncSrc.getOperand(0), MaxDepth - 1);
+ }
+ }
+
+ return false;
+ }
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntrinsicID
= cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -7603,7 +7663,6 @@
}
// Constant fold canonicalize.
-
SDValue SITargetLowering::getCanonicalConstantFP(
SelectionDAG &DAG, const SDLoc &SL, EVT VT, const APFloat &C) const {
// Flush denormals to 0 if not enabled.
@@ -7699,18 +7758,40 @@
}
}
+ unsigned SrcOpc = N0.getOpcode();
+
+ // If it's free to do so, push canonicalizes further up the source, which may
+ // find a canonical source.
+ //
+ // TODO: More opcodes. Note this is unsafe for the _ieee minnum/maxnum for
+ // sNaNs.
+ if (SrcOpc == ISD::FMINNUM || SrcOpc == ISD::FMAXNUM) {
+ auto *CRHS = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
+ if (CRHS && N0.hasOneUse()) {
+ SDLoc SL(N);
+ SDValue Canon0 = DAG.getNode(ISD::FCANONICALIZE, SL, VT,
+ N0.getOperand(0));
+ SDValue Canon1 = getCanonicalConstantFP(DAG, SL, VT, CRHS->getValueAPF());
+ DCI.AddToWorklist(Canon0.getNode());
+
+ return DAG.getNode(N0.getOpcode(), SL, VT, Canon0, Canon1);
+ }
+ }
+
return isCanonicalized(DAG, N0) ? N0 : SDValue();
}
static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
switch (Opc) {
case ISD::FMAXNUM:
+ case ISD::FMAXNUM_IEEE:
return AMDGPUISD::FMAX3;
case ISD::SMAX:
return AMDGPUISD::SMAX3;
case ISD::UMAX:
return AMDGPUISD::UMAX3;
case ISD::FMINNUM:
+ case ISD::FMINNUM_IEEE:
return AMDGPUISD::FMIN3;
case ISD::SMIN:
return AMDGPUISD::SMIN3;
@@ -7877,6 +7958,7 @@
// fminnum(fmaxnum(x, K0), K1), K0 < K1 && !is_snan(x) -> fmed3(x, K0, K1)
if (((Opc == ISD::FMINNUM && Op0.getOpcode() == ISD::FMAXNUM) ||
+ (Opc == ISD::FMINNUM_IEEE && Op0.getOpcode() == ISD::FMAXNUM_IEEE) ||
(Opc == AMDGPUISD::FMIN_LEGACY &&
Op0.getOpcode() == AMDGPUISD::FMAX_LEGACY)) &&
(VT == MVT::f32 || VT == MVT::f64 ||
@@ -7995,7 +8077,9 @@
case ISD::SMIN:
case ISD::SMAX:
case ISD::FMAXNUM:
- case ISD::FMINNUM: {
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM_IEEE:
+ case ISD::FMINNUM_IEEE: {
SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
Vec.getOperand(0), Idx);
SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
@@ -8595,13 +8679,15 @@
return performSetCCCombine(N, DCI);
case ISD::FMAXNUM:
case ISD::FMINNUM:
+ case ISD::FMAXNUM_IEEE:
+ case ISD::FMINNUM_IEEE:
case ISD::SMAX:
case ISD::SMIN:
case ISD::UMAX:
case ISD::UMIN:
case AMDGPUISD::FMIN_LEGACY:
case AMDGPUISD::FMAX_LEGACY: {
- if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
+ if (//DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
getTargetMachine().getOptLevel() > CodeGenOpt::None)
return performMinMaxCombine(N, DCI);
break;
@@ -9320,3 +9406,17 @@
return false;
}
}
+
+bool SITargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
+ const SelectionDAG &DAG,
+ bool SNaN,
+ unsigned Depth) const {
+ if (Op.getOpcode() == AMDGPUISD::CLAMP) {
+ if (Subtarget->enableDX10Clamp())
+ return true; // Clamped to 0.
+ return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+
+ return AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(Op, DAG,
+ SNaN, Depth);
+}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 1b0cb06..bcb46ec 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -110,6 +110,7 @@
/// Custom lowering for ISD::FP_ROUND for MVT::f16.
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
SDValue getSegmentAperture(unsigned AS, const SDLoc &DL,
SelectionDAG &DAG) const;
@@ -346,6 +347,11 @@
bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
unsigned MaxDepth = 5) const;
bool denormalsEnabledForType(EVT VT) const;
+
+ bool isKnownNeverNaNForTargetNode(SDValue Op,
+ const SelectionDAG &DAG,
+ bool SNaN = false,
+ unsigned Depth = 0) const override;
};
} // End namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 1336a57..67aea73d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1645,10 +1645,11 @@
// This matches 16 permutations of
// max(min(x, y), min(max(x, y), z))
class FPMed3Pat<ValueType vt,
+ //SDPatternOperator max, SDPatternOperator min,
Instruction med3Inst> : GCNPat<
- (fmaxnum (fminnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+ (fmaxnum_like (fminnum_like_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
(VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
- (fminnum_oneuse (fmaxnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+ (fminnum_like_oneuse (fmaxnum_like_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
(VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
(vt (VOP3Mods_nnan vt:$src2, i32:$src2_mods)))),
(med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
@@ -1656,10 +1657,10 @@
class FP16Med3Pat<ValueType vt,
Instruction med3Inst> : GCNPat<
- (fmaxnum (fminnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
- (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
- (fminnum_oneuse (fmaxnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
- (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
+ (fmaxnum_like (fminnum_like_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+ (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
+ (fminnum_like_oneuse (fmaxnum_like_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+ (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
(vt (VOP3Mods_nnan vt:$src2, i32:$src2_mods)))),
(med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE)
>;
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index e9d12ba..db031be 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -393,8 +393,8 @@
defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_i24>;
defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmul_u24>;
defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_u24>;
-defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum>;
-defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum>;
+defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>;
+defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>;
defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
@@ -556,8 +556,8 @@
defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16>;
defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16, null_frag, "v_sub_u16">;
defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16>;
-defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum>;
-defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum>;
+defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>;
+defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum_like>;
defm V_MAX_U16 : VOP2Inst <"v_max_u16", VOP_I16_I16_I16>;
defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16>;
defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16>;
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 96b233b..51bee3e 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -295,8 +295,8 @@
def V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, fma>;
def V_ADD_F64 : VOP3Inst <"v_add_f64", VOP3_Profile<VOP_F64_F64_F64>, fadd, 1>;
def V_MUL_F64 : VOP3Inst <"v_mul_f64", VOP3_Profile<VOP_F64_F64_F64>, fmul, 1>;
-def V_MIN_F64 : VOP3Inst <"v_min_f64", VOP3_Profile<VOP_F64_F64_F64>, fminnum, 1>;
-def V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaxnum, 1>;
+def V_MIN_F64 : VOP3Inst <"v_min_f64", VOP3_Profile<VOP_F64_F64_F64>, fminnum_like, 1>;
+def V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaxnum_like, 1>;
} // End SchedRW = [WriteDoubleAdd]
let SchedRW = [WriteQuarterRate32] in {
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 41e21c1..c91d911 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -48,8 +48,8 @@
def V_PK_ADD_F16 : VOP3PInst<"v_pk_add_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fadd>;
def V_PK_MUL_F16 : VOP3PInst<"v_pk_mul_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fmul>;
-def V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fmaxnum>;
-def V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fminnum>;
+def V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fmaxnum_like>;
+def V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fminnum_like>;
def V_PK_ADD_U16 : VOP3PInst<"v_pk_add_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, add>;
def V_PK_ADD_I16 : VOP3PInst<"v_pk_add_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;