AMDGPU: Custom lower fexp
This will allow the library to just use __builtin_expf directly
without expanding this itself. Note f64 still won't work because
there is no exp instruction for it.
llvm-svn: 339902
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 5301626..7067bf1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -318,6 +318,7 @@
setOperationAction(ISD::FLOG, MVT::f32, Custom);
setOperationAction(ISD::FLOG10, MVT::f32, Custom);
+ setOperationAction(ISD::FEXP, MVT::f32, Custom);
setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom);
@@ -450,6 +451,7 @@
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
@@ -1141,6 +1143,8 @@
return LowerFLOG(Op, DAG, 1 / AMDGPU_LOG2E_F);
case ISD::FLOG10:
return LowerFLOG(Op, DAG, AMDGPU_LN2_F / AMDGPU_LN10_F);
+ case ISD::FEXP:
+ return lowerFEXP(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG);
@@ -2214,6 +2218,34 @@
return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand);
}
+// Return M_LOG2E of appropriate type
+static SDValue getLog2EVal(SelectionDAG &DAG, const SDLoc &SL, EVT VT) {
+ switch (VT.getScalarType().getSimpleVT().SimpleTy) {
+ case MVT::f32:
+ return DAG.getConstantFP(1.44269504088896340735992468100189214f, SL, VT);
+ case MVT::f16:
+ return DAG.getConstantFP(
+ APFloat(APFloat::IEEEhalf(), "1.44269504088896340735992468100189214"),
+ SL, VT);
+ case MVT::f64:
+ return DAG.getConstantFP(
+ APFloat(APFloat::IEEEdouble(), "0x1.71547652b82fep+0"), SL, VT);
+ default:
+ llvm_unreachable("unsupported fp type");
+ }
+}
+
+// exp2(M_LOG2E_F * f);
+SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+
+ const SDValue K = getLog2EVal(DAG, SL, VT);
+ SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Src, K, Op->getFlags());
+ return DAG.getNode(ISD::FEXP2, SL, VT, Mul, Op->getFlags());
+}
+
static bool isCtlzOpc(unsigned Opc) {
return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 4e8b7a4..d56f9cd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -58,8 +58,9 @@
SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFLOG(SDValue Op, SelectionDAG &Dag,
+ SDValue LowerFLOG(SDValue Op, SelectionDAG &DAG,
double Log2BaseInverted) const;
+ SDValue lowerFEXP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 0ae1514..3e7ca62 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -345,6 +345,7 @@
if (Subtarget->has16BitInsts()) {
setOperationAction(ISD::FLOG, MVT::f16, Custom);
+ setOperationAction(ISD::FEXP, MVT::f16, Custom);
setOperationAction(ISD::FLOG10, MVT::f16, Custom);
}
@@ -597,6 +598,7 @@
setOperationAction(ISD::FMAXNUM, MVT::v4f16, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v4f16, Custom);
+ setOperationAction(ISD::FEXP, MVT::v2f16, Custom);
setOperationAction(ISD::SELECT, MVT::v4i16, Custom);
setOperationAction(ISD::SELECT, MVT::v4f16, Custom);
}