AMDGPU: Remove custom node for RSQ_LEGACY
Select the instruction directly from the intrinsic instead. The custom
node was not providing much value.
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index b5f9685..221d723 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -275,6 +275,7 @@
[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]
>;
+// out = 1.0 / sqrt(a), with the result clamped to +/- max_float.
def int_amdgcn_rsq_clamp : Intrinsic<
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 51f4132..014bb5a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3000,6 +3000,16 @@
case Intrinsic::amdgcn_mul_i24:
case Intrinsic::amdgcn_mul_u24:
return simplifyI24(N, DCI);
+ case Intrinsic::amdgcn_fract:
+ case Intrinsic::amdgcn_rsq:
+ case Intrinsic::amdgcn_rcp_legacy:
+ case Intrinsic::amdgcn_rsq_legacy:
+ case Intrinsic::amdgcn_rsq_clamp:
+ case Intrinsic::amdgcn_ldexp: {
+ // FIXME: This is probably wrong. If src is an sNaN, it won't be quieted.
+ SDValue Src = N->getOperand(1);
+ return Src.isUndef() ? Src : SDValue();
+ }
default:
return SDValue();
}
@@ -4258,7 +4268,6 @@
NODE_NAME_CASE(RCP)
NODE_NAME_CASE(RSQ)
NODE_NAME_CASE(RCP_LEGACY)
- NODE_NAME_CASE(RSQ_LEGACY)
NODE_NAME_CASE(RCP_IFLAG)
NODE_NAME_CASE(FMUL_LEGACY)
NODE_NAME_CASE(RSQ_CLAMP)
@@ -4653,7 +4662,6 @@
case AMDGPUISD::RCP:
case AMDGPUISD::RSQ:
case AMDGPUISD::RCP_LEGACY:
- case AMDGPUISD::RSQ_LEGACY:
case AMDGPUISD::RSQ_CLAMP: {
if (SNaN)
return true;
@@ -4697,6 +4705,17 @@
return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
}
+ case Intrinsic::amdgcn_rcp:
+ case Intrinsic::amdgcn_rsq:
+ case Intrinsic::amdgcn_rcp_legacy:
+ case Intrinsic::amdgcn_rsq_legacy:
+ case Intrinsic::amdgcn_rsq_clamp: {
+ if (SNaN)
+ return true;
+
+ // TODO: Need an "is known positive" check.
+ return false;
+ }
case Intrinsic::amdgcn_fdot2:
// TODO: Refine on operand
return SNaN;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 7d0b17f..d81b447 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -416,7 +416,6 @@
RCP,
RSQ,
RCP_LEGACY,
- RSQ_LEGACY,
RCP_IFLAG,
FMUL_LEGACY,
RSQ_CLAMP,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index eae00f0..b9ded3b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -121,8 +121,6 @@
// out = 1.0 / sqrt(a)
def AMDGPUrsq_impl : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>;
-// out = 1.0 / sqrt(a)
-def AMDGPUrsq_legacy_impl : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>;
def AMDGPUrcp_legacy_impl : SDNode<"AMDGPUISD::RCP_LEGACY", SDTFPUnaryOp>;
def AMDGPUrcp_iflag : SDNode<"AMDGPUISD::RCP_IFLAG", SDTFPUnaryOp>;
@@ -385,9 +383,6 @@
def AMDGPUrcp_legacy : PatFrags<(ops node:$src), [(int_amdgcn_rcp_legacy node:$src),
(AMDGPUrcp_legacy_impl node:$src)]>;
-def AMDGPUrsq_legacy : PatFrags<(ops node:$src), [(int_amdgcn_rsq_legacy node:$src),
- (AMDGPUrsq_legacy_impl node:$src)]>;
-
def AMDGPUrsq : PatFrags<(ops node:$src), [(int_amdgcn_rsq node:$src),
(AMDGPUrsq_impl node:$src)]>;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a0260f5..1bdd2ed 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5842,8 +5842,7 @@
case Intrinsic::amdgcn_rsq_legacy:
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return emitRemovedIntrinsicError(DAG, DL, VT);
-
- return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
+ return SDValue();
case Intrinsic::amdgcn_rcp_legacy:
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return emitRemovedIntrinsicError(DAG, DL, VT);
@@ -8777,7 +8776,6 @@
case AMDGPUISD::RSQ:
case AMDGPUISD::RSQ_CLAMP:
case AMDGPUISD::RCP_LEGACY:
- case AMDGPUISD::RSQ_LEGACY:
case AMDGPUISD::RCP_IFLAG:
case AMDGPUISD::TRIG_PREOP:
case AMDGPUISD::DIV_SCALE:
@@ -8882,6 +8880,11 @@
case Intrinsic::amdgcn_cubeid:
case Intrinsic::amdgcn_frexp_mant:
case Intrinsic::amdgcn_fdot2:
+ case Intrinsic::amdgcn_rcp:
+ case Intrinsic::amdgcn_rsq:
+ case Intrinsic::amdgcn_rsq_clamp:
+ case Intrinsic::amdgcn_rcp_legacy:
+ case Intrinsic::amdgcn_rsq_legacy:
return true;
default:
break;
@@ -10068,10 +10071,10 @@
case AMDGPUISD::FRACT:
case AMDGPUISD::RSQ:
case AMDGPUISD::RCP_LEGACY:
- case AMDGPUISD::RSQ_LEGACY:
case AMDGPUISD::RCP_IFLAG:
case AMDGPUISD::RSQ_CLAMP:
case AMDGPUISD::LDEXP: {
+ // FIXME: This is probably wrong. If src is an sNaN, it won't be quieted.
SDValue Src = N->getOperand(0);
if (Src.isUndef())
return Src;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 95f0c0e..d857857 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -325,7 +325,7 @@
defm V_RSQ_CLAMP_F32 :
VOP1Inst<"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
defm V_RSQ_LEGACY_F32 :
- VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy>;
+ VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, int_amdgcn_rsq_legacy>;
} // End SchedRW = [WriteQuarterRate32]
let SchedRW = [WriteDouble] in {