Fast-math fold: x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
The motivation is to recognize code such as this from /llvm/projects/test-suite/SingleSource/Benchmarks/BenchmarkGame/n-body.c:
float distance = sqrt(dx * dx + dy * dy + dz * dz);
float mag = dt / (distance * distance * distance);
Without this patch, we don't match the sqrt as a reciprocal sqrt, so for PPC the new testcase in this patch produces:
addis 3, 2, .LCPI4_2@toc@ha
lfs 4, .LCPI4_2@toc@l(3)
addis 3, 2, .LCPI4_1@toc@ha
lfs 0, .LCPI4_1@toc@l(3)
fcmpu 0, 1, 4
beq 0, .LBB4_2
# BB#1:
frsqrtes 4, 1
addis 3, 2, .LCPI4_0@toc@ha
lfs 5, .LCPI4_0@toc@l(3)
fnmsubs 13, 1, 5, 1
fmuls 6, 4, 4
fmadds 1, 13, 6, 5
fmuls 1, 4, 1
fres 4, 1 <--- reciprocal of reciprocal square root
fnmsubs 1, 1, 4, 0
fmadds 4, 4, 1, 4
.LBB4_2:
fmuls 1, 4, 2
fres 2, 1
fnmsubs 0, 1, 2, 0
fmadds 0, 2, 0, 2
fmuls 1, 3, 0
blr
After the patch, this simplifies to:
frsqrtes 0, 1
addis 3, 2, .LCPI4_1@toc@ha
fres 5, 2
lfs 4, .LCPI4_1@toc@l(3)
addis 3, 2, .LCPI4_0@toc@ha
lfs 7, .LCPI4_0@toc@l(3)
fnmsubs 13, 1, 4, 1
fmuls 6, 0, 0
fnmsubs 2, 2, 5, 7
fmadds 1, 13, 6, 4
fmadds 2, 5, 2, 5
fmuls 0, 0, 1
fmuls 0, 0, 2
fmuls 1, 3, 0
blr
Differential Revision: http://reviews.llvm.org/D5628
llvm-svn: 219139
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d8d3380..f98ef96 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7036,6 +7036,28 @@
AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
}
+ } else if (N1.getOpcode() == ISD::FMUL) {
+ // Look through an FMUL. Even though this won't remove the FDIV directly,
+ // it's still worthwhile to get rid of the FSQRT if possible.
+ SDValue SqrtOp;
+ SDValue OtherOp;
+ if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
+ SqrtOp = N1.getOperand(0);
+ OtherOp = N1.getOperand(1);
+ } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
+ SqrtOp = N1.getOperand(1);
+ OtherOp = N1.getOperand(0);
+ }
+ if (SqrtOp.getNode()) {
+ // We found a FSQRT, so try to make this fold:
+ // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
+ if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) {
+ AddToWorklist(RV.getNode());
+ RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp);
+ AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ }
+ }
}
// Fold into a reciprocal estimate and multiply instead of a real divide.