AMDGPU: Improve extract_vector_elt reduction combine
Handle fmul, fsub and preserve flags.
Also really test minnum/maxnum reductions.
The existing tests only covered minnum/maxnum
matched from a fast-math compare and select,
which is not the same thing.
llvm-svn: 339820
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 4e15fdb..0ae1514 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7349,18 +7349,24 @@
return SDValue();
// TODO: Support other binary operations.
case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
case ISD::ADD:
case ISD::UMIN:
case ISD::UMAX:
case ISD::SMIN:
case ISD::SMAX:
case ISD::FMAXNUM:
- case ISD::FMINNUM:
- return DAG.getNode(Opc, SL, EltVT,
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
- Vec.getOperand(0), Idx),
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
- Vec.getOperand(1), Idx));
+ case ISD::FMINNUM: {
+ SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
+ Vec.getOperand(0), Idx);
+ SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
+ Vec.getOperand(1), Idx);
+
+ DCI.AddToWorklist(Elt0.getNode());
+ DCI.AddToWorklist(Elt1.getNode());
+ return DAG.getNode(Opc, SL, EltVT, Elt0, Elt1, Vec->getFlags());
+ }
}
}