AMDGPU: Fold undef fcanonicalize to qNaN

We could choose a free 0 for this, but folding to qNaN matches the behavior for fmul undef, 1.0. Also, a NaN is more useful for folding the operations that use the result, although if it is not eliminated it is more expensive in terms of code size.

llvm-svn: 338376

commit: 4aec86d37a65758b2975a93d5f9c406a4610f052 [log] [tgz]
author: Matt Arsenault <Matthew.Arsenault@amd.com> Tue Jul 31 13:34:31 2018 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> Tue Jul 31 13:34:31 2018 +0000
tree: 86b90383c2c71788b1aff30be774ff3da9aa0d66
parent: 511fed8a24d321be41176cf246f6450d11342174 [diff] [blame]
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 97c38e4..acde638 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp

@@ -6845,8 +6845,16 @@
   SDNode *N,
   DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
-  ConstantFPSDNode *CFP = isConstOrConstSplatFP(N->getOperand(0));
+  SDValue N0 = N->getOperand(0);
 
+  // fcanonicalize undef -> qnan
+  if (N0.isUndef()) {
+    EVT VT = N->getValueType(0);
+    APFloat QNaN = APFloat::getQNaN(SelectionDAG::EVTToAPFloatSemantics(VT));
+    return DAG.getConstantFP(QNaN, SDLoc(N), VT);
+  }
+
+  ConstantFPSDNode *CFP = isConstOrConstSplatFP(N0);
   if (!CFP) {
     SDValue N0 = N->getOperand(0);
     EVT VT = N0.getValueType().getScalarType();
@@ -6899,7 +6907,7 @@
       return DAG.getConstantFP(CanonicalQNaN, SDLoc(N), VT);
   }
 
-  return N->getOperand(0);
+  return N0;
 }
 
 static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
commit	4aec86d37a65758b2975a93d5f9c406a4610f052	[log] [tgz]
author	Matt Arsenault <Matthew.Arsenault@amd.com>	Tue Jul 31 13:34:31 2018 +0000
committer	Matt Arsenault <Matthew.Arsenault@amd.com>	Tue Jul 31 13:34:31 2018 +0000
tree	86b90383c2c71788b1aff30be774ff3da9aa0d66
parent	511fed8a24d321be41176cf246f6450d11342174 [diff] [blame]