[AArch64] Match float round and convert to int instructions.

Summary:
Add instruction selection that folds a floating-point round (ceil, floor,
trunc or round) followed by a conversion to int into a single FCVT* instruction.

Reviewers: t.p.northover, jmolloy

Subscribers: aemerson, rengolin, mcrosier, llvm-commits

Differential Revision: http://reviews.llvm.org/D11424

llvm-svn: 243422
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 1a1b58b..0b848fc 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -155,6 +155,7 @@
   SDNode *SelectBitfieldInsertOp(SDNode *N);
 
   SDNode *SelectLIBM(SDNode *N);
+  SDNode *SelectFPConvertWithRound(SDNode *N);
 
   SDNode *SelectReadRegister(SDNode *N);
   SDNode *SelectWriteRegister(SDNode *N);
@@ -185,6 +186,9 @@
   }
 
   bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
+
+  SDNode *GenerateInexactFlagIfNeeded(const SDValue &In, unsigned InTyVariant,
+                                      SDLoc DL);
 };
 } // end anonymous namespace
 
@@ -2016,11 +2020,29 @@
   return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
 }
 
+/// GenerateInexactFlagIfNeeded - Insert an FRINTX instruction to generate the
+/// inexact signal on round-to-integer operations if needed. C11 leaves it
+/// implementation-defined whether these operations trigger an inexact
+/// exception; IEEE says they don't. Darwin decided they do, so on Darwin
+/// (unless unsafe FP math is enabled) emit an FRINTX just to set the right
+/// bit in FPSR and return it; its glue is result 1. Otherwise return nullptr.
+SDNode *AArch64DAGToDAGISel::GenerateInexactFlagIfNeeded(const SDValue &In,
+                                                         unsigned InTyVariant,
+                                                         SDLoc DL) {
+  if (Subtarget->isTargetDarwin() && !TM.Options.UnsafeFPMath) {
+    // Pick the FRINTX variant matching the input type to set the flags.
+    // Callers pass InTyVariant as 0 for f32 and 1 for f64.
+    unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr };
+    return CurDAG->getMachineNode(FRINTXOpcs[InTyVariant], DL,
+                                  In.getValueType(), MVT::Glue, In);
+  }
+  return nullptr;
+}
+
 SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) {
   EVT VT = N->getValueType(0);
   unsigned Variant;
   unsigned Opc;
-  unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr };
 
   if (VT == MVT::f32) {
     Variant = 0;
@@ -2029,9 +2051,6 @@
   } else
     return nullptr; // Unrecognized argument type. Fall back on default codegen.
 
-  // Pick the FRINTX variant needed to set the flags.
-  unsigned FRINTXOpc = FRINTXOpcs[Variant];
-
   switch (N->getOpcode()) {
   default:
     return nullptr; // Unrecognized libm ISD node. Fall back on default codegen.
@@ -2062,18 +2081,97 @@
   SmallVector<SDValue, 2> Ops;
   Ops.push_back(In);
 
-  // C11 leaves it implementation-defined whether these operations trigger an
-  // inexact exception. IEEE says they don't.  Unfortunately, Darwin decided
-  // they do so we sometimes have to insert a special instruction just to set
-  // the right bit in FPSR.
-  if (Subtarget->isTargetDarwin() && !TM.Options.UnsafeFPMath) {
-    SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In);
-    Ops.push_back(SDValue(FRINTX, 1));
-  }
+  if (SDNode *FRINTXNode = GenerateInexactFlagIfNeeded(In, Variant, dl))
+    Ops.push_back(SDValue(FRINTXNode, 1));
 
   return CurDAG->getMachineNode(Opc, dl, VT, Ops);
 }
 
+/// SelectFPConvertWithRound - Try to fold an FP round (ceil/floor/trunc/round)
+/// feeding FP_TO_SINT/FP_TO_UINT into one FCVT*; return nullptr on no match.
+SDNode *AArch64DAGToDAGISel::SelectFPConvertWithRound(SDNode *N) {
+  SDNode *Op0 = N->getOperand(0).getNode();
+
+  // Bail out if the round op is used by other nodes, as folding would then
+  // result in two FRINTX, one each for round and convert.
+  if (!Op0->hasOneUse())
+    return nullptr;
+  // Table index for the FP source type: 0 for f32, 1 for f64.
+  unsigned InTyVariant;
+  EVT InTy = Op0->getValueType(0);
+  if (InTy == MVT::f32)
+    InTyVariant = 0;
+  else if (InTy == MVT::f64)
+    InTyVariant = 1;
+  else
+    return nullptr;
+  // Table index for the integer result type: 0 for i32, 1 for i64.
+  unsigned OutTyVariant;
+  EVT OutTy = N->getValueType(0);
+  if (OutTy == MVT::i32)
+    OutTyVariant = 0;
+  else if (OutTy == MVT::i64)
+    OutTyVariant = 1;
+  else
+    return nullptr;
+  // Signedness index for the tables: 0 for FP_TO_SINT, 1 for FP_TO_UINT.
+  assert((N->getOpcode() == ISD::FP_TO_SINT
+          || N->getOpcode() == ISD::FP_TO_UINT) && "Unexpected opcode!");
+  unsigned FpConVariant = N->getOpcode() == ISD::FP_TO_SINT ? 0 : 1;
+  // Each table below is indexed [signedness][source type][result type].
+  unsigned Opc;
+  switch (Op0->getOpcode()) {
+  default:
+    return nullptr;
+  case ISD::FCEIL: {
+    unsigned FCVTPOpcs[2][2][2] = {
+        { { AArch64::FCVTPSUWSr, AArch64::FCVTPSUXSr },
+          { AArch64::FCVTPSUWDr, AArch64::FCVTPSUXDr } },
+        { { AArch64::FCVTPUUWSr, AArch64::FCVTPUUXSr },
+          { AArch64::FCVTPUUWDr, AArch64::FCVTPUUXDr } } };
+    Opc = FCVTPOpcs[FpConVariant][InTyVariant][OutTyVariant];
+    break;
+  }
+  case ISD::FFLOOR: {
+    unsigned FCVTMOpcs[2][2][2] = {
+        { { AArch64::FCVTMSUWSr, AArch64::FCVTMSUXSr },
+          { AArch64::FCVTMSUWDr, AArch64::FCVTMSUXDr } },
+        { { AArch64::FCVTMUUWSr, AArch64::FCVTMUUXSr },
+          { AArch64::FCVTMUUWDr, AArch64::FCVTMUUXDr } } };
+    Opc = FCVTMOpcs[FpConVariant][InTyVariant][OutTyVariant];
+    break;
+  }
+  case ISD::FTRUNC: {
+    unsigned FCVTZOpcs[2][2][2] = {
+        { { AArch64::FCVTZSUWSr, AArch64::FCVTZSUXSr },
+          { AArch64::FCVTZSUWDr, AArch64::FCVTZSUXDr } },
+        { { AArch64::FCVTZUUWSr, AArch64::FCVTZUUXSr },
+          { AArch64::FCVTZUUWDr, AArch64::FCVTZUUXDr } } };
+    Opc = FCVTZOpcs[FpConVariant][InTyVariant][OutTyVariant];
+    break;
+  }
+  case ISD::FROUND: {
+    unsigned FCVTAOpcs[2][2][2] = {
+        { { AArch64::FCVTASUWSr, AArch64::FCVTASUXSr },
+          { AArch64::FCVTASUWDr, AArch64::FCVTASUXDr } },
+        { { AArch64::FCVTAUUWSr, AArch64::FCVTAUUXSr },
+          { AArch64::FCVTAUUWDr, AArch64::FCVTAUUXDr } } };
+    Opc = FCVTAOpcs[FpConVariant][InTyVariant][OutTyVariant];
+    break;
+  }
+  }
+  // The FCVT* takes the round's own operand, so the round node is bypassed.
+  SDLoc DL(N);
+  SDValue In = Op0->getOperand(0);
+  SmallVector<SDValue, 2> Ops;
+  Ops.push_back(In);
+  // Glue the FRINTX (its result 1) to the FCVT* when one was generated.
+  if (SDNode *FRINTXNode = GenerateInexactFlagIfNeeded(In, InTyVariant, DL))
+    Ops.push_back(SDValue(FRINTXNode, 1));
+
+  return CurDAG->getMachineNode(Opc, DL, OutTy, Ops);
+}
+
 bool
 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
                                               unsigned RegWidth) {
@@ -3226,6 +3324,12 @@
     if (SDNode *I = SelectLIBM(Node))
       return I;
     break;
+
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+    if (SDNode *I = SelectFPConvertWithRound(Node))
+      return I;
+    break;
   }
 
   // Select the default instruction