[RISCV] Add RV64F codegen support

This requires a little extra work due tothe fact i32 is not a legal type. When
call lowering happens post-legalisation (e.g. when an intrinsic was inserted
during legalisation). A bitcast from f32 to i32 can't be introduced. This is
similar to the challenges with RV32D. To handle this, we introduce
target-specific DAG nodes that perform bitcast+anyext for f32->i64 and
trunc+bitcast for i64->f32.

Differential Revision: https://reviews.llvm.org/D53235

llvm-svn: 352807
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d61297c..d1e9ce7 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -137,6 +137,9 @@
       setOperationAction(Op, MVT::f32, Expand);
   }
 
+  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
+    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
+
   if (Subtarget.hasStdExtD()) {
     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
@@ -338,6 +341,17 @@
     return lowerFRAMEADDR(Op, DAG);
   case ISD::RETURNADDR:
     return lowerRETURNADDR(Op, DAG);
+  case ISD::BITCAST: {
+    assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() &&
+           "Unexpected custom legalisation");
+    SDLoc DL(Op);
+    SDValue Op0 = Op.getOperand(0);
+    if (Op.getValueType() != MVT::f32 || Op0.getValueType() != MVT::i32)
+      return SDValue();
+    SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
+    SDValue FPConv = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
+    return FPConv;
+  }
   }
 }
 
@@ -579,6 +593,18 @@
       return;
     Results.push_back(customLegalizeToWOp(N, DAG));
     break;
+  case ISD::BITCAST: {
+    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+           Subtarget.hasStdExtF() && "Unexpected custom legalisation");
+    SDLoc DL(N);
+    SDValue Op0 = N->getOperand(0);
+    if (Op0.getValueType() != MVT::f32)
+      return;
+    SDValue FPConv =
+        DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
+    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
+    break;
+  }
   }
 }
 
@@ -633,6 +659,38 @@
       return SDValue();
     break;
   }
+  case RISCVISD::FMV_X_ANYEXTW_RV64: {
+    SDLoc DL(N);
+    SDValue Op0 = N->getOperand(0);
+    // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
+    // conversion is unnecessary and can be replaced with an ANY_EXTEND
+    // of the FMV_W_X_RV64 operand.
+    if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
+      SDValue AExtOp =
+          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0.getOperand(0));
+      return DCI.CombineTo(N, AExtOp);
+    }
+
+    // This is a target-specific version of a DAGCombine performed in
+    // DAGCombiner::visitBITCAST. It performs the equivalent of:
+    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
+    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
+    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
+        !Op0.getNode()->hasOneUse())
+      break;
+    SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
+                                 Op0.getOperand(0));
+    APInt SignBit = APInt::getSignMask(32).sext(64);
+    if (Op0.getOpcode() == ISD::FNEG) {
+      return DCI.CombineTo(N,
+                           DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
+                                       DAG.getConstant(SignBit, DL, MVT::i64)));
+    }
+    assert(Op0.getOpcode() == ISD::FABS);
+    return DCI.CombineTo(N,
+                         DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
+                                     DAG.getConstant(~SignBit, DL, MVT::i64)));
+  }
   }
 
   return SDValue();
@@ -874,7 +932,7 @@
   assert(XLen == 32 || XLen == 64);
   MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
   if (ValVT == MVT::f32) {
-    LocVT = MVT::i32;
+    LocVT = XLenVT;
     LocInfo = CCValAssign::BCvt;
   }
 
@@ -1047,6 +1105,10 @@
   case CCValAssign::Full:
     break;
   case CCValAssign::BCvt:
+    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
+      Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
+      break;
+    }
     Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
     break;
   }
@@ -1082,6 +1144,10 @@
   case CCValAssign::Full:
     break;
   case CCValAssign::BCvt:
+    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
+      Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
+      break;
+    }
     Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
     break;
   }
@@ -1108,9 +1174,12 @@
     llvm_unreachable("Unexpected CCValAssign::LocInfo");
   case CCValAssign::Full:
   case CCValAssign::Indirect:
+  case CCValAssign::BCvt:
     ExtType = ISD::NON_EXTLOAD;
     break;
   }
+  if (ValVT == MVT::f32)
+    LocVT = MVT::f32;
   Val = DAG.getExtLoad(
       ExtType, DL, LocVT, Chain, FIN,
       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
@@ -1759,6 +1828,10 @@
     return "RISCVISD::DIVUW";
   case RISCVISD::REMUW:
     return "RISCVISD::REMUW";
+  case RISCVISD::FMV_W_X_RV64:
+    return "RISCVISD::FMV_W_X_RV64";
+  case RISCVISD::FMV_X_ANYEXTW_RV64:
+    return "RISCVISD::FMV_X_ANYEXTW_RV64";
   }
   return nullptr;
 }
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index fa18f7c..12b6320 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -41,7 +41,14 @@
   // at instruction selection time.
   DIVW,
   DIVUW,
-  REMUW
+  REMUW,
+  // FPR32<->GPR transfer operations for RV64. Needed as an i32<->f32 bitcast
+  // is not legal on RV64. FMV_W_X_RV64 matches the semantics of the FMV.W.X.
+  // FMV_X_ANYEXTW_RV64 is similar to FMV.X.W but has an any-extended result.
+  // This is a more convenient semantic for producing dagcombines that remove
+  // unnecessary GPR->FPR->GPR moves.
+  FMV_W_X_RV64,
+  FMV_X_ANYEXTW_RV64
 };
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index ea47d82..0fb20520 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -12,6 +12,20 @@
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
+// RISC-V specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDT_RISCVFMV_W_X_RV64
+    : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i64>]>;
+def SDT_RISCVFMV_X_ANYEXTW_RV64
+    : SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f32>]>;
+
+def riscv_fmv_w_x_rv64
+    : SDNode<"RISCVISD::FMV_W_X_RV64", SDT_RISCVFMV_W_X_RV64>;
+def riscv_fmv_x_anyextw_rv64
+    : SDNode<"RISCVISD::FMV_X_ANYEXTW_RV64", SDT_RISCVFMV_X_ANYEXTW_RV64>;
+
+//===----------------------------------------------------------------------===//
 // Operand and SDNode transformation definitions.
 //===----------------------------------------------------------------------===//
 
@@ -333,3 +347,37 @@
 def : Pat<(sint_to_fp GPR:$rs1), (FCVT_S_W $rs1, 0b111)>;
 def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_WU $rs1, 0b111)>;
 } // Predicates = [HasStdExtF, IsRV32]
+
+let Predicates = [HasStdExtF, IsRV32] in {
+// FP->[u]int. Round-to-zero must be used
+def : Pat<(fp_to_sint FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>;
+def : Pat<(fp_to_uint FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>;
+
+// [u]int->fp. Match GCC and default to using dynamic rounding mode.
+def : Pat<(sint_to_fp GPR:$rs1), (FCVT_S_W $rs1, 0b111)>;
+def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_WU $rs1, 0b111)>;
+} // Predicates = [HasStdExtF, IsRV32]
+
+let Predicates = [HasStdExtF, IsRV64] in {
+def : Pat<(riscv_fmv_w_x_rv64 GPR:$src), (FMV_W_X GPR:$src)>;
+def : Pat<(riscv_fmv_x_anyextw_rv64 FPR32:$src), (FMV_X_W FPR32:$src)>;
+def : Pat<(sexti32 (riscv_fmv_x_anyextw_rv64 FPR32:$src)),
+          (FMV_X_W FPR32:$src)>;
+
+// FP->[u]int32 is mostly handled by the FP->[u]int64 patterns. This is safe
+// because fpto[u|s]i produces poison if the value can't fit into the target.
+// We match the single case below because fcvt.wu.s sign-extends its result so
+// is cheaper than fcvt.lu.s+sext.w.
+def : Pat<(sext_inreg (assertzexti32 (fp_to_uint FPR32:$rs1)), i32),
+          (FCVT_WU_S $rs1, 0b001)>;
+
+// FP->[u]int64
+def : Pat<(fp_to_sint FPR32:$rs1), (FCVT_L_S $rs1, 0b001)>;
+def : Pat<(fp_to_uint FPR32:$rs1), (FCVT_LU_S $rs1, 0b001)>;
+
+// [u]int->fp. Match GCC and default to using dynamic rounding mode.
+def : Pat<(sint_to_fp (sext_inreg GPR:$rs1, i32)), (FCVT_S_W $rs1, 0b111)>;
+def : Pat<(uint_to_fp (zexti32 GPR:$rs1)), (FCVT_S_WU $rs1, 0b111)>;
+def : Pat<(sint_to_fp GPR:$rs1), (FCVT_S_L $rs1, 0b111)>;
+def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_LU $rs1, 0b111)>;
+} // Predicates = [HasStdExtF, IsRV64]