[RISCV] Codegen support for RV32D floating point load/store, fadd.d, calling conv

fadd.d is required in order to force floating point registers to be used in
test code, as parameters are passed in integer registers in the soft float
ABI.

Much of this patch is concerned with support for passing f64 on RV32D with a
soft-float ABI. Similar to Mips, introduce pseudoinstructions to build an f64
out of a pair of i32 and to split an f64 to a pair of i32. BUILD_PAIR and
EXTRACT_ELEMENT can't be used, as a BITCAST to i64 would be necessary, but i64
is not a legal type.

llvm-svn: 329871
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 834b960..3457ed59 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -47,6 +47,8 @@
 
   if (Subtarget.hasStdExtF())
     addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
+  if (Subtarget.hasStdExtD())
+    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
 
   // Compute derived properties from the register classes.
   computeRegisterProperties(STI.getRegisterInfo());
@@ -119,6 +121,9 @@
     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
   }
 
+  if (Subtarget.hasStdExtD())
+    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+
   setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
   setOperationAction(ISD::ConstantPool, XLenVT, Custom);
@@ -392,18 +397,83 @@
   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
 }
 
+static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
+                                             MachineBasicBlock *BB) {
+  assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
+
+  MachineFunction &MF = *BB->getParent();
+  DebugLoc DL = MI.getDebugLoc();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
+  unsigned LoReg = MI.getOperand(0).getReg();
+  unsigned HiReg = MI.getOperand(1).getReg();
+  unsigned SrcReg = MI.getOperand(2).getReg();
+  const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
+  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
+
+  TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
+                          RI);
+  MachineMemOperand *MMO =
+      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
+                              MachineMemOperand::MOLoad, 8, 8);
+  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
+      .addFrameIndex(FI)
+      .addImm(0)
+      .addMemOperand(MMO);
+  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
+      .addFrameIndex(FI)
+      .addImm(4)
+      .addMemOperand(MMO);
+  MI.eraseFromParent(); // The pseudo instruction is gone now.
+  return BB;
+}
+
+static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
+                                                 MachineBasicBlock *BB) {
+  assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
+         "Unexpected instruction");
+
+  MachineFunction &MF = *BB->getParent();
+  DebugLoc DL = MI.getDebugLoc();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
+  unsigned DstReg = MI.getOperand(0).getReg();
+  unsigned LoReg = MI.getOperand(1).getReg();
+  unsigned HiReg = MI.getOperand(2).getReg();
+  const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
+  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
+
+  MachineMemOperand *MMO =
+      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
+                              MachineMemOperand::MOStore, 8, 8);
+  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
+      .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
+      .addFrameIndex(FI)
+      .addImm(0)
+      .addMemOperand(MMO);
+  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
+      .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
+      .addFrameIndex(FI)
+      .addImm(4)
+      .addMemOperand(MMO);
+  TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
+  MI.eraseFromParent(); // The pseudo instruction is gone now.
+  return BB;
+}
+
 MachineBasicBlock *
 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                  MachineBasicBlock *BB) const {
-  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
-  DebugLoc DL = MI.getDebugLoc();
-
   switch (MI.getOpcode()) {
   default:
     llvm_unreachable("Unexpected instr type to insert");
   case RISCV::Select_GPR_Using_CC_GPR:
   case RISCV::Select_FPR32_Using_CC_GPR:
     break;
+  case RISCV::BuildPairF64Pseudo:
+    return emitBuildPairF64Pseudo(MI, BB);
+  case RISCV::SplitF64Pseudo:
+    return emitSplitF64Pseudo(MI, BB);
   }
 
   // To "insert" a SELECT instruction, we actually have to insert the triangle
@@ -417,7 +487,9 @@
   //     |  IfFalseMBB
   //     | /
   //    TailMBB
+  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
   const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  DebugLoc DL = MI.getDebugLoc();
   MachineFunction::iterator I = ++BB->getIterator();
 
   MachineBasicBlock *HeadMBB = BB;
@@ -542,7 +614,6 @@
     LocVT = MVT::i32;
     LocInfo = CCValAssign::BCvt;
   }
-  assert(LocVT == XLenVT && "Unexpected LocVT");
 
   // Any return value split in to more than two values can't be returned
   // directly.
@@ -572,6 +643,28 @@
   assert(PendingLocs.size() == PendingArgFlags.size() &&
          "PendingLocs and PendingArgFlags out of sync");
 
+  // Handle passing f64 on RV32D with a soft float ABI.
+  if (XLen == 32 && ValVT == MVT::f64) {
+    assert(!ArgFlags.isSplit() && PendingLocs.empty() ||
+           "Can't lower f64 if it is split");
+    // Depending on available argument GPRS, f64 may be passed in a pair of
+    // GPRs, split between a GPR and the stack, or passed completely on the
+    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
+    // cases.
+    unsigned Reg = State.AllocateReg(ArgGPRs);
+    LocVT = MVT::i32;
+    if (!Reg) {
+      unsigned StackOffset = State.AllocateStack(8, 8);
+      State.addLoc(
+          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
+      return false;
+    }
+    if (!State.AllocateReg(ArgGPRs))
+      State.AllocateStack(4, 4);
+    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+    return false;
+  }
+
   // Split arguments might be passed indirectly, so keep track of the pending
   // values.
   if (ArgFlags.isSplit() || !PendingLocs.empty()) {
@@ -733,6 +826,43 @@
   return Val;
 }
 
+static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
+                                       const CCValAssign &VA, const SDLoc &DL) {
+  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
+         "Unexpected VA");
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+  if (VA.isMemLoc()) {
+    // f64 is passed on the stack.
+    int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
+    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+    return DAG.getLoad(MVT::f64, DL, Chain, FIN,
+                       MachinePointerInfo::getFixedStack(MF, FI));
+  }
+
+  assert(VA.isRegLoc() && "Expected register VA assignment");
+
+  unsigned LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
+  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
+  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
+  SDValue Hi;
+  if (VA.getLocReg() == RISCV::X17) {
+    // Second half of f64 is passed on the stack.
+    int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
+    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
+                     MachinePointerInfo::getFixedStack(MF, FI));
+  } else {
+    // Second half of f64 is passed in another GPR.
+    unsigned HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
+    RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
+    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
+  }
+  return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
+}
+
 // Transform physical registers into virtual registers.
 SDValue RISCVTargetLowering::LowerFormalArguments(
     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
@@ -763,7 +893,11 @@
     CCValAssign &VA = ArgLocs[i];
     assert(VA.getLocVT() == XLenVT && "Unhandled argument type");
     SDValue ArgValue;
-    if (VA.isRegLoc())
+    // Passing f64 on RV32D with a soft float ABI must be handled as a special
+    // case.
+    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
+      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
+    else if (VA.isRegLoc())
       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
     else
       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
@@ -917,6 +1051,37 @@
     SDValue ArgValue = OutVals[i];
     ISD::ArgFlagsTy Flags = Outs[i].Flags;
 
+    // Handle passing f64 on RV32D with a soft float ABI as a special case.
+    bool IsF64OnRV32DSoftABI =
+        VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
+    if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
+      SDValue SplitF64 = DAG.getNode(
+          RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
+      SDValue Lo = SplitF64.getValue(0);
+      SDValue Hi = SplitF64.getValue(1);
+
+      unsigned RegLo = VA.getLocReg();
+      RegsToPass.push_back(std::make_pair(RegLo, Lo));
+
+      if (RegLo == RISCV::X17) {
+        // Second half of f64 is passed on the stack.
+        // Work out the address of the stack slot.
+        if (!StackPtr.getNode())
+          StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
+        // Emit the store.
+        MemOpChains.push_back(
+            DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
+      } else {
+        // Second half of f64 is passed in another GPR.
+        unsigned RegHigh = RegLo + 1;
+        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
+      }
+      continue;
+    }
+
+    // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
+    // as any other MemLoc.
+
     // Promote the value if needed.
     // For now, only handle fully promoted and indirect arguments.
     switch (VA.getLocInfo()) {
@@ -1033,11 +1198,21 @@
 
   // Copy all of the result registers out of their specified physreg.
   for (auto &VA : RVLocs) {
-    // Copy the value out, gluing the copy to the end of the call sequence.
-    SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
-                                          VA.getLocVT(), Glue);
+    // Copy the value out
+    SDValue RetValue =
+        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
+    // Glue the RetValue to the end of the call sequence
     Chain = RetValue.getValue(1);
     Glue = RetValue.getValue(2);
+    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
+      assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
+      SDValue RetValue2 =
+          DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
+      Chain = RetValue2.getValue(1);
+      Glue = RetValue2.getValue(2);
+      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
+                             RetValue2);
+    }
 
     switch (VA.getLocInfo()) {
     default:
@@ -1102,7 +1277,7 @@
   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                     nullptr);
 
-  SDValue Flag;
+  SDValue Glue;
   SmallVector<SDValue, 4> RetOps(1, Chain);
 
   // Copy the result values into the output registers.
@@ -1110,20 +1285,38 @@
     SDValue Val = OutVals[i];
     CCValAssign &VA = RVLocs[i];
     assert(VA.isRegLoc() && "Can only return in registers!");
-    Val = packIntoRegLoc(DAG, Val, VA, DL);
 
-    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Flag);
+    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
+      // Handle returning f64 on RV32D with a soft float ABI.
+      assert(VA.isRegLoc() && "Expected return via registers");
+      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
+                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
+      SDValue Lo = SplitF64.getValue(0);
+      SDValue Hi = SplitF64.getValue(1);
+      unsigned RegLo = VA.getLocReg();
+      unsigned RegHi = RegLo + 1;
+      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
+      Glue = Chain.getValue(1);
+      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
+      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
+      Glue = Chain.getValue(1);
+      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
+    } else {
+      // Handle a 'normal' return.
+      Val = packIntoRegLoc(DAG, Val, VA, DL);
+      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
 
-    // Guarantee that all emitted copies are stuck together.
-    Flag = Chain.getValue(1);
-    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+      // Guarantee that all emitted copies are stuck together.
+      Glue = Chain.getValue(1);
+      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+    }
   }
 
   RetOps[0] = Chain; // Update chain.
 
-  // Add the flag if we have it.
-  if (Flag.getNode()) {
-    RetOps.push_back(Flag);
+  // Add the glue node if we have it.
+  if (Glue.getNode()) {
+    RetOps.push_back(Glue);
   }
 
   return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
@@ -1139,6 +1332,10 @@
     return "RISCVISD::CALL";
   case RISCVISD::SELECT_CC:
     return "RISCVISD::SELECT_CC";
+  case RISCVISD::BuildPairF64:
+    return "RISCVISD::BuildPairF64";
+  case RISCVISD::SplitF64:
+    return "RISCVISD::SplitF64";
   }
   return nullptr;
 }