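[AMDGPU] Add the adjusted FP as a live-in register.

When getReservedPrivateSegmentWaveByteOffsetReg moves the scratch wave
offset register to a newly picked SGPR, it sets the frame offset (FP)
register to that same SGPR. It now reports this case to its caller, and
emitEntryFunctionPrologue adds the register as a live-in to every other
basic block whenever the FP was adjusted, in addition to the existing case
where the offset register is used.

While parsing the YAML argument info, also accumulate the number of user
and system SGPRs for each argument descriptor that is successfully parsed.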

Reviewers: arsenm, rampitec

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64145

llvm-svn: 366223
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 672e491..0ea8db0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1067,15 +1067,15 @@
 
   auto parseAndCheckArgument = [&](const Optional<yaml::SIArgument> &A,
                                    const TargetRegisterClass &RC,
-                                   ArgDescriptor &Arg) {
+                                   ArgDescriptor &Arg, unsigned UserSGPRs,
+                                   unsigned SystemSGPRs) {
     // Skip parsing if it's not present.
     if (!A)
       return false;
 
     if (A->IsRegister) {
       unsigned Reg;
-      if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value,
-                                      Error)) {
+      if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value, Error)) {
         SourceRange = A->RegisterName.SourceRange;
         return true;
       }
@@ -1088,60 +1088,62 @@
     if (A->Mask)
       Arg = ArgDescriptor::createArg(Arg, A->Mask.getValue());
 
+    MFI->NumUserSGPRs += UserSGPRs;
+    MFI->NumSystemSGPRs += SystemSGPRs;
     return false;
   };
 
   if (YamlMFI.ArgInfo &&
       (parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer,
                              AMDGPU::SReg_128RegClass,
-                             MFI->ArgInfo.PrivateSegmentBuffer) ||
+                             MFI->ArgInfo.PrivateSegmentBuffer, 4, 0) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr,
-                             AMDGPU::SReg_64RegClass,
-                             MFI->ArgInfo.DispatchPtr) ||
+                             AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr,
+                             2, 0) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->QueuePtr, AMDGPU::SReg_64RegClass,
-                             MFI->ArgInfo.QueuePtr) ||
+                             MFI->ArgInfo.QueuePtr, 2, 0) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->KernargSegmentPtr,
                              AMDGPU::SReg_64RegClass,
-                             MFI->ArgInfo.KernargSegmentPtr) ||
+                             MFI->ArgInfo.KernargSegmentPtr, 2, 0) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->DispatchID,
-                             AMDGPU::SReg_64RegClass,
-                             MFI->ArgInfo.DispatchID) ||
+                             AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchID,
+                             2, 0) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->FlatScratchInit,
                              AMDGPU::SReg_64RegClass,
-                             MFI->ArgInfo.FlatScratchInit) ||
+                             MFI->ArgInfo.FlatScratchInit, 2, 0) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize,
                              AMDGPU::SGPR_32RegClass,
-                             MFI->ArgInfo.PrivateSegmentSize) ||
+                             MFI->ArgInfo.PrivateSegmentSize, 0, 0) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX,
-                             AMDGPU::SGPR_32RegClass,
-                             MFI->ArgInfo.WorkGroupIDX) ||
+                             AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX,
+                             0, 1) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDY,
-                             AMDGPU::SGPR_32RegClass,
-                             MFI->ArgInfo.WorkGroupIDY) ||
+                             AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDY,
+                             0, 1) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDZ,
-                             AMDGPU::SGPR_32RegClass,
-                             MFI->ArgInfo.WorkGroupIDZ) ||
+                             AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDZ,
+                             0, 1) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupInfo,
                              AMDGPU::SGPR_32RegClass,
-                             MFI->ArgInfo.WorkGroupInfo) ||
+                             MFI->ArgInfo.WorkGroupInfo, 0, 1) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentWaveByteOffset,
                              AMDGPU::SGPR_32RegClass,
-                             MFI->ArgInfo.PrivateSegmentWaveByteOffset) ||
+                             MFI->ArgInfo.PrivateSegmentWaveByteOffset, 0, 1) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitArgPtr,
                              AMDGPU::SReg_64RegClass,
-                             MFI->ArgInfo.ImplicitArgPtr) ||
+                             MFI->ArgInfo.ImplicitArgPtr, 0, 0) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitBufferPtr,
                              AMDGPU::SReg_64RegClass,
-                             MFI->ArgInfo.ImplicitBufferPtr) ||
+                             MFI->ArgInfo.ImplicitBufferPtr, 2, 0) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDX,
                              AMDGPU::VGPR_32RegClass,
-                             MFI->ArgInfo.WorkItemIDX) ||
+                             MFI->ArgInfo.WorkItemIDX, 0, 0) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDY,
                              AMDGPU::VGPR_32RegClass,
-                             MFI->ArgInfo.WorkItemIDY) ||
+                             MFI->ArgInfo.WorkItemIDY, 0, 0) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDZ,
                              AMDGPU::VGPR_32RegClass,
-                             MFI->ArgInfo.WorkItemIDZ)))
+                             MFI->ArgInfo.WorkItemIDZ, 0, 0)))
     return true;
 
   MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 44647d8..feab6be 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -311,7 +311,8 @@
 }
 
 // Shift down registers reserved for the scratch wave offset.
-unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
+std::pair<unsigned, bool>
+SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
     const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI,
     SIMachineFunctionInfo *MFI, MachineFunction &MF) const {
   MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -322,17 +323,17 @@
   // No replacement necessary.
   if (ScratchWaveOffsetReg == AMDGPU::NoRegister ||
       (!hasFP(MF) && !MRI.isPhysRegUsed(ScratchWaveOffsetReg))) {
-    return AMDGPU::NoRegister;
+    return std::make_pair(AMDGPU::NoRegister, false);
   }
 
   if (ST.hasSGPRInitBug())
-    return ScratchWaveOffsetReg;
+    return std::make_pair(ScratchWaveOffsetReg, false);
 
   unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
 
   ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
   if (NumPreloaded > AllSGPRs.size())
-    return ScratchWaveOffsetReg;
+    return std::make_pair(ScratchWaveOffsetReg, false);
 
   AllSGPRs = AllSGPRs.slice(NumPreloaded);
 
@@ -353,10 +354,11 @@
   unsigned ReservedRegCount = 13;
 
   if (AllSGPRs.size() < ReservedRegCount)
-    return ScratchWaveOffsetReg;
+    return std::make_pair(ScratchWaveOffsetReg, false);
 
   bool HandledScratchWaveOffsetReg =
     ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
+  bool FPAdjusted = false;
 
   for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) {
     // Pick the first unallocated SGPR. Be careful not to pick an alias of the
@@ -374,12 +376,13 @@
         MFI->setScratchWaveOffsetReg(Reg);
         MFI->setFrameOffsetReg(Reg);
         ScratchWaveOffsetReg = Reg;
+        FPAdjusted = true;
         break;
       }
     }
   }
 
-  return ScratchWaveOffsetReg;
+  return std::make_pair(ScratchWaveOffsetReg, FPAdjusted);
 }
 
 void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
@@ -415,7 +418,9 @@
   unsigned ScratchRsrcReg
     = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);
 
-  unsigned ScratchWaveOffsetReg =
+  unsigned ScratchWaveOffsetReg;
+  bool FPAdjusted;
+  std::tie(ScratchWaveOffsetReg, FPAdjusted) =
       getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
 
   // We need to insert initialization of the scratch resource descriptor.
@@ -453,7 +458,7 @@
     if (&OtherBB == &MBB)
       continue;
 
-    if (OffsetRegUsed)
+    if (OffsetRegUsed || FPAdjusted)
       OtherBB.addLiveIn(ScratchWaveOffsetReg);
 
     if (ResourceRegUsed)
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
index 1954328..c644f47 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -66,7 +66,7 @@
     SIMachineFunctionInfo *MFI,
     MachineFunction &MF) const;
 
-  unsigned getReservedPrivateSegmentWaveByteOffsetReg(
+  std::pair<unsigned, bool> getReservedPrivateSegmentWaveByteOffsetReg(
       const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI,
       SIMachineFunctionInfo *MFI, MachineFunction &MF) const;