AMDGPU: Figure out private memory regs after lowering
Introduce pseudo-registers for registers needed for stack
access, which are replaced during finalizeLowering.
Note these pseudo-registers are currently only used for the
used register location, and not for determining their
input argument register.
This is better because it avoids the need to try to predict
whether a call will be emitted from the IR, and also
detects stack objects introduced by legalization.
Test changes are from the HasStackObjects check being more
accurate since stack objects introduced during legalization
are now known.
llvm-svn: 308325
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 08a64de3..ed962ac 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -158,7 +158,7 @@
// No replacement necessary.
if (ScratchWaveOffsetReg == AMDGPU::NoRegister ||
!MRI.isPhysRegUsed(ScratchWaveOffsetReg)) {
- assert(MFI->getStackPtrOffsetReg() == AMDGPU::NoRegister);
+ assert(MFI->getStackPtrOffsetReg() == AMDGPU::SP_REG);
return std::make_pair(AMDGPU::NoRegister, AMDGPU::NoRegister);
}
@@ -250,7 +250,9 @@
emitFlatScratchInit(ST, MF, MBB);
unsigned SPReg = MFI->getStackPtrOffsetReg();
- if (SPReg != AMDGPU::NoRegister) {
+ if (SPReg != AMDGPU::SP_REG) {
+ assert(MRI.isReserved(SPReg) && "SPReg used but not reserved");
+
DebugLoc DL;
int64_t StackSize = MF.getFrameInfo().getStackSize();
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 36d4732..2356405 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1171,8 +1171,7 @@
static void reservePrivateMemoryRegs(const TargetMachine &TM,
MachineFunction &MF,
const SIRegisterInfo &TRI,
- SIMachineFunctionInfo &Info,
- bool NeedSP) {
+ SIMachineFunctionInfo &Info) {
// Now that we've figured out where the scratch register inputs are, see if
// should reserve the arguments and use them directly.
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -1234,15 +1233,6 @@
Info.setScratchWaveOffsetReg(ReservedOffsetReg);
}
}
-
- if (NeedSP) {
- unsigned ReservedStackPtrOffsetReg = TRI.reservedStackPtrOffsetReg(MF);
- Info.setStackPtrOffsetReg(ReservedStackPtrOffsetReg);
-
- assert(Info.getStackPtrOffsetReg() != Info.getFrameOffsetReg());
- assert(!TRI.isSubRegister(Info.getScratchRSrcReg(),
- Info.getStackPtrOffsetReg()));
- }
}
SDValue SITargetLowering::LowerFormalArguments(
@@ -1437,25 +1427,13 @@
InVals.push_back(Val);
}
- const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
-
- // TODO: Could maybe omit SP if only tail calls?
- bool NeedSP = FrameInfo.hasCalls() || FrameInfo.hasVarSizedObjects();
-
// Start adding system SGPRs.
if (IsEntryFunc) {
allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsShader);
- reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info, NeedSP);
} else {
CCInfo.AllocateReg(Info->getScratchRSrcReg());
CCInfo.AllocateReg(Info->getScratchWaveOffsetReg());
CCInfo.AllocateReg(Info->getFrameOffsetReg());
-
- if (NeedSP) {
- unsigned StackPtrReg = findFirstFreeSGPR(CCInfo);
- CCInfo.AllocateReg(StackPtrReg);
- Info->setStackPtrOffsetReg(StackPtrReg);
- }
}
return Chains.empty() ? Chain :
@@ -5851,3 +5829,44 @@
}
return TargetLowering::getConstraintType(Constraint);
}
+
+// Figure out which registers should be reserved for stack access. Only after
+// the function is legalized do we know all of the non-spill stack objects or if
+// calls are present.
+void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
+ if (Info->isEntryFunction()) {
+ // Callable functions have fixed registers used for stack access.
+ reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info);
+ }
+
+ // We have to assume the SP is needed in case there are calls in the function
+ // during lowering. Calls are only detected after the function is
+ // lowered. We're about to reserve registers, so don't bother using it if we
+ // aren't really going to use it.
+ bool NeedSP = !Info->isEntryFunction() ||
+ MFI.hasVarSizedObjects() ||
+ MFI.hasCalls();
+
+ if (NeedSP) {
+ unsigned ReservedStackPtrOffsetReg = TRI->reservedStackPtrOffsetReg(MF);
+ Info->setStackPtrOffsetReg(ReservedStackPtrOffsetReg);
+
+ assert(Info->getStackPtrOffsetReg() != Info->getFrameOffsetReg());
+ assert(!TRI->isSubRegister(Info->getScratchRSrcReg(),
+ Info->getStackPtrOffsetReg()));
+ MRI.replaceRegWith(AMDGPU::SP_REG, Info->getStackPtrOffsetReg());
+ }
+
+ MRI.replaceRegWith(AMDGPU::PRIVATE_RSRC_REG, Info->getScratchRSrcReg());
+ MRI.replaceRegWith(AMDGPU::FP_REG, Info->getFrameOffsetReg());
+ MRI.replaceRegWith(AMDGPU::SCRATCH_WAVE_OFFSET_REG,
+ Info->getScratchWaveOffsetReg());
+
+ TargetLoweringBase::finalizeLowering(MF);
+}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 83392a7..e6bb3d6 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -232,6 +232,8 @@
ConstraintType getConstraintType(StringRef Constraint) const override;
SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, const SDLoc &DL,
SDValue V) const;
+
+ void finalizeLowering(MachineFunction &MF) const override;
};
} // End namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 49c8adc..01456a1 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -23,10 +23,10 @@
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
TIDReg(AMDGPU::NoRegister),
- ScratchRSrcReg(AMDGPU::NoRegister),
- ScratchWaveOffsetReg(AMDGPU::NoRegister),
- FrameOffsetReg(AMDGPU::NoRegister),
- StackPtrOffsetReg(AMDGPU::NoRegister),
+ ScratchRSrcReg(AMDGPU::PRIVATE_RSRC_REG),
+ ScratchWaveOffsetReg(AMDGPU::SCRATCH_WAVE_OFFSET_REG),
+ FrameOffsetReg(AMDGPU::FP_REG),
+ StackPtrOffsetReg(AMDGPU::SP_REG),
PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
DispatchPtrUserSGPR(AMDGPU::NoRegister),
QueuePtrUserSGPR(AMDGPU::NoRegister),
@@ -90,6 +90,9 @@
ScratchWaveOffsetReg = AMDGPU::SGPR4;
FrameOffsetReg = AMDGPU::SGPR5;
StackPtrOffsetReg = AMDGPU::SGPR32;
+
+ // FIXME: Not really a system SGPR.
+ PrivateSegmentWaveByteOffsetSystemSGPR = ScratchWaveOffsetReg;
}
CallingConv::ID CC = F->getCallingConv();
@@ -131,7 +134,7 @@
const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
bool MaySpill = ST.isVGPRSpillingEnabled(*F);
- bool HasStackObjects = FrameInfo.hasStackObjects() || FrameInfo.hasCalls();
+ bool HasStackObjects = FrameInfo.hasStackObjects();
if (isEntryFunction()) {
// X, XY, and XYZ are the only supported combinations, so make sure Y is
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index a9a229d..4c7f38a 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -382,10 +382,13 @@
}
void setStackPtrOffsetReg(unsigned Reg) {
- assert(Reg != AMDGPU::NoRegister && "Should never be unset");
StackPtrOffsetReg = Reg;
}
+ // Note the unset value for this is AMDGPU::SP_REG rather than
+ // NoRegister. This is mostly a workaround for MIR tests where state that
+ // can't be directly computed from the function is not preserved in serialized
+ // MIR.
unsigned getStackPtrOffsetReg() const {
return StackPtrOffsetReg;
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index ef6ad4a..4a3fbb4 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -207,7 +207,11 @@
assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
}
+ // We have to assume the SP is needed in case there are calls in the function,
+ // which is detected after the function is lowered. If we aren't really going
+ // to need SP, don't bother reserving it.
unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
+
if (StackPtrReg != AMDGPU::NoRegister) {
reserveRegisterTuples(Reserved, StackPtrReg);
assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 23f8df3..54ea780 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -23,6 +23,13 @@
def VCC_LO : SIReg<"vcc_lo", 106>;
def VCC_HI : SIReg<"vcc_hi", 107>;
+// Pseudo-registers: Used as placeholders during isel and immediately
+// replaced, never seeing the verifier.
+def PRIVATE_RSRC_REG : SIReg<"", 0>;
+def FP_REG : SIReg<"", 0>;
+def SP_REG : SIReg<"", 0>;
+def SCRATCH_WAVE_OFFSET_REG : SIReg<"", 0>;
+
// VCC for 64-bit instructions
def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
DwarfRegAlias<VCC_LO> {
@@ -267,7 +274,8 @@
def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
- SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> {
+ SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT,
+ FP_REG, SP_REG, SCRATCH_WAVE_OFFSET_REG)> {
let AllocationPriority = 7;
}
@@ -314,7 +322,8 @@
let isAllocatable = 0;
}
-def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128, TTMP_128)> {
+def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32,
+ (add SGPR_128, TTMP_128)> {
let AllocationPriority = 10;
}