AMDGPU: Always use s33 for global scratch wave offset
Every called function could possibly need this to calculate the
absolute address of stack objects, and this avoids inserting a copy
around every call site in the kernel. It's also somewhat cleaner to
keep this in a callee saved SGPR.
llvm-svn: 363990
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 89c797d..bcd320e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2621,20 +2621,12 @@
SmallVector<SDValue, 4> CopyFromChains;
- unsigned OffsetReg = Info->getScratchWaveOffsetReg();
-
// In the HSA case, this should be an identity copy.
SDValue ScratchRSrcReg
= DAG.getCopyFromReg(Chain, DL, Info->getScratchRSrcReg(), MVT::v4i32);
RegsToPass.emplace_back(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg);
CopyFromChains.push_back(ScratchRSrcReg.getValue(1));
- // TODO: Don't hardcode these registers and get from the callee function.
- SDValue ScratchWaveOffsetReg
- = DAG.getCopyFromReg(Chain, DL, OffsetReg, MVT::i32);
- RegsToPass.emplace_back(AMDGPU::SGPR4, ScratchWaveOffsetReg);
- CopyFromChains.push_back(ScratchWaveOffsetReg.getValue(1));
-
if (!Info->isEntryFunction()) {
// Avoid clobbering this function's FP value. In the current convention
// callee will overwrite this, so do save/restore around the call site.
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 2ccab85..871a021 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -69,7 +69,7 @@
// Non-entry functions have no special inputs for now, other registers
// required for scratch access.
ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
- ScratchWaveOffsetReg = AMDGPU::SGPR4;
+ ScratchWaveOffsetReg = AMDGPU::SGPR33;
FrameOffsetReg = AMDGPU::SGPR5;
StackPtrOffsetReg = AMDGPU::SGPR32;