//===----------------------- SIFrameLowering.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//

#include "SIFrameLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"

using namespace llvm;

#define DEBUG_TYPE "frame-info"


static ArrayRef<MCPhysReg> getAllSGPR128(const GCNSubtarget &ST,
                                         const MachineFunction &MF) {
  return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
                      ST.getMaxNumSGPRs(MF) / 4);
}

static ArrayRef<MCPhysReg> getAllSGPRs(const GCNSubtarget &ST,
                                       const MachineFunction &MF) {
  return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(),
                      ST.getMaxNumSGPRs(MF));
}

// Find a scratch register that we can use at the start of the prologue to
// re-align the stack pointer. We avoid using callee-save registers since they
// may appear to be free when this is called from canUseAsPrologue (during
// shrink wrapping), but then no longer be free when this is called from
// emitPrologue.
//
// FIXME: This is a bit conservative, since in the above case we could use one
// of the callee-save registers as a scratch temp to re-align the stack pointer,
// but we would then have to make sure that we were in fact saving at least one
// callee-save register in the prologue, which is additional complexity that
// doesn't seem worth the benefit.
static unsigned findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
                                                 LivePhysRegs &LiveRegs,
                                                 const TargetRegisterClass &RC,
                                                 bool Unused = false) {
  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);

  if (Unused) {
    // We are looking for a register that can be used throughout the entire
    // function, so any use is unacceptable.
    for (unsigned Reg : RC) {
      if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
        return Reg;
    }
  } else {
    for (unsigned Reg : RC) {
      if (LiveRegs.available(MRI, Reg))
        return Reg;
    }
  }

  // If we require an unused register, this is used in contexts where failure
  // is an option and there is an alternative plan. In other contexts, this
  // must succeed.
  if (!Unused)
    report_fatal_error("failed to find free scratch register");

  return AMDGPU::NoRegister;
}

static MCPhysReg findUnusedSGPRNonCalleeSaved(MachineRegisterInfo &MRI) {
  LivePhysRegs LiveRegs;
  LiveRegs.init(*MRI.getTargetRegisterInfo());
  return findScratchNonCalleeSaveRegister(
    MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);
}

// We need to emit stack operations specially here because the frame register
// used is different from the one getFrameRegister would return for the rest
// of the function.
static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I,
                             const SIInstrInfo *TII, unsigned SpillReg,
                             unsigned ScratchRsrcReg, unsigned SPReg, int FI) {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();

  int64_t Offset = MFI.getObjectOffset(FI);

  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, 4,
      MFI.getObjectAlignment(FI));
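  // Note: the 12-bit immediate checked below is the limit of what
  // BUFFER_STORE_DWORD_OFFSET can encode directly; larger offsets are
  // materialized in a scratch VGPR and stored with the OFFEN form instead.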
  if (isUInt<12>(Offset)) {
    BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFSET))
      .addReg(SpillReg, RegState::Kill)
      .addReg(ScratchRsrcReg)
      .addReg(SPReg)
      .addImm(Offset)
      .addImm(0) // glc
      .addImm(0) // slc
      .addImm(0) // tfe
      .addImm(0) // dlc
      .addMemOperand(MMO);
    return;
  }

  MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
    MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);

  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
    .addImm(Offset);

  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFEN))
    .addReg(SpillReg, RegState::Kill)
    .addReg(OffsetReg, RegState::Kill)
    .addReg(ScratchRsrcReg)
    .addReg(SPReg)
    .addImm(0)
    .addImm(0) // glc
    .addImm(0) // slc
    .addImm(0) // tfe
    .addImm(0) // dlc
    .addMemOperand(MMO);
}

static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I,
                              const SIInstrInfo *TII, unsigned SpillReg,
                              unsigned ScratchRsrcReg, unsigned SPReg, int FI) {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();
  int64_t Offset = MFI.getObjectOffset(FI);

  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, 4,
      MFI.getObjectAlignment(FI));
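  // Same 12-bit immediate offset limit as in buildPrologSpill above.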
  if (isUInt<12>(Offset)) {
    BuildMI(MBB, I, DebugLoc(),
            TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFSET), SpillReg)
      .addReg(ScratchRsrcReg)
      .addReg(SPReg)
      .addImm(Offset)
      .addImm(0) // glc
      .addImm(0) // slc
      .addImm(0) // tfe
      .addImm(0) // dlc
      .addMemOperand(MMO);
    return;
  }

  MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
    MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);

  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
    .addImm(Offset);

  BuildMI(MBB, I, DebugLoc(),
          TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), SpillReg)
    .addReg(OffsetReg, RegState::Kill)
    .addReg(ScratchRsrcReg)
    .addReg(SPReg)
    .addImm(0)
    .addImm(0) // glc
    .addImm(0) // slc
    .addImm(0) // tfe
    .addImm(0) // dlc
    .addMemOperand(MMO);
}

void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST,
                                          MachineFunction &MF,
                                          MachineBasicBlock &MBB) const {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo* TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // We don't need this if we only have spills since there is no user facing
  // scratch.

  // TODO: If we know we don't have flat instructions earlier, we can omit
  // this from the input registers.
  //
  // TODO: We only need to know if we access scratch space through a flat
  // pointer. Because we only detect if flat instructions are used at all,
  // this will be used more often than necessary on VI.

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  unsigned FlatScratchInitReg
    = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);

  MachineRegisterInfo &MRI = MF.getRegInfo();
  MRI.addLiveIn(FlatScratchInitReg);
  MBB.addLiveIn(FlatScratchInitReg);

  unsigned FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
  unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);

  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();

  // Do a 64-bit pointer add.
  if (ST.flatScratchIsPointer()) {
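    // On GFX10 the flat scratch base is programmed through the FLAT_SCR_LO/HI
    // hardware registers via s_setreg, rather than by writing the FLAT_SCR
    // register pair directly as on older targets.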
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
        .addReg(FlatScrInitLo)
        .addReg(ScratchWaveOffsetReg);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
        .addReg(FlatScrInitHi)
        .addImm(0);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
        addReg(FlatScrInitLo).
        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
        addReg(FlatScrInitHi).
        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      return;
    }

    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
      .addReg(FlatScrInitHi)
      .addImm(0);

    return;
  }

  assert(ST.getGeneration() < AMDGPUSubtarget::GFX10);

  // Copy the size in bytes.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
    .addReg(FlatScrInitHi, RegState::Kill);

  // Add wave offset in bytes to private base offset.
  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
    .addReg(FlatScrInitLo)
    .addReg(ScratchWaveOffsetReg);

  // Convert offset to 256-byte units.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
    .addReg(FlatScrInitLo, RegState::Kill)
    .addImm(8);
}

unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
  const GCNSubtarget &ST,
  const SIInstrInfo *TII,
  const SIRegisterInfo *TRI,
  SIMachineFunctionInfo *MFI,
  MachineFunction &MF) const {
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // We need to insert initialization of the scratch resource descriptor.
  unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
  if (ScratchRsrcReg == AMDGPU::NoRegister ||
      !MRI.isPhysRegUsed(ScratchRsrcReg))
    return AMDGPU::NoRegister;

  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.

  // FIXME: We should be able to eliminate unused input registers. We only
  // cannot do this for the resources required for scratch access. For now we
  // skip over user SGPRs and may leave unused holes.

  // We find the resource first because it has an alignment requirement.

  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
  ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF);
  AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      return Reg;
    }
  }

  return ScratchRsrcReg;
}

// Shift down registers reserved for the scratch wave offset.
unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
    const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI,
    SIMachineFunctionInfo *MFI, MachineFunction &MF) const {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();

  assert(MFI->isEntryFunction());

  // No replacement necessary.
  if (ScratchWaveOffsetReg == AMDGPU::NoRegister ||
      (!hasFP(MF) && !MRI.isPhysRegUsed(ScratchWaveOffsetReg))) {
    return AMDGPU::NoRegister;
  }

  if (ST.hasSGPRInitBug())
    return ScratchWaveOffsetReg;

  unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();

  ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
  if (NumPreloaded > AllSGPRs.size())
    return ScratchWaveOffsetReg;

  AllSGPRs = AllSGPRs.slice(NumPreloaded);

  // We need to drop registers from the end of the list that we cannot use
  // for the scratch wave offset.
  // + 2 because s102 and s103 do not exist on VI.
  // + 2 for vcc
  // + 2 for xnack_mask
  // + 2 for flat_scratch
  // + 4 for registers reserved for scratch resource register
  // + 1 for register reserved for scratch wave offset. (By excluding this
  //     register from the list to consider, it means that when this
  //     register is being used for the scratch wave offset and there
  //     are no other free SGPRs, then the value will stay in this register.)
  // + 1 if stack pointer is used.
  // ----
  //  13 (+1)
  unsigned ReservedRegCount = 13;

  if (AllSGPRs.size() < ReservedRegCount)
    return ScratchWaveOffsetReg;

  bool HandledScratchWaveOffsetReg =
    ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);

  for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) {
    // Pick the first unallocated SGPR. Be careful not to pick an alias of the
    // scratch descriptor, since we haven't added its uses yet.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
      if (!HandledScratchWaveOffsetReg) {
        HandledScratchWaveOffsetReg = true;

        MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
        if (MFI->getScratchWaveOffsetReg() == MFI->getStackPtrOffsetReg()) {
          assert(!hasFP(MF));
          MFI->setStackPtrOffsetReg(Reg);
        }

        MFI->setScratchWaveOffsetReg(Reg);
        MFI->setFrameOffsetReg(Reg);
        ScratchWaveOffsetReg = Reg;
        break;
      }
    }
  }

  return ScratchWaveOffsetReg;
}

void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // If we only have SGPR spills, we won't actually be using scratch memory
  // since these spill to VGPRs.
  //
  // FIXME: We should be cleaning up these unused SGPR spill frame indices
  // somewhere.

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();

  // We need to do the replacement of the private segment buffer and wave offset
  // register even if there are no stack objects. There could be stores to undef
  // or a constant without an associated object.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  if (MFI->hasFlatScratchInit())
    emitFlatScratchInit(ST, MF, MBB);

  unsigned ScratchRsrcReg
    = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);

  unsigned ScratchWaveOffsetReg =
    getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);

  // We need to insert initialization of the scratch resource descriptor.
  unsigned PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
    AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);

  unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedPrivateBufferReg = MFI->getPreloadedReg(
      AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
  }

  bool OffsetRegUsed = ScratchWaveOffsetReg != AMDGPU::NoRegister &&
                       MRI.isPhysRegUsed(ScratchWaveOffsetReg);
  bool ResourceRegUsed = ScratchRsrcReg != AMDGPU::NoRegister &&
                         MRI.isPhysRegUsed(ScratchRsrcReg);

  // FIXME: Hack to not crash in situations which emitted an error.
  if (PreloadedScratchWaveOffsetReg == AMDGPU::NoRegister)
    return;

  // We added live-ins during argument lowering, but since they were not used
  // they were deleted. We're adding the uses now, so add them back.
  MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
  MBB.addLiveIn(PreloadedScratchWaveOffsetReg);

  if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
    assert(ST.isAmdHsaOrMesa(F) || ST.isMesaGfxShader(F));
    MRI.addLiveIn(PreloadedPrivateBufferReg);
    MBB.addLiveIn(PreloadedPrivateBufferReg);
  }

  // Make the register selected live throughout the function.
  for (MachineBasicBlock &OtherBB : MF) {
    if (&OtherBB == &MBB)
      continue;

    if (OffsetRegUsed)
      OtherBB.addLiveIn(ScratchWaveOffsetReg);

    if (ResourceRegUsed)
      OtherBB.addLiveIn(ScratchRsrcReg);
  }

  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  // If we reserved the original input registers, we don't need to copy to the
  // reserved registers.

  bool CopyBuffer = ResourceRegUsed &&
    PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
    ST.isAmdHsaOrMesa(F) &&
    ScratchRsrcReg != PreloadedPrivateBufferReg;

  // This needs to be careful of the copying order to avoid overwriting one of
  // the input registers before it's been copied to its final
  // destination. Usually the offset should be copied first.
  bool CopyBufferFirst = TRI->isSubRegisterEq(PreloadedPrivateBufferReg,
                                              ScratchWaveOffsetReg);
  if (CopyBuffer && CopyBufferFirst) {
    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
      .addReg(PreloadedPrivateBufferReg, RegState::Kill);
  }

  unsigned SPReg = MFI->getStackPtrOffsetReg();
  assert(SPReg != AMDGPU::SP_REG);

  // FIXME: Remove the isPhysRegUsed checks
  const bool HasFP = hasFP(MF);

  if (HasFP || OffsetRegUsed) {
    assert(ScratchWaveOffsetReg);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
      .addReg(PreloadedScratchWaveOffsetReg, HasFP ? RegState::Kill : 0);
  }

  if (CopyBuffer && !CopyBufferFirst) {
    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
      .addReg(PreloadedPrivateBufferReg, RegState::Kill);
  }

  if (ResourceRegUsed) {
    emitEntryFunctionScratchSetup(ST, MF, MBB, MFI, I,
                                  PreloadedPrivateBufferReg, ScratchRsrcReg);
  }

  if (HasFP) {
    DebugLoc DL;
    const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
    int64_t StackSize = FrameInfo.getStackSize();
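    // Note: getStackSize() is a per-lane byte size; the wave-level scratch
    // offset added to SP below is scaled by the wavefront size.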
    // On kernel entry, the private scratch wave offset is the SP value.
    if (StackSize == 0) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), SPReg)
        .addReg(MFI->getScratchWaveOffsetReg());
    } else {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), SPReg)
        .addReg(MFI->getScratchWaveOffsetReg())
        .addImm(StackSize * ST.getWavefrontSize());
    }
  }
}

// Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set.
void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
    MachineFunction &MF, MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI,
    MachineBasicBlock::iterator I, unsigned PreloadedPrivateBufferReg,
    unsigned ScratchRsrcReg) const {

  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const Function &Fn = MF.getFunction();
  DebugLoc DL;

  if (ST.isAmdPalOS()) {
    // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC.
    unsigned RsrcLo = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
    unsigned RsrcHi = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
    unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    if (MFI->getGITPtrHigh() != 0xffffffff) {
      BuildMI(MBB, I, DL, SMovB32, RsrcHi)
        .addImm(MFI->getGITPtrHigh())
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    } else {
      const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
      BuildMI(MBB, I, DL, GetPC64, Rsrc01);
    }
    auto GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
    if (ST.hasMergedShaders()) {
      switch (MF.getFunction().getCallingConv()) {
      case CallingConv::AMDGPU_HS:
      case CallingConv::AMDGPU_GS:
        // Low GIT address is passed in s8 rather than s0 for an LS+HS or
        // ES+GS merged shader on gfx9+.
        GitPtrLo = AMDGPU::SGPR8;
        break;
      default:
        break;
      }
    }
    MF.getRegInfo().addLiveIn(GitPtrLo);
    MBB.addLiveIn(GitPtrLo);
    BuildMI(MBB, I, DL, SMovB32, RsrcLo)
      .addReg(GitPtrLo)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    PointerType *PtrTy =
      PointerType::get(Type::getInt64Ty(MF.getFunction().getContext()),
                       AMDGPUAS::CONSTANT_ADDRESS);
    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
    auto MMO = MF.getMachineMemOperand(PtrInfo,
                                       MachineMemOperand::MOLoad |
                                       MachineMemOperand::MOInvariant |
                                       MachineMemOperand::MODereferenceable,
                                       16, 4);
    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::getSMRDEncodedOffset(Subtarget, Offset);
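    // getSMRDEncodedOffset converts the byte offset into the immediate
    // encoding expected by this subtarget's scalar memory loads.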
    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
      .addReg(Rsrc01)
      .addImm(EncodedOffset) // offset
      .addImm(0) // glc
      .addImm(0) // dlc
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
      .addMemOperand(MMO);
    return;
  }
  if (ST.isMesaGfxShader(Fn)
      || (PreloadedPrivateBufferReg == AMDGPU::NoRegister)) {
    assert(!ST.isAmdHsaOrMesa(Fn));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and setup the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();

    if (MFI->hasImplicitBufferPtr()) {
      unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);

        BuildMI(MBB, I, DL, Mov64, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      } else {
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);

        PointerType *PtrTy =
          PointerType::get(Type::getInt64Ty(MF.getFunction().getContext()),
                           AMDGPUAS::CONSTANT_ADDRESS);
        MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
        auto MMO = MF.getMachineMemOperand(PtrInfo,
                                           MachineMemOperand::MOLoad |
                                           MachineMemOperand::MOInvariant |
                                           MachineMemOperand::MODereferenceable,
                                           8, 4);
        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addImm(0) // offset
          .addImm(0) // glc
          .addImm(0) // dlc
          .addMemOperand(MMO)
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
      }
    } else {
      unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
        .addExternalSymbol("SCRATCH_RSRC_DWORD0")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
        .addExternalSymbol("SCRATCH_RSRC_DWORD1")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    }

    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
      .addImm(Rsrc23 & 0xffffffff)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
      .addImm(Rsrc23 >> 32)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  }
}

bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
  switch (ID) {
  case TargetStackID::Default:
  case TargetStackID::NoAlloc:
  case TargetStackID::SGPRSpill:
    return true;
  }
  llvm_unreachable("Invalid TargetStackID::Value");
}

void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction()) {
    emitEntryFunctionPrologue(MF, MBB);
    return;
  }

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  unsigned FramePtrReg = FuncInfo->getFrameOffsetReg();
  LivePhysRegs LiveRegs;

  MachineBasicBlock::iterator MBBI = MBB.begin();
  DebugLoc DL;

  bool HasFP = false;
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = NumBytes;
  // To avoid clobbering VGPRs in lanes that weren't active on function entry,
  // turn on all lanes before doing the spill to memory.
  unsigned ScratchExecCopy = AMDGPU::NoRegister;

  // Emit the copy if we need an FP, and are using a free SGPR to save it.
  if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->SGPRForFPSaveRestoreCopy)
      .addReg(FramePtrReg)
      .setMIFlag(MachineInstr::FrameSetup);
  }

  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
         : FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI.hasValue())
      continue;

    if (ScratchExecCopy == AMDGPU::NoRegister) {
      if (LiveRegs.empty()) {
        LiveRegs.init(TRI);
        LiveRegs.addLiveIns(MBB);
        if (FuncInfo->SGPRForFPSaveRestoreCopy)
          LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
      }

      ScratchExecCopy
        = findScratchNonCalleeSaveRegister(MRI, LiveRegs,
                                           *TRI.getWaveMaskRegClass());
      assert(FuncInfo->SGPRForFPSaveRestoreCopy != ScratchExecCopy);

      const unsigned OrSaveExec = ST.isWave32() ?
        AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
      BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec),
              ScratchExecCopy)
        .addImm(-1);
    }

    buildPrologSpill(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
                     FuncInfo->getScratchRSrcReg(),
                     StackPtrReg,
                     Reg.FI.getValue());
  }

  if (ScratchExecCopy != AMDGPU::NoRegister) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
      .addReg(ScratchExecCopy, RegState::Kill);
    LiveRegs.addReg(ScratchExecCopy);
  }


  if (FuncInfo->FramePointerSaveIndex) {
    const int FI = FuncInfo->FramePointerSaveIndex.getValue();
    assert(!MFI.isDeadObjectIndex(FI) &&
           MFI.getStackID(FI) == TargetStackID::SGPRSpill);
    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
      = FuncInfo->getSGPRToVGPRSpills(FI);
    assert(Spill.size() == 1);

    // Save FP before setting it up.
    // FIXME: This should respect spillSGPRToVGPR;
    BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
            Spill[0].VGPR)
      .addReg(FramePtrReg)
      .addImm(Spill[0].Lane)
      .addReg(Spill[0].VGPR, RegState::Undef);
  }

  if (TRI.needsStackRealignment(MF)) {
    HasFP = true;
    const unsigned Alignment = MFI.getMaxAlignment();
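    // MaxAlignment is a per-lane byte alignment; the unswizzled, wave-level
    // stack pointer below is therefore adjusted using the alignment scaled by
    // the wavefront size.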
    RoundedSize += Alignment;
    if (LiveRegs.empty()) {
      LiveRegs.init(TRI);
      LiveRegs.addLiveIns(MBB);
      LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
    }

    unsigned ScratchSPReg = findScratchNonCalleeSaveRegister(
      MRI, LiveRegs, AMDGPU::SReg_32_XM0RegClass);
    assert(ScratchSPReg != AMDGPU::NoRegister &&
           ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy);

    // s_add_u32 tmp_reg, s32, NumBytes
    // s_and_b32 s32, tmp_reg, 0b111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), ScratchSPReg)
      .addReg(StackPtrReg)
      .addImm((Alignment - 1) * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameSetup);
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
      .addReg(ScratchSPReg, RegState::Kill)
      .addImm(-Alignment * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameSetup);
    FuncInfo->setIsStackRealigned(true);
  } else if ((HasFP = hasFP(MF))) {
    // If we need a base pointer, set it up here. It's whatever the value of
    // the stack pointer is at this point. Any variable size objects will be
    // allocated after this, so we can still use the base pointer to reference
    // locals.
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
      .addReg(StackPtrReg)
      .setMIFlag(MachineInstr::FrameSetup);
  }

  if (HasFP && RoundedSize != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
      .addReg(StackPtrReg)
      .addImm(RoundedSize * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameSetup);
  }

  assert(!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister ||
                    FuncInfo->FramePointerSaveIndex) &&
         "Needed to save FP but didn't save it anywhere");

  assert(HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy == AMDGPU::NoRegister &&
                   !FuncInfo->FramePointerSaveIndex) &&
         "Saved FP but didn't need it");
}

void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  LivePhysRegs LiveRegs;
  DebugLoc DL;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = FuncInfo->isStackRealigned() ?
    NumBytes + MFI.getMaxAlignment() : NumBytes;

  if (RoundedSize != 0 && hasFP(MF)) {
    const unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
      .addReg(StackPtrReg)
      .addImm(RoundedSize * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->getFrameOffsetReg())
      .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
      .setMIFlag(MachineInstr::FrameSetup);
  }

  if (FuncInfo->FramePointerSaveIndex) {
    const int FI = FuncInfo->FramePointerSaveIndex.getValue();

    assert(!MF.getFrameInfo().isDeadObjectIndex(FI) &&
           MF.getFrameInfo().getStackID(FI) == TargetStackID::SGPRSpill);

    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
      = FuncInfo->getSGPRToVGPRSpills(FI);
    assert(Spill.size() == 1);
    BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
            FuncInfo->getFrameOffsetReg())
      .addReg(Spill[0].VGPR)
      .addImm(Spill[0].Lane);
  }

  unsigned ScratchExecCopy = AMDGPU::NoRegister;
  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
         : FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI.hasValue())
      continue;

    const SIRegisterInfo &TRI = TII->getRegisterInfo();
    if (ScratchExecCopy == AMDGPU::NoRegister) {
      // See emitPrologue
      if (LiveRegs.empty()) {
        LiveRegs.init(*ST.getRegisterInfo());
        LiveRegs.addLiveOuts(MBB);
        LiveRegs.stepBackward(*MBBI);
      }

      ScratchExecCopy = findScratchNonCalleeSaveRegister(
        MRI, LiveRegs, *TRI.getWaveMaskRegClass());
      LiveRegs.removeReg(ScratchExecCopy);

      const unsigned OrSaveExec =
        ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;

      BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy)
        .addImm(-1);
    }

    buildEpilogReload(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
                      FuncInfo->getScratchRSrcReg(),
                      FuncInfo->getStackPtrOffsetReg(), Reg.FI.getValue());
  }

  if (ScratchExecCopy != AMDGPU::NoRegister) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
      .addReg(ScratchExecCopy, RegState::Kill);
  }
}

// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
// memory. They should have been removed by now.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I))
      return false;
  }

  return true;
}


#ifndef NDEBUG
static bool allSGPRSpillsAreDead(const MachineFrameInfo &MFI,
                                 Optional<int> FramePointerSaveIndex) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I) &&
        MFI.getStackID(I) == TargetStackID::SGPRSpill &&
        FramePointerSaveIndex && I != FramePointerSaveIndex) {
      return false;
    }
  }

  return true;
}
#endif

int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                            unsigned &FrameReg) const {
  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();

  FrameReg = RI->getFrameRegister(MF);
  return MF.getFrameInfo().getObjectOffset(FI);
}

void SIFrameLowering::processFunctionBeforeFrameFinalized(
  MachineFunction &MF,
  RegScavenger *RS) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  assert(allSGPRSpillsAreDead(MFI, None) &&
         "SGPR spill should have been removed in SILowerSGPRSpills");

  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
  // but currently hasNonSpillStackObjects is set only from source
  // allocas. Stack temps produced from legalization are not counted currently.
  if (!allStackObjectsAreDead(MFI)) {
    assert(RS && "RegScavenger required if spilling");
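    // Entry functions keep the emergency scavenging slot at a known fixed
    // offset of 0; other functions allocate an ordinary stack object for it.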
    if (FuncInfo->isEntryFunction()) {
      int ScavengeFI = MFI.CreateFixedObject(
        TRI->getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
      RS->addScavengingFrameIndex(ScavengeFI);
    } else {
      int ScavengeFI = MFI.CreateStackObject(
        TRI->getSpillSize(AMDGPU::SGPR_32RegClass),
        TRI->getSpillAlignment(AMDGPU::SGPR_32RegClass),
        false);
      RS->addScavengingFrameIndex(ScavengeFI);
    }
  }
}

// Only report VGPRs to generic code.
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                           BitVector &SavedVGPRs,
                                           RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // Ignore the SGPRs the default implementation found.
  SavedVGPRs.clearBitsNotInMask(TRI->getAllVGPRRegMask());

  // hasFP only knows about stack objects that already exist. We're now
  // determining the stack slots that will be created, so we have to predict
  // them. Stack objects force FP usage with calls.
  //
  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
  // don't want to report it here.
  //
  // FIXME: Is this really hasReservedCallFrame?
  const bool WillHaveFP =
    FrameInfo.hasCalls() &&
    (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));

  // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
  // so don't allow the default insertion to handle them.
  for (auto SSpill : MFI->getSGPRSpillVGPRs())
    SavedVGPRs.reset(SSpill.VGPR);

  const bool HasFP = WillHaveFP || hasFP(MF);
  if (!HasFP)
    return;
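  // From here on we need an FP. Prefer a free lane in a VGPR that is already
  // being spilled; failing that, copy the FP to an unused non-callee-saved
  // SGPR; as a last resort, create a new SGPR spill slot (which may force an
  // additional VGPR spill).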
  if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
    int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
                                                    TargetStackID::SGPRSpill);

    // If there is already a VGPR with free lanes, use it. We may already have
    // to pay the penalty for spilling a CSR VGPR.
    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
      llvm_unreachable("allocate SGPR spill should have worked");

    MFI->FramePointerSaveIndex = NewFI;

    LLVM_DEBUG(
      auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
      dbgs() << "Spilling FP to " << printReg(Spill.VGPR, TRI)
             << ':' << Spill.Lane << '\n');
    return;
  }

  MFI->SGPRForFPSaveRestoreCopy = findUnusedSGPRNonCalleeSaved(MF.getRegInfo());

  if (!MFI->SGPRForFPSaveRestoreCopy) {
    // There's no free lane to spill, and no free register to save FP, so we're
    // forced to spill another VGPR to use for the spill.
    int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
                                                    TargetStackID::SGPRSpill);
    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
      llvm_unreachable("allocate SGPR spill should have worked");
    MFI->FramePointerSaveIndex = NewFI;

    LLVM_DEBUG(
      auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
      dbgs() << "FP requires fallback spill to " << printReg(Spill.VGPR, TRI)
             << ':' << Spill.Lane << '\n';);
  } else {
    LLVM_DEBUG(dbgs() << "Saving FP with copy to " <<
               printReg(MFI->SGPRForFPSaveRestoreCopy, TRI) << '\n');
  }
}

void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
                                               BitVector &SavedRegs,
                                               RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // The SP is specifically managed and we don't want extra spills of it.
  SavedRegs.reset(MFI->getStackPtrOffsetReg());
  SavedRegs.clearBitsInMask(TRI->getAllVGPRRegMask());
}

bool SIFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (!FuncInfo->SGPRForFPSaveRestoreCopy)
    return false;
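  // Mark the FP entry as saved by copying into the reserved SGPR rather than
  // to a stack slot; returning false lets the generic code assign spill slots
  // for the remaining CSRs as usual.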
  for (auto &CS : CSI) {
    if (CS.getReg() == FuncInfo->getFrameOffsetReg()) {
      if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister)
        CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
      break;
    }
  }

  return false;
}

MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
  MachineFunction &MF,
  MachineBasicBlock &MBB,
  MachineBasicBlock::iterator I) const {
  int64_t Amount = I->getOperand(0).getImm();
  if (Amount == 0)
    return MBB.erase(I);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  if (!hasReservedCallFrame(MF)) {
    unsigned Align = getStackAlignment();

    Amount = alignTo(Amount, Align);
    assert(isUInt<32>(Amount) && "exceeded stack address space size");
    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
    unsigned SPReg = MFI->getStackPtrOffsetReg();

    unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
    BuildMI(MBB, I, DL, TII->get(Op), SPReg)
      .addReg(SPReg)
      .addImm(Amount * ST.getWavefrontSize());
  } else if (CalleePopAmount != 0) {
    llvm_unreachable("is this used?");
  }

  return MBB.erase(I);
}

bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  if (MFI.hasCalls()) {
    // All offsets are unsigned, so need to be addressed in the same direction
    // as stack growth.

    // FIXME: This function is pretty broken, since it can be called before the
    // frame layout is determined or CSR spills are inserted.
    if (MFI.getStackSize() != 0)
      return true;

    // For the entry point, the input wave scratch offset must be copied to the
    // API SP if there are calls.
    if (MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction())
      return true;
  }

  return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
         MFI.hasStackMap() || MFI.hasPatchPoint() ||
         MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF) ||
         MF.getTarget().Options.DisableFramePointerElim(MF);
}