//===----------------------- SIFrameLowering.cpp --------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//

#include "SIFrameLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "AMDGPUSubtarget.h"

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"

using namespace llvm;

static bool hasOnlySGPRSpills(const SIMachineFunctionInfo *FuncInfo,
                              const MachineFrameInfo &MFI) {
  return FuncInfo->hasSpilledSGPRs() &&
    (!FuncInfo->hasSpilledVGPRs() && !FuncInfo->hasNonSpillStackObjects());
}

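// All SGPR_128 register tuples the subtarget provides; each tuple covers four
// SGPR_32 registers, so the SGPR budget below is divided by 4.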
static ArrayRef<MCPhysReg> getAllSGPR128(const MachineFunction &MF,
                                         const SIRegisterInfo *TRI) {
  return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
                      TRI->getMaxNumSGPRs(MF) / 4);
}

static ArrayRef<MCPhysReg> getAllSGPRs(const MachineFunction &MF,
                                       const SIRegisterInfo *TRI) {
  return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(),
                      TRI->getMaxNumSGPRs(MF));
}

void SIFrameLowering::emitFlatScratchInit(const SIInstrInfo *TII,
                                          const SIRegisterInfo *TRI,
                                          MachineFunction &MF,
                                          MachineBasicBlock &MBB) const {
  // We don't need this if we only have spills since there is no user facing
  // scratch.

  // TODO: If we know we don't have flat instructions earlier, we can omit
  // this from the input registers.
  //
  // TODO: We only need to know if we access scratch space through a flat
  // pointer. Because we only detect if flat instructions are used at all,
  // this will be used more often than necessary on VI.

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  unsigned FlatScratchInitReg
    = TRI->getPreloadedValue(MF, SIRegisterInfo::FLAT_SCRATCH_INIT);

  MachineRegisterInfo &MRI = MF.getRegInfo();
  MRI.addLiveIn(FlatScratchInitReg);
  MBB.addLiveIn(FlatScratchInitReg);

  // Copy the size in bytes.
  unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
    .addReg(FlatScrInitHi, RegState::Kill);

  unsigned FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();

  // Add wave offset in bytes to private base offset.
  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
    .addReg(FlatScrInitLo)
    .addReg(ScratchWaveOffsetReg);

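  // At this point FLAT_SCR_LO holds the scratch size in bytes (copied above),
  // and FLAT_SCR_HI must receive the per-wave scratch base in 256-byte units.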
  // Convert offset to 256-byte units.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
    .addReg(FlatScrInitLo, RegState::Kill)
    .addImm(8);
}

unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
  const SISubtarget &ST,
  const SIInstrInfo *TII,
  const SIRegisterInfo *TRI,
  SIMachineFunctionInfo *MFI,
  MachineFunction &MF) const {

  // We need to insert initialization of the scratch resource descriptor.
  unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
  assert(ScratchRsrcReg != AMDGPU::NoRegister);

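  // If the SGPR init bug workaround is in effect, or if something other than
  // the default reserved register was already assigned, there is nothing to
  // shift down.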
  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.

  // FIXME: We should be able to eliminate unused input registers, except for
  // the resources required for scratch access. For now we skip over user SGPRs
  // and may leave unused holes.

  // We find the resource first because it has an alignment requirement.

  MachineRegisterInfo &MRI = MF.getRegInfo();

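  // getNumPreloadedSGPRs counts individual SGPRs; getAllSGPR128 returns
  // 4-register tuples, hence the division by 4.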
  unsigned NumPreloaded = MFI->getNumPreloadedSGPRs() / 4;
  // Skip the last 2 elements because the last one is reserved for VCC, and
  // this is the 2nd to last element already.
  for (MCPhysReg Reg : getAllSGPR128(MF, TRI).drop_back(2).slice(NumPreloaded)) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed.
    if (!MRI.isPhysRegUsed(Reg)) {
      assert(MRI.isAllocatable(Reg));
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      return Reg;
    }
  }

  return ScratchRsrcReg;
}

unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
  const SISubtarget &ST,
  const SIInstrInfo *TII,
  const SIRegisterInfo *TRI,
  SIMachineFunctionInfo *MFI,
  MachineFunction &MF) const {
  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
  if (ST.hasSGPRInitBug() ||
      ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF))
    return ScratchWaveOffsetReg;

  unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();

  // We need to drop the registers from the end of the list that we cannot use
  // for the scratch wave offset:
  // + 2 because s102 and s103 do not exist on VI
  // + 2 for vcc
  // + 2 for xnack_mask
  // + 2 for flat_scratch
  // + 4 for the registers reserved for the scratch resource register
  // + 1 for the register reserved for the scratch wave offset. (By excluding
  //     this register from the list to consider, if it is already being used
  //     for the scratch wave offset and there are no other free SGPRs, the
  //     value simply stays in this register.)
  // ----
  //  13
  for (MCPhysReg Reg : getAllSGPRs(MF, TRI).drop_back(13).slice(NumPreloaded)) {
    // Pick the first unallocated SGPR. Be careful not to pick an alias of the
    // scratch descriptor, since we haven't added its uses yet.
    if (!MRI.isPhysRegUsed(Reg)) {
      if (!MRI.isAllocatable(Reg) ||
          TRI->isSubRegisterEq(ScratchRsrcReg, Reg))
        continue;

      MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
      MFI->setScratchWaveOffsetReg(Reg);
      return Reg;
    }
  }

  return ScratchWaveOffsetReg;
}

void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  // Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was
  // specified.
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  if (ST.debuggerEmitPrologue())
    emitDebuggerPrologue(MF, MBB);

  if (!MF.getFrameInfo().hasStackObjects())
    return;

  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // If we only have SGPR spills, we won't actually be using scratch memory
  // since these spill to VGPRs.
  //
  // FIXME: We should be cleaning up these unused SGPR spill frame indices
  // somewhere.
  if (hasOnlySGPRSpills(MFI, MF.getFrameInfo()))
    return;

  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned ScratchRsrcReg
    = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);
  unsigned ScratchWaveOffsetReg
    = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
  assert(ScratchRsrcReg != AMDGPU::NoRegister);
  assert(ScratchWaveOffsetReg != AMDGPU::NoRegister);
  assert(!TRI->isSubRegister(ScratchRsrcReg, ScratchWaveOffsetReg));

  if (MFI->hasFlatScratchInit())
    emitFlatScratchInit(TII, TRI, MF, MBB);

  // We need to insert initialization of the scratch resource descriptor.
  unsigned PreloadedScratchWaveOffsetReg = TRI->getPreloadedValue(
    MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);

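  // Only the AMD HSA code object v2 ABI preloads the private segment buffer
  // descriptor into user SGPRs; other configurations materialize it with
  // relocations further down.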
  unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
  if (ST.isAmdCodeObjectV2()) {
    PreloadedPrivateBufferReg = TRI->getPreloadedValue(
      MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
  }

  // If we reserved the original input registers, we don't need to copy to the
  // reserved registers.
  if (ScratchRsrcReg == PreloadedPrivateBufferReg) {
    // We should always reserve these 5 registers at the same time.
    assert(ScratchWaveOffsetReg == PreloadedScratchWaveOffsetReg &&
           "scratch wave offset and private segment buffer inconsistent");
    return;
  }

  // We added live-ins during argument lowering, but since they were not used
  // they were deleted. We're adding the uses now, so add them back.
  MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
  MBB.addLiveIn(PreloadedScratchWaveOffsetReg);

  if (ST.isAmdCodeObjectV2()) {
    MRI.addLiveIn(PreloadedPrivateBufferReg);
    MBB.addLiveIn(PreloadedPrivateBufferReg);
  }

  // Make the selected registers live throughout the function.
  for (MachineBasicBlock &OtherBB : MF) {
    if (&OtherBB == &MBB)
      continue;

    OtherBB.addLiveIn(ScratchRsrcReg);
    OtherBB.addLiveIn(ScratchWaveOffsetReg);
  }

  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  if (PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) {
    // Make sure we emit the copy for the offset first. We may have chosen to
    // copy the buffer resource into a register that aliases the input offset
    // register.
    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
      .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
  }

  if (ST.isAmdCodeObjectV2()) {
    // Insert copies from argument register.
    assert(
      !TRI->isSubRegisterEq(PreloadedPrivateBufferReg, ScratchRsrcReg) &&
      !TRI->isSubRegisterEq(PreloadedPrivateBufferReg, ScratchWaveOffsetReg));

    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
      .addReg(PreloadedPrivateBufferReg, RegState::Kill);
  } else {
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
    unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
    unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and set up the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();
    BuildMI(MBB, I, DL, SMovB32, Rsrc0)
      .addExternalSymbol("SCRATCH_RSRC_DWORD0")
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc1)
      .addExternalSymbol("SCRATCH_RSRC_DWORD1")
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
      .addImm(Rsrc23 & 0xffffffff)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
      .addImm(Rsrc23 >> 32)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  }
}

void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
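  // Nothing to emit: the prologue only initializes scratch setup registers,
  // and there is currently no stack pointer adjustment to undo here.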
}

void SIFrameLowering::processFunctionBeforeFrameFinalized(
  MachineFunction &MF,
  RegScavenger *RS) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  if (!MFI.hasStackObjects())
    return;

  bool MayNeedScavengingEmergencySlot = MFI.hasStackObjects();

  assert((RS || !MayNeedScavengingEmergencySlot) &&
         "RegScavenger required if spilling");

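  // Reserve an emergency spill slot (one SGPR_32 wide, i.e. 4 bytes) so the
  // register scavenger always has a stack slot available if it runs out of
  // free registers.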
  if (MayNeedScavengingEmergencySlot) {
    int ScavengeFI = MFI.CreateStackObject(
      AMDGPU::SGPR_32RegClass.getSize(),
      AMDGPU::SGPR_32RegClass.getAlignment(), false);
    RS->addScavengingFrameIndex(ScavengeFI);
  }
}

void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF,
                                           MachineBasicBlock &MBB) const {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  MachineBasicBlock::iterator I = MBB.begin();
  DebugLoc DL;

  // For each dimension:
  for (unsigned i = 0; i < 3; ++i) {
    // Get work group ID SGPR, and make it live-in again.
    unsigned WorkGroupIDSGPR = MFI->getWorkGroupIDSGPR(i);
    MF.getRegInfo().addLiveIn(WorkGroupIDSGPR);
    MBB.addLiveIn(WorkGroupIDSGPR);

    // Since SGPRs are spilled into VGPRs, copy work group ID SGPR to VGPR in
    // order to spill it to scratch.
    unsigned WorkGroupIDVGPR =
      MF.getRegInfo().createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), WorkGroupIDVGPR)
      .addReg(WorkGroupIDSGPR);

    // Spill work group ID.
    int WorkGroupIDObjectIdx = MFI->getDebuggerWorkGroupIDStackObjectIndex(i);
    TII->storeRegToStackSlot(MBB, I, WorkGroupIDVGPR, false,
      WorkGroupIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI);

    // Get work item ID VGPR, and make it live-in again.
    unsigned WorkItemIDVGPR = MFI->getWorkItemIDVGPR(i);
    MF.getRegInfo().addLiveIn(WorkItemIDVGPR);
    MBB.addLiveIn(WorkItemIDVGPR);

    // Spill work item ID.
    int WorkItemIDObjectIdx = MFI->getDebuggerWorkItemIDStackObjectIndex(i);
    TII->storeRegToStackSlot(MBB, I, WorkItemIDVGPR, false,
      WorkItemIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI);
  }
}