//===-- SILowerSGPRSpills.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all
// SGPR spills, so must insert CSR SGPR spills as well as expand them.
//
// This pass must never create new SGPR virtual registers.
//
// FIXME: Must stop RegScavenger spills in later passes.
//
//===----------------------------------------------------------------------===//
| 17 | |
| 18 | #include "AMDGPU.h" |
| 19 | #include "AMDGPUSubtarget.h" |
| 20 | #include "SIInstrInfo.h" |
| 21 | #include "SIMachineFunctionInfo.h" |
| 22 | #include "llvm/CodeGen/LiveIntervals.h" |
| 23 | #include "llvm/CodeGen/MachineBasicBlock.h" |
| 24 | #include "llvm/CodeGen/MachineFunction.h" |
| 25 | #include "llvm/CodeGen/MachineFunctionPass.h" |
| 26 | #include "llvm/CodeGen/MachineInstr.h" |
| 27 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
| 28 | #include "llvm/CodeGen/MachineOperand.h" |
| 29 | #include "llvm/CodeGen/VirtRegMap.h" |
Reid Kleckner | 05da2fe | 2019-11-13 13:15:01 -0800 | [diff] [blame^] | 30 | #include "llvm/InitializePasses.h" |
Matt Arsenault | 5b0922f | 2019-07-03 23:32:29 +0000 | [diff] [blame] | 31 | #include "llvm/Target/TargetMachine.h" |
| 32 | |
| 33 | using namespace llvm; |
| 34 | |
| 35 | #define DEBUG_TYPE "si-lower-sgpr-spills" |
| 36 | |
| 37 | using MBBVector = SmallVector<MachineBasicBlock *, 4>; |
| 38 | |
| 39 | namespace { |
| 40 | |
Stanislav Mekhanoshin | 937ff6e7 | 2019-07-11 21:54:13 +0000 | [diff] [blame] | 41 | static cl::opt<bool> EnableSpillVGPRToAGPR( |
| 42 | "amdgpu-spill-vgpr-to-agpr", |
| 43 | cl::desc("Enable spilling VGPRs to AGPRs"), |
| 44 | cl::ReallyHidden, |
| 45 | cl::init(true)); |
| 46 | |
Matt Arsenault | 5b0922f | 2019-07-03 23:32:29 +0000 | [diff] [blame] | 47 | class SILowerSGPRSpills : public MachineFunctionPass { |
| 48 | private: |
| 49 | const SIRegisterInfo *TRI = nullptr; |
| 50 | const SIInstrInfo *TII = nullptr; |
| 51 | VirtRegMap *VRM = nullptr; |
| 52 | LiveIntervals *LIS = nullptr; |
| 53 | |
| 54 | // Save and Restore blocks of the current function. Typically there is a |
| 55 | // single save block, unless Windows EH funclets are involved. |
| 56 | MBBVector SaveBlocks; |
| 57 | MBBVector RestoreBlocks; |
| 58 | |
| 59 | public: |
| 60 | static char ID; |
| 61 | |
| 62 | SILowerSGPRSpills() : MachineFunctionPass(ID) {} |
| 63 | |
| 64 | void calculateSaveRestoreBlocks(MachineFunction &MF); |
| 65 | bool spillCalleeSavedRegs(MachineFunction &MF); |
| 66 | |
| 67 | bool runOnMachineFunction(MachineFunction &MF) override; |
| 68 | |
| 69 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 70 | AU.setPreservesAll(); |
| 71 | MachineFunctionPass::getAnalysisUsage(AU); |
| 72 | } |
| 73 | }; |
| 74 | |
| 75 | } // end anonymous namespace |
| 76 | |
| 77 | char SILowerSGPRSpills::ID = 0; |
| 78 | |
| 79 | INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE, |
| 80 | "SI lower SGPR spill instructions", false, false) |
| 81 | INITIALIZE_PASS_DEPENDENCY(VirtRegMap) |
| 82 | INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE, |
| 83 | "SI lower SGPR spill instructions", false, false) |
| 84 | |
| 85 | char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID; |
| 86 | |
| 87 | /// Insert restore code for the callee-saved registers used in the function. |
| 88 | static void insertCSRSaves(MachineBasicBlock &SaveBlock, |
| 89 | ArrayRef<CalleeSavedInfo> CSI, |
| 90 | LiveIntervals *LIS) { |
| 91 | MachineFunction &MF = *SaveBlock.getParent(); |
| 92 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); |
| 93 | const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); |
| 94 | const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); |
| 95 | |
| 96 | MachineBasicBlock::iterator I = SaveBlock.begin(); |
| 97 | if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) { |
| 98 | for (const CalleeSavedInfo &CS : CSI) { |
| 99 | // Insert the spill to the stack frame. |
| 100 | unsigned Reg = CS.getReg(); |
| 101 | |
Michael Liao | 8d6ea2d | 2019-07-05 20:23:59 +0000 | [diff] [blame] | 102 | MachineInstrSpan MIS(I, &SaveBlock); |
Matt Arsenault | 5b0922f | 2019-07-03 23:32:29 +0000 | [diff] [blame] | 103 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); |
| 104 | |
| 105 | TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC, |
| 106 | TRI); |
| 107 | |
| 108 | if (LIS) { |
| 109 | assert(std::distance(MIS.begin(), I) == 1); |
| 110 | MachineInstr &Inst = *std::prev(I); |
| 111 | |
| 112 | LIS->InsertMachineInstrInMaps(Inst); |
| 113 | LIS->removeAllRegUnitsForPhysReg(Reg); |
| 114 | } |
| 115 | } |
| 116 | } |
| 117 | } |
| 118 | |
| 119 | /// Insert restore code for the callee-saved registers used in the function. |
| 120 | static void insertCSRRestores(MachineBasicBlock &RestoreBlock, |
| 121 | std::vector<CalleeSavedInfo> &CSI, |
| 122 | LiveIntervals *LIS) { |
| 123 | MachineFunction &MF = *RestoreBlock.getParent(); |
| 124 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); |
| 125 | const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); |
| 126 | const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); |
| 127 | |
| 128 | // Restore all registers immediately before the return and any |
| 129 | // terminators that precede it. |
| 130 | MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator(); |
| 131 | |
| 132 | // FIXME: Just emit the readlane/writelane directly |
| 133 | if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) { |
| 134 | for (const CalleeSavedInfo &CI : reverse(CSI)) { |
| 135 | unsigned Reg = CI.getReg(); |
| 136 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); |
| 137 | |
| 138 | TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI); |
| 139 | assert(I != RestoreBlock.begin() && |
| 140 | "loadRegFromStackSlot didn't insert any code!"); |
| 141 | // Insert in reverse order. loadRegFromStackSlot can insert |
| 142 | // multiple instructions. |
| 143 | |
| 144 | if (LIS) { |
| 145 | MachineInstr &Inst = *std::prev(I); |
| 146 | LIS->InsertMachineInstrInMaps(Inst); |
| 147 | LIS->removeAllRegUnitsForPhysReg(Reg); |
| 148 | } |
| 149 | } |
| 150 | } |
| 151 | } |
| 152 | |
| 153 | /// Compute the sets of entry and return blocks for saving and restoring |
| 154 | /// callee-saved registers, and placing prolog and epilog code. |
| 155 | void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) { |
| 156 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 157 | |
| 158 | // Even when we do not change any CSR, we still want to insert the |
| 159 | // prologue and epilogue of the function. |
| 160 | // So set the save points for those. |
| 161 | |
| 162 | // Use the points found by shrink-wrapping, if any. |
| 163 | if (MFI.getSavePoint()) { |
| 164 | SaveBlocks.push_back(MFI.getSavePoint()); |
| 165 | assert(MFI.getRestorePoint() && "Both restore and save must be set"); |
| 166 | MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); |
| 167 | // If RestoreBlock does not have any successor and is not a return block |
| 168 | // then the end point is unreachable and we do not need to insert any |
| 169 | // epilogue. |
| 170 | if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock()) |
| 171 | RestoreBlocks.push_back(RestoreBlock); |
| 172 | return; |
| 173 | } |
| 174 | |
| 175 | // Save refs to entry and return blocks. |
| 176 | SaveBlocks.push_back(&MF.front()); |
| 177 | for (MachineBasicBlock &MBB : MF) { |
| 178 | if (MBB.isEHFuncletEntry()) |
| 179 | SaveBlocks.push_back(&MBB); |
| 180 | if (MBB.isReturnBlock()) |
| 181 | RestoreBlocks.push_back(&MBB); |
| 182 | } |
| 183 | } |
| 184 | |
| 185 | bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) { |
| 186 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 187 | const Function &F = MF.getFunction(); |
| 188 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| 189 | const SIFrameLowering *TFI = ST.getFrameLowering(); |
| 190 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 191 | RegScavenger *RS = nullptr; |
| 192 | |
| 193 | // Determine which of the registers in the callee save list should be saved. |
| 194 | BitVector SavedRegs; |
| 195 | TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS); |
| 196 | |
| 197 | // Add the code to save and restore the callee saved registers. |
| 198 | if (!F.hasFnAttribute(Attribute::Naked)) { |
| 199 | // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is |
| 200 | // necessary for verifier liveness checks. |
| 201 | MFI.setCalleeSavedInfoValid(true); |
| 202 | |
| 203 | std::vector<CalleeSavedInfo> CSI; |
| 204 | const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); |
| 205 | |
| 206 | for (unsigned I = 0; CSRegs[I]; ++I) { |
| 207 | unsigned Reg = CSRegs[I]; |
| 208 | if (SavedRegs.test(Reg)) { |
| 209 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); |
| 210 | int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC), |
| 211 | TRI->getSpillAlignment(*RC), |
| 212 | true); |
| 213 | |
| 214 | CSI.push_back(CalleeSavedInfo(Reg, JunkFI)); |
| 215 | } |
| 216 | } |
| 217 | |
| 218 | if (!CSI.empty()) { |
| 219 | for (MachineBasicBlock *SaveBlock : SaveBlocks) |
| 220 | insertCSRSaves(*SaveBlock, CSI, LIS); |
| 221 | |
| 222 | for (MachineBasicBlock *RestoreBlock : RestoreBlocks) |
| 223 | insertCSRRestores(*RestoreBlock, CSI, LIS); |
| 224 | return true; |
| 225 | } |
| 226 | } |
| 227 | |
| 228 | return false; |
| 229 | } |
| 230 | |
| 231 | bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { |
| 232 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| 233 | TII = ST.getInstrInfo(); |
| 234 | TRI = &TII->getRegisterInfo(); |
| 235 | |
| 236 | VRM = getAnalysisIfAvailable<VirtRegMap>(); |
| 237 | |
| 238 | assert(SaveBlocks.empty() && RestoreBlocks.empty()); |
| 239 | |
| 240 | // First, expose any CSR SGPR spills. This is mostly the same as what PEI |
| 241 | // does, but somewhat simpler. |
| 242 | calculateSaveRestoreBlocks(MF); |
| 243 | bool HasCSRs = spillCalleeSavedRegs(MF); |
| 244 | |
| 245 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 246 | if (!MFI.hasStackObjects() && !HasCSRs) { |
| 247 | SaveBlocks.clear(); |
| 248 | RestoreBlocks.clear(); |
| 249 | return false; |
| 250 | } |
| 251 | |
Stanislav Mekhanoshin | 937ff6e7 | 2019-07-11 21:54:13 +0000 | [diff] [blame] | 252 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
Matt Arsenault | 5b0922f | 2019-07-03 23:32:29 +0000 | [diff] [blame] | 253 | SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); |
Stanislav Mekhanoshin | 937ff6e7 | 2019-07-11 21:54:13 +0000 | [diff] [blame] | 254 | const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs() |
| 255 | && EnableSpillVGPRToAGPR; |
| 256 | |
Matt Arsenault | 5b0922f | 2019-07-03 23:32:29 +0000 | [diff] [blame] | 257 | bool MadeChange = false; |
| 258 | |
Stanislav Mekhanoshin | 937ff6e7 | 2019-07-11 21:54:13 +0000 | [diff] [blame] | 259 | const bool SpillToAGPR = EnableSpillVGPRToAGPR && ST.hasMAIInsts(); |
| 260 | |
| 261 | // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be |
| 262 | // handled as SpilledToReg in regular PrologEpilogInserter. |
| 263 | if ((TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) || |
| 264 | SpillVGPRToAGPR) { |
Matt Arsenault | 5b0922f | 2019-07-03 23:32:29 +0000 | [diff] [blame] | 265 | // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs |
| 266 | // are spilled to VGPRs, in which case we can eliminate the stack usage. |
| 267 | // |
| 268 | // This operates under the assumption that only other SGPR spills are users |
| 269 | // of the frame index. |
| 270 | for (MachineBasicBlock &MBB : MF) { |
| 271 | MachineBasicBlock::iterator Next; |
| 272 | for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) { |
| 273 | MachineInstr &MI = *I; |
| 274 | Next = std::next(I); |
| 275 | |
Stanislav Mekhanoshin | 937ff6e7 | 2019-07-11 21:54:13 +0000 | [diff] [blame] | 276 | if (SpillToAGPR && TII->isVGPRSpill(MI)) { |
| 277 | // Try to eliminate stack used by VGPR spills before frame |
| 278 | // finalization. |
| 279 | unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(), |
| 280 | AMDGPU::OpName::vaddr); |
| 281 | int FI = MI.getOperand(FIOp).getIndex(); |
Daniel Sanders | 0c47611 | 2019-08-15 19:22:08 +0000 | [diff] [blame] | 282 | Register VReg = |
| 283 | TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); |
Stanislav Mekhanoshin | 28550c8 | 2019-07-11 22:30:11 +0000 | [diff] [blame] | 284 | if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI, |
| 285 | TRI->isAGPR(MRI, VReg))) { |
Stanislav Mekhanoshin | 937ff6e7 | 2019-07-11 21:54:13 +0000 | [diff] [blame] | 286 | TRI->eliminateFrameIndex(MI, 0, FIOp, nullptr); |
Stanislav Mekhanoshin | 28550c8 | 2019-07-11 22:30:11 +0000 | [diff] [blame] | 287 | continue; |
| 288 | } |
Stanislav Mekhanoshin | 937ff6e7 | 2019-07-11 21:54:13 +0000 | [diff] [blame] | 289 | } |
| 290 | |
Matt Arsenault | 5b0922f | 2019-07-03 23:32:29 +0000 | [diff] [blame] | 291 | if (!TII->isSGPRSpill(MI)) |
| 292 | continue; |
| 293 | |
| 294 | int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex(); |
| 295 | assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); |
| 296 | if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) { |
| 297 | bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr); |
| 298 | (void)Spilled; |
| 299 | assert(Spilled && "failed to spill SGPR to VGPR when allocated"); |
Bill Wendling | 796ed13 | 2019-07-15 06:35:28 +0000 | [diff] [blame] | 300 | } |
Matt Arsenault | 5b0922f | 2019-07-03 23:32:29 +0000 | [diff] [blame] | 301 | } |
| 302 | } |
| 303 | |
| 304 | for (MachineBasicBlock &MBB : MF) { |
| 305 | for (auto SSpill : FuncInfo->getSGPRSpillVGPRs()) |
| 306 | MBB.addLiveIn(SSpill.VGPR); |
Stanislav Mekhanoshin | 937ff6e7 | 2019-07-11 21:54:13 +0000 | [diff] [blame] | 307 | |
| 308 | for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs()) |
| 309 | MBB.addLiveIn(Reg); |
| 310 | |
| 311 | for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs()) |
| 312 | MBB.addLiveIn(Reg); |
| 313 | |
Matt Arsenault | 5b0922f | 2019-07-03 23:32:29 +0000 | [diff] [blame] | 314 | MBB.sortUniqueLiveIns(); |
| 315 | } |
| 316 | |
Matt Arsenault | 5b0922f | 2019-07-03 23:32:29 +0000 | [diff] [blame] | 317 | MadeChange = true; |
| 318 | } |
| 319 | |
| 320 | SaveBlocks.clear(); |
| 321 | RestoreBlocks.clear(); |
| 322 | |
| 323 | return MadeChange; |
| 324 | } |