| Connor Abbott | 92638ab | 2017-08-04 18:36:52 +0000 | [diff] [blame] | 1 | //===-- SIFixWWMLiveness.cpp - Fix WWM live intervals ---------===// | 
|  | 2 | // | 
|  | 3 | //                     The LLVM Compiler Infrastructure | 
|  | 4 | // | 
|  | 5 | // This file is distributed under the University of Illinois Open Source | 
|  | 6 | // License. See LICENSE.TXT for details. | 
|  | 7 | // | 
|  | 8 | //===----------------------------------------------------------------------===// | 
|  | 9 | // | 
|  | 10 | /// \file | 
|  | 11 | /// \brief Computations in WWM can overwrite values in inactive channels for | 
|  | 12 | /// variables that the register allocator thinks are dead. This pass adds fake | 
|  | 13 | /// uses of those variables to WWM instructions to make sure that they aren't | 
|  | 14 | /// overwritten. | 
|  | 15 | /// | 
|  | 16 | /// As an example, consider this snippet: | 
|  | 17 | /// %vgpr0 = V_MOV_B32_e32 0.0 | 
|  | 18 | /// if (...) { | 
|  | 19 | ///   %vgpr1 = ... | 
| Francis Visoiu Mistrih | a8a83d1 | 2017-12-07 10:40:31 +0000 | [diff] [blame] | 20 | ///   %vgpr2 = WWM killed %vgpr1 | 
|  | 21 | ///   ... = killed %vgpr2 | 
| Connor Abbott | 92638ab | 2017-08-04 18:36:52 +0000 | [diff] [blame] | 22 | ///   %vgpr0 = V_MOV_B32_e32 1.0 | 
|  | 23 | /// } | 
|  | 24 | /// ... = %vgpr0 | 
|  | 25 | /// | 
|  | 26 | /// The live intervals of %vgpr0 don't overlap with those of %vgpr1. Normally, | 
|  | 27 | /// we can safely allocate %vgpr0 and %vgpr1 in the same register, since | 
|  | 28 | /// writing %vgpr1 would only write to channels that would be clobbered by the | 
|  | 29 | /// second write to %vgpr0 anyways. But if %vgpr1 is written with WWM enabled, | 
|  | 30 | /// it would clobber even the inactive channels for which the if-condition is | 
|  | 31 | /// false, for which %vgpr0 is supposed to be 0. This pass adds an implicit use | 
|  | 32 | /// of %vgpr0 to the WWM instruction to make sure they aren't allocated to the | 
|  | 33 | /// same register. | 
|  | 34 | /// | 
/// In general, we need to figure out which registers have inactive channels
/// that are eventually used and that might be accidentally clobbered by a WWM
/// instruction. We approximate this using two conditions:
|  | 38 | /// | 
|  | 39 | /// 1. A definition of the variable reaches the WWM instruction. | 
|  | 40 | /// 2. The variable would be live at the WWM instruction if all its defs were | 
|  | 41 | /// partial defs (i.e. considered as a use), ignoring normal uses. | 
|  | 42 | /// | 
|  | 43 | /// If a register matches both conditions, then we add an implicit use of it to | 
|  | 44 | /// the WWM instruction. Condition #2 is the heart of the matter: every | 
|  | 45 | /// definition is really a partial definition, since every VALU instruction is | 
|  | 46 | /// implicitly predicated.  We can usually ignore this, but WWM forces us not | 
|  | 47 | /// to. Condition #1 prevents false positives if the variable is undefined at | 
|  | 48 | /// the WWM instruction anyways. This is overly conservative in certain cases, | 
|  | 49 | /// especially in uniform control flow, but this is a workaround anyways until | 
|  | 50 | /// LLVM gains the notion of predicated uses and definitions of variables. | 
|  | 51 | /// | 
|  | 52 | //===----------------------------------------------------------------------===// | 
|  | 53 |  | 
|  | 54 | #include "AMDGPU.h" | 
|  | 55 | #include "AMDGPUSubtarget.h" | 
|  | 56 | #include "SIInstrInfo.h" | 
|  | 57 | #include "SIRegisterInfo.h" | 
|  | 58 | #include "llvm/ADT/DepthFirstIterator.h" | 
|  | 59 | #include "llvm/ADT/SparseBitVector.h" | 
| Matthias Braun | f842297 | 2017-12-13 02:51:04 +0000 | [diff] [blame] | 60 | #include "llvm/CodeGen/LiveIntervals.h" | 
| Connor Abbott | 92638ab | 2017-08-04 18:36:52 +0000 | [diff] [blame] | 61 | #include "llvm/CodeGen/MachineFunctionPass.h" | 
|  | 62 | #include "llvm/CodeGen/Passes.h" | 
| David Blaikie | b3bde2e | 2017-11-17 01:07:10 +0000 | [diff] [blame] | 63 | #include "llvm/CodeGen/TargetRegisterInfo.h" | 
| Connor Abbott | 92638ab | 2017-08-04 18:36:52 +0000 | [diff] [blame] | 64 |  | 
|  | 65 | using namespace llvm; | 
|  | 66 |  | 
|  | 67 | #define DEBUG_TYPE "si-fix-wwm-liveness" | 
|  | 68 |  | 
|  | 69 | namespace { | 
|  | 70 |  | 
|  | 71 | class SIFixWWMLiveness : public MachineFunctionPass { | 
|  | 72 | private: | 
|  | 73 | LiveIntervals *LIS = nullptr; | 
|  | 74 | const SIRegisterInfo *TRI; | 
|  | 75 | MachineRegisterInfo *MRI; | 
|  | 76 |  | 
|  | 77 | public: | 
|  | 78 | static char ID; | 
|  | 79 |  | 
|  | 80 | SIFixWWMLiveness() : MachineFunctionPass(ID) { | 
|  | 81 | initializeSIFixWWMLivenessPass(*PassRegistry::getPassRegistry()); | 
|  | 82 | } | 
|  | 83 |  | 
|  | 84 | bool runOnMachineFunction(MachineFunction &MF) override; | 
|  | 85 |  | 
|  | 86 | bool runOnWWMInstruction(MachineInstr &MI); | 
|  | 87 |  | 
|  | 88 | void addDefs(const MachineInstr &MI, SparseBitVector<> &set); | 
|  | 89 |  | 
|  | 90 | StringRef getPassName() const override { return "SI Fix WWM Liveness"; } | 
|  | 91 |  | 
|  | 92 | void getAnalysisUsage(AnalysisUsage &AU) const override { | 
|  | 93 | // Should preserve the same set that TwoAddressInstructions does. | 
|  | 94 | AU.addPreserved<SlotIndexes>(); | 
|  | 95 | AU.addPreserved<LiveIntervals>(); | 
|  | 96 | AU.addPreservedID(LiveVariablesID); | 
|  | 97 | AU.addPreservedID(MachineLoopInfoID); | 
|  | 98 | AU.addPreservedID(MachineDominatorsID); | 
|  | 99 | AU.setPreservesCFG(); | 
|  | 100 | MachineFunctionPass::getAnalysisUsage(AU); | 
|  | 101 | } | 
|  | 102 | }; | 
|  | 103 |  | 
|  | 104 | } // End anonymous namespace. | 
|  | 105 |  | 
|  | 106 | INITIALIZE_PASS(SIFixWWMLiveness, DEBUG_TYPE, | 
|  | 107 | "SI fix WWM liveness", false, false) | 
|  | 108 |  | 
|  | 109 | char SIFixWWMLiveness::ID = 0; | 
|  | 110 |  | 
|  | 111 | char &llvm::SIFixWWMLivenessID = SIFixWWMLiveness::ID; | 
|  | 112 |  | 
|  | 113 | FunctionPass *llvm::createSIFixWWMLivenessPass() { | 
|  | 114 | return new SIFixWWMLiveness(); | 
|  | 115 | } | 
|  | 116 |  | 
|  | 117 | void SIFixWWMLiveness::addDefs(const MachineInstr &MI, SparseBitVector<> &Regs) | 
|  | 118 | { | 
|  | 119 | for (const MachineOperand &Op : MI.defs()) { | 
|  | 120 | if (Op.isReg()) { | 
|  | 121 | unsigned Reg = Op.getReg(); | 
|  | 122 | if (TRI->isVGPR(*MRI, Reg)) | 
|  | 123 | Regs.set(Reg); | 
|  | 124 | } | 
|  | 125 | } | 
|  | 126 | } | 
|  | 127 |  | 
|  | 128 | bool SIFixWWMLiveness::runOnWWMInstruction(MachineInstr &WWM) { | 
|  | 129 | MachineBasicBlock *MBB = WWM.getParent(); | 
|  | 130 |  | 
|  | 131 | // Compute the registers that are live out of MI by figuring out which defs | 
|  | 132 | // are reachable from MI. | 
|  | 133 | SparseBitVector<> LiveOut; | 
|  | 134 |  | 
|  | 135 | for (auto II = MachineBasicBlock::iterator(WWM), IE = | 
|  | 136 | MBB->end(); II != IE; ++II) { | 
|  | 137 | addDefs(*II, LiveOut); | 
|  | 138 | } | 
|  | 139 |  | 
|  | 140 | for (df_iterator<MachineBasicBlock *> I = ++df_begin(MBB), | 
|  | 141 | E = df_end(MBB); | 
|  | 142 | I != E; ++I) { | 
|  | 143 | for (const MachineInstr &MI : **I) { | 
|  | 144 | addDefs(MI, LiveOut); | 
|  | 145 | } | 
|  | 146 | } | 
|  | 147 |  | 
|  | 148 | // Compute the registers that reach MI. | 
|  | 149 | SparseBitVector<> Reachable; | 
|  | 150 |  | 
|  | 151 | for (auto II = ++MachineBasicBlock::reverse_iterator(WWM), IE = | 
|  | 152 | MBB->rend(); II != IE; ++II) { | 
|  | 153 | addDefs(*II, Reachable); | 
|  | 154 | } | 
|  | 155 |  | 
|  | 156 | for (idf_iterator<MachineBasicBlock *> I = ++idf_begin(MBB), | 
|  | 157 | E = idf_end(MBB); | 
|  | 158 | I != E; ++I) { | 
|  | 159 | for (const MachineInstr &MI : **I) { | 
|  | 160 | addDefs(MI, Reachable); | 
|  | 161 | } | 
|  | 162 | } | 
|  | 163 |  | 
|  | 164 | // find the intersection, and add implicit uses. | 
|  | 165 | LiveOut &= Reachable; | 
|  | 166 |  | 
|  | 167 | bool Modified = false; | 
|  | 168 | for (unsigned Reg : LiveOut) { | 
|  | 169 | WWM.addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true)); | 
|  | 170 | if (LIS) { | 
|  | 171 | // FIXME: is there a better way to update the live interval? | 
|  | 172 | LIS->removeInterval(Reg); | 
|  | 173 | LIS->createAndComputeVirtRegInterval(Reg); | 
|  | 174 | } | 
|  | 175 | Modified = true; | 
|  | 176 | } | 
|  | 177 |  | 
|  | 178 | return Modified; | 
|  | 179 | } | 
|  | 180 |  | 
|  | 181 | bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) { | 
|  | 182 | bool Modified = false; | 
|  | 183 |  | 
|  | 184 | // This doesn't actually need LiveIntervals, but we can preserve them. | 
|  | 185 | LIS = getAnalysisIfAvailable<LiveIntervals>(); | 
|  | 186 |  | 
|  | 187 | const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); | 
|  | 188 | const SIInstrInfo *TII = ST.getInstrInfo(); | 
|  | 189 |  | 
|  | 190 | TRI = &TII->getRegisterInfo(); | 
|  | 191 | MRI = &MF.getRegInfo(); | 
|  | 192 |  | 
|  | 193 | for (MachineBasicBlock &MBB : MF) { | 
|  | 194 | for (MachineInstr &MI : MBB) { | 
|  | 195 | if (MI.getOpcode() == AMDGPU::EXIT_WWM) { | 
|  | 196 | Modified |= runOnWWMInstruction(MI); | 
|  | 197 | } | 
|  | 198 | } | 
|  | 199 | } | 
|  | 200 |  | 
|  | 201 | return Modified; | 
|  | 202 | } |