|  | //===-- SIFixWWMLiveness.cpp - Fix WWM live intervals ---------===// | 
|  | // | 
|  | //                     The LLVM Compiler Infrastructure | 
|  | // | 
|  | // This file is distributed under the University of Illinois Open Source | 
|  | // License. See LICENSE.TXT for details. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | /// \file | 
|  | /// Computations in WWM can overwrite values in inactive channels for | 
|  | /// variables that the register allocator thinks are dead. This pass adds fake | 
|  | /// uses of those variables to WWM instructions to make sure that they aren't | 
|  | /// overwritten. | 
|  | /// | 
|  | /// As an example, consider this snippet: | 
|  | /// %vgpr0 = V_MOV_B32_e32 0.0 | 
|  | /// if (...) { | 
|  | ///   %vgpr1 = ... | 
|  | ///   %vgpr2 = WWM killed %vgpr1 | 
|  | ///   ... = killed %vgpr2 | 
|  | ///   %vgpr0 = V_MOV_B32_e32 1.0 | 
|  | /// } | 
|  | /// ... = %vgpr0 | 
|  | /// | 
|  | /// The live intervals of %vgpr0 don't overlap with those of %vgpr1. Normally, | 
|  | /// we can safely allocate %vgpr0 and %vgpr1 in the same register, since | 
|  | /// writing %vgpr1 would only write to channels that would be clobbered by the | 
|  | /// second write to %vgpr0 anyways. But if %vgpr1 is written with WWM enabled, | 
|  | /// it would clobber even the inactive channels for which the if-condition is | 
|  | /// false, for which %vgpr0 is supposed to be 0. This pass adds an implicit use | 
|  | /// of %vgpr0 to the WWM instruction to make sure they aren't allocated to the | 
|  | /// same register. | 
|  | /// | 
|  | /// In general, we need to figure out what registers might have their inactive | 
|  | /// channels which are eventually used accidentally clobbered by a WWM | 
|  | /// instruction. We approximate this using two conditions: | 
|  | /// | 
|  | /// 1. A definition of the variable reaches the WWM instruction. | 
|  | /// 2. The variable would be live at the WWM instruction if all its defs were | 
|  | /// partial defs (i.e. considered as a use), ignoring normal uses. | 
|  | /// | 
|  | /// If a register matches both conditions, then we add an implicit use of it to | 
|  | /// the WWM instruction. Condition #2 is the heart of the matter: every | 
|  | /// definition is really a partial definition, since every VALU instruction is | 
|  | /// implicitly predicated.  We can usually ignore this, but WWM forces us not | 
|  | /// to. Condition #1 prevents false positives if the variable is undefined at | 
|  | /// the WWM instruction anyways. This is overly conservative in certain cases, | 
|  | /// especially in uniform control flow, but this is a workaround anyways until | 
|  | /// LLVM gains the notion of predicated uses and definitions of variables. | 
|  | /// | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "AMDGPU.h" | 
|  | #include "AMDGPUSubtarget.h" | 
|  | #include "SIInstrInfo.h" | 
|  | #include "SIRegisterInfo.h" | 
|  | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" | 
|  | #include "llvm/ADT/DepthFirstIterator.h" | 
|  | #include "llvm/ADT/SparseBitVector.h" | 
|  | #include "llvm/CodeGen/LiveIntervals.h" | 
|  | #include "llvm/CodeGen/MachineFunctionPass.h" | 
|  | #include "llvm/CodeGen/Passes.h" | 
|  | #include "llvm/CodeGen/TargetRegisterInfo.h" | 
|  |  | 
|  | using namespace llvm; | 
|  |  | 
|  | #define DEBUG_TYPE "si-fix-wwm-liveness" | 
|  |  | 
|  | namespace { | 
|  |  | 
|  | class SIFixWWMLiveness : public MachineFunctionPass { | 
|  | private: | 
|  | LiveIntervals *LIS = nullptr; | 
|  | const SIRegisterInfo *TRI; | 
|  | MachineRegisterInfo *MRI; | 
|  |  | 
|  | public: | 
|  | static char ID; | 
|  |  | 
|  | SIFixWWMLiveness() : MachineFunctionPass(ID) { | 
|  | initializeSIFixWWMLivenessPass(*PassRegistry::getPassRegistry()); | 
|  | } | 
|  |  | 
|  | bool runOnMachineFunction(MachineFunction &MF) override; | 
|  |  | 
|  | bool runOnWWMInstruction(MachineInstr &MI); | 
|  |  | 
|  | void addDefs(const MachineInstr &MI, SparseBitVector<> &set); | 
|  |  | 
|  | StringRef getPassName() const override { return "SI Fix WWM Liveness"; } | 
|  |  | 
|  | void getAnalysisUsage(AnalysisUsage &AU) const override { | 
|  | // Should preserve the same set that TwoAddressInstructions does. | 
|  | AU.addPreserved<SlotIndexes>(); | 
|  | AU.addPreserved<LiveIntervals>(); | 
|  | AU.addPreservedID(LiveVariablesID); | 
|  | AU.addPreservedID(MachineLoopInfoID); | 
|  | AU.addPreservedID(MachineDominatorsID); | 
|  | AU.setPreservesCFG(); | 
|  | MachineFunctionPass::getAnalysisUsage(AU); | 
|  | } | 
|  | }; | 
|  |  | 
|  | } // End anonymous namespace. | 
|  |  | 
|  | INITIALIZE_PASS(SIFixWWMLiveness, DEBUG_TYPE, | 
|  | "SI fix WWM liveness", false, false) | 
|  |  | 
|  | char SIFixWWMLiveness::ID = 0; | 
|  |  | 
|  | char &llvm::SIFixWWMLivenessID = SIFixWWMLiveness::ID; | 
|  |  | 
|  | FunctionPass *llvm::createSIFixWWMLivenessPass() { | 
|  | return new SIFixWWMLiveness(); | 
|  | } | 
|  |  | 
|  | void SIFixWWMLiveness::addDefs(const MachineInstr &MI, SparseBitVector<> &Regs) | 
|  | { | 
|  | for (const MachineOperand &Op : MI.defs()) { | 
|  | if (Op.isReg()) { | 
|  | unsigned Reg = Op.getReg(); | 
|  | if (TRI->isVGPR(*MRI, Reg)) | 
|  | Regs.set(Reg); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | bool SIFixWWMLiveness::runOnWWMInstruction(MachineInstr &WWM) { | 
|  | MachineBasicBlock *MBB = WWM.getParent(); | 
|  |  | 
|  | // Compute the registers that are live out of MI by figuring out which defs | 
|  | // are reachable from MI. | 
|  | SparseBitVector<> LiveOut; | 
|  |  | 
|  | for (auto II = MachineBasicBlock::iterator(WWM), IE = | 
|  | MBB->end(); II != IE; ++II) { | 
|  | addDefs(*II, LiveOut); | 
|  | } | 
|  |  | 
|  | for (df_iterator<MachineBasicBlock *> I = ++df_begin(MBB), | 
|  | E = df_end(MBB); | 
|  | I != E; ++I) { | 
|  | for (const MachineInstr &MI : **I) { | 
|  | addDefs(MI, LiveOut); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Compute the registers that reach MI. | 
|  | SparseBitVector<> Reachable; | 
|  |  | 
|  | for (auto II = ++MachineBasicBlock::reverse_iterator(WWM), IE = | 
|  | MBB->rend(); II != IE; ++II) { | 
|  | addDefs(*II, Reachable); | 
|  | } | 
|  |  | 
|  | for (idf_iterator<MachineBasicBlock *> I = ++idf_begin(MBB), | 
|  | E = idf_end(MBB); | 
|  | I != E; ++I) { | 
|  | for (const MachineInstr &MI : **I) { | 
|  | addDefs(MI, Reachable); | 
|  | } | 
|  | } | 
|  |  | 
|  | // find the intersection, and add implicit uses. | 
|  | LiveOut &= Reachable; | 
|  |  | 
|  | bool Modified = false; | 
|  | for (unsigned Reg : LiveOut) { | 
|  | WWM.addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true)); | 
|  | if (LIS) { | 
|  | // FIXME: is there a better way to update the live interval? | 
|  | LIS->removeInterval(Reg); | 
|  | LIS->createAndComputeVirtRegInterval(Reg); | 
|  | } | 
|  | Modified = true; | 
|  | } | 
|  |  | 
|  | return Modified; | 
|  | } | 
|  |  | 
|  | bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) { | 
|  | bool Modified = false; | 
|  |  | 
|  | // This doesn't actually need LiveIntervals, but we can preserve them. | 
|  | LIS = getAnalysisIfAvailable<LiveIntervals>(); | 
|  |  | 
|  | const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); | 
|  | const SIInstrInfo *TII = ST.getInstrInfo(); | 
|  |  | 
|  | TRI = &TII->getRegisterInfo(); | 
|  | MRI = &MF.getRegInfo(); | 
|  |  | 
|  | for (MachineBasicBlock &MBB : MF) { | 
|  | for (MachineInstr &MI : MBB) { | 
|  | if (MI.getOpcode() == AMDGPU::EXIT_WWM) { | 
|  | Modified |= runOnWWMInstruction(MI); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | return Modified; | 
|  | } |