|  | //===- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies ---------===// | 
|  | // | 
|  | //                     The LLVM Compiler Infrastructure | 
|  | // | 
|  | // This file is distributed under the University of Illinois Open Source | 
|  | // License. See LICENSE.TXT for details. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | /// \file | 
|  | /// Copies from VGPR to SGPR registers are illegal and the register coalescer | 
|  | /// will sometimes generate these illegal copies in situations like this: | 
|  | /// | 
|  | ///  Register Class <vsrc> is the union of <vgpr> and <sgpr> | 
|  | /// | 
|  | /// BB0: | 
|  | ///   %0 <sgpr> = SCALAR_INST | 
|  | ///   %1 <vsrc> = COPY %0 <sgpr> | 
|  | ///    ... | 
|  | ///    BRANCH %cond BB1, BB2 | 
|  | ///  BB1: | 
|  | ///    %2 <vgpr> = VECTOR_INST | 
|  | ///    %3 <vsrc> = COPY %2 <vgpr> | 
|  | ///  BB2: | 
|  | ///    %4 <vsrc> = PHI %1 <vsrc>, <%bb.0>, %3 <vsrc>, <%bb.1> | 
|  | ///    %5 <vgpr> = VECTOR_INST %4 <vsrc> | 
|  | /// | 
|  | /// | 
|  | /// The coalescer will begin at BB0 and eliminate its copy, then the resulting | 
|  | /// code will look like this: | 
|  | /// | 
|  | /// BB0: | 
|  | ///   %0 <sgpr> = SCALAR_INST | 
|  | ///    ... | 
|  | ///    BRANCH %cond BB1, BB2 | 
|  | /// BB1: | 
|  | ///   %2 <vgpr> = VECTOR_INST | 
|  | ///   %3 <vsrc> = COPY %2 <vgpr> | 
|  | /// BB2: | 
|  | ///   %4 <sgpr> = PHI %0 <sgpr>, <%bb.0>, %3 <vsrc>, <%bb.1> | 
|  | ///   %5 <vgpr> = VECTOR_INST %4 <sgpr> | 
|  | /// | 
|  | /// Now that the result of the PHI instruction is an SGPR, the register | 
|  | /// allocator is now forced to constrain the register class of %3 to | 
|  | /// <sgpr> so we end up with final code like this: | 
|  | /// | 
|  | /// BB0: | 
|  | ///   %0 <sgpr> = SCALAR_INST | 
|  | ///    ... | 
|  | ///    BRANCH %cond BB1, BB2 | 
|  | /// BB1: | 
|  | ///   %2 <vgpr> = VECTOR_INST | 
|  | ///   %3 <sgpr> = COPY %2 <vgpr> | 
|  | /// BB2: | 
|  | ///   %4 <sgpr> = PHI %0 <sgpr>, <%bb.0>, %3 <sgpr>, <%bb.1> | 
|  | ///   %5 <vgpr> = VECTOR_INST %4 <sgpr> | 
|  | /// | 
|  | /// Now this code contains an illegal copy from a VGPR to an SGPR. | 
|  | /// | 
|  | /// In order to avoid this problem, this pass searches for PHI instructions | 
|  | /// which define a <vsrc> register and constrains its definition class to | 
|  | /// <vgpr> if the user of the PHI's definition register is a vector instruction. | 
|  | /// If the PHI's definition class is constrained to <vgpr> then the coalescer | 
|  | /// will be unable to perform the COPY removal from the above example which | 
|  | /// ultimately led to the creation of an illegal COPY. | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "AMDGPU.h" | 
|  | #include "AMDGPUSubtarget.h" | 
|  | #include "SIInstrInfo.h" | 
|  | #include "SIRegisterInfo.h" | 
|  | #include "llvm/ADT/DenseSet.h" | 
|  | #include "llvm/ADT/STLExtras.h" | 
|  | #include "llvm/ADT/SmallSet.h" | 
|  | #include "llvm/ADT/SmallVector.h" | 
|  | #include "llvm/CodeGen/MachineBasicBlock.h" | 
|  | #include "llvm/CodeGen/MachineDominators.h" | 
|  | #include "llvm/CodeGen/MachineFunction.h" | 
|  | #include "llvm/CodeGen/MachineFunctionPass.h" | 
|  | #include "llvm/CodeGen/MachineInstr.h" | 
|  | #include "llvm/CodeGen/MachineInstrBuilder.h" | 
|  | #include "llvm/CodeGen/MachineOperand.h" | 
|  | #include "llvm/CodeGen/MachineRegisterInfo.h" | 
|  | #include "llvm/CodeGen/MachinePostDominators.h" | 
|  | #include "llvm/CodeGen/TargetRegisterInfo.h" | 
|  | #include "llvm/Pass.h" | 
|  | #include "llvm/Support/CodeGen.h" | 
|  | #include "llvm/Support/CommandLine.h" | 
|  | #include "llvm/Support/Debug.h" | 
|  | #include "llvm/Support/raw_ostream.h" | 
|  | #include "llvm/Target/TargetMachine.h" | 
|  | #include <cassert> | 
|  | #include <cstdint> | 
|  | #include <iterator> | 
|  | #include <list> | 
|  | #include <map> | 
|  | #include <tuple> | 
|  | #include <utility> | 
|  |  | 
|  | using namespace llvm; | 
|  |  | 
|  | #define DEBUG_TYPE "si-fix-sgpr-copies" | 
|  |  | 
|  | static cl::opt<bool> EnableM0Merge( | 
|  | "amdgpu-enable-merge-m0", | 
|  | cl::desc("Merge and hoist M0 initializations"), | 
|  | cl::init(false)); | 
|  |  | 
|  | namespace { | 
|  |  | 
|  | class SIFixSGPRCopies : public MachineFunctionPass { | 
|  | MachineDominatorTree *MDT; | 
|  | MachinePostDominatorTree *MPDT; | 
|  | DenseMap<MachineBasicBlock *, SetVector<MachineBasicBlock*>> PDF; | 
|  | void computePDF(MachineFunction * MF); | 
|  | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | 
|  | void printPDF(); | 
|  | #endif | 
|  | public: | 
|  | static char ID; | 
|  |  | 
|  | SIFixSGPRCopies() : MachineFunctionPass(ID) {} | 
|  |  | 
|  | bool runOnMachineFunction(MachineFunction &MF) override; | 
|  |  | 
|  | StringRef getPassName() const override { return "SI Fix SGPR copies"; } | 
|  |  | 
|  | void getAnalysisUsage(AnalysisUsage &AU) const override { | 
|  | AU.addRequired<MachineDominatorTree>(); | 
|  | AU.addPreserved<MachineDominatorTree>(); | 
|  | AU.addRequired<MachinePostDominatorTree>(); | 
|  | AU.addPreserved<MachinePostDominatorTree>(); | 
|  | AU.setPreservesCFG(); | 
|  | MachineFunctionPass::getAnalysisUsage(AU); | 
|  | } | 
|  | }; | 
|  |  | 
|  | } // end anonymous namespace | 
|  |  | 
|  | INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE, | 
|  | "SI Fix SGPR copies", false, false) | 
|  | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) | 
|  | INITIALIZE_PASS_END(SIFixSGPRCopies, DEBUG_TYPE, | 
|  | "SI Fix SGPR copies", false, false) | 
|  |  | 
|  | char SIFixSGPRCopies::ID = 0; | 
|  |  | 
|  | char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID; | 
|  |  | 
|  | FunctionPass *llvm::createSIFixSGPRCopiesPass() { | 
|  | return new SIFixSGPRCopies(); | 
|  | } | 
|  |  | 
|  | static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) { | 
|  | const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); | 
|  | for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { | 
|  | if (!MI.getOperand(i).isReg() || | 
|  | !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg())) | 
|  | continue; | 
|  |  | 
|  | if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg()))) | 
|  | return true; | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | static std::pair<const TargetRegisterClass *, const TargetRegisterClass *> | 
|  | getCopyRegClasses(const MachineInstr &Copy, | 
|  | const SIRegisterInfo &TRI, | 
|  | const MachineRegisterInfo &MRI) { | 
|  | unsigned DstReg = Copy.getOperand(0).getReg(); | 
|  | unsigned SrcReg = Copy.getOperand(1).getReg(); | 
|  |  | 
|  | const TargetRegisterClass *SrcRC = | 
|  | TargetRegisterInfo::isVirtualRegister(SrcReg) ? | 
|  | MRI.getRegClass(SrcReg) : | 
|  | TRI.getPhysRegClass(SrcReg); | 
|  |  | 
|  | // We don't really care about the subregister here. | 
|  | // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg()); | 
|  |  | 
|  | const TargetRegisterClass *DstRC = | 
|  | TargetRegisterInfo::isVirtualRegister(DstReg) ? | 
|  | MRI.getRegClass(DstReg) : | 
|  | TRI.getPhysRegClass(DstReg); | 
|  |  | 
|  | return std::make_pair(SrcRC, DstRC); | 
|  | } | 
|  |  | 
|  | static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC, | 
|  | const TargetRegisterClass *DstRC, | 
|  | const SIRegisterInfo &TRI) { | 
|  | return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC); | 
|  | } | 
|  |  | 
|  | static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC, | 
|  | const TargetRegisterClass *DstRC, | 
|  | const SIRegisterInfo &TRI) { | 
|  | return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC); | 
|  | } | 
|  |  | 
|  | static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI, | 
|  | const SIRegisterInfo *TRI, | 
|  | const SIInstrInfo *TII) { | 
|  | MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); | 
|  | auto &Src = MI.getOperand(1); | 
|  | unsigned DstReg = MI.getOperand(0).getReg(); | 
|  | unsigned SrcReg = Src.getReg(); | 
|  | if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || | 
|  | !TargetRegisterInfo::isVirtualRegister(DstReg)) | 
|  | return false; | 
|  |  | 
|  | for (const auto &MO : MRI.reg_nodbg_operands(DstReg)) { | 
|  | const auto *UseMI = MO.getParent(); | 
|  | if (UseMI == &MI) | 
|  | continue; | 
|  | if (MO.isDef() || UseMI->getParent() != MI.getParent() || | 
|  | UseMI->getOpcode() <= TargetOpcode::GENERIC_OP_END || | 
|  | !TII->isOperandLegal(*UseMI, UseMI->getOperandNo(&MO), &Src)) | 
|  | return false; | 
|  | } | 
|  | // Change VGPR to SGPR destination. | 
|  | MRI.setRegClass(DstReg, TRI->getEquivalentSGPRClass(MRI.getRegClass(DstReg))); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | // Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE. | 
|  | // | 
|  | // SGPRx = ... | 
|  | // SGPRy = REG_SEQUENCE SGPRx, sub0 ... | 
|  | // VGPRz = COPY SGPRy | 
|  | // | 
|  | // ==> | 
|  | // | 
|  | // VGPRx = COPY SGPRx | 
|  | // VGPRz = REG_SEQUENCE VGPRx, sub0 | 
|  | // | 
|  | // This exposes immediate folding opportunities when materializing 64-bit | 
|  | // immediates. | 
|  | static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, | 
|  | const SIRegisterInfo *TRI, | 
|  | const SIInstrInfo *TII, | 
|  | MachineRegisterInfo &MRI) { | 
|  | assert(MI.isRegSequence()); | 
|  |  | 
|  | unsigned DstReg = MI.getOperand(0).getReg(); | 
|  | if (!TRI->isSGPRClass(MRI.getRegClass(DstReg))) | 
|  | return false; | 
|  |  | 
|  | if (!MRI.hasOneUse(DstReg)) | 
|  | return false; | 
|  |  | 
|  | MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg); | 
|  | if (!CopyUse.isCopy()) | 
|  | return false; | 
|  |  | 
|  | // It is illegal to have vreg inputs to a physreg defining reg_sequence. | 
|  | if (TargetRegisterInfo::isPhysicalRegister(CopyUse.getOperand(0).getReg())) | 
|  | return false; | 
|  |  | 
|  | const TargetRegisterClass *SrcRC, *DstRC; | 
|  | std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI); | 
|  |  | 
|  | if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) | 
|  | return false; | 
|  |  | 
|  | if (tryChangeVGPRtoSGPRinCopy(CopyUse, TRI, TII)) | 
|  | return true; | 
|  |  | 
|  | // TODO: Could have multiple extracts? | 
|  | unsigned SubReg = CopyUse.getOperand(1).getSubReg(); | 
|  | if (SubReg != AMDGPU::NoSubRegister) | 
|  | return false; | 
|  |  | 
|  | MRI.setRegClass(DstReg, DstRC); | 
|  |  | 
|  | // SGPRx = ... | 
|  | // SGPRy = REG_SEQUENCE SGPRx, sub0 ... | 
|  | // VGPRz = COPY SGPRy | 
|  |  | 
|  | // => | 
|  | // VGPRx = COPY SGPRx | 
|  | // VGPRz = REG_SEQUENCE VGPRx, sub0 | 
|  |  | 
|  | MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg()); | 
|  |  | 
|  | for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) { | 
|  | unsigned SrcReg = MI.getOperand(I).getReg(); | 
|  | unsigned SrcSubReg = MI.getOperand(I).getSubReg(); | 
|  |  | 
|  | const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); | 
|  | assert(TRI->isSGPRClass(SrcRC) && | 
|  | "Expected SGPR REG_SEQUENCE to only have SGPR inputs"); | 
|  |  | 
|  | SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg); | 
|  | const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC); | 
|  |  | 
|  | unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC); | 
|  |  | 
|  | BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), | 
|  | TmpReg) | 
|  | .add(MI.getOperand(I)); | 
|  |  | 
|  | MI.getOperand(I).setReg(TmpReg); | 
|  | } | 
|  |  | 
|  | CopyUse.eraseFromParent(); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | static bool phiHasVGPROperands(const MachineInstr &PHI, | 
|  | const MachineRegisterInfo &MRI, | 
|  | const SIRegisterInfo *TRI, | 
|  | const SIInstrInfo *TII) { | 
|  | for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) { | 
|  | unsigned Reg = PHI.getOperand(i).getReg(); | 
|  | if (TRI->hasVGPRs(MRI.getRegClass(Reg))) | 
|  | return true; | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | static bool phiHasBreakDef(const MachineInstr &PHI, | 
|  | const MachineRegisterInfo &MRI, | 
|  | SmallSet<unsigned, 8> &Visited) { | 
|  | for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) { | 
|  | unsigned Reg = PHI.getOperand(i).getReg(); | 
|  | if (Visited.count(Reg)) | 
|  | continue; | 
|  |  | 
|  | Visited.insert(Reg); | 
|  |  | 
|  | MachineInstr *DefInstr = MRI.getVRegDef(Reg); | 
|  | switch (DefInstr->getOpcode()) { | 
|  | default: | 
|  | break; | 
|  | case AMDGPU::SI_BREAK: | 
|  | case AMDGPU::SI_IF_BREAK: | 
|  | case AMDGPU::SI_ELSE_BREAK: | 
|  | return true; | 
|  | case AMDGPU::PHI: | 
|  | if (phiHasBreakDef(*DefInstr, MRI, Visited)) | 
|  | return true; | 
|  | } | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB, | 
|  | const TargetRegisterInfo &TRI) { | 
|  | for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(), | 
|  | E = MBB.end(); I != E; ++I) { | 
|  | if (I->modifiesRegister(AMDGPU::EXEC, &TRI)) | 
|  | return true; | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy, | 
|  | const MachineInstr *MoveImm, | 
|  | const SIInstrInfo *TII, | 
|  | unsigned &SMovOp, | 
|  | int64_t &Imm) { | 
|  | if (Copy->getOpcode() != AMDGPU::COPY) | 
|  | return false; | 
|  |  | 
|  | if (!MoveImm->isMoveImmediate()) | 
|  | return false; | 
|  |  | 
|  | const MachineOperand *ImmOp = | 
|  | TII->getNamedOperand(*MoveImm, AMDGPU::OpName::src0); | 
|  | if (!ImmOp->isImm()) | 
|  | return false; | 
|  |  | 
|  | // FIXME: Handle copies with sub-regs. | 
|  | if (Copy->getOperand(0).getSubReg()) | 
|  | return false; | 
|  |  | 
|  | switch (MoveImm->getOpcode()) { | 
|  | default: | 
|  | return false; | 
|  | case AMDGPU::V_MOV_B32_e32: | 
|  | SMovOp = AMDGPU::S_MOV_B32; | 
|  | break; | 
|  | case AMDGPU::V_MOV_B64_PSEUDO: | 
|  | SMovOp = AMDGPU::S_MOV_B64; | 
|  | break; | 
|  | } | 
|  | Imm = ImmOp->getImm(); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | template <class UnaryPredicate> | 
|  | bool searchPredecessors(const MachineBasicBlock *MBB, | 
|  | const MachineBasicBlock *CutOff, | 
|  | UnaryPredicate Predicate) { | 
|  | if (MBB == CutOff) | 
|  | return false; | 
|  |  | 
|  | DenseSet<const MachineBasicBlock *> Visited; | 
|  | SmallVector<MachineBasicBlock *, 4> Worklist(MBB->pred_begin(), | 
|  | MBB->pred_end()); | 
|  |  | 
|  | while (!Worklist.empty()) { | 
|  | MachineBasicBlock *MBB = Worklist.pop_back_val(); | 
|  |  | 
|  | if (!Visited.insert(MBB).second) | 
|  | continue; | 
|  | if (MBB == CutOff) | 
|  | continue; | 
|  | if (Predicate(MBB)) | 
|  | return true; | 
|  |  | 
|  | Worklist.append(MBB->pred_begin(), MBB->pred_end()); | 
|  | } | 
|  |  | 
|  | return false; | 
|  | } | 
|  |  | 
|  | // Checks if there is potential path From instruction To instruction. | 
|  | // If CutOff is specified and it sits in between of that path we ignore | 
|  | // a higher portion of the path and report it is not reachable. | 
|  | static bool isReachable(const MachineInstr *From, | 
|  | const MachineInstr *To, | 
|  | const MachineBasicBlock *CutOff, | 
|  | MachineDominatorTree &MDT) { | 
|  | // If either From block dominates To block or instructions are in the same | 
|  | // block and From is higher. | 
|  | if (MDT.dominates(From, To)) | 
|  | return true; | 
|  |  | 
|  | const MachineBasicBlock *MBBFrom = From->getParent(); | 
|  | const MachineBasicBlock *MBBTo = To->getParent(); | 
|  | if (MBBFrom == MBBTo) | 
|  | return false; | 
|  |  | 
|  | // Instructions are in different blocks, do predecessor search. | 
|  | // We should almost never get here since we do not usually produce M0 stores | 
|  | // other than -1. | 
|  | return searchPredecessors(MBBTo, CutOff, [MBBFrom] | 
|  | (const MachineBasicBlock *MBB) { return MBB == MBBFrom; }); | 
|  | } | 
|  |  | 
|  | // Hoist and merge identical SGPR initializations into a common predecessor. | 
|  | // This is intended to combine M0 initializations, but can work with any | 
|  | // SGPR. A VGPR cannot be processed since we cannot guarantee vector | 
|  | // executioon. | 
|  | static bool hoistAndMergeSGPRInits(unsigned Reg, | 
|  | const MachineRegisterInfo &MRI, | 
|  | MachineDominatorTree &MDT) { | 
|  | // List of inits by immediate value. | 
|  | using InitListMap = std::map<unsigned, std::list<MachineInstr *>>; | 
|  | InitListMap Inits; | 
|  | // List of clobbering instructions. | 
|  | SmallVector<MachineInstr*, 8> Clobbers; | 
|  | bool Changed = false; | 
|  |  | 
|  | for (auto &MI : MRI.def_instructions(Reg)) { | 
|  | MachineOperand *Imm = nullptr; | 
|  | for (auto &MO: MI.operands()) { | 
|  | if ((MO.isReg() && ((MO.isDef() && MO.getReg() != Reg) || !MO.isDef())) || | 
|  | (!MO.isImm() && !MO.isReg()) || (MO.isImm() && Imm)) { | 
|  | Imm = nullptr; | 
|  | break; | 
|  | } else if (MO.isImm()) | 
|  | Imm = &MO; | 
|  | } | 
|  | if (Imm) | 
|  | Inits[Imm->getImm()].push_front(&MI); | 
|  | else | 
|  | Clobbers.push_back(&MI); | 
|  | } | 
|  |  | 
|  | for (auto &Init : Inits) { | 
|  | auto &Defs = Init.second; | 
|  |  | 
|  | for (auto I1 = Defs.begin(), E = Defs.end(); I1 != E; ) { | 
|  | MachineInstr *MI1 = *I1; | 
|  |  | 
|  | for (auto I2 = std::next(I1); I2 != E; ) { | 
|  | MachineInstr *MI2 = *I2; | 
|  |  | 
|  | // Check any possible interference | 
|  | auto intereferes = [&](MachineBasicBlock::iterator From, | 
|  | MachineBasicBlock::iterator To) -> bool { | 
|  |  | 
|  | assert(MDT.dominates(&*To, &*From)); | 
|  |  | 
|  | auto interferes = [&MDT, From, To](MachineInstr* &Clobber) -> bool { | 
|  | const MachineBasicBlock *MBBFrom = From->getParent(); | 
|  | const MachineBasicBlock *MBBTo = To->getParent(); | 
|  | bool MayClobberFrom = isReachable(Clobber, &*From, MBBTo, MDT); | 
|  | bool MayClobberTo = isReachable(Clobber, &*To, MBBTo, MDT); | 
|  | if (!MayClobberFrom && !MayClobberTo) | 
|  | return false; | 
|  | if ((MayClobberFrom && !MayClobberTo) || | 
|  | (!MayClobberFrom && MayClobberTo)) | 
|  | return true; | 
|  | // Both can clobber, this is not an interference only if both are | 
|  | // dominated by Clobber and belong to the same block or if Clobber | 
|  | // properly dominates To, given that To >> From, so it dominates | 
|  | // both and located in a common dominator. | 
|  | return !((MBBFrom == MBBTo && | 
|  | MDT.dominates(Clobber, &*From) && | 
|  | MDT.dominates(Clobber, &*To)) || | 
|  | MDT.properlyDominates(Clobber->getParent(), MBBTo)); | 
|  | }; | 
|  |  | 
|  | return (llvm::any_of(Clobbers, interferes)) || | 
|  | (llvm::any_of(Inits, [&](InitListMap::value_type &C) { | 
|  | return C.first != Init.first && | 
|  | llvm::any_of(C.second, interferes); | 
|  | })); | 
|  | }; | 
|  |  | 
|  | if (MDT.dominates(MI1, MI2)) { | 
|  | if (!intereferes(MI2, MI1)) { | 
|  | DEBUG(dbgs() << "Erasing from " | 
|  | << printMBBReference(*MI2->getParent()) << " " | 
|  | << *MI2); | 
|  | MI2->eraseFromParent(); | 
|  | Defs.erase(I2++); | 
|  | Changed = true; | 
|  | continue; | 
|  | } | 
|  | } else if (MDT.dominates(MI2, MI1)) { | 
|  | if (!intereferes(MI1, MI2)) { | 
|  | DEBUG(dbgs() << "Erasing from " | 
|  | << printMBBReference(*MI1->getParent()) << " " | 
|  | << *MI1); | 
|  | MI1->eraseFromParent(); | 
|  | Defs.erase(I1++); | 
|  | Changed = true; | 
|  | break; | 
|  | } | 
|  | } else { | 
|  | auto *MBB = MDT.findNearestCommonDominator(MI1->getParent(), | 
|  | MI2->getParent()); | 
|  | if (!MBB) { | 
|  | ++I2; | 
|  | continue; | 
|  | } | 
|  |  | 
|  | MachineBasicBlock::iterator I = MBB->getFirstNonPHI(); | 
|  | if (!intereferes(MI1, I) && !intereferes(MI2, I)) { | 
|  | DEBUG(dbgs() << "Erasing from " | 
|  | << printMBBReference(*MI1->getParent()) << " " << *MI1 | 
|  | << "and moving from " | 
|  | << printMBBReference(*MI2->getParent()) << " to " | 
|  | << printMBBReference(*I->getParent()) << " " << *MI2); | 
|  | I->getParent()->splice(I, MI2->getParent(), MI2); | 
|  | MI1->eraseFromParent(); | 
|  | Defs.erase(I1++); | 
|  | Changed = true; | 
|  | break; | 
|  | } | 
|  | } | 
|  | ++I2; | 
|  | } | 
|  | ++I1; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (Changed) | 
|  | MRI.clearKillFlags(Reg); | 
|  |  | 
|  | return Changed; | 
|  | } | 
|  |  | 
|  | void SIFixSGPRCopies::computePDF(MachineFunction *MF) { | 
|  | MachineFunction::iterator B = MF->begin(); | 
|  | MachineFunction::iterator E = MF->end(); | 
|  | for (; B != E; ++B) { | 
|  | if (B->succ_size() > 1) { | 
|  | for (auto S : B->successors()) { | 
|  | MachineDomTreeNode *runner = MPDT->getNode(&*S); | 
|  | MachineDomTreeNode *sentinel = MPDT->getNode(&*B)->getIDom(); | 
|  | while (runner && runner != sentinel) { | 
|  | PDF[runner->getBlock()].insert(&*B); | 
|  | runner = runner->getIDom(); | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | 
|  | void SIFixSGPRCopies::printPDF() { | 
|  | dbgs() << "\n######## PostDominanceFrontiers set #########\n"; | 
|  | for (auto &I : PDF) { | 
|  | dbgs() << "PDF[ " << I.first->getNumber() << "] : "; | 
|  | for (auto &J : I.second) { | 
|  | dbgs() << J->getNumber() << ' '; | 
|  | } | 
|  | dbgs() << '\n'; | 
|  | } | 
|  | dbgs() << "\n##############################################\n"; | 
|  | } | 
|  | #endif | 
|  |  | 
|  | bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { | 
|  | const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); | 
|  | MachineRegisterInfo &MRI = MF.getRegInfo(); | 
|  | const SIRegisterInfo *TRI = ST.getRegisterInfo(); | 
|  | const SIInstrInfo *TII = ST.getInstrInfo(); | 
|  | MDT = &getAnalysis<MachineDominatorTree>(); | 
|  | MPDT = &getAnalysis<MachinePostDominatorTree>(); | 
|  | PDF.clear(); | 
|  | computePDF(&MF); | 
|  | DEBUG(printPDF()); | 
|  |  | 
|  | SmallVector<MachineInstr *, 16> Worklist; | 
|  |  | 
|  | for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); | 
|  | BI != BE; ++BI) { | 
|  | MachineBasicBlock &MBB = *BI; | 
|  | for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); | 
|  | I != E; ++I) { | 
|  | MachineInstr &MI = *I; | 
|  |  | 
|  | switch (MI.getOpcode()) { | 
|  | default: | 
|  | continue; | 
|  | case AMDGPU::COPY: | 
|  | case AMDGPU::WQM: | 
|  | case AMDGPU::WWM: { | 
|  | // If the destination register is a physical register there isn't really | 
|  | // much we can do to fix this. | 
|  | if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg())) | 
|  | continue; | 
|  |  | 
|  | const TargetRegisterClass *SrcRC, *DstRC; | 
|  | std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI); | 
|  | if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) { | 
|  | unsigned SrcReg = MI.getOperand(1).getReg(); | 
|  | if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) { | 
|  | TII->moveToVALU(MI); | 
|  | break; | 
|  | } | 
|  |  | 
|  | MachineInstr *DefMI = MRI.getVRegDef(SrcReg); | 
|  | unsigned SMovOp; | 
|  | int64_t Imm; | 
|  | // If we are just copying an immediate, we can replace the copy with | 
|  | // s_mov_b32. | 
|  | if (isSafeToFoldImmIntoCopy(&MI, DefMI, TII, SMovOp, Imm)) { | 
|  | MI.getOperand(1).ChangeToImmediate(Imm); | 
|  | MI.addImplicitDefUseOperands(MF); | 
|  | MI.setDesc(TII->get(SMovOp)); | 
|  | break; | 
|  | } | 
|  | TII->moveToVALU(MI); | 
|  | } else if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) { | 
|  | tryChangeVGPRtoSGPRinCopy(MI, TRI, TII); | 
|  | } | 
|  |  | 
|  | break; | 
|  | } | 
|  | case AMDGPU::PHI: { | 
|  | unsigned Reg = MI.getOperand(0).getReg(); | 
|  | if (!TRI->isSGPRClass(MRI.getRegClass(Reg))) | 
|  | break; | 
|  |  | 
|  | // We don't need to fix the PHI if all the source blocks | 
|  | // have no divergent control dependecies | 
|  | bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII); | 
|  | if (!HasVGPROperand) { | 
|  | bool Uniform = true; | 
|  | MachineBasicBlock * Join = MI.getParent(); | 
|  | for (auto &O : MI.explicit_operands()) { | 
|  | if (O.isMBB()) { | 
|  | MachineBasicBlock * Source = O.getMBB(); | 
|  | SetVector<MachineBasicBlock*> &SourcePDF = PDF[Source]; | 
|  | SetVector<MachineBasicBlock*> &JoinPDF   = PDF[Join]; | 
|  | SetVector<MachineBasicBlock*> CDList; | 
|  | for (auto &I : SourcePDF) { | 
|  | if (!JoinPDF.count(I) || /* back edge */MDT->dominates(Join, I)) { | 
|  | if (hasTerminatorThatModifiesExec(*I, *TRI)) | 
|  | Uniform = false; | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  | if (Uniform) { | 
|  | DEBUG(dbgs() << "Not fixing PHI for uniform branch: " << MI << '\n'); | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | // If a PHI node defines an SGPR and any of its operands are VGPRs, | 
|  | // then we need to move it to the VALU. | 
|  | // | 
|  | // Also, if a PHI node defines an SGPR and has all SGPR operands | 
|  | // we must move it to the VALU, because the SGPR operands will | 
|  | // all end up being assigned the same register, which means | 
|  | // there is a potential for a conflict if different threads take | 
|  | // different control flow paths. | 
|  | // | 
|  | // For Example: | 
|  | // | 
|  | // sgpr0 = def; | 
|  | // ... | 
|  | // sgpr1 = def; | 
|  | // ... | 
|  | // sgpr2 = PHI sgpr0, sgpr1 | 
|  | // use sgpr2; | 
|  | // | 
|  | // Will Become: | 
|  | // | 
|  | // sgpr2 = def; | 
|  | // ... | 
|  | // sgpr2 = def; | 
|  | // ... | 
|  | // use sgpr2 | 
|  | // | 
|  | // The one exception to this rule is when one of the operands | 
|  | // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK | 
|  | // instruction.  In this case, there we know the program will | 
|  | // never enter the second block (the loop) without entering | 
|  | // the first block (where the condition is computed), so there | 
|  | // is no chance for values to be over-written. | 
|  |  | 
|  | SmallSet<unsigned, 8> Visited; | 
|  | if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) { | 
|  | DEBUG(dbgs() << "Fixing PHI: " << MI); | 
|  | TII->moveToVALU(MI); | 
|  | } | 
|  | break; | 
|  | } | 
|  | case AMDGPU::REG_SEQUENCE: | 
|  | if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) || | 
|  | !hasVGPROperands(MI, TRI)) { | 
|  | foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI); | 
|  | continue; | 
|  | } | 
|  |  | 
|  | DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI); | 
|  |  | 
|  | TII->moveToVALU(MI); | 
|  | break; | 
|  | case AMDGPU::INSERT_SUBREG: { | 
|  | const TargetRegisterClass *DstRC, *Src0RC, *Src1RC; | 
|  | DstRC = MRI.getRegClass(MI.getOperand(0).getReg()); | 
|  | Src0RC = MRI.getRegClass(MI.getOperand(1).getReg()); | 
|  | Src1RC = MRI.getRegClass(MI.getOperand(2).getReg()); | 
|  | if (TRI->isSGPRClass(DstRC) && | 
|  | (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) { | 
|  | DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI); | 
|  | TII->moveToVALU(MI); | 
|  | } | 
|  | break; | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge) | 
|  | hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT); | 
|  |  | 
|  | return true; | 
|  | } |