|  | //===- AMDILCFGStructurizer.cpp - CFG Structurizer ------------------------===// | 
|  | // | 
|  | //                     The LLVM Compiler Infrastructure | 
|  | // | 
|  | // This file is distributed under the University of Illinois Open Source | 
|  | // License. See LICENSE.TXT for details. | 
|  | // | 
|  | //==-----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "AMDGPU.h" | 
|  | #include "AMDGPUSubtarget.h" | 
|  | #include "R600InstrInfo.h" | 
|  | #include "R600RegisterInfo.h" | 
|  | #include "llvm/ADT/DepthFirstIterator.h" | 
|  | #include "llvm/ADT/SCCIterator.h" | 
|  | #include "llvm/ADT/SmallPtrSet.h" | 
|  | #include "llvm/ADT/SmallVector.h" | 
|  | #include "llvm/ADT/Statistic.h" | 
|  | #include "llvm/ADT/StringRef.h" | 
|  | #include "llvm/CodeGen/MachineBasicBlock.h" | 
|  | #include "llvm/CodeGen/MachineDominators.h" | 
|  | #include "llvm/CodeGen/MachineFunction.h" | 
|  | #include "llvm/CodeGen/MachineFunctionPass.h" | 
|  | #include "llvm/CodeGen/MachineInstr.h" | 
|  | #include "llvm/CodeGen/MachineInstrBuilder.h" | 
|  | #include "llvm/CodeGen/MachineJumpTableInfo.h" | 
|  | #include "llvm/CodeGen/MachineLoopInfo.h" | 
|  | #include "llvm/CodeGen/MachineOperand.h" | 
|  | #include "llvm/CodeGen/MachinePostDominators.h" | 
|  | #include "llvm/CodeGen/MachineRegisterInfo.h" | 
|  | #include "llvm/CodeGen/MachineValueType.h" | 
|  | #include "llvm/IR/DebugLoc.h" | 
|  | #include "llvm/IR/LLVMContext.h" | 
|  | #include "llvm/Pass.h" | 
|  | #include "llvm/Support/Debug.h" | 
|  | #include "llvm/Support/ErrorHandling.h" | 
|  | #include "llvm/Support/raw_ostream.h" | 
|  | #include <cassert> | 
|  | #include <cstddef> | 
|  | #include <deque> | 
|  | #include <iterator> | 
|  | #include <map> | 
|  | #include <utility> | 
|  | #include <vector> | 
|  |  | 
|  | using namespace llvm; | 
|  |  | 
|  | #define DEBUG_TYPE "structcfg" | 
|  |  | 
|  | #define DEFAULT_VEC_SLOTS 8 | 
|  |  | 
|  | // TODO: move-begin. | 
|  |  | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | // Statistics for CFGStructurizer. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | STATISTIC(numSerialPatternMatch,    "CFGStructurizer number of serial pattern " | 
|  | "matched"); | 
|  | STATISTIC(numIfPatternMatch,        "CFGStructurizer number of if pattern " | 
|  | "matched"); | 
|  | STATISTIC(numClonedBlock,           "CFGStructurizer cloned blocks"); | 
|  | STATISTIC(numClonedInstr,           "CFGStructurizer cloned instructions"); | 
|  |  | 
|  | namespace llvm { | 
|  |  | 
|  | void initializeAMDGPUCFGStructurizerPass(PassRegistry &); | 
|  |  | 
|  | } // end namespace llvm | 
|  |  | 
|  | namespace { | 
|  |  | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | // Miscellaneous utility for CFGStructurizer. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #define SHOWNEWINSTR(i) \ | 
|  | DEBUG(dbgs() << "New instr: " << *i << "\n"); | 
|  |  | 
|  | #define SHOWNEWBLK(b, msg) \ | 
|  | DEBUG( \ | 
|  | dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ | 
|  | dbgs() << "\n"; \ | 
|  | ); | 
|  |  | 
|  | #define SHOWBLK_DETAIL(b, msg) \ | 
|  | DEBUG( \ | 
|  | if (b) { \ | 
|  | dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ | 
|  | b->print(dbgs()); \ | 
|  | dbgs() << "\n"; \ | 
|  | } \ | 
|  | ); | 
|  |  | 
|  | #define INVALIDSCCNUM -1 | 
|  |  | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | // supporting data structure for CFGStructurizer | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | class BlockInformation { | 
|  | public: | 
|  | bool IsRetired = false; | 
|  | int SccNum = INVALIDSCCNUM; | 
|  |  | 
|  | BlockInformation() = default; | 
|  | }; | 
|  |  | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | // CFGStructurizer | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | class AMDGPUCFGStructurizer : public MachineFunctionPass { | 
|  | public: | 
|  | using MBBVector = SmallVector<MachineBasicBlock *, 32>; | 
|  | using MBBInfoMap = std::map<MachineBasicBlock *, BlockInformation *>; | 
|  | using LoopLandInfoMap = std::map<MachineLoop *, MachineBasicBlock *>; | 
|  |  | 
|  | enum PathToKind { | 
|  | Not_SinglePath = 0, | 
|  | SinglePath_InPath = 1, | 
|  | SinglePath_NotInPath = 2 | 
|  | }; | 
|  |  | 
|  | static char ID; | 
|  |  | 
|  | AMDGPUCFGStructurizer() : MachineFunctionPass(ID) { | 
|  | initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry()); | 
|  | } | 
|  |  | 
|  | StringRef getPassName() const override { | 
|  | return "AMDGPU Control Flow Graph structurizer Pass"; | 
|  | } | 
|  |  | 
|  | void getAnalysisUsage(AnalysisUsage &AU) const override { | 
|  | AU.addRequired<MachineDominatorTree>(); | 
|  | AU.addRequired<MachinePostDominatorTree>(); | 
|  | AU.addRequired<MachineLoopInfo>(); | 
|  | MachineFunctionPass::getAnalysisUsage(AU); | 
|  | } | 
|  |  | 
|  | /// Perform the CFG structurization | 
|  | bool run(); | 
|  |  | 
|  | /// Perform the CFG preparation | 
|  | /// This step will remove every unconditionnal/dead jump instructions and make | 
|  | /// sure all loops have an exit block | 
|  | bool prepare(); | 
|  |  | 
|  | bool runOnMachineFunction(MachineFunction &MF) override { | 
|  | TII = MF.getSubtarget<R600Subtarget>().getInstrInfo(); | 
|  | TRI = &TII->getRegisterInfo(); | 
|  | DEBUG(MF.dump();); | 
|  | OrderedBlks.clear(); | 
|  | Visited.clear(); | 
|  | FuncRep = &MF; | 
|  | MLI = &getAnalysis<MachineLoopInfo>(); | 
|  | DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI);); | 
|  | MDT = &getAnalysis<MachineDominatorTree>(); | 
|  | DEBUG(MDT->print(dbgs(), (const Module*)nullptr);); | 
|  | PDT = &getAnalysis<MachinePostDominatorTree>(); | 
|  | DEBUG(PDT->print(dbgs());); | 
|  | prepare(); | 
|  | run(); | 
|  | DEBUG(MF.dump();); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | protected: | 
|  | MachineDominatorTree *MDT; | 
|  | MachinePostDominatorTree *PDT; | 
|  | MachineLoopInfo *MLI; | 
|  | const R600InstrInfo *TII = nullptr; | 
|  | const R600RegisterInfo *TRI = nullptr; | 
|  |  | 
|  | // PRINT FUNCTIONS | 
|  | /// Print the ordered Blocks. | 
|  | void printOrderedBlocks() const { | 
|  | size_t i = 0; | 
|  | for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(), | 
|  | iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) { | 
|  | dbgs() << "BB" << (*iterBlk)->getNumber(); | 
|  | dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")"; | 
|  | if (i != 0 && i % 10 == 0) { | 
|  | dbgs() << "\n"; | 
|  | } else { | 
|  | dbgs() << " "; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) { | 
|  | for (MachineLoop::iterator iter = LoopInfo.begin(), | 
|  | iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) { | 
|  | (*iter)->print(dbgs(), 0); | 
|  | } | 
|  | } | 
|  |  | 
|  | // UTILITY FUNCTIONS | 
|  | int getSCCNum(MachineBasicBlock *MBB) const; | 
|  | MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const; | 
|  | bool hasBackEdge(MachineBasicBlock *MBB) const; | 
|  | bool isRetiredBlock(MachineBasicBlock *MBB) const; | 
|  | bool isActiveLoophead(MachineBasicBlock *MBB) const; | 
|  | PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, | 
|  | bool AllowSideEntry = true) const; | 
|  | int countActiveBlock(MBBVector::const_iterator It, | 
|  | MBBVector::const_iterator E) const; | 
|  | bool needMigrateBlock(MachineBasicBlock *MBB) const; | 
|  |  | 
|  | // Utility Functions | 
|  | void reversePredicateSetter(MachineBasicBlock::iterator I, | 
|  | MachineBasicBlock &MBB); | 
|  | /// Compute the reversed DFS post order of Blocks | 
|  | void orderBlocks(MachineFunction *MF); | 
|  |  | 
|  | // Function originally from CFGStructTraits | 
|  | void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode, | 
|  | const DebugLoc &DL = DebugLoc()); | 
|  | MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode, | 
|  | const DebugLoc &DL = DebugLoc()); | 
|  | MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode); | 
|  | void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode, | 
|  | const DebugLoc &DL); | 
|  | void insertCondBranchBefore(MachineBasicBlock *MBB, | 
|  | MachineBasicBlock::iterator I, int NewOpcode, | 
|  | int RegNum, const DebugLoc &DL); | 
|  |  | 
|  | static int getBranchNzeroOpcode(int OldOpcode); | 
|  | static int getBranchZeroOpcode(int OldOpcode); | 
|  | static int getContinueNzeroOpcode(int OldOpcode); | 
|  | static int getContinueZeroOpcode(int OldOpcode); | 
|  | static MachineBasicBlock *getTrueBranch(MachineInstr *MI); | 
|  | static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB); | 
|  | static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB, | 
|  | MachineInstr *MI); | 
|  | static bool isCondBranch(MachineInstr *MI); | 
|  | static bool isUncondBranch(MachineInstr *MI); | 
|  | static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB); | 
|  | static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB); | 
|  |  | 
|  | /// The correct naming for this is getPossibleLoopendBlockBranchInstr. | 
|  | /// | 
|  | /// BB with backward-edge could have move instructions after the branch | 
|  | /// instruction.  Such move instruction "belong to" the loop backward-edge. | 
|  | MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB); | 
|  |  | 
|  | static MachineInstr *getReturnInstr(MachineBasicBlock *MBB); | 
|  | static bool isReturnBlock(MachineBasicBlock *MBB); | 
|  | static void cloneSuccessorList(MachineBasicBlock *DstMBB, | 
|  | MachineBasicBlock *SrcMBB); | 
|  | static MachineBasicBlock *clone(MachineBasicBlock *MBB); | 
|  |  | 
|  | /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose | 
|  | /// because the AMDGPU instruction is not recognized as terminator fix this | 
|  | /// and retire this routine | 
|  | void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB, | 
|  | MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk); | 
|  |  | 
|  | static void wrapup(MachineBasicBlock *MBB); | 
|  |  | 
|  | int patternMatch(MachineBasicBlock *MBB); | 
|  | int patternMatchGroup(MachineBasicBlock *MBB); | 
|  | int serialPatternMatch(MachineBasicBlock *MBB); | 
|  | int ifPatternMatch(MachineBasicBlock *MBB); | 
|  | int loopendPatternMatch(); | 
|  | int mergeLoop(MachineLoop *LoopRep); | 
|  |  | 
|  | /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in | 
|  | /// the same loop with LoopLandInfo without explicitly keeping track of | 
|  | /// loopContBlks and loopBreakBlks, this is a method to get the information. | 
|  | bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB, | 
|  | MachineBasicBlock *Src2MBB); | 
|  | int handleJumpintoIf(MachineBasicBlock *HeadMBB, | 
|  | MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); | 
|  | int handleJumpintoIfImp(MachineBasicBlock *HeadMBB, | 
|  | MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); | 
|  | int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, | 
|  | MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, | 
|  | MachineBasicBlock **LandMBBPtr); | 
|  | void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, | 
|  | MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, | 
|  | MachineBasicBlock *LandMBB, bool Detail = false); | 
|  | int cloneOnSideEntryTo(MachineBasicBlock *PreMBB, | 
|  | MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB); | 
|  | void mergeSerialBlock(MachineBasicBlock *DstMBB, | 
|  | MachineBasicBlock *SrcMBB); | 
|  |  | 
|  | void mergeIfthenelseBlock(MachineInstr *BranchMI, | 
|  | MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, | 
|  | MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB); | 
|  | void mergeLooplandBlock(MachineBasicBlock *DstMBB, | 
|  | MachineBasicBlock *LandMBB); | 
|  | void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, | 
|  | MachineBasicBlock *LandMBB); | 
|  | void settleLoopcontBlock(MachineBasicBlock *ContingMBB, | 
|  | MachineBasicBlock *ContMBB); | 
|  |  | 
|  | /// normalizeInfiniteLoopExit change | 
|  | ///   B1: | 
|  | ///        uncond_br LoopHeader | 
|  | /// | 
|  | /// to | 
|  | ///   B1: | 
|  | ///        cond_br 1 LoopHeader dummyExit | 
|  | /// and return the newly added dummy exit block | 
|  | MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep); | 
|  | void removeUnconditionalBranch(MachineBasicBlock *MBB); | 
|  |  | 
|  | /// Remove duplicate branches instructions in a block. | 
|  | /// For instance | 
|  | /// B0: | 
|  | ///    cond_br X B1 B2 | 
|  | ///    cond_br X B1 B2 | 
|  | /// is transformed to | 
|  | /// B0: | 
|  | ///    cond_br X B1 B2 | 
|  | void removeRedundantConditionalBranch(MachineBasicBlock *MBB); | 
|  |  | 
|  | void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB); | 
|  | void removeSuccessor(MachineBasicBlock *MBB); | 
|  | MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB, | 
|  | MachineBasicBlock *PredMBB); | 
|  | void migrateInstruction(MachineBasicBlock *SrcMBB, | 
|  | MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I); | 
|  | void recordSccnum(MachineBasicBlock *MBB, int SCCNum); | 
|  | void retireBlock(MachineBasicBlock *MBB); | 
|  |  | 
|  | private: | 
|  | MBBInfoMap BlockInfoMap; | 
|  | LoopLandInfoMap LLInfoMap; | 
|  | std::map<MachineLoop *, bool> Visited; | 
|  | MachineFunction *FuncRep; | 
|  | SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks; | 
|  | }; | 
|  |  | 
|  | } // end anonymous namespace | 
|  |  | 
|  | char AMDGPUCFGStructurizer::ID = 0; | 
|  |  | 
|  | int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const { | 
|  | MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB); | 
|  | if (It == BlockInfoMap.end()) | 
|  | return INVALIDSCCNUM; | 
|  | return (*It).second->SccNum; | 
|  | } | 
|  |  | 
|  | MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep) | 
|  | const { | 
|  | LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep); | 
|  | if (It == LLInfoMap.end()) | 
|  | return nullptr; | 
|  | return (*It).second; | 
|  | } | 
|  |  | 
|  | bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const { | 
|  | MachineLoop *LoopRep = MLI->getLoopFor(MBB); | 
|  | if (!LoopRep) | 
|  | return false; | 
|  | MachineBasicBlock *LoopHeader = LoopRep->getHeader(); | 
|  | return MBB->isSuccessor(LoopHeader); | 
|  | } | 
|  |  | 
|  | bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const { | 
|  | MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB); | 
|  | if (It == BlockInfoMap.end()) | 
|  | return false; | 
|  | return (*It).second->IsRetired; | 
|  | } | 
|  |  | 
|  | bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const { | 
|  | MachineLoop *LoopRep = MLI->getLoopFor(MBB); | 
|  | while (LoopRep && LoopRep->getHeader() == MBB) { | 
|  | MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep); | 
|  | if(!LoopLand) | 
|  | return true; | 
|  | if (!isRetiredBlock(LoopLand)) | 
|  | return true; | 
|  | LoopRep = LoopRep->getParentLoop(); | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo( | 
|  | MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, | 
|  | bool AllowSideEntry) const { | 
|  | assert(DstMBB); | 
|  | if (SrcMBB == DstMBB) | 
|  | return SinglePath_InPath; | 
|  | while (SrcMBB && SrcMBB->succ_size() == 1) { | 
|  | SrcMBB = *SrcMBB->succ_begin(); | 
|  | if (SrcMBB == DstMBB) | 
|  | return SinglePath_InPath; | 
|  | if (!AllowSideEntry && SrcMBB->pred_size() > 1) | 
|  | return Not_SinglePath; | 
|  | } | 
|  | if (SrcMBB && SrcMBB->succ_size()==0) | 
|  | return SinglePath_NotInPath; | 
|  | return Not_SinglePath; | 
|  | } | 
|  |  | 
|  | int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It, | 
|  | MBBVector::const_iterator E) const { | 
|  | int Count = 0; | 
|  | while (It != E) { | 
|  | if (!isRetiredBlock(*It)) | 
|  | ++Count; | 
|  | ++It; | 
|  | } | 
|  | return Count; | 
|  | } | 
|  |  | 
|  | bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const { | 
|  | unsigned BlockSizeThreshold = 30; | 
|  | unsigned CloneInstrThreshold = 100; | 
|  | bool MultiplePreds = MBB && (MBB->pred_size() > 1); | 
|  |  | 
|  | if(!MultiplePreds) | 
|  | return false; | 
|  | unsigned BlkSize = MBB->size(); | 
|  | return ((BlkSize > BlockSizeThreshold) && | 
|  | (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold)); | 
|  | } | 
|  |  | 
|  | void AMDGPUCFGStructurizer::reversePredicateSetter( | 
|  | MachineBasicBlock::iterator I, MachineBasicBlock &MBB) { | 
|  | assert(I.isValid() && "Expected valid iterator"); | 
|  | for (;; --I) { | 
|  | if (I == MBB.end()) | 
|  | continue; | 
|  | if (I->getOpcode() == AMDGPU::PRED_X) { | 
|  | switch (I->getOperand(2).getImm()) { | 
|  | case AMDGPU::PRED_SETE_INT: | 
|  | I->getOperand(2).setImm(AMDGPU::PRED_SETNE_INT); | 
|  | return; | 
|  | case AMDGPU::PRED_SETNE_INT: | 
|  | I->getOperand(2).setImm(AMDGPU::PRED_SETE_INT); | 
|  | return; | 
|  | case AMDGPU::PRED_SETE: | 
|  | I->getOperand(2).setImm(AMDGPU::PRED_SETNE); | 
|  | return; | 
|  | case AMDGPU::PRED_SETNE: | 
|  | I->getOperand(2).setImm(AMDGPU::PRED_SETE); | 
|  | return; | 
|  | default: | 
|  | llvm_unreachable("PRED_X Opcode invalid!"); | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB, | 
|  | int NewOpcode, const DebugLoc &DL) { | 
|  | MachineInstr *MI = | 
|  | MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL); | 
|  | MBB->push_back(MI); | 
|  | //assume the instruction doesn't take any reg operand ... | 
|  | SHOWNEWINSTR(MI); | 
|  | } | 
|  |  | 
|  | MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB, | 
|  | int NewOpcode, | 
|  | const DebugLoc &DL) { | 
|  | MachineInstr *MI = | 
|  | MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL); | 
|  | if (MBB->begin() != MBB->end()) | 
|  | MBB->insert(MBB->begin(), MI); | 
|  | else | 
|  | MBB->push_back(MI); | 
|  | SHOWNEWINSTR(MI); | 
|  | return MI; | 
|  | } | 
|  |  | 
|  | MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore( | 
|  | MachineBasicBlock::iterator I, int NewOpcode) { | 
|  | MachineInstr *OldMI = &(*I); | 
|  | MachineBasicBlock *MBB = OldMI->getParent(); | 
|  | MachineInstr *NewMBB = | 
|  | MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc()); | 
|  | MBB->insert(I, NewMBB); | 
|  | //assume the instruction doesn't take any reg operand ... | 
|  | SHOWNEWINSTR(NewMBB); | 
|  | return NewMBB; | 
|  | } | 
|  |  | 
|  | void AMDGPUCFGStructurizer::insertCondBranchBefore( | 
|  | MachineBasicBlock::iterator I, int NewOpcode, const DebugLoc &DL) { | 
|  | MachineInstr *OldMI = &(*I); | 
|  | MachineBasicBlock *MBB = OldMI->getParent(); | 
|  | MachineFunction *MF = MBB->getParent(); | 
|  | MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL); | 
|  | MBB->insert(I, NewMI); | 
|  | MachineInstrBuilder MIB(*MF, NewMI); | 
|  | MIB.addReg(OldMI->getOperand(1).getReg(), false); | 
|  | SHOWNEWINSTR(NewMI); | 
|  | //erase later oldInstr->eraseFromParent(); | 
|  | } | 
|  |  | 
|  | void AMDGPUCFGStructurizer::insertCondBranchBefore( | 
|  | MachineBasicBlock *blk, MachineBasicBlock::iterator I, int NewOpcode, | 
|  | int RegNum, const DebugLoc &DL) { | 
|  | MachineFunction *MF = blk->getParent(); | 
|  | MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL); | 
|  | //insert before | 
|  | blk->insert(I, NewInstr); | 
|  | MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false); | 
|  | SHOWNEWINSTR(NewInstr); | 
|  | } | 
|  |  | 
|  | int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) { | 
|  | switch(OldOpcode) { | 
|  | case AMDGPU::JUMP_COND: | 
|  | case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; | 
|  | case AMDGPU::BRANCH_COND_i32: | 
|  | case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32; | 
|  | default: llvm_unreachable("internal error"); | 
|  | } | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) { | 
|  | switch(OldOpcode) { | 
|  | case AMDGPU::JUMP_COND: | 
|  | case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; | 
|  | case AMDGPU::BRANCH_COND_i32: | 
|  | case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32; | 
|  | default: llvm_unreachable("internal error"); | 
|  | } | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) { | 
|  | switch(OldOpcode) { | 
|  | case AMDGPU::JUMP_COND: | 
|  | case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32; | 
|  | default: llvm_unreachable("internal error"); | 
|  | } | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) { | 
|  | switch(OldOpcode) { | 
|  | case AMDGPU::JUMP_COND: | 
|  | case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32; | 
|  | default: llvm_unreachable("internal error"); | 
|  | } | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) { | 
|  | return MI->getOperand(0).getMBB(); | 
|  | } | 
|  |  | 
|  | void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI, | 
|  | MachineBasicBlock *MBB) { | 
|  | MI->getOperand(0).setMBB(MBB); | 
|  | } | 
|  |  | 
|  | MachineBasicBlock * | 
|  | AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB, | 
|  | MachineInstr *MI) { | 
|  | assert(MBB->succ_size() == 2); | 
|  | MachineBasicBlock *TrueBranch = getTrueBranch(MI); | 
|  | MachineBasicBlock::succ_iterator It = MBB->succ_begin(); | 
|  | MachineBasicBlock::succ_iterator Next = It; | 
|  | ++Next; | 
|  | return (*It == TrueBranch) ? *Next : *It; | 
|  | } | 
|  |  | 
|  | bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) { | 
|  | switch (MI->getOpcode()) { | 
|  | case AMDGPU::JUMP_COND: | 
|  | case AMDGPU::BRANCH_COND_i32: | 
|  | case AMDGPU::BRANCH_COND_f32: return true; | 
|  | default: | 
|  | return false; | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) { | 
|  | switch (MI->getOpcode()) { | 
|  | case AMDGPU::JUMP: | 
|  | case AMDGPU::BRANCH: | 
|  | return true; | 
|  | default: | 
|  | return false; | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) { | 
|  | //get DebugLoc from the first MachineBasicBlock instruction with debug info | 
|  | DebugLoc DL; | 
|  | for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end(); | 
|  | ++It) { | 
|  | MachineInstr *instr = &(*It); | 
|  | if (instr->getDebugLoc()) | 
|  | DL = instr->getDebugLoc(); | 
|  | } | 
|  | return DL; | 
|  | } | 
|  |  | 
|  | MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr( | 
|  | MachineBasicBlock *MBB) { | 
|  | MachineBasicBlock::reverse_iterator It = MBB->rbegin(); | 
|  | MachineInstr *MI = &*It; | 
|  | if (MI && (isCondBranch(MI) || isUncondBranch(MI))) | 
|  | return MI; | 
|  | return nullptr; | 
|  | } | 
|  |  | 
|  | MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr( | 
|  | MachineBasicBlock *MBB) { | 
|  | for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend(); | 
|  | It != E; ++It) { | 
|  | // FIXME: Simplify | 
|  | MachineInstr *MI = &*It; | 
|  | if (MI) { | 
|  | if (isCondBranch(MI) || isUncondBranch(MI)) | 
|  | return MI; | 
|  | else if (!TII->isMov(MI->getOpcode())) | 
|  | break; | 
|  | } | 
|  | } | 
|  | return nullptr; | 
|  | } | 
|  |  | 
|  | MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) { | 
|  | MachineBasicBlock::reverse_iterator It = MBB->rbegin(); | 
|  | if (It != MBB->rend()) { | 
|  | MachineInstr *instr = &(*It); | 
|  | if (instr->getOpcode() == AMDGPU::RETURN) | 
|  | return instr; | 
|  | } | 
|  | return nullptr; | 
|  | } | 
|  |  | 
|  | bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) { | 
|  | MachineInstr *MI = getReturnInstr(MBB); | 
|  | bool IsReturn = (MBB->succ_size() == 0); | 
|  | if (MI) | 
|  | assert(IsReturn); | 
|  | else if (IsReturn) | 
|  | DEBUG( | 
|  | dbgs() << "BB" << MBB->getNumber() | 
|  | <<" is return block without RETURN instr\n";); | 
|  | return  IsReturn; | 
|  | } | 
|  |  | 
|  | void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB, | 
|  | MachineBasicBlock *SrcMBB) { | 
|  | for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(), | 
|  | iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It) | 
|  | DstMBB->addSuccessor(*It);  // *iter's predecessor is also taken care of | 
|  | } | 
|  |  | 
|  | MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) { | 
|  | MachineFunction *Func = MBB->getParent(); | 
|  | MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock(); | 
|  | Func->push_back(NewMBB);  //insert to function | 
|  | for (const MachineInstr &It : *MBB) | 
|  | NewMBB->push_back(Func->CloneMachineInstr(&It)); | 
|  | return NewMBB; | 
|  | } | 
|  |  | 
|  | void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith( | 
|  | MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB, | 
|  | MachineBasicBlock *NewBlk) { | 
|  | MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB); | 
|  | if (BranchMI && isCondBranch(BranchMI) && | 
|  | getTrueBranch(BranchMI) == OldMBB) | 
|  | setTrueBranch(BranchMI, NewBlk); | 
|  | } | 
|  |  | 
|  | void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) { | 
|  | assert((!MBB->getParent()->getJumpTableInfo() | 
|  | || MBB->getParent()->getJumpTableInfo()->isEmpty()) | 
|  | && "found a jump table"); | 
|  |  | 
|  | //collect continue right before endloop | 
|  | SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr; | 
|  | MachineBasicBlock::iterator Pre = MBB->begin(); | 
|  | MachineBasicBlock::iterator E = MBB->end(); | 
|  | MachineBasicBlock::iterator It = Pre; | 
|  | while (It != E) { | 
|  | if (Pre->getOpcode() == AMDGPU::CONTINUE | 
|  | && It->getOpcode() == AMDGPU::ENDLOOP) | 
|  | ContInstr.push_back(&*Pre); | 
|  | Pre = It; | 
|  | ++It; | 
|  | } | 
|  |  | 
|  | //delete continue right before endloop | 
|  | for (unsigned i = 0; i < ContInstr.size(); ++i) | 
|  | ContInstr[i]->eraseFromParent(); | 
|  |  | 
|  | // TODO to fix up jump table so later phase won't be confused.  if | 
|  | // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but | 
|  | // there isn't such an interface yet.  alternatively, replace all the other | 
|  | // blocks in the jump table with the entryBlk //} | 
|  | } | 
|  |  | 
|  | bool AMDGPUCFGStructurizer::prepare() { | 
|  | bool Changed = false; | 
|  |  | 
|  | //FIXME: if not reducible flow graph, make it so ??? | 
|  |  | 
|  | DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";); | 
|  |  | 
|  | orderBlocks(FuncRep); | 
|  |  | 
|  | SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks; | 
|  |  | 
|  | // Add an ExitBlk to loop that don't have one | 
|  | for (MachineLoopInfo::iterator It = MLI->begin(), | 
|  | E = MLI->end(); It != E; ++It) { | 
|  | MachineLoop *LoopRep = (*It); | 
|  | MBBVector ExitingMBBs; | 
|  | LoopRep->getExitingBlocks(ExitingMBBs); | 
|  |  | 
|  | if (ExitingMBBs.size() == 0) { | 
|  | MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep); | 
|  | if (DummyExitBlk) | 
|  | RetBlks.push_back(DummyExitBlk); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Remove unconditional branch instr. | 
|  | // Add dummy exit block iff there are multiple returns. | 
|  | for (SmallVectorImpl<MachineBasicBlock *>::const_iterator | 
|  | It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) { | 
|  | MachineBasicBlock *MBB = *It; | 
|  | removeUnconditionalBranch(MBB); | 
|  | removeRedundantConditionalBranch(MBB); | 
|  | if (isReturnBlock(MBB)) { | 
|  | RetBlks.push_back(MBB); | 
|  | } | 
|  | assert(MBB->succ_size() <= 2); | 
|  | } | 
|  |  | 
|  | if (RetBlks.size() >= 2) { | 
|  | addDummyExitBlock(RetBlks); | 
|  | Changed = true; | 
|  | } | 
|  |  | 
|  | return Changed; | 
|  | } | 
|  |  | 
|  | bool AMDGPUCFGStructurizer::run() { | 
|  | //Assume reducible CFG... | 
|  | DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n"); | 
|  |  | 
|  | #ifdef STRESSTEST | 
|  | //Use the worse block ordering to test the algorithm. | 
|  | ReverseVector(orderedBlks); | 
|  | #endif | 
|  |  | 
|  | DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks();); | 
|  | int NumIter = 0; | 
|  | bool Finish = false; | 
|  | MachineBasicBlock *MBB; | 
|  | bool MakeProgress = false; | 
|  | int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(), | 
|  | OrderedBlks.end()); | 
|  |  | 
|  | do { | 
|  | ++NumIter; | 
|  | DEBUG( | 
|  | dbgs() << "numIter = " << NumIter | 
|  | << ", numRemaintedBlk = " << NumRemainedBlk << "\n"; | 
|  | ); | 
|  |  | 
|  | SmallVectorImpl<MachineBasicBlock *>::const_iterator It = | 
|  | OrderedBlks.begin(); | 
|  | SmallVectorImpl<MachineBasicBlock *>::const_iterator E = | 
|  | OrderedBlks.end(); | 
|  |  | 
|  | SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter = | 
|  | It; | 
|  | MachineBasicBlock *SccBeginMBB = nullptr; | 
|  | int SccNumBlk = 0;  // The number of active blocks, init to a | 
|  | // maximum possible number. | 
|  | int SccNumIter;     // Number of iteration in this SCC. | 
|  |  | 
|  | while (It != E) { | 
|  | MBB = *It; | 
|  |  | 
|  | if (!SccBeginMBB) { | 
|  | SccBeginIter = It; | 
|  | SccBeginMBB = MBB; | 
|  | SccNumIter = 0; | 
|  | SccNumBlk = NumRemainedBlk; // Init to maximum possible number. | 
|  | DEBUG( | 
|  | dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB); | 
|  | dbgs() << "\n"; | 
|  | ); | 
|  | } | 
|  |  | 
|  | if (!isRetiredBlock(MBB)) | 
|  | patternMatch(MBB); | 
|  |  | 
|  | ++It; | 
|  |  | 
|  | bool ContNextScc = true; | 
|  | if (It == E | 
|  | || getSCCNum(SccBeginMBB) != getSCCNum(*It)) { | 
|  | // Just finish one scc. | 
|  | ++SccNumIter; | 
|  | int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It); | 
|  | if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) { | 
|  | DEBUG( | 
|  | dbgs() << "Can't reduce SCC " << getSCCNum(MBB) | 
|  | << ", sccNumIter = " << SccNumIter; | 
|  | dbgs() << "doesn't make any progress\n"; | 
|  | ); | 
|  | ContNextScc = true; | 
|  | } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) { | 
|  | SccNumBlk = sccRemainedNumBlk; | 
|  | It = SccBeginIter; | 
|  | ContNextScc = false; | 
|  | DEBUG( | 
|  | dbgs() << "repeat processing SCC" << getSCCNum(MBB) | 
|  | << "sccNumIter = " << SccNumIter << '\n'; | 
|  | ); | 
|  | } else { | 
|  | // Finish the current scc. | 
|  | ContNextScc = true; | 
|  | } | 
|  | } else { | 
|  | // Continue on next component in the current scc. | 
|  | ContNextScc = false; | 
|  | } | 
|  |  | 
|  | if (ContNextScc) | 
|  | SccBeginMBB = nullptr; | 
|  | } //while, "one iteration" over the function. | 
|  |  | 
|  | MachineBasicBlock *EntryMBB = | 
|  | *GraphTraits<MachineFunction *>::nodes_begin(FuncRep); | 
|  | if (EntryMBB->succ_size() == 0) { | 
|  | Finish = true; | 
|  | DEBUG( | 
|  | dbgs() << "Reduce to one block\n"; | 
|  | ); | 
|  | } else { | 
|  | int NewnumRemainedBlk | 
|  | = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end()); | 
|  | // consider cloned blocks ?? | 
|  | if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) { | 
|  | MakeProgress = true; | 
|  | NumRemainedBlk = NewnumRemainedBlk; | 
|  | } else { | 
|  | MakeProgress = false; | 
|  | DEBUG( | 
|  | dbgs() << "No progress\n"; | 
|  | ); | 
|  | } | 
|  | } | 
|  | } while (!Finish && MakeProgress); | 
|  |  | 
|  | // Misc wrap up to maintain the consistency of the Function representation. | 
|  | wrapup(*GraphTraits<MachineFunction *>::nodes_begin(FuncRep)); | 
|  |  | 
|  | // Detach retired Block, release memory. | 
|  | for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end(); | 
|  | It != E; ++It) { | 
|  | if ((*It).second && (*It).second->IsRetired) { | 
|  | assert(((*It).first)->getNumber() != -1); | 
|  | DEBUG( | 
|  | dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n"; | 
|  | ); | 
|  | (*It).first->eraseFromParent();  //Remove from the parent Function. | 
|  | } | 
|  | delete (*It).second; | 
|  | } | 
|  | BlockInfoMap.clear(); | 
|  | LLInfoMap.clear(); | 
|  |  | 
|  | if (!Finish) { | 
|  | DEBUG(FuncRep->viewCFG()); | 
|  | report_fatal_error("IRREDUCIBLE_CFG"); | 
|  | } | 
|  |  | 
|  | return true; | 
|  | } | 
|  |  | 
|  | void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) { | 
|  | int SccNum = 0; | 
|  | MachineBasicBlock *MBB; | 
|  | for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd(); | 
|  | ++It, ++SccNum) { | 
|  | const std::vector<MachineBasicBlock *> &SccNext = *It; | 
|  | for (std::vector<MachineBasicBlock *>::const_iterator | 
|  | blockIter = SccNext.begin(), blockEnd = SccNext.end(); | 
|  | blockIter != blockEnd; ++blockIter) { | 
|  | MBB = *blockIter; | 
|  | OrderedBlks.push_back(MBB); | 
|  | recordSccnum(MBB, SccNum); | 
|  | } | 
|  | } | 
|  |  | 
|  | // walk through all the block in func to check for unreachable | 
|  | for (auto *MBB : nodes(MF)) { | 
|  | SccNum = getSCCNum(MBB); | 
|  | if (SccNum == INVALIDSCCNUM) | 
|  | dbgs() << "unreachable block BB" << MBB->getNumber() << "\n"; | 
|  | } | 
|  | } | 
|  |  | 
|  | int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) { | 
|  | int NumMatch = 0; | 
|  | int CurMatch; | 
|  |  | 
|  | DEBUG( | 
|  | dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n"; | 
|  | ); | 
|  |  | 
|  | while ((CurMatch = patternMatchGroup(MBB)) > 0) | 
|  | NumMatch += CurMatch; | 
|  |  | 
|  | DEBUG( | 
|  | dbgs() << "End patternMatch BB" << MBB->getNumber() | 
|  | << ", numMatch = " << NumMatch << "\n"; | 
|  | ); | 
|  |  | 
|  | return NumMatch; | 
|  | } | 
|  |  | 
|  | int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) { | 
|  | int NumMatch = 0; | 
|  | NumMatch += loopendPatternMatch(); | 
|  | NumMatch += serialPatternMatch(MBB); | 
|  | NumMatch += ifPatternMatch(MBB); | 
|  | return NumMatch; | 
|  | } | 
|  |  | 
|  | int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) { | 
|  | if (MBB->succ_size() != 1) | 
|  | return 0; | 
|  |  | 
|  | MachineBasicBlock *childBlk = *MBB->succ_begin(); | 
|  | if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) | 
|  | return 0; | 
|  |  | 
|  | mergeSerialBlock(MBB, childBlk); | 
|  | ++numSerialPatternMatch; | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) { | 
|  | //two edges | 
|  | if (MBB->succ_size() != 2) | 
|  | return 0; | 
|  | if (hasBackEdge(MBB)) | 
|  | return 0; | 
|  | MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); | 
|  | if (!BranchMI) | 
|  | return 0; | 
|  |  | 
|  | assert(isCondBranch(BranchMI)); | 
|  | int NumMatch = 0; | 
|  |  | 
|  | MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI); | 
|  | NumMatch += serialPatternMatch(TrueMBB); | 
|  | NumMatch += ifPatternMatch(TrueMBB); | 
|  | MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI); | 
|  | NumMatch += serialPatternMatch(FalseMBB); | 
|  | NumMatch += ifPatternMatch(FalseMBB); | 
|  | MachineBasicBlock *LandBlk; | 
|  | int Cloned = 0; | 
|  |  | 
|  | assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty()); | 
|  | // TODO: Simplify | 
|  | if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1 | 
|  | && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) { | 
|  | // Diamond pattern | 
|  | LandBlk = *TrueMBB->succ_begin(); | 
|  | } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) { | 
|  | // Triangle pattern, false is empty | 
|  | LandBlk = FalseMBB; | 
|  | FalseMBB = nullptr; | 
|  | } else if (FalseMBB->succ_size() == 1 | 
|  | && *FalseMBB->succ_begin() == TrueMBB) { | 
|  | // Triangle pattern, true is empty | 
|  | // We reverse the predicate to make a triangle, empty false pattern; | 
|  | std::swap(TrueMBB, FalseMBB); | 
|  | reversePredicateSetter(MBB->end(), *MBB); | 
|  | LandBlk = FalseMBB; | 
|  | FalseMBB = nullptr; | 
|  | } else if (FalseMBB->succ_size() == 1 | 
|  | && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) { | 
|  | LandBlk = *FalseMBB->succ_begin(); | 
|  | } else if (TrueMBB->succ_size() == 1 | 
|  | && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) { | 
|  | LandBlk = *TrueMBB->succ_begin(); | 
|  | } else { | 
|  | return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB); | 
|  | } | 
|  |  | 
|  | // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the | 
|  | // new BB created for landBlk==NULL may introduce new challenge to the | 
|  | // reduction process. | 
|  | if (LandBlk && | 
|  | ((TrueMBB && TrueMBB->pred_size() > 1) | 
|  | || (FalseMBB && FalseMBB->pred_size() > 1))) { | 
|  | Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk); | 
|  | } | 
|  |  | 
|  | if (TrueMBB && TrueMBB->pred_size() > 1) { | 
|  | TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB); | 
|  | ++Cloned; | 
|  | } | 
|  |  | 
|  | if (FalseMBB && FalseMBB->pred_size() > 1) { | 
|  | FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB); | 
|  | ++Cloned; | 
|  | } | 
|  |  | 
|  | mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk); | 
|  |  | 
|  | ++numIfPatternMatch; | 
|  |  | 
|  | numClonedBlock += Cloned; | 
|  |  | 
|  | return 1 + Cloned + NumMatch; | 
|  | } | 
|  |  | 
|  | int AMDGPUCFGStructurizer::loopendPatternMatch() { | 
|  | std::deque<MachineLoop *> NestedLoops; | 
|  | for (auto &It: *MLI) | 
|  | for (MachineLoop *ML : depth_first(It)) | 
|  | NestedLoops.push_front(ML); | 
|  |  | 
|  | if (NestedLoops.empty()) | 
|  | return 0; | 
|  |  | 
|  | // Process nested loop outside->inside (we did push_front), | 
|  | // so "continue" to a outside loop won't be mistaken as "break" | 
|  | // of the current loop. | 
|  | int Num = 0; | 
|  | for (MachineLoop *ExaminedLoop : NestedLoops) { | 
|  | if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop]) | 
|  | continue; | 
|  | DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump();); | 
|  | int NumBreak = mergeLoop(ExaminedLoop); | 
|  | if (NumBreak == -1) | 
|  | break; | 
|  | Num += NumBreak; | 
|  | } | 
|  | return Num; | 
|  | } | 
|  |  | 
|  | int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) { | 
|  | MachineBasicBlock *LoopHeader = LoopRep->getHeader(); | 
|  | MBBVector ExitingMBBs; | 
|  | LoopRep->getExitingBlocks(ExitingMBBs); | 
|  | assert(!ExitingMBBs.empty() && "Infinite Loop not supported"); | 
|  | DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() << " exiting blocks\n";); | 
|  | // We assume a single ExitBlk | 
|  | MBBVector ExitBlks; | 
|  | LoopRep->getExitBlocks(ExitBlks); | 
|  | SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet; | 
|  | for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i) | 
|  | ExitBlkSet.insert(ExitBlks[i]); | 
|  | assert(ExitBlkSet.size() == 1); | 
|  | MachineBasicBlock *ExitBlk = *ExitBlks.begin(); | 
|  | assert(ExitBlk && "Loop has several exit block"); | 
|  | MBBVector LatchBlks; | 
|  | for (auto *LB : inverse_children<MachineBasicBlock*>(LoopHeader)) | 
|  | if (LoopRep->contains(LB)) | 
|  | LatchBlks.push_back(LB); | 
|  |  | 
|  | for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i) | 
|  | mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk); | 
|  | for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i) | 
|  | settleLoopcontBlock(LatchBlks[i], LoopHeader); | 
|  | int Match = 0; | 
|  | do { | 
|  | Match = 0; | 
|  | Match += serialPatternMatch(LoopHeader); | 
|  | Match += ifPatternMatch(LoopHeader); | 
|  | } while (Match > 0); | 
|  | mergeLooplandBlock(LoopHeader, ExitBlk); | 
|  | MachineLoop *ParentLoop = LoopRep->getParentLoop(); | 
|  | if (ParentLoop) | 
|  | MLI->changeLoopFor(LoopHeader, ParentLoop); | 
|  | else | 
|  | MLI->removeBlock(LoopHeader); | 
|  | Visited[LoopRep] = true; | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak( | 
|  | MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) { | 
|  | if (Src1MBB->succ_size() == 0) { | 
|  | MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB); | 
|  | if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) { | 
|  | MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep]; | 
|  | if (TheEntry) { | 
|  | DEBUG( | 
|  | dbgs() << "isLoopContBreakBlock yes src1 = BB" | 
|  | << Src1MBB->getNumber() | 
|  | << " src2 = BB" << Src2MBB->getNumber() << "\n"; | 
|  | ); | 
|  | return true; | 
|  | } | 
|  | } | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB, | 
|  | MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { | 
|  | int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB); | 
|  | if (Num == 0) { | 
|  | DEBUG( | 
|  | dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n"; | 
|  | ); | 
|  | Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB); | 
|  | } | 
|  | return Num; | 
|  | } | 
|  |  | 
|  | int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB, | 
|  | MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { | 
|  | int Num = 0; | 
|  | MachineBasicBlock *DownBlk; | 
|  |  | 
|  | //trueBlk could be the common post dominator | 
|  | DownBlk = TrueMBB; | 
|  |  | 
|  | DEBUG( | 
|  | dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber() | 
|  | << " true = BB" << TrueMBB->getNumber() | 
|  | << ", numSucc=" << TrueMBB->succ_size() | 
|  | << " false = BB" << FalseMBB->getNumber() << "\n"; | 
|  | ); | 
|  |  | 
|  | while (DownBlk) { | 
|  | DEBUG( | 
|  | dbgs() << "check down = BB" << DownBlk->getNumber(); | 
|  | ); | 
|  |  | 
|  | if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) { | 
|  | DEBUG( | 
|  | dbgs() << " working\n"; | 
|  | ); | 
|  |  | 
|  | Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk); | 
|  | Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk); | 
|  |  | 
|  | numClonedBlock += Num; | 
|  | Num += serialPatternMatch(*HeadMBB->succ_begin()); | 
|  | Num += serialPatternMatch(*std::next(HeadMBB->succ_begin())); | 
|  | Num += ifPatternMatch(HeadMBB); | 
|  | assert(Num > 0); | 
|  |  | 
|  | break; | 
|  | } | 
|  | DEBUG( | 
|  | dbgs() << " not working\n"; | 
|  | ); | 
|  | DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr; | 
|  | } // walk down the postDomTree | 
|  |  | 
|  | return Num; | 
|  | } | 
|  |  | 
|  | #ifndef NDEBUG | 
|  | void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf( | 
|  | MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB, | 
|  | MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) { | 
|  | dbgs() << "head = BB" << HeadMBB->getNumber() | 
|  | << " size = " << HeadMBB->size(); | 
|  | if (Detail) { | 
|  | dbgs() << "\n"; | 
|  | HeadMBB->print(dbgs()); | 
|  | dbgs() << "\n"; | 
|  | } | 
|  |  | 
|  | if (TrueMBB) { | 
|  | dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = " | 
|  | << TrueMBB->size() << " numPred = " << TrueMBB->pred_size(); | 
|  | if (Detail) { | 
|  | dbgs() << "\n"; | 
|  | TrueMBB->print(dbgs()); | 
|  | dbgs() << "\n"; | 
|  | } | 
|  | } | 
|  | if (FalseMBB) { | 
|  | dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = " | 
|  | << FalseMBB->size() << " numPred = " << FalseMBB->pred_size(); | 
|  | if (Detail) { | 
|  | dbgs() << "\n"; | 
|  | FalseMBB->print(dbgs()); | 
|  | dbgs() << "\n"; | 
|  | } | 
|  | } | 
|  | if (LandMBB) { | 
|  | dbgs() << ", land = BB" << LandMBB->getNumber() << " size = " | 
|  | << LandMBB->size() << " numPred = " << LandMBB->pred_size(); | 
|  | if (Detail) { | 
|  | dbgs() << "\n"; | 
|  | LandMBB->print(dbgs()); | 
|  | dbgs() << "\n"; | 
|  | } | 
|  | } | 
|  |  | 
|  | dbgs() << "\n"; | 
|  | } | 
|  | #endif | 
|  |  | 
|  | int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, | 
|  | MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, | 
|  | MachineBasicBlock **LandMBBPtr) { | 
|  | bool MigrateTrue = false; | 
|  | bool MigrateFalse = false; | 
|  |  | 
|  | MachineBasicBlock *LandBlk = *LandMBBPtr; | 
|  |  | 
|  | assert((!TrueMBB || TrueMBB->succ_size() <= 1) | 
|  | && (!FalseMBB || FalseMBB->succ_size() <= 1)); | 
|  |  | 
|  | if (TrueMBB == FalseMBB) | 
|  | return 0; | 
|  |  | 
|  | MigrateTrue = needMigrateBlock(TrueMBB); | 
|  | MigrateFalse = needMigrateBlock(FalseMBB); | 
|  |  | 
|  | if (!MigrateTrue && !MigrateFalse) | 
|  | return 0; | 
|  |  | 
|  | // If we need to migrate either trueBlk and falseBlk, migrate the rest that | 
|  | // have more than one predecessors.  without doing this, its predecessor | 
|  | // rather than headBlk will have undefined value in initReg. | 
|  | if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1) | 
|  | MigrateTrue = true; | 
|  | if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1) | 
|  | MigrateFalse = true; | 
|  |  | 
|  | DEBUG( | 
|  | dbgs() << "before improveSimpleJumpintoIf: "; | 
|  | showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0); | 
|  | ); | 
|  |  | 
|  | // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk | 
|  | // | 
|  | // new: headBlk => if () {initReg = 1; org trueBlk branch} else | 
|  | //      {initReg = 0; org falseBlk branch } | 
|  | //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk} | 
|  | //      => org landBlk | 
|  | //      if landBlk->pred_size() > 2, put the about if-else inside | 
|  | //      if (initReg !=2) {...} | 
|  | // | 
|  | // add initReg = initVal to headBlk | 
|  |  | 
|  | const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); | 
|  | if (!MigrateTrue || !MigrateFalse) { | 
|  | // XXX: We have an opportunity here to optimize the "branch into if" case | 
|  | // here.  Branch into if looks like this: | 
|  | //                        entry | 
|  | //                       /     | | 
|  | //           diamond_head       branch_from | 
|  | //             /      \           | | 
|  | // diamond_false        diamond_true | 
|  | //             \      / | 
|  | //               done | 
|  | // | 
|  | // The diamond_head block begins the "if" and the diamond_true block | 
|  | // is the block being "branched into". | 
|  | // | 
|  | // If MigrateTrue is true, then TrueBB is the block being "branched into" | 
|  | // and if MigrateFalse is true, then FalseBB is the block being | 
|  | // "branched into" | 
|  | // | 
|  | // Here is the pseudo code for how I think the optimization should work: | 
|  | // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head. | 
|  | // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from. | 
|  | // 3. Move the branch instruction from diamond_head into its own basic | 
|  | //    block (new_block). | 
|  | // 4. Add an unconditional branch from diamond_head to new_block | 
|  | // 5. Replace the branch instruction in branch_from with an unconditional | 
|  | //    branch to new_block.  If branch_from has multiple predecessors, then | 
|  | //    we need to replace the True/False block in the branch | 
|  | //    instruction instead of replacing it. | 
|  | // 6. Change the condition of the branch instruction in new_block from | 
|  | //    COND to (COND || GPR0) | 
|  | // | 
|  | // In order insert these MOV instruction, we will need to use the | 
|  | // RegisterScavenger.  Usually liveness stops being tracked during | 
|  | // the late machine optimization passes, however if we implement | 
|  | // bool TargetRegisterInfo::requiresRegisterScavenging( | 
|  | //                                                const MachineFunction &MF) | 
|  | // and have it return true, liveness will be tracked correctly | 
|  | // by generic optimization passes.  We will also need to make sure that | 
|  | // all of our target-specific passes that run after regalloc and before | 
|  | // the CFGStructurizer track liveness and we will need to modify this pass | 
|  | // to correctly track liveness. | 
|  | // | 
|  | // After the above changes, the new CFG should look like this: | 
|  | //                        entry | 
|  | //                       /     | | 
|  | //           diamond_head       branch_from | 
|  | //                       \     / | 
|  | //                      new_block | 
|  | //                      /      | | 
|  | //         diamond_false        diamond_true | 
|  | //                      \      / | 
|  | //                        done | 
|  | // | 
|  | // Without this optimization, we are forced to duplicate the diamond_true | 
|  | // block and we will end up with a CFG like this: | 
|  | // | 
|  | //                        entry | 
|  | //                       /     | | 
|  | //           diamond_head       branch_from | 
|  | //             /      \                   | | 
|  | // diamond_false        diamond_true      diamond_true (duplicate) | 
|  | //             \      /                   | | 
|  | //               done --------------------| | 
|  | // | 
|  | // Duplicating diamond_true can be very costly especially if it has a | 
|  | // lot of instructions. | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int NumNewBlk = 0; | 
|  |  | 
|  | bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2); | 
|  |  | 
|  | //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL" | 
|  | MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF); | 
|  |  | 
|  | if (LandBlkHasOtherPred) { | 
|  | report_fatal_error("Extra register needed to handle CFG"); | 
|  | unsigned CmpResReg = | 
|  | HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); | 
|  | report_fatal_error("Extra compare instruction needed to handle CFG"); | 
|  | insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, | 
|  | CmpResReg, DebugLoc()); | 
|  | } | 
|  |  | 
|  | // XXX: We are running this after RA, so creating virtual registers will | 
|  | // cause an assertion failure in the PostRA scheduling pass. | 
|  | unsigned InitReg = | 
|  | HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); | 
|  | insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg, | 
|  | DebugLoc()); | 
|  |  | 
|  | if (MigrateTrue) { | 
|  | migrateInstruction(TrueMBB, LandBlk, I); | 
|  | // need to uncondionally insert the assignment to ensure a path from its | 
|  | // predecessor rather than headBlk has valid value in initReg if | 
|  | // (initVal != 1). | 
|  | report_fatal_error("Extra register needed to handle CFG"); | 
|  | } | 
|  | insertInstrBefore(I, AMDGPU::ELSE); | 
|  |  | 
|  | if (MigrateFalse) { | 
|  | migrateInstruction(FalseMBB, LandBlk, I); | 
|  | // need to uncondionally insert the assignment to ensure a path from its | 
|  | // predecessor rather than headBlk has valid value in initReg if | 
|  | // (initVal != 0) | 
|  | report_fatal_error("Extra register needed to handle CFG"); | 
|  | } | 
|  |  | 
|  | if (LandBlkHasOtherPred) { | 
|  | // add endif | 
|  | insertInstrBefore(I, AMDGPU::ENDIF); | 
|  |  | 
|  | // put initReg = 2 to other predecessors of landBlk | 
|  | for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(), | 
|  | PE = LandBlk->pred_end(); PI != PE; ++PI) { | 
|  | MachineBasicBlock *MBB = *PI; | 
|  | if (MBB != TrueMBB && MBB != FalseMBB) | 
|  | report_fatal_error("Extra register needed to handle CFG"); | 
|  | } | 
|  | } | 
|  | DEBUG( | 
|  | dbgs() << "result from improveSimpleJumpintoIf: "; | 
|  | showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0); | 
|  | ); | 
|  |  | 
|  | // update landBlk | 
|  | *LandMBBPtr = LandBlk; | 
|  |  | 
|  | return NumNewBlk; | 
|  | } | 
|  |  | 
|  | void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB, | 
|  | MachineBasicBlock *SrcMBB) { | 
|  | DEBUG( | 
|  | dbgs() << "serialPattern BB" << DstMBB->getNumber() | 
|  | << " <= BB" << SrcMBB->getNumber() << "\n"; | 
|  | ); | 
|  | DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end()); | 
|  |  | 
|  | DstMBB->removeSuccessor(SrcMBB, true); | 
|  | cloneSuccessorList(DstMBB, SrcMBB); | 
|  |  | 
|  | removeSuccessor(SrcMBB); | 
|  | MLI->removeBlock(SrcMBB); | 
|  | retireBlock(SrcMBB); | 
|  | } | 
|  |  | 
|  | void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI, | 
|  | MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, | 
|  | MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) { | 
|  | assert (TrueMBB); | 
|  | DEBUG( | 
|  | dbgs() << "ifPattern BB" << MBB->getNumber(); | 
|  | dbgs() << "{  "; | 
|  | if (TrueMBB) { | 
|  | dbgs() << "BB" << TrueMBB->getNumber(); | 
|  | } | 
|  | dbgs() << "  } else "; | 
|  | dbgs() << "{  "; | 
|  | if (FalseMBB) { | 
|  | dbgs() << "BB" << FalseMBB->getNumber(); | 
|  | } | 
|  | dbgs() << "  }\n "; | 
|  | dbgs() << "landBlock: "; | 
|  | if (!LandMBB) { | 
|  | dbgs() << "NULL"; | 
|  | } else { | 
|  | dbgs() << "BB" << LandMBB->getNumber(); | 
|  | } | 
|  | dbgs() << "\n"; | 
|  | ); | 
|  |  | 
|  | int OldOpcode = BranchMI->getOpcode(); | 
|  | DebugLoc BranchDL = BranchMI->getDebugLoc(); | 
|  |  | 
|  | //    transform to | 
|  | //    if cond | 
|  | //       trueBlk | 
|  | //    else | 
|  | //       falseBlk | 
|  | //    endif | 
|  | //    landBlk | 
|  |  | 
|  | MachineBasicBlock::iterator I = BranchMI; | 
|  | insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode), | 
|  | BranchDL); | 
|  |  | 
|  | if (TrueMBB) { | 
|  | MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end()); | 
|  | MBB->removeSuccessor(TrueMBB, true); | 
|  | if (LandMBB && TrueMBB->succ_size()!=0) | 
|  | TrueMBB->removeSuccessor(LandMBB, true); | 
|  | retireBlock(TrueMBB); | 
|  | MLI->removeBlock(TrueMBB); | 
|  | } | 
|  |  | 
|  | if (FalseMBB) { | 
|  | insertInstrBefore(I, AMDGPU::ELSE); | 
|  | MBB->splice(I, FalseMBB, FalseMBB->begin(), | 
|  | FalseMBB->end()); | 
|  | MBB->removeSuccessor(FalseMBB, true); | 
|  | if (LandMBB && FalseMBB->succ_size() != 0) | 
|  | FalseMBB->removeSuccessor(LandMBB, true); | 
|  | retireBlock(FalseMBB); | 
|  | MLI->removeBlock(FalseMBB); | 
|  | } | 
|  | insertInstrBefore(I, AMDGPU::ENDIF); | 
|  |  | 
|  | BranchMI->eraseFromParent(); | 
|  |  | 
|  | if (LandMBB && TrueMBB && FalseMBB) | 
|  | MBB->addSuccessor(LandMBB); | 
|  | } | 
|  |  | 
|  | void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk, | 
|  | MachineBasicBlock *LandMBB) { | 
|  | DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber() | 
|  | << " land = BB" << LandMBB->getNumber() << "\n";); | 
|  |  | 
|  | insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc()); | 
|  | insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc()); | 
|  | DstBlk->replaceSuccessor(DstBlk, LandMBB); | 
|  | } | 
|  |  | 
|  | void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, | 
|  | MachineBasicBlock *LandMBB) { | 
|  | DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber() | 
|  | << " land = BB" << LandMBB->getNumber() << "\n";); | 
|  | MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB); | 
|  | assert(BranchMI && isCondBranch(BranchMI)); | 
|  | DebugLoc DL = BranchMI->getDebugLoc(); | 
|  | MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI); | 
|  | MachineBasicBlock::iterator I = BranchMI; | 
|  | if (TrueBranch != LandMBB) | 
|  | reversePredicateSetter(I, *I->getParent()); | 
|  | insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL); | 
|  | insertInstrBefore(I, AMDGPU::BREAK); | 
|  | insertInstrBefore(I, AMDGPU::ENDIF); | 
|  | //now branchInst can be erase safely | 
|  | BranchMI->eraseFromParent(); | 
|  | //now take care of successors, retire blocks | 
|  | ExitingMBB->removeSuccessor(LandMBB, true); | 
|  | } | 
|  |  | 
|  | void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB, | 
|  | MachineBasicBlock *ContMBB) { | 
|  | DEBUG(dbgs() << "settleLoopcontBlock conting = BB" | 
|  | << ContingMBB->getNumber() | 
|  | << ", cont = BB" << ContMBB->getNumber() << "\n";); | 
|  |  | 
|  | MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB); | 
|  | if (MI) { | 
|  | assert(isCondBranch(MI)); | 
|  | MachineBasicBlock::iterator I = MI; | 
|  | MachineBasicBlock *TrueBranch = getTrueBranch(MI); | 
|  | int OldOpcode = MI->getOpcode(); | 
|  | DebugLoc DL = MI->getDebugLoc(); | 
|  |  | 
|  | bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI); | 
|  |  | 
|  | if (!UseContinueLogical) { | 
|  | int BranchOpcode = | 
|  | TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) : | 
|  | getBranchZeroOpcode(OldOpcode); | 
|  | insertCondBranchBefore(I, BranchOpcode, DL); | 
|  | // insertEnd to ensure phi-moves, if exist, go before the continue-instr. | 
|  | insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL); | 
|  | insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL); | 
|  | } else { | 
|  | int BranchOpcode = | 
|  | TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) : | 
|  | getContinueZeroOpcode(OldOpcode); | 
|  | insertCondBranchBefore(I, BranchOpcode, DL); | 
|  | } | 
|  |  | 
|  | MI->eraseFromParent(); | 
|  | } else { | 
|  | // if we've arrived here then we've already erased the branch instruction | 
|  | // travel back up the basic block to see the last reference of our debug | 
|  | // location we've just inserted that reference here so it should be | 
|  | // representative insertEnd to ensure phi-moves, if exist, go before the | 
|  | // continue-instr. | 
|  | insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, | 
|  | getLastDebugLocInBB(ContingMBB)); | 
|  | } | 
|  | } | 
|  |  | 
|  | int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB, | 
|  | MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) { | 
|  | int Cloned = 0; | 
|  | assert(PreMBB->isSuccessor(SrcMBB)); | 
|  | while (SrcMBB && SrcMBB != DstMBB) { | 
|  | assert(SrcMBB->succ_size() == 1); | 
|  | if (SrcMBB->pred_size() > 1) { | 
|  | SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB); | 
|  | ++Cloned; | 
|  | } | 
|  |  | 
|  | PreMBB = SrcMBB; | 
|  | SrcMBB = *SrcMBB->succ_begin(); | 
|  | } | 
|  |  | 
|  | return Cloned; | 
|  | } | 
|  |  | 
|  | MachineBasicBlock * | 
|  | AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB, | 
|  | MachineBasicBlock *PredMBB) { | 
|  | assert(PredMBB->isSuccessor(MBB) && | 
|  | "succBlk is not a prececessor of curBlk"); | 
|  |  | 
|  | MachineBasicBlock *CloneMBB = clone(MBB);  //clone instructions | 
|  | replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB); | 
|  | //srcBlk, oldBlk, newBlk | 
|  |  | 
|  | PredMBB->replaceSuccessor(MBB, CloneMBB); | 
|  |  | 
|  | // add all successor to cloneBlk | 
|  | cloneSuccessorList(CloneMBB, MBB); | 
|  |  | 
|  | numClonedInstr += MBB->size(); | 
|  |  | 
|  | DEBUG( | 
|  | dbgs() << "Cloned block: " << "BB" | 
|  | << MBB->getNumber() << "size " << MBB->size() << "\n"; | 
|  | ); | 
|  |  | 
|  | SHOWNEWBLK(CloneMBB, "result of Cloned block: "); | 
|  |  | 
|  | return CloneMBB; | 
|  | } | 
|  |  | 
|  | void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB, | 
|  | MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) { | 
|  | MachineBasicBlock::iterator SpliceEnd; | 
|  | //look for the input branchinstr, not the AMDGPU branchinstr | 
|  | MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB); | 
|  | if (!BranchMI) { | 
|  | DEBUG( | 
|  | dbgs() << "migrateInstruction don't see branch instr\n"; | 
|  | ); | 
|  | SpliceEnd = SrcMBB->end(); | 
|  | } else { | 
|  | DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI); | 
|  | SpliceEnd = BranchMI; | 
|  | } | 
|  | DEBUG( | 
|  | dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size() | 
|  | << "srcSize = " << SrcMBB->size() << "\n"; | 
|  | ); | 
|  |  | 
|  | //splice insert before insertPos | 
|  | DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd); | 
|  |  | 
|  | DEBUG( | 
|  | dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size() | 
|  | << "srcSize = " << SrcMBB->size() << '\n'; | 
|  | ); | 
|  | } | 
|  |  | 
|  | MachineBasicBlock * | 
|  | AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) { | 
|  | MachineBasicBlock *LoopHeader = LoopRep->getHeader(); | 
|  | MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch(); | 
|  |  | 
|  | if (!LoopHeader || !LoopLatch) | 
|  | return nullptr; | 
|  | MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch); | 
|  | // Is LoopRep an infinite loop ? | 
|  | if (!BranchMI || !isUncondBranch(BranchMI)) | 
|  | return nullptr; | 
|  |  | 
|  | MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); | 
|  | FuncRep->push_back(DummyExitBlk);  //insert to function | 
|  | SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: "); | 
|  | DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";); | 
|  | LLVMContext &Ctx = LoopHeader->getParent()->getFunction().getContext(); | 
|  | Ctx.emitError("Extra register needed to handle CFG"); | 
|  | return nullptr; | 
|  | } | 
|  |  | 
|  | void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) { | 
|  | MachineInstr *BranchMI; | 
|  |  | 
|  | // I saw two unconditional branch in one basic block in example | 
|  | // test_fc_do_while_or.c need to fix the upstream on this to remove the loop. | 
|  | while ((BranchMI = getLoopendBlockBranchInstr(MBB)) | 
|  | && isUncondBranch(BranchMI)) { | 
|  | DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI); | 
|  | BranchMI->eraseFromParent(); | 
|  | } | 
|  | } | 
|  |  | 
|  | void AMDGPUCFGStructurizer::removeRedundantConditionalBranch( | 
|  | MachineBasicBlock *MBB) { | 
|  | if (MBB->succ_size() != 2) | 
|  | return; | 
|  | MachineBasicBlock *MBB1 = *MBB->succ_begin(); | 
|  | MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin()); | 
|  | if (MBB1 != MBB2) | 
|  | return; | 
|  |  | 
|  | MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); | 
|  | assert(BranchMI && isCondBranch(BranchMI)); | 
|  | DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI); | 
|  | BranchMI->eraseFromParent(); | 
|  | SHOWNEWBLK(MBB1, "Removing redundant successor"); | 
|  | MBB->removeSuccessor(MBB1, true); | 
|  | } | 
|  |  | 
|  | void AMDGPUCFGStructurizer::addDummyExitBlock( | 
|  | SmallVectorImpl<MachineBasicBlock*> &RetMBB) { | 
|  | MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); | 
|  | FuncRep->push_back(DummyExitBlk);  //insert to function | 
|  | insertInstrEnd(DummyExitBlk, AMDGPU::RETURN); | 
|  |  | 
|  | for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(), | 
|  | E = RetMBB.end(); It != E; ++It) { | 
|  | MachineBasicBlock *MBB = *It; | 
|  | MachineInstr *MI = getReturnInstr(MBB); | 
|  | if (MI) | 
|  | MI->eraseFromParent(); | 
|  | MBB->addSuccessor(DummyExitBlk); | 
|  | DEBUG( | 
|  | dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber() | 
|  | << " successors\n"; | 
|  | ); | 
|  | } | 
|  | SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: "); | 
|  | } | 
|  |  | 
|  | void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) { | 
|  | while (MBB->succ_size()) | 
|  | MBB->removeSuccessor(*MBB->succ_begin()); | 
|  | } | 
|  |  | 
|  | void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB, | 
|  | int SccNum) { | 
|  | BlockInformation *&srcBlkInfo = BlockInfoMap[MBB]; | 
|  | if (!srcBlkInfo) | 
|  | srcBlkInfo = new BlockInformation(); | 
|  | srcBlkInfo->SccNum = SccNum; | 
|  | } | 
|  |  | 
|  | void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) { | 
|  | DEBUG( | 
|  | dbgs() << "Retiring BB" << MBB->getNumber() << "\n"; | 
|  | ); | 
|  |  | 
|  | BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB]; | 
|  |  | 
|  | if (!SrcBlkInfo) | 
|  | SrcBlkInfo = new BlockInformation(); | 
|  |  | 
|  | SrcBlkInfo->IsRetired = true; | 
|  | assert(MBB->succ_size() == 0 && MBB->pred_size() == 0 | 
|  | && "can't retire block yet"); | 
|  | } | 
|  |  | 
|  | INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer", | 
|  | "AMDGPU CFG Structurizer", false, false) | 
|  | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) | 
|  | INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) | 
|  | INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) | 
|  | INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer", | 
|  | "AMDGPU CFG Structurizer", false, false) | 
|  |  | 
|  | FunctionPass *llvm::createAMDGPUCFGStructurizerPass() { | 
|  | return new AMDGPUCFGStructurizer(); | 
|  | } |