blob: 1f276dc570c9b109346956ce2219debedd40a05d [file] [log] [blame]
Tom Stellardf98f2ce2012-12-11 21:25:42 +00001//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8/// \file
9//==-----------------------------------------------------------------------===//
10
11#define DEBUGME 0
12#define DEBUG_TYPE "structcfg"
13
14#include "AMDGPUInstrInfo.h"
15#include "AMDIL.h"
16#include "llvm/ADT/SCCIterator.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/Analysis/DominatorInternals.h"
20#include "llvm/Analysis/Dominators.h"
21#include "llvm/CodeGen/MachinePostDominators.h"
22#include "llvm/CodeGen/MachineDominators.h"
23#include "llvm/CodeGen/MachineFunction.h"
24#include "llvm/CodeGen/MachineFunctionAnalysis.h"
25#include "llvm/CodeGen/MachineFunctionPass.h"
26#include "llvm/CodeGen/MachineInstrBuilder.h"
27#include "llvm/CodeGen/MachineJumpTableInfo.h"
28#include "llvm/CodeGen/MachineLoopInfo.h"
29#include "llvm/CodeGen/MachineRegisterInfo.h"
30#include "llvm/Target/TargetInstrInfo.h"
31
32using namespace llvm;
33
34// TODO: move-begin.
35
36//===----------------------------------------------------------------------===//
37//
38// Statistics for CFGStructurizer.
39//
40//===----------------------------------------------------------------------===//
41
42STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
43 "matched");
44STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
45 "matched");
46STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
47 "pattern matched");
48STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
49 "pattern matched");
50STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
51 "matched");
52STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
53STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
54
55//===----------------------------------------------------------------------===//
56//
57// Miscellaneous utility for CFGStructurizer.
58//
59//===----------------------------------------------------------------------===//
60namespace llvmCFGStruct {
61#define SHOWNEWINSTR(i) \
62 if (DEBUGME) errs() << "New instr: " << *i << "\n"
63
64#define SHOWNEWBLK(b, msg) \
65if (DEBUGME) { \
66 errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
67 errs() << "\n"; \
68}
69
70#define SHOWBLK_DETAIL(b, msg) \
71if (DEBUGME) { \
72 if (b) { \
73 errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
74 b->print(errs()); \
75 errs() << "\n"; \
76 } \
77}
78
79#define INVALIDSCCNUM -1
80#define INVALIDREGNUM 0
81
82template<class LoopinfoT>
83void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
84 for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
85 iterEnd = LoopInfo.end();
86 iter != iterEnd; ++iter) {
87 (*iter)->print(OS, 0);
88 }
89}
90
91template<class NodeT>
92void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
93 size_t sz = Src.size();
94 for (size_t i = 0; i < sz/2; ++i) {
95 NodeT *t = Src[i];
96 Src[i] = Src[sz - i - 1];
97 Src[sz - i - 1] = t;
98 }
99}
100
101} //end namespace llvmCFGStruct
102
103//===----------------------------------------------------------------------===//
104//
105// supporting data structure for CFGStructurizer
106//
107//===----------------------------------------------------------------------===//
108
109namespace llvmCFGStruct {
110template<class PassT>
111struct CFGStructTraits {
112};
113
114template <class InstrT>
115class BlockInformation {
116public:
117 bool isRetired;
118 int sccNum;
119 //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
120 //Instructions defining the corresponding successor.
121 BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
122};
123
124template <class BlockT, class InstrT, class RegiT>
125class LandInformation {
126public:
127 BlockT *landBlk;
128 std::set<RegiT> breakInitRegs; //Registers that need to "reg = 0", before
129 //WHILELOOP(thisloop) init before entering
130 //thisloop.
131 std::set<RegiT> contInitRegs; //Registers that need to "reg = 0", after
132 //WHILELOOP(thisloop) init after entering
133 //thisloop.
134 std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
135 //land block, branch cond on this reg.
136 std::set<RegiT> breakOnRegs; //registers that need to "if (reg) break
137 //endif" after ENDLOOP(thisloop) break
138 //outerLoopOf(thisLoop).
139 std::set<RegiT> contOnRegs; //registers that need to "if (reg) continue
140 //endif" after ENDLOOP(thisloop) continue on
141 //outerLoopOf(thisLoop).
142 LandInformation() : landBlk(NULL) {}
143};
144
145} //end of namespace llvmCFGStruct
146
147//===----------------------------------------------------------------------===//
148//
149// CFGStructurizer
150//
151//===----------------------------------------------------------------------===//
152
153namespace llvmCFGStruct {
154// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
155template<class PassT>
156class CFGStructurizer {
157public:
158 typedef enum {
159 Not_SinglePath = 0,
160 SinglePath_InPath = 1,
161 SinglePath_NotInPath = 2
162 } PathToKind;
163
164public:
165 typedef typename PassT::InstructionType InstrT;
166 typedef typename PassT::FunctionType FuncT;
167 typedef typename PassT::DominatortreeType DomTreeT;
168 typedef typename PassT::PostDominatortreeType PostDomTreeT;
169 typedef typename PassT::DomTreeNodeType DomTreeNodeT;
170 typedef typename PassT::LoopinfoType LoopInfoT;
171
172 typedef GraphTraits<FuncT *> FuncGTraits;
173 //typedef FuncGTraits::nodes_iterator BlockIterator;
174 typedef typename FuncT::iterator BlockIterator;
175
176 typedef typename FuncGTraits::NodeType BlockT;
177 typedef GraphTraits<BlockT *> BlockGTraits;
178 typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
179 //typedef BlockGTraits::succ_iterator InstructionIterator;
180 typedef typename BlockT::iterator InstrIterator;
181
182 typedef CFGStructTraits<PassT> CFGTraits;
183 typedef BlockInformation<InstrT> BlockInfo;
184 typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
185
186 typedef int RegiT;
187 typedef typename PassT::LoopType LoopT;
188 typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
189 typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
190 //landing info for loop break
191 typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
192
193public:
194 CFGStructurizer();
195 ~CFGStructurizer();
196
197 /// Perform the CFG structurization
198 bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
199
200 /// Perform the CFG preparation
201 bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
202
203private:
204 void reversePredicateSetter(typename BlockT::iterator);
205 void orderBlocks();
206 void printOrderedBlocks(llvm::raw_ostream &OS);
207 int patternMatch(BlockT *CurBlock);
208 int patternMatchGroup(BlockT *CurBlock);
209
210 int serialPatternMatch(BlockT *CurBlock);
211 int ifPatternMatch(BlockT *CurBlock);
212 int switchPatternMatch(BlockT *CurBlock);
213 int loopendPatternMatch(BlockT *CurBlock);
214 int loopPatternMatch(BlockT *CurBlock);
215
216 int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
217 int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
218 //int loopWithoutBreak(BlockT *);
219
220 void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
221 BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
222 void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
223 BlockT *ContBlock, LoopT *contLoop);
224 bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
225 int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
226 BlockT *FalseBlock);
227 int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
228 BlockT *FalseBlock);
229 int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
230 BlockT *FalseBlock, BlockT **LandBlockPtr);
231 void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
232 BlockT *FalseBlock, BlockT *LandBlock,
233 bool Detail = false);
234 PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
235 bool AllowSideEntry = true);
236 BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
237 bool AllowSideEntry = true);
238 int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
239 void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
240
241 void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
242 BlockT *TrueBlock, BlockT *FalseBlock,
243 BlockT *LandBlock);
244 void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
245 void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
246 BlockT *ExitLandBlock, RegiT SetReg);
247 void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
248 RegiT SetReg);
249 BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
250 std::set<BlockT*> &ExitBlockSet,
251 BlockT *ExitLandBlk);
252 BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
253 BlockTSmallerVector &ExitingBlocks,
254 BlockTSmallerVector &ExitBlocks);
255 BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
256 void removeUnconditionalBranch(BlockT *SrcBlock);
257 void removeRedundantConditionalBranch(BlockT *SrcBlock);
258 void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
259
260 void removeSuccessor(BlockT *SrcBlock);
261 BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
262 BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
263
264 void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
265 InstrIterator InsertPos);
266
267 void recordSccnum(BlockT *SrcBlock, int SCCNum);
268 int getSCCNum(BlockT *srcBlk);
269
270 void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
271 bool isRetiredBlock(BlockT *SrcBlock);
272 bool isActiveLoophead(BlockT *CurBlock);
273 bool needMigrateBlock(BlockT *Block);
274
275 BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
276 BlockTSmallerVector &exitBlocks,
277 std::set<BlockT*> &ExitBlockSet);
278 void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
279 BlockT *getLoopLandBlock(LoopT *LoopRep);
280 LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
281
282 void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
283 void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
284 void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
285 void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
286 void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
287
288 bool hasBackEdge(BlockT *curBlock);
289 unsigned getLoopDepth (LoopT *LoopRep);
290 int countActiveBlock(
291 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
292 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
293 BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
294 BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
295
296private:
297 DomTreeT *domTree;
298 PostDomTreeT *postDomTree;
299 LoopInfoT *loopInfo;
300 PassT *passRep;
301 FuncT *funcRep;
302
303 BlockInfoMap blockInfoMap;
304 LoopLandInfoMap loopLandInfoMap;
305 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
306 const AMDGPURegisterInfo *TRI;
307
308}; //template class CFGStructurizer
309
310template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
311 : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
312}
313
314template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
315 for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
316 E = blockInfoMap.end(); I != E; ++I) {
317 delete I->second;
318 }
319}
320
321template<class PassT>
322bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
323 const AMDGPURegisterInfo * tri) {
324 passRep = &pass;
325 funcRep = &func;
326 TRI = tri;
327
328 bool changed = false;
329
330 //FIXME: if not reducible flow graph, make it so ???
331
332 if (DEBUGME) {
333 errs() << "AMDGPUCFGStructurizer::prepare\n";
334 }
335
336 loopInfo = CFGTraits::getLoopInfo(pass);
337 if (DEBUGME) {
338 errs() << "LoopInfo:\n";
339 PrintLoopinfo(*loopInfo, errs());
340 }
341
342 orderBlocks();
343 if (DEBUGME) {
344 errs() << "Ordered blocks:\n";
345 printOrderedBlocks(errs());
346 }
347
348 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
349
350 for (typename LoopInfoT::iterator iter = loopInfo->begin(),
351 iterEnd = loopInfo->end();
352 iter != iterEnd; ++iter) {
353 LoopT* loopRep = (*iter);
354 BlockTSmallerVector exitingBlks;
355 loopRep->getExitingBlocks(exitingBlks);
356
357 if (exitingBlks.size() == 0) {
358 BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
359 if (dummyExitBlk != NULL)
360 retBlks.push_back(dummyExitBlk);
361 }
362 }
363
364 // Remove unconditional branch instr.
365 // Add dummy exit block iff there are multiple returns.
366
367 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
368 iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
369 iterBlk != iterEndBlk;
370 ++iterBlk) {
371 BlockT *curBlk = *iterBlk;
372 removeUnconditionalBranch(curBlk);
373 removeRedundantConditionalBranch(curBlk);
374 if (CFGTraits::isReturnBlock(curBlk)) {
375 retBlks.push_back(curBlk);
376 }
377 assert(curBlk->succ_size() <= 2);
378 } //for
379
380 if (retBlks.size() >= 2) {
381 addDummyExitBlock(retBlks);
382 changed = true;
383 }
384
385 return changed;
386} //CFGStructurizer::prepare
387
388template<class PassT>
389bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
390 const AMDGPURegisterInfo * tri) {
391 passRep = &pass;
392 funcRep = &func;
393 TRI = tri;
394
395 //Assume reducible CFG...
396 if (DEBUGME) {
397 errs() << "AMDGPUCFGStructurizer::run\n";
398 func.viewCFG();
399 }
400
401 domTree = CFGTraits::getDominatorTree(pass);
402 if (DEBUGME) {
403 domTree->print(errs(), (const llvm::Module*)0);
404 }
405
406 postDomTree = CFGTraits::getPostDominatorTree(pass);
407 if (DEBUGME) {
408 postDomTree->print(errs());
409 }
410
411 loopInfo = CFGTraits::getLoopInfo(pass);
412 if (DEBUGME) {
413 errs() << "LoopInfo:\n";
414 PrintLoopinfo(*loopInfo, errs());
415 }
416
417 orderBlocks();
418#ifdef STRESSTEST
419 //Use the worse block ordering to test the algorithm.
420 ReverseVector(orderedBlks);
421#endif
422
423 if (DEBUGME) {
424 errs() << "Ordered blocks:\n";
425 printOrderedBlocks(errs());
426 }
427 int numIter = 0;
428 bool finish = false;
429 BlockT *curBlk;
430 bool makeProgress = false;
431 int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
432 orderedBlks.end());
433
434 do {
435 ++numIter;
436 if (DEBUGME) {
437 errs() << "numIter = " << numIter
438 << ", numRemaintedBlk = " << numRemainedBlk << "\n";
439 }
440
441 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
442 iterBlk = orderedBlks.begin();
443 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
444 iterBlkEnd = orderedBlks.end();
445
446 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
447 sccBeginIter = iterBlk;
448 BlockT *sccBeginBlk = NULL;
449 int sccNumBlk = 0; // The number of active blocks, init to a
450 // maximum possible number.
451 int sccNumIter; // Number of iteration in this SCC.
452
453 while (iterBlk != iterBlkEnd) {
454 curBlk = *iterBlk;
455
456 if (sccBeginBlk == NULL) {
457 sccBeginIter = iterBlk;
458 sccBeginBlk = curBlk;
459 sccNumIter = 0;
460 sccNumBlk = numRemainedBlk; // Init to maximum possible number.
461 if (DEBUGME) {
462 errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
463 errs() << "\n";
464 }
465 }
466
467 if (!isRetiredBlock(curBlk)) {
468 patternMatch(curBlk);
469 }
470
471 ++iterBlk;
472
473 bool contNextScc = true;
474 if (iterBlk == iterBlkEnd
475 || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
476 // Just finish one scc.
477 ++sccNumIter;
478 int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
479 if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
480 if (DEBUGME) {
481 errs() << "Can't reduce SCC " << getSCCNum(curBlk)
482 << ", sccNumIter = " << sccNumIter;
483 errs() << "doesn't make any progress\n";
484 }
485 contNextScc = true;
486 } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
487 sccNumBlk = sccRemainedNumBlk;
488 iterBlk = sccBeginIter;
489 contNextScc = false;
490 if (DEBUGME) {
491 errs() << "repeat processing SCC" << getSCCNum(curBlk)
492 << "sccNumIter = " << sccNumIter << "\n";
493 func.viewCFG();
494 }
495 } else {
496 // Finish the current scc.
497 contNextScc = true;
498 }
499 } else {
500 // Continue on next component in the current scc.
501 contNextScc = false;
502 }
503
504 if (contNextScc) {
505 sccBeginBlk = NULL;
506 }
507 } //while, "one iteration" over the function.
508
509 BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
510 if (entryBlk->succ_size() == 0) {
511 finish = true;
512 if (DEBUGME) {
513 errs() << "Reduce to one block\n";
514 }
515 } else {
516 int newnumRemainedBlk
517 = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
518 // consider cloned blocks ??
519 if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
520 makeProgress = true;
521 numRemainedBlk = newnumRemainedBlk;
522 } else {
523 makeProgress = false;
524 if (DEBUGME) {
525 errs() << "No progress\n";
526 }
527 }
528 }
529 } while (!finish && makeProgress);
530
531 // Misc wrap up to maintain the consistency of the Function representation.
532 CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
533
534 // Detach retired Block, release memory.
535 for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
536 iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
537 if ((*iterMap).second && (*iterMap).second->isRetired) {
538 assert(((*iterMap).first)->getNumber() != -1);
539 if (DEBUGME) {
540 errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
541 }
542 (*iterMap).first->eraseFromParent(); //Remove from the parent Function.
543 }
544 delete (*iterMap).second;
545 }
546 blockInfoMap.clear();
547
548 // clear loopLandInfoMap
549 for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
550 iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
551 delete (*iterMap).second;
552 }
553 loopLandInfoMap.clear();
554
555 if (DEBUGME) {
556 func.viewCFG();
557 }
558
559 if (!finish) {
560 assert(!"IRREDUCIBL_CF");
561 }
562
563 return true;
564} //CFGStructurizer::run
565
566/// Print the ordered Blocks.
567///
568template<class PassT>
569void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
570 size_t i = 0;
571 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
572 iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
573 iterBlk != iterBlkEnd;
574 ++iterBlk, ++i) {
575 os << "BB" << (*iterBlk)->getNumber();
576 os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
577 if (i != 0 && i % 10 == 0) {
578 os << "\n";
579 } else {
580 os << " ";
581 }
582 }
583} //printOrderedBlocks
584
585/// Compute the reversed DFS post order of Blocks
586///
587template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
588 int sccNum = 0;
589 BlockT *bb;
590 for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
591 sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
592 std::vector<BlockT *> &sccNext = *sccIter;
593 for (typename std::vector<BlockT *>::const_iterator
594 blockIter = sccNext.begin(), blockEnd = sccNext.end();
595 blockIter != blockEnd; ++blockIter) {
596 bb = *blockIter;
597 orderedBlks.push_back(bb);
598 recordSccnum(bb, sccNum);
599 }
600 }
601
602 //walk through all the block in func to check for unreachable
603 for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
604 blockEnd1 = FuncGTraits::nodes_end(funcRep);
605 blockIter1 != blockEnd1; ++blockIter1) {
606 BlockT *bb = &(*blockIter1);
607 sccNum = getSCCNum(bb);
608 if (sccNum == INVALIDSCCNUM) {
609 errs() << "unreachable block BB" << bb->getNumber() << "\n";
610 }
611 }
612} //orderBlocks
613
614template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
615 int numMatch = 0;
616 int curMatch;
617
618 if (DEBUGME) {
619 errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
620 }
621
622 while ((curMatch = patternMatchGroup(curBlk)) > 0) {
623 numMatch += curMatch;
624 }
625
626 if (DEBUGME) {
627 errs() << "End patternMatch BB" << curBlk->getNumber()
628 << ", numMatch = " << numMatch << "\n";
629 }
630
631 return numMatch;
632} //patternMatch
633
634template<class PassT>
635int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
636 int numMatch = 0;
637 numMatch += serialPatternMatch(curBlk);
638 numMatch += ifPatternMatch(curBlk);
639 numMatch += loopendPatternMatch(curBlk);
640 numMatch += loopPatternMatch(curBlk);
641 return numMatch;
642}//patternMatchGroup
643
644template<class PassT>
645int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
646 if (curBlk->succ_size() != 1) {
647 return 0;
648 }
649
650 BlockT *childBlk = *curBlk->succ_begin();
651 if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
652 return 0;
653 }
654
655 mergeSerialBlock(curBlk, childBlk);
656 ++numSerialPatternMatch;
657 return 1;
658} //serialPatternMatch
659
660template<class PassT>
661int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
662 //two edges
663 if (curBlk->succ_size() != 2) {
664 return 0;
665 }
666
667 if (hasBackEdge(curBlk)) {
668 return 0;
669 }
670
671 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
672 if (branchInstr == NULL) {
673 return 0;
674 }
675
676 assert(CFGTraits::isCondBranch(branchInstr));
677
678 BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
679 BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
680 BlockT *landBlk;
681 int cloned = 0;
682
683 // TODO: Simplify
684 if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
685 && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
686 landBlk = *trueBlk->succ_begin();
687 } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
688 landBlk = NULL;
689 } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
690 landBlk = falseBlk;
691 falseBlk = NULL;
692 } else if (falseBlk->succ_size() == 1
693 && *falseBlk->succ_begin() == trueBlk) {
694 landBlk = trueBlk;
695 trueBlk = NULL;
696 } else if (falseBlk->succ_size() == 1
697 && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
698 landBlk = *falseBlk->succ_begin();
699 } else if (trueBlk->succ_size() == 1
700 && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
701 landBlk = *trueBlk->succ_begin();
702 } else {
703 return handleJumpintoIf(curBlk, trueBlk, falseBlk);
704 }
705
706 // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
707 // new BB created for landBlk==NULL may introduce new challenge to the
708 // reduction process.
709 if (landBlk != NULL &&
710 ((trueBlk && trueBlk->pred_size() > 1)
711 || (falseBlk && falseBlk->pred_size() > 1))) {
712 cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
713 }
714
715 if (trueBlk && trueBlk->pred_size() > 1) {
716 trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
717 ++cloned;
718 }
719
720 if (falseBlk && falseBlk->pred_size() > 1) {
721 falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
722 ++cloned;
723 }
724
725 mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
726
727 ++numIfPatternMatch;
728
729 numClonedBlock += cloned;
730
731 return 1 + cloned;
732} //ifPatternMatch
733
734template<class PassT>
735int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
736 return 0;
737} //switchPatternMatch
738
739template<class PassT>
740int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
741 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
742 typename std::vector<LoopT *> nestedLoops;
743 while (loopRep) {
744 nestedLoops.push_back(loopRep);
745 loopRep = loopRep->getParentLoop();
746 }
747
748 if (nestedLoops.size() == 0) {
749 return 0;
750 }
751
752 // Process nested loop outside->inside, so "continue" to a outside loop won't
753 // be mistaken as "break" of the current loop.
754 int num = 0;
755 for (typename std::vector<LoopT *>::reverse_iterator
756 iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
757 iter != iterEnd; ++iter) {
758 loopRep = *iter;
759
760 if (getLoopLandBlock(loopRep) != NULL) {
761 continue;
762 }
763
764 BlockT *loopHeader = loopRep->getHeader();
765
766 int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
767
768 if (numBreak == -1) {
769 break;
770 }
771
772 int numCont = loopcontPatternMatch(loopRep, loopHeader);
773 num += numBreak + numCont;
774 }
775
776 return num;
777} //loopendPatternMatch
778
779template<class PassT>
780int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
781 if (curBlk->succ_size() != 0) {
782 return 0;
783 }
784
785 int numLoop = 0;
786 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
787 while (loopRep && loopRep->getHeader() == curBlk) {
788 LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
789 if (loopLand) {
790 BlockT *landBlk = loopLand->landBlk;
791 assert(landBlk);
792 if (!isRetiredBlock(landBlk)) {
793 mergeLooplandBlock(curBlk, loopLand);
794 ++numLoop;
795 }
796 }
797 loopRep = loopRep->getParentLoop();
798 }
799
800 numLoopPatternMatch += numLoop;
801
802 return numLoop;
803} //loopPatternMatch
804
805template<class PassT>
806int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
807 BlockT *loopHeader) {
808 BlockTSmallerVector exitingBlks;
809 loopRep->getExitingBlocks(exitingBlks);
810
811 if (DEBUGME) {
812 errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
813 }
814
815 if (exitingBlks.size() == 0) {
816 setLoopLandBlock(loopRep);
817 return 0;
818 }
819
820 // Compute the corresponding exitBlks and exit block set.
821 BlockTSmallerVector exitBlks;
822 std::set<BlockT *> exitBlkSet;
823 for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
824 iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
825 BlockT *exitingBlk = *iter;
826 BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
827 exitBlks.push_back(exitBlk);
828 exitBlkSet.insert(exitBlk); //non-duplicate insert
829 }
830
831 assert(exitBlkSet.size() > 0);
832 assert(exitBlks.size() == exitingBlks.size());
833
834 if (DEBUGME) {
835 errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
836 }
837
838 // Find exitLandBlk.
839 BlockT *exitLandBlk = NULL;
840 int numCloned = 0;
841 int numSerial = 0;
842
843 if (exitBlkSet.size() == 1) {
844 exitLandBlk = *exitBlkSet.begin();
845 } else {
846 exitLandBlk = findNearestCommonPostDom(exitBlkSet);
847
848 if (exitLandBlk == NULL) {
849 return -1;
850 }
851
852 bool allInPath = true;
853 bool allNotInPath = true;
854 for (typename std::set<BlockT*>::const_iterator
855 iter = exitBlkSet.begin(),
856 iterEnd = exitBlkSet.end();
857 iter != iterEnd; ++iter) {
858 BlockT *exitBlk = *iter;
859
860 PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
861 if (DEBUGME) {
862 errs() << "BB" << exitBlk->getNumber()
863 << " to BB" << exitLandBlk->getNumber() << " PathToKind="
864 << pathKind << "\n";
865 }
866
867 allInPath = allInPath && (pathKind == SinglePath_InPath);
868 allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
869
870 if (!allInPath && !allNotInPath) {
871 if (DEBUGME) {
872 errs() << "singlePath check fail\n";
873 }
874 return -1;
875 }
876 } // check all exit blocks
877
878 if (allNotInPath) {
879
880 // TODO: Simplify, maybe separate function?
881 LoopT *parentLoopRep = loopRep->getParentLoop();
882 BlockT *parentLoopHeader = NULL;
883 if (parentLoopRep)
884 parentLoopHeader = parentLoopRep->getHeader();
885
886 if (exitLandBlk == parentLoopHeader &&
887 (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
888 loopRep,
889 exitBlkSet,
890 exitLandBlk)) != NULL) {
891 if (DEBUGME) {
892 errs() << "relocateLoopcontBlock success\n";
893 }
894 } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
895 exitingBlks,
896 exitBlks)) != NULL) {
897 if (DEBUGME) {
898 errs() << "insertEndbranchBlock success\n";
899 }
900 } else {
901 if (DEBUGME) {
902 errs() << "loop exit fail\n";
903 }
904 return -1;
905 }
906 }
907
908 // Handle side entry to exit path.
909 exitBlks.clear();
910 exitBlkSet.clear();
911 for (typename BlockTSmallerVector::iterator iterExiting =
912 exitingBlks.begin(),
913 iterExitingEnd = exitingBlks.end();
914 iterExiting != iterExitingEnd; ++iterExiting) {
915 BlockT *exitingBlk = *iterExiting;
916 BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
917 BlockT *newExitBlk = exitBlk;
918
919 if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
920 newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
921 ++numCloned;
922 }
923
924 numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
925
926 exitBlks.push_back(newExitBlk);
927 exitBlkSet.insert(newExitBlk);
928 }
929
930 for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
931 iterExitEnd = exitBlks.end();
932 iterExit != iterExitEnd; ++iterExit) {
933 BlockT *exitBlk = *iterExit;
934 numSerial += serialPatternMatch(exitBlk);
935 }
936
937 for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
938 iterExitEnd = exitBlks.end();
939 iterExit != iterExitEnd; ++iterExit) {
940 BlockT *exitBlk = *iterExit;
941 if (exitBlk->pred_size() > 1) {
942 if (exitBlk != exitLandBlk) {
943 return -1;
944 }
945 } else {
946 if (exitBlk != exitLandBlk &&
947 (exitBlk->succ_size() != 1 ||
948 *exitBlk->succ_begin() != exitLandBlk)) {
949 return -1;
950 }
951 }
952 }
953 } // else
954
955 exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
956
957 // Fold break into the breaking block. Leverage across level breaks.
958 assert(exitingBlks.size() == exitBlks.size());
959 for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
960 iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
961 iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
962 BlockT *exitBlk = *iterExit;
963 BlockT *exitingBlk = *iterExiting;
964 assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
965 LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
966 handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
967 }
968
969 int numBreak = static_cast<int>(exitingBlks.size());
970 numLoopbreakPatternMatch += numBreak;
971 numClonedBlock += numCloned;
972 return numBreak + numSerial + numCloned;
973} //loopbreakPatternMatch
974
975template<class PassT>
976int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
977 BlockT *loopHeader) {
978 int numCont = 0;
979 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
980 for (typename InvBlockGTraits::ChildIteratorType iter =
981 InvBlockGTraits::child_begin(loopHeader),
982 iterEnd = InvBlockGTraits::child_end(loopHeader);
983 iter != iterEnd; ++iter) {
984 BlockT *curBlk = *iter;
985 if (loopRep->contains(curBlk)) {
986 handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
987 loopHeader, loopRep);
988 contBlk.push_back(curBlk);
989 ++numCont;
990 }
991 }
992
993 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator
994 iter = contBlk.begin(), iterEnd = contBlk.end();
995 iter != iterEnd; ++iter) {
996 (*iter)->removeSuccessor(loopHeader);
997 }
998
999 numLoopcontPatternMatch += numCont;
1000
1001 return numCont;
1002} //loopcontPatternMatch
1003
1004
1005template<class PassT>
1006bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
1007 BlockT *src2Blk) {
1008 // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the
1009 // same loop with LoopLandInfo without explicitly keeping track of
1010 // loopContBlks and loopBreakBlks, this is a method to get the information.
1011 //
1012 if (src1Blk->succ_size() == 0) {
1013 LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
1014 if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
1015 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
1016 if (theEntry != NULL) {
1017 if (DEBUGME) {
1018 errs() << "isLoopContBreakBlock yes src1 = BB"
1019 << src1Blk->getNumber()
1020 << " src2 = BB" << src2Blk->getNumber() << "\n";
1021 }
1022 return true;
1023 }
1024 }
1025 }
1026 return false;
1027} //isSameloopDetachedContbreak
1028
1029template<class PassT>
1030int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
1031 BlockT *trueBlk,
1032 BlockT *falseBlk) {
1033 int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
1034 if (num == 0) {
1035 if (DEBUGME) {
1036 errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
1037 }
1038 num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
1039 }
1040 return num;
1041}
1042
1043template<class PassT>
1044int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
1045 BlockT *trueBlk,
1046 BlockT *falseBlk) {
1047 int num = 0;
1048 BlockT *downBlk;
1049
1050 //trueBlk could be the common post dominator
1051 downBlk = trueBlk;
1052
1053 if (DEBUGME) {
1054 errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
1055 << " true = BB" << trueBlk->getNumber()
1056 << ", numSucc=" << trueBlk->succ_size()
1057 << " false = BB" << falseBlk->getNumber() << "\n";
1058 }
1059
1060 while (downBlk) {
1061 if (DEBUGME) {
1062 errs() << "check down = BB" << downBlk->getNumber();
1063 }
1064
1065 if (singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
1066 if (DEBUGME) {
1067 errs() << " working\n";
1068 }
1069
1070 num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
1071 num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
1072
1073 numClonedBlock += num;
1074 num += serialPatternMatch(*headBlk->succ_begin());
1075 num += serialPatternMatch(*(++headBlk->succ_begin()));
1076 num += ifPatternMatch(headBlk);
1077 assert(num > 0);
1078
1079 break;
1080 }
1081 if (DEBUGME) {
1082 errs() << " not working\n";
1083 }
1084 downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
1085 } // walk down the postDomTree
1086
1087 return num;
1088} //handleJumpintoIf
1089
1090template<class PassT>
1091void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
1092 BlockT *trueBlk,
1093 BlockT *falseBlk,
1094 BlockT *landBlk,
1095 bool detail) {
1096 errs() << "head = BB" << headBlk->getNumber()
1097 << " size = " << headBlk->size();
1098 if (detail) {
1099 errs() << "\n";
1100 headBlk->print(errs());
1101 errs() << "\n";
1102 }
1103
1104 if (trueBlk) {
1105 errs() << ", true = BB" << trueBlk->getNumber() << " size = "
1106 << trueBlk->size() << " numPred = " << trueBlk->pred_size();
1107 if (detail) {
1108 errs() << "\n";
1109 trueBlk->print(errs());
1110 errs() << "\n";
1111 }
1112 }
1113 if (falseBlk) {
1114 errs() << ", false = BB" << falseBlk->getNumber() << " size = "
1115 << falseBlk->size() << " numPred = " << falseBlk->pred_size();
1116 if (detail) {
1117 errs() << "\n";
1118 falseBlk->print(errs());
1119 errs() << "\n";
1120 }
1121 }
1122 if (landBlk) {
1123 errs() << ", land = BB" << landBlk->getNumber() << " size = "
1124 << landBlk->size() << " numPred = " << landBlk->pred_size();
1125 if (detail) {
1126 errs() << "\n";
1127 landBlk->print(errs());
1128 errs() << "\n";
1129 }
1130 }
1131
1132 errs() << "\n";
1133} //showImproveSimpleJumpintoIf
1134
1135template<class PassT>
1136int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
1137 BlockT *trueBlk,
1138 BlockT *falseBlk,
1139 BlockT **plandBlk) {
1140 bool migrateTrue = false;
1141 bool migrateFalse = false;
1142
1143 BlockT *landBlk = *plandBlk;
1144
1145 assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
1146 && (falseBlk == NULL || falseBlk->succ_size() <= 1));
1147
1148 if (trueBlk == falseBlk) {
1149 return 0;
1150 }
1151
1152 migrateTrue = needMigrateBlock(trueBlk);
1153 migrateFalse = needMigrateBlock(falseBlk);
1154
1155 if (!migrateTrue && !migrateFalse) {
1156 return 0;
1157 }
1158
1159 // If we need to migrate either trueBlk and falseBlk, migrate the rest that
1160 // have more than one predecessors. without doing this, its predecessor
1161 // rather than headBlk will have undefined value in initReg.
1162 if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
1163 migrateTrue = true;
1164 }
1165 if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
1166 migrateFalse = true;
1167 }
1168
1169 if (DEBUGME) {
1170 errs() << "before improveSimpleJumpintoIf: ";
1171 showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1172 }
1173
1174 // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
1175 //
1176 // new: headBlk => if () {initReg = 1; org trueBlk branch} else
1177 // {initReg = 0; org falseBlk branch }
1178 // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
1179 // => org landBlk
1180 // if landBlk->pred_size() > 2, put the about if-else inside
1181 // if (initReg !=2) {...}
1182 //
1183 // add initReg = initVal to headBlk
1184
1185 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1186 unsigned initReg =
1187 funcRep->getRegInfo().createVirtualRegister(I32RC);
1188 if (!migrateTrue || !migrateFalse) {
1189 int initVal = migrateTrue ? 0 : 1;
1190 CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
1191 }
1192
1193 int numNewBlk = 0;
1194
1195 if (landBlk == NULL) {
1196 landBlk = funcRep->CreateMachineBasicBlock();
1197 funcRep->push_back(landBlk); //insert to function
1198
1199 if (trueBlk) {
1200 trueBlk->addSuccessor(landBlk);
1201 } else {
1202 headBlk->addSuccessor(landBlk);
1203 }
1204
1205 if (falseBlk) {
1206 falseBlk->addSuccessor(landBlk);
1207 } else {
1208 headBlk->addSuccessor(landBlk);
1209 }
1210
1211 numNewBlk ++;
1212 }
1213
1214 bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
1215
1216 //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
1217 typename BlockT::iterator insertPos =
1218 CFGTraits::getInstrPos
1219 (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));
1220
1221 if (landBlkHasOtherPred) {
1222 unsigned immReg =
1223 funcRep->getRegInfo().createVirtualRegister(I32RC);
1224 CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
1225 unsigned cmpResReg =
1226 funcRep->getRegInfo().createVirtualRegister(I32RC);
1227
1228 CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
1229 initReg, immReg);
1230 CFGTraits::insertCondBranchBefore(landBlk, insertPos,
1231 AMDGPU::IF_PREDICATE_SET, passRep,
1232 cmpResReg, DebugLoc());
1233 }
1234
1235 CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_PREDICATE_SET,
1236 passRep, initReg, DebugLoc());
1237
1238 if (migrateTrue) {
1239 migrateInstruction(trueBlk, landBlk, insertPos);
1240 // need to uncondionally insert the assignment to ensure a path from its
1241 // predecessor rather than headBlk has valid value in initReg if
1242 // (initVal != 1).
1243 CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
1244 }
1245 CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);
1246
1247 if (migrateFalse) {
1248 migrateInstruction(falseBlk, landBlk, insertPos);
1249 // need to uncondionally insert the assignment to ensure a path from its
1250 // predecessor rather than headBlk has valid value in initReg if
1251 // (initVal != 0)
1252 CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
1253 }
1254
1255 if (landBlkHasOtherPred) {
1256 // add endif
1257 CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
1258
1259 // put initReg = 2 to other predecessors of landBlk
1260 for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
1261 predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
1262 ++predIter) {
1263 BlockT *curBlk = *predIter;
1264 if (curBlk != trueBlk && curBlk != falseBlk) {
1265 CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
1266 }
1267 } //for
1268 }
1269 if (DEBUGME) {
1270 errs() << "result from improveSimpleJumpintoIf: ";
1271 showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1272 }
1273
1274 // update landBlk
1275 *plandBlk = landBlk;
1276
1277 return numNewBlk;
1278} //improveSimpleJumpintoIf
1279
1280template<class PassT>
1281void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
1282 LoopT *exitingLoop,
1283 BlockT *exitBlk,
1284 LoopT *exitLoop,
1285 BlockT *landBlk) {
1286 if (DEBUGME) {
1287 errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
1288 << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
1289 }
1290 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1291
1292 RegiT initReg = INVALIDREGNUM;
1293 if (exitingLoop != exitLoop) {
1294 initReg = static_cast<int>
1295 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1296 assert(initReg != INVALIDREGNUM);
1297 addLoopBreakInitReg(exitLoop, initReg);
1298 while (exitingLoop != exitLoop && exitingLoop) {
1299 addLoopBreakOnReg(exitingLoop, initReg);
1300 exitingLoop = exitingLoop->getParentLoop();
1301 }
1302 assert(exitingLoop == exitLoop);
1303 }
1304
1305 mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
1306
1307} //handleLoopbreak
1308
1309template<class PassT>
1310void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
1311 LoopT *contingLoop,
1312 BlockT *contBlk,
1313 LoopT *contLoop) {
1314 if (DEBUGME) {
1315 errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
1316 << " header = BB" << contBlk->getNumber() << "\n";
1317
1318 errs() << "Trying to continue loop-depth = "
1319 << getLoopDepth(contLoop)
1320 << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
1321 }
1322
1323 RegiT initReg = INVALIDREGNUM;
1324 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1325 if (contingLoop != contLoop) {
1326 initReg = static_cast<int>
1327 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1328 assert(initReg != INVALIDREGNUM);
1329 addLoopContInitReg(contLoop, initReg);
1330 while (contingLoop && contingLoop->getParentLoop() != contLoop) {
1331 addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg
1332 contingLoop = contingLoop->getParentLoop();
1333 }
1334 assert(contingLoop && contingLoop->getParentLoop() == contLoop);
1335 addLoopContOnReg(contingLoop, initReg);
1336 }
1337
1338 settleLoopcontBlock(contingBlk, contBlk, initReg);
1339} //handleLoopcontBlock
1340
1341template<class PassT>
1342void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
1343 if (DEBUGME) {
1344 errs() << "serialPattern BB" << dstBlk->getNumber()
1345 << " <= BB" << srcBlk->getNumber() << "\n";
1346 }
1347 dstBlk->splice(dstBlk->end(), srcBlk, srcBlk->begin(), srcBlk->end());
1348
1349 dstBlk->removeSuccessor(srcBlk);
1350 CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
1351
1352 removeSuccessor(srcBlk);
1353 retireBlock(dstBlk, srcBlk);
1354} //mergeSerialBlock
1355
1356template<class PassT>
1357void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
1358 BlockT *curBlk,
1359 BlockT *trueBlk,
1360 BlockT *falseBlk,
1361 BlockT *landBlk) {
1362 if (DEBUGME) {
1363 errs() << "ifPattern BB" << curBlk->getNumber();
1364 errs() << "{ ";
1365 if (trueBlk) {
1366 errs() << "BB" << trueBlk->getNumber();
1367 }
1368 errs() << " } else ";
1369 errs() << "{ ";
1370 if (falseBlk) {
1371 errs() << "BB" << falseBlk->getNumber();
1372 }
1373 errs() << " }\n ";
1374 errs() << "landBlock: ";
1375 if (landBlk == NULL) {
1376 errs() << "NULL";
1377 } else {
1378 errs() << "BB" << landBlk->getNumber();
1379 }
1380 errs() << "\n";
1381 }
1382
1383 int oldOpcode = branchInstr->getOpcode();
1384 DebugLoc branchDL = branchInstr->getDebugLoc();
1385
1386// transform to
1387// if cond
1388// trueBlk
1389// else
1390// falseBlk
1391// endif
1392// landBlk
1393
1394 typename BlockT::iterator branchInstrPos =
1395 CFGTraits::getInstrPos(curBlk, branchInstr);
1396 CFGTraits::insertCondBranchBefore(branchInstrPos,
1397 CFGTraits::getBranchNzeroOpcode(oldOpcode),
1398 passRep,
1399 branchDL);
1400
1401 if (trueBlk) {
1402 curBlk->splice(branchInstrPos, trueBlk, trueBlk->begin(), trueBlk->end());
1403 curBlk->removeSuccessor(trueBlk);
1404 if (landBlk && trueBlk->succ_size()!=0) {
1405 trueBlk->removeSuccessor(landBlk);
1406 }
1407 retireBlock(curBlk, trueBlk);
1408 }
1409 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);
1410
1411 if (falseBlk) {
1412 curBlk->splice(branchInstrPos, falseBlk, falseBlk->begin(),
1413 falseBlk->end());
1414 curBlk->removeSuccessor(falseBlk);
1415 if (landBlk && falseBlk->succ_size() != 0) {
1416 falseBlk->removeSuccessor(landBlk);
1417 }
1418 retireBlock(curBlk, falseBlk);
1419 }
1420 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
1421
1422 branchInstr->eraseFromParent();
1423
1424 if (landBlk && trueBlk && falseBlk) {
1425 curBlk->addSuccessor(landBlk);
1426 }
1427
1428} //mergeIfthenelseBlock
1429
1430template<class PassT>
1431void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
1432 LoopLandInfo *loopLand) {
1433 BlockT *landBlk = loopLand->landBlk;
1434
1435 if (DEBUGME) {
1436 errs() << "loopPattern header = BB" << dstBlk->getNumber()
1437 << " land = BB" << landBlk->getNumber() << "\n";
1438 }
1439
1440 // Loop contInitRegs are init at the beginning of the loop.
1441 for (typename std::set<RegiT>::const_iterator iter =
1442 loopLand->contInitRegs.begin(),
1443 iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
1444 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1445 }
1446
1447 /* we last inserterd the DebugLoc in the
1448 * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
1449 * search for the DebugLoc in the that statement.
1450 * if not found, we have to insert the empty/default DebugLoc */
1451 InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
1452 DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
1453
1454 CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
1455 // Loop breakInitRegs are init before entering the loop.
1456 for (typename std::set<RegiT>::const_iterator iter =
1457 loopLand->breakInitRegs.begin(),
1458 iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) {
1459 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1460 }
1461 // Loop endbranchInitRegs are init before entering the loop.
1462 for (typename std::set<RegiT>::const_iterator iter =
1463 loopLand->endbranchInitRegs.begin(),
1464 iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
1465 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1466 }
1467
1468 /* we last inserterd the DebugLoc in the continue statement in the current dstBlk
1469 * search for the DebugLoc in the continue statement.
1470 * if not found, we have to insert the empty/default DebugLoc */
1471 InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
1472 DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
1473
1474 CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
1475 // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this
1476 // loop.
1477 for (typename std::set<RegiT>::const_iterator iter =
1478 loopLand->breakOnRegs.begin(),
1479 iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
1480 CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::PREDICATED_BREAK, passRep,
1481 *iter);
1482 }
1483
1484 // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this
1485 // loop.
1486 for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
1487 iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
1488 CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
1489 passRep, *iter);
1490 }
1491
1492 dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
1493
1494 for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
1495 iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
1496 dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of.
1497 }
1498
1499 removeSuccessor(landBlk);
1500 retireBlock(dstBlk, landBlk);
1501} //mergeLooplandBlock
1502
1503template<class PassT>
1504void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I) {
1505 while (I--) {
1506 if (I->getOpcode() == AMDGPU::PRED_X) {
1507 switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
1508 case OPCODE_IS_ZERO_INT:
1509 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
1510 return;
1511 case OPCODE_IS_NOT_ZERO_INT:
1512 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
1513 return;
1514 case OPCODE_IS_ZERO:
1515 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
1516 return;
1517 case OPCODE_IS_NOT_ZERO:
1518 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
1519 return;
1520 default:
1521 assert(0 && "PRED_X Opcode invalid!");
1522 }
1523 }
1524 }
1525}
1526
1527template<class PassT>
1528void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
1529 BlockT *exitBlk,
1530 BlockT *exitLandBlk,
1531 RegiT setReg) {
1532 if (DEBUGME) {
1533 errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
1534 << " exit = BB" << exitBlk->getNumber()
1535 << " land = BB" << exitLandBlk->getNumber() << "\n";
1536 }
1537
1538 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
1539 assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
1540
1541 DebugLoc DL = branchInstr->getDebugLoc();
1542
1543 BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
1544
1545 // transform exitingBlk to
1546 // if ( ) {
1547 // exitBlk (if exitBlk != exitLandBlk)
1548 // setReg = 1
1549 // break
1550 // }endif
1551 // successor = {orgSuccessor(exitingBlk) - exitBlk}
1552
1553 typename BlockT::iterator branchInstrPos =
1554 CFGTraits::getInstrPos(exitingBlk, branchInstr);
1555
1556 if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
1557 //break_logical
1558
1559 if (trueBranch != exitBlk) {
1560 reversePredicateSetter(branchInstrPos);
1561 }
1562 CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
1563 } else {
1564 if (trueBranch != exitBlk) {
1565 reversePredicateSetter(branchInstr);
1566 }
1567 CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
1568 if (exitBlk != exitLandBlk) {
1569 //splice is insert-before ...
1570 exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
1571 exitBlk->end());
1572 }
1573 if (setReg != INVALIDREGNUM) {
1574 CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
1575 }
1576 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
1577 } //if_logical
1578
1579 //now branchInst can be erase safely
1580 branchInstr->eraseFromParent();
1581
1582 //now take care of successors, retire blocks
1583 exitingBlk->removeSuccessor(exitBlk);
1584 if (exitBlk != exitLandBlk) {
1585 //splice is insert-before ...
1586 exitBlk->removeSuccessor(exitLandBlk);
1587 retireBlock(exitingBlk, exitBlk);
1588 }
1589
1590} //mergeLoopbreakBlock
1591
1592template<class PassT>
1593void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
1594 BlockT *contBlk,
1595 RegiT setReg) {
1596 if (DEBUGME) {
1597 errs() << "settleLoopcontBlock conting = BB"
1598 << contingBlk->getNumber()
1599 << ", cont = BB" << contBlk->getNumber() << "\n";
1600 }
1601
1602 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
1603 if (branchInstr) {
1604 assert(CFGTraits::isCondBranch(branchInstr));
1605 typename BlockT::iterator branchInstrPos =
1606 CFGTraits::getInstrPos(contingBlk, branchInstr);
1607 BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
1608 int oldOpcode = branchInstr->getOpcode();
1609 DebugLoc DL = branchInstr->getDebugLoc();
1610
1611 // transform contingBlk to
1612 // if () {
1613 // move instr after branchInstr
1614 // continue
1615 // or
1616 // setReg = 1
1617 // break
1618 // }endif
1619 // successor = {orgSuccessor(contingBlk) - loopHeader}
1620
1621 bool useContinueLogical =
1622 (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
1623
1624 if (useContinueLogical == false) {
1625 int branchOpcode =
1626 trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
1627 : CFGTraits::getBranchZeroOpcode(oldOpcode);
1628
1629 CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
1630
1631 if (setReg != INVALIDREGNUM) {
1632 CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
1633 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1634 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
1635 } else {
1636 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1637 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
1638 }
1639
1640 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
1641 } else {
1642 int branchOpcode =
1643 trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
1644 : CFGTraits::getContinueZeroOpcode(oldOpcode);
1645
1646 CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
1647 }
1648
1649 branchInstr->eraseFromParent();
1650 } else {
1651 // if we've arrived here then we've already erased the branch instruction
1652 // travel back up the basic block to see the last reference of our debug location
1653 // we've just inserted that reference here so it should be representative
1654 if (setReg != INVALIDREGNUM) {
1655 CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
1656 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1657 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
1658 } else {
1659 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1660 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
1661 }
1662 } //else
1663
1664} //settleLoopcontBlock
1665
1666// BBs in exitBlkSet are determined as in break-path for loopRep,
1667// before we can put code for BBs as inside loop-body for loopRep
1668// check whether those BBs are determined as cont-BB for parentLoopRep
1669// earlier.
1670// If so, generate a new BB newBlk
1671// (1) set newBlk common successor of BBs in exitBlkSet
1672// (2) change the continue-instr in BBs in exitBlkSet to break-instr
1673// (3) generate continue-instr in newBlk
1674//
1675template<class PassT>
1676typename CFGStructurizer<PassT>::BlockT *
1677CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
1678 LoopT *loopRep,
1679 std::set<BlockT *> &exitBlkSet,
1680 BlockT *exitLandBlk) {
1681 std::set<BlockT *> endBlkSet;
1682
1683
1684
1685 for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
1686 iterEnd = exitBlkSet.end();
1687 iter != iterEnd; ++iter) {
1688 BlockT *exitBlk = *iter;
1689 BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
1690
1691 if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
1692 return NULL;
1693
1694 endBlkSet.insert(endBlk);
1695 }
1696
1697 BlockT *newBlk = funcRep->CreateMachineBasicBlock();
1698 funcRep->push_back(newBlk); //insert to function
1699 CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
1700 SHOWNEWBLK(newBlk, "New continue block: ");
1701
1702 for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
1703 iterEnd = endBlkSet.end();
1704 iter != iterEnd; ++iter) {
1705 BlockT *endBlk = *iter;
1706 InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
1707 if (contInstr) {
1708 contInstr->eraseFromParent();
1709 }
1710 endBlk->addSuccessor(newBlk);
1711 if (DEBUGME) {
1712 errs() << "Add new continue Block to BB"
1713 << endBlk->getNumber() << " successors\n";
1714 }
1715 }
1716
1717 return newBlk;
1718} //relocateLoopcontBlock
1719
1720
1721// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
1722// LoopLandBlock. This BB branch on the loop endBranchInit register to the
1723// pathes corresponding to the loop exiting branches.
1724
1725template<class PassT>
1726typename CFGStructurizer<PassT>::BlockT *
1727CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
1728 BlockTSmallerVector &exitingBlks,
1729 BlockTSmallerVector &exitBlks) {
1730 const AMDGPUInstrInfo *tii =
1731 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
1732 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1733
1734 RegiT endBranchReg = static_cast<int>
1735 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1736 assert(endBranchReg >= 0);
1737
1738 // reg = 0 before entering the loop
1739 addLoopEndbranchInitReg(loopRep, endBranchReg);
1740
1741 uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
1742 assert(numBlks >=2 && numBlks == exitBlks.size());
1743
1744 BlockT *preExitingBlk = exitingBlks[0];
1745 BlockT *preExitBlk = exitBlks[0];
1746 BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
1747 funcRep->push_back(preBranchBlk); //insert to function
1748 SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
1749
1750 BlockT *newLandBlk = preBranchBlk;
1751
1752 CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
1753 newLandBlk);
1754 preExitingBlk->removeSuccessor(preExitBlk);
1755 preExitingBlk->addSuccessor(newLandBlk);
1756
1757 //it is redundant to add reg = 0 to exitingBlks[0]
1758
1759 // For 1..n th exiting path (the last iteration handles two pathes) create the
1760 // branch to the previous path and the current path.
1761 for (uint32_t i = 1; i < numBlks; ++i) {
1762 BlockT *curExitingBlk = exitingBlks[i];
1763 BlockT *curExitBlk = exitBlks[i];
1764 BlockT *curBranchBlk;
1765
1766 if (i == numBlks - 1) {
1767 curBranchBlk = curExitBlk;
1768 } else {
1769 curBranchBlk = funcRep->CreateMachineBasicBlock();
1770 funcRep->push_back(curBranchBlk); //insert to function
1771 SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
1772 }
1773
1774 // Add reg = i to exitingBlks[i].
1775 CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
1776 endBranchReg, i);
1777
1778 // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
1779 // (exitingBlks[i], newLandBlk).
1780 CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
1781 newLandBlk);
1782 curExitingBlk->removeSuccessor(curExitBlk);
1783 curExitingBlk->addSuccessor(newLandBlk);
1784
1785 // add to preBranchBlk the branch instruction:
1786 // if (endBranchReg == preVal)
1787 // preExitBlk
1788 // else
1789 // curBranchBlk
1790 //
1791 // preValReg = i - 1
1792
1793 DebugLoc DL;
1794 RegiT preValReg = static_cast<int>
1795 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1796
1797 preBranchBlk->insert(preBranchBlk->begin(),
1798 tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
1799 i - 1));
1800
1801 // condResReg = (endBranchReg == preValReg)
1802 RegiT condResReg = static_cast<int>
1803 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1804 BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
1805 .addReg(endBranchReg).addReg(preValReg);
1806
1807 BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
1808 .addMBB(preExitBlk).addReg(condResReg);
1809
1810 preBranchBlk->addSuccessor(preExitBlk);
1811 preBranchBlk->addSuccessor(curBranchBlk);
1812
1813 // Update preExitingBlk, preExitBlk, preBranchBlk.
1814 preExitingBlk = curExitingBlk;
1815 preExitBlk = curExitBlk;
1816 preBranchBlk = curBranchBlk;
1817
1818 } //end for 1 .. n blocks
1819
1820 return newLandBlk;
1821} //addLoopEndbranchBlock
1822
1823template<class PassT>
1824typename CFGStructurizer<PassT>::PathToKind
1825CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
1826 bool allowSideEntry) {
1827 assert(dstBlk);
1828
1829 if (srcBlk == dstBlk) {
1830 return SinglePath_InPath;
1831 }
1832
1833 while (srcBlk && srcBlk->succ_size() == 1) {
1834 srcBlk = *srcBlk->succ_begin();
1835 if (srcBlk == dstBlk) {
1836 return SinglePath_InPath;
1837 }
1838
1839 if (!allowSideEntry && srcBlk->pred_size() > 1) {
1840 return Not_SinglePath;
1841 }
1842 }
1843
1844 if (srcBlk && srcBlk->succ_size()==0) {
1845 return SinglePath_NotInPath;
1846 }
1847
1848 return Not_SinglePath;
1849} //singlePathTo
1850
1851// If there is a single path from srcBlk to dstBlk, return the last block before
1852// dstBlk If there is a single path from srcBlk->end without dstBlk, return the
1853// last block in the path Otherwise, return NULL
1854template<class PassT>
1855typename CFGStructurizer<PassT>::BlockT *
1856CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
1857 bool allowSideEntry) {
1858 assert(dstBlk);
1859
1860 if (srcBlk == dstBlk) {
1861 return srcBlk;
1862 }
1863
1864 if (srcBlk->succ_size() == 0) {
1865 return srcBlk;
1866 }
1867
1868 while (srcBlk && srcBlk->succ_size() == 1) {
1869 BlockT *preBlk = srcBlk;
1870
1871 srcBlk = *srcBlk->succ_begin();
1872 if (srcBlk == NULL) {
1873 return preBlk;
1874 }
1875
1876 if (!allowSideEntry && srcBlk->pred_size() > 1) {
1877 return NULL;
1878 }
1879 }
1880
1881 if (srcBlk && srcBlk->succ_size()==0) {
1882 return srcBlk;
1883 }
1884
1885 return NULL;
1886
1887} //singlePathEnd
1888
1889template<class PassT>
1890int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
1891 BlockT *dstBlk) {
1892 int cloned = 0;
1893 assert(preBlk->isSuccessor(srcBlk));
1894 while (srcBlk && srcBlk != dstBlk) {
1895 assert(srcBlk->succ_size() == 1);
1896 if (srcBlk->pred_size() > 1) {
1897 srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
1898 ++cloned;
1899 }
1900
1901 preBlk = srcBlk;
1902 srcBlk = *srcBlk->succ_begin();
1903 }
1904
1905 return cloned;
1906} //cloneOnSideEntryTo
1907
1908template<class PassT>
1909typename CFGStructurizer<PassT>::BlockT *
1910CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
1911 BlockT *predBlk) {
1912 assert(predBlk->isSuccessor(curBlk) &&
1913 "succBlk is not a prececessor of curBlk");
1914
1915 BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions
1916 CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
1917 //srcBlk, oldBlk, newBlk
1918
1919 predBlk->removeSuccessor(curBlk);
1920 predBlk->addSuccessor(cloneBlk);
1921
1922 // add all successor to cloneBlk
1923 CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
1924
1925 numClonedInstr += curBlk->size();
1926
1927 if (DEBUGME) {
1928 errs() << "Cloned block: " << "BB"
1929 << curBlk->getNumber() << "size " << curBlk->size() << "\n";
1930 }
1931
1932 SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
1933
1934 return cloneBlk;
1935} //cloneBlockForPredecessor
1936
1937template<class PassT>
1938typename CFGStructurizer<PassT>::BlockT *
1939CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
1940 BlockT *exitingBlk) {
1941 BlockT *exitBlk = NULL;
1942
1943 for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
1944 iterSuccEnd = exitingBlk->succ_end();
1945 iterSucc != iterSuccEnd; ++iterSucc) {
1946 BlockT *curBlk = *iterSucc;
1947 if (!loopRep->contains(curBlk)) {
1948 assert(exitBlk == NULL);
1949 exitBlk = curBlk;
1950 }
1951 }
1952
1953 assert(exitBlk != NULL);
1954
1955 return exitBlk;
1956} //exitingBlock2ExitBlock
1957
1958template<class PassT>
1959void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
1960 BlockT *dstBlk,
1961 InstrIterator insertPos) {
1962 InstrIterator spliceEnd;
1963 //look for the input branchinstr, not the AMDGPU branchinstr
1964 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
1965 if (branchInstr == NULL) {
1966 if (DEBUGME) {
1967 errs() << "migrateInstruction don't see branch instr\n" ;
1968 }
1969 spliceEnd = srcBlk->end();
1970 } else {
1971 if (DEBUGME) {
1972 errs() << "migrateInstruction see branch instr\n" ;
1973 branchInstr->dump();
1974 }
1975 spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
1976 }
1977 if (DEBUGME) {
1978 errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
1979 << "srcSize = " << srcBlk->size() << "\n";
1980 }
1981
1982 //splice insert before insertPos
1983 dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
1984
1985 if (DEBUGME) {
1986 errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
1987 << "srcSize = " << srcBlk->size() << "\n";
1988 }
1989} //migrateInstruction
1990
1991// normalizeInfiniteLoopExit change
1992// B1:
1993// uncond_br LoopHeader
1994//
1995// to
1996// B1:
1997// cond_br 1 LoopHeader dummyExit
1998// and return the newly added dummy exit block
1999//
2000template<class PassT>
2001typename CFGStructurizer<PassT>::BlockT *
2002CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
2003 BlockT *loopHeader;
2004 BlockT *loopLatch;
2005 loopHeader = LoopRep->getHeader();
2006 loopLatch = LoopRep->getLoopLatch();
2007 BlockT *dummyExitBlk = NULL;
2008 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
2009 if (loopHeader!=NULL && loopLatch!=NULL) {
2010 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
2011 if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
2012 dummyExitBlk = funcRep->CreateMachineBasicBlock();
2013 funcRep->push_back(dummyExitBlk); //insert to function
2014 SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
2015
2016 if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
2017
2018 typename BlockT::iterator insertPos =
2019 CFGTraits::getInstrPos(loopLatch, branchInstr);
2020 unsigned immReg =
2021 funcRep->getRegInfo().createVirtualRegister(I32RC);
2022 CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
2023 InstrT *newInstr =
2024 CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
2025 MachineInstrBuilder(newInstr).addMBB(loopHeader).addReg(immReg, false);
2026
2027 SHOWNEWINSTR(newInstr);
2028
2029 branchInstr->eraseFromParent();
2030 loopLatch->addSuccessor(dummyExitBlk);
2031 }
2032 }
2033
2034 return dummyExitBlk;
2035} //normalizeInfiniteLoopExit
2036
2037template<class PassT>
2038void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
2039 InstrT *branchInstr;
2040
2041 // I saw two unconditional branch in one basic block in example
2042 // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
2043 while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
2044 && CFGTraits::isUncondBranch(branchInstr)) {
2045 if (DEBUGME) {
2046 errs() << "Removing unconditional branch instruction" ;
2047 branchInstr->dump();
2048 }
2049 branchInstr->eraseFromParent();
2050 }
2051} //removeUnconditionalBranch
2052
2053template<class PassT>
2054void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
2055 if (srcBlk->succ_size() == 2) {
2056 BlockT *blk1 = *srcBlk->succ_begin();
2057 BlockT *blk2 = *(++srcBlk->succ_begin());
2058
2059 if (blk1 == blk2) {
2060 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
2061 assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
2062 if (DEBUGME) {
2063 errs() << "Removing unneeded conditional branch instruction" ;
2064 branchInstr->dump();
2065 }
2066 branchInstr->eraseFromParent();
2067 SHOWNEWBLK(blk1, "Removing redundant successor");
2068 srcBlk->removeSuccessor(blk1);
2069 }
2070 }
2071} //removeRedundantConditionalBranch
2072
2073template<class PassT>
2074void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
2075 DEFAULT_VEC_SLOTS> &retBlks) {
2076 BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
2077 funcRep->push_back(dummyExitBlk); //insert to function
2078 CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);
2079
2080 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
2081 retBlks.begin(),
2082 iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
2083 BlockT *curBlk = *iter;
2084 InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
2085 if (curInstr) {
2086 curInstr->eraseFromParent();
2087 }
2088 curBlk->addSuccessor(dummyExitBlk);
2089 if (DEBUGME) {
2090 errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
2091 << " successors\n";
2092 }
2093 } //for
2094
2095 SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
2096} //addDummyExitBlock
2097
2098template<class PassT>
2099void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
2100 while (srcBlk->succ_size()) {
2101 srcBlk->removeSuccessor(*srcBlk->succ_begin());
2102 }
2103}
2104
2105template<class PassT>
2106void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
2107 BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
2108
2109 if (srcBlkInfo == NULL) {
2110 srcBlkInfo = new BlockInfo();
2111 }
2112
2113 srcBlkInfo->sccNum = sccNum;
2114}
2115
2116template<class PassT>
2117int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
2118 BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
2119 return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
2120}
2121
2122template<class PassT>
2123void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
2124 if (DEBUGME) {
2125 errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
2126 }
2127
2128 BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
2129
2130 if (srcBlkInfo == NULL) {
2131 srcBlkInfo = new BlockInfo();
2132 }
2133
2134 srcBlkInfo->isRetired = true;
2135 assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
2136 && "can't retire block yet");
2137}
2138
2139template<class PassT>
2140bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
2141 BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
2142 return (srcBlkInfo && srcBlkInfo->isRetired);
2143}
2144
2145template<class PassT>
2146bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
2147 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
2148 while (loopRep && loopRep->getHeader() == curBlk) {
2149 LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
2150
2151 if(loopLand == NULL)
2152 return true;
2153
2154 BlockT *landBlk = loopLand->landBlk;
2155 assert(landBlk);
2156 if (!isRetiredBlock(landBlk)) {
2157 return true;
2158 }
2159
2160 loopRep = loopRep->getParentLoop();
2161 }
2162
2163 return false;
2164} //isActiveLoophead
2165
2166template<class PassT>
2167bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
2168 const unsigned blockSizeThreshold = 30;
2169 const unsigned cloneInstrThreshold = 100;
2170
2171 bool multiplePreds = blk && (blk->pred_size() > 1);
2172
2173 if(!multiplePreds)
2174 return false;
2175
2176 unsigned blkSize = blk->size();
2177 return ((blkSize > blockSizeThreshold)
2178 && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
2179} //needMigrateBlock
2180
2181template<class PassT>
2182typename CFGStructurizer<PassT>::BlockT *
2183CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
2184 BlockTSmallerVector &exitBlks,
2185 std::set<BlockT *> &exitBlkSet) {
2186 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks
2187
2188 for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
2189 predIterEnd = landBlk->pred_end();
2190 predIter != predIterEnd; ++predIter) {
2191 BlockT *curBlk = *predIter;
2192 if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
2193 inpathBlks.push_back(curBlk);
2194 }
2195 } //for
2196
2197 //if landBlk has predecessors that are not in the given loop,
2198 //create a new block
2199 BlockT *newLandBlk = landBlk;
2200 if (inpathBlks.size() != landBlk->pred_size()) {
2201 newLandBlk = funcRep->CreateMachineBasicBlock();
2202 funcRep->push_back(newLandBlk); //insert to function
2203 newLandBlk->addSuccessor(landBlk);
2204 for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
2205 inpathBlks.begin(),
2206 iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
2207 BlockT *curBlk = *iter;
2208 CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
2209 //srcBlk, oldBlk, newBlk
2210 curBlk->removeSuccessor(landBlk);
2211 curBlk->addSuccessor(newLandBlk);
2212 }
2213 for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
2214 if (exitBlks[i] == landBlk) {
2215 exitBlks[i] = newLandBlk;
2216 }
2217 }
2218 SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
2219 }
2220
2221 setLoopLandBlock(loopRep, newLandBlk);
2222
2223 return newLandBlk;
2224} // recordLoopbreakLand
2225
2226template<class PassT>
2227void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
2228 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2229
2230 if (theEntry == NULL) {
2231 theEntry = new LoopLandInfo();
2232 }
2233 assert(theEntry->landBlk == NULL);
2234
2235 if (blk == NULL) {
2236 blk = funcRep->CreateMachineBasicBlock();
2237 funcRep->push_back(blk); //insert to function
2238 SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
2239 }
2240
2241 theEntry->landBlk = blk;
2242
2243 if (DEBUGME) {
2244 errs() << "setLoopLandBlock loop-header = BB"
2245 << loopRep->getHeader()->getNumber()
2246 << " landing-block = BB" << blk->getNumber() << "\n";
2247 }
2248} // setLoopLandBlock
2249
2250template<class PassT>
2251void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
2252 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2253
2254 if (theEntry == NULL) {
2255 theEntry = new LoopLandInfo();
2256 }
2257
2258 theEntry->breakOnRegs.insert(regNum);
2259
2260 if (DEBUGME) {
2261 errs() << "addLoopBreakOnReg loop-header = BB"
2262 << loopRep->getHeader()->getNumber()
2263 << " regNum = " << regNum << "\n";
2264 }
2265} // addLoopBreakOnReg
2266
2267template<class PassT>
2268void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
2269 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2270
2271 if (theEntry == NULL) {
2272 theEntry = new LoopLandInfo();
2273 }
2274 theEntry->contOnRegs.insert(regNum);
2275
2276 if (DEBUGME) {
2277 errs() << "addLoopContOnReg loop-header = BB"
2278 << loopRep->getHeader()->getNumber()
2279 << " regNum = " << regNum << "\n";
2280 }
2281} // addLoopContOnReg
2282
2283template<class PassT>
2284void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
2285 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2286
2287 if (theEntry == NULL) {
2288 theEntry = new LoopLandInfo();
2289 }
2290 theEntry->breakInitRegs.insert(regNum);
2291
2292 if (DEBUGME) {
2293 errs() << "addLoopBreakInitReg loop-header = BB"
2294 << loopRep->getHeader()->getNumber()
2295 << " regNum = " << regNum << "\n";
2296 }
2297} // addLoopBreakInitReg
2298
2299template<class PassT>
2300void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
2301 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2302
2303 if (theEntry == NULL) {
2304 theEntry = new LoopLandInfo();
2305 }
2306 theEntry->contInitRegs.insert(regNum);
2307
2308 if (DEBUGME) {
2309 errs() << "addLoopContInitReg loop-header = BB"
2310 << loopRep->getHeader()->getNumber()
2311 << " regNum = " << regNum << "\n";
2312 }
2313} // addLoopContInitReg
2314
2315template<class PassT>
2316void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
2317 RegiT regNum) {
2318 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2319
2320 if (theEntry == NULL) {
2321 theEntry = new LoopLandInfo();
2322 }
2323 theEntry->endbranchInitRegs.insert(regNum);
2324
2325 if (DEBUGME) {
2326 errs() << "addLoopEndbranchInitReg loop-header = BB"
2327 << loopRep->getHeader()->getNumber()
2328 << " regNum = " << regNum << "\n";
2329 }
2330} // addLoopEndbranchInitReg
2331
2332template<class PassT>
2333typename CFGStructurizer<PassT>::LoopLandInfo *
2334CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
2335 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2336
2337 return theEntry;
2338} // getLoopLandInfo
2339
2340template<class PassT>
2341typename CFGStructurizer<PassT>::BlockT *
2342CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
2343 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2344
2345 return theEntry ? theEntry->landBlk : NULL;
2346} // getLoopLandBlock
2347
2348
2349template<class PassT>
2350bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
2351 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
2352 if (loopRep == NULL)
2353 return false;
2354
2355 BlockT *loopHeader = loopRep->getHeader();
2356
2357 return curBlk->isSuccessor(loopHeader);
2358
2359} //hasBackEdge
2360
2361template<class PassT>
2362unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
2363 return loopRep ? loopRep->getLoopDepth() : 0;
2364} //getLoopDepth
2365
2366template<class PassT>
2367int CFGStructurizer<PassT>::countActiveBlock
2368(typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
2369 typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
2370 int count = 0;
2371 while (iterStart != iterEnd) {
2372 if (!isRetiredBlock(*iterStart)) {
2373 ++count;
2374 }
2375 ++iterStart;
2376 }
2377
2378 return count;
2379} //countActiveBlock
2380
2381// This is work around solution for findNearestCommonDominator not avaiable to
2382// post dom a proper fix should go to Dominators.h.
2383
2384template<class PassT>
2385typename CFGStructurizer<PassT>::BlockT*
2386CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
2387
2388 if (postDomTree->dominates(blk1, blk2)) {
2389 return blk1;
2390 }
2391 if (postDomTree->dominates(blk2, blk1)) {
2392 return blk2;
2393 }
2394
2395 DomTreeNodeT *node1 = postDomTree->getNode(blk1);
2396 DomTreeNodeT *node2 = postDomTree->getNode(blk2);
2397
2398 // Handle newly cloned node.
2399 if (node1 == NULL && blk1->succ_size() == 1) {
2400 return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
2401 }
2402 if (node2 == NULL && blk2->succ_size() == 1) {
2403 return findNearestCommonPostDom(blk1, *blk2->succ_begin());
2404 }
2405
2406 if (node1 == NULL || node2 == NULL) {
2407 return NULL;
2408 }
2409
2410 node1 = node1->getIDom();
2411 while (node1) {
2412 if (postDomTree->dominates(node1, node2)) {
2413 return node1->getBlock();
2414 }
2415 node1 = node1->getIDom();
2416 }
2417
2418 return NULL;
2419}
2420
2421template<class PassT>
2422typename CFGStructurizer<PassT>::BlockT *
2423CFGStructurizer<PassT>::findNearestCommonPostDom
2424(typename std::set<BlockT *> &blks) {
2425 BlockT *commonDom;
2426 typename std::set<BlockT *>::const_iterator iter = blks.begin();
2427 typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
2428 for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
2429 BlockT *curBlk = *iter;
2430 if (curBlk != commonDom) {
2431 commonDom = findNearestCommonPostDom(curBlk, commonDom);
2432 }
2433 }
2434
2435 if (DEBUGME) {
2436 errs() << "Common post dominator for exit blocks is ";
2437 if (commonDom) {
2438 errs() << "BB" << commonDom->getNumber() << "\n";
2439 } else {
2440 errs() << "NULL\n";
2441 }
2442 }
2443
2444 return commonDom;
2445} //findNearestCommonPostDom
2446
2447} //end namespace llvm
2448
2449//todo: move-end
2450
2451
2452//===----------------------------------------------------------------------===//
2453//
2454// CFGStructurizer for AMDGPU
2455//
2456//===----------------------------------------------------------------------===//
2457
2458
2459using namespace llvmCFGStruct;
2460
2461namespace llvm {
2462class AMDGPUCFGStructurizer : public MachineFunctionPass {
2463public:
2464 typedef MachineInstr InstructionType;
2465 typedef MachineFunction FunctionType;
2466 typedef MachineBasicBlock BlockType;
2467 typedef MachineLoopInfo LoopinfoType;
2468 typedef MachineDominatorTree DominatortreeType;
2469 typedef MachinePostDominatorTree PostDominatortreeType;
2470 typedef MachineDomTreeNode DomTreeNodeType;
2471 typedef MachineLoop LoopType;
2472
2473protected:
2474 TargetMachine &TM;
2475 const TargetInstrInfo *TII;
2476 const AMDGPURegisterInfo *TRI;
2477
2478public:
2479 AMDGPUCFGStructurizer(char &pid, TargetMachine &tm);
2480 const TargetInstrInfo *getTargetInstrInfo() const;
2481
2482private:
2483
2484};
2485
2486} //end of namespace llvm
2487AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm)
2488: MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()),
2489 TRI(static_cast<const AMDGPURegisterInfo *>(tm.getRegisterInfo())) {
2490}
2491
2492const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const {
2493 return TII;
2494}
2495//===----------------------------------------------------------------------===//
2496//
2497// CFGPrepare
2498//
2499//===----------------------------------------------------------------------===//
2500
2501
2502using namespace llvmCFGStruct;
2503
2504namespace llvm {
2505class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer {
2506public:
2507 static char ID;
2508
2509public:
2510 AMDGPUCFGPrepare(TargetMachine &tm);
2511
2512 virtual const char *getPassName() const;
2513 virtual void getAnalysisUsage(AnalysisUsage &AU) const;
2514
2515 bool runOnMachineFunction(MachineFunction &F);
2516
2517private:
2518
2519};
2520
2521char AMDGPUCFGPrepare::ID = 0;
2522} //end of namespace llvm
2523
2524AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm)
2525 : AMDGPUCFGStructurizer(ID, tm ) {
2526}
2527const char *AMDGPUCFGPrepare::getPassName() const {
2528 return "AMD IL Control Flow Graph Preparation Pass";
2529}
2530
2531void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
2532 AU.addPreserved<MachineFunctionAnalysis>();
2533 AU.addRequired<MachineFunctionAnalysis>();
2534 AU.addRequired<MachineDominatorTree>();
2535 AU.addRequired<MachinePostDominatorTree>();
2536 AU.addRequired<MachineLoopInfo>();
2537}
2538
2539//===----------------------------------------------------------------------===//
2540//
2541// CFGPerform
2542//
2543//===----------------------------------------------------------------------===//
2544
2545
2546using namespace llvmCFGStruct;
2547
2548namespace llvm {
2549class AMDGPUCFGPerform : public AMDGPUCFGStructurizer {
2550public:
2551 static char ID;
2552
2553public:
2554 AMDGPUCFGPerform(TargetMachine &tm);
2555 virtual const char *getPassName() const;
2556 virtual void getAnalysisUsage(AnalysisUsage &AU) const;
2557 bool runOnMachineFunction(MachineFunction &F);
2558
2559private:
2560
2561};
2562
2563char AMDGPUCFGPerform::ID = 0;
2564} //end of namespace llvm
2565
2566 AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm)
2567: AMDGPUCFGStructurizer(ID, tm) {
2568}
2569
2570const char *AMDGPUCFGPerform::getPassName() const {
2571 return "AMD IL Control Flow Graph structurizer Pass";
2572}
2573
2574void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
2575 AU.addPreserved<MachineFunctionAnalysis>();
2576 AU.addRequired<MachineFunctionAnalysis>();
2577 AU.addRequired<MachineDominatorTree>();
2578 AU.addRequired<MachinePostDominatorTree>();
2579 AU.addRequired<MachineLoopInfo>();
2580}
2581
2582//===----------------------------------------------------------------------===//
2583//
2584// CFGStructTraits<AMDGPUCFGStructurizer>
2585//
2586//===----------------------------------------------------------------------===//
2587
2588namespace llvmCFGStruct {
2589// this class is tailor to the AMDGPU backend
2590template<>
2591struct CFGStructTraits<AMDGPUCFGStructurizer> {
2592 typedef int RegiT;
2593
2594 static int getBranchNzeroOpcode(int oldOpcode) {
2595 switch(oldOpcode) {
2596 case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
2597 case AMDGPU::BRANCH_COND_i32:
2598 case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
2599 case AMDGPU::SI_IF_NZ: return AMDGPU::SI_IF_NZ;
2600 default:
2601 assert(0 && "internal error");
2602 }
2603 return -1;
2604 }
2605
2606 static int getBranchZeroOpcode(int oldOpcode) {
2607 switch(oldOpcode) {
2608 case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
2609 case AMDGPU::BRANCH_COND_i32:
2610 case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
2611 case AMDGPU::SI_IF_Z: return AMDGPU::SI_IF_Z;
2612 default:
2613 assert(0 && "internal error");
2614 }
2615 return -1;
2616 }
2617
2618 static int getContinueNzeroOpcode(int oldOpcode) {
2619 switch(oldOpcode) {
2620 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
2621 default:
2622 assert(0 && "internal error");
2623 };
2624 return -1;
2625 }
2626
2627 static int getContinueZeroOpcode(int oldOpcode) {
2628 switch(oldOpcode) {
2629 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
2630 default:
2631 assert(0 && "internal error");
2632 }
2633 return -1;
2634 }
2635
2636 static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
2637 return instr->getOperand(0).getMBB();
2638 }
2639
2640 static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
2641 instr->getOperand(0).setMBB(blk);
2642 }
2643
2644 static MachineBasicBlock *
2645 getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
2646 assert(blk->succ_size() == 2);
2647 MachineBasicBlock *trueBranch = getTrueBranch(instr);
2648 MachineBasicBlock::succ_iterator iter = blk->succ_begin();
2649 MachineBasicBlock::succ_iterator iterNext = iter;
2650 ++iterNext;
2651
2652 return (*iter == trueBranch) ? *iterNext : *iter;
2653 }
2654
2655 static bool isCondBranch(MachineInstr *instr) {
2656 switch (instr->getOpcode()) {
2657 case AMDGPU::JUMP:
2658 return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0;
2659 case AMDGPU::BRANCH_COND_i32:
2660 case AMDGPU::BRANCH_COND_f32:
2661 case AMDGPU::SI_IF_NZ:
2662 case AMDGPU::SI_IF_Z:
2663 break;
2664 default:
2665 return false;
2666 }
2667 return true;
2668 }
2669
2670 static bool isUncondBranch(MachineInstr *instr) {
2671 switch (instr->getOpcode()) {
2672 case AMDGPU::JUMP:
2673 return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0;
2674 case AMDGPU::BRANCH:
2675 return true;
2676 default:
2677 return false;
2678 }
2679 return true;
2680 }
2681
2682 static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
2683 //get DebugLoc from the first MachineBasicBlock instruction with debug info
2684 DebugLoc DL;
2685 for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
2686 MachineInstr *instr = &(*iter);
2687 if (instr->getDebugLoc().isUnknown() == false) {
2688 DL = instr->getDebugLoc();
2689 }
2690 }
2691 return DL;
2692 }
2693
2694 static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
2695 MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2696 MachineInstr *instr = &*iter;
2697 if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
2698 return instr;
2699 }
2700 return NULL;
2701 }
2702
2703 // The correct naming for this is getPossibleLoopendBlockBranchInstr.
2704 //
2705 // BB with backward-edge could have move instructions after the branch
2706 // instruction. Such move instruction "belong to" the loop backward-edge.
2707 //
2708 static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
2709 const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>(
2710 blk->getParent()->getTarget().getInstrInfo());
2711
2712 for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
2713 iterEnd = blk->rend(); iter != iterEnd; ++iter) {
2714 // FIXME: Simplify
2715 MachineInstr *instr = &*iter;
2716 if (instr) {
2717 if (isCondBranch(instr) || isUncondBranch(instr)) {
2718 return instr;
2719 } else if (!TII->isMov(instr->getOpcode())) {
2720 break;
2721 }
2722 }
2723 }
2724 return NULL;
2725 }
2726
2727 static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
2728 MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2729 if (iter != blk->rend()) {
2730 MachineInstr *instr = &(*iter);
2731 if (instr->getOpcode() == AMDGPU::RETURN) {
2732 return instr;
2733 }
2734 }
2735 return NULL;
2736 }
2737
2738 static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
2739 MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2740 if (iter != blk->rend()) {
2741 MachineInstr *instr = &(*iter);
2742 if (instr->getOpcode() == AMDGPU::CONTINUE) {
2743 return instr;
2744 }
2745 }
2746 return NULL;
2747 }
2748
2749 static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
2750 for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
2751 MachineInstr *instr = &(*iter);
2752 if (instr->getOpcode() == AMDGPU::PREDICATED_BREAK) {
2753 return instr;
2754 }
2755 }
2756 return NULL;
2757 }
2758
2759 static bool isReturnBlock(MachineBasicBlock *blk) {
2760 MachineInstr *instr = getReturnInstr(blk);
2761 bool isReturn = (blk->succ_size() == 0);
2762 if (instr) {
2763 assert(isReturn);
2764 } else if (isReturn) {
2765 if (DEBUGME) {
2766 errs() << "BB" << blk->getNumber()
2767 <<" is return block without RETURN instr\n";
2768 }
2769 }
2770
2771 return isReturn;
2772 }
2773
2774 static MachineBasicBlock::iterator
2775 getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
2776 assert(instr->getParent() == blk && "instruction doesn't belong to block");
2777 MachineBasicBlock::iterator iter = blk->begin();
2778 MachineBasicBlock::iterator iterEnd = blk->end();
2779 while (&(*iter) != instr && iter != iterEnd) {
2780 ++iter;
2781 }
2782
2783 assert(iter != iterEnd);
2784 return iter;
2785 }//getInstrPos
2786
2787 static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
2788 AMDGPUCFGStructurizer *passRep) {
2789 return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
2790 } //insertInstrBefore
2791
2792 static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
2793 AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
2794 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2795 MachineInstr *newInstr =
2796 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
2797
2798 MachineBasicBlock::iterator res;
2799 if (blk->begin() != blk->end()) {
2800 blk->insert(blk->begin(), newInstr);
2801 } else {
2802 blk->push_back(newInstr);
2803 }
2804
2805 SHOWNEWINSTR(newInstr);
2806
2807 return newInstr;
2808 } //insertInstrBefore
2809
2810 static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
2811 AMDGPUCFGStructurizer *passRep) {
2812 insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
2813 } //insertInstrEnd
2814
2815 static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
2816 AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
2817 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2818 MachineInstr *newInstr = blk->getParent()
2819 ->CreateMachineInstr(tii->get(newOpcode), DL);
2820
2821 blk->push_back(newInstr);
2822 //assume the instruction doesn't take any reg operand ...
2823
2824 SHOWNEWINSTR(newInstr);
2825 } //insertInstrEnd
2826
2827 static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
2828 int newOpcode,
2829 AMDGPUCFGStructurizer *passRep) {
2830 MachineInstr *oldInstr = &(*instrPos);
2831 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2832 MachineBasicBlock *blk = oldInstr->getParent();
2833 MachineInstr *newInstr =
2834 blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
2835 DebugLoc());
2836
2837 blk->insert(instrPos, newInstr);
2838 //assume the instruction doesn't take any reg operand ...
2839
2840 SHOWNEWINSTR(newInstr);
2841 return newInstr;
2842 } //insertInstrBefore
2843
2844 static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
2845 int newOpcode,
2846 AMDGPUCFGStructurizer *passRep,
2847 DebugLoc DL) {
2848 MachineInstr *oldInstr = &(*instrPos);
2849 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2850 MachineBasicBlock *blk = oldInstr->getParent();
2851 MachineInstr *newInstr =
2852 blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
2853 DL);
2854
2855 blk->insert(instrPos, newInstr);
2856 MachineInstrBuilder(newInstr).addReg(oldInstr->getOperand(1).getReg(),
2857 false);
2858
2859 SHOWNEWINSTR(newInstr);
2860 //erase later oldInstr->eraseFromParent();
2861 } //insertCondBranchBefore
2862
2863 static void insertCondBranchBefore(MachineBasicBlock *blk,
2864 MachineBasicBlock::iterator insertPos,
2865 int newOpcode,
2866 AMDGPUCFGStructurizer *passRep,
2867 RegiT regNum,
2868 DebugLoc DL) {
2869 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2870
2871 MachineInstr *newInstr =
2872 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
2873
2874 //insert before
2875 blk->insert(insertPos, newInstr);
2876 MachineInstrBuilder(newInstr).addReg(regNum, false);
2877
2878 SHOWNEWINSTR(newInstr);
2879 } //insertCondBranchBefore
2880
2881 static void insertCondBranchEnd(MachineBasicBlock *blk,
2882 int newOpcode,
2883 AMDGPUCFGStructurizer *passRep,
2884 RegiT regNum) {
2885 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2886 MachineInstr *newInstr =
2887 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
2888
2889 blk->push_back(newInstr);
2890 MachineInstrBuilder(newInstr).addReg(regNum, false);
2891
2892 SHOWNEWINSTR(newInstr);
2893 } //insertCondBranchEnd
2894
2895
2896 static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
2897 AMDGPUCFGStructurizer *passRep,
2898 RegiT regNum, int regVal) {
2899 MachineInstr *oldInstr = &(*instrPos);
2900 const AMDGPUInstrInfo *tii =
2901 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
2902 MachineBasicBlock *blk = oldInstr->getParent();
2903 MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
2904 regVal);
2905 blk->insert(instrPos, newInstr);
2906
2907 SHOWNEWINSTR(newInstr);
2908 } //insertAssignInstrBefore
2909
2910 static void insertAssignInstrBefore(MachineBasicBlock *blk,
2911 AMDGPUCFGStructurizer *passRep,
2912 RegiT regNum, int regVal) {
2913 const AMDGPUInstrInfo *tii =
2914 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
2915
2916 MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
2917 regVal);
2918 if (blk->begin() != blk->end()) {
2919 blk->insert(blk->begin(), newInstr);
2920 } else {
2921 blk->push_back(newInstr);
2922 }
2923
2924 SHOWNEWINSTR(newInstr);
2925
2926 } //insertInstrBefore
2927
2928 static void insertCompareInstrBefore(MachineBasicBlock *blk,
2929 MachineBasicBlock::iterator instrPos,
2930 AMDGPUCFGStructurizer *passRep,
2931 RegiT dstReg, RegiT src1Reg,
2932 RegiT src2Reg) {
2933 const AMDGPUInstrInfo *tii =
2934 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
2935 MachineInstr *newInstr =
2936 blk->getParent()->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());
2937
2938 MachineInstrBuilder(newInstr).addReg(dstReg, RegState::Define); //set target
2939 MachineInstrBuilder(newInstr).addReg(src1Reg); //set src value
2940 MachineInstrBuilder(newInstr).addReg(src2Reg); //set src value
2941
2942 blk->insert(instrPos, newInstr);
2943 SHOWNEWINSTR(newInstr);
2944
2945 } //insertCompareInstrBefore
2946
2947 static void cloneSuccessorList(MachineBasicBlock *dstBlk,
2948 MachineBasicBlock *srcBlk) {
2949 for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
2950 iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
2951 dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of
2952 }
2953 } //cloneSuccessorList
2954
2955 static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
2956 MachineFunction *func = srcBlk->getParent();
2957 MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
2958 func->push_back(newBlk); //insert to function
2959 for (MachineBasicBlock::iterator iter = srcBlk->begin(),
2960 iterEnd = srcBlk->end();
2961 iter != iterEnd; ++iter) {
2962 MachineInstr *instr = func->CloneMachineInstr(iter);
2963 newBlk->push_back(instr);
2964 }
2965 return newBlk;
2966 }
2967
2968 //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
2969 //the AMDGPU instruction is not recognized as terminator fix this and retire
2970 //this routine
2971 static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
2972 MachineBasicBlock *oldBlk,
2973 MachineBasicBlock *newBlk) {
2974 MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
2975 if (branchInstr && isCondBranch(branchInstr) &&
2976 getTrueBranch(branchInstr) == oldBlk) {
2977 setTrueBranch(branchInstr, newBlk);
2978 }
2979 }
2980
2981 static void wrapup(MachineBasicBlock *entryBlk) {
2982 assert((!entryBlk->getParent()->getJumpTableInfo()
2983 || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
2984 && "found a jump table");
2985
2986 //collect continue right before endloop
2987 SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
2988 MachineBasicBlock::iterator pre = entryBlk->begin();
2989 MachineBasicBlock::iterator iterEnd = entryBlk->end();
2990 MachineBasicBlock::iterator iter = pre;
2991 while (iter != iterEnd) {
2992 if (pre->getOpcode() == AMDGPU::CONTINUE
2993 && iter->getOpcode() == AMDGPU::ENDLOOP) {
2994 contInstr.push_back(pre);
2995 }
2996 pre = iter;
2997 ++iter;
2998 } //end while
2999
3000 //delete continue right before endloop
3001 for (unsigned i = 0; i < contInstr.size(); ++i) {
3002 contInstr[i]->eraseFromParent();
3003 }
3004
3005 // TODO to fix up jump table so later phase won't be confused. if
3006 // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
3007 // there isn't such an interface yet. alternatively, replace all the other
3008 // blocks in the jump table with the entryBlk //}
3009
3010 } //wrapup
3011
3012 static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) {
3013 return &pass.getAnalysis<MachineDominatorTree>();
3014 }
3015
3016 static MachinePostDominatorTree*
3017 getPostDominatorTree(AMDGPUCFGStructurizer &pass) {
3018 return &pass.getAnalysis<MachinePostDominatorTree>();
3019 }
3020
3021 static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) {
3022 return &pass.getAnalysis<MachineLoopInfo>();
3023 }
3024}; // template class CFGStructTraits
3025} //end of namespace llvm
3026
3027// createAMDGPUCFGPreparationPass- Returns a pass
3028FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm
3029 ) {
3030 return new AMDGPUCFGPrepare(tm );
3031}
3032
3033bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) {
3034 return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func,
3035 *this,
3036 TRI);
3037}
3038
3039// createAMDGPUCFGStructurizerPass- Returns a pass
3040FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm
3041 ) {
3042 return new AMDGPUCFGPerform(tm );
3043}
3044
3045bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) {
3046 return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().run(func,
3047 *this,
3048 TRI);
3049}