blob: 4910e5dc9ce0e456dfd472153e1e301e7c103ec3 [file] [log] [blame]
Tom Stellardf98f2ce2012-12-11 21:25:42 +00001//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8/// \file
9//==-----------------------------------------------------------------------===//
10
11#define DEBUGME 0
12#define DEBUG_TYPE "structcfg"
13
Tom Stellard3ff0abf2013-06-07 20:37:48 +000014#include "AMDGPU.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000015#include "AMDGPUInstrInfo.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000016#include "llvm/ADT/SCCIterator.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/Analysis/DominatorInternals.h"
20#include "llvm/Analysis/Dominators.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000021#include "llvm/CodeGen/MachineDominators.h"
22#include "llvm/CodeGen/MachineFunction.h"
23#include "llvm/CodeGen/MachineFunctionAnalysis.h"
24#include "llvm/CodeGen/MachineFunctionPass.h"
25#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineJumpTableInfo.h"
27#include "llvm/CodeGen/MachineLoopInfo.h"
Chandler Carruth58a2cbe2013-01-02 10:22:59 +000028#include "llvm/CodeGen/MachinePostDominators.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000029#include "llvm/CodeGen/MachineRegisterInfo.h"
30#include "llvm/Target/TargetInstrInfo.h"
Tom Stellard3ff0abf2013-06-07 20:37:48 +000031#include "llvm/Target/TargetMachine.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000032
33using namespace llvm;
34
Tom Stellard3ff0abf2013-06-07 20:37:48 +000035#define DEFAULT_VEC_SLOTS 8
36
Tom Stellardf98f2ce2012-12-11 21:25:42 +000037// TODO: move-begin.
38
39//===----------------------------------------------------------------------===//
40//
41// Statistics for CFGStructurizer.
42//
43//===----------------------------------------------------------------------===//
44
45STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
46 "matched");
47STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
48 "matched");
49STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
50 "pattern matched");
51STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
52 "pattern matched");
53STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
54 "matched");
55STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
56STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
57
58//===----------------------------------------------------------------------===//
59//
60// Miscellaneous utility for CFGStructurizer.
61//
62//===----------------------------------------------------------------------===//
Benjamin Kramer879b0712013-05-23 15:43:05 +000063namespace {
Tom Stellardf98f2ce2012-12-11 21:25:42 +000064#define SHOWNEWINSTR(i) \
65 if (DEBUGME) errs() << "New instr: " << *i << "\n"
66
67#define SHOWNEWBLK(b, msg) \
68if (DEBUGME) { \
69 errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
70 errs() << "\n"; \
71}
72
73#define SHOWBLK_DETAIL(b, msg) \
74if (DEBUGME) { \
75 if (b) { \
76 errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
77 b->print(errs()); \
78 errs() << "\n"; \
79 } \
80}
81
82#define INVALIDSCCNUM -1
83#define INVALIDREGNUM 0
84
85template<class LoopinfoT>
86void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
87 for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
88 iterEnd = LoopInfo.end();
89 iter != iterEnd; ++iter) {
90 (*iter)->print(OS, 0);
91 }
92}
93
94template<class NodeT>
95void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
96 size_t sz = Src.size();
97 for (size_t i = 0; i < sz/2; ++i) {
98 NodeT *t = Src[i];
99 Src[i] = Src[sz - i - 1];
100 Src[sz - i - 1] = t;
101 }
102}
103
Benjamin Kramer879b0712013-05-23 15:43:05 +0000104} // end anonymous namespace
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000105
106//===----------------------------------------------------------------------===//
107//
108// supporting data structure for CFGStructurizer
109//
110//===----------------------------------------------------------------------===//
111
Benjamin Kramer879b0712013-05-23 15:43:05 +0000112namespace {
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000113template<class PassT>
114struct CFGStructTraits {
115};
116
117template <class InstrT>
118class BlockInformation {
119public:
120 bool isRetired;
121 int sccNum;
122 //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
123 //Instructions defining the corresponding successor.
124 BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
125};
126
127template <class BlockT, class InstrT, class RegiT>
128class LandInformation {
129public:
130 BlockT *landBlk;
131 std::set<RegiT> breakInitRegs; //Registers that need to "reg = 0", before
132 //WHILELOOP(thisloop) init before entering
133 //thisloop.
134 std::set<RegiT> contInitRegs; //Registers that need to "reg = 0", after
135 //WHILELOOP(thisloop) init after entering
136 //thisloop.
137 std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
138 //land block, branch cond on this reg.
139 std::set<RegiT> breakOnRegs; //registers that need to "if (reg) break
140 //endif" after ENDLOOP(thisloop) break
141 //outerLoopOf(thisLoop).
142 std::set<RegiT> contOnRegs; //registers that need to "if (reg) continue
143 //endif" after ENDLOOP(thisloop) continue on
144 //outerLoopOf(thisLoop).
145 LandInformation() : landBlk(NULL) {}
146};
147
Benjamin Kramer879b0712013-05-23 15:43:05 +0000148} // end anonymous namespace
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000149
150//===----------------------------------------------------------------------===//
151//
152// CFGStructurizer
153//
154//===----------------------------------------------------------------------===//
155
Benjamin Kramer879b0712013-05-23 15:43:05 +0000156namespace {
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000157// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
158template<class PassT>
159class CFGStructurizer {
160public:
161 typedef enum {
162 Not_SinglePath = 0,
163 SinglePath_InPath = 1,
164 SinglePath_NotInPath = 2
165 } PathToKind;
166
167public:
168 typedef typename PassT::InstructionType InstrT;
169 typedef typename PassT::FunctionType FuncT;
170 typedef typename PassT::DominatortreeType DomTreeT;
171 typedef typename PassT::PostDominatortreeType PostDomTreeT;
172 typedef typename PassT::DomTreeNodeType DomTreeNodeT;
173 typedef typename PassT::LoopinfoType LoopInfoT;
174
175 typedef GraphTraits<FuncT *> FuncGTraits;
176 //typedef FuncGTraits::nodes_iterator BlockIterator;
177 typedef typename FuncT::iterator BlockIterator;
178
179 typedef typename FuncGTraits::NodeType BlockT;
180 typedef GraphTraits<BlockT *> BlockGTraits;
181 typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
182 //typedef BlockGTraits::succ_iterator InstructionIterator;
183 typedef typename BlockT::iterator InstrIterator;
184
185 typedef CFGStructTraits<PassT> CFGTraits;
186 typedef BlockInformation<InstrT> BlockInfo;
187 typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
188
189 typedef int RegiT;
190 typedef typename PassT::LoopType LoopT;
191 typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
192 typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
193 //landing info for loop break
194 typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
195
196public:
197 CFGStructurizer();
198 ~CFGStructurizer();
199
200 /// Perform the CFG structurization
201 bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
202
203 /// Perform the CFG preparation
204 bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
205
206private:
207 void reversePredicateSetter(typename BlockT::iterator);
208 void orderBlocks();
209 void printOrderedBlocks(llvm::raw_ostream &OS);
210 int patternMatch(BlockT *CurBlock);
211 int patternMatchGroup(BlockT *CurBlock);
212
213 int serialPatternMatch(BlockT *CurBlock);
214 int ifPatternMatch(BlockT *CurBlock);
215 int switchPatternMatch(BlockT *CurBlock);
216 int loopendPatternMatch(BlockT *CurBlock);
217 int loopPatternMatch(BlockT *CurBlock);
218
219 int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
220 int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
221 //int loopWithoutBreak(BlockT *);
222
223 void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
224 BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
225 void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
226 BlockT *ContBlock, LoopT *contLoop);
227 bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
228 int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
229 BlockT *FalseBlock);
230 int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
231 BlockT *FalseBlock);
232 int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
233 BlockT *FalseBlock, BlockT **LandBlockPtr);
234 void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
235 BlockT *FalseBlock, BlockT *LandBlock,
236 bool Detail = false);
237 PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
238 bool AllowSideEntry = true);
239 BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
240 bool AllowSideEntry = true);
241 int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
242 void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
243
244 void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
245 BlockT *TrueBlock, BlockT *FalseBlock,
246 BlockT *LandBlock);
247 void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
248 void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
249 BlockT *ExitLandBlock, RegiT SetReg);
250 void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
251 RegiT SetReg);
252 BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
253 std::set<BlockT*> &ExitBlockSet,
254 BlockT *ExitLandBlk);
255 BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
256 BlockTSmallerVector &ExitingBlocks,
257 BlockTSmallerVector &ExitBlocks);
258 BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
259 void removeUnconditionalBranch(BlockT *SrcBlock);
260 void removeRedundantConditionalBranch(BlockT *SrcBlock);
261 void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
262
263 void removeSuccessor(BlockT *SrcBlock);
264 BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
265 BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
266
267 void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
268 InstrIterator InsertPos);
269
270 void recordSccnum(BlockT *SrcBlock, int SCCNum);
271 int getSCCNum(BlockT *srcBlk);
272
273 void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
274 bool isRetiredBlock(BlockT *SrcBlock);
275 bool isActiveLoophead(BlockT *CurBlock);
276 bool needMigrateBlock(BlockT *Block);
277
278 BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
279 BlockTSmallerVector &exitBlocks,
280 std::set<BlockT*> &ExitBlockSet);
281 void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
282 BlockT *getLoopLandBlock(LoopT *LoopRep);
283 LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
284
285 void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
286 void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
287 void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
288 void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
289 void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
290
291 bool hasBackEdge(BlockT *curBlock);
292 unsigned getLoopDepth (LoopT *LoopRep);
293 int countActiveBlock(
294 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
295 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
296 BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
297 BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
298
299private:
300 DomTreeT *domTree;
301 PostDomTreeT *postDomTree;
302 LoopInfoT *loopInfo;
303 PassT *passRep;
304 FuncT *funcRep;
305
306 BlockInfoMap blockInfoMap;
307 LoopLandInfoMap loopLandInfoMap;
308 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
309 const AMDGPURegisterInfo *TRI;
310
311}; //template class CFGStructurizer
312
313template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
314 : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
315}
316
317template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
318 for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
319 E = blockInfoMap.end(); I != E; ++I) {
320 delete I->second;
321 }
322}
323
324template<class PassT>
325bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
326 const AMDGPURegisterInfo * tri) {
327 passRep = &pass;
328 funcRep = &func;
329 TRI = tri;
330
331 bool changed = false;
332
333 //FIXME: if not reducible flow graph, make it so ???
334
335 if (DEBUGME) {
336 errs() << "AMDGPUCFGStructurizer::prepare\n";
337 }
338
339 loopInfo = CFGTraits::getLoopInfo(pass);
340 if (DEBUGME) {
341 errs() << "LoopInfo:\n";
342 PrintLoopinfo(*loopInfo, errs());
343 }
344
345 orderBlocks();
346 if (DEBUGME) {
347 errs() << "Ordered blocks:\n";
348 printOrderedBlocks(errs());
349 }
350
351 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
352
353 for (typename LoopInfoT::iterator iter = loopInfo->begin(),
354 iterEnd = loopInfo->end();
355 iter != iterEnd; ++iter) {
356 LoopT* loopRep = (*iter);
357 BlockTSmallerVector exitingBlks;
358 loopRep->getExitingBlocks(exitingBlks);
359
360 if (exitingBlks.size() == 0) {
361 BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
362 if (dummyExitBlk != NULL)
363 retBlks.push_back(dummyExitBlk);
364 }
365 }
366
367 // Remove unconditional branch instr.
368 // Add dummy exit block iff there are multiple returns.
369
370 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
371 iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
372 iterBlk != iterEndBlk;
373 ++iterBlk) {
374 BlockT *curBlk = *iterBlk;
375 removeUnconditionalBranch(curBlk);
376 removeRedundantConditionalBranch(curBlk);
377 if (CFGTraits::isReturnBlock(curBlk)) {
378 retBlks.push_back(curBlk);
379 }
380 assert(curBlk->succ_size() <= 2);
381 } //for
382
383 if (retBlks.size() >= 2) {
384 addDummyExitBlock(retBlks);
385 changed = true;
386 }
387
388 return changed;
389} //CFGStructurizer::prepare
390
391template<class PassT>
392bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
393 const AMDGPURegisterInfo * tri) {
394 passRep = &pass;
395 funcRep = &func;
396 TRI = tri;
397
398 //Assume reducible CFG...
399 if (DEBUGME) {
400 errs() << "AMDGPUCFGStructurizer::run\n";
401 func.viewCFG();
402 }
403
404 domTree = CFGTraits::getDominatorTree(pass);
405 if (DEBUGME) {
406 domTree->print(errs(), (const llvm::Module*)0);
407 }
408
409 postDomTree = CFGTraits::getPostDominatorTree(pass);
410 if (DEBUGME) {
411 postDomTree->print(errs());
412 }
413
414 loopInfo = CFGTraits::getLoopInfo(pass);
415 if (DEBUGME) {
416 errs() << "LoopInfo:\n";
417 PrintLoopinfo(*loopInfo, errs());
418 }
419
420 orderBlocks();
421#ifdef STRESSTEST
422 //Use the worse block ordering to test the algorithm.
423 ReverseVector(orderedBlks);
424#endif
425
426 if (DEBUGME) {
427 errs() << "Ordered blocks:\n";
428 printOrderedBlocks(errs());
429 }
430 int numIter = 0;
431 bool finish = false;
432 BlockT *curBlk;
433 bool makeProgress = false;
434 int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
435 orderedBlks.end());
436
437 do {
438 ++numIter;
439 if (DEBUGME) {
440 errs() << "numIter = " << numIter
441 << ", numRemaintedBlk = " << numRemainedBlk << "\n";
442 }
443
444 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
445 iterBlk = orderedBlks.begin();
446 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
447 iterBlkEnd = orderedBlks.end();
448
449 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
450 sccBeginIter = iterBlk;
451 BlockT *sccBeginBlk = NULL;
452 int sccNumBlk = 0; // The number of active blocks, init to a
453 // maximum possible number.
454 int sccNumIter; // Number of iteration in this SCC.
455
456 while (iterBlk != iterBlkEnd) {
457 curBlk = *iterBlk;
458
459 if (sccBeginBlk == NULL) {
460 sccBeginIter = iterBlk;
461 sccBeginBlk = curBlk;
462 sccNumIter = 0;
463 sccNumBlk = numRemainedBlk; // Init to maximum possible number.
464 if (DEBUGME) {
465 errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
466 errs() << "\n";
467 }
468 }
469
470 if (!isRetiredBlock(curBlk)) {
471 patternMatch(curBlk);
472 }
473
474 ++iterBlk;
475
476 bool contNextScc = true;
477 if (iterBlk == iterBlkEnd
478 || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
479 // Just finish one scc.
480 ++sccNumIter;
481 int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
482 if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
483 if (DEBUGME) {
484 errs() << "Can't reduce SCC " << getSCCNum(curBlk)
485 << ", sccNumIter = " << sccNumIter;
486 errs() << "doesn't make any progress\n";
487 }
488 contNextScc = true;
489 } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
490 sccNumBlk = sccRemainedNumBlk;
491 iterBlk = sccBeginIter;
492 contNextScc = false;
493 if (DEBUGME) {
494 errs() << "repeat processing SCC" << getSCCNum(curBlk)
495 << "sccNumIter = " << sccNumIter << "\n";
496 func.viewCFG();
497 }
498 } else {
499 // Finish the current scc.
500 contNextScc = true;
501 }
502 } else {
503 // Continue on next component in the current scc.
504 contNextScc = false;
505 }
506
507 if (contNextScc) {
508 sccBeginBlk = NULL;
509 }
510 } //while, "one iteration" over the function.
511
512 BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
513 if (entryBlk->succ_size() == 0) {
514 finish = true;
515 if (DEBUGME) {
516 errs() << "Reduce to one block\n";
517 }
518 } else {
519 int newnumRemainedBlk
520 = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
521 // consider cloned blocks ??
522 if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
523 makeProgress = true;
524 numRemainedBlk = newnumRemainedBlk;
525 } else {
526 makeProgress = false;
527 if (DEBUGME) {
528 errs() << "No progress\n";
529 }
530 }
531 }
532 } while (!finish && makeProgress);
533
534 // Misc wrap up to maintain the consistency of the Function representation.
535 CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
536
537 // Detach retired Block, release memory.
538 for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
539 iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
540 if ((*iterMap).second && (*iterMap).second->isRetired) {
541 assert(((*iterMap).first)->getNumber() != -1);
542 if (DEBUGME) {
543 errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
544 }
545 (*iterMap).first->eraseFromParent(); //Remove from the parent Function.
546 }
547 delete (*iterMap).second;
548 }
549 blockInfoMap.clear();
550
551 // clear loopLandInfoMap
552 for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
553 iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
554 delete (*iterMap).second;
555 }
556 loopLandInfoMap.clear();
557
558 if (DEBUGME) {
559 func.viewCFG();
560 }
561
562 if (!finish) {
563 assert(!"IRREDUCIBL_CF");
564 }
565
566 return true;
567} //CFGStructurizer::run
568
569/// Print the ordered Blocks.
570///
571template<class PassT>
572void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
573 size_t i = 0;
574 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
575 iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
576 iterBlk != iterBlkEnd;
577 ++iterBlk, ++i) {
578 os << "BB" << (*iterBlk)->getNumber();
579 os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
580 if (i != 0 && i % 10 == 0) {
581 os << "\n";
582 } else {
583 os << " ";
584 }
585 }
586} //printOrderedBlocks
587
588/// Compute the reversed DFS post order of Blocks
589///
590template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
591 int sccNum = 0;
592 BlockT *bb;
593 for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
594 sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
595 std::vector<BlockT *> &sccNext = *sccIter;
596 for (typename std::vector<BlockT *>::const_iterator
597 blockIter = sccNext.begin(), blockEnd = sccNext.end();
598 blockIter != blockEnd; ++blockIter) {
599 bb = *blockIter;
600 orderedBlks.push_back(bb);
601 recordSccnum(bb, sccNum);
602 }
603 }
604
605 //walk through all the block in func to check for unreachable
606 for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
607 blockEnd1 = FuncGTraits::nodes_end(funcRep);
608 blockIter1 != blockEnd1; ++blockIter1) {
609 BlockT *bb = &(*blockIter1);
610 sccNum = getSCCNum(bb);
611 if (sccNum == INVALIDSCCNUM) {
612 errs() << "unreachable block BB" << bb->getNumber() << "\n";
613 }
614 }
615} //orderBlocks
616
617template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
618 int numMatch = 0;
619 int curMatch;
620
621 if (DEBUGME) {
622 errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
623 }
624
625 while ((curMatch = patternMatchGroup(curBlk)) > 0) {
626 numMatch += curMatch;
627 }
628
629 if (DEBUGME) {
630 errs() << "End patternMatch BB" << curBlk->getNumber()
631 << ", numMatch = " << numMatch << "\n";
632 }
633
634 return numMatch;
635} //patternMatch
636
637template<class PassT>
638int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
639 int numMatch = 0;
640 numMatch += serialPatternMatch(curBlk);
641 numMatch += ifPatternMatch(curBlk);
642 numMatch += loopendPatternMatch(curBlk);
643 numMatch += loopPatternMatch(curBlk);
644 return numMatch;
645}//patternMatchGroup
646
647template<class PassT>
648int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
649 if (curBlk->succ_size() != 1) {
650 return 0;
651 }
652
653 BlockT *childBlk = *curBlk->succ_begin();
654 if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
655 return 0;
656 }
657
658 mergeSerialBlock(curBlk, childBlk);
659 ++numSerialPatternMatch;
660 return 1;
661} //serialPatternMatch
662
663template<class PassT>
664int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
665 //two edges
666 if (curBlk->succ_size() != 2) {
667 return 0;
668 }
669
670 if (hasBackEdge(curBlk)) {
671 return 0;
672 }
673
674 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
675 if (branchInstr == NULL) {
676 return 0;
677 }
678
679 assert(CFGTraits::isCondBranch(branchInstr));
680
681 BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
682 BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
683 BlockT *landBlk;
684 int cloned = 0;
685
686 // TODO: Simplify
687 if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
688 && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
689 landBlk = *trueBlk->succ_begin();
690 } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
691 landBlk = NULL;
692 } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
693 landBlk = falseBlk;
694 falseBlk = NULL;
695 } else if (falseBlk->succ_size() == 1
696 && *falseBlk->succ_begin() == trueBlk) {
697 landBlk = trueBlk;
698 trueBlk = NULL;
699 } else if (falseBlk->succ_size() == 1
700 && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
701 landBlk = *falseBlk->succ_begin();
702 } else if (trueBlk->succ_size() == 1
703 && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
704 landBlk = *trueBlk->succ_begin();
705 } else {
706 return handleJumpintoIf(curBlk, trueBlk, falseBlk);
707 }
708
709 // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
710 // new BB created for landBlk==NULL may introduce new challenge to the
711 // reduction process.
712 if (landBlk != NULL &&
713 ((trueBlk && trueBlk->pred_size() > 1)
714 || (falseBlk && falseBlk->pred_size() > 1))) {
715 cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
716 }
717
718 if (trueBlk && trueBlk->pred_size() > 1) {
719 trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
720 ++cloned;
721 }
722
723 if (falseBlk && falseBlk->pred_size() > 1) {
724 falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
725 ++cloned;
726 }
727
728 mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
729
730 ++numIfPatternMatch;
731
732 numClonedBlock += cloned;
733
734 return 1 + cloned;
735} //ifPatternMatch
736
737template<class PassT>
738int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
739 return 0;
740} //switchPatternMatch
741
742template<class PassT>
743int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
744 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
745 typename std::vector<LoopT *> nestedLoops;
746 while (loopRep) {
747 nestedLoops.push_back(loopRep);
748 loopRep = loopRep->getParentLoop();
749 }
750
751 if (nestedLoops.size() == 0) {
752 return 0;
753 }
754
755 // Process nested loop outside->inside, so "continue" to a outside loop won't
756 // be mistaken as "break" of the current loop.
757 int num = 0;
758 for (typename std::vector<LoopT *>::reverse_iterator
759 iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
760 iter != iterEnd; ++iter) {
761 loopRep = *iter;
762
763 if (getLoopLandBlock(loopRep) != NULL) {
764 continue;
765 }
766
767 BlockT *loopHeader = loopRep->getHeader();
768
769 int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
770
771 if (numBreak == -1) {
772 break;
773 }
774
775 int numCont = loopcontPatternMatch(loopRep, loopHeader);
776 num += numBreak + numCont;
777 }
778
779 return num;
780} //loopendPatternMatch
781
782template<class PassT>
783int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
784 if (curBlk->succ_size() != 0) {
785 return 0;
786 }
787
788 int numLoop = 0;
789 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
790 while (loopRep && loopRep->getHeader() == curBlk) {
791 LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
792 if (loopLand) {
793 BlockT *landBlk = loopLand->landBlk;
794 assert(landBlk);
795 if (!isRetiredBlock(landBlk)) {
796 mergeLooplandBlock(curBlk, loopLand);
797 ++numLoop;
798 }
799 }
800 loopRep = loopRep->getParentLoop();
801 }
802
803 numLoopPatternMatch += numLoop;
804
805 return numLoop;
806} //loopPatternMatch
807
808template<class PassT>
809int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
810 BlockT *loopHeader) {
811 BlockTSmallerVector exitingBlks;
812 loopRep->getExitingBlocks(exitingBlks);
813
814 if (DEBUGME) {
815 errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
816 }
817
818 if (exitingBlks.size() == 0) {
819 setLoopLandBlock(loopRep);
820 return 0;
821 }
822
823 // Compute the corresponding exitBlks and exit block set.
824 BlockTSmallerVector exitBlks;
825 std::set<BlockT *> exitBlkSet;
826 for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
827 iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
828 BlockT *exitingBlk = *iter;
829 BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
830 exitBlks.push_back(exitBlk);
831 exitBlkSet.insert(exitBlk); //non-duplicate insert
832 }
833
834 assert(exitBlkSet.size() > 0);
835 assert(exitBlks.size() == exitingBlks.size());
836
837 if (DEBUGME) {
838 errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
839 }
840
841 // Find exitLandBlk.
842 BlockT *exitLandBlk = NULL;
843 int numCloned = 0;
844 int numSerial = 0;
845
846 if (exitBlkSet.size() == 1) {
847 exitLandBlk = *exitBlkSet.begin();
848 } else {
849 exitLandBlk = findNearestCommonPostDom(exitBlkSet);
850
851 if (exitLandBlk == NULL) {
852 return -1;
853 }
854
855 bool allInPath = true;
856 bool allNotInPath = true;
857 for (typename std::set<BlockT*>::const_iterator
858 iter = exitBlkSet.begin(),
859 iterEnd = exitBlkSet.end();
860 iter != iterEnd; ++iter) {
861 BlockT *exitBlk = *iter;
862
863 PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
864 if (DEBUGME) {
865 errs() << "BB" << exitBlk->getNumber()
866 << " to BB" << exitLandBlk->getNumber() << " PathToKind="
867 << pathKind << "\n";
868 }
869
870 allInPath = allInPath && (pathKind == SinglePath_InPath);
871 allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
872
873 if (!allInPath && !allNotInPath) {
874 if (DEBUGME) {
875 errs() << "singlePath check fail\n";
876 }
877 return -1;
878 }
879 } // check all exit blocks
880
881 if (allNotInPath) {
882
883 // TODO: Simplify, maybe separate function?
884 LoopT *parentLoopRep = loopRep->getParentLoop();
885 BlockT *parentLoopHeader = NULL;
886 if (parentLoopRep)
887 parentLoopHeader = parentLoopRep->getHeader();
888
889 if (exitLandBlk == parentLoopHeader &&
890 (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
891 loopRep,
892 exitBlkSet,
893 exitLandBlk)) != NULL) {
894 if (DEBUGME) {
895 errs() << "relocateLoopcontBlock success\n";
896 }
897 } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
898 exitingBlks,
899 exitBlks)) != NULL) {
900 if (DEBUGME) {
901 errs() << "insertEndbranchBlock success\n";
902 }
903 } else {
904 if (DEBUGME) {
905 errs() << "loop exit fail\n";
906 }
907 return -1;
908 }
909 }
910
911 // Handle side entry to exit path.
912 exitBlks.clear();
913 exitBlkSet.clear();
914 for (typename BlockTSmallerVector::iterator iterExiting =
915 exitingBlks.begin(),
916 iterExitingEnd = exitingBlks.end();
917 iterExiting != iterExitingEnd; ++iterExiting) {
918 BlockT *exitingBlk = *iterExiting;
919 BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
920 BlockT *newExitBlk = exitBlk;
921
922 if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
923 newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
924 ++numCloned;
925 }
926
927 numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
928
929 exitBlks.push_back(newExitBlk);
930 exitBlkSet.insert(newExitBlk);
931 }
932
933 for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
934 iterExitEnd = exitBlks.end();
935 iterExit != iterExitEnd; ++iterExit) {
936 BlockT *exitBlk = *iterExit;
937 numSerial += serialPatternMatch(exitBlk);
938 }
939
940 for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
941 iterExitEnd = exitBlks.end();
942 iterExit != iterExitEnd; ++iterExit) {
943 BlockT *exitBlk = *iterExit;
944 if (exitBlk->pred_size() > 1) {
945 if (exitBlk != exitLandBlk) {
946 return -1;
947 }
948 } else {
949 if (exitBlk != exitLandBlk &&
950 (exitBlk->succ_size() != 1 ||
951 *exitBlk->succ_begin() != exitLandBlk)) {
952 return -1;
953 }
954 }
955 }
956 } // else
957
958 exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
959
960 // Fold break into the breaking block. Leverage across level breaks.
961 assert(exitingBlks.size() == exitBlks.size());
962 for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
963 iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
964 iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
965 BlockT *exitBlk = *iterExit;
966 BlockT *exitingBlk = *iterExiting;
967 assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
968 LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
969 handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
970 }
971
972 int numBreak = static_cast<int>(exitingBlks.size());
973 numLoopbreakPatternMatch += numBreak;
974 numClonedBlock += numCloned;
975 return numBreak + numSerial + numCloned;
976} //loopbreakPatternMatch
977
978template<class PassT>
979int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
980 BlockT *loopHeader) {
981 int numCont = 0;
982 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
983 for (typename InvBlockGTraits::ChildIteratorType iter =
984 InvBlockGTraits::child_begin(loopHeader),
985 iterEnd = InvBlockGTraits::child_end(loopHeader);
986 iter != iterEnd; ++iter) {
987 BlockT *curBlk = *iter;
988 if (loopRep->contains(curBlk)) {
989 handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
990 loopHeader, loopRep);
991 contBlk.push_back(curBlk);
992 ++numCont;
993 }
994 }
995
996 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator
997 iter = contBlk.begin(), iterEnd = contBlk.end();
998 iter != iterEnd; ++iter) {
999 (*iter)->removeSuccessor(loopHeader);
1000 }
1001
1002 numLoopcontPatternMatch += numCont;
1003
1004 return numCont;
1005} //loopcontPatternMatch
1006
1007
1008template<class PassT>
1009bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
1010 BlockT *src2Blk) {
1011 // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the
1012 // same loop with LoopLandInfo without explicitly keeping track of
1013 // loopContBlks and loopBreakBlks, this is a method to get the information.
1014 //
1015 if (src1Blk->succ_size() == 0) {
1016 LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
1017 if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
1018 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
1019 if (theEntry != NULL) {
1020 if (DEBUGME) {
1021 errs() << "isLoopContBreakBlock yes src1 = BB"
1022 << src1Blk->getNumber()
1023 << " src2 = BB" << src2Blk->getNumber() << "\n";
1024 }
1025 return true;
1026 }
1027 }
1028 }
1029 return false;
1030} //isSameloopDetachedContbreak
1031
1032template<class PassT>
1033int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
1034 BlockT *trueBlk,
1035 BlockT *falseBlk) {
1036 int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
1037 if (num == 0) {
1038 if (DEBUGME) {
1039 errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
1040 }
1041 num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
1042 }
1043 return num;
1044}
1045
1046template<class PassT>
1047int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
1048 BlockT *trueBlk,
1049 BlockT *falseBlk) {
1050 int num = 0;
1051 BlockT *downBlk;
1052
1053 //trueBlk could be the common post dominator
1054 downBlk = trueBlk;
1055
1056 if (DEBUGME) {
1057 errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
1058 << " true = BB" << trueBlk->getNumber()
1059 << ", numSucc=" << trueBlk->succ_size()
1060 << " false = BB" << falseBlk->getNumber() << "\n";
1061 }
1062
1063 while (downBlk) {
1064 if (DEBUGME) {
1065 errs() << "check down = BB" << downBlk->getNumber();
1066 }
1067
1068 if (singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
1069 if (DEBUGME) {
1070 errs() << " working\n";
1071 }
1072
1073 num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
1074 num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
1075
1076 numClonedBlock += num;
1077 num += serialPatternMatch(*headBlk->succ_begin());
1078 num += serialPatternMatch(*(++headBlk->succ_begin()));
1079 num += ifPatternMatch(headBlk);
1080 assert(num > 0);
1081
1082 break;
1083 }
1084 if (DEBUGME) {
1085 errs() << " not working\n";
1086 }
1087 downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
1088 } // walk down the postDomTree
1089
1090 return num;
1091} //handleJumpintoIf
1092
1093template<class PassT>
1094void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
1095 BlockT *trueBlk,
1096 BlockT *falseBlk,
1097 BlockT *landBlk,
1098 bool detail) {
1099 errs() << "head = BB" << headBlk->getNumber()
1100 << " size = " << headBlk->size();
1101 if (detail) {
1102 errs() << "\n";
1103 headBlk->print(errs());
1104 errs() << "\n";
1105 }
1106
1107 if (trueBlk) {
1108 errs() << ", true = BB" << trueBlk->getNumber() << " size = "
1109 << trueBlk->size() << " numPred = " << trueBlk->pred_size();
1110 if (detail) {
1111 errs() << "\n";
1112 trueBlk->print(errs());
1113 errs() << "\n";
1114 }
1115 }
1116 if (falseBlk) {
1117 errs() << ", false = BB" << falseBlk->getNumber() << " size = "
1118 << falseBlk->size() << " numPred = " << falseBlk->pred_size();
1119 if (detail) {
1120 errs() << "\n";
1121 falseBlk->print(errs());
1122 errs() << "\n";
1123 }
1124 }
1125 if (landBlk) {
1126 errs() << ", land = BB" << landBlk->getNumber() << " size = "
1127 << landBlk->size() << " numPred = " << landBlk->pred_size();
1128 if (detail) {
1129 errs() << "\n";
1130 landBlk->print(errs());
1131 errs() << "\n";
1132 }
1133 }
1134
1135 errs() << "\n";
1136} //showImproveSimpleJumpintoIf
1137
1138template<class PassT>
1139int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
1140 BlockT *trueBlk,
1141 BlockT *falseBlk,
1142 BlockT **plandBlk) {
1143 bool migrateTrue = false;
1144 bool migrateFalse = false;
1145
1146 BlockT *landBlk = *plandBlk;
1147
1148 assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
1149 && (falseBlk == NULL || falseBlk->succ_size() <= 1));
1150
1151 if (trueBlk == falseBlk) {
1152 return 0;
1153 }
1154
1155 migrateTrue = needMigrateBlock(trueBlk);
1156 migrateFalse = needMigrateBlock(falseBlk);
1157
1158 if (!migrateTrue && !migrateFalse) {
1159 return 0;
1160 }
1161
1162 // If we need to migrate either trueBlk and falseBlk, migrate the rest that
1163 // have more than one predecessors. without doing this, its predecessor
1164 // rather than headBlk will have undefined value in initReg.
1165 if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
1166 migrateTrue = true;
1167 }
1168 if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
1169 migrateFalse = true;
1170 }
1171
1172 if (DEBUGME) {
1173 errs() << "before improveSimpleJumpintoIf: ";
1174 showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1175 }
1176
1177 // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
1178 //
1179 // new: headBlk => if () {initReg = 1; org trueBlk branch} else
1180 // {initReg = 0; org falseBlk branch }
1181 // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
1182 // => org landBlk
1183 // if landBlk->pred_size() > 2, put the about if-else inside
1184 // if (initReg !=2) {...}
1185 //
1186 // add initReg = initVal to headBlk
1187
1188 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1189 unsigned initReg =
1190 funcRep->getRegInfo().createVirtualRegister(I32RC);
1191 if (!migrateTrue || !migrateFalse) {
1192 int initVal = migrateTrue ? 0 : 1;
1193 CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
1194 }
1195
1196 int numNewBlk = 0;
1197
1198 if (landBlk == NULL) {
1199 landBlk = funcRep->CreateMachineBasicBlock();
1200 funcRep->push_back(landBlk); //insert to function
1201
1202 if (trueBlk) {
1203 trueBlk->addSuccessor(landBlk);
1204 } else {
1205 headBlk->addSuccessor(landBlk);
1206 }
1207
1208 if (falseBlk) {
1209 falseBlk->addSuccessor(landBlk);
1210 } else {
1211 headBlk->addSuccessor(landBlk);
1212 }
1213
1214 numNewBlk ++;
1215 }
1216
1217 bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
1218
1219 //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
1220 typename BlockT::iterator insertPos =
1221 CFGTraits::getInstrPos
1222 (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));
1223
1224 if (landBlkHasOtherPred) {
1225 unsigned immReg =
1226 funcRep->getRegInfo().createVirtualRegister(I32RC);
1227 CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
1228 unsigned cmpResReg =
1229 funcRep->getRegInfo().createVirtualRegister(I32RC);
1230
1231 CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
1232 initReg, immReg);
1233 CFGTraits::insertCondBranchBefore(landBlk, insertPos,
1234 AMDGPU::IF_PREDICATE_SET, passRep,
1235 cmpResReg, DebugLoc());
1236 }
1237
1238 CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_PREDICATE_SET,
1239 passRep, initReg, DebugLoc());
1240
1241 if (migrateTrue) {
1242 migrateInstruction(trueBlk, landBlk, insertPos);
1243 // need to uncondionally insert the assignment to ensure a path from its
1244 // predecessor rather than headBlk has valid value in initReg if
1245 // (initVal != 1).
1246 CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
1247 }
1248 CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);
1249
1250 if (migrateFalse) {
1251 migrateInstruction(falseBlk, landBlk, insertPos);
1252 // need to uncondionally insert the assignment to ensure a path from its
1253 // predecessor rather than headBlk has valid value in initReg if
1254 // (initVal != 0)
1255 CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
1256 }
1257
1258 if (landBlkHasOtherPred) {
1259 // add endif
1260 CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
1261
1262 // put initReg = 2 to other predecessors of landBlk
1263 for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
1264 predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
1265 ++predIter) {
1266 BlockT *curBlk = *predIter;
1267 if (curBlk != trueBlk && curBlk != falseBlk) {
1268 CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
1269 }
1270 } //for
1271 }
1272 if (DEBUGME) {
1273 errs() << "result from improveSimpleJumpintoIf: ";
1274 showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1275 }
1276
1277 // update landBlk
1278 *plandBlk = landBlk;
1279
1280 return numNewBlk;
1281} //improveSimpleJumpintoIf
1282
1283template<class PassT>
1284void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
1285 LoopT *exitingLoop,
1286 BlockT *exitBlk,
1287 LoopT *exitLoop,
1288 BlockT *landBlk) {
1289 if (DEBUGME) {
1290 errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
1291 << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
1292 }
1293 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1294
1295 RegiT initReg = INVALIDREGNUM;
1296 if (exitingLoop != exitLoop) {
1297 initReg = static_cast<int>
1298 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1299 assert(initReg != INVALIDREGNUM);
1300 addLoopBreakInitReg(exitLoop, initReg);
1301 while (exitingLoop != exitLoop && exitingLoop) {
1302 addLoopBreakOnReg(exitingLoop, initReg);
1303 exitingLoop = exitingLoop->getParentLoop();
1304 }
1305 assert(exitingLoop == exitLoop);
1306 }
1307
1308 mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
1309
1310} //handleLoopbreak
1311
1312template<class PassT>
1313void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
1314 LoopT *contingLoop,
1315 BlockT *contBlk,
1316 LoopT *contLoop) {
1317 if (DEBUGME) {
1318 errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
1319 << " header = BB" << contBlk->getNumber() << "\n";
1320
1321 errs() << "Trying to continue loop-depth = "
1322 << getLoopDepth(contLoop)
1323 << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
1324 }
1325
1326 RegiT initReg = INVALIDREGNUM;
1327 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1328 if (contingLoop != contLoop) {
1329 initReg = static_cast<int>
1330 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1331 assert(initReg != INVALIDREGNUM);
1332 addLoopContInitReg(contLoop, initReg);
1333 while (contingLoop && contingLoop->getParentLoop() != contLoop) {
1334 addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg
1335 contingLoop = contingLoop->getParentLoop();
1336 }
1337 assert(contingLoop && contingLoop->getParentLoop() == contLoop);
1338 addLoopContOnReg(contingLoop, initReg);
1339 }
1340
1341 settleLoopcontBlock(contingBlk, contBlk, initReg);
1342} //handleLoopcontBlock
1343
1344template<class PassT>
1345void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
1346 if (DEBUGME) {
1347 errs() << "serialPattern BB" << dstBlk->getNumber()
1348 << " <= BB" << srcBlk->getNumber() << "\n";
1349 }
1350 dstBlk->splice(dstBlk->end(), srcBlk, srcBlk->begin(), srcBlk->end());
1351
1352 dstBlk->removeSuccessor(srcBlk);
1353 CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
1354
1355 removeSuccessor(srcBlk);
1356 retireBlock(dstBlk, srcBlk);
1357} //mergeSerialBlock
1358
1359template<class PassT>
1360void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
1361 BlockT *curBlk,
1362 BlockT *trueBlk,
1363 BlockT *falseBlk,
1364 BlockT *landBlk) {
1365 if (DEBUGME) {
1366 errs() << "ifPattern BB" << curBlk->getNumber();
1367 errs() << "{ ";
1368 if (trueBlk) {
1369 errs() << "BB" << trueBlk->getNumber();
1370 }
1371 errs() << " } else ";
1372 errs() << "{ ";
1373 if (falseBlk) {
1374 errs() << "BB" << falseBlk->getNumber();
1375 }
1376 errs() << " }\n ";
1377 errs() << "landBlock: ";
1378 if (landBlk == NULL) {
1379 errs() << "NULL";
1380 } else {
1381 errs() << "BB" << landBlk->getNumber();
1382 }
1383 errs() << "\n";
1384 }
1385
1386 int oldOpcode = branchInstr->getOpcode();
1387 DebugLoc branchDL = branchInstr->getDebugLoc();
1388
1389// transform to
1390// if cond
1391// trueBlk
1392// else
1393// falseBlk
1394// endif
1395// landBlk
1396
1397 typename BlockT::iterator branchInstrPos =
1398 CFGTraits::getInstrPos(curBlk, branchInstr);
1399 CFGTraits::insertCondBranchBefore(branchInstrPos,
1400 CFGTraits::getBranchNzeroOpcode(oldOpcode),
1401 passRep,
1402 branchDL);
1403
1404 if (trueBlk) {
1405 curBlk->splice(branchInstrPos, trueBlk, trueBlk->begin(), trueBlk->end());
1406 curBlk->removeSuccessor(trueBlk);
1407 if (landBlk && trueBlk->succ_size()!=0) {
1408 trueBlk->removeSuccessor(landBlk);
1409 }
1410 retireBlock(curBlk, trueBlk);
1411 }
1412 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);
1413
1414 if (falseBlk) {
1415 curBlk->splice(branchInstrPos, falseBlk, falseBlk->begin(),
1416 falseBlk->end());
1417 curBlk->removeSuccessor(falseBlk);
1418 if (landBlk && falseBlk->succ_size() != 0) {
1419 falseBlk->removeSuccessor(landBlk);
1420 }
1421 retireBlock(curBlk, falseBlk);
1422 }
1423 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
1424
1425 branchInstr->eraseFromParent();
1426
1427 if (landBlk && trueBlk && falseBlk) {
1428 curBlk->addSuccessor(landBlk);
1429 }
1430
1431} //mergeIfthenelseBlock
1432
1433template<class PassT>
1434void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
1435 LoopLandInfo *loopLand) {
1436 BlockT *landBlk = loopLand->landBlk;
1437
1438 if (DEBUGME) {
1439 errs() << "loopPattern header = BB" << dstBlk->getNumber()
1440 << " land = BB" << landBlk->getNumber() << "\n";
1441 }
1442
1443 // Loop contInitRegs are init at the beginning of the loop.
1444 for (typename std::set<RegiT>::const_iterator iter =
1445 loopLand->contInitRegs.begin(),
1446 iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
1447 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1448 }
1449
1450 /* we last inserterd the DebugLoc in the
1451 * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
1452 * search for the DebugLoc in the that statement.
1453 * if not found, we have to insert the empty/default DebugLoc */
1454 InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
1455 DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
1456
1457 CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
1458 // Loop breakInitRegs are init before entering the loop.
1459 for (typename std::set<RegiT>::const_iterator iter =
1460 loopLand->breakInitRegs.begin(),
1461 iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) {
1462 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1463 }
1464 // Loop endbranchInitRegs are init before entering the loop.
1465 for (typename std::set<RegiT>::const_iterator iter =
1466 loopLand->endbranchInitRegs.begin(),
1467 iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
1468 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1469 }
1470
1471 /* we last inserterd the DebugLoc in the continue statement in the current dstBlk
1472 * search for the DebugLoc in the continue statement.
1473 * if not found, we have to insert the empty/default DebugLoc */
1474 InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
1475 DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
1476
1477 CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
1478 // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this
1479 // loop.
1480 for (typename std::set<RegiT>::const_iterator iter =
1481 loopLand->breakOnRegs.begin(),
1482 iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
1483 CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::PREDICATED_BREAK, passRep,
1484 *iter);
1485 }
1486
1487 // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this
1488 // loop.
1489 for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
1490 iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
1491 CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
1492 passRep, *iter);
1493 }
1494
1495 dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
1496
1497 for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
1498 iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
1499 dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of.
1500 }
1501
1502 removeSuccessor(landBlk);
1503 retireBlock(dstBlk, landBlk);
1504} //mergeLooplandBlock
1505
1506template<class PassT>
1507void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I) {
1508 while (I--) {
1509 if (I->getOpcode() == AMDGPU::PRED_X) {
1510 switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
1511 case OPCODE_IS_ZERO_INT:
1512 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
1513 return;
1514 case OPCODE_IS_NOT_ZERO_INT:
1515 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
1516 return;
1517 case OPCODE_IS_ZERO:
1518 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
1519 return;
1520 case OPCODE_IS_NOT_ZERO:
1521 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
1522 return;
1523 default:
1524 assert(0 && "PRED_X Opcode invalid!");
1525 }
1526 }
1527 }
1528}
1529
1530template<class PassT>
1531void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
1532 BlockT *exitBlk,
1533 BlockT *exitLandBlk,
1534 RegiT setReg) {
1535 if (DEBUGME) {
1536 errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
1537 << " exit = BB" << exitBlk->getNumber()
1538 << " land = BB" << exitLandBlk->getNumber() << "\n";
1539 }
1540
1541 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
1542 assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
1543
1544 DebugLoc DL = branchInstr->getDebugLoc();
1545
1546 BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
1547
1548 // transform exitingBlk to
1549 // if ( ) {
1550 // exitBlk (if exitBlk != exitLandBlk)
1551 // setReg = 1
1552 // break
1553 // }endif
1554 // successor = {orgSuccessor(exitingBlk) - exitBlk}
1555
1556 typename BlockT::iterator branchInstrPos =
1557 CFGTraits::getInstrPos(exitingBlk, branchInstr);
1558
1559 if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
1560 //break_logical
1561
1562 if (trueBranch != exitBlk) {
1563 reversePredicateSetter(branchInstrPos);
1564 }
1565 CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
1566 } else {
1567 if (trueBranch != exitBlk) {
1568 reversePredicateSetter(branchInstr);
1569 }
1570 CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
1571 if (exitBlk != exitLandBlk) {
1572 //splice is insert-before ...
1573 exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
1574 exitBlk->end());
1575 }
1576 if (setReg != INVALIDREGNUM) {
1577 CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
1578 }
1579 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
1580 } //if_logical
1581
1582 //now branchInst can be erase safely
1583 branchInstr->eraseFromParent();
1584
1585 //now take care of successors, retire blocks
1586 exitingBlk->removeSuccessor(exitBlk);
1587 if (exitBlk != exitLandBlk) {
1588 //splice is insert-before ...
1589 exitBlk->removeSuccessor(exitLandBlk);
1590 retireBlock(exitingBlk, exitBlk);
1591 }
1592
1593} //mergeLoopbreakBlock
1594
1595template<class PassT>
1596void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
1597 BlockT *contBlk,
1598 RegiT setReg) {
1599 if (DEBUGME) {
1600 errs() << "settleLoopcontBlock conting = BB"
1601 << contingBlk->getNumber()
1602 << ", cont = BB" << contBlk->getNumber() << "\n";
1603 }
1604
1605 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
1606 if (branchInstr) {
1607 assert(CFGTraits::isCondBranch(branchInstr));
1608 typename BlockT::iterator branchInstrPos =
1609 CFGTraits::getInstrPos(contingBlk, branchInstr);
1610 BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
1611 int oldOpcode = branchInstr->getOpcode();
1612 DebugLoc DL = branchInstr->getDebugLoc();
1613
1614 // transform contingBlk to
1615 // if () {
1616 // move instr after branchInstr
1617 // continue
1618 // or
1619 // setReg = 1
1620 // break
1621 // }endif
1622 // successor = {orgSuccessor(contingBlk) - loopHeader}
1623
1624 bool useContinueLogical =
1625 (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
1626
1627 if (useContinueLogical == false) {
1628 int branchOpcode =
1629 trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
1630 : CFGTraits::getBranchZeroOpcode(oldOpcode);
1631
1632 CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
1633
1634 if (setReg != INVALIDREGNUM) {
1635 CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
1636 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1637 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
1638 } else {
1639 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1640 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
1641 }
1642
1643 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
1644 } else {
1645 int branchOpcode =
1646 trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
1647 : CFGTraits::getContinueZeroOpcode(oldOpcode);
1648
1649 CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
1650 }
1651
1652 branchInstr->eraseFromParent();
1653 } else {
1654 // if we've arrived here then we've already erased the branch instruction
1655 // travel back up the basic block to see the last reference of our debug location
1656 // we've just inserted that reference here so it should be representative
1657 if (setReg != INVALIDREGNUM) {
1658 CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
1659 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1660 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
1661 } else {
1662 // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1663 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
1664 }
1665 } //else
1666
1667} //settleLoopcontBlock
1668
1669// BBs in exitBlkSet are determined as in break-path for loopRep,
1670// before we can put code for BBs as inside loop-body for loopRep
1671// check whether those BBs are determined as cont-BB for parentLoopRep
1672// earlier.
1673// If so, generate a new BB newBlk
1674// (1) set newBlk common successor of BBs in exitBlkSet
1675// (2) change the continue-instr in BBs in exitBlkSet to break-instr
1676// (3) generate continue-instr in newBlk
1677//
1678template<class PassT>
1679typename CFGStructurizer<PassT>::BlockT *
1680CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
1681 LoopT *loopRep,
1682 std::set<BlockT *> &exitBlkSet,
1683 BlockT *exitLandBlk) {
1684 std::set<BlockT *> endBlkSet;
1685
1686
1687
1688 for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
1689 iterEnd = exitBlkSet.end();
1690 iter != iterEnd; ++iter) {
1691 BlockT *exitBlk = *iter;
1692 BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
1693
1694 if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
1695 return NULL;
1696
1697 endBlkSet.insert(endBlk);
1698 }
1699
1700 BlockT *newBlk = funcRep->CreateMachineBasicBlock();
1701 funcRep->push_back(newBlk); //insert to function
1702 CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
1703 SHOWNEWBLK(newBlk, "New continue block: ");
1704
1705 for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
1706 iterEnd = endBlkSet.end();
1707 iter != iterEnd; ++iter) {
1708 BlockT *endBlk = *iter;
1709 InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
1710 if (contInstr) {
1711 contInstr->eraseFromParent();
1712 }
1713 endBlk->addSuccessor(newBlk);
1714 if (DEBUGME) {
1715 errs() << "Add new continue Block to BB"
1716 << endBlk->getNumber() << " successors\n";
1717 }
1718 }
1719
1720 return newBlk;
1721} //relocateLoopcontBlock
1722
1723
1724// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
1725// LoopLandBlock. This BB branch on the loop endBranchInit register to the
1726// pathes corresponding to the loop exiting branches.
1727
1728template<class PassT>
1729typename CFGStructurizer<PassT>::BlockT *
1730CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
1731 BlockTSmallerVector &exitingBlks,
1732 BlockTSmallerVector &exitBlks) {
1733 const AMDGPUInstrInfo *tii =
1734 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
1735 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1736
1737 RegiT endBranchReg = static_cast<int>
1738 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1739 assert(endBranchReg >= 0);
1740
1741 // reg = 0 before entering the loop
1742 addLoopEndbranchInitReg(loopRep, endBranchReg);
1743
1744 uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
1745 assert(numBlks >=2 && numBlks == exitBlks.size());
1746
1747 BlockT *preExitingBlk = exitingBlks[0];
1748 BlockT *preExitBlk = exitBlks[0];
1749 BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
1750 funcRep->push_back(preBranchBlk); //insert to function
1751 SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
1752
1753 BlockT *newLandBlk = preBranchBlk;
1754
1755 CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
1756 newLandBlk);
1757 preExitingBlk->removeSuccessor(preExitBlk);
1758 preExitingBlk->addSuccessor(newLandBlk);
1759
1760 //it is redundant to add reg = 0 to exitingBlks[0]
1761
1762 // For 1..n th exiting path (the last iteration handles two pathes) create the
1763 // branch to the previous path and the current path.
1764 for (uint32_t i = 1; i < numBlks; ++i) {
1765 BlockT *curExitingBlk = exitingBlks[i];
1766 BlockT *curExitBlk = exitBlks[i];
1767 BlockT *curBranchBlk;
1768
1769 if (i == numBlks - 1) {
1770 curBranchBlk = curExitBlk;
1771 } else {
1772 curBranchBlk = funcRep->CreateMachineBasicBlock();
1773 funcRep->push_back(curBranchBlk); //insert to function
1774 SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
1775 }
1776
1777 // Add reg = i to exitingBlks[i].
1778 CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
1779 endBranchReg, i);
1780
1781 // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
1782 // (exitingBlks[i], newLandBlk).
1783 CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
1784 newLandBlk);
1785 curExitingBlk->removeSuccessor(curExitBlk);
1786 curExitingBlk->addSuccessor(newLandBlk);
1787
1788 // add to preBranchBlk the branch instruction:
1789 // if (endBranchReg == preVal)
1790 // preExitBlk
1791 // else
1792 // curBranchBlk
1793 //
1794 // preValReg = i - 1
1795
1796 DebugLoc DL;
1797 RegiT preValReg = static_cast<int>
1798 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1799
1800 preBranchBlk->insert(preBranchBlk->begin(),
1801 tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
1802 i - 1));
1803
1804 // condResReg = (endBranchReg == preValReg)
1805 RegiT condResReg = static_cast<int>
1806 (funcRep->getRegInfo().createVirtualRegister(I32RC));
1807 BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
1808 .addReg(endBranchReg).addReg(preValReg);
1809
1810 BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
1811 .addMBB(preExitBlk).addReg(condResReg);
1812
1813 preBranchBlk->addSuccessor(preExitBlk);
1814 preBranchBlk->addSuccessor(curBranchBlk);
1815
1816 // Update preExitingBlk, preExitBlk, preBranchBlk.
1817 preExitingBlk = curExitingBlk;
1818 preExitBlk = curExitBlk;
1819 preBranchBlk = curBranchBlk;
1820
1821 } //end for 1 .. n blocks
1822
1823 return newLandBlk;
1824} //addLoopEndbranchBlock
1825
1826template<class PassT>
1827typename CFGStructurizer<PassT>::PathToKind
1828CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
1829 bool allowSideEntry) {
1830 assert(dstBlk);
1831
1832 if (srcBlk == dstBlk) {
1833 return SinglePath_InPath;
1834 }
1835
1836 while (srcBlk && srcBlk->succ_size() == 1) {
1837 srcBlk = *srcBlk->succ_begin();
1838 if (srcBlk == dstBlk) {
1839 return SinglePath_InPath;
1840 }
1841
1842 if (!allowSideEntry && srcBlk->pred_size() > 1) {
1843 return Not_SinglePath;
1844 }
1845 }
1846
1847 if (srcBlk && srcBlk->succ_size()==0) {
1848 return SinglePath_NotInPath;
1849 }
1850
1851 return Not_SinglePath;
1852} //singlePathTo
1853
1854// If there is a single path from srcBlk to dstBlk, return the last block before
1855// dstBlk If there is a single path from srcBlk->end without dstBlk, return the
1856// last block in the path Otherwise, return NULL
1857template<class PassT>
1858typename CFGStructurizer<PassT>::BlockT *
1859CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
1860 bool allowSideEntry) {
1861 assert(dstBlk);
1862
1863 if (srcBlk == dstBlk) {
1864 return srcBlk;
1865 }
1866
1867 if (srcBlk->succ_size() == 0) {
1868 return srcBlk;
1869 }
1870
1871 while (srcBlk && srcBlk->succ_size() == 1) {
1872 BlockT *preBlk = srcBlk;
1873
1874 srcBlk = *srcBlk->succ_begin();
1875 if (srcBlk == NULL) {
1876 return preBlk;
1877 }
1878
1879 if (!allowSideEntry && srcBlk->pred_size() > 1) {
1880 return NULL;
1881 }
1882 }
1883
1884 if (srcBlk && srcBlk->succ_size()==0) {
1885 return srcBlk;
1886 }
1887
1888 return NULL;
1889
1890} //singlePathEnd
1891
1892template<class PassT>
1893int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
1894 BlockT *dstBlk) {
1895 int cloned = 0;
1896 assert(preBlk->isSuccessor(srcBlk));
1897 while (srcBlk && srcBlk != dstBlk) {
1898 assert(srcBlk->succ_size() == 1);
1899 if (srcBlk->pred_size() > 1) {
1900 srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
1901 ++cloned;
1902 }
1903
1904 preBlk = srcBlk;
1905 srcBlk = *srcBlk->succ_begin();
1906 }
1907
1908 return cloned;
1909} //cloneOnSideEntryTo
1910
1911template<class PassT>
1912typename CFGStructurizer<PassT>::BlockT *
1913CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
1914 BlockT *predBlk) {
1915 assert(predBlk->isSuccessor(curBlk) &&
1916 "succBlk is not a prececessor of curBlk");
1917
1918 BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions
1919 CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
1920 //srcBlk, oldBlk, newBlk
1921
1922 predBlk->removeSuccessor(curBlk);
1923 predBlk->addSuccessor(cloneBlk);
1924
1925 // add all successor to cloneBlk
1926 CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
1927
1928 numClonedInstr += curBlk->size();
1929
1930 if (DEBUGME) {
1931 errs() << "Cloned block: " << "BB"
1932 << curBlk->getNumber() << "size " << curBlk->size() << "\n";
1933 }
1934
1935 SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
1936
1937 return cloneBlk;
1938} //cloneBlockForPredecessor
1939
1940template<class PassT>
1941typename CFGStructurizer<PassT>::BlockT *
1942CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
1943 BlockT *exitingBlk) {
1944 BlockT *exitBlk = NULL;
1945
1946 for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
1947 iterSuccEnd = exitingBlk->succ_end();
1948 iterSucc != iterSuccEnd; ++iterSucc) {
1949 BlockT *curBlk = *iterSucc;
1950 if (!loopRep->contains(curBlk)) {
1951 assert(exitBlk == NULL);
1952 exitBlk = curBlk;
1953 }
1954 }
1955
1956 assert(exitBlk != NULL);
1957
1958 return exitBlk;
1959} //exitingBlock2ExitBlock
1960
1961template<class PassT>
1962void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
1963 BlockT *dstBlk,
1964 InstrIterator insertPos) {
1965 InstrIterator spliceEnd;
1966 //look for the input branchinstr, not the AMDGPU branchinstr
1967 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
1968 if (branchInstr == NULL) {
1969 if (DEBUGME) {
1970 errs() << "migrateInstruction don't see branch instr\n" ;
1971 }
1972 spliceEnd = srcBlk->end();
1973 } else {
1974 if (DEBUGME) {
1975 errs() << "migrateInstruction see branch instr\n" ;
1976 branchInstr->dump();
1977 }
1978 spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
1979 }
1980 if (DEBUGME) {
1981 errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
1982 << "srcSize = " << srcBlk->size() << "\n";
1983 }
1984
1985 //splice insert before insertPos
1986 dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
1987
1988 if (DEBUGME) {
1989 errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
1990 << "srcSize = " << srcBlk->size() << "\n";
1991 }
1992} //migrateInstruction
1993
1994// normalizeInfiniteLoopExit change
1995// B1:
1996// uncond_br LoopHeader
1997//
1998// to
1999// B1:
2000// cond_br 1 LoopHeader dummyExit
2001// and return the newly added dummy exit block
2002//
2003template<class PassT>
2004typename CFGStructurizer<PassT>::BlockT *
2005CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
2006 BlockT *loopHeader;
2007 BlockT *loopLatch;
2008 loopHeader = LoopRep->getHeader();
2009 loopLatch = LoopRep->getLoopLatch();
2010 BlockT *dummyExitBlk = NULL;
2011 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
2012 if (loopHeader!=NULL && loopLatch!=NULL) {
2013 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
2014 if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
2015 dummyExitBlk = funcRep->CreateMachineBasicBlock();
2016 funcRep->push_back(dummyExitBlk); //insert to function
2017 SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
2018
2019 if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
2020
2021 typename BlockT::iterator insertPos =
2022 CFGTraits::getInstrPos(loopLatch, branchInstr);
2023 unsigned immReg =
2024 funcRep->getRegInfo().createVirtualRegister(I32RC);
2025 CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
2026 InstrT *newInstr =
2027 CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
NAKAMURA Takumi6b207d32012-12-20 00:22:11 +00002028 MachineInstrBuilder MIB(*funcRep, newInstr);
2029 MIB.addMBB(loopHeader);
2030 MIB.addReg(immReg, false);
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002031
2032 SHOWNEWINSTR(newInstr);
2033
2034 branchInstr->eraseFromParent();
2035 loopLatch->addSuccessor(dummyExitBlk);
2036 }
2037 }
2038
2039 return dummyExitBlk;
2040} //normalizeInfiniteLoopExit
2041
2042template<class PassT>
2043void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
2044 InstrT *branchInstr;
2045
2046 // I saw two unconditional branch in one basic block in example
2047 // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
2048 while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
2049 && CFGTraits::isUncondBranch(branchInstr)) {
2050 if (DEBUGME) {
2051 errs() << "Removing unconditional branch instruction" ;
2052 branchInstr->dump();
2053 }
2054 branchInstr->eraseFromParent();
2055 }
2056} //removeUnconditionalBranch
2057
2058template<class PassT>
2059void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
2060 if (srcBlk->succ_size() == 2) {
2061 BlockT *blk1 = *srcBlk->succ_begin();
2062 BlockT *blk2 = *(++srcBlk->succ_begin());
2063
2064 if (blk1 == blk2) {
2065 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
2066 assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
2067 if (DEBUGME) {
2068 errs() << "Removing unneeded conditional branch instruction" ;
2069 branchInstr->dump();
2070 }
2071 branchInstr->eraseFromParent();
2072 SHOWNEWBLK(blk1, "Removing redundant successor");
2073 srcBlk->removeSuccessor(blk1);
2074 }
2075 }
2076} //removeRedundantConditionalBranch
2077
2078template<class PassT>
2079void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
2080 DEFAULT_VEC_SLOTS> &retBlks) {
2081 BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
2082 funcRep->push_back(dummyExitBlk); //insert to function
2083 CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);
2084
2085 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
2086 retBlks.begin(),
2087 iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
2088 BlockT *curBlk = *iter;
2089 InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
2090 if (curInstr) {
2091 curInstr->eraseFromParent();
2092 }
2093 curBlk->addSuccessor(dummyExitBlk);
2094 if (DEBUGME) {
2095 errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
2096 << " successors\n";
2097 }
2098 } //for
2099
2100 SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
2101} //addDummyExitBlock
2102
2103template<class PassT>
2104void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
2105 while (srcBlk->succ_size()) {
2106 srcBlk->removeSuccessor(*srcBlk->succ_begin());
2107 }
2108}
2109
2110template<class PassT>
2111void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
2112 BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
2113
2114 if (srcBlkInfo == NULL) {
2115 srcBlkInfo = new BlockInfo();
2116 }
2117
2118 srcBlkInfo->sccNum = sccNum;
2119}
2120
2121template<class PassT>
2122int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
2123 BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
2124 return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
2125}
2126
2127template<class PassT>
2128void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
2129 if (DEBUGME) {
2130 errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
2131 }
2132
2133 BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
2134
2135 if (srcBlkInfo == NULL) {
2136 srcBlkInfo = new BlockInfo();
2137 }
2138
2139 srcBlkInfo->isRetired = true;
2140 assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
2141 && "can't retire block yet");
2142}
2143
2144template<class PassT>
2145bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
2146 BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
2147 return (srcBlkInfo && srcBlkInfo->isRetired);
2148}
2149
2150template<class PassT>
2151bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
2152 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
2153 while (loopRep && loopRep->getHeader() == curBlk) {
2154 LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
2155
2156 if(loopLand == NULL)
2157 return true;
2158
2159 BlockT *landBlk = loopLand->landBlk;
2160 assert(landBlk);
2161 if (!isRetiredBlock(landBlk)) {
2162 return true;
2163 }
2164
2165 loopRep = loopRep->getParentLoop();
2166 }
2167
2168 return false;
2169} //isActiveLoophead
2170
2171template<class PassT>
2172bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
2173 const unsigned blockSizeThreshold = 30;
2174 const unsigned cloneInstrThreshold = 100;
2175
2176 bool multiplePreds = blk && (blk->pred_size() > 1);
2177
2178 if(!multiplePreds)
2179 return false;
2180
2181 unsigned blkSize = blk->size();
2182 return ((blkSize > blockSizeThreshold)
2183 && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
2184} //needMigrateBlock
2185
2186template<class PassT>
2187typename CFGStructurizer<PassT>::BlockT *
2188CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
2189 BlockTSmallerVector &exitBlks,
2190 std::set<BlockT *> &exitBlkSet) {
2191 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks
2192
2193 for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
2194 predIterEnd = landBlk->pred_end();
2195 predIter != predIterEnd; ++predIter) {
2196 BlockT *curBlk = *predIter;
2197 if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
2198 inpathBlks.push_back(curBlk);
2199 }
2200 } //for
2201
2202 //if landBlk has predecessors that are not in the given loop,
2203 //create a new block
2204 BlockT *newLandBlk = landBlk;
2205 if (inpathBlks.size() != landBlk->pred_size()) {
2206 newLandBlk = funcRep->CreateMachineBasicBlock();
2207 funcRep->push_back(newLandBlk); //insert to function
2208 newLandBlk->addSuccessor(landBlk);
2209 for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
2210 inpathBlks.begin(),
2211 iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
2212 BlockT *curBlk = *iter;
2213 CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
2214 //srcBlk, oldBlk, newBlk
2215 curBlk->removeSuccessor(landBlk);
2216 curBlk->addSuccessor(newLandBlk);
2217 }
2218 for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
2219 if (exitBlks[i] == landBlk) {
2220 exitBlks[i] = newLandBlk;
2221 }
2222 }
2223 SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
2224 }
2225
2226 setLoopLandBlock(loopRep, newLandBlk);
2227
2228 return newLandBlk;
2229} // recordLoopbreakLand
2230
2231template<class PassT>
2232void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
2233 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2234
2235 if (theEntry == NULL) {
2236 theEntry = new LoopLandInfo();
2237 }
2238 assert(theEntry->landBlk == NULL);
2239
2240 if (blk == NULL) {
2241 blk = funcRep->CreateMachineBasicBlock();
2242 funcRep->push_back(blk); //insert to function
2243 SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
2244 }
2245
2246 theEntry->landBlk = blk;
2247
2248 if (DEBUGME) {
2249 errs() << "setLoopLandBlock loop-header = BB"
2250 << loopRep->getHeader()->getNumber()
2251 << " landing-block = BB" << blk->getNumber() << "\n";
2252 }
2253} // setLoopLandBlock
2254
2255template<class PassT>
2256void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
2257 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2258
2259 if (theEntry == NULL) {
2260 theEntry = new LoopLandInfo();
2261 }
2262
2263 theEntry->breakOnRegs.insert(regNum);
2264
2265 if (DEBUGME) {
2266 errs() << "addLoopBreakOnReg loop-header = BB"
2267 << loopRep->getHeader()->getNumber()
2268 << " regNum = " << regNum << "\n";
2269 }
2270} // addLoopBreakOnReg
2271
2272template<class PassT>
2273void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
2274 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2275
2276 if (theEntry == NULL) {
2277 theEntry = new LoopLandInfo();
2278 }
2279 theEntry->contOnRegs.insert(regNum);
2280
2281 if (DEBUGME) {
2282 errs() << "addLoopContOnReg loop-header = BB"
2283 << loopRep->getHeader()->getNumber()
2284 << " regNum = " << regNum << "\n";
2285 }
2286} // addLoopContOnReg
2287
2288template<class PassT>
2289void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
2290 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2291
2292 if (theEntry == NULL) {
2293 theEntry = new LoopLandInfo();
2294 }
2295 theEntry->breakInitRegs.insert(regNum);
2296
2297 if (DEBUGME) {
2298 errs() << "addLoopBreakInitReg loop-header = BB"
2299 << loopRep->getHeader()->getNumber()
2300 << " regNum = " << regNum << "\n";
2301 }
2302} // addLoopBreakInitReg
2303
2304template<class PassT>
2305void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
2306 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2307
2308 if (theEntry == NULL) {
2309 theEntry = new LoopLandInfo();
2310 }
2311 theEntry->contInitRegs.insert(regNum);
2312
2313 if (DEBUGME) {
2314 errs() << "addLoopContInitReg loop-header = BB"
2315 << loopRep->getHeader()->getNumber()
2316 << " regNum = " << regNum << "\n";
2317 }
2318} // addLoopContInitReg
2319
2320template<class PassT>
2321void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
2322 RegiT regNum) {
2323 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2324
2325 if (theEntry == NULL) {
2326 theEntry = new LoopLandInfo();
2327 }
2328 theEntry->endbranchInitRegs.insert(regNum);
2329
2330 if (DEBUGME) {
2331 errs() << "addLoopEndbranchInitReg loop-header = BB"
2332 << loopRep->getHeader()->getNumber()
2333 << " regNum = " << regNum << "\n";
2334 }
2335} // addLoopEndbranchInitReg
2336
2337template<class PassT>
2338typename CFGStructurizer<PassT>::LoopLandInfo *
2339CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
2340 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2341
2342 return theEntry;
2343} // getLoopLandInfo
2344
2345template<class PassT>
2346typename CFGStructurizer<PassT>::BlockT *
2347CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
2348 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2349
2350 return theEntry ? theEntry->landBlk : NULL;
2351} // getLoopLandBlock
2352
2353
2354template<class PassT>
2355bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
2356 LoopT *loopRep = loopInfo->getLoopFor(curBlk);
2357 if (loopRep == NULL)
2358 return false;
2359
2360 BlockT *loopHeader = loopRep->getHeader();
2361
2362 return curBlk->isSuccessor(loopHeader);
2363
2364} //hasBackEdge
2365
2366template<class PassT>
2367unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
2368 return loopRep ? loopRep->getLoopDepth() : 0;
2369} //getLoopDepth
2370
2371template<class PassT>
2372int CFGStructurizer<PassT>::countActiveBlock
2373(typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
2374 typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
2375 int count = 0;
2376 while (iterStart != iterEnd) {
2377 if (!isRetiredBlock(*iterStart)) {
2378 ++count;
2379 }
2380 ++iterStart;
2381 }
2382
2383 return count;
2384} //countActiveBlock
2385
2386// This is work around solution for findNearestCommonDominator not avaiable to
2387// post dom a proper fix should go to Dominators.h.
2388
2389template<class PassT>
2390typename CFGStructurizer<PassT>::BlockT*
2391CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
2392
2393 if (postDomTree->dominates(blk1, blk2)) {
2394 return blk1;
2395 }
2396 if (postDomTree->dominates(blk2, blk1)) {
2397 return blk2;
2398 }
2399
2400 DomTreeNodeT *node1 = postDomTree->getNode(blk1);
2401 DomTreeNodeT *node2 = postDomTree->getNode(blk2);
2402
2403 // Handle newly cloned node.
2404 if (node1 == NULL && blk1->succ_size() == 1) {
2405 return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
2406 }
2407 if (node2 == NULL && blk2->succ_size() == 1) {
2408 return findNearestCommonPostDom(blk1, *blk2->succ_begin());
2409 }
2410
2411 if (node1 == NULL || node2 == NULL) {
2412 return NULL;
2413 }
2414
2415 node1 = node1->getIDom();
2416 while (node1) {
2417 if (postDomTree->dominates(node1, node2)) {
2418 return node1->getBlock();
2419 }
2420 node1 = node1->getIDom();
2421 }
2422
2423 return NULL;
2424}
2425
2426template<class PassT>
2427typename CFGStructurizer<PassT>::BlockT *
2428CFGStructurizer<PassT>::findNearestCommonPostDom
2429(typename std::set<BlockT *> &blks) {
2430 BlockT *commonDom;
2431 typename std::set<BlockT *>::const_iterator iter = blks.begin();
2432 typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
2433 for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
2434 BlockT *curBlk = *iter;
2435 if (curBlk != commonDom) {
2436 commonDom = findNearestCommonPostDom(curBlk, commonDom);
2437 }
2438 }
2439
2440 if (DEBUGME) {
2441 errs() << "Common post dominator for exit blocks is ";
2442 if (commonDom) {
2443 errs() << "BB" << commonDom->getNumber() << "\n";
2444 } else {
2445 errs() << "NULL\n";
2446 }
2447 }
2448
2449 return commonDom;
2450} //findNearestCommonPostDom
2451
Benjamin Kramer879b0712013-05-23 15:43:05 +00002452} // end anonymous namespace
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002453
2454//todo: move-end
2455
2456
2457//===----------------------------------------------------------------------===//
2458//
2459// CFGStructurizer for AMDGPU
2460//
2461//===----------------------------------------------------------------------===//
2462
2463
Benjamin Kramer879b0712013-05-23 15:43:05 +00002464namespace {
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002465class AMDGPUCFGStructurizer : public MachineFunctionPass {
2466public:
2467 typedef MachineInstr InstructionType;
2468 typedef MachineFunction FunctionType;
2469 typedef MachineBasicBlock BlockType;
2470 typedef MachineLoopInfo LoopinfoType;
2471 typedef MachineDominatorTree DominatortreeType;
2472 typedef MachinePostDominatorTree PostDominatortreeType;
2473 typedef MachineDomTreeNode DomTreeNodeType;
2474 typedef MachineLoop LoopType;
2475
2476protected:
2477 TargetMachine &TM;
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002478
2479public:
2480 AMDGPUCFGStructurizer(char &pid, TargetMachine &tm);
2481 const TargetInstrInfo *getTargetInstrInfo() const;
Bill Wendlingb5632b52013-06-07 20:28:55 +00002482 const AMDGPURegisterInfo *getTargetRegisterInfo() const;
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002483};
2484
Benjamin Kramer879b0712013-05-23 15:43:05 +00002485} // end anonymous namespace
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002486AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm)
Bill Wendlingb5632b52013-06-07 20:28:55 +00002487 : MachineFunctionPass(pid), TM(tm) {
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002488}
2489
2490const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const {
Bill Wendlingb5632b52013-06-07 20:28:55 +00002491 return TM.getInstrInfo();
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002492}
Bill Wendlingb5632b52013-06-07 20:28:55 +00002493
2494const AMDGPURegisterInfo *AMDGPUCFGStructurizer::getTargetRegisterInfo() const {
2495 return static_cast<const AMDGPURegisterInfo *>(TM.getRegisterInfo());
2496}
2497
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002498//===----------------------------------------------------------------------===//
2499//
2500// CFGPrepare
2501//
2502//===----------------------------------------------------------------------===//
2503
2504
Benjamin Kramer879b0712013-05-23 15:43:05 +00002505namespace {
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002506class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer {
2507public:
2508 static char ID;
2509
2510public:
2511 AMDGPUCFGPrepare(TargetMachine &tm);
2512
2513 virtual const char *getPassName() const;
2514 virtual void getAnalysisUsage(AnalysisUsage &AU) const;
2515
2516 bool runOnMachineFunction(MachineFunction &F);
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002517};
2518
2519char AMDGPUCFGPrepare::ID = 0;
Benjamin Kramer879b0712013-05-23 15:43:05 +00002520} // end anonymous namespace
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002521
2522AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm)
2523 : AMDGPUCFGStructurizer(ID, tm ) {
2524}
2525const char *AMDGPUCFGPrepare::getPassName() const {
2526 return "AMD IL Control Flow Graph Preparation Pass";
2527}
2528
2529void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
2530 AU.addPreserved<MachineFunctionAnalysis>();
2531 AU.addRequired<MachineFunctionAnalysis>();
2532 AU.addRequired<MachineDominatorTree>();
2533 AU.addRequired<MachinePostDominatorTree>();
2534 AU.addRequired<MachineLoopInfo>();
2535}
2536
2537//===----------------------------------------------------------------------===//
2538//
2539// CFGPerform
2540//
2541//===----------------------------------------------------------------------===//
2542
2543
Benjamin Kramer879b0712013-05-23 15:43:05 +00002544namespace {
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002545class AMDGPUCFGPerform : public AMDGPUCFGStructurizer {
2546public:
2547 static char ID;
2548
2549public:
2550 AMDGPUCFGPerform(TargetMachine &tm);
2551 virtual const char *getPassName() const;
2552 virtual void getAnalysisUsage(AnalysisUsage &AU) const;
2553 bool runOnMachineFunction(MachineFunction &F);
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002554};
2555
2556char AMDGPUCFGPerform::ID = 0;
Benjamin Kramer879b0712013-05-23 15:43:05 +00002557} // end anonymous namespace
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002558
2559 AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm)
2560: AMDGPUCFGStructurizer(ID, tm) {
2561}
2562
2563const char *AMDGPUCFGPerform::getPassName() const {
2564 return "AMD IL Control Flow Graph structurizer Pass";
2565}
2566
2567void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
2568 AU.addPreserved<MachineFunctionAnalysis>();
2569 AU.addRequired<MachineFunctionAnalysis>();
2570 AU.addRequired<MachineDominatorTree>();
2571 AU.addRequired<MachinePostDominatorTree>();
2572 AU.addRequired<MachineLoopInfo>();
2573}
2574
2575//===----------------------------------------------------------------------===//
2576//
2577// CFGStructTraits<AMDGPUCFGStructurizer>
2578//
2579//===----------------------------------------------------------------------===//
2580
Benjamin Kramer879b0712013-05-23 15:43:05 +00002581namespace {
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002582// this class is tailor to the AMDGPU backend
2583template<>
2584struct CFGStructTraits<AMDGPUCFGStructurizer> {
2585 typedef int RegiT;
2586
2587 static int getBranchNzeroOpcode(int oldOpcode) {
2588 switch(oldOpcode) {
Vincent Lejeunefd49dac2013-03-11 18:15:06 +00002589 case AMDGPU::JUMP_COND:
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002590 case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
2591 case AMDGPU::BRANCH_COND_i32:
2592 case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002593 default:
2594 assert(0 && "internal error");
2595 }
2596 return -1;
2597 }
2598
2599 static int getBranchZeroOpcode(int oldOpcode) {
2600 switch(oldOpcode) {
Vincent Lejeunefd49dac2013-03-11 18:15:06 +00002601 case AMDGPU::JUMP_COND:
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002602 case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
2603 case AMDGPU::BRANCH_COND_i32:
2604 case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002605 default:
2606 assert(0 && "internal error");
2607 }
2608 return -1;
2609 }
2610
2611 static int getContinueNzeroOpcode(int oldOpcode) {
2612 switch(oldOpcode) {
Vincent Lejeunefd49dac2013-03-11 18:15:06 +00002613 case AMDGPU::JUMP_COND:
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002614 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
2615 default:
2616 assert(0 && "internal error");
2617 };
2618 return -1;
2619 }
2620
2621 static int getContinueZeroOpcode(int oldOpcode) {
2622 switch(oldOpcode) {
Vincent Lejeunefd49dac2013-03-11 18:15:06 +00002623 case AMDGPU::JUMP_COND:
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002624 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
2625 default:
2626 assert(0 && "internal error");
2627 }
2628 return -1;
2629 }
2630
2631 static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
2632 return instr->getOperand(0).getMBB();
2633 }
2634
2635 static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
2636 instr->getOperand(0).setMBB(blk);
2637 }
2638
2639 static MachineBasicBlock *
2640 getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
2641 assert(blk->succ_size() == 2);
2642 MachineBasicBlock *trueBranch = getTrueBranch(instr);
2643 MachineBasicBlock::succ_iterator iter = blk->succ_begin();
2644 MachineBasicBlock::succ_iterator iterNext = iter;
2645 ++iterNext;
2646
2647 return (*iter == trueBranch) ? *iterNext : *iter;
2648 }
2649
2650 static bool isCondBranch(MachineInstr *instr) {
2651 switch (instr->getOpcode()) {
Vincent Lejeunefd49dac2013-03-11 18:15:06 +00002652 case AMDGPU::JUMP_COND:
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002653 case AMDGPU::BRANCH_COND_i32:
2654 case AMDGPU::BRANCH_COND_f32:
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002655 break;
2656 default:
2657 return false;
2658 }
2659 return true;
2660 }
2661
2662 static bool isUncondBranch(MachineInstr *instr) {
2663 switch (instr->getOpcode()) {
2664 case AMDGPU::JUMP:
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002665 case AMDGPU::BRANCH:
2666 return true;
2667 default:
2668 return false;
2669 }
2670 return true;
2671 }
2672
2673 static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
2674 //get DebugLoc from the first MachineBasicBlock instruction with debug info
2675 DebugLoc DL;
2676 for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
2677 MachineInstr *instr = &(*iter);
2678 if (instr->getDebugLoc().isUnknown() == false) {
2679 DL = instr->getDebugLoc();
2680 }
2681 }
2682 return DL;
2683 }
2684
2685 static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
2686 MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2687 MachineInstr *instr = &*iter;
2688 if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
2689 return instr;
2690 }
2691 return NULL;
2692 }
2693
2694 // The correct naming for this is getPossibleLoopendBlockBranchInstr.
2695 //
2696 // BB with backward-edge could have move instructions after the branch
2697 // instruction. Such move instruction "belong to" the loop backward-edge.
2698 //
2699 static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
2700 const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>(
2701 blk->getParent()->getTarget().getInstrInfo());
2702
2703 for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
2704 iterEnd = blk->rend(); iter != iterEnd; ++iter) {
2705 // FIXME: Simplify
2706 MachineInstr *instr = &*iter;
2707 if (instr) {
2708 if (isCondBranch(instr) || isUncondBranch(instr)) {
2709 return instr;
2710 } else if (!TII->isMov(instr->getOpcode())) {
2711 break;
2712 }
2713 }
2714 }
2715 return NULL;
2716 }
2717
2718 static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
2719 MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2720 if (iter != blk->rend()) {
2721 MachineInstr *instr = &(*iter);
2722 if (instr->getOpcode() == AMDGPU::RETURN) {
2723 return instr;
2724 }
2725 }
2726 return NULL;
2727 }
2728
2729 static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
2730 MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2731 if (iter != blk->rend()) {
2732 MachineInstr *instr = &(*iter);
2733 if (instr->getOpcode() == AMDGPU::CONTINUE) {
2734 return instr;
2735 }
2736 }
2737 return NULL;
2738 }
2739
2740 static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
2741 for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
2742 MachineInstr *instr = &(*iter);
2743 if (instr->getOpcode() == AMDGPU::PREDICATED_BREAK) {
2744 return instr;
2745 }
2746 }
2747 return NULL;
2748 }
2749
2750 static bool isReturnBlock(MachineBasicBlock *blk) {
2751 MachineInstr *instr = getReturnInstr(blk);
2752 bool isReturn = (blk->succ_size() == 0);
2753 if (instr) {
2754 assert(isReturn);
2755 } else if (isReturn) {
2756 if (DEBUGME) {
2757 errs() << "BB" << blk->getNumber()
2758 <<" is return block without RETURN instr\n";
2759 }
2760 }
2761
2762 return isReturn;
2763 }
2764
2765 static MachineBasicBlock::iterator
2766 getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
2767 assert(instr->getParent() == blk && "instruction doesn't belong to block");
2768 MachineBasicBlock::iterator iter = blk->begin();
2769 MachineBasicBlock::iterator iterEnd = blk->end();
2770 while (&(*iter) != instr && iter != iterEnd) {
2771 ++iter;
2772 }
2773
2774 assert(iter != iterEnd);
2775 return iter;
2776 }//getInstrPos
2777
2778 static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
2779 AMDGPUCFGStructurizer *passRep) {
2780 return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
2781 } //insertInstrBefore
2782
2783 static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
2784 AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
2785 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2786 MachineInstr *newInstr =
2787 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
2788
2789 MachineBasicBlock::iterator res;
2790 if (blk->begin() != blk->end()) {
2791 blk->insert(blk->begin(), newInstr);
2792 } else {
2793 blk->push_back(newInstr);
2794 }
2795
2796 SHOWNEWINSTR(newInstr);
2797
2798 return newInstr;
2799 } //insertInstrBefore
2800
2801 static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
2802 AMDGPUCFGStructurizer *passRep) {
2803 insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
2804 } //insertInstrEnd
2805
2806 static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
2807 AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
2808 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2809 MachineInstr *newInstr = blk->getParent()
2810 ->CreateMachineInstr(tii->get(newOpcode), DL);
2811
2812 blk->push_back(newInstr);
2813 //assume the instruction doesn't take any reg operand ...
2814
2815 SHOWNEWINSTR(newInstr);
2816 } //insertInstrEnd
2817
2818 static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
2819 int newOpcode,
2820 AMDGPUCFGStructurizer *passRep) {
2821 MachineInstr *oldInstr = &(*instrPos);
2822 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2823 MachineBasicBlock *blk = oldInstr->getParent();
2824 MachineInstr *newInstr =
2825 blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
2826 DebugLoc());
2827
2828 blk->insert(instrPos, newInstr);
2829 //assume the instruction doesn't take any reg operand ...
2830
2831 SHOWNEWINSTR(newInstr);
2832 return newInstr;
2833 } //insertInstrBefore
2834
2835 static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
2836 int newOpcode,
2837 AMDGPUCFGStructurizer *passRep,
2838 DebugLoc DL) {
2839 MachineInstr *oldInstr = &(*instrPos);
2840 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2841 MachineBasicBlock *blk = oldInstr->getParent();
NAKAMURA Takumi6b207d32012-12-20 00:22:11 +00002842 MachineFunction *MF = blk->getParent();
2843 MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002844
2845 blk->insert(instrPos, newInstr);
NAKAMURA Takumi6b207d32012-12-20 00:22:11 +00002846 MachineInstrBuilder MIB(*MF, newInstr);
2847 MIB.addReg(oldInstr->getOperand(1).getReg(), false);
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002848
2849 SHOWNEWINSTR(newInstr);
2850 //erase later oldInstr->eraseFromParent();
2851 } //insertCondBranchBefore
2852
2853 static void insertCondBranchBefore(MachineBasicBlock *blk,
2854 MachineBasicBlock::iterator insertPos,
2855 int newOpcode,
2856 AMDGPUCFGStructurizer *passRep,
2857 RegiT regNum,
2858 DebugLoc DL) {
2859 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
NAKAMURA Takumi6b207d32012-12-20 00:22:11 +00002860 MachineFunction *MF = blk->getParent();
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002861
NAKAMURA Takumi6b207d32012-12-20 00:22:11 +00002862 MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002863
2864 //insert before
2865 blk->insert(insertPos, newInstr);
NAKAMURA Takumi6b207d32012-12-20 00:22:11 +00002866 MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002867
2868 SHOWNEWINSTR(newInstr);
2869 } //insertCondBranchBefore
2870
2871 static void insertCondBranchEnd(MachineBasicBlock *blk,
2872 int newOpcode,
2873 AMDGPUCFGStructurizer *passRep,
2874 RegiT regNum) {
2875 const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
NAKAMURA Takumi6b207d32012-12-20 00:22:11 +00002876 MachineFunction *MF = blk->getParent();
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002877 MachineInstr *newInstr =
NAKAMURA Takumi6b207d32012-12-20 00:22:11 +00002878 MF->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002879
2880 blk->push_back(newInstr);
NAKAMURA Takumi6b207d32012-12-20 00:22:11 +00002881 MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002882
2883 SHOWNEWINSTR(newInstr);
2884 } //insertCondBranchEnd
2885
2886
2887 static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
2888 AMDGPUCFGStructurizer *passRep,
2889 RegiT regNum, int regVal) {
2890 MachineInstr *oldInstr = &(*instrPos);
2891 const AMDGPUInstrInfo *tii =
2892 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
2893 MachineBasicBlock *blk = oldInstr->getParent();
2894 MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
2895 regVal);
2896 blk->insert(instrPos, newInstr);
2897
2898 SHOWNEWINSTR(newInstr);
2899 } //insertAssignInstrBefore
2900
2901 static void insertAssignInstrBefore(MachineBasicBlock *blk,
2902 AMDGPUCFGStructurizer *passRep,
2903 RegiT regNum, int regVal) {
2904 const AMDGPUInstrInfo *tii =
2905 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
2906
2907 MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
2908 regVal);
2909 if (blk->begin() != blk->end()) {
2910 blk->insert(blk->begin(), newInstr);
2911 } else {
2912 blk->push_back(newInstr);
2913 }
2914
2915 SHOWNEWINSTR(newInstr);
2916
2917 } //insertInstrBefore
2918
2919 static void insertCompareInstrBefore(MachineBasicBlock *blk,
2920 MachineBasicBlock::iterator instrPos,
2921 AMDGPUCFGStructurizer *passRep,
2922 RegiT dstReg, RegiT src1Reg,
2923 RegiT src2Reg) {
2924 const AMDGPUInstrInfo *tii =
2925 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
NAKAMURA Takumi6b207d32012-12-20 00:22:11 +00002926 MachineFunction *MF = blk->getParent();
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002927 MachineInstr *newInstr =
NAKAMURA Takumi6b207d32012-12-20 00:22:11 +00002928 MF->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002929
NAKAMURA Takumi6b207d32012-12-20 00:22:11 +00002930 MachineInstrBuilder MIB(*MF, newInstr);
2931 MIB.addReg(dstReg, RegState::Define); //set target
2932 MIB.addReg(src1Reg); //set src value
2933 MIB.addReg(src2Reg); //set src value
Tom Stellardf98f2ce2012-12-11 21:25:42 +00002934
2935 blk->insert(instrPos, newInstr);
2936 SHOWNEWINSTR(newInstr);
2937
2938 } //insertCompareInstrBefore
2939
2940 static void cloneSuccessorList(MachineBasicBlock *dstBlk,
2941 MachineBasicBlock *srcBlk) {
2942 for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
2943 iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
2944 dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of
2945 }
2946 } //cloneSuccessorList
2947
2948 static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
2949 MachineFunction *func = srcBlk->getParent();
2950 MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
2951 func->push_back(newBlk); //insert to function
2952 for (MachineBasicBlock::iterator iter = srcBlk->begin(),
2953 iterEnd = srcBlk->end();
2954 iter != iterEnd; ++iter) {
2955 MachineInstr *instr = func->CloneMachineInstr(iter);
2956 newBlk->push_back(instr);
2957 }
2958 return newBlk;
2959 }
2960
2961 //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
2962 //the AMDGPU instruction is not recognized as terminator fix this and retire
2963 //this routine
2964 static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
2965 MachineBasicBlock *oldBlk,
2966 MachineBasicBlock *newBlk) {
2967 MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
2968 if (branchInstr && isCondBranch(branchInstr) &&
2969 getTrueBranch(branchInstr) == oldBlk) {
2970 setTrueBranch(branchInstr, newBlk);
2971 }
2972 }
2973
2974 static void wrapup(MachineBasicBlock *entryBlk) {
2975 assert((!entryBlk->getParent()->getJumpTableInfo()
2976 || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
2977 && "found a jump table");
2978
2979 //collect continue right before endloop
2980 SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
2981 MachineBasicBlock::iterator pre = entryBlk->begin();
2982 MachineBasicBlock::iterator iterEnd = entryBlk->end();
2983 MachineBasicBlock::iterator iter = pre;
2984 while (iter != iterEnd) {
2985 if (pre->getOpcode() == AMDGPU::CONTINUE
2986 && iter->getOpcode() == AMDGPU::ENDLOOP) {
2987 contInstr.push_back(pre);
2988 }
2989 pre = iter;
2990 ++iter;
2991 } //end while
2992
2993 //delete continue right before endloop
2994 for (unsigned i = 0; i < contInstr.size(); ++i) {
2995 contInstr[i]->eraseFromParent();
2996 }
2997
2998 // TODO to fix up jump table so later phase won't be confused. if
2999 // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
3000 // there isn't such an interface yet. alternatively, replace all the other
3001 // blocks in the jump table with the entryBlk //}
3002
3003 } //wrapup
3004
3005 static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) {
3006 return &pass.getAnalysis<MachineDominatorTree>();
3007 }
3008
3009 static MachinePostDominatorTree*
3010 getPostDominatorTree(AMDGPUCFGStructurizer &pass) {
3011 return &pass.getAnalysis<MachinePostDominatorTree>();
3012 }
3013
3014 static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) {
3015 return &pass.getAnalysis<MachineLoopInfo>();
3016 }
3017}; // template class CFGStructTraits
Benjamin Kramer879b0712013-05-23 15:43:05 +00003018} // end anonymous namespace
Tom Stellardf98f2ce2012-12-11 21:25:42 +00003019
3020// createAMDGPUCFGPreparationPass- Returns a pass
Benjamin Kramer879b0712013-05-23 15:43:05 +00003021FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm) {
3022 return new AMDGPUCFGPrepare(tm);
Tom Stellardf98f2ce2012-12-11 21:25:42 +00003023}
3024
3025bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) {
Bill Wendlingb5632b52013-06-07 20:28:55 +00003026 return CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func, *this,
3027 getTargetRegisterInfo());
Tom Stellardf98f2ce2012-12-11 21:25:42 +00003028}
3029
3030// createAMDGPUCFGStructurizerPass- Returns a pass
Benjamin Kramer879b0712013-05-23 15:43:05 +00003031FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm) {
3032 return new AMDGPUCFGPerform(tm);
Tom Stellardf98f2ce2012-12-11 21:25:42 +00003033}
3034
3035bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) {
Bill Wendlingb5632b52013-06-07 20:28:55 +00003036 return CFGStructurizer<AMDGPUCFGStructurizer>().run(func, *this,
3037 getTargetRegisterInfo());
Tom Stellardf98f2ce2012-12-11 21:25:42 +00003038}