blob: 7700efc6f9020d2cee21046f4ee1f7fc634d4f1c [file] [log] [blame]
Chandler Carruthdb350872011-10-21 06:46:38 +00001//===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements basic block placement transformations using branch
11// probability estimates. It is based around "Algo2" from Profile Guided Code
12// Positioning [http://portal.acm.org/citation.cfm?id=989433].
13//
14// We combine the BlockFrequencyInfo with BranchProbabilityInfo to simulate
15// measured edge-weights. The BlockFrequencyInfo effectively summarizes the
16// probability of starting from any particular block, and the
17// BranchProbabilityInfo the probability of exiting the block via a particular
18// edge. Combined they form a function-wide ordering of the edges.
19//
20//===----------------------------------------------------------------------===//
21
22#define DEBUG_TYPE "block-placement2"
Chandler Carruth4a85cc92011-10-21 08:57:37 +000023#include "llvm/CodeGen/MachineBasicBlock.h"
Chandler Carruthdb350872011-10-21 06:46:38 +000024#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
25#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
26#include "llvm/CodeGen/MachineFunction.h"
Chandler Carruthdb350872011-10-21 06:46:38 +000027#include "llvm/CodeGen/MachineFunctionPass.h"
Chandler Carruth4a85cc92011-10-21 08:57:37 +000028#include "llvm/CodeGen/MachineLoopInfo.h"
29#include "llvm/CodeGen/MachineModuleInfo.h"
30#include "llvm/CodeGen/Passes.h"
Chandler Carruthdb350872011-10-21 06:46:38 +000031#include "llvm/Support/Allocator.h"
32#include "llvm/Support/ErrorHandling.h"
33#include "llvm/ADT/DenseMap.h"
34#include "llvm/ADT/SCCIterator.h"
35#include "llvm/ADT/SmallPtrSet.h"
36#include "llvm/ADT/SmallVector.h"
37#include "llvm/ADT/Statistic.h"
38#include "llvm/Target/TargetInstrInfo.h"
Chandler Carruth4a85cc92011-10-21 08:57:37 +000039#include "llvm/Target/TargetLowering.h"
Chandler Carruthdb350872011-10-21 06:46:38 +000040#include <algorithm>
41using namespace llvm;
42
43namespace {
44/// \brief A structure for storing a weighted edge.
45///
46/// This stores an edge and its weight, computed as the product of the
47/// frequency that the starting block is entered with the probability of
48/// a particular exit block.
49struct WeightedEdge {
50 BlockFrequency EdgeFrequency;
51 MachineBasicBlock *From, *To;
52
53 bool operator<(const WeightedEdge &RHS) const {
54 return EdgeFrequency < RHS.EdgeFrequency;
55 }
56};
57}
58
59namespace {
60struct BlockChain;
61/// \brief Type for our function-wide basic block -> block chain mapping.
62typedef DenseMap<MachineBasicBlock *, BlockChain *> BlockToChainMapType;
63}
64
65namespace {
66/// \brief A chain of blocks which will be laid out contiguously.
67///
68/// This is the datastructure representing a chain of consecutive blocks that
69/// are profitable to layout together in order to maximize fallthrough
70/// probabilities. We also can use a block chain to represent a sequence of
71/// basic blocks which have some external (correctness) requirement for
72/// sequential layout.
73///
74/// Eventually, the block chains will form a directed graph over the function.
75/// We provide an SCC-supporting-iterator in order to quicky build and walk the
76/// SCCs of block chains within a function.
77///
78/// The block chains also have support for calculating and caching probability
79/// information related to the chain itself versus other chains. This is used
80/// for ranking during the final layout of block chains.
81struct BlockChain {
82 class SuccIterator;
83
84 /// \brief The first and last basic block that from this chain.
85 ///
86 /// The chain is stored within the existing function ilist of basic blocks.
87 /// When merging chains or otherwise manipulating them, we splice the blocks
88 /// within this ilist, giving us very cheap storage here and constant time
89 /// merge operations.
90 ///
91 /// It is extremely important to note that LastBB is the iterator pointing
92 /// *at* the last basic block in the chain. That is, the chain consists of
93 /// the *closed* range [FirstBB, LastBB]. We cannot use half-open ranges
94 /// because the next basic block may get relocated to a different part of the
95 /// function at any time during the run of this pass.
96 MachineFunction::iterator FirstBB, LastBB;
97
98 /// \brief A handle to the function-wide basic block to block chain mapping.
99 ///
100 /// This is retained in each block chain to simplify the computation of child
101 /// block chains for SCC-formation and iteration. We store the edges to child
102 /// basic blocks, and map them back to their associated chains using this
103 /// structure.
104 BlockToChainMapType &BlockToChain;
105
106 /// \brief The weight used to rank two block chains in the same SCC.
107 ///
108 /// This is used during SCC layout of block chains to cache and rank the
109 /// chains. It is supposed to represent the expected frequency with which
110 /// control reaches a block within this chain, has the option of branching to
111 /// a block in some other chain participating in the SCC, but instead
112 /// continues within this chain. The higher this is, the more costly we
113 /// expect mis-predicted branches between this chain and other chains within
114 /// the SCC to be. Thus, since we expect branches between chains to be
115 /// predicted when backwards and not predicted when forwards, the higher this
116 /// is the more important that this chain is laid out first among those
117 /// chains in the same SCC as it.
118 BlockFrequency InChainEdgeFrequency;
119
120 /// \brief Construct a new BlockChain.
121 ///
122 /// This builds a new block chain representing a single basic block in the
123 /// function. It also registers itself as the chain that block participates
124 /// in with the BlockToChain mapping.
125 BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
126 : FirstBB(BB), LastBB(BB), BlockToChain(BlockToChain) {
127 assert(BB && "Cannot create a chain with a null basic block");
128 BlockToChain[BB] = this;
129 }
130
131 /// \brief Merge another block chain into this one.
132 ///
133 /// This routine merges a block chain into this one. It takes care of forming
134 /// a contiguous sequence of basic blocks, updating the edge list, and
135 /// updating the block -> chain mapping. It does not free or tear down the
136 /// old chain, but the old chain's block list is no longer valid.
137 void merge(BlockChain *Chain) {
138 assert(Chain && "Cannot merge a null chain");
139 MachineFunction::iterator EndBB = llvm::next(LastBB);
140 MachineFunction::iterator ChainEndBB = llvm::next(Chain->LastBB);
141
142 // Update the incoming blocks to point to this chain.
143 for (MachineFunction::iterator BI = Chain->FirstBB, BE = ChainEndBB;
144 BI != BE; ++BI) {
145 assert(BlockToChain[BI] == Chain && "Incoming blocks not in chain");
146 BlockToChain[BI] = this;
147 }
148
149 // We splice the blocks together within the function (unless they already
150 // are adjacent) so we can represent the new chain with a pair of pointers
151 // to basic blocks within the function. This is also useful as each chain
152 // of blocks will end up being laid out contiguously within the function.
153 if (EndBB != Chain->FirstBB)
154 FirstBB->getParent()->splice(EndBB, Chain->FirstBB, ChainEndBB);
155 LastBB = Chain->LastBB;
156 }
157};
158}
159
160namespace {
161/// \brief Successor iterator for BlockChains.
162///
163/// This is an iterator that walks over the successor block chains by looking
164/// through its blocks successors and mapping those back to block chains. This
165/// iterator is not a fully-functioning iterator, it is designed specifically
166/// to support the interface required by SCCIterator when forming and walking
167/// SCCs of BlockChains.
168///
169/// Note that this iterator cannot be used while the chains are still being
170/// formed and/or merged. Unlike the chains themselves, it does store end
171/// iterators which could be moved if the chains are re-ordered. Once we begin
172/// forming and iterating over an SCC of chains, the order of blocks within the
173/// function must not change until we finish using the SCC iterators.
174class BlockChain::SuccIterator
175 : public std::iterator<std::forward_iterator_tag,
176 BlockChain *, ptrdiff_t> {
177 BlockChain *Chain;
178 MachineFunction::iterator BI, BE;
179 MachineBasicBlock::succ_iterator SI;
180
181public:
182 explicit SuccIterator(BlockChain *Chain)
183 : Chain(Chain), BI(Chain->FirstBB), BE(llvm::next(Chain->LastBB)),
184 SI(BI->succ_begin()) {
185 while (BI != BE && BI->succ_begin() == BI->succ_end())
186 ++BI;
187 if (BI != BE)
188 SI = BI->succ_begin();
189 }
190
191 /// \brief Helper function to create an end iterator for a particular chain.
192 ///
193 /// The "end" state is extremely arbitrary. We chose to have BI == BE, and SI
194 /// == Chain->FirstBB->succ_begin(). The value of SI doesn't really make any
195 /// sense, but rather than try to rationalize SI and our increment, when we
196 /// detect an "end" state, we just immediately call this function to build
197 /// the canonical end iterator.
198 static SuccIterator CreateEnd(BlockChain *Chain) {
199 SuccIterator It(Chain);
200 It.BI = It.BE;
201 return It;
202 }
203
204 bool operator==(const SuccIterator &RHS) const {
205 return (Chain == RHS.Chain && BI == RHS.BI && SI == RHS.SI);
206 }
207 bool operator!=(const SuccIterator &RHS) const {
208 return !operator==(RHS);
209 }
210
211 SuccIterator& operator++() {
212 assert(*this != CreateEnd(Chain) && "Cannot increment the end iterator");
213 // There may be null successor pointers, skip over them.
214 // FIXME: I don't understand *why* there are null successor pointers.
215 do {
216 ++SI;
217 if (SI != BI->succ_end() && *SI)
218 return *this;
219
220 // There may be a basic block without successors. Skip over them.
221 do {
222 ++BI;
223 if (BI == BE)
224 return *this = CreateEnd(Chain);
225 } while (BI->succ_begin() == BI->succ_end());
226 SI = BI->succ_begin();
227 } while (!*SI);
228 return *this;
229 }
230 SuccIterator operator++(int) {
231 SuccIterator tmp = *this;
232 ++*this;
233 return tmp;
234 }
235
236 BlockChain *operator*() const {
237 assert(Chain->BlockToChain.lookup(*SI) && "Missing chain");
238 return Chain->BlockToChain.lookup(*SI);
239 }
240};
241}
242
243namespace {
244/// \brief Sorter used with containers of BlockChain pointers.
245///
246/// Sorts based on the \see BlockChain::InChainEdgeFrequency -- see its
247/// comments for details on what this ordering represents.
248struct ChainPtrPrioritySorter {
249 bool operator()(const BlockChain *LHS, const BlockChain *RHS) const {
250 assert(LHS && RHS && "Null chain entry");
251 return LHS->InChainEdgeFrequency < RHS->InChainEdgeFrequency;
252 }
253};
254}
255
256namespace {
257class MachineBlockPlacement : public MachineFunctionPass {
258 /// \brief A handle to the branch probability pass.
259 const MachineBranchProbabilityInfo *MBPI;
260
261 /// \brief A handle to the function-wide block frequency pass.
262 const MachineBlockFrequencyInfo *MBFI;
263
Chandler Carruth4a85cc92011-10-21 08:57:37 +0000264 /// \brief A handle to the loop info.
265 const MachineLoopInfo *MLI;
266
Chandler Carruthdb350872011-10-21 06:46:38 +0000267 /// \brief A handle to the target's instruction info.
268 const TargetInstrInfo *TII;
269
Chandler Carruth4a85cc92011-10-21 08:57:37 +0000270 /// \brief A handle to the target's lowering info.
271 const TargetLowering *TLI;
272
Chandler Carruthdb350872011-10-21 06:46:38 +0000273 /// \brief A prioritized list of edges in the BB-graph.
274 ///
275 /// For each function, we insert all control flow edges between BBs, along
276 /// with their "global" frequency. The Frequency of an edge being taken is
277 /// defined as the frequency of entering the source BB (from MBFI) times the
278 /// probability of taking a particular branch out of that block (from MBPI).
279 ///
280 /// Once built, this list is sorted in ascending frequency, making the last
281 /// edge the hottest one in the function.
282 SmallVector<WeightedEdge, 64> Edges;
283
284 /// \brief Allocator and owner of BlockChain structures.
285 ///
286 /// We build BlockChains lazily by merging together high probability BB
287 /// sequences acording to the "Algo2" in the paper mentioned at the top of
288 /// the file. To reduce malloc traffic, we allocate them using this slab-like
289 /// allocator, and destroy them after the pass completes.
290 SpecificBumpPtrAllocator<BlockChain> ChainAllocator;
291
292 /// \brief Function wide BasicBlock to BlockChain mapping.
293 ///
294 /// This mapping allows efficiently moving from any given basic block to the
295 /// BlockChain it participates in, if any. We use it to, among other things,
296 /// allow implicitly defining edges between chains as the existing edges
297 /// between basic blocks.
298 DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
299
300 /// \brief A prioritized sequence of chains.
301 ///
302 /// We build up the ideal sequence of basic block chains in reverse order
303 /// here, and then walk backwards to arrange the final function ordering.
304 SmallVector<BlockChain *, 16> PChains;
305
306#ifndef NDEBUG
307 /// \brief A set of active chains used to sanity-check the pass algorithm.
308 ///
309 /// All operations on this member should be wrapped in an assert or NDEBUG.
310 SmallPtrSet<BlockChain *, 16> ActiveChains;
311#endif
312
313 BlockChain *CreateChain(MachineBasicBlock *BB);
314 void PrioritizeEdges(MachineFunction &F);
315 void BuildBlockChains();
316 void PrioritizeChains(MachineFunction &F);
317 void PlaceBlockChains(MachineFunction &F);
Chandler Carruth4a85cc92011-10-21 08:57:37 +0000318 void AlignLoops(MachineFunction &F);
Chandler Carruthdb350872011-10-21 06:46:38 +0000319
320public:
321 static char ID; // Pass identification, replacement for typeid
322 MachineBlockPlacement() : MachineFunctionPass(ID) {
323 initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
324 }
325
326 bool runOnMachineFunction(MachineFunction &F);
327
328 void getAnalysisUsage(AnalysisUsage &AU) const {
329 AU.addRequired<MachineBranchProbabilityInfo>();
330 AU.addRequired<MachineBlockFrequencyInfo>();
Chandler Carruth4a85cc92011-10-21 08:57:37 +0000331 AU.addRequired<MachineLoopInfo>();
Chandler Carruthdb350872011-10-21 06:46:38 +0000332 MachineFunctionPass::getAnalysisUsage(AU);
333 }
334
335 const char *getPassName() const { return "Block Placement"; }
336};
337}
338
339char MachineBlockPlacement::ID = 0;
340INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2",
341 "Branch Probability Basic Block Placement", false, false)
342INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
343INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
Chandler Carruth4a85cc92011-10-21 08:57:37 +0000344INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
Chandler Carruthdb350872011-10-21 06:46:38 +0000345INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2",
346 "Branch Probability Basic Block Placement", false, false)
347
348FunctionPass *llvm::createMachineBlockPlacementPass() {
349 return new MachineBlockPlacement();
350}
351
352namespace llvm {
353/// \brief GraphTraits specialization for our BlockChain graph.
354template <> struct GraphTraits<BlockChain *> {
355 typedef BlockChain NodeType;
356 typedef BlockChain::SuccIterator ChildIteratorType;
357
358 static NodeType *getEntryNode(NodeType *N) { return N; }
359 static BlockChain::SuccIterator child_begin(NodeType *N) {
360 return BlockChain::SuccIterator(N);
361 }
362 static BlockChain::SuccIterator child_end(NodeType *N) {
363 return BlockChain::SuccIterator::CreateEnd(N);
364 }
365};
366}
367
368/// \brief Helper to create a new chain for a single BB.
369///
370/// Takes care of growing the Chains, setting up the BlockChain object, and any
371/// debug checking logic.
372/// \returns A pointer to the new BlockChain.
373BlockChain *MachineBlockPlacement::CreateChain(MachineBasicBlock *BB) {
374 BlockChain *Chain =
375 new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
376 assert(ActiveChains.insert(Chain));
377 return Chain;
378}
379
380/// \brief Build a prioritized list of edges.
381///
382/// The priority is determined by the product of the block frequency (how
383/// likely it is to arrive at a particular block) times the probability of
384/// taking this particular edge out of the block. This provides a function-wide
385/// ordering of the edges.
386void MachineBlockPlacement::PrioritizeEdges(MachineFunction &F) {
387 assert(Edges.empty() && "Already have an edge list");
388 SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
389 BlockChain *RequiredChain = 0;
390 for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
391 MachineBasicBlock *From = &*FI;
392 // We only consider MBBs with analyzable branches. Even if the analysis
393 // fails, if there is no fallthrough, we can still work with the MBB.
394 MachineBasicBlock *TBB = 0, *FBB = 0;
395 Cond.clear();
396 if (TII->AnalyzeBranch(*From, TBB, FBB, Cond) && From->canFallThrough()) {
397 // We push all unanalyzed blocks onto a chain eagerly to prevent them
398 // from being split later. Create the chain if needed, otherwise just
399 // keep track that these blocks reside on it.
400 if (!RequiredChain)
401 RequiredChain = CreateChain(From);
402 else
403 BlockToChain[From] = RequiredChain;
404 } else {
405 // As soon as we find an analyzable branch, add that block to and
406 // finalize any required chain that has been started. The required chain
407 // is only modeling potentially inexplicable fallthrough, so the first
408 // block to have analyzable fallthrough is a known-safe stopping point.
409 if (RequiredChain) {
410 BlockToChain[From] = RequiredChain;
411 RequiredChain->LastBB = FI;
412 RequiredChain = 0;
413 }
414 }
415
416 BlockFrequency BaseFrequency = MBFI->getBlockFreq(From);
417 for (MachineBasicBlock::succ_iterator SI = From->succ_begin(),
418 SE = From->succ_end();
419 SI != SE; ++SI) {
420 MachineBasicBlock *To = *SI;
421 WeightedEdge WE = { BaseFrequency * MBPI->getEdgeProbability(From, To),
422 From, To };
423 Edges.push_back(WE);
424 }
425 }
426 assert(!RequiredChain && "Never found a terminator for a required chain");
427 std::stable_sort(Edges.begin(), Edges.end());
428}
429
430/// \brief Build chains of basic blocks along hot paths.
431///
432/// Build chains by trying to merge each pair of blocks from the mostly costly
433/// edge first. This is essentially "Algo2" from the Profile Guided Code
434/// Placement paper. While each node is considered a chain of one block, this
435/// routine lazily build the chain objects themselves so that when possible it
436/// can just merge a block into an existing chain.
437void MachineBlockPlacement::BuildBlockChains() {
438 for (SmallVectorImpl<WeightedEdge>::reverse_iterator EI = Edges.rbegin(),
439 EE = Edges.rend();
440 EI != EE; ++EI) {
441 MachineBasicBlock *SourceB = EI->From, *DestB = EI->To;
442 if (SourceB == DestB) continue;
443
444 BlockChain *SourceChain = BlockToChain.lookup(SourceB);
445 if (!SourceChain) SourceChain = CreateChain(SourceB);
446 BlockChain *DestChain = BlockToChain.lookup(DestB);
447 if (!DestChain) DestChain = CreateChain(DestB);
448 if (SourceChain == DestChain)
449 continue;
450
451 bool IsSourceTail =
452 SourceChain->LastBB == MachineFunction::iterator(SourceB);
453 bool IsDestHead =
454 DestChain->FirstBB == MachineFunction::iterator(DestB);
455
456 if (!IsSourceTail || !IsDestHead)
457 continue;
458
459 SourceChain->merge(DestChain);
460 assert(ActiveChains.erase(DestChain));
461 }
462}
463
464/// \brief Prioritize the chains to minimize back-edges between chains.
465///
466/// This is the trickiest part of the placement algorithm. Each chain is
467/// a hot-path through a sequence of basic blocks, but there are conditional
468/// branches away from this hot path, and to some other chain. Hardware branch
469/// predictors favor back edges over forward edges, and so it is desirable to
470/// arrange the targets of branches away from a hot path and to some other
471/// chain to come later in the function, making them forward branches, and
472/// helping the branch predictor to predict fallthrough.
473///
474/// In some cases, this is easy. simply topologically walking from the entry
475/// chain through its successors in order would work if there were no cycles
476/// between the chains of blocks, but often there are. In such a case, we first
477/// need to identify the participants in the cycle, and then rank them so that
478/// the linearizing of the chains has the lowest *probability* of causing
479/// a mispredicted branch. To compute the correct rank for a chain, we take the
480/// complement of the branch probability for each branch leading away from the
481/// chain and multiply it by the frequency of the source block for that branch.
482/// This gives us the probability of that particular branch *not* being taken
483/// in this function. The sum of these probabilities for each chain is used as
484/// a rank, so that we order the chain with the highest such sum first.
485/// FIXME: This seems like a good approximation, but there is probably a known
486/// technique for ordering of an SCC given edge weights. It would be good to
487/// use that, or even use its code if possible.
488///
489/// Also notable is that we prioritize the chains from the bottom up, and so
490/// all of the "first" and "before" relationships end up inverted in the code.
491void MachineBlockPlacement::PrioritizeChains(MachineFunction &F) {
492 MachineBasicBlock *EntryB = &F.front();
493 BlockChain *EntryChain = BlockToChain[EntryB];
494 assert(EntryChain && "Missing chain for entry block");
495 assert(EntryChain->FirstBB == F.begin() &&
496 "Entry block is not the head of the entry block chain");
497
498 // Form an SCC and walk it from the bottom up.
499 SmallPtrSet<BlockChain *, 4> IsInSCC;
500 for (scc_iterator<BlockChain *> I = scc_begin(EntryChain);
501 !I.isAtEnd(); ++I) {
502 const std::vector<BlockChain *> &SCC = *I;
503 PChains.insert(PChains.end(), SCC.begin(), SCC.end());
504
505 // If there is only one chain in the SCC, it's trivially sorted so just
506 // bail out early. Sorting the SCC is expensive.
507 if (SCC.size() == 1)
508 continue;
509
510 // We work strictly on the PChains range from here on out to maximize
511 // locality.
512 SmallVectorImpl<BlockChain *>::iterator SCCEnd = PChains.end(),
513 SCCBegin = SCCEnd - SCC.size();
514 IsInSCC.clear();
515 IsInSCC.insert(SCCBegin, SCCEnd);
516
517 // Compute the edge frequency of staying in a chain, despite the existency
518 // of an edge to some other chain within this SCC.
519 for (SmallVectorImpl<BlockChain *>::iterator SCCI = SCCBegin;
520 SCCI != SCCEnd; ++SCCI) {
521 BlockChain *Chain = *SCCI;
522
523 // Special case the entry chain. Regardless of the weights of other
524 // chains, the entry chain *must* come first, so move it to the end, and
525 // avoid processing that chain at all.
526 if (Chain == EntryChain) {
527 --SCCEnd;
528 if (SCCI == SCCEnd) break;
529 Chain = *SCCI = *SCCEnd;
530 *SCCEnd = EntryChain;
531 }
532
533 // Walk over every block in this chain looking for out-bound edges to
534 // other chains in this SCC.
535 for (MachineFunction::iterator BI = Chain->FirstBB,
536 BE = llvm::next(Chain->LastBB);
537 BI != BE; ++BI) {
538 MachineBasicBlock *From = &*BI;
539 for (MachineBasicBlock::succ_iterator SI = BI->succ_begin(),
540 SE = BI->succ_end();
541 SI != SE; ++SI) {
542 MachineBasicBlock *To = *SI;
543 if (!To || !IsInSCC.count(BlockToChain[To]))
544 continue;
545 BranchProbability ComplEdgeProb =
546 MBPI->getEdgeProbability(From, To).getCompl();
547 Chain->InChainEdgeFrequency +=
548 MBFI->getBlockFreq(From) * ComplEdgeProb;
549 }
550 }
551 }
552
553 // Sort the chains within the SCC according to their edge frequencies,
554 // which should make the least costly chain of blocks to mis-place be
555 // ordered first in the prioritized sequence.
556 std::stable_sort(SCCBegin, SCCEnd, ChainPtrPrioritySorter());
557 }
558}
559
560/// \brief Splice the function blocks together based on the chain priorities.
561///
562/// Each chain is already represented as a contiguous range of blocks in the
563/// function. Simply walk backwards down the prioritized chains and splice in
564/// any chains out of order. Note that the first chain we visit is necessarily
565/// the entry chain. It has no predecessors and so must be the top of the SCC.
566/// Also, we cannot splice any chain prior to the entry chain as we can't
567/// splice any blocks prior to the entry block.
568void MachineBlockPlacement::PlaceBlockChains(MachineFunction &F) {
569 assert(!PChains.empty() && "No chains were prioritized");
570 assert(PChains.back() == BlockToChain[&F.front()] &&
571 "The entry chain must always be the final chain");
572
573 MachineFunction::iterator InsertPos = F.begin();
574 for (SmallVectorImpl<BlockChain *>::reverse_iterator CI = PChains.rbegin(),
575 CE = PChains.rend();
576 CI != CE; ++CI) {
577 BlockChain *Chain = *CI;
578 // Check that we process this chain only once for debugging.
579 assert(ActiveChains.erase(Chain) && "Processed a chain twice");
580
581 // If this chain is already in the right position, just skip past it.
582 // Otherwise, splice it into position.
583 if (InsertPos == Chain->FirstBB)
584 InsertPos = llvm::next(Chain->LastBB);
585 else
586 F.splice(InsertPos, Chain->FirstBB, llvm::next(Chain->LastBB));
587 }
588
589 // Note that we can't assert this is empty as there may be unreachable blocks
590 // in the function.
591#ifndef NDEBUG
592 ActiveChains.clear();
593#endif
594
595 // Now that every block is in its final position, update all of the
596 // terminators.
597 SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
598 for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
599 // FIXME: It would be awesome of updateTerminator would just return rather
600 // than assert when the branch cannot be analyzed in order to remove this
601 // boiler plate.
602 Cond.clear();
603 MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
604 if (!TII->AnalyzeBranch(*FI, TBB, FBB, Cond))
605 FI->updateTerminator();
606 }
607}
608
Chandler Carruth4a85cc92011-10-21 08:57:37 +0000609/// \brief Recursive helper to align a loop and any nested loops.
610static void AlignLoop(MachineFunction &F, MachineLoop *L, unsigned Align) {
611 // Recurse through nested loops.
612 for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
613 AlignLoop(F, *I, Align);
614
615 L->getTopBlock()->setAlignment(Align);
616}
617
618/// \brief Align loop headers to target preferred alignments.
619void MachineBlockPlacement::AlignLoops(MachineFunction &F) {
620 if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
621 return;
622
623 unsigned Align = TLI->getPrefLoopAlignment();
624 if (!Align)
625 return; // Don't care about loop alignment.
626
627 for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I)
628 AlignLoop(F, *I, Align);
629}
630
Chandler Carruthdb350872011-10-21 06:46:38 +0000631bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
632 // Check for single-block functions and skip them.
633 if (llvm::next(F.begin()) == F.end())
634 return false;
635
636 MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
637 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
Chandler Carruth4a85cc92011-10-21 08:57:37 +0000638 MLI = &getAnalysis<MachineLoopInfo>();
Chandler Carruthdb350872011-10-21 06:46:38 +0000639 TII = F.getTarget().getInstrInfo();
Chandler Carruth4a85cc92011-10-21 08:57:37 +0000640 TLI = F.getTarget().getTargetLowering();
Chandler Carruthdb350872011-10-21 06:46:38 +0000641 assert(Edges.empty());
642 assert(BlockToChain.empty());
643 assert(PChains.empty());
644 assert(ActiveChains.empty());
645
646 PrioritizeEdges(F);
647 BuildBlockChains();
648 PrioritizeChains(F);
649 PlaceBlockChains(F);
Chandler Carruth4a85cc92011-10-21 08:57:37 +0000650 AlignLoops(F);
Chandler Carruthdb350872011-10-21 06:46:38 +0000651
652 Edges.clear();
653 BlockToChain.clear();
654 PChains.clear();
655 ChainAllocator.DestroyAll();
656
657 // We always return true as we have no way to track whether the final order
658 // differs from the original order.
659 return true;
660}