blob: e44a138cf92504e73910726d54dae14ab02620c2 [file] [log] [blame]
John Mosby378553c2009-05-12 20:33:29 +00001//===-- ShrinkWrapping.cpp - Reduce spills/restores of callee-saved regs --===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements a shrink wrapping variant of prolog/epilog insertion:
11// - Spills and restores of callee-saved registers (CSRs) are placed in the
12// machine CFG to tightly surround their uses so that execution paths that
13// do not use CSRs do not pay the spill/restore penalty.
14//
15// - Avoiding placment of spills/restores in loops: if a CSR is used inside a
16// loop the spills are placed in the loop preheader, and restores are
17// placed in the loop exit nodes (the successors of loop _exiting_ nodes).
18//
19// - Covering paths without CSR uses:
20// If a region in a CFG uses CSRs and has multiple entry and/or exit points,
21// the use info for the CSRs inside the region is propagated outward in the
22// CFG to ensure validity of the spill/restore placements. This decreases
23// the effectiveness of shrink wrapping but does not require edge splitting
24// in the machine CFG.
25//
26// This shrink wrapping implementation uses an iterative analysis to determine
27// which basic blocks require spills and restores for CSRs.
28//
29// This pass uses MachineDominators and MachineLoopInfo. Loop information
30// is used to prevent placement of callee-saved register spills/restores
31// in the bodies of loops.
32//
33//===----------------------------------------------------------------------===//
34
35#define DEBUG_TYPE "shrink-wrap"
36
John Mosby752c1df2009-05-13 17:52:11 +000037#include "PrologEpilogInserter.h"
John Mosby378553c2009-05-12 20:33:29 +000038#include "llvm/CodeGen/MachineDominators.h"
39#include "llvm/CodeGen/MachineLoopInfo.h"
40#include "llvm/CodeGen/MachineInstr.h"
41#include "llvm/CodeGen/MachineFrameInfo.h"
42#include "llvm/CodeGen/MachineRegisterInfo.h"
43#include "llvm/Target/TargetMachine.h"
44#include "llvm/Target/TargetRegisterInfo.h"
45#include "llvm/ADT/SparseBitVector.h"
46#include "llvm/ADT/DenseMap.h"
47#include "llvm/ADT/PostOrderIterator.h"
48#include "llvm/ADT/Statistic.h"
49#include "llvm/Support/CommandLine.h"
50#include "llvm/Support/Compiler.h"
51#include "llvm/Support/Debug.h"
52#include "llvm/ADT/STLExtras.h"
53#include "llvm/ADT/Statistic.h"
54#include <sstream>
55
56using namespace llvm;
57
58STATISTIC(numSRReduced, "Number of CSR spills+restores reduced.");
59
60// Shrink Wrapping:
61static cl::opt<bool>
62ShrinkWrapping("shrink-wrap",
63 cl::desc("Shrink wrap callee-saved register spills/restores"));
64
65// Shrink wrap only the specified function, a debugging aid.
66static cl::opt<std::string>
67ShrinkWrapFunc("shrink-wrap-func", cl::Hidden,
68 cl::desc("Shrink wrap the specified function"),
69 cl::value_desc("funcname"),
70 cl::init(""));
71
72// Debugging level for shrink wrapping.
73enum ShrinkWrapDebugLevel {
74 None, BasicInfo, Iterations, Details
75};
76
77static cl::opt<enum ShrinkWrapDebugLevel>
78ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden,
79 cl::desc("Print shrink wrapping debugging information"),
80 cl::values(
81 clEnumVal(None , "disable debug output"),
82 clEnumVal(BasicInfo , "print basic DF sets"),
83 clEnumVal(Iterations, "print SR sets for each iteration"),
84 clEnumVal(Details , "print all DF sets"),
85 clEnumValEnd));
86
87
88void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
89 AU.setPreservesCFG();
90 if (ShrinkWrapping || ShrinkWrapFunc != "") {
91 AU.addRequired<MachineLoopInfo>();
92 AU.addRequired<MachineDominatorTree>();
93 }
94 AU.addPreserved<MachineLoopInfo>();
95 AU.addPreserved<MachineDominatorTree>();
96 MachineFunctionPass::getAnalysisUsage(AU);
97}
98
99//===----------------------------------------------------------------------===//
100// ShrinkWrapping implementation
101//===----------------------------------------------------------------------===//
102
103// Convienences for dealing with machine loops.
104MachineBasicBlock* PEI::getTopLevelLoopPreheader(MachineLoop* LP) {
105 assert(LP && "Machine loop is NULL.");
106 MachineBasicBlock* PHDR = LP->getLoopPreheader();
107 MachineLoop* PLP = LP->getParentLoop();
108 while (PLP) {
109 PHDR = PLP->getLoopPreheader();
110 PLP = PLP->getParentLoop();
111 }
112 return PHDR;
113}
114
115MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) {
116 if (LP == 0)
117 return 0;
118 MachineLoop* PLP = LP->getParentLoop();
119 while (PLP) {
120 LP = PLP;
121 PLP = PLP->getParentLoop();
122 }
123 return LP;
124}
125
126bool PEI::isReturnBlock(MachineBasicBlock* MBB) {
127 return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn());
128}
129
130// Initialize shrink wrapping DFA sets, called before iterations.
131void PEI::clearAnticAvailSets() {
132 AnticIn.clear();
133 AnticOut.clear();
134 AvailIn.clear();
135 AvailOut.clear();
136}
137
138// Clear all sets constructed by shrink wrapping.
139void PEI::clearAllSets() {
140 ReturnBlocks.clear();
141 clearAnticAvailSets();
142 UsedCSRegs.clear();
143 CSRUsed.clear();
144 TLLoops.clear();
145 CSRSave.clear();
146 CSRRestore.clear();
147}
148
149// Initialize all shrink wrapping data.
150void PEI::initShrinkWrappingInfo() {
151 clearAllSets();
152 EntryBlock = 0;
153#ifndef NDEBUG
154 HasFastExitPath = false;
155#endif
156 ShrinkWrapThisFunction = ShrinkWrapping;
157 // DEBUG: enable or disable shrink wrapping for the current function
158 // via --shrink-wrap-func=<funcname>.
159#ifndef NDEBUG
160 if (ShrinkWrapFunc != "") {
161 std::string MFName = MF->getFunction()->getName();
162 ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc);
163 }
164#endif
165}
166
167
168/// placeCSRSpillsAndRestores - determine which MBBs of the function
169/// need save, restore code for callee-saved registers by doing a DF analysis
170/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs
171/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo
172/// is used to ensure that CSR save/restore code is not placed inside loops.
173/// This function computes the maps of MBBs -> CSRs to spill and restore
174/// in CSRSave, CSRRestore.
175///
176/// If shrink wrapping is not being performed, place all spills in
177/// the entry block, all restores in return blocks. In this case,
178/// CSRSave has a single mapping, CSRRestore has mappings for each
179/// return block.
180///
181void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) {
182
183 DEBUG(MF = &Fn);
184
185 initShrinkWrappingInfo();
186
187 DEBUG(if (ShrinkWrapThisFunction) {
188 DOUT << "Place CSR spills/restores for "
189 << MF->getFunction()->getName() << "\n";
190 });
191
192 if (calculateSets(Fn))
193 placeSpillsAndRestores(Fn);
194}
195
196/// calcAnticInOut - calculate the anticipated in/out reg sets
197/// for the given MBB by looking forward in the MCFG at MBB's
198/// successors.
199///
200bool PEI::calcAnticInOut(MachineBasicBlock* MBB) {
201 bool changed = false;
202
203 // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB))
204 SmallVector<MachineBasicBlock*, 4> successors;
205 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
206 SE = MBB->succ_end(); SI != SE; ++SI) {
207 MachineBasicBlock* SUCC = *SI;
208 if (SUCC != MBB)
209 successors.push_back(SUCC);
210 }
211
212 unsigned i = 0, e = successors.size();
213 if (i != e) {
214 CSRegSet prevAnticOut = AnticOut[MBB];
215 MachineBasicBlock* SUCC = successors[i];
216
217 AnticOut[MBB] = AnticIn[SUCC];
218 for (++i; i != e; ++i) {
219 SUCC = successors[i];
220 AnticOut[MBB] &= AnticIn[SUCC];
221 }
222 if (prevAnticOut != AnticOut[MBB])
223 changed = true;
224 }
225
226 // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]);
227 CSRegSet prevAnticIn = AnticIn[MBB];
228 AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
229 if (prevAnticIn |= AnticIn[MBB])
230 changed = true;
231 return changed;
232}
233
234/// calcAvailInOut - calculate the available in/out reg sets
235/// for the given MBB by looking backward in the MCFG at MBB's
236/// predecessors.
237///
238bool PEI::calcAvailInOut(MachineBasicBlock* MBB) {
239 bool changed = false;
240
241 // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB))
242 SmallVector<MachineBasicBlock*, 4> predecessors;
243 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
244 PE = MBB->pred_end(); PI != PE; ++PI) {
245 MachineBasicBlock* PRED = *PI;
246 if (PRED != MBB)
247 predecessors.push_back(PRED);
248 }
249
250 unsigned i = 0, e = predecessors.size();
251 if (i != e) {
252 CSRegSet prevAvailIn = AvailIn[MBB];
253 MachineBasicBlock* PRED = predecessors[i];
254
255 AvailIn[MBB] = AvailOut[PRED];
256 for (++i; i != e; ++i) {
257 PRED = predecessors[i];
258 AvailIn[MBB] &= AvailOut[PRED];
259 }
260 if (prevAvailIn != AvailIn[MBB])
261 changed = true;
262 }
263
264 // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]);
265 CSRegSet prevAvailOut = AvailOut[MBB];
266 AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
267 if (prevAvailOut |= AvailOut[MBB])
268 changed = true;
269 return changed;
270}
271
272/// calculateAnticAvail - build the sets anticipated and available
273/// registers in the MCFG of the current function iteratively,
274/// doing a combined forward and backward analysis.
275///
276void PEI::calculateAnticAvail(MachineFunction &Fn) {
277 // Initialize data flow sets.
278 clearAnticAvailSets();
279
280 // Calulate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
281 bool changed = true;
282 unsigned iterations = 0;
283 while (changed) {
284 changed = false;
285 ++iterations;
286 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
287 MBBI != MBBE; ++MBBI) {
288 MachineBasicBlock* MBB = MBBI;
289
290 // Calculate anticipated in, out regs at MBB from
291 // anticipated at successors of MBB.
292 changed |= calcAnticInOut(MBB);
293
294 // Calculate available in, out regs at MBB from
295 // available at predecessors of MBB.
296 changed |= calcAvailInOut(MBB);
297 }
298 }
299
300 DEBUG(if (ShrinkWrapDebugging >= Details) {
301 DOUT << "-----------------------------------------------------------\n";
302 DOUT << " Antic/Avail Sets:\n";
303 DOUT << "-----------------------------------------------------------\n";
304 DOUT << "iterations = " << iterations << "\n";
305 DOUT << "-----------------------------------------------------------\n";
306 DOUT << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n";
307 DOUT << "-----------------------------------------------------------\n";
308 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
309 MBBI != MBBE; ++MBBI) {
310 MachineBasicBlock* MBB = MBBI;
311 dumpSets(MBB);
312 }
313 DOUT << "-----------------------------------------------------------\n";
314 });
315}
316
317/// propagateUsesAroundLoop - copy used register info from MBB to all blocks
318/// of the loop given by LP and its parent loops. This prevents spills/restores
319/// from being placed in the bodies of loops.
320///
321void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) {
322 if (! MBB || !LP)
323 return;
324
325 std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks();
326 for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) {
327 MachineBasicBlock* LBB = loopBlocks[i];
328 if (LBB == MBB)
329 continue;
330 if (CSRUsed[LBB].contains(CSRUsed[MBB]))
331 continue;
332 CSRUsed[LBB] |= CSRUsed[MBB];
333 }
334}
335
336/// calculateSets - collect the CSRs used in this function, compute
337/// the DF sets that describe the initial minimal regions in the
338/// Machine CFG around which CSR spills and restores must be placed.
339///
340/// Additionally, this function decides if shrink wrapping should
341/// be disabled for the current function, checking the following:
342/// 1. the current function has more than 500 MBBs: heuristic limit
343/// on function size to reduce compile time impact of the current
344/// iterative algorithm.
345/// 2. all CSRs are used in the entry block.
346/// 3. all CSRs are used in all immediate successors of the entry block.
347/// 4. all CSRs are used in a subset of blocks, each of which dominates
348/// all return blocks. These blocks, taken as a subgraph of the MCFG,
349/// are equivalent to the entry block since all execution paths pass
350/// through them.
351///
352bool PEI::calculateSets(MachineFunction &Fn) {
353 // Sets used to compute spill, restore placement sets.
354 const std::vector<CalleeSavedInfo> CSI =
355 Fn.getFrameInfo()->getCalleeSavedInfo();
356
357 // If no CSRs used, we are done.
358 if (CSI.empty()) {
359 DEBUG(if (ShrinkWrapThisFunction)
360 DOUT << "DISABLED: " << Fn.getFunction()->getName()
361 << ": uses no callee-saved registers\n");
362 return false;
363 }
364
365 // Save refs to entry and return blocks.
366 EntryBlock = Fn.begin();
367 for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
368 MBB != E; ++MBB)
369 if (isReturnBlock(MBB))
370 ReturnBlocks.push_back(MBB);
371
372 // Determine if this function has fast exit paths.
373 DEBUG(if (ShrinkWrapThisFunction)
374 findFastExitPath());
375
376 // Limit shrink wrapping via the current iterative bit vector
377 // implementation to functions with <= 500 MBBs.
378 if (Fn.size() > 500) {
379 DEBUG(if (ShrinkWrapThisFunction)
380 DOUT << "DISABLED: " << Fn.getFunction()->getName()
381 << ": too large (" << Fn.size() << " MBBs)\n");
382 ShrinkWrapThisFunction = false;
383 }
384
385 // Return now if not shrink wrapping.
386 if (! ShrinkWrapThisFunction)
387 return false;
388
389 // Collect set of used CSRs.
390 for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
391 UsedCSRegs.set(inx);
392 }
393
394 // Walk instructions in all MBBs, create CSRUsed[] sets, choose
395 // whether or not to shrink wrap this function.
396 MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
397 MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
398 const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
399
400 bool allCSRUsesInEntryBlock = true;
401 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
402 MBBI != MBBE; ++MBBI) {
403 MachineBasicBlock* MBB = MBBI;
404 for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) {
405 for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
406 unsigned Reg = CSI[inx].getReg();
407 // If instruction I reads or modifies Reg, add it to UsedCSRegs,
408 // CSRUsed map for the current block.
409 for (unsigned opInx = 0, opEnd = I->getNumOperands();
410 opInx != opEnd; ++opInx) {
411 const MachineOperand &MO = I->getOperand(opInx);
412 if (! (MO.isReg() && (MO.isUse() || MO.isDef())))
413 continue;
414 unsigned MOReg = MO.getReg();
415 if (!MOReg)
416 continue;
417 if (MOReg == Reg ||
418 (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
419 TargetRegisterInfo::isPhysicalRegister(Reg) &&
420 TRI->isSubRegister(Reg, MOReg))) {
421 // CSR Reg is defined/used in block MBB.
422 CSRUsed[MBB].set(inx);
423 // Check for uses in EntryBlock.
424 if (MBB != EntryBlock)
425 allCSRUsesInEntryBlock = false;
426 }
427 }
428 }
429 }
430
431 if (CSRUsed[MBB].empty())
432 continue;
433
434 // Propagate CSRUsed[MBB] in loops
435 if (MachineLoop* LP = LI.getLoopFor(MBB)) {
436 // Add top level loop to work list.
437 MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP);
438 MachineLoop* PLP = getTopLevelLoopParent(LP);
439
440 if (! HDR) {
441 HDR = PLP->getHeader();
442 assert(HDR->pred_size() > 0 && "Loop header has no predecessors?");
443 MachineBasicBlock::pred_iterator PI = HDR->pred_begin();
444 HDR = *PI;
445 }
446 TLLoops[HDR] = PLP;
447
448 // Push uses from inside loop to its parent loops,
449 // or to all other MBBs in its loop.
450 if (LP->getLoopDepth() > 1) {
451 for (MachineLoop* PLP = LP->getParentLoop(); PLP;
452 PLP = PLP->getParentLoop()) {
453 propagateUsesAroundLoop(MBB, PLP);
454 }
455 } else {
456 propagateUsesAroundLoop(MBB, LP);
457 }
458 }
459 }
460
461 if (allCSRUsesInEntryBlock) {
462 DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName()
463 << ": all CSRs used in EntryBlock\n");
464 ShrinkWrapThisFunction = false;
465 } else {
466 bool allCSRsUsedInEntryFanout = true;
467 for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
468 SE = EntryBlock->succ_end(); SI != SE; ++SI) {
469 MachineBasicBlock* SUCC = *SI;
470 if (CSRUsed[SUCC] != UsedCSRegs)
471 allCSRsUsedInEntryFanout = false;
472 }
473 if (allCSRsUsedInEntryFanout) {
474 DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName()
475 << ": all CSRs used in imm successors of EntryBlock\n");
476 ShrinkWrapThisFunction = false;
477 }
478 }
479
480 if (ShrinkWrapThisFunction) {
481 // Check if MBB uses CSRs and dominates all exit nodes.
482 // Such nodes are equiv. to the entry node w.r.t.
483 // CSR uses: every path through the function must
484 // pass through this node. If each CSR is used at least
485 // once by these nodes, shrink wrapping is disabled.
486 CSRegSet CSRUsedInChokePoints;
487 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
488 MBBI != MBBE; ++MBBI) {
489 MachineBasicBlock* MBB = MBBI;
490 if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1)
491 continue;
492 bool dominatesExitNodes = true;
493 for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
494 if (! DT.dominates(MBB, ReturnBlocks[ri])) {
495 dominatesExitNodes = false;
496 break;
497 }
498 if (dominatesExitNodes) {
499 CSRUsedInChokePoints |= CSRUsed[MBB];
500 if (CSRUsedInChokePoints == UsedCSRegs) {
501 DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName()
502 << ": all CSRs used in choke point(s) at "
503 << getBasicBlockName(MBB) << "\n");
504 ShrinkWrapThisFunction = false;
505 break;
506 }
507 }
508 }
509 }
510
511 // Return now if we have decided not to apply shrink wrapping
512 // to the current function.
513 if (! ShrinkWrapThisFunction)
514 return false;
515
516 DEBUG({
517 DOUT << "ENABLED: " << Fn.getFunction()->getName();
518 if (HasFastExitPath)
519 DOUT << " (fast exit path)";
520 DOUT << "\n";
521 if (ShrinkWrapDebugging >= BasicInfo) {
522 DOUT << "------------------------------"
523 << "-----------------------------\n";
524 DOUT << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n";
525 if (ShrinkWrapDebugging >= Details) {
526 DOUT << "------------------------------"
527 << "-----------------------------\n";
528 dumpAllUsed();
529 }
530 }
531 });
532
533 // Build initial DF sets to determine minimal regions in the
534 // Machine CFG around which CSRs must be spilled and restored.
535 calculateAnticAvail(Fn);
536
537 return true;
538}
539
540/// addUsesForMEMERegion - add uses of CSRs spilled or restored in
541/// multi-entry, multi-exit (MEME) regions so spill and restore
542/// placement will not break code that enters or leaves a
543/// shrink-wrapped region by inducing spills with no matching
544/// restores or restores with no matching spills. A MEME region
545/// is a subgraph of the MCFG with multiple entry edges, multiple
546/// exit edges, or both. This code propagates use information
547/// through the MCFG until all paths requiring spills and restores
548/// _outside_ the computed minimal placement regions have been covered.
549///
550bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB,
551 SmallVector<MachineBasicBlock*, 4>& blks) {
552 if (MBB->succ_size() < 2 && MBB->pred_size() < 2) {
553 bool processThisBlock = false;
554 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
555 SE = MBB->succ_end(); SI != SE; ++SI) {
556 MachineBasicBlock* SUCC = *SI;
557 if (SUCC->pred_size() > 1) {
558 processThisBlock = true;
559 break;
560 }
561 }
562 if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) {
563 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
564 PE = MBB->pred_end(); PI != PE; ++PI) {
565 MachineBasicBlock* PRED = *PI;
566 if (PRED->succ_size() > 1) {
567 processThisBlock = true;
568 break;
569 }
570 }
571 }
572 if (! processThisBlock)
573 return false;
574 }
575
576 CSRegSet prop;
577 if (!CSRSave[MBB].empty())
578 prop = CSRSave[MBB];
579 else if (!CSRRestore[MBB].empty())
580 prop = CSRRestore[MBB];
581 else
582 prop = CSRUsed[MBB];
583 if (prop.empty())
584 return false;
585
586 // Propagate selected bits to successors, predecessors of MBB.
587 bool addedUses = false;
588 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
589 SE = MBB->succ_end(); SI != SE; ++SI) {
590 MachineBasicBlock* SUCC = *SI;
591 // Self-loop
592 if (SUCC == MBB)
593 continue;
594 if (! CSRUsed[SUCC].contains(prop)) {
595 CSRUsed[SUCC] |= prop;
596 addedUses = true;
597 blks.push_back(SUCC);
598 DEBUG(if (ShrinkWrapDebugging >= Iterations)
599 DOUT << getBasicBlockName(MBB)
600 << "(" << stringifyCSRegSet(prop) << ")->"
601 << "successor " << getBasicBlockName(SUCC) << "\n");
602 }
603 }
604 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
605 PE = MBB->pred_end(); PI != PE; ++PI) {
606 MachineBasicBlock* PRED = *PI;
607 // Self-loop
608 if (PRED == MBB)
609 continue;
610 if (! CSRUsed[PRED].contains(prop)) {
611 CSRUsed[PRED] |= prop;
612 addedUses = true;
613 blks.push_back(PRED);
614 DEBUG(if (ShrinkWrapDebugging >= Iterations)
615 DOUT << getBasicBlockName(MBB)
616 << "(" << stringifyCSRegSet(prop) << ")->"
617 << "predecessor " << getBasicBlockName(PRED) << "\n");
618 }
619 }
620 return addedUses;
621}
622
623/// addUsesForTopLevelLoops - add uses for CSRs used inside top
624/// level loops to the exit blocks of those loops.
625///
626bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) {
627 bool addedUses = false;
628
629 // Place restores for top level loops where needed.
630 for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator
631 I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) {
632 MachineBasicBlock* MBB = I->first;
633 MachineLoop* LP = I->second;
634 MachineBasicBlock* HDR = LP->getHeader();
635 SmallVector<MachineBasicBlock*, 4> exitBlocks;
636 CSRegSet loopSpills;
637
638 loopSpills = CSRSave[MBB];
639 if (CSRSave[MBB].empty()) {
640 loopSpills = CSRUsed[HDR];
641 assert(!loopSpills.empty() && "No CSRs used in loop?");
642 } else if (CSRRestore[MBB].contains(CSRSave[MBB]))
643 continue;
644
645 LP->getExitBlocks(exitBlocks);
646 assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?");
647 for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) {
648 MachineBasicBlock* EXB = exitBlocks[i];
649 if (! CSRUsed[EXB].contains(loopSpills)) {
650 CSRUsed[EXB] |= loopSpills;
651 addedUses = true;
652 DEBUG(if (ShrinkWrapDebugging >= Iterations)
653 DOUT << "LOOP " << getBasicBlockName(MBB)
654 << "(" << stringifyCSRegSet(loopSpills) << ")->"
655 << getBasicBlockName(EXB) << "\n");
656 if (EXB->succ_size() > 1 || EXB->pred_size() > 1)
657 blks.push_back(EXB);
658 }
659 }
660 }
661 return addedUses;
662}
663
664/// calcSpillPlacements - determine which CSRs should be spilled
665/// in MBB using AnticIn sets of MBB's predecessors, keeping track
666/// of changes to spilled reg sets. Add MBB to the set of blocks
667/// that need to be processed for propagating use info to cover
668/// multi-entry/exit regions.
669///
670bool PEI::calcSpillPlacements(MachineBasicBlock* MBB,
671 SmallVector<MachineBasicBlock*, 4> &blks,
672 CSRegBlockMap &prevSpills) {
673 bool placedSpills = false;
674 // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB)
675 CSRegSet anticInPreds;
676 SmallVector<MachineBasicBlock*, 4> predecessors;
677 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
678 PE = MBB->pred_end(); PI != PE; ++PI) {
679 MachineBasicBlock* PRED = *PI;
680 if (PRED != MBB)
681 predecessors.push_back(PRED);
682 }
683 unsigned i = 0, e = predecessors.size();
684 if (i != e) {
685 MachineBasicBlock* PRED = predecessors[i];
686 anticInPreds = UsedCSRegs - AnticIn[PRED];
687 for (++i; i != e; ++i) {
688 PRED = predecessors[i];
689 anticInPreds &= (UsedCSRegs - AnticIn[PRED]);
690 }
691 } else {
692 // Handle uses in entry blocks (which have no predecessors).
693 // This is necessary because the DFA formulation assumes the
694 // entry and (multiple) exit nodes cannot have CSR uses, which
695 // is not the case in the real world.
696 anticInPreds = UsedCSRegs;
697 }
698 // Compute spills required at MBB:
699 CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds;
700
701 if (! CSRSave[MBB].empty()) {
702 if (MBB == EntryBlock) {
703 for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
704 CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB];
705 } else {
706 // Reset all regs spilled in MBB that are also spilled in EntryBlock.
707 if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) {
708 CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock];
709 }
710 }
711 }
712 placedSpills = (CSRSave[MBB] != prevSpills[MBB]);
713 prevSpills[MBB] = CSRSave[MBB];
714 // Remember this block for adding restores to successor
715 // blocks for multi-entry region.
716 if (placedSpills)
717 blks.push_back(MBB);
718
719 DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations)
720 DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
721 << stringifyCSRegSet(CSRSave[MBB]) << "\n");
722
723 return placedSpills;
724}
725
726/// calcRestorePlacements - determine which CSRs should be restored
727/// in MBB using AvailOut sets of MBB's succcessors, keeping track
728/// of changes to restored reg sets. Add MBB to the set of blocks
729/// that need to be processed for propagating use info to cover
730/// multi-entry/exit regions.
731///
732bool PEI::calcRestorePlacements(MachineBasicBlock* MBB,
733 SmallVector<MachineBasicBlock*, 4> &blks,
734 CSRegBlockMap &prevRestores) {
735 bool placedRestores = false;
736 // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB)
737 CSRegSet availOutSucc;
738 SmallVector<MachineBasicBlock*, 4> successors;
739 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
740 SE = MBB->succ_end(); SI != SE; ++SI) {
741 MachineBasicBlock* SUCC = *SI;
742 if (SUCC != MBB)
743 successors.push_back(SUCC);
744 }
745 unsigned i = 0, e = successors.size();
746 if (i != e) {
747 MachineBasicBlock* SUCC = successors[i];
748 availOutSucc = UsedCSRegs - AvailOut[SUCC];
749 for (++i; i != e; ++i) {
750 SUCC = successors[i];
751 availOutSucc &= (UsedCSRegs - AvailOut[SUCC]);
752 }
753 } else {
754 if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) {
755 // Handle uses in return blocks (which have no successors).
756 // This is necessary because the DFA formulation assumes the
757 // entry and (multiple) exit nodes cannot have CSR uses, which
758 // is not the case in the real world.
759 availOutSucc = UsedCSRegs;
760 }
761 }
762 // Compute restores required at MBB:
763 CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc;
764
765 // Postprocess restore placements at MBB.
766 // Remove the CSRs that are restored in the return blocks.
767 // Lest this be confusing, note that:
768 // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks.
769 if (MBB->succ_size() && ! CSRRestore[MBB].empty()) {
770 if (! CSRSave[EntryBlock].empty())
771 CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock];
772 }
773 placedRestores = (CSRRestore[MBB] != prevRestores[MBB]);
774 prevRestores[MBB] = CSRRestore[MBB];
775 // Remember this block for adding saves to predecessor
776 // blocks for multi-entry region.
777 if (placedRestores)
778 blks.push_back(MBB);
779
780 DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations)
781 DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = "
782 << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
783
784 return placedRestores;
785}
786
787/// placeSpillsAndRestores - place spills and restores of CSRs
788/// used in MBBs in minimal regions that contain the uses.
789///
790void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
791 CSRegBlockMap prevCSRSave;
792 CSRegBlockMap prevCSRRestore;
793 SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks;
794 bool changed = true;
795 unsigned iterations = 0;
796
797 // Iterate computation of spill and restore placements in the MCFG until:
798 // 1. CSR use info has been fully propagated around the MCFG, and
799 // 2. computation of CSRSave[], CSRRestore[] reach fixed points.
800 while (changed) {
801 changed = false;
802 ++iterations;
803
804 DEBUG(if (ShrinkWrapDebugging >= Iterations)
805 DOUT << "iter " << iterations
806 << " --------------------------------------------------\n");
807
808 // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG,
809 // which determines the placements of spills and restores.
810 // Keep track of changes to spills, restores in each iteration to
811 // minimize the total iterations.
812 bool SRChanged = false;
813 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
814 MBBI != MBBE; ++MBBI) {
815 MachineBasicBlock* MBB = MBBI;
816
817 // Place spills for CSRs in MBB.
818 SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave);
819
820 // Place restores for CSRs in MBB.
821 SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore);
822 }
823
824 // Add uses of CSRs used inside loops where needed.
825 changed |= addUsesForTopLevelLoops(cvBlocks);
826
827 // Add uses for CSRs spilled or restored at branch, join points.
828 if (changed || SRChanged) {
829 while (! cvBlocks.empty()) {
830 MachineBasicBlock* MBB = cvBlocks.pop_back_val();
831 changed |= addUsesForMEMERegion(MBB, ncvBlocks);
832 }
833 if (! ncvBlocks.empty()) {
834 cvBlocks = ncvBlocks;
835 ncvBlocks.clear();
836 }
837 }
838
839 if (changed) {
840 calculateAnticAvail(Fn);
841 CSRSave.clear();
842 CSRRestore.clear();
843 }
844 }
845
846 // Check for effectiveness:
847 // SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks}
848 // numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock]
849 // Gives a measure of how many CSR spills have been moved from EntryBlock
850 // to minimal regions enclosing their uses.
851 CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]);
852 unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count();
853 numSRReduced += numSRReducedThisFunc;
854 DEBUG(if (ShrinkWrapDebugging >= BasicInfo) {
855 DOUT << "-----------------------------------------------------------\n";
856 DOUT << "total iterations = " << iterations << " ( "
857 << Fn.getFunction()->getName()
858 << " " << numSRReducedThisFunc
859 << " " << Fn.size()
860 << " )\n";
861 DOUT << "-----------------------------------------------------------\n";
862 dumpSRSets();
863 DOUT << "-----------------------------------------------------------\n";
864 if (numSRReducedThisFunc)
865 verifySpillRestorePlacement();
866 });
867}
868
869// Debugging methods.
870#ifndef NDEBUG
871/// findFastExitPath - debugging method used to detect functions
872/// with at least one path from the entry block to a return block
873/// directly or which has a very small number of edges.
874///
875void PEI::findFastExitPath() {
876 if (! EntryBlock)
877 return;
878 // Fina a path from EntryBlock to any return block that does not branch:
879 // Entry
880 // | ...
881 // v |
882 // B1<-----+
883 // |
884 // v
885 // Return
886 for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
887 SE = EntryBlock->succ_end(); SI != SE; ++SI) {
888 MachineBasicBlock* SUCC = *SI;
889
890 // Assume positive, disprove existence of fast path.
891 HasFastExitPath = true;
892
893 // Check the immediate successors.
894 if (isReturnBlock(SUCC)) {
895 if (ShrinkWrapDebugging >= BasicInfo)
896 DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock)
897 << "->" << getBasicBlockName(SUCC) << "\n";
898 break;
899 }
900 // Traverse df from SUCC, look for a branch block.
901 std::string exitPath = getBasicBlockName(SUCC);
902 for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC),
903 BE = df_end(SUCC); BI != BE; ++BI) {
904 MachineBasicBlock* SBB = *BI;
905 // Reject paths with branch nodes.
906 if (SBB->succ_size() > 1) {
907 HasFastExitPath = false;
908 break;
909 }
910 exitPath += "->" + getBasicBlockName(SBB);
911 }
912 if (HasFastExitPath) {
913 if (ShrinkWrapDebugging >= BasicInfo)
914 DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock)
915 << "->" << exitPath << "\n";
916 break;
917 }
918 }
919}
920
921/// verifySpillRestorePlacement - check the current spill/restore
922/// sets for safety. Attempt to find spills without restores or
923/// restores without spills.
924/// Spills: walk df from each MBB in spill set ensuring that
925/// all CSRs spilled at MMBB are restored on all paths
926/// from MBB to all exit blocks.
927/// Restores: walk idf from each MBB in restore set ensuring that
928/// all CSRs restored at MBB are spilled on all paths
929/// reaching MBB.
930///
931void PEI::verifySpillRestorePlacement() {
932 unsigned numReturnBlocks = 0;
933 for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
934 MBBI != MBBE; ++MBBI) {
935 MachineBasicBlock* MBB = MBBI;
936 if (isReturnBlock(MBB) || MBB->succ_size() == 0)
937 ++numReturnBlocks;
938 }
939 for (CSRegBlockMap::iterator BI = CSRSave.begin(),
940 BE = CSRSave.end(); BI != BE; ++BI) {
941 MachineBasicBlock* MBB = BI->first;
942 CSRegSet spilled = BI->second;
943 CSRegSet restored;
944
945 if (spilled.empty())
946 continue;
947
948 DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
949 << stringifyCSRegSet(spilled)
950 << " RESTORE[" << getBasicBlockName(MBB) << "] = "
951 << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
952
953 if (CSRRestore[MBB].intersects(spilled)) {
954 restored |= (CSRRestore[MBB] & spilled);
955 }
956
957 // Walk depth first from MBB to find restores of all CSRs spilled at MBB:
958 // we must find restores for all spills w/no intervening spills on all
959 // paths from MBB to all return blocks.
960 for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB),
961 BE = df_end(MBB); BI != BE; ++BI) {
962 MachineBasicBlock* SBB = *BI;
963 if (SBB == MBB)
964 continue;
965 // Stop when we encounter spills of any CSRs spilled at MBB that
966 // have not yet been seen to be restored.
967 if (CSRSave[SBB].intersects(spilled) &&
968 !restored.contains(CSRSave[SBB] & spilled))
969 break;
970 // Collect the CSRs spilled at MBB that are restored
971 // at this DF successor of MBB.
972 if (CSRRestore[SBB].intersects(spilled))
973 restored |= (CSRRestore[SBB] & spilled);
974 // If we are at a retun block, check that the restores
975 // we have seen so far exhaust the spills at MBB, then
976 // reset the restores.
977 if (isReturnBlock(SBB) || SBB->succ_size() == 0) {
978 if (restored != spilled) {
979 CSRegSet notRestored = (spilled - restored);
980 DOUT << MF->getFunction()->getName() << ": "
981 << stringifyCSRegSet(notRestored)
982 << " spilled at " << getBasicBlockName(MBB)
983 << " are never restored on path to return "
984 << getBasicBlockName(SBB) << "\n";
985 }
986 restored.clear();
987 }
988 }
989 }
990
991 // Check restore placements.
992 for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
993 BE = CSRRestore.end(); BI != BE; ++BI) {
994 MachineBasicBlock* MBB = BI->first;
995 CSRegSet restored = BI->second;
996 CSRegSet spilled;
997
998 if (restored.empty())
999 continue;
1000
1001 DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
1002 << stringifyCSRegSet(CSRSave[MBB])
1003 << " RESTORE[" << getBasicBlockName(MBB) << "] = "
1004 << stringifyCSRegSet(restored) << "\n";
1005
1006 if (CSRSave[MBB].intersects(restored)) {
1007 spilled |= (CSRSave[MBB] & restored);
1008 }
1009 // Walk inverse depth first from MBB to find spills of all
1010 // CSRs restored at MBB:
1011 for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB),
1012 BE = idf_end(MBB); BI != BE; ++BI) {
1013 MachineBasicBlock* PBB = *BI;
1014 if (PBB == MBB)
1015 continue;
1016 // Stop when we encounter restores of any CSRs restored at MBB that
1017 // have not yet been seen to be spilled.
1018 if (CSRRestore[PBB].intersects(restored) &&
1019 !spilled.contains(CSRRestore[PBB] & restored))
1020 break;
1021 // Collect the CSRs restored at MBB that are spilled
1022 // at this DF predecessor of MBB.
1023 if (CSRSave[PBB].intersects(restored))
1024 spilled |= (CSRSave[PBB] & restored);
1025 }
1026 if (spilled != restored) {
1027 CSRegSet notSpilled = (restored - spilled);
1028 DOUT << MF->getFunction()->getName() << ": "
1029 << stringifyCSRegSet(notSpilled)
1030 << " restored at " << getBasicBlockName(MBB)
1031 << " are never spilled\n";
1032 }
1033 }
1034}
1035
1036// Debugging print methods.
1037std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) {
1038 std::ostringstream name;
1039 if (MBB) {
1040 if (MBB->getBasicBlock())
1041 name << MBB->getBasicBlock()->getName();
1042 else
1043 name << "_MBB_" << MBB->getNumber();
1044 }
1045 return name.str();
1046}
1047
1048std::string PEI::stringifyCSRegSet(const CSRegSet& s) {
1049 const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
1050 const std::vector<CalleeSavedInfo> CSI =
1051 MF->getFrameInfo()->getCalleeSavedInfo();
1052
1053 std::ostringstream srep;
1054 if (CSI.size() == 0) {
1055 srep << "[]";
1056 return srep.str();
1057 }
1058 srep << "[";
1059 CSRegSet::iterator I = s.begin(), E = s.end();
1060 if (I != E) {
1061 unsigned reg = CSI[*I].getReg();
1062 srep << TRI->getName(reg);
1063 for (++I; I != E; ++I) {
1064 reg = CSI[*I].getReg();
1065 srep << ",";
1066 srep << TRI->getName(reg);
1067 }
1068 }
1069 srep << "]";
1070 return srep.str();
1071}
1072
1073void PEI::dumpSet(const CSRegSet& s) {
1074 DOUT << stringifyCSRegSet(s) << "\n";
1075}
1076
1077void PEI::dumpUsed(MachineBasicBlock* MBB) {
1078 if (MBB) {
1079 DOUT << "CSRUsed[" << getBasicBlockName(MBB) << "] = "
1080 << stringifyCSRegSet(CSRUsed[MBB]) << "\n";
1081 }
1082}
1083
1084void PEI::dumpAllUsed() {
1085 for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
1086 MBBI != MBBE; ++MBBI) {
1087 MachineBasicBlock* MBB = MBBI;
1088 dumpUsed(MBB);
1089 }
1090}
1091
1092void PEI::dumpSets(MachineBasicBlock* MBB) {
1093 if (MBB) {
1094 DOUT << getBasicBlockName(MBB) << " | "
1095 << stringifyCSRegSet(CSRUsed[MBB]) << " | "
1096 << stringifyCSRegSet(AnticIn[MBB]) << " | "
1097 << stringifyCSRegSet(AnticOut[MBB]) << " | "
1098 << stringifyCSRegSet(AvailIn[MBB]) << " | "
1099 << stringifyCSRegSet(AvailOut[MBB]) << "\n";
1100 }
1101}
1102
1103void PEI::dumpSets1(MachineBasicBlock* MBB) {
1104 if (MBB) {
1105 DOUT << getBasicBlockName(MBB) << " | "
1106 << stringifyCSRegSet(CSRUsed[MBB]) << " | "
1107 << stringifyCSRegSet(AnticIn[MBB]) << " | "
1108 << stringifyCSRegSet(AnticOut[MBB]) << " | "
1109 << stringifyCSRegSet(AvailIn[MBB]) << " | "
1110 << stringifyCSRegSet(AvailOut[MBB]) << " | "
1111 << stringifyCSRegSet(CSRSave[MBB]) << " | "
1112 << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
1113 }
1114}
1115
1116void PEI::dumpAllSets() {
1117 for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
1118 MBBI != MBBE; ++MBBI) {
1119 MachineBasicBlock* MBB = MBBI;
1120 dumpSets1(MBB);
1121 }
1122}
1123
1124void PEI::dumpSRSets() {
1125 for (MachineFunction::iterator MBB = MF->begin(), E = MF->end();
1126 MBB != E; ++MBB) {
1127 if (! CSRSave[MBB].empty()) {
1128 DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
1129 << stringifyCSRegSet(CSRSave[MBB]);
1130 if (CSRRestore[MBB].empty())
1131 DOUT << "\n";
1132 }
1133 if (! CSRRestore[MBB].empty()) {
1134 if (! CSRSave[MBB].empty())
1135 DOUT << " ";
1136 DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = "
1137 << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
1138 }
1139 }
1140}
1141#endif