blob: 2a25b3d1dbbd66da1d18f9c75acbea5e0faee9fd [file] [log] [blame]
Kyle Butt3232dbb2016-04-08 20:35:01 +00001//===-- TailDuplicator.cpp - Duplicate blocks into predecessors' tails ---===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This utility class duplicates basic blocks ending in unconditional branches
11// into the tails of their predecessors.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/CodeGen/TailDuplicator.h"
16#include "llvm/ADT/DenseSet.h"
17#include "llvm/ADT/SetVector.h"
18#include "llvm/ADT/SmallSet.h"
19#include "llvm/ADT/Statistic.h"
20#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
21#include "llvm/CodeGen/MachineFunctionPass.h"
22#include "llvm/CodeGen/MachineInstrBuilder.h"
23#include "llvm/CodeGen/MachineModuleInfo.h"
24#include "llvm/CodeGen/Passes.h"
25#include "llvm/IR/Function.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/Debug.h"
28#include "llvm/Support/ErrorHandling.h"
29#include "llvm/Support/raw_ostream.h"
30using namespace llvm;
31
32#define DEBUG_TYPE "tailduplication"
33
34STATISTIC(NumTails, "Number of tails duplicated");
35STATISTIC(NumTailDups, "Number of tail duplicated blocks");
Geoff Berryf8c29d62016-06-14 19:40:10 +000036STATISTIC(NumTailDupAdded,
37 "Number of instructions added due to tail duplication");
38STATISTIC(NumTailDupRemoved,
39 "Number of instructions removed due to tail duplication");
Kyle Butt3232dbb2016-04-08 20:35:01 +000040STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
41STATISTIC(NumAddedPHIs, "Number of phis added");
42
43// Heuristic for tail duplication.
44static cl::opt<unsigned> TailDuplicateSize(
45 "tail-dup-size",
46 cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2),
47 cl::Hidden);
48
49static cl::opt<bool>
50 TailDupVerify("tail-dup-verify",
51 cl::desc("Verify sanity of PHI instructions during taildup"),
52 cl::init(false), cl::Hidden);
53
54static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
55 cl::Hidden);
56
57namespace llvm {
58
Kyle Butt3ed42732016-08-25 01:37:03 +000059void TailDuplicator::initMF(MachineFunction &MFin,
60 const MachineModuleInfo *MMIin,
Kyle Buttdb3391e2016-08-17 21:07:35 +000061 const MachineBranchProbabilityInfo *MBPIin,
62 unsigned TailDupSizeIn) {
Kyle Butt3ed42732016-08-25 01:37:03 +000063 MF = &MFin;
64 TII = MF->getSubtarget().getInstrInfo();
65 TRI = MF->getSubtarget().getRegisterInfo();
66 MRI = &MF->getRegInfo();
Kyle Butt3232dbb2016-04-08 20:35:01 +000067 MMI = MMIin;
68 MBPI = MBPIin;
Kyle Buttdb3391e2016-08-17 21:07:35 +000069 TailDupSize = TailDupSizeIn;
Kyle Butt3232dbb2016-04-08 20:35:01 +000070
71 assert(MBPI != nullptr && "Machine Branch Probability Info required");
72
73 PreRegAlloc = MRI->isSSA();
Kyle Butt3232dbb2016-04-08 20:35:01 +000074}
75
76static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
77 for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) {
78 MachineBasicBlock *MBB = &*I;
79 SmallSetVector<MachineBasicBlock *, 8> Preds(MBB->pred_begin(),
80 MBB->pred_end());
81 MachineBasicBlock::iterator MI = MBB->begin();
82 while (MI != MBB->end()) {
83 if (!MI->isPHI())
84 break;
Matt Arsenaultb8037a12016-08-16 20:38:05 +000085 for (MachineBasicBlock *PredBB : Preds) {
Kyle Butt3232dbb2016-04-08 20:35:01 +000086 bool Found = false;
87 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
88 MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB();
89 if (PHIBB == PredBB) {
90 Found = true;
91 break;
92 }
93 }
94 if (!Found) {
95 dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
96 dbgs() << " missing input from predecessor BB#"
97 << PredBB->getNumber() << '\n';
98 llvm_unreachable(nullptr);
99 }
100 }
101
102 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
103 MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB();
104 if (CheckExtra && !Preds.count(PHIBB)) {
105 dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() << ": "
106 << *MI;
107 dbgs() << " extra input from predecessor BB#" << PHIBB->getNumber()
108 << '\n';
109 llvm_unreachable(nullptr);
110 }
111 if (PHIBB->getNumber() < 0) {
112 dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
113 dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n';
114 llvm_unreachable(nullptr);
115 }
116 }
117 ++MI;
118 }
119 }
120}
121
122/// Tail duplicate the block and cleanup.
Kyle Butt3ed42732016-08-25 01:37:03 +0000123bool TailDuplicator::tailDuplicateAndUpdate(bool IsSimple,
Kyle Butt3232dbb2016-04-08 20:35:01 +0000124 MachineBasicBlock *MBB) {
125 // Save the successors list.
126 SmallSetVector<MachineBasicBlock *, 8> Succs(MBB->succ_begin(),
127 MBB->succ_end());
128
129 SmallVector<MachineBasicBlock *, 8> TDBBs;
130 SmallVector<MachineInstr *, 16> Copies;
Kyle Butt3ed42732016-08-25 01:37:03 +0000131 if (!tailDuplicate(IsSimple, MBB, TDBBs, Copies))
Kyle Butt3232dbb2016-04-08 20:35:01 +0000132 return false;
133
134 ++NumTails;
135
136 SmallVector<MachineInstr *, 8> NewPHIs;
Kyle Butt3ed42732016-08-25 01:37:03 +0000137 MachineSSAUpdater SSAUpdate(*MF, &NewPHIs);
Kyle Butt3232dbb2016-04-08 20:35:01 +0000138
139 // TailBB's immediate successors are now successors of those predecessors
140 // which duplicated TailBB. Add the predecessors as sources to the PHI
141 // instructions.
142 bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken();
143 if (PreRegAlloc)
144 updateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
145
146 // If it is dead, remove it.
147 if (isDead) {
Geoff Berryf8c29d62016-06-14 19:40:10 +0000148 NumTailDupRemoved += MBB->size();
Kyle Butt3232dbb2016-04-08 20:35:01 +0000149 removeDeadBlock(MBB);
150 ++NumDeadBlocks;
151 }
152
153 // Update SSA form.
154 if (!SSAUpdateVRs.empty()) {
155 for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
156 unsigned VReg = SSAUpdateVRs[i];
157 SSAUpdate.Initialize(VReg);
158
159 // If the original definition is still around, add it as an available
160 // value.
161 MachineInstr *DefMI = MRI->getVRegDef(VReg);
162 MachineBasicBlock *DefBB = nullptr;
163 if (DefMI) {
164 DefBB = DefMI->getParent();
165 SSAUpdate.AddAvailableValue(DefBB, VReg);
166 }
167
168 // Add the new vregs as available values.
169 DenseMap<unsigned, AvailableValsTy>::iterator LI =
170 SSAUpdateVals.find(VReg);
171 for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
172 MachineBasicBlock *SrcBB = LI->second[j].first;
173 unsigned SrcReg = LI->second[j].second;
174 SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
175 }
176
177 // Rewrite uses that are outside of the original def's block.
178 MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
179 while (UI != MRI->use_end()) {
180 MachineOperand &UseMO = *UI;
181 MachineInstr *UseMI = UseMO.getParent();
182 ++UI;
183 if (UseMI->isDebugValue()) {
184 // SSAUpdate can replace the use with an undef. That creates
185 // a debug instruction that is a kill.
186 // FIXME: Should it SSAUpdate job to delete debug instructions
187 // instead of replacing the use with undef?
188 UseMI->eraseFromParent();
189 continue;
190 }
191 if (UseMI->getParent() == DefBB && !UseMI->isPHI())
192 continue;
193 SSAUpdate.RewriteUse(UseMO);
194 }
195 }
196
197 SSAUpdateVRs.clear();
198 SSAUpdateVals.clear();
199 }
200
201 // Eliminate some of the copies inserted by tail duplication to maintain
202 // SSA form.
203 for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
204 MachineInstr *Copy = Copies[i];
205 if (!Copy->isCopy())
206 continue;
207 unsigned Dst = Copy->getOperand(0).getReg();
208 unsigned Src = Copy->getOperand(1).getReg();
209 if (MRI->hasOneNonDBGUse(Src) &&
210 MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) {
211 // Copy is the only use. Do trivial copy propagation here.
212 MRI->replaceRegWith(Dst, Src);
213 Copy->eraseFromParent();
214 }
215 }
216
217 if (NewPHIs.size())
218 NumAddedPHIs += NewPHIs.size();
219
220 return true;
221}
222
223/// Look for small blocks that are unconditionally branched to and do not fall
224/// through. Tail-duplicate their instructions into their predecessors to
225/// eliminate (dynamic) branches.
Kyle Butt3ed42732016-08-25 01:37:03 +0000226bool TailDuplicator::tailDuplicateBlocks() {
Kyle Butt3232dbb2016-04-08 20:35:01 +0000227 bool MadeChange = false;
228
229 if (PreRegAlloc && TailDupVerify) {
230 DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
Kyle Butt3ed42732016-08-25 01:37:03 +0000231 VerifyPHIs(*MF, true);
Kyle Butt3232dbb2016-04-08 20:35:01 +0000232 }
233
Kyle Butt3ed42732016-08-25 01:37:03 +0000234 for (MachineFunction::iterator I = ++MF->begin(), E = MF->end(); I != E;) {
Kyle Butt3232dbb2016-04-08 20:35:01 +0000235 MachineBasicBlock *MBB = &*I++;
236
237 if (NumTails == TailDupLimit)
238 break;
239
240 bool IsSimple = isSimpleBB(MBB);
241
Kyle Butt3ed42732016-08-25 01:37:03 +0000242 if (!shouldTailDuplicate(IsSimple, *MBB))
Kyle Butt3232dbb2016-04-08 20:35:01 +0000243 continue;
244
Kyle Butt3ed42732016-08-25 01:37:03 +0000245 MadeChange |= tailDuplicateAndUpdate(IsSimple, MBB);
Kyle Butt3232dbb2016-04-08 20:35:01 +0000246 }
247
248 if (PreRegAlloc && TailDupVerify)
Kyle Butt3ed42732016-08-25 01:37:03 +0000249 VerifyPHIs(*MF, false);
Kyle Butt3232dbb2016-04-08 20:35:01 +0000250
251 return MadeChange;
252}
253
254static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB,
255 const MachineRegisterInfo *MRI) {
256 for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
257 if (UseMI.isDebugValue())
258 continue;
259 if (UseMI.getParent() != BB)
260 return true;
261 }
262 return false;
263}
264
265static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) {
266 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2)
267 if (MI->getOperand(i + 1).getMBB() == SrcBB)
268 return i;
269 return 0;
270}
271
272// Remember which registers are used by phis in this block. This is
273// used to determine which registers are liveout while modifying the
274// block (which is why we need to copy the information).
275static void getRegsUsedByPHIs(const MachineBasicBlock &BB,
276 DenseSet<unsigned> *UsedByPhi) {
277 for (const auto &MI : BB) {
278 if (!MI.isPHI())
279 break;
280 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
281 unsigned SrcReg = MI.getOperand(i).getReg();
282 UsedByPhi->insert(SrcReg);
283 }
284 }
285}
286
287/// Add a definition and source virtual registers pair for SSA update.
288void TailDuplicator::addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
289 MachineBasicBlock *BB) {
290 DenseMap<unsigned, AvailableValsTy>::iterator LI =
291 SSAUpdateVals.find(OrigReg);
292 if (LI != SSAUpdateVals.end())
293 LI->second.push_back(std::make_pair(BB, NewReg));
294 else {
295 AvailableValsTy Vals;
296 Vals.push_back(std::make_pair(BB, NewReg));
297 SSAUpdateVals.insert(std::make_pair(OrigReg, Vals));
298 SSAUpdateVRs.push_back(OrigReg);
299 }
300}
301
302/// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the
303/// source register that's contributed by PredBB and update SSA update map.
304void TailDuplicator::processPHI(
305 MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
Krzysztof Parzyszek4773f642016-04-26 18:36:34 +0000306 DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
307 SmallVectorImpl<std::pair<unsigned, RegSubRegPair>> &Copies,
Kyle Butt3232dbb2016-04-08 20:35:01 +0000308 const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) {
309 unsigned DefReg = MI->getOperand(0).getReg();
310 unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
311 assert(SrcOpIdx && "Unable to find matching PHI source?");
312 unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg();
Krzysztof Parzyszek4773f642016-04-26 18:36:34 +0000313 unsigned SrcSubReg = MI->getOperand(SrcOpIdx).getSubReg();
Kyle Butt3232dbb2016-04-08 20:35:01 +0000314 const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
Krzysztof Parzyszek4773f642016-04-26 18:36:34 +0000315 LocalVRMap.insert(std::make_pair(DefReg, RegSubRegPair(SrcReg, SrcSubReg)));
Kyle Butt3232dbb2016-04-08 20:35:01 +0000316
317 // Insert a copy from source to the end of the block. The def register is the
318 // available value liveout of the block.
319 unsigned NewDef = MRI->createVirtualRegister(RC);
Krzysztof Parzyszek4773f642016-04-26 18:36:34 +0000320 Copies.push_back(std::make_pair(NewDef, RegSubRegPair(SrcReg, SrcSubReg)));
Kyle Butt3232dbb2016-04-08 20:35:01 +0000321 if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg))
322 addSSAUpdateEntry(DefReg, NewDef, PredBB);
323
324 if (!Remove)
325 return;
326
327 // Remove PredBB from the PHI node.
328 MI->RemoveOperand(SrcOpIdx + 1);
329 MI->RemoveOperand(SrcOpIdx);
330 if (MI->getNumOperands() == 1)
331 MI->eraseFromParent();
332}
333
334/// Duplicate a TailBB instruction to PredBB and update
335/// the source operands due to earlier PHI translation.
336void TailDuplicator::duplicateInstruction(
337 MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
Krzysztof Parzyszek4773f642016-04-26 18:36:34 +0000338 DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
Kyle Butt3232dbb2016-04-08 20:35:01 +0000339 const DenseSet<unsigned> &UsedByPhi) {
Kyle Butt3ed42732016-08-25 01:37:03 +0000340 MachineInstr *NewMI = TII->duplicate(*MI, *MF);
Kyle Butt3232dbb2016-04-08 20:35:01 +0000341 if (PreRegAlloc) {
342 for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
343 MachineOperand &MO = NewMI->getOperand(i);
344 if (!MO.isReg())
345 continue;
346 unsigned Reg = MO.getReg();
347 if (!TargetRegisterInfo::isVirtualRegister(Reg))
348 continue;
349 if (MO.isDef()) {
350 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
351 unsigned NewReg = MRI->createVirtualRegister(RC);
352 MO.setReg(NewReg);
Krzysztof Parzyszek4773f642016-04-26 18:36:34 +0000353 LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0)));
Kyle Butt3232dbb2016-04-08 20:35:01 +0000354 if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg))
355 addSSAUpdateEntry(Reg, NewReg, PredBB);
356 } else {
Krzysztof Parzyszek4773f642016-04-26 18:36:34 +0000357 auto VI = LocalVRMap.find(Reg);
Kyle Butt3232dbb2016-04-08 20:35:01 +0000358 if (VI != LocalVRMap.end()) {
Krzysztof Parzyszek4773f642016-04-26 18:36:34 +0000359 // Need to make sure that the register class of the mapped register
360 // will satisfy the constraints of the class of the register being
361 // replaced.
362 auto *OrigRC = MRI->getRegClass(Reg);
363 auto *MappedRC = MRI->getRegClass(VI->second.Reg);
364 const TargetRegisterClass *ConstrRC;
365 if (VI->second.SubReg != 0) {
366 ConstrRC = TRI->getMatchingSuperRegClass(MappedRC, OrigRC,
367 VI->second.SubReg);
368 if (ConstrRC) {
369 // The actual constraining (as in "find appropriate new class")
370 // is done by getMatchingSuperRegClass, so now we only need to
371 // change the class of the mapped register.
372 MRI->setRegClass(VI->second.Reg, ConstrRC);
373 }
374 } else {
375 // For mapped registers that do not have sub-registers, simply
376 // restrict their class to match the original one.
377 ConstrRC = MRI->constrainRegClass(VI->second.Reg, OrigRC);
378 }
379
380 if (ConstrRC) {
381 // If the class constraining succeeded, we can simply replace
382 // the old register with the mapped one.
383 MO.setReg(VI->second.Reg);
384 // We have Reg -> VI.Reg:VI.SubReg, so if Reg is used with a
385 // sub-register, we need to compose the sub-register indices.
386 MO.setSubReg(TRI->composeSubRegIndices(MO.getSubReg(),
387 VI->second.SubReg));
388 } else {
389 // The direct replacement is not possible, due to failing register
390 // class constraints. An explicit COPY is necessary. Create one
391 // that can be reused
392 auto *NewRC = MI->getRegClassConstraint(i, TII, TRI);
393 if (NewRC == nullptr)
394 NewRC = OrigRC;
395 unsigned NewReg = MRI->createVirtualRegister(NewRC);
396 BuildMI(*PredBB, MI, MI->getDebugLoc(),
397 TII->get(TargetOpcode::COPY), NewReg)
398 .addReg(VI->second.Reg, 0, VI->second.SubReg);
399 LocalVRMap.erase(VI);
400 LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0)));
401 MO.setReg(NewReg);
402 // The composed VI.Reg:VI.SubReg is replaced with NewReg, which
403 // is equivalent to the whole register Reg. Hence, Reg:subreg
404 // is same as NewReg:subreg, so keep the sub-register index
405 // unchanged.
406 }
Kyle Butt3232dbb2016-04-08 20:35:01 +0000407 // Clear any kill flags from this operand. The new register could
408 // have uses after this one, so kills are not valid here.
409 MO.setIsKill(false);
Kyle Butt3232dbb2016-04-08 20:35:01 +0000410 }
411 }
412 }
413 }
414 PredBB->insert(PredBB->instr_end(), NewMI);
415}
416
417/// After FromBB is tail duplicated into its predecessor blocks, the successors
418/// have gained new predecessors. Update the PHI instructions in them
419/// accordingly.
420void TailDuplicator::updateSuccessorsPHIs(
421 MachineBasicBlock *FromBB, bool isDead,
422 SmallVectorImpl<MachineBasicBlock *> &TDBBs,
423 SmallSetVector<MachineBasicBlock *, 8> &Succs) {
Matt Arsenaultb8037a12016-08-16 20:38:05 +0000424 for (MachineBasicBlock *SuccBB : Succs) {
425 for (MachineInstr &MI : *SuccBB) {
426 if (!MI.isPHI())
Kyle Butt3232dbb2016-04-08 20:35:01 +0000427 break;
Matt Arsenaultb8037a12016-08-16 20:38:05 +0000428 MachineInstrBuilder MIB(*FromBB->getParent(), MI);
Kyle Butt3232dbb2016-04-08 20:35:01 +0000429 unsigned Idx = 0;
Matt Arsenaultb8037a12016-08-16 20:38:05 +0000430 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
431 MachineOperand &MO = MI.getOperand(i + 1);
Kyle Butt3232dbb2016-04-08 20:35:01 +0000432 if (MO.getMBB() == FromBB) {
433 Idx = i;
434 break;
435 }
436 }
437
438 assert(Idx != 0);
Matt Arsenaultb8037a12016-08-16 20:38:05 +0000439 MachineOperand &MO0 = MI.getOperand(Idx);
Kyle Butt3232dbb2016-04-08 20:35:01 +0000440 unsigned Reg = MO0.getReg();
441 if (isDead) {
442 // Folded into the previous BB.
443 // There could be duplicate phi source entries. FIXME: Should sdisel
444 // or earlier pass fixed this?
Matt Arsenaultb8037a12016-08-16 20:38:05 +0000445 for (unsigned i = MI.getNumOperands() - 2; i != Idx; i -= 2) {
446 MachineOperand &MO = MI.getOperand(i + 1);
Kyle Butt3232dbb2016-04-08 20:35:01 +0000447 if (MO.getMBB() == FromBB) {
Matt Arsenaultb8037a12016-08-16 20:38:05 +0000448 MI.RemoveOperand(i + 1);
449 MI.RemoveOperand(i);
Kyle Butt3232dbb2016-04-08 20:35:01 +0000450 }
451 }
452 } else
453 Idx = 0;
454
455 // If Idx is set, the operands at Idx and Idx+1 must be removed.
456 // We reuse the location to avoid expensive RemoveOperand calls.
457
458 DenseMap<unsigned, AvailableValsTy>::iterator LI =
459 SSAUpdateVals.find(Reg);
460 if (LI != SSAUpdateVals.end()) {
461 // This register is defined in the tail block.
462 for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
463 MachineBasicBlock *SrcBB = LI->second[j].first;
464 // If we didn't duplicate a bb into a particular predecessor, we
465 // might still have added an entry to SSAUpdateVals to correcly
466 // recompute SSA. If that case, avoid adding a dummy extra argument
467 // this PHI.
468 if (!SrcBB->isSuccessor(SuccBB))
469 continue;
470
471 unsigned SrcReg = LI->second[j].second;
472 if (Idx != 0) {
Matt Arsenaultb8037a12016-08-16 20:38:05 +0000473 MI.getOperand(Idx).setReg(SrcReg);
474 MI.getOperand(Idx + 1).setMBB(SrcBB);
Kyle Butt3232dbb2016-04-08 20:35:01 +0000475 Idx = 0;
476 } else {
477 MIB.addReg(SrcReg).addMBB(SrcBB);
478 }
479 }
480 } else {
481 // Live in tail block, must also be live in predecessors.
482 for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
483 MachineBasicBlock *SrcBB = TDBBs[j];
484 if (Idx != 0) {
Matt Arsenaultb8037a12016-08-16 20:38:05 +0000485 MI.getOperand(Idx).setReg(Reg);
486 MI.getOperand(Idx + 1).setMBB(SrcBB);
Kyle Butt3232dbb2016-04-08 20:35:01 +0000487 Idx = 0;
488 } else {
489 MIB.addReg(Reg).addMBB(SrcBB);
490 }
491 }
492 }
493 if (Idx != 0) {
Matt Arsenaultb8037a12016-08-16 20:38:05 +0000494 MI.RemoveOperand(Idx + 1);
495 MI.RemoveOperand(Idx);
Kyle Butt3232dbb2016-04-08 20:35:01 +0000496 }
497 }
498 }
499}
500
501/// Determine if it is profitable to duplicate this block.
Kyle Butt3ed42732016-08-25 01:37:03 +0000502bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
Kyle Butt3232dbb2016-04-08 20:35:01 +0000503 MachineBasicBlock &TailBB) {
504 // Only duplicate blocks that end with unconditional branches.
505 if (TailBB.canFallThrough())
506 return false;
507
508 // Don't try to tail-duplicate single-block loops.
509 if (TailBB.isSuccessor(&TailBB))
510 return false;
511
512 // Set the limit on the cost to duplicate. When optimizing for size,
513 // duplicate only one, because one branch instruction can be eliminated to
514 // compensate for the duplication.
515 unsigned MaxDuplicateCount;
Kyle Buttdb3391e2016-08-17 21:07:35 +0000516 if (TailDupSize == 0 &&
517 TailDuplicateSize.getNumOccurrences() == 0 &&
Kyle Butt3ed42732016-08-25 01:37:03 +0000518 MF->getFunction()->optForSize())
Kyle Butt3232dbb2016-04-08 20:35:01 +0000519 MaxDuplicateCount = 1;
Kyle Buttdb3391e2016-08-17 21:07:35 +0000520 else if (TailDupSize == 0)
Kyle Butt3232dbb2016-04-08 20:35:01 +0000521 MaxDuplicateCount = TailDuplicateSize;
Kyle Buttdb3391e2016-08-17 21:07:35 +0000522 else
523 MaxDuplicateCount = TailDupSize;
Kyle Butt3232dbb2016-04-08 20:35:01 +0000524
Kyle Butt07d61422016-08-16 22:56:14 +0000525 // If the block to be duplicated ends in an unanalyzable fallthrough, don't
526 // duplicate it.
527 // A similar check is necessary in MachineBlockPlacement to make sure pairs of
528 // blocks with unanalyzable fallthrough get layed out contiguously.
529 MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
530 SmallVector<MachineOperand, 4> PredCond;
531 if (TII->analyzeBranch(TailBB, PredTBB, PredFBB, PredCond, true)
532 && TailBB.canFallThrough())
533 return false;
534
Kyle Butt3232dbb2016-04-08 20:35:01 +0000535 // If the target has hardware branch prediction that can handle indirect
536 // branches, duplicating them can often make them predictable when there
537 // are common paths through the code. The limit needs to be high enough
538 // to allow undoing the effects of tail merging and other optimizations
539 // that rearrange the predecessors of the indirect branch.
540
541 bool HasIndirectbr = false;
542 if (!TailBB.empty())
543 HasIndirectbr = TailBB.back().isIndirectBranch();
544
545 if (HasIndirectbr && PreRegAlloc)
546 MaxDuplicateCount = 20;
547
548 // Check the instructions in the block to determine whether tail-duplication
549 // is invalid or unlikely to be profitable.
550 unsigned InstrCount = 0;
551 for (MachineInstr &MI : TailBB) {
552 // Non-duplicable things shouldn't be tail-duplicated.
553 if (MI.isNotDuplicable())
554 return false;
555
556 // Convergent instructions can be duplicated only if doing so doesn't add
557 // new control dependencies, which is what we're going to do here.
558 if (MI.isConvergent())
559 return false;
560
561 // Do not duplicate 'return' instructions if this is a pre-regalloc run.
562 // A return may expand into a lot more instructions (e.g. reload of callee
563 // saved registers) after PEI.
564 if (PreRegAlloc && MI.isReturn())
565 return false;
566
567 // Avoid duplicating calls before register allocation. Calls presents a
568 // barrier to register allocation so duplicating them may end up increasing
569 // spills.
570 if (PreRegAlloc && MI.isCall())
571 return false;
572
573 if (!MI.isPHI() && !MI.isDebugValue())
574 InstrCount += 1;
575
576 if (InstrCount > MaxDuplicateCount)
577 return false;
578 }
579
580 // Check if any of the successors of TailBB has a PHI node in which the
581 // value corresponding to TailBB uses a subregister.
582 // If a phi node uses a register paired with a subregister, the actual
583 // "value type" of the phi may differ from the type of the register without
584 // any subregisters. Due to a bug, tail duplication may add a new operand
585 // without a necessary subregister, producing an invalid code. This is
586 // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll.
587 // Disable tail duplication for this case for now, until the problem is
588 // fixed.
589 for (auto SB : TailBB.successors()) {
590 for (auto &I : *SB) {
591 if (!I.isPHI())
592 break;
593 unsigned Idx = getPHISrcRegOpIdx(&I, &TailBB);
594 assert(Idx != 0);
595 MachineOperand &PU = I.getOperand(Idx);
596 if (PU.getSubReg() != 0)
597 return false;
598 }
599 }
600
601 if (HasIndirectbr && PreRegAlloc)
602 return true;
603
604 if (IsSimple)
605 return true;
606
607 if (!PreRegAlloc)
608 return true;
609
610 return canCompletelyDuplicateBB(TailBB);
611}
612
613/// True if this BB has only one unconditional jump.
614bool TailDuplicator::isSimpleBB(MachineBasicBlock *TailBB) {
615 if (TailBB->succ_size() != 1)
616 return false;
617 if (TailBB->pred_empty())
618 return false;
619 MachineBasicBlock::iterator I = TailBB->getFirstNonDebugInstr();
620 if (I == TailBB->end())
621 return true;
622 return I->isUnconditionalBranch();
623}
624
625static bool bothUsedInPHI(const MachineBasicBlock &A,
Benjamin Kramerbdc49562016-06-12 15:39:02 +0000626 const SmallPtrSet<MachineBasicBlock *, 8> &SuccsB) {
Kyle Butt3232dbb2016-04-08 20:35:01 +0000627 for (MachineBasicBlock *BB : A.successors())
628 if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI())
629 return true;
630
631 return false;
632}
633
634bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
635 for (MachineBasicBlock *PredBB : BB.predecessors()) {
636 if (PredBB->succ_size() > 1)
637 return false;
638
639 MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
640 SmallVector<MachineOperand, 4> PredCond;
Jacques Pienaar71c30a12016-07-15 14:41:04 +0000641 if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
Kyle Butt3232dbb2016-04-08 20:35:01 +0000642 return false;
643
644 if (!PredCond.empty())
645 return false;
646 }
647 return true;
648}
649
650bool TailDuplicator::duplicateSimpleBB(
651 MachineBasicBlock *TailBB, SmallVectorImpl<MachineBasicBlock *> &TDBBs,
652 const DenseSet<unsigned> &UsedByPhi,
653 SmallVectorImpl<MachineInstr *> &Copies) {
654 SmallPtrSet<MachineBasicBlock *, 8> Succs(TailBB->succ_begin(),
655 TailBB->succ_end());
656 SmallVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
657 TailBB->pred_end());
658 bool Changed = false;
Matt Arsenaultb8037a12016-08-16 20:38:05 +0000659 for (MachineBasicBlock *PredBB : Preds) {
Kyle Butt3232dbb2016-04-08 20:35:01 +0000660 if (PredBB->hasEHPadSuccessor())
661 continue;
662
663 if (bothUsedInPHI(*PredBB, Succs))
664 continue;
665
666 MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
667 SmallVector<MachineOperand, 4> PredCond;
Jacques Pienaar71c30a12016-07-15 14:41:04 +0000668 if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
Kyle Butt3232dbb2016-04-08 20:35:01 +0000669 continue;
670
671 Changed = true;
672 DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
673 << "From simple Succ: " << *TailBB);
674
675 MachineBasicBlock *NewTarget = *TailBB->succ_begin();
Matthias Braun6442fc12016-08-18 00:59:32 +0000676 MachineBasicBlock *NextBB = PredBB->getNextNode();
Kyle Butt3232dbb2016-04-08 20:35:01 +0000677
678 // Make PredFBB explicit.
679 if (PredCond.empty())
680 PredFBB = PredTBB;
681
682 // Make fall through explicit.
683 if (!PredTBB)
684 PredTBB = NextBB;
685 if (!PredFBB)
686 PredFBB = NextBB;
687
688 // Redirect
689 if (PredFBB == TailBB)
690 PredFBB = NewTarget;
691 if (PredTBB == TailBB)
692 PredTBB = NewTarget;
693
694 // Make the branch unconditional if possible
695 if (PredTBB == PredFBB) {
696 PredCond.clear();
697 PredFBB = nullptr;
698 }
699
700 // Avoid adding fall through branches.
701 if (PredFBB == NextBB)
702 PredFBB = nullptr;
703 if (PredTBB == NextBB && PredFBB == nullptr)
704 PredTBB = nullptr;
705
706 TII->RemoveBranch(*PredBB);
707
708 if (!PredBB->isSuccessor(NewTarget))
709 PredBB->replaceSuccessor(TailBB, NewTarget);
710 else {
711 PredBB->removeSuccessor(TailBB, true);
712 assert(PredBB->succ_size() <= 1);
713 }
714
715 if (PredTBB)
716 TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
717
718 TDBBs.push_back(PredBB);
719 }
720 return Changed;
721}
722
Kyle Butt9e52c062016-07-19 23:54:21 +0000723bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB,
724 MachineBasicBlock *PredBB) {
725 // EH edges are ignored by AnalyzeBranch.
726 if (PredBB->succ_size() > 1)
727 return false;
728
729 MachineBasicBlock *PredTBB, *PredFBB;
730 SmallVector<MachineOperand, 4> PredCond;
731 if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
732 return false;
733 if (!PredCond.empty())
734 return false;
735 return true;
736}
737
Kyle Butt3232dbb2016-04-08 20:35:01 +0000738/// If it is profitable, duplicate TailBB's contents in each
739/// of its predecessors.
Kyle Butt3ed42732016-08-25 01:37:03 +0000740bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
Kyle Butt3232dbb2016-04-08 20:35:01 +0000741 SmallVectorImpl<MachineBasicBlock *> &TDBBs,
742 SmallVectorImpl<MachineInstr *> &Copies) {
743 DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
744
745 DenseSet<unsigned> UsedByPhi;
746 getRegsUsedByPHIs(*TailBB, &UsedByPhi);
747
748 if (IsSimple)
749 return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies);
750
751 // Iterate through all the unique predecessors and tail-duplicate this
752 // block into them, if possible. Copying the list ahead of time also
753 // avoids trouble with the predecessor list reallocating.
754 bool Changed = false;
755 SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
756 TailBB->pred_end());
Matt Arsenaultb8037a12016-08-16 20:38:05 +0000757 for (MachineBasicBlock *PredBB : Preds) {
Kyle Butt3232dbb2016-04-08 20:35:01 +0000758 assert(TailBB != PredBB &&
759 "Single-block loop should have been rejected earlier!");
Kyle Butt9e52c062016-07-19 23:54:21 +0000760
761 if (!canTailDuplicate(TailBB, PredBB))
Kyle Butt3232dbb2016-04-08 20:35:01 +0000762 continue;
763
Kyle Butt3232dbb2016-04-08 20:35:01 +0000764 // Don't duplicate into a fall-through predecessor (at least for now).
765 if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
766 continue;
767
768 DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
769 << "From Succ: " << *TailBB);
770
771 TDBBs.push_back(PredBB);
772
773 // Remove PredBB's unconditional branch.
774 TII->RemoveBranch(*PredBB);
775
Kyle Butt3232dbb2016-04-08 20:35:01 +0000776 // Clone the contents of TailBB into PredBB.
Krzysztof Parzyszek4773f642016-04-26 18:36:34 +0000777 DenseMap<unsigned, RegSubRegPair> LocalVRMap;
778 SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
Kyle Butt3232dbb2016-04-08 20:35:01 +0000779 // Use instr_iterator here to properly handle bundles, e.g.
780 // ARM Thumb2 IT block.
781 MachineBasicBlock::instr_iterator I = TailBB->instr_begin();
782 while (I != TailBB->instr_end()) {
783 MachineInstr *MI = &*I;
784 ++I;
785 if (MI->isPHI()) {
786 // Replace the uses of the def of the PHI with the register coming
787 // from PredBB.
788 processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);
789 } else {
790 // Replace def of virtual registers with new registers, and update
791 // uses with PHI source register or the new registers.
Kyle Butt3ed42732016-08-25 01:37:03 +0000792 duplicateInstruction(MI, TailBB, PredBB, LocalVRMap, UsedByPhi);
Kyle Butt3232dbb2016-04-08 20:35:01 +0000793 }
794 }
Krzysztof Parzyszek4773f642016-04-26 18:36:34 +0000795 appendCopies(PredBB, CopyInfos, Copies);
Kyle Butt3232dbb2016-04-08 20:35:01 +0000796
797 // Simplify
Kyle Butt9e52c062016-07-19 23:54:21 +0000798 MachineBasicBlock *PredTBB, *PredFBB;
799 SmallVector<MachineOperand, 4> PredCond;
Jacques Pienaar71c30a12016-07-15 14:41:04 +0000800 TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);
Kyle Butt3232dbb2016-04-08 20:35:01 +0000801
Geoff Berryf8c29d62016-06-14 19:40:10 +0000802 NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch
Kyle Butt3232dbb2016-04-08 20:35:01 +0000803
804 // Update the CFG.
805 PredBB->removeSuccessor(PredBB->succ_begin());
806 assert(PredBB->succ_empty() &&
807 "TailDuplicate called on block with multiple successors!");
Matt Arsenaultb8037a12016-08-16 20:38:05 +0000808 for (MachineBasicBlock *Succ : TailBB->successors())
809 PredBB->addSuccessor(Succ, MBPI->getEdgeProbability(TailBB, Succ));
Kyle Butt3232dbb2016-04-08 20:35:01 +0000810
811 Changed = true;
812 ++NumTailDups;
813 }
814
815 // If TailBB was duplicated into all its predecessors except for the prior
816 // block, which falls through unconditionally, move the contents of this
817 // block into the prior block.
818 MachineBasicBlock *PrevBB = &*std::prev(TailBB->getIterator());
819 MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr;
820 SmallVector<MachineOperand, 4> PriorCond;
821 // This has to check PrevBB->succ_size() because EH edges are ignored by
822 // AnalyzeBranch.
823 if (PrevBB->succ_size() == 1 &&
Kyle Buttd2b886e2016-07-20 00:01:51 +0000824 // Layout preds are not always CFG preds. Check.
825 *PrevBB->succ_begin() == TailBB &&
Jacques Pienaar71c30a12016-07-15 14:41:04 +0000826 !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) &&
Kyle Butt3232dbb2016-04-08 20:35:01 +0000827 PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 &&
828 !TailBB->hasAddressTaken()) {
829 DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
830 << "From MBB: " << *TailBB);
831 if (PreRegAlloc) {
Krzysztof Parzyszek4773f642016-04-26 18:36:34 +0000832 DenseMap<unsigned, RegSubRegPair> LocalVRMap;
833 SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
Kyle Butt3232dbb2016-04-08 20:35:01 +0000834 MachineBasicBlock::iterator I = TailBB->begin();
835 // Process PHI instructions first.
836 while (I != TailBB->end() && I->isPHI()) {
837 // Replace the uses of the def of the PHI with the register coming
838 // from PredBB.
839 MachineInstr *MI = &*I++;
840 processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true);
Kyle Butt3232dbb2016-04-08 20:35:01 +0000841 }
842
843 // Now copy the non-PHI instructions.
844 while (I != TailBB->end()) {
845 // Replace def of virtual registers with new registers, and update
846 // uses with PHI source register or the new registers.
847 MachineInstr *MI = &*I++;
848 assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
Kyle Butt3ed42732016-08-25 01:37:03 +0000849 duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi);
Kyle Butt3232dbb2016-04-08 20:35:01 +0000850 MI->eraseFromParent();
851 }
Krzysztof Parzyszek4773f642016-04-26 18:36:34 +0000852 appendCopies(PrevBB, CopyInfos, Copies);
Kyle Butt3232dbb2016-04-08 20:35:01 +0000853 } else {
854 // No PHIs to worry about, just splice the instructions over.
855 PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
856 }
857 PrevBB->removeSuccessor(PrevBB->succ_begin());
858 assert(PrevBB->succ_empty());
859 PrevBB->transferSuccessors(TailBB);
860 TDBBs.push_back(PrevBB);
861 Changed = true;
862 }
863
864 // If this is after register allocation, there are no phis to fix.
865 if (!PreRegAlloc)
866 return Changed;
867
868 // If we made no changes so far, we are safe.
869 if (!Changed)
870 return Changed;
871
872 // Handle the nasty case in that we duplicated a block that is part of a loop
873 // into some but not all of its predecessors. For example:
874 // 1 -> 2 <-> 3 |
875 // \ |
876 // \---> rest |
877 // if we duplicate 2 into 1 but not into 3, we end up with
878 // 12 -> 3 <-> 2 -> rest |
879 // \ / |
880 // \----->-----/ |
881 // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced
882 // with a phi in 3 (which now dominates 2).
883 // What we do here is introduce a copy in 3 of the register defined by the
884 // phi, just like when we are duplicating 2 into 3, but we don't copy any
885 // real instructions or remove the 3 -> 2 edge from the phi in 2.
Matt Arsenaultb8037a12016-08-16 20:38:05 +0000886 for (MachineBasicBlock *PredBB : Preds) {
David Majnemer0d955d02016-08-11 22:21:41 +0000887 if (is_contained(TDBBs, PredBB))
Kyle Butt3232dbb2016-04-08 20:35:01 +0000888 continue;
889
890 // EH edges
891 if (PredBB->succ_size() != 1)
892 continue;
893
Krzysztof Parzyszek4773f642016-04-26 18:36:34 +0000894 DenseMap<unsigned, RegSubRegPair> LocalVRMap;
895 SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
Kyle Butt3232dbb2016-04-08 20:35:01 +0000896 MachineBasicBlock::iterator I = TailBB->begin();
897 // Process PHI instructions first.
898 while (I != TailBB->end() && I->isPHI()) {
899 // Replace the uses of the def of the PHI with the register coming
900 // from PredBB.
901 MachineInstr *MI = &*I++;
902 processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
903 }
Krzysztof Parzyszek4773f642016-04-26 18:36:34 +0000904 appendCopies(PredBB, CopyInfos, Copies);
Kyle Butt3232dbb2016-04-08 20:35:01 +0000905 }
906
907 return Changed;
908}
909
Krzysztof Parzyszek4773f642016-04-26 18:36:34 +0000910/// At the end of the block \p MBB generate COPY instructions between registers
911/// described by \p CopyInfos. Append resulting instructions to \p Copies.
912void TailDuplicator::appendCopies(MachineBasicBlock *MBB,
913 SmallVectorImpl<std::pair<unsigned,RegSubRegPair>> &CopyInfos,
914 SmallVectorImpl<MachineInstr*> &Copies) {
915 MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
916 const MCInstrDesc &CopyD = TII->get(TargetOpcode::COPY);
917 for (auto &CI : CopyInfos) {
918 auto C = BuildMI(*MBB, Loc, DebugLoc(), CopyD, CI.first)
919 .addReg(CI.second.Reg, 0, CI.second.SubReg);
920 Copies.push_back(C);
921 }
922}
923
Kyle Butt3232dbb2016-04-08 20:35:01 +0000924/// Remove the specified dead machine basic block from the function, updating
925/// the CFG.
926void TailDuplicator::removeDeadBlock(MachineBasicBlock *MBB) {
927 assert(MBB->pred_empty() && "MBB must be dead!");
928 DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
929
930 // Remove all successors.
931 while (!MBB->succ_empty())
932 MBB->removeSuccessor(MBB->succ_end() - 1);
933
934 // Remove the block.
935 MBB->eraseFromParent();
936}
937
938} // End llvm namespace