//===---- X86CondBrFolding.cpp - optimize conditional branches ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// This file defines a pass that optimizes conditional branches on x86 by
// taking advantage of the three-way condition code generated by compare
// instructions.
// Currently, it tries to hoist an EQ or NE conditional branch to a dominating
// conditional branch where the same EQ/NE condition code is
// computed. An example:
//   bb_0:
//     cmp %0, 19
//     jg bb_1
//     jmp bb_2
//   bb_1:
//     cmp %0, 40
//     jg bb_3
//     jmp bb_4
//   bb_4:
//     cmp %0, 20
//     je bb_5
//     jmp bb_6
// Here we could combine the two compares in bb_0 and bb_4 and have the
// following code:
//   bb_0:
//     cmp %0, 20
//     jg bb_1
//     jl bb_2
//     jmp bb_5
//   bb_1:
//     cmp %0, 40
//     jg bb_3
//     jmp bb_6
// For the case of %0 == 20 (bb_5), we eliminate two jumps, and the control
// height for bb_6 is also reduced. bb_4 is gone after the optimization.
//
// There are plenty of such code patterns, especially from switch-case
// lowering, where we generate a compare of "pivot-1" for the inner nodes in
// the binary search tree.
//===----------------------------------------------------------------------===//
45
46#include "X86.h"
47#include "X86InstrInfo.h"
48#include "X86Subtarget.h"
49#include "llvm/ADT/Statistic.h"
50#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
51#include "llvm/CodeGen/MachineFunctionPass.h"
52#include "llvm/CodeGen/MachineInstrBuilder.h"
53#include "llvm/CodeGen/MachineRegisterInfo.h"
54#include "llvm/Support/BranchProbability.h"
55
56using namespace llvm;
57
58#define DEBUG_TYPE "x86-condbr-folding"
59
60STATISTIC(NumFixedCondBrs, "Number of x86 condbr folded");
61
62namespace {
63class X86CondBrFoldingPass : public MachineFunctionPass {
64public:
65 X86CondBrFoldingPass() : MachineFunctionPass(ID) {}
66
67 StringRef getPassName() const override { return "X86 CondBr Folding"; }
68
69 bool runOnMachineFunction(MachineFunction &MF) override;
70
71 void getAnalysisUsage(AnalysisUsage &AU) const override {
72 MachineFunctionPass::getAnalysisUsage(AU);
73 AU.addRequired<MachineBranchProbabilityInfo>();
74 }
75
76private:
77 static char ID;
78};
79
80char X86CondBrFoldingPass::ID = 0;
81} // namespace
82
83FunctionPass *llvm::createX86CondBrFolding() {
84 return new X86CondBrFoldingPass();
85}
86
87// A class the stores the auxiliary information for each MBB.
88struct TargetMBBInfo {
89 MachineBasicBlock *TBB;
90 MachineBasicBlock *FBB;
91 MachineInstr *BrInstr;
92 MachineInstr *CmpInstr;
93 X86::CondCode BranchCode;
94 unsigned SrcReg;
95 int CmpValue;
96 bool Modified;
97 bool CmpBrOnly;
98};
99
100// A class that optimizes the conditional branch by hoisting and merge CondCode.
101class X86CondBrFolding {
102public:
103 X86CondBrFolding(const X86InstrInfo *TII,
104 const MachineBranchProbabilityInfo *MBPI,
105 MachineFunction &MF)
106 : TII(TII), MBPI(MBPI), MF(MF) {}
107 bool optimize();
108
109private:
110 const X86InstrInfo *TII;
111 const MachineBranchProbabilityInfo *MBPI;
112 MachineFunction &MF;
113 std::vector<std::unique_ptr<TargetMBBInfo>> MBBInfos;
114 SmallVector<MachineBasicBlock *, 4> RemoveList;
115
116 void optimizeCondBr(MachineBasicBlock &MBB,
117 SmallVectorImpl<MachineBasicBlock *> &BranchPath);
118 void fixBranchProb(MachineBasicBlock *NextMBB, MachineBasicBlock *RootMBB,
119 SmallVectorImpl<MachineBasicBlock *> &BranchPath);
120 void replaceBrDest(MachineBasicBlock *MBB, MachineBasicBlock *OrigDest,
121 MachineBasicBlock *NewDest);
122 void fixupModifiedCond(MachineBasicBlock *MBB);
123 std::unique_ptr<TargetMBBInfo> analyzeMBB(MachineBasicBlock &MBB);
124 static bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
125 int &CmpValue);
126 bool findPath(MachineBasicBlock *MBB,
127 SmallVectorImpl<MachineBasicBlock *> &BranchPath);
128 TargetMBBInfo *getMBBInfo(MachineBasicBlock *MBB) const {
129 return MBBInfos[MBB->getNumber()].get();
130 }
131};
132
133// Find a valid path that we can reuse the CondCode.
134// The resulted path (if return true) is stored in BranchPath.
135// Return value:
136// false: is no valid path is found.
137// true: a valid path is found and the targetBB can be reached.
138bool X86CondBrFolding::findPath(
139 MachineBasicBlock *MBB, SmallVectorImpl<MachineBasicBlock *> &BranchPath) {
140 TargetMBBInfo *MBBInfo = getMBBInfo(MBB);
141 assert(MBBInfo && "Expecting a candidate MBB");
142 int CmpValue = MBBInfo->CmpValue;
143
144 MachineBasicBlock *PredMBB = *MBB->pred_begin();
145 MachineBasicBlock *SaveMBB = MBB;
146 while (PredMBB) {
147 TargetMBBInfo *PredMBBInfo = getMBBInfo(PredMBB);
148 if (!PredMBBInfo || PredMBBInfo->SrcReg != MBBInfo->SrcReg)
149 return false;
150
151 assert(SaveMBB == PredMBBInfo->TBB || SaveMBB == PredMBBInfo->FBB);
152 bool IsFalseBranch = (SaveMBB == PredMBBInfo->FBB);
153
154 X86::CondCode CC = PredMBBInfo->BranchCode;
155 assert(CC == X86::COND_L || CC == X86::COND_G || CC == X86::COND_E);
156 int PredCmpValue = PredMBBInfo->CmpValue;
157 bool ValueCmpTrue = ((CmpValue < PredCmpValue && CC == X86::COND_L) ||
158 (CmpValue > PredCmpValue && CC == X86::COND_G) ||
159 (CmpValue == PredCmpValue && CC == X86::COND_E));
160 // Check if both the result of value compare and the branch target match.
161 if (!(ValueCmpTrue ^ IsFalseBranch)) {
162 LLVM_DEBUG(dbgs() << "Dead BB detected!\n");
163 return false;
164 }
165
166 BranchPath.push_back(PredMBB);
167 // These are the conditions on which we could combine the compares.
168 if ((CmpValue == PredCmpValue) ||
169 (CmpValue == PredCmpValue - 1 && CC == X86::COND_L) ||
170 (CmpValue == PredCmpValue + 1 && CC == X86::COND_G))
171 return true;
172
173 // If PredMBB has more than on preds, or not a pure cmp and br, we bailout.
174 if (PredMBB->pred_size() != 1 || !PredMBBInfo->CmpBrOnly)
175 return false;
176
177 SaveMBB = PredMBB;
178 PredMBB = *PredMBB->pred_begin();
179 }
180 return false;
181}
182
183// Fix up any PHI node in the successor of MBB.
184static void fixPHIsInSucc(MachineBasicBlock *MBB, MachineBasicBlock *OldMBB,
185 MachineBasicBlock *NewMBB) {
186 if (NewMBB == OldMBB)
187 return;
188 for (auto MI = MBB->instr_begin(), ME = MBB->instr_end();
189 MI != ME && MI->isPHI(); ++MI)
190 for (unsigned i = 2, e = MI->getNumOperands() + 1; i != e; i += 2) {
191 MachineOperand &MO = MI->getOperand(i);
192 if (MO.getMBB() == OldMBB)
193 MO.setMBB(NewMBB);
194 }
195}
196
197// Utility function to set branch probability for edge MBB->SuccMBB.
198static inline bool setBranchProb(MachineBasicBlock *MBB,
199 MachineBasicBlock *SuccMBB,
200 BranchProbability Prob) {
201 auto MBBI = std::find(MBB->succ_begin(), MBB->succ_end(), SuccMBB);
202 if (MBBI == MBB->succ_end())
203 return false;
204 MBB->setSuccProbability(MBBI, Prob);
205 return true;
206}
207
208// Utility function to find the unconditional br instruction in MBB.
209static inline MachineBasicBlock::iterator
210findUncondBrI(MachineBasicBlock *MBB) {
211 return std::find_if(MBB->begin(), MBB->end(), [](MachineInstr &MI) -> bool {
212 return MI.getOpcode() == X86::JMP_1;
213 });
214}
215
216// Replace MBB's original successor, OrigDest, with NewDest.
217// Also update the MBBInfo for MBB.
218void X86CondBrFolding::replaceBrDest(MachineBasicBlock *MBB,
219 MachineBasicBlock *OrigDest,
220 MachineBasicBlock *NewDest) {
221 TargetMBBInfo *MBBInfo = getMBBInfo(MBB);
222 MachineInstr *BrMI;
223 if (MBBInfo->TBB == OrigDest) {
224 BrMI = MBBInfo->BrInstr;
225 unsigned JNCC = GetCondBranchFromCond(MBBInfo->BranchCode);
226 MachineInstrBuilder MIB =
227 BuildMI(*MBB, BrMI, MBB->findDebugLoc(BrMI), TII->get(JNCC))
228 .addMBB(NewDest);
229 MBBInfo->TBB = NewDest;
230 MBBInfo->BrInstr = MIB.getInstr();
231 } else { // Should be the unconditional jump stmt.
232 MachineBasicBlock::iterator UncondBrI = findUncondBrI(MBB);
233 BuildMI(*MBB, UncondBrI, MBB->findDebugLoc(UncondBrI), TII->get(X86::JMP_1))
234 .addMBB(NewDest);
235 MBBInfo->FBB = NewDest;
236 BrMI = &*UncondBrI;
237 }
238 fixPHIsInSucc(NewDest, OrigDest, MBB);
239 BrMI->eraseFromParent();
240 MBB->addSuccessor(NewDest);
241 setBranchProb(MBB, NewDest, MBPI->getEdgeProbability(MBB, OrigDest));
242 MBB->removeSuccessor(OrigDest);
243}
244
245// Change the CondCode and BrInstr according to MBBInfo.
246void X86CondBrFolding::fixupModifiedCond(MachineBasicBlock *MBB) {
247 TargetMBBInfo *MBBInfo = getMBBInfo(MBB);
248 if (!MBBInfo->Modified)
249 return;
250
251 MachineInstr *BrMI = MBBInfo->BrInstr;
252 X86::CondCode CC = MBBInfo->BranchCode;
253 MachineInstrBuilder MIB = BuildMI(*MBB, BrMI, MBB->findDebugLoc(BrMI),
254 TII->get(GetCondBranchFromCond(CC)))
255 .addMBB(MBBInfo->TBB);
256 BrMI->eraseFromParent();
257 MBBInfo->BrInstr = MIB.getInstr();
258
259 MachineBasicBlock::iterator UncondBrI = findUncondBrI(MBB);
260 BuildMI(*MBB, UncondBrI, MBB->findDebugLoc(UncondBrI), TII->get(X86::JMP_1))
261 .addMBB(MBBInfo->FBB);
262 MBB->erase(UncondBrI);
263 MBBInfo->Modified = false;
264}
265
266//
267// Apply the transformation:
268// RootMBB -1-> ... PredMBB -3-> MBB -5-> TargetMBB
269// \-2-> \-4-> \-6-> FalseMBB
270// ==>
271// RootMBB -1-> ... PredMBB -7-> FalseMBB
272// TargetMBB <-8-/ \-2-> \-4->
273//
274// Note that PredMBB and RootMBB could be the same.
275// And in the case of dead TargetMBB, we will not have TargetMBB and edge 8.
276//
277// There are some special handling where the RootMBB is COND_E in which case
278// we directly short-cycle the brinstr.
279//
280void X86CondBrFolding::optimizeCondBr(
281 MachineBasicBlock &MBB, SmallVectorImpl<MachineBasicBlock *> &BranchPath) {
282
283 X86::CondCode CC;
284 TargetMBBInfo *MBBInfo = getMBBInfo(&MBB);
285 assert(MBBInfo && "Expecting a candidate MBB");
286 MachineBasicBlock *TargetMBB = MBBInfo->TBB;
287 BranchProbability TargetProb = MBPI->getEdgeProbability(&MBB, MBBInfo->TBB);
288
289 // Forward the jump from MBB's predecessor to MBB's false target.
290 MachineBasicBlock *PredMBB = BranchPath.front();
291 TargetMBBInfo *PredMBBInfo = getMBBInfo(PredMBB);
292 assert(PredMBBInfo && "Expecting a candidate MBB");
293 if (PredMBBInfo->Modified)
294 fixupModifiedCond(PredMBB);
295 CC = PredMBBInfo->BranchCode;
296 // Don't do this if depth of BranchPath is 1 and PredMBB is of COND_E.
297 // We will short-cycle directly for this case.
298 if (!(CC == X86::COND_E && BranchPath.size() == 1))
299 replaceBrDest(PredMBB, &MBB, MBBInfo->FBB);
300
301 MachineBasicBlock *RootMBB = BranchPath.back();
302 TargetMBBInfo *RootMBBInfo = getMBBInfo(RootMBB);
303 assert(RootMBBInfo && "Expecting a candidate MBB");
304 if (RootMBBInfo->Modified)
305 fixupModifiedCond(RootMBB);
306 CC = RootMBBInfo->BranchCode;
307
308 if (CC != X86::COND_E) {
309 MachineBasicBlock::iterator UncondBrI = findUncondBrI(RootMBB);
310 // RootMBB: Cond jump to the original not-taken MBB.
311 X86::CondCode NewCC;
312 switch (CC) {
313 case X86::COND_L:
314 NewCC = X86::COND_G;
315 break;
316 case X86::COND_G:
317 NewCC = X86::COND_L;
318 break;
319 default:
320 llvm_unreachable("unexpected condtional code.");
321 }
322 BuildMI(*RootMBB, UncondBrI, RootMBB->findDebugLoc(UncondBrI),
323 TII->get(GetCondBranchFromCond(NewCC)))
324 .addMBB(RootMBBInfo->FBB);
325
326 // RootMBB: Jump to TargetMBB
327 BuildMI(*RootMBB, UncondBrI, RootMBB->findDebugLoc(UncondBrI),
328 TII->get(X86::JMP_1))
329 .addMBB(TargetMBB);
330 RootMBB->addSuccessor(TargetMBB);
331 fixPHIsInSucc(TargetMBB, &MBB, RootMBB);
332 RootMBB->erase(UncondBrI);
333 } else {
334 replaceBrDest(RootMBB, RootMBBInfo->TBB, TargetMBB);
335 }
336
337 // Fix RootMBB's CmpValue to MBB's CmpValue to TargetMBB. Don't set Imm
338 // directly. Move MBB's stmt to here as the opcode might be different.
339 if (RootMBBInfo->CmpValue != MBBInfo->CmpValue) {
340 MachineInstr *NewCmp = MBBInfo->CmpInstr;
341 NewCmp->removeFromParent();
342 RootMBB->insert(RootMBBInfo->CmpInstr, NewCmp);
343 RootMBBInfo->CmpInstr->eraseFromParent();
344 }
345
346 // Fix branch Probabilities.
347 auto fixBranchProb = [&](MachineBasicBlock *NextMBB) {
348 BranchProbability Prob;
349 for (auto &I : BranchPath) {
350 MachineBasicBlock *ThisMBB = I;
351 if (!ThisMBB->hasSuccessorProbabilities() ||
352 !ThisMBB->isSuccessor(NextMBB))
353 break;
354 Prob = MBPI->getEdgeProbability(ThisMBB, NextMBB);
355 if (Prob.isUnknown())
356 break;
357 TargetProb = Prob * TargetProb;
358 Prob = Prob - TargetProb;
359 setBranchProb(ThisMBB, NextMBB, Prob);
360 if (ThisMBB == RootMBB) {
361 setBranchProb(ThisMBB, TargetMBB, TargetProb);
362 }
363 ThisMBB->normalizeSuccProbs();
364 if (ThisMBB == RootMBB)
365 break;
366 NextMBB = ThisMBB;
367 }
368 return true;
369 };
370 if (CC != X86::COND_E && !TargetProb.isUnknown())
371 fixBranchProb(MBBInfo->FBB);
372
373 if (CC != X86::COND_E)
374 RemoveList.push_back(&MBB);
375
376 // Invalidate MBBInfo just in case.
377 MBBInfos[MBB.getNumber()] = nullptr;
378 MBBInfos[RootMBB->getNumber()] = nullptr;
379
380 LLVM_DEBUG(dbgs() << "After optimization:\nRootMBB is: " << *RootMBB << "\n");
381 if (BranchPath.size() > 1)
382 LLVM_DEBUG(dbgs() << "PredMBB is: " << *(BranchPath[0]) << "\n");
383}
384
385// Driver function for optimization: find the valid candidate and apply
386// the transformation.
387bool X86CondBrFolding::optimize() {
388 bool Changed = false;
389 LLVM_DEBUG(dbgs() << "***** X86CondBr Folding on Function: " << MF.getName()
390 << " *****\n");
391 // Setup data structures.
392 MBBInfos.resize(MF.getNumBlockIDs());
393 for (auto &MBB : MF)
394 MBBInfos[MBB.getNumber()] = analyzeMBB(MBB);
395
396 for (auto &MBB : MF) {
397 TargetMBBInfo *MBBInfo = getMBBInfo(&MBB);
398 if (!MBBInfo || !MBBInfo->CmpBrOnly)
399 continue;
400 if (MBB.pred_size() != 1)
401 continue;
402 LLVM_DEBUG(dbgs() << "Work on MBB." << MBB.getNumber()
403 << " CmpValue: " << MBBInfo->CmpValue << "\n");
404 SmallVector<MachineBasicBlock *, 4> BranchPath;
405 if (!findPath(&MBB, BranchPath))
406 continue;
407
408#ifndef NDEBUG
409 LLVM_DEBUG(dbgs() << "Found one path (len=" << BranchPath.size() << "):\n");
410 int Index = 1;
411 LLVM_DEBUG(dbgs() << "Target MBB is: " << MBB << "\n");
412 for (auto I = BranchPath.rbegin(); I != BranchPath.rend(); ++I, ++Index) {
413 MachineBasicBlock *PMBB = *I;
414 TargetMBBInfo *PMBBInfo = getMBBInfo(PMBB);
415 LLVM_DEBUG(dbgs() << "Path MBB (" << Index << " of " << BranchPath.size()
416 << ") is " << *PMBB);
417 LLVM_DEBUG(dbgs() << "CC=" << PMBBInfo->BranchCode
418 << " Val=" << PMBBInfo->CmpValue
419 << " CmpBrOnly=" << PMBBInfo->CmpBrOnly << "\n\n");
420 }
421#endif
422 optimizeCondBr(MBB, BranchPath);
423 Changed = true;
424 }
425 NumFixedCondBrs += RemoveList.size();
426 for (auto MBBI : RemoveList) {
427 for (auto *Succ : MBBI->successors())
428 MBBI->removeSuccessor(Succ);
429 MBBI->eraseFromParent();
430 }
431
432 return Changed;
433}
434
435// Analyze instructions that generate CondCode and extract information.
436bool X86CondBrFolding::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
437 int &CmpValue) {
438 unsigned SrcRegIndex = 0;
439 unsigned ValueIndex = 0;
440 switch (MI.getOpcode()) {
441 // TODO: handle test instructions.
442 default:
443 return false;
444 case X86::CMP64ri32:
445 case X86::CMP64ri8:
446 case X86::CMP32ri:
447 case X86::CMP32ri8:
448 case X86::CMP16ri:
449 case X86::CMP16ri8:
450 case X86::CMP8ri:
451 SrcRegIndex = 0;
452 ValueIndex = 1;
453 break;
454 case X86::SUB64ri32:
455 case X86::SUB64ri8:
456 case X86::SUB32ri:
457 case X86::SUB32ri8:
458 case X86::SUB16ri:
459 case X86::SUB16ri8:
460 case X86::SUB8ri:
461 SrcRegIndex = 1;
462 ValueIndex = 2;
463 break;
464 }
465 SrcReg = MI.getOperand(SrcRegIndex).getReg();
466 assert(MI.getOperand(ValueIndex).isImm() && "Expecting Imm operand");
467 CmpValue = MI.getOperand(ValueIndex).getImm();
468 return true;
469}
470
471// Analyze a candidate MBB and set the extract all the information needed.
472// The valid candidate will have two successors.
473// It also should have a sequence of
474// Branch_instr,
475// CondBr,
476// UnCondBr.
477// Return TargetMBBInfo if MBB is a valid candidate and nullptr otherwise.
478std::unique_ptr<TargetMBBInfo>
479X86CondBrFolding::analyzeMBB(MachineBasicBlock &MBB) {
480 MachineBasicBlock *TBB;
481 MachineBasicBlock *FBB;
482 MachineInstr *BrInstr;
483 MachineInstr *CmpInstr;
484 X86::CondCode CC;
485 unsigned SrcReg;
486 int CmpValue;
487 bool Modified;
488 bool CmpBrOnly;
489
490 if (MBB.succ_size() != 2)
491 return nullptr;
492
493 CmpBrOnly = true;
494 FBB = TBB = nullptr;
495 CmpInstr = nullptr;
496 MachineBasicBlock::iterator I = MBB.end();
497 while (I != MBB.begin()) {
498 --I;
499 if (I->isDebugValue())
500 continue;
501 if (I->getOpcode() == X86::JMP_1) {
502 if (FBB)
503 return nullptr;
504 FBB = I->getOperand(0).getMBB();
505 continue;
506 }
507 if (I->isBranch()) {
508 if (TBB)
509 return nullptr;
510 CC = X86::getCondFromBranchOpc(I->getOpcode());
511 switch (CC) {
512 default:
513 return nullptr;
514 case X86::COND_E:
515 case X86::COND_L:
516 case X86::COND_G:
517 case X86::COND_NE:
518 case X86::COND_LE:
519 case X86::COND_GE:
520 break;
521 }
522 TBB = I->getOperand(0).getMBB();
523 BrInstr = &*I;
524 continue;
525 }
526 if (analyzeCompare(*I, SrcReg, CmpValue)) {
527 if (CmpInstr)
528 return nullptr;
529 CmpInstr = &*I;
530 continue;
531 }
532 CmpBrOnly = false;
533 break;
534 }
535
536 if (!TBB || !FBB || !CmpInstr)
537 return nullptr;
538
539 // Simplify CondCode. Note this is only to simplify the findPath logic
540 // and will not change the instruction here.
541 switch (CC) {
542 case X86::COND_NE:
543 CC = X86::COND_E;
544 std::swap(TBB, FBB);
545 Modified = true;
546 break;
547 case X86::COND_LE:
548 if (CmpValue == INT_MAX)
549 return nullptr;
550 CC = X86::COND_L;
551 CmpValue += 1;
552 Modified = true;
553 break;
554 case X86::COND_GE:
555 if (CmpValue == INT_MIN)
556 return nullptr;
557 CC = X86::COND_G;
558 CmpValue -= 1;
559 Modified = true;
560 break;
561 default:
562 Modified = false;
563 break;
564 }
565 return llvm::make_unique<TargetMBBInfo>(TargetMBBInfo{
566 TBB, FBB, BrInstr, CmpInstr, CC, SrcReg, CmpValue, Modified, CmpBrOnly});
567}
568
569bool X86CondBrFoldingPass::runOnMachineFunction(MachineFunction &MF) {
570 const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
571 if (!ST.threewayBranchProfitable())
572 return false;
573 const X86InstrInfo *TII = ST.getInstrInfo();
574 const MachineBranchProbabilityInfo *MBPI =
575 &getAnalysis<MachineBranchProbabilityInfo>();
576
577 X86CondBrFolding CondBr(TII, MBPI, MF);
578 return CondBr.optimize();
579}