blob: 4d925c89ef183a0580453d0e49d9b210af75e34a [file] [log] [blame]
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +00001//===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass merges conditional blocks of code and reduces the number of
11// conditional branches in the hot paths based on profiles.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
16#include "llvm/Transforms/Utils.h"
17#include "llvm/Transforms/Utils/BasicBlockUtils.h"
18#include "llvm/Transforms/Utils/Cloning.h"
19#include "llvm/Transforms/Utils/ValueMapper.h"
20#include "llvm/ADT/DenseMap.h"
21#include "llvm/ADT/DenseSet.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/StringSet.h"
24#include "llvm/Analysis/BlockFrequencyInfo.h"
25#include "llvm/Analysis/ProfileSummaryInfo.h"
26#include "llvm/Analysis/RegionInfo.h"
27#include "llvm/Analysis/RegionIterator.h"
28#include "llvm/Analysis/ValueTracking.h"
29#include "llvm/IR/CFG.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/IRBuilder.h"
32#include "llvm/IR/MDBuilder.h"
33#include "llvm/Support/BranchProbability.h"
34#include "llvm/Support/MemoryBuffer.h"
35#include "llvm/Transforms/Scalar.h"
36
37#include <cxxabi.h>
38#include <set>
39#include <sstream>
40
41using namespace llvm;
42
43#define DEBUG_TYPE "chr"
44
45#define CHR_DEBUG(X) LLVM_DEBUG(X)
46
47static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
48 cl::desc("Apply CHR for all functions"));
49
50static cl::opt<double> CHRBiasThreshold(
51 "chr-bias-threshold", cl::init(0.99), cl::Hidden,
52 cl::desc("CHR considers a branch bias greater than this ratio as biased"));
53
54static cl::opt<unsigned> CHRMergeThreshold(
55 "chr-merge-threshold", cl::init(2), cl::Hidden,
56 cl::desc("CHR merges a group of N branches/selects where N >= this value"));
57
58static cl::opt<std::string> CHRModuleList(
59 "chr-module-list", cl::init(""), cl::Hidden,
60 cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
61
62static cl::opt<std::string> CHRFunctionList(
63 "chr-function-list", cl::init(""), cl::Hidden,
64 cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
65
66static StringSet<> CHRModules;
67static StringSet<> CHRFunctions;
68
69static void ParseCHRFilterFiles() {
70 if (!CHRModuleList.empty()) {
71 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
72 if (!FileOrErr) {
73 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
74 std::exit(1);
75 }
76 StringRef Buf = FileOrErr->get()->getBuffer();
77 SmallVector<StringRef, 0> Lines;
78 Buf.split(Lines, '\n');
79 for (StringRef Line : Lines) {
80 Line = Line.trim();
81 if (!Line.empty())
82 CHRModules.insert(Line);
83 }
84 }
85 if (!CHRFunctionList.empty()) {
86 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
87 if (!FileOrErr) {
88 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
89 std::exit(1);
90 }
91 StringRef Buf = FileOrErr->get()->getBuffer();
92 SmallVector<StringRef, 0> Lines;
93 Buf.split(Lines, '\n');
94 for (StringRef Line : Lines) {
95 Line = Line.trim();
96 if (!Line.empty())
97 CHRFunctions.insert(Line);
98 }
99 }
100}
101
102namespace {
103class ControlHeightReductionLegacyPass : public FunctionPass {
104public:
105 static char ID;
106
107 ControlHeightReductionLegacyPass() : FunctionPass(ID) {
108 initializeControlHeightReductionLegacyPassPass(
109 *PassRegistry::getPassRegistry());
110 ParseCHRFilterFiles();
111 }
112
113 bool runOnFunction(Function &F) override;
114 void getAnalysisUsage(AnalysisUsage &AU) const override {
115 AU.addRequired<BlockFrequencyInfoWrapperPass>();
116 AU.addRequired<DominatorTreeWrapperPass>();
117 AU.addRequired<ProfileSummaryInfoWrapperPass>();
118 AU.addRequired<RegionInfoPass>();
119 AU.addPreserved<GlobalsAAWrapperPass>();
120 }
121};
122} // end anonymous namespace
123
124char ControlHeightReductionLegacyPass::ID = 0;
125
126INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass,
127 "chr",
128 "Reduce control height in the hot paths",
129 false, false)
130INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
131INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
132INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
133INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
134INITIALIZE_PASS_END(ControlHeightReductionLegacyPass,
135 "chr",
136 "Reduce control height in the hot paths",
137 false, false)
138
139FunctionPass *llvm::createControlHeightReductionLegacyPass() {
140 return new ControlHeightReductionLegacyPass();
141}
142
143namespace {
144
145struct CHRStats {
146 CHRStats() : NumBranches(0), NumBranchesDelta(0),
147 WeightedNumBranchesDelta(0) {}
148 void print(raw_ostream &OS) const {
149 OS << "CHRStats: NumBranches " << NumBranches
150 << " NumBranchesDelta " << NumBranchesDelta
151 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
152 }
153 uint64_t NumBranches; // The original number of conditional branches /
154 // selects
155 uint64_t NumBranchesDelta; // The decrease of the number of conditional
156 // branches / selects in the hot paths due to CHR.
157 uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile
158 // count at the scope entry.
159};
160
161inline raw_ostream &operator<<(raw_ostream &OS, const CHRStats &Stats) {
162 Stats.print(OS);
163 return OS;
164}
165
166// RegInfo - some properties of a Region.
167struct RegInfo {
168 RegInfo() : R(nullptr), HasBranch(false) {}
169 RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {}
170 Region *R;
171 bool HasBranch;
172 SmallVector<SelectInst *, 8> Selects;
173};
174
175typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
176
177// CHRScope - a sequence of regions to CHR together. It corresponds to a
178// sequence of conditional blocks. It can have subscopes which correspond to
179// nested conditional blocks. Nested CHRScopes form a tree.
180class CHRScope {
181 public:
182 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
183 assert(RI.R && "Null RegionIn");
184 RegInfos.push_back(RI);
185 }
186
187 Region *getParentRegion() {
188 assert(RegInfos.size() > 0 && "Empty CHRScope");
189 Region *Parent = RegInfos[0].R->getParent();
190 assert(Parent && "Unexpected to call this on the top-level region");
191 return Parent;
192 }
193
194 BasicBlock *getEntryBlock() {
195 assert(RegInfos.size() > 0 && "Empty CHRScope");
196 return RegInfos.front().R->getEntry();
197 }
198
199 BasicBlock *getExitBlock() {
200 assert(RegInfos.size() > 0 && "Empty CHRScope");
201 return RegInfos.back().R->getExit();
202 }
203
204 bool appendable(CHRScope *Next) {
205 // The next scope is appendable only if this scope is directly connected to
206 // it (which implies it post-dominates this scope) and this scope dominates
207 // it (no edge to the next scope outside this scope).
208 BasicBlock *NextEntry = Next->getEntryBlock();
209 if (getExitBlock() != NextEntry)
210 // Not directly connected.
211 return false;
212 Region *LastRegion = RegInfos.back().R;
213 for (BasicBlock *Pred : predecessors(NextEntry))
214 if (!LastRegion->contains(Pred))
215 // There's an edge going into the entry of the next scope from outside
216 // of this scope.
217 return false;
218 return true;
219 }
220
221 void append(CHRScope *Next) {
222 assert(RegInfos.size() > 0 && "Empty CHRScope");
223 assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
224 assert(getParentRegion() == Next->getParentRegion() &&
225 "Must be siblings");
226 assert(getExitBlock() == Next->getEntryBlock() &&
227 "Must be adjacent");
228 for (RegInfo &RI : Next->RegInfos)
229 RegInfos.push_back(RI);
230 for (CHRScope *Sub : Next->Subs)
231 Subs.push_back(Sub);
232 }
233
234 void addSub(CHRScope *SubIn) {
235#ifndef NDEBUG
236 bool is_child = false;
237 for (RegInfo &RI : RegInfos)
238 if (RI.R == SubIn->getParentRegion()) {
239 is_child = true;
240 break;
241 }
242 assert(is_child && "Must be a child");
243#endif
244 Subs.push_back(SubIn);
245 }
246
247 // Split this scope at the boundary region into two, which will belong to the
248 // tail and returns the tail.
249 CHRScope *split(Region *Boundary) {
250 assert(Boundary && "Boundary null");
251 assert(RegInfos.begin()->R != Boundary &&
252 "Can't be split at beginning");
253 auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(),
254 [&Boundary](const RegInfo& RI) {
255 return Boundary == RI.R;
256 });
257 if (BoundaryIt == RegInfos.end())
258 return nullptr;
259 SmallVector<RegInfo, 8> TailRegInfos;
260 SmallVector<CHRScope *, 8> TailSubs;
261 TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end());
262 RegInfos.resize(BoundaryIt - RegInfos.begin());
263 DenseSet<Region *> TailRegionSet;
264 for (RegInfo &RI : TailRegInfos)
265 TailRegionSet.insert(RI.R);
266 for (auto It = Subs.begin(); It != Subs.end(); ) {
267 CHRScope *Sub = *It;
268 assert(Sub && "null Sub");
269 Region *Parent = Sub->getParentRegion();
270 if (TailRegionSet.count(Parent)) {
271 TailSubs.push_back(Sub);
272 It = Subs.erase(It);
273 } else {
274 assert(std::find_if(RegInfos.begin(), RegInfos.end(),
275 [&Parent](const RegInfo& RI) {
276 return Parent == RI.R;
277 }) != RegInfos.end() &&
278 "Must be in head");
279 ++It;
280 }
281 }
282 assert(HoistStopMap.empty() && "MapHoistStops must be empty");
283 return new CHRScope(TailRegInfos, TailSubs);
284 }
285
286 bool contains(Instruction *I) const {
287 BasicBlock *Parent = I->getParent();
288 for (const RegInfo &RI : RegInfos)
289 if (RI.R->contains(Parent))
290 return true;
291 return false;
292 }
293
294 void print(raw_ostream &OS) const;
295
296 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
297 SmallVector<CHRScope *, 8> Subs; // Subscopes.
298
299 // The instruction at which to insert the CHR conditional branch (and hoist
300 // the dependent condition values).
301 Instruction *BranchInsertPoint;
302
303 // True-biased and false-biased regions (conditional blocks),
304 // respectively. Used only for the outermost scope and includes regions in
305 // subscopes. The rest are unbiased.
306 DenseSet<Region *> TrueBiasedRegions;
307 DenseSet<Region *> FalseBiasedRegions;
308 // Among the biased regions, the regions that get CHRed.
309 SmallVector<RegInfo, 8> CHRRegions;
310
311 // True-biased and false-biased selects, respectively. Used only for the
312 // outermost scope and includes ones in subscopes.
313 DenseSet<SelectInst *> TrueBiasedSelects;
314 DenseSet<SelectInst *> FalseBiasedSelects;
315
316 // Map from one of the above regions to the instructions to stop
317 // hoisting instructions at through use-def chains.
318 HoistStopMapTy HoistStopMap;
319
320 private:
321 CHRScope(SmallVector<RegInfo, 8> &RegInfosIn,
322 SmallVector<CHRScope *, 8> &SubsIn)
323 : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {}
324};
325
326inline raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
327 Scope.print(OS);
328 return OS;
329}
330
331class CHR {
332 public:
333 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
334 ProfileSummaryInfo &PSIin, RegionInfo &RIin)
335 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin) {}
336
337 ~CHR() {
338 for (CHRScope *Scope : Scopes) {
339 delete Scope;
340 }
341 }
342
343 bool run();
344
345 private:
346 // See the comments in CHR::run() for the high level flow of the algorithm and
347 // what the following functions do.
348
349 void findScopes(SmallVectorImpl<CHRScope *> &Output) {
350 Region *R = RI.getTopLevelRegion();
351 CHRScope *Scope = findScopes(R, nullptr, nullptr, Output);
352 if (Scope) {
353 Output.push_back(Scope);
354 }
355 }
356 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
357 SmallVectorImpl<CHRScope *> &Scopes);
358 CHRScope *findScope(Region *R);
359 void checkScopeHoistable(CHRScope *Scope);
360
361 void splitScopes(SmallVectorImpl<CHRScope *> &Input,
362 SmallVectorImpl<CHRScope *> &Output);
363 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
364 CHRScope *Outer,
365 DenseSet<Value *> *OuterConditionValues,
366 Instruction *OuterInsertPoint,
367 SmallVectorImpl<CHRScope *> &Output,
368 DenseSet<Instruction *> &Unhoistables);
369
370 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
371 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
372
373 void filterScopes(SmallVectorImpl<CHRScope *> &Input,
374 SmallVectorImpl<CHRScope *> &Output);
375
376 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
377 SmallVectorImpl<CHRScope *> &Output);
378 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
379
380 void sortScopes(SmallVectorImpl<CHRScope *> &Input,
381 SmallVectorImpl<CHRScope *> &Output);
382
383 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
384 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
385 void cloneScopeBlocks(CHRScope *Scope,
386 BasicBlock *PreEntryBlock,
387 BasicBlock *ExitBlock,
388 Region *LastRegion,
389 ValueToValueMapTy &VMap);
390 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
391 BasicBlock *EntryBlock,
392 BasicBlock *NewEntryBlock,
393 ValueToValueMapTy &VMap);
394 void fixupBranchesAndSelects(CHRScope *Scope,
395 BasicBlock *PreEntryBlock,
396 BranchInst *MergedBR,
397 uint64_t ProfileCount);
398 void fixupBranch(Region *R,
399 CHRScope *Scope,
400 IRBuilder<> &IRB,
401 Value *&MergedCondition, BranchProbability &CHRBranchBias);
402 void fixupSelect(SelectInst* SI,
403 CHRScope *Scope,
404 IRBuilder<> &IRB,
405 Value *&MergedCondition, BranchProbability &CHRBranchBias);
406 void addToMergedCondition(bool IsTrueBiased, Value *Cond,
407 Instruction *BranchOrSelect,
408 CHRScope *Scope,
409 IRBuilder<> &IRB,
410 Value *&MergedCondition);
411
412 Function &F;
413 BlockFrequencyInfo &BFI;
414 DominatorTree &DT;
415 ProfileSummaryInfo &PSI;
416 RegionInfo &RI;
417 CHRStats Stats;
418
419 // All the true-biased regions in the function
420 DenseSet<Region *> TrueBiasedRegionsGlobal;
421 // All the false-biased regions in the function
422 DenseSet<Region *> FalseBiasedRegionsGlobal;
423 // All the true-biased selects in the function
424 DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
425 // All the false-biased selects in the function
426 DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
427 // A map from biased regions to their branch bias
428 DenseMap<Region *, BranchProbability> BranchBiasMap;
429 // A map from biased selects to their branch bias
430 DenseMap<SelectInst *, BranchProbability> SelectBiasMap;
431 // All the scopes.
432 DenseSet<CHRScope *> Scopes;
433};
434
435} // end anonymous namespace
436
437static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) {
438 if (ForceCHR)
439 return true;
440
441 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
442 if (CHRModules.count(F.getParent()->getName()))
443 return true;
444 StringRef Name = F.getName();
445 if (CHRFunctions.count(Name))
446 return true;
447 const char* DemangledName = nullptr;
448 int Status = -1;
449 DemangledName = abi::__cxa_demangle(Name.str().c_str(),
450 nullptr, nullptr, &Status);
451 return DemangledName && CHRFunctions.count(DemangledName);
452 }
453
454 assert(PSI.hasProfileSummary() && "Empty PSI?");
455 return PSI.isFunctionEntryHot(&F);
456}
457
458static void dumpIR(Function &F, const char *Label, CHRStats *Stats) {
459 std::string Name = F.getName().str();
460 const char *DemangledName = nullptr;
461 int Status = -1;
462 DemangledName = abi::__cxa_demangle(Name.c_str(),
463 nullptr, nullptr, &Status);
464 if (DemangledName == nullptr) {
465 DemangledName = "<NOT-MANGLED>";
466 }
467 std::string ModuleName = F.getParent()->getName().str();
468 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
469 << Name);
470 if (Stats)
471 CHR_DEBUG(dbgs() << " " << *Stats);
472 CHR_DEBUG(dbgs() << "\n");
473 CHR_DEBUG(F.dump());
474}
475
476
477void CHRScope::print(raw_ostream &OS) const {
478 assert(RegInfos.size() > 0 && "Empty CHRScope");
479 OS << "CHRScope[";
480 OS << RegInfos.size() << ", Regions[";
481 for (const RegInfo &RI : RegInfos) {
482 OS << RI.R->getNameStr();
483 if (RI.HasBranch)
484 OS << " B";
485 if (RI.Selects.size() > 0)
486 OS << " S" << RI.Selects.size();
487 OS << ", ";
488 }
489 if (RegInfos[0].R->getParent()) {
490 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
491 } else {
492 // top level region
493 OS << "]";
494 }
495 OS << ", Subs[";
496 for (CHRScope *Sub : Subs) {
497 OS << *Sub << ", ";
498 }
499 OS << "]]";
500}
501
502// Return true if the given instruction type can be hoisted by CHR.
503static bool isHoistableInstructionType(Instruction *I) {
504 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) ||
505 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
506 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
507 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
508 isa<InsertValueInst>(I);
509}
510
511// Return true if the given instruction can be hoisted by CHR.
512static bool isHoistable(Instruction *I, DominatorTree &DT) {
513 if (!isHoistableInstructionType(I))
514 return false;
515 return isSafeToSpeculativelyExecute(I, nullptr, &DT);
516}
517
518// Recursively traverse the use-def chains of the given value and return a set
519// of the unhoistable base values defined within the scope (excluding the
520// first-region entry block) or the (hoistable or unhoistable) base values that
521// are defined outside (including the first-region entry block) of the
522// scope. The returned set doesn't include constants.
523static std::set<Value *> getBaseValues(Value *V,
524 DominatorTree &DT) {
525 std::set<Value *> Result;
526 if (auto *I = dyn_cast<Instruction>(V)) {
527 // We don't stop at a block that's not in the Scope because we would miss some
528 // instructions that are based on the same base values if we stop there.
529 if (!isHoistable(I, DT)) {
530 Result.insert(I);
531 return Result;
532 }
533 // I is hoistable above the Scope.
534 for (Value *Op : I->operands()) {
535 std::set<Value *> OpResult = getBaseValues(Op, DT);
536 Result.insert(OpResult.begin(), OpResult.end());
537 }
538 return Result;
539 }
540 if (isa<Argument>(V)) {
541 Result.insert(V);
542 return Result;
543 }
544 // We don't include others like constants because those won't lead to any
545 // chance of folding of conditions (eg two bit checks merged into one check)
546 // after CHR.
547 return Result; // empty
548}
549
550// Return true if V is already hoisted or can be hoisted (along with its
551// operands) above the insert point. When it returns true and HoistStops is
552// non-null, the instructions to stop hoisting at through the use-def chains are
553// inserted into HoistStops.
554static bool
555checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
556 DenseSet<Instruction *> &Unhoistables,
557 DenseSet<Instruction *> *HoistStops) {
558 assert(InsertPoint && "Null InsertPoint");
559 if (auto *I = dyn_cast<Instruction>(V)) {
560 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
561 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
562 if (Unhoistables.count(I)) {
563 // Don't hoist if they are not to be hoisted.
564 return false;
565 }
566 if (DT.dominates(I, InsertPoint)) {
567 // We are already above the insert point. Stop here.
568 if (HoistStops)
569 HoistStops->insert(I);
570 return true;
571 }
572 // We aren't not above the insert point, check if we can hoist it above the
573 // insert point.
574 if (isHoistable(I, DT)) {
575 // Check operands first.
576 DenseSet<Instruction *> OpsHoistStops;
577 bool AllOpsHoisted = true;
578 for (Value *Op : I->operands()) {
579 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops)) {
580 AllOpsHoisted = false;
581 break;
582 }
583 }
584 if (AllOpsHoisted) {
585 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
586 if (HoistStops)
587 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
588 return true;
589 }
590 }
591 return false;
592 }
593 // Non-instructions are considered hoistable.
594 return true;
595}
596
597// Returns true and sets the true probability and false probability of an
598// MD_prof metadata if it's well-formed.
599static bool CheckMDProf(MDNode *MD, BranchProbability &TrueProb,
600 BranchProbability &FalseProb) {
601 if (!MD) return false;
602 MDString *MDName = cast<MDString>(MD->getOperand(0));
603 if (MDName->getString() != "branch_weights" ||
604 MD->getNumOperands() != 3)
605 return false;
606 ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1));
607 ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2));
608 if (!TrueWeight || !FalseWeight)
609 return false;
610 APInt TrueWt = TrueWeight->getValue();
611 APInt FalseWt = FalseWeight->getValue();
612 APInt SumWt = TrueWt + FalseWt;
613 TrueProb = BranchProbability::getBranchProbability(TrueWt.getZExtValue(),
614 SumWt.getZExtValue());
615 FalseProb = BranchProbability::getBranchProbability(FalseWt.getZExtValue(),
616 SumWt.getZExtValue());
617 return true;
618}
619
620static BranchProbability getCHRBiasThreshold() {
621 return BranchProbability::getBranchProbability(
622 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
623}
624
625// A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >=
626// CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
627// CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
628// false.
629template<typename K, typename S, typename M>
630bool CheckBias(K *Key, BranchProbability TrueProb, BranchProbability FalseProb,
631 S &TrueSet, S &FalseSet, M &BiasMap) {
632 BranchProbability Threshold = getCHRBiasThreshold();
633 if (TrueProb >= Threshold) {
634 TrueSet.insert(Key);
635 BiasMap[Key] = TrueProb;
636 return true;
637 } else if (FalseProb >= Threshold) {
638 FalseSet.insert(Key);
639 BiasMap[Key] = FalseProb;
640 return true;
641 }
642 return false;
643}
644
645// Returns true and insert a region into the right biased set and the map if the
646// branch of the region is biased.
647static bool CheckBiasedBranch(BranchInst *BI, Region *R,
648 DenseSet<Region *> &TrueBiasedRegionsGlobal,
649 DenseSet<Region *> &FalseBiasedRegionsGlobal,
650 DenseMap<Region *, BranchProbability> &BranchBiasMap) {
651 if (!BI->isConditional())
652 return false;
653 BranchProbability ThenProb, ElseProb;
654 if (!CheckMDProf(BI->getMetadata(LLVMContext::MD_prof),
655 ThenProb, ElseProb))
656 return false;
657 BasicBlock *IfThen = BI->getSuccessor(0);
658 BasicBlock *IfElse = BI->getSuccessor(1);
659 assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
660 IfThen != IfElse &&
661 "Invariant from findScopes");
662 if (IfThen == R->getExit()) {
663 // Swap them so that IfThen/ThenProb means going into the conditional code
664 // and IfElse/ElseProb means skipping it.
665 std::swap(IfThen, IfElse);
666 std::swap(ThenProb, ElseProb);
667 }
668 CHR_DEBUG(dbgs() << "BI " << *BI << " ");
669 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
670 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
671 return CheckBias(R, ThenProb, ElseProb,
672 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
673 BranchBiasMap);
674}
675
676// Returns true and insert a select into the right biased set and the map if the
677// select is biased.
678static bool CheckBiasedSelect(
679 SelectInst *SI, Region *R,
680 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
681 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
682 DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) {
683 BranchProbability TrueProb, FalseProb;
684 if (!CheckMDProf(SI->getMetadata(LLVMContext::MD_prof),
685 TrueProb, FalseProb))
686 return false;
687 CHR_DEBUG(dbgs() << "SI " << *SI << " ");
688 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
689 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
690 return CheckBias(SI, TrueProb, FalseProb,
691 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
692 SelectBiasMap);
693}
694
695// Returns the instruction at which to hoist the dependent condition values and
696// insert the CHR branch for a region. This is the terminator branch in the
697// entry block or the first select in the entry block, if any.
698static Instruction* getBranchInsertPoint(RegInfo &RI) {
699 Region *R = RI.R;
700 BasicBlock *EntryBB = R->getEntry();
701 // The hoist point is by default the terminator of the entry block, which is
702 // the same as the branch instruction if RI.HasBranch is true.
703 Instruction *HoistPoint = EntryBB->getTerminator();
704 for (SelectInst *SI : RI.Selects) {
705 if (SI->getParent() == EntryBB) {
706 // Pick the first select in Selects in the entry block. Note Selects is
707 // sorted in the instruction order within a block (asserted below).
708 HoistPoint = SI;
709 break;
710 }
711 }
712 assert(HoistPoint && "Null HoistPoint");
713#ifndef NDEBUG
714 // Check that HoistPoint is the first one in Selects in the entry block,
715 // if any.
716 DenseSet<Instruction *> EntryBlockSelectSet;
717 for (SelectInst *SI : RI.Selects) {
718 if (SI->getParent() == EntryBB) {
719 EntryBlockSelectSet.insert(SI);
720 }
721 }
722 for (Instruction &I : *EntryBB) {
723 if (EntryBlockSelectSet.count(&I) > 0) {
724 assert(&I == HoistPoint &&
725 "HoistPoint must be the first one in Selects");
726 break;
727 }
728 }
729#endif
730 return HoistPoint;
731}
732
733// Find a CHR scope in the given region.
734CHRScope * CHR::findScope(Region *R) {
735 CHRScope *Result = nullptr;
736 BasicBlock *Entry = R->getEntry();
737 BasicBlock *Exit = R->getExit(); // null if top level.
738 assert(Entry && "Entry must not be null");
739 assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
740 "Only top level region has a null exit");
741 if (Entry)
742 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
743 else
744 CHR_DEBUG(dbgs() << "Entry null\n");
745 if (Exit)
746 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
747 else
748 CHR_DEBUG(dbgs() << "Exit null\n");
749 // Exclude cases where Entry is part of a subregion (hence it doesn't belong
750 // to this region).
751 bool EntryInSubregion = RI.getRegionFor(Entry) != R;
752 if (EntryInSubregion)
753 return nullptr;
754 // Exclude loops
755 for (BasicBlock *Pred : predecessors(Entry))
756 if (R->contains(Pred))
757 return nullptr;
758 if (Exit) {
759 // Try to find an if-then block (check if R is an if-then).
760 // if (cond) {
761 // ...
762 // }
763 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
764 if (BI)
765 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
766 else
767 CHR_DEBUG(dbgs() << "BI null\n");
768 if (BI && BI->isConditional()) {
769 BasicBlock *S0 = BI->getSuccessor(0);
770 BasicBlock *S1 = BI->getSuccessor(1);
771 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
772 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
773 if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
774 RegInfo RI(R);
775 RI.HasBranch = CheckBiasedBranch(
776 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
777 BranchBiasMap);
778 Result = new CHRScope(RI);
779 Scopes.insert(Result);
780 CHR_DEBUG(dbgs() << "Found a region with a branch\n");
781 ++Stats.NumBranches;
782 }
783 }
784 }
785 {
786 // Try to look for selects in the direct child blocks (as opposed to in
787 // subregions) of R.
788 // ...
789 // if (..) { // Some subregion
790 // ...
791 // }
792 // if (..) { // Some subregion
793 // ...
794 // }
795 // ...
796 // a = cond ? b : c;
797 // ...
798 SmallVector<SelectInst *, 8> Selects;
799 for (RegionNode *E : R->elements()) {
800 if (E->isSubRegion())
801 continue;
802 // This returns the basic block of E if E is a direct child of R (not a
803 // subregion.)
804 BasicBlock *BB = E->getEntry();
805 // Need to push in the order to make it easier to find the first Select
806 // later.
807 for (Instruction &I : *BB) {
808 if (auto *SI = dyn_cast<SelectInst>(&I)) {
809 Selects.push_back(SI);
810 ++Stats.NumBranches;
811 }
812 }
813 }
814 if (Selects.size() > 0) {
815 auto AddSelects = [&](RegInfo &RI) {
816 for (auto *SI : Selects)
817 if (CheckBiasedSelect(SI, RI.R,
818 TrueBiasedSelectsGlobal,
819 FalseBiasedSelectsGlobal,
820 SelectBiasMap))
821 RI.Selects.push_back(SI);
822 };
823 if (!Result) {
824 CHR_DEBUG(dbgs() << "Found a select-only region\n");
825 RegInfo RI(R);
826 AddSelects(RI);
827 Result = new CHRScope(RI);
828 Scopes.insert(Result);
829 } else {
830 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
831 AddSelects(Result->RegInfos[0]);
832 }
833 }
834 }
835
836 if (Result) {
837 checkScopeHoistable(Result);
838 }
839 return Result;
840}
841
842// Check that any of the branch and the selects in the region could be
843// hoisted above the the CHR branch insert point (the most dominating of
844// them, either the branch (at the end of the first block) or the first
845// select in the first block). If the branch can't be hoisted, drop the
846// selects in the first blocks.
847//
848// For example, for the following scope/region with selects, we want to insert
849// the merged branch right before the first select in the first/entry block by
850// hoisting c1, c2, c3, and c4.
851//
852// // Branch insert point here.
853// a = c1 ? b : c; // Select 1
854// d = c2 ? e : f; // Select 2
855// if (c3) { // Branch
856// ...
857// c4 = foo() // A call.
858// g = c4 ? h : i; // Select 3
859// }
860//
861// But suppose we can't hoist c4 because it's dependent on the preceding
862// call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
863// Select 2. If we can't hoist c3, we drop Selects 1 & 2.
864void CHR::checkScopeHoistable(CHRScope *Scope) {
865 RegInfo &RI = Scope->RegInfos[0];
866 Region *R = RI.R;
867 BasicBlock *EntryBB = R->getEntry();
868 auto *Branch = RI.HasBranch ?
869 cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
870 SmallVector<SelectInst *, 8> &Selects = RI.Selects;
871 if (RI.HasBranch || !Selects.empty()) {
872 Instruction *InsertPoint = getBranchInsertPoint(RI);
873 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
874 // Avoid a data dependence from a select or a branch to a(nother)
875 // select. Note no instruction can't data-depend on a branch (a branch
876 // instruction doesn't produce a value).
877 DenseSet<Instruction *> Unhoistables;
878 // Initialize Unhoistables with the selects.
879 for (SelectInst *SI : Selects) {
880 Unhoistables.insert(SI);
881 }
882 // Remove Selects that can't be hoisted.
883 for (auto it = Selects.begin(); it != Selects.end(); ) {
884 SelectInst *SI = *it;
885 if (SI == InsertPoint) {
886 ++it;
887 continue;
888 }
889 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
890 DT, Unhoistables, nullptr);
891 if (!IsHoistable) {
892 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
893 it = Selects.erase(it);
894 // Since we are dropping the select here, we also drop it from
895 // Unhoistables.
896 Unhoistables.erase(SI);
897 } else
898 ++it;
899 }
900 // Update InsertPoint after potentially removing selects.
901 InsertPoint = getBranchInsertPoint(RI);
902 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
903 if (RI.HasBranch && InsertPoint != Branch) {
904 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
905 DT, Unhoistables, nullptr);
906 if (!IsHoistable) {
907 // If the branch isn't hoistable, drop the selects in the entry
908 // block, preferring the branch, which makes the branch the hoist
909 // point.
910 assert(InsertPoint != Branch && "Branch must not be the hoist point");
911 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
912 CHR_DEBUG(
913 for (SelectInst *SI : Selects) {
914 dbgs() << "SI " << *SI << "\n";
915 });
916 Selects.erase(std::remove_if(Selects.begin(), Selects.end(),
917 [EntryBB](SelectInst *SI) {
918 return SI->getParent() == EntryBB;
919 }), Selects.end());
920 Unhoistables.clear();
921 InsertPoint = Branch;
922 }
923 }
924 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
925#ifndef NDEBUG
926 if (RI.HasBranch) {
927 assert(!DT.dominates(Branch, InsertPoint) &&
928 "Branch can't be already above the hoist point");
929 assert(checkHoistValue(Branch->getCondition(), InsertPoint,
930 DT, Unhoistables, nullptr) &&
931 "checkHoistValue for branch");
932 }
933 for (auto *SI : Selects) {
934 assert(!DT.dominates(SI, InsertPoint) &&
935 "SI can't be already above the hoist point");
936 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
937 Unhoistables, nullptr) &&
938 "checkHoistValue for selects");
939 }
940 CHR_DEBUG(dbgs() << "Result\n");
941 if (RI.HasBranch) {
942 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
943 }
944 for (auto *SI : Selects) {
945 CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
946 }
947#endif
948 }
949}
950
951// Traverse the region tree, find all nested scopes and merge them if possible.
952CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
953 SmallVectorImpl<CHRScope *> &Scopes) {
954 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
955 CHRScope *Result = findScope(R);
956 // Visit subscopes.
957 CHRScope *ConsecutiveSubscope = nullptr;
958 SmallVector<CHRScope *, 8> Subscopes;
959 for (auto It = R->begin(); It != R->end(); ++It) {
960 const std::unique_ptr<Region> &SubR = *It;
961 auto Next_It = std::next(It);
962 Region *NextSubR = Next_It != R->end() ? Next_It->get() : nullptr;
963 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
964 << "\n");
965 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
966 if (SubCHRScope) {
967 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
968 } else {
969 CHR_DEBUG(dbgs() << "Subregion Scope null\n");
970 }
971 if (SubCHRScope) {
972 if (!ConsecutiveSubscope)
973 ConsecutiveSubscope = SubCHRScope;
974 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
975 Subscopes.push_back(ConsecutiveSubscope);
976 ConsecutiveSubscope = SubCHRScope;
977 } else
978 ConsecutiveSubscope->append(SubCHRScope);
979 } else {
980 if (ConsecutiveSubscope) {
981 Subscopes.push_back(ConsecutiveSubscope);
982 }
983 ConsecutiveSubscope = nullptr;
984 }
985 }
986 if (ConsecutiveSubscope) {
987 Subscopes.push_back(ConsecutiveSubscope);
988 }
989 for (CHRScope *Sub : Subscopes) {
990 if (Result) {
991 // Combine it with the parent.
992 Result->addSub(Sub);
993 } else {
994 // Push Subscopes as they won't be combined with the parent.
995 Scopes.push_back(Sub);
996 }
997 }
998 return Result;
999}
1000
1001static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) {
1002 DenseSet<Value *> ConditionValues;
1003 if (RI.HasBranch) {
1004 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
1005 ConditionValues.insert(BI->getCondition());
1006 }
1007 for (SelectInst *SI : RI.Selects) {
1008 ConditionValues.insert(SI->getCondition());
1009 }
1010 return ConditionValues;
1011}
1012
1013
1014// Determine whether to split a scope depending on the sets of the branch
1015// condition values of the previous region and the current region. We split
1016// (return true) it if 1) the condition values of the inner/lower scope can't be
1017// hoisted up to the outer/upper scope, or 2) the two sets of the condition
1018// values have an empty intersection (because the combined branch conditions
1019// won't probably lead to a simpler combined condition).
1020static bool shouldSplit(Instruction *InsertPoint,
1021 DenseSet<Value *> &PrevConditionValues,
1022 DenseSet<Value *> &ConditionValues,
1023 DominatorTree &DT,
1024 DenseSet<Instruction *> &Unhoistables) {
1025 CHR_DEBUG(
1026 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
1027 for (Value *V : PrevConditionValues) {
1028 dbgs() << *V << ", ";
1029 }
1030 dbgs() << " ConditionValues ";
1031 for (Value *V : ConditionValues) {
1032 dbgs() << *V << ", ";
1033 }
1034 dbgs() << "\n");
1035 assert(InsertPoint && "Null InsertPoint");
1036 // If any of Bases isn't hoistable to the hoist point, split.
1037 for (Value *V : ConditionValues) {
1038 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr)) {
1039 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
1040 return true; // Not hoistable, split.
1041 }
1042 }
1043 // If PrevConditionValues or ConditionValues is empty, don't split to avoid
1044 // unnecessary splits at scopes with no branch/selects. If
1045 // PrevConditionValues and ConditionValues don't intersect at all, split.
1046 if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
1047 // Use std::set as DenseSet doesn't work with set_intersection.
1048 std::set<Value *> PrevBases, Bases;
1049 for (Value *V : PrevConditionValues) {
1050 std::set<Value *> BaseValues = getBaseValues(V, DT);
1051 PrevBases.insert(BaseValues.begin(), BaseValues.end());
1052 }
1053 for (Value *V : ConditionValues) {
1054 std::set<Value *> BaseValues = getBaseValues(V, DT);
1055 Bases.insert(BaseValues.begin(), BaseValues.end());
1056 }
1057 CHR_DEBUG(
1058 dbgs() << "PrevBases ";
1059 for (Value *V : PrevBases) {
1060 dbgs() << *V << ", ";
1061 }
1062 dbgs() << " Bases ";
1063 for (Value *V : Bases) {
1064 dbgs() << *V << ", ";
1065 }
1066 dbgs() << "\n");
1067 std::set<Value *> Intersection;
1068 std::set_intersection(PrevBases.begin(), PrevBases.end(),
1069 Bases.begin(), Bases.end(),
1070 std::inserter(Intersection, Intersection.begin()));
1071 if (Intersection.empty()) {
1072 // Empty intersection, split.
1073 CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
1074 return true;
1075 }
1076 }
1077 CHR_DEBUG(dbgs() << "No split\n");
1078 return false; // Don't split.
1079}
1080
1081static void GetSelectsInScope(CHRScope *Scope,
1082 DenseSet<Instruction *> &Output) {
1083 for (RegInfo &RI : Scope->RegInfos) {
1084 for (SelectInst *SI : RI.Selects) {
1085 Output.insert(SI);
1086 }
1087 }
1088 for (CHRScope *Sub : Scope->Subs) {
1089 GetSelectsInScope(Sub, Output);
1090 }
1091}
1092
1093void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
1094 SmallVectorImpl<CHRScope *> &Output) {
1095 for (CHRScope *Scope : Input) {
1096 assert(!Scope->BranchInsertPoint &&
1097 "BranchInsertPoint must not be set");
1098 DenseSet<Instruction *> Unhoistables;
1099 GetSelectsInScope(Scope, Unhoistables);
1100 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
1101 }
1102#ifndef NDEBUG
1103 for (CHRScope *Scope : Output) {
1104 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
1105 }
1106#endif
1107}
1108
1109SmallVector<CHRScope *, 8> CHR::splitScope(
1110 CHRScope *Scope,
1111 CHRScope *Outer,
1112 DenseSet<Value *> *OuterConditionValues,
1113 Instruction *OuterInsertPoint,
1114 SmallVectorImpl<CHRScope *> &Output,
1115 DenseSet<Instruction *> &Unhoistables) {
1116 if (Outer) {
1117 assert(OuterConditionValues && "Null OuterConditionValues");
1118 assert(OuterInsertPoint && "Null OuterInsertPoint");
1119 }
1120 bool PrevSplitFromOuter = true;
1121 DenseSet<Value *> PrevConditionValues;
1122 Instruction *PrevInsertPoint = nullptr;
1123 SmallVector<CHRScope *, 8> Splits;
1124 SmallVector<bool, 8> SplitsSplitFromOuter;
1125 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
1126 SmallVector<Instruction *, 8> SplitsInsertPoints;
1127 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy
1128 for (RegInfo &RI : RegInfos) {
1129 Instruction *InsertPoint = getBranchInsertPoint(RI);
1130 DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI);
1131 CHR_DEBUG(
1132 dbgs() << "ConditionValues ";
1133 for (Value *V : ConditionValues) {
1134 dbgs() << *V << ", ";
1135 }
1136 dbgs() << "\n");
1137 if (RI.R == RegInfos[0].R) {
1138 // First iteration. Check to see if we should split from the outer.
1139 if (Outer) {
1140 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
1141 CHR_DEBUG(dbgs() << "Should split from outer at "
1142 << RI.R->getNameStr() << "\n");
1143 if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
1144 ConditionValues, DT, Unhoistables)) {
1145 PrevConditionValues = ConditionValues;
1146 PrevInsertPoint = InsertPoint;
1147 } else {
1148 // Not splitting from the outer. Use the outer bases and insert
1149 // point. Union the bases.
1150 PrevSplitFromOuter = false;
1151 PrevConditionValues = *OuterConditionValues;
1152 PrevConditionValues.insert(ConditionValues.begin(),
1153 ConditionValues.end());
1154 PrevInsertPoint = OuterInsertPoint;
1155 }
1156 } else {
1157 CHR_DEBUG(dbgs() << "Outer null\n");
1158 PrevConditionValues = ConditionValues;
1159 PrevInsertPoint = InsertPoint;
1160 }
1161 } else {
1162 CHR_DEBUG(dbgs() << "Should split from prev at "
1163 << RI.R->getNameStr() << "\n");
1164 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
1165 DT, Unhoistables)) {
1166 CHRScope *Tail = Scope->split(RI.R);
1167 Scopes.insert(Tail);
1168 Splits.push_back(Scope);
1169 SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1170 SplitsConditionValues.push_back(PrevConditionValues);
1171 SplitsInsertPoints.push_back(PrevInsertPoint);
1172 Scope = Tail;
1173 PrevConditionValues = ConditionValues;
1174 PrevInsertPoint = InsertPoint;
1175 PrevSplitFromOuter = true;
1176 } else {
1177 // Not splitting. Union the bases. Keep the hoist point.
1178 PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
1179 }
1180 }
1181 }
1182 Splits.push_back(Scope);
1183 SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1184 SplitsConditionValues.push_back(PrevConditionValues);
1185 assert(PrevInsertPoint && "Null PrevInsertPoint");
1186 SplitsInsertPoints.push_back(PrevInsertPoint);
1187 assert(Splits.size() == SplitsConditionValues.size() &&
1188 Splits.size() == SplitsSplitFromOuter.size() &&
1189 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
1190 for (size_t I = 0; I < Splits.size(); ++I) {
1191 CHRScope *Split = Splits[I];
1192 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
1193 Instruction *SplitInsertPoint = SplitsInsertPoints[I];
1194 SmallVector<CHRScope *, 8> NewSubs;
1195 DenseSet<Instruction *> SplitUnhoistables;
1196 GetSelectsInScope(Split, SplitUnhoistables);
1197 for (CHRScope *Sub : Split->Subs) {
1198 SmallVector<CHRScope *, 8> SubSplits = splitScope(
1199 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
1200 SplitUnhoistables);
1201 NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end());
1202 }
1203 Split->Subs = NewSubs;
1204 }
1205 SmallVector<CHRScope *, 8> Result;
1206 for (size_t I = 0; I < Splits.size(); ++I) {
1207 CHRScope *Split = Splits[I];
1208 if (SplitsSplitFromOuter[I]) {
1209 // Split from the outer.
1210 Output.push_back(Split);
1211 Split->BranchInsertPoint = SplitsInsertPoints[I];
1212 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
1213 << "\n");
1214 } else {
1215 // Connected to the outer.
1216 Result.push_back(Split);
1217 }
1218 }
1219 if (!Outer)
1220 assert(Result.empty() &&
1221 "If no outer (top-level), must return no nested ones");
1222 return Result;
1223}
1224
1225void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
1226 for (CHRScope *Scope : Scopes) {
1227 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
1228 classifyBiasedScopes(Scope, Scope);
1229 CHR_DEBUG(
1230 dbgs() << "classifyBiasedScopes " << *Scope << "\n";
1231 dbgs() << "TrueBiasedRegions ";
1232 for (Region *R : Scope->TrueBiasedRegions) {
1233 dbgs() << R->getNameStr() << ", ";
1234 }
1235 dbgs() << "\n";
1236 dbgs() << "FalseBiasedRegions ";
1237 for (Region *R : Scope->FalseBiasedRegions) {
1238 dbgs() << R->getNameStr() << ", ";
1239 }
1240 dbgs() << "\n";
1241 dbgs() << "TrueBiasedSelects ";
1242 for (SelectInst *SI : Scope->TrueBiasedSelects) {
1243 dbgs() << *SI << ", ";
1244 }
1245 dbgs() << "\n";
1246 dbgs() << "FalseBiasedSelects ";
1247 for (SelectInst *SI : Scope->FalseBiasedSelects) {
1248 dbgs() << *SI << ", ";
1249 }
1250 dbgs() << "\n";);
1251 }
1252}
1253
1254void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
1255 for (RegInfo &RI : Scope->RegInfos) {
1256 if (RI.HasBranch) {
1257 Region *R = RI.R;
1258 if (TrueBiasedRegionsGlobal.count(R) > 0)
1259 OutermostScope->TrueBiasedRegions.insert(R);
1260 else if (FalseBiasedRegionsGlobal.count(R) > 0)
1261 OutermostScope->FalseBiasedRegions.insert(R);
1262 else
1263 llvm_unreachable("Must be biased");
1264 }
1265 for (SelectInst *SI : RI.Selects) {
1266 if (TrueBiasedSelectsGlobal.count(SI) > 0)
1267 OutermostScope->TrueBiasedSelects.insert(SI);
1268 else if (FalseBiasedSelectsGlobal.count(SI) > 0)
1269 OutermostScope->FalseBiasedSelects.insert(SI);
1270 else
1271 llvm_unreachable("Must be biased");
1272 }
1273 }
1274 for (CHRScope *Sub : Scope->Subs) {
1275 classifyBiasedScopes(Sub, OutermostScope);
1276 }
1277}
1278
1279static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
1280 unsigned NumBiased = Scope->TrueBiasedRegions.size() +
1281 Scope->FalseBiasedRegions.size() +
1282 Scope->TrueBiasedSelects.size() +
1283 Scope->FalseBiasedSelects.size();
1284 return NumBiased >= CHRMergeThreshold;
1285}
1286
1287void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
1288 SmallVectorImpl<CHRScope *> &Output) {
1289 for (CHRScope *Scope : Input) {
1290 // Filter out the ones with only one region and no subs.
1291 if (!hasAtLeastTwoBiasedBranches(Scope)) {
1292 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
1293 << Scope->TrueBiasedRegions.size()
1294 << " falsy-regions " << Scope->FalseBiasedRegions.size()
1295 << " true-selects " << Scope->TrueBiasedSelects.size()
1296 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
1297 continue;
1298 }
1299 Output.push_back(Scope);
1300 }
1301}
1302
1303void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
1304 SmallVectorImpl<CHRScope *> &Output) {
1305 for (CHRScope *Scope : Input) {
1306 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
1307 "Empty");
1308 setCHRRegions(Scope, Scope);
1309 Output.push_back(Scope);
1310 CHR_DEBUG(
1311 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
1312 for (auto pair : Scope->HoistStopMap) {
1313 Region *R = pair.first;
1314 dbgs() << "Region " << R->getNameStr() << "\n";
1315 for (Instruction *I : pair.second) {
1316 dbgs() << "HoistStop " << *I << "\n";
1317 }
1318 }
1319 dbgs() << "CHRRegions" << "\n";
1320 for (RegInfo &RI : Scope->CHRRegions) {
1321 dbgs() << RI.R->getNameStr() << "\n";
1322 });
1323 }
1324}
1325
1326void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
1327 DenseSet<Instruction *> Unhoistables;
1328 // Put the biased selects in Unhoistables because they should stay where they
1329 // are and constant-folded after CHR (in case one biased select or a branch
1330 // can depend on another biased select.)
1331 for (RegInfo &RI : Scope->RegInfos) {
1332 for (SelectInst *SI : RI.Selects) {
1333 Unhoistables.insert(SI);
1334 }
1335 }
1336 Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
1337 for (RegInfo &RI : Scope->RegInfos) {
1338 Region *R = RI.R;
1339 DenseSet<Instruction *> HoistStops;
1340 bool IsHoisted = false;
1341 if (RI.HasBranch) {
1342 assert((OutermostScope->TrueBiasedRegions.count(R) > 0 ||
1343 OutermostScope->FalseBiasedRegions.count(R) > 0) &&
1344 "Must be truthy or falsy");
1345 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1346 // Note checkHoistValue fills in HoistStops.
1347 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
1348 Unhoistables, &HoistStops);
1349 assert(IsHoistable && "Must be hoistable");
1350 (void)(IsHoistable); // Unused in release build
1351 IsHoisted = true;
1352 }
1353 for (SelectInst *SI : RI.Selects) {
1354 assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 ||
1355 OutermostScope->FalseBiasedSelects.count(SI) > 0) &&
1356 "Must be true or false biased");
1357 // Note checkHoistValue fills in HoistStops.
1358 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
1359 Unhoistables, &HoistStops);
1360 assert(IsHoistable && "Must be hoistable");
1361 (void)(IsHoistable); // Unused in release build
1362 IsHoisted = true;
1363 }
1364 if (IsHoisted) {
1365 OutermostScope->CHRRegions.push_back(RI);
1366 OutermostScope->HoistStopMap[R] = HoistStops;
1367 }
1368 }
1369 for (CHRScope *Sub : Scope->Subs)
1370 setCHRRegions(Sub, OutermostScope);
1371}
1372
1373bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
1374 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
1375}
1376
1377void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
1378 SmallVectorImpl<CHRScope *> &Output) {
1379 Output.resize(Input.size());
1380 std::copy(Input.begin(), Input.end(), Output.begin());
1381 std::stable_sort(Output.begin(), Output.end(), CHRScopeSorter);
1382}
1383
1384// Return true if V is already hoisted or was hoisted (along with its operands)
1385// to the insert point.
1386static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
1387 HoistStopMapTy &HoistStopMap,
1388 DenseSet<Instruction *> &HoistedSet,
1389 DenseSet<PHINode *> &TrivialPHIs) {
1390 auto IT = HoistStopMap.find(R);
1391 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
1392 DenseSet<Instruction *> &HoistStops = IT->second;
1393 if (auto *I = dyn_cast<Instruction>(V)) {
1394 if (I == HoistPoint)
1395 return;
1396 if (HoistStops.count(I))
1397 return;
1398 if (auto *PN = dyn_cast<PHINode>(I))
1399 if (TrivialPHIs.count(PN))
1400 // The trivial phi inserted by the previous CHR scope could replace a
1401 // non-phi in HoistStops. Note that since this phi is at the exit of a
1402 // previous CHR scope, which dominates this scope, it's safe to stop
1403 // hoisting there.
1404 return;
1405 if (HoistedSet.count(I))
1406 // Already hoisted, return.
1407 return;
1408 assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
1409 for (Value *Op : I->operands()) {
1410 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs);
1411 }
1412 I->moveBefore(HoistPoint);
1413 HoistedSet.insert(I);
1414 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
1415 }
1416}
1417
1418// Hoist the dependent condition values of the branches and the selects in the
1419// scope to the insert point.
1420static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
1421 DenseSet<PHINode *> &TrivialPHIs) {
1422 DenseSet<Instruction *> HoistedSet;
1423 for (const RegInfo &RI : Scope->CHRRegions) {
1424 Region *R = RI.R;
1425 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1426 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1427 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1428 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1429 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1430 HoistedSet, TrivialPHIs);
1431 }
1432 for (SelectInst *SI : RI.Selects) {
1433 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1434 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1435 if (!(IsTrueBiased || IsFalseBiased))
1436 continue;
1437 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1438 HoistedSet, TrivialPHIs);
1439 }
1440 }
1441}
1442
1443// Negate the predicate if an ICmp if it's used only by branches or selects by
1444// swapping the operands of the branches or the selects. Returns true if success.
1445static bool NegateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
1446 Instruction *ExcludedUser,
1447 CHRScope *Scope) {
1448 for (User *U : ICmp->users()) {
1449 if (U == ExcludedUser)
1450 continue;
1451 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
1452 continue;
1453 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
1454 continue;
1455 return false;
1456 }
1457 for (User *U : ICmp->users()) {
1458 if (U == ExcludedUser)
1459 continue;
1460 if (auto *BI = dyn_cast<BranchInst>(U)) {
1461 assert(BI->isConditional() && "Must be conditional");
1462 BI->swapSuccessors();
1463 // Don't need to swap this in terms of
1464 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
1465 // mean whehter the branch is likely go into the if-then rather than
1466 // successor0/successor1 and because we can tell which edge is the then or
1467 // the else one by comparing the destination to the region exit block.
1468 continue;
1469 }
1470 if (auto *SI = dyn_cast<SelectInst>(U)) {
1471 // Swap operands
1472 Value *TrueValue = SI->getTrueValue();
1473 Value *FalseValue = SI->getFalseValue();
1474 SI->setTrueValue(FalseValue);
1475 SI->setFalseValue(TrueValue);
1476 SI->swapProfMetadata();
1477 if (Scope->TrueBiasedSelects.count(SI)) {
1478 assert(Scope->FalseBiasedSelects.count(SI) == 0 &&
1479 "Must not be already in");
1480 Scope->FalseBiasedSelects.insert(SI);
1481 } else if (Scope->FalseBiasedSelects.count(SI)) {
1482 assert(Scope->TrueBiasedSelects.count(SI) == 0 &&
1483 "Must not be already in");
1484 Scope->TrueBiasedSelects.insert(SI);
1485 }
1486 continue;
1487 }
1488 llvm_unreachable("Must be a branch or a select");
1489 }
1490 ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate()));
1491 return true;
1492}
1493
1494// A helper for transformScopes. Insert a trivial phi at the scope exit block
1495// for a value that's defined in the scope but used outside it (meaning it's
1496// alive at the exit block).
1497static void insertTrivialPHIs(CHRScope *Scope,
1498 BasicBlock *EntryBlock, BasicBlock *ExitBlock,
1499 DenseSet<PHINode *> &TrivialPHIs) {
1500 DenseSet<BasicBlock *> BlocksInScopeSet;
1501 SmallVector<BasicBlock *, 8> BlocksInScopeVec;
1502 for (RegInfo &RI : Scope->RegInfos) {
1503 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1504 // sub-Scopes.
1505 BlocksInScopeSet.insert(BB);
1506 BlocksInScopeVec.push_back(BB);
1507 }
1508 }
1509 CHR_DEBUG(
1510 dbgs() << "Inserting redudant phis\n";
1511 for (BasicBlock *BB : BlocksInScopeVec) {
1512 dbgs() << "BlockInScope " << BB->getName() << "\n";
1513 });
1514 for (BasicBlock *BB : BlocksInScopeVec) {
1515 for (Instruction &I : *BB) {
1516 SmallVector<Instruction *, 8> Users;
1517 for (User *U : I.users()) {
1518 if (auto *UI = dyn_cast<Instruction>(U)) {
1519 if (BlocksInScopeSet.count(UI->getParent()) == 0 &&
1520 // Unless there's already a phi for I at the exit block.
1521 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
1522 CHR_DEBUG(dbgs() << "V " << I << "\n");
1523 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
1524 Users.push_back(UI);
1525 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
1526 // There's a loop backedge from a block that's dominated by this
1527 // scope to the entry block.
1528 CHR_DEBUG(dbgs() << "V " << I << "\n");
1529 CHR_DEBUG(dbgs()
1530 << "Used at entry block (for a back edge) by a phi user "
1531 << *UI << "\n");
1532 Users.push_back(UI);
1533 }
1534 }
1535 }
1536 if (Users.size() > 0) {
1537 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
1538 // ExitBlock. Replace I with the new phi in UI unless UI is another
1539 // phi at ExitBlock.
1540 unsigned PredCount = std::distance(pred_begin(ExitBlock),
1541 pred_end(ExitBlock));
1542 PHINode *PN = PHINode::Create(I.getType(), PredCount, "",
1543 &ExitBlock->front());
1544 for (BasicBlock *Pred : predecessors(ExitBlock)) {
1545 PN->addIncoming(&I, Pred);
1546 }
1547 TrivialPHIs.insert(PN);
1548 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
1549 for (Instruction *UI : Users) {
1550 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
1551 if (UI->getOperand(J) == &I) {
1552 UI->setOperand(J, PN);
1553 }
1554 }
1555 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
1556 }
1557 }
1558 }
1559 }
1560}
1561
1562// Assert that all the CHR regions of the scope have a biased branch or select.
1563static void assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
1564#ifndef NDEBUG
1565 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
1566 if (Scope->TrueBiasedRegions.count(RI.R) ||
1567 Scope->FalseBiasedRegions.count(RI.R))
1568 return true;
1569 for (SelectInst *SI : RI.Selects)
1570 if (Scope->TrueBiasedSelects.count(SI) ||
1571 Scope->FalseBiasedSelects.count(SI))
1572 return true;
1573 return false;
1574 };
1575 for (RegInfo &RI : Scope->CHRRegions) {
1576 assert(HasBiasedBranchOrSelect(RI, Scope) &&
1577 "Must have biased branch or select");
1578 }
1579#endif
1580}
1581
1582// Assert that all the condition values of the biased branches and selects have
1583// been hoisted to the pre-entry block or outside of the scope.
1584static void assertBranchOrSelectConditionHoisted(CHRScope *Scope,
1585 BasicBlock *PreEntryBlock) {
1586 CHR_DEBUG(dbgs() << "Biased regions condition values \n");
1587 for (RegInfo &RI : Scope->CHRRegions) {
1588 Region *R = RI.R;
1589 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1590 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1591 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1592 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1593 Value *V = BI->getCondition();
1594 CHR_DEBUG(dbgs() << *V << "\n");
1595 if (auto *I = dyn_cast<Instruction>(V)) {
1596 assert((I->getParent() == PreEntryBlock ||
1597 !Scope->contains(I)) &&
1598 "Must have been hoisted to PreEntryBlock or outside the scope");
1599 }
1600 }
1601 for (SelectInst *SI : RI.Selects) {
1602 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1603 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1604 if (!(IsTrueBiased || IsFalseBiased))
1605 continue;
1606 Value *V = SI->getCondition();
1607 CHR_DEBUG(dbgs() << *V << "\n");
1608 if (auto *I = dyn_cast<Instruction>(V)) {
1609 assert((I->getParent() == PreEntryBlock ||
1610 !Scope->contains(I)) &&
1611 "Must have been hoisted to PreEntryBlock or outside the scope");
1612 }
1613 }
1614 }
1615}
1616
1617void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
1618 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
1619
1620 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
1621 Region *FirstRegion = Scope->RegInfos[0].R;
1622 BasicBlock *EntryBlock = FirstRegion->getEntry();
1623 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
1624 BasicBlock *ExitBlock = LastRegion->getExit();
1625 Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
1626
1627 if (ExitBlock) {
1628 // Insert a trivial phi at the exit block (where the CHR hot path and the
1629 // cold path merges) for a value that's defined in the scope but used
1630 // outside it (meaning it's alive at the exit block). We will add the
1631 // incoming values for the CHR cold paths to it below. Without this, we'd
1632 // miss updating phi's for such values unless there happens to already be a
1633 // phi for that value there.
1634 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
1635 }
1636
1637 // Split the entry block of the first region. The new block becomes the new
1638 // entry block of the first region. The old entry block becomes the block to
1639 // insert the CHR branch into. Note DT gets updated. Since DT gets updated
1640 // through the split, we update the entry of the first region after the split,
1641 // and Region only points to the entry and the exit blocks, rather than
1642 // keeping everything in a list or set, the blocks membership and the
1643 // entry/exit blocks of the region are still valid after the split.
1644 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
1645 << " at " << *Scope->BranchInsertPoint << "\n");
1646 BasicBlock *NewEntryBlock =
1647 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
1648 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1649 "NewEntryBlock's only pred must be EntryBlock");
1650 FirstRegion->replaceEntryRecursive(NewEntryBlock);
1651 BasicBlock *PreEntryBlock = EntryBlock;
1652
1653 ValueToValueMapTy VMap;
1654 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
1655 // hot path (originals) and a cold path (clones) and update the PHIs at the
1656 // exit block.
1657 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
1658
1659 // Replace the old (placeholder) branch with the new (merged) conditional
1660 // branch.
1661 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
1662 NewEntryBlock, VMap);
1663
1664#ifndef NDEBUG
1665 assertCHRRegionsHaveBiasedBranchOrSelect(Scope);
1666#endif
1667
1668 // Hoist the conditional values of the branches/selects.
1669 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs);
1670
1671#ifndef NDEBUG
1672 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
1673#endif
1674
1675 // Create the combined branch condition and constant-fold the branches/selects
1676 // in the hot path.
1677 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
1678 ProfileCount ? ProfileCount.getValue() : 0);
1679}
1680
1681// A helper for transformScopes. Clone the blocks in the scope (excluding the
1682// PreEntryBlock) to split into a hot path and a cold path and update the PHIs
1683// at the exit block.
1684void CHR::cloneScopeBlocks(CHRScope *Scope,
1685 BasicBlock *PreEntryBlock,
1686 BasicBlock *ExitBlock,
1687 Region *LastRegion,
1688 ValueToValueMapTy &VMap) {
1689 // Clone all the blocks. The original blocks will be the hot-path
1690 // CHR-optimized code and the cloned blocks will be the original unoptimized
1691 // code. This is so that the block pointers from the
1692 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
1693 // which CHR should apply to.
1694 SmallVector<BasicBlock*, 8> NewBlocks;
1695 for (RegInfo &RI : Scope->RegInfos)
1696 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1697 // sub-Scopes.
1698 assert(BB != PreEntryBlock && "Don't copy the preetntry block");
1699 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
1700 NewBlocks.push_back(NewBB);
1701 VMap[BB] = NewBB;
1702 }
1703
1704 // Place the cloned blocks right after the original blocks (right before the
1705 // exit block of.)
1706 if (ExitBlock)
1707 F.getBasicBlockList().splice(ExitBlock->getIterator(),
1708 F.getBasicBlockList(),
1709 NewBlocks[0]->getIterator(), F.end());
1710
1711 // Update the cloned blocks/instructions to refer to themselves.
1712 for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
1713 for (Instruction &I : *NewBlocks[i])
1714 RemapInstruction(&I, VMap,
1715 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1716
1717 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
1718 // the top-level region but we don't need to add PHIs. The trivial PHIs
1719 // inserted above will be updated here.
1720 if (ExitBlock)
1721 for (PHINode &PN : ExitBlock->phis())
1722 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
1723 ++I) {
1724 BasicBlock *Pred = PN.getIncomingBlock(I);
1725 if (LastRegion->contains(Pred)) {
1726 Value *V = PN.getIncomingValue(I);
1727 auto It = VMap.find(V);
1728 if (It != VMap.end()) V = It->second;
1729 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
1730 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
1731 }
1732 }
1733}
1734
1735// A helper for transformScope. Replace the old (placeholder) branch with the
1736// new (merged) conditional branch.
1737BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
1738 BasicBlock *EntryBlock,
1739 BasicBlock *NewEntryBlock,
1740 ValueToValueMapTy &VMap) {
1741 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
1742 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
1743 "SplitBlock did not work correctly!");
1744 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1745 "NewEntryBlock's only pred must be EntryBlock");
1746 assert(VMap.find(NewEntryBlock) != VMap.end() &&
1747 "NewEntryBlock must have been copied");
1748 OldBR->removeFromParent();
1749 OldBR->dropAllReferences();
1750 // The true predicate is a placeholder. It will be replaced later in
1751 // fixupBranchesAndSelects().
1752 BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
1753 cast<BasicBlock>(VMap[NewEntryBlock]),
1754 ConstantInt::getTrue(F.getContext()));
1755 PreEntryBlock->getInstList().push_back(NewBR);
1756 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1757 "NewEntryBlock's only pred must be EntryBlock");
1758 return NewBR;
1759}
1760
1761// A helper for transformScopes. Create the combined branch condition and
1762// constant-fold the branches/selects in the hot path.
1763void CHR::fixupBranchesAndSelects(CHRScope *Scope,
1764 BasicBlock *PreEntryBlock,
1765 BranchInst *MergedBR,
1766 uint64_t ProfileCount) {
1767 Value *MergedCondition = ConstantInt::getTrue(F.getContext());
1768 BranchProbability CHRBranchBias(1, 1);
1769 uint64_t NumCHRedBranches = 0;
1770 IRBuilder<> IRB(PreEntryBlock->getTerminator());
1771 for (RegInfo &RI : Scope->CHRRegions) {
1772 Region *R = RI.R;
1773 if (RI.HasBranch) {
1774 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
1775 ++NumCHRedBranches;
1776 }
1777 for (SelectInst *SI : RI.Selects) {
1778 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
1779 ++NumCHRedBranches;
1780 }
1781 }
1782 Stats.NumBranchesDelta += NumCHRedBranches - 1;
1783 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
1784 MergedBR->setCondition(MergedCondition);
1785 SmallVector<uint32_t, 2> Weights;
1786 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.scale(1000)));
1787 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)));
1788 MDBuilder MDB(F.getContext());
1789 MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1790 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
1791 << "\n");
1792}
1793
1794// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1795// and constant-fold a branch in the hot path.
1796void CHR::fixupBranch(Region *R, CHRScope *Scope,
1797 IRBuilder<> &IRB,
1798 Value *&MergedCondition,
1799 BranchProbability &CHRBranchBias) {
1800 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1801 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
1802 "Must be truthy or falsy");
1803 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1804 assert(BranchBiasMap.find(R) != BranchBiasMap.end() &&
1805 "Must be in the bias map");
1806 BranchProbability Bias = BranchBiasMap[R];
1807 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1808 // Take the min.
1809 if (CHRBranchBias > Bias)
1810 CHRBranchBias = Bias;
1811 BasicBlock *IfThen = BI->getSuccessor(1);
1812 BasicBlock *IfElse = BI->getSuccessor(0);
1813 BasicBlock *RegionExitBlock = R->getExit();
1814 assert(RegionExitBlock && "Null ExitBlock");
1815 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
1816 IfThen != IfElse && "Invariant from findScopes");
1817 if (IfThen == RegionExitBlock) {
1818 // Swap them so that IfThen means going into it and IfElse means skipping
1819 // it.
1820 std::swap(IfThen, IfElse);
1821 }
1822 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
1823 << " IfElse " << IfElse->getName() << "\n");
1824 Value *Cond = BI->getCondition();
1825 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
1826 bool ConditionTrue = HotTarget == BI->getSuccessor(0);
1827 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
1828 MergedCondition);
1829 // Constant-fold the branch at ClonedEntryBlock.
1830 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
1831 "The successor shouldn't change");
1832 Value *NewCondition = ConditionTrue ?
1833 ConstantInt::getTrue(F.getContext()) :
1834 ConstantInt::getFalse(F.getContext());
1835 BI->setCondition(NewCondition);
1836}
1837
1838// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1839// and constant-fold a select in the hot path.
1840void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
1841 IRBuilder<> &IRB,
1842 Value *&MergedCondition,
1843 BranchProbability &CHRBranchBias) {
1844 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1845 assert((IsTrueBiased ||
1846 Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
1847 assert(SelectBiasMap.find(SI) != SelectBiasMap.end() &&
1848 "Must be in the bias map");
1849 BranchProbability Bias = SelectBiasMap[SI];
1850 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1851 // Take the min.
1852 if (CHRBranchBias > Bias)
1853 CHRBranchBias = Bias;
1854 Value *Cond = SI->getCondition();
1855 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
1856 MergedCondition);
1857 Value *NewCondition = IsTrueBiased ?
1858 ConstantInt::getTrue(F.getContext()) :
1859 ConstantInt::getFalse(F.getContext());
1860 SI->setCondition(NewCondition);
1861}
1862
1863// A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
1864// condition.
1865void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
1866 Instruction *BranchOrSelect,
1867 CHRScope *Scope,
1868 IRBuilder<> &IRB,
1869 Value *&MergedCondition) {
1870 if (IsTrueBiased) {
1871 MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
1872 } else {
1873 // If Cond is an icmp and all users of V except for BranchOrSelect is a
1874 // branch, negate the icmp predicate and swap the branch targets and avoid
1875 // inserting an Xor to negate Cond.
1876 bool Done = false;
1877 if (auto *ICmp = dyn_cast<ICmpInst>(Cond))
1878 if (NegateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) {
1879 MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
1880 Done = true;
1881 }
1882 if (!Done) {
1883 Value *Negate = IRB.CreateXor(
1884 ConstantInt::getTrue(F.getContext()), Cond);
1885 MergedCondition = IRB.CreateAnd(MergedCondition, Negate);
1886 }
1887 }
1888}
1889
1890void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
1891 unsigned i = 0;
1892 (void)(i); // Unused in release build.
1893 DenseSet<PHINode *> TrivialPHIs;
1894 for (CHRScope *Scope : CHRScopes) {
1895 transformScopes(Scope, TrivialPHIs);
1896 CHR_DEBUG(
1897 std::ostringstream oss;
1898 oss << " after transformScopes " << i++;
1899 dumpIR(F, oss.str().c_str(), nullptr));
1900 }
1901}
1902
1903static void dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char * Label) {
1904 dbgs() << Label << " " << Scopes.size() << "\n";
1905 for (CHRScope *Scope : Scopes) {
1906 dbgs() << *Scope << "\n";
1907 }
1908}
1909
1910bool CHR::run() {
1911 if (!shouldApply(F, PSI))
1912 return false;
1913
1914 CHR_DEBUG(dumpIR(F, "before", nullptr));
1915
1916 bool Changed = false;
1917 {
1918 CHR_DEBUG(
1919 dbgs() << "RegionInfo:\n";
1920 RI.print(dbgs()));
1921
1922 // Recursively traverse the region tree and find regions that have biased
1923 // branches and/or selects and create scopes.
1924 SmallVector<CHRScope *, 8> AllScopes;
1925 findScopes(AllScopes);
1926 CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
1927
1928 // Split the scopes if 1) the conditiona values of the biased
1929 // branches/selects of the inner/lower scope can't be hoisted up to the
1930 // outermost/uppermost scope entry, or 2) the condition values of the biased
1931 // branches/selects in a scope (including subscopes) don't share at least
1932 // one common value.
1933 SmallVector<CHRScope *, 8> SplitScopes;
1934 splitScopes(AllScopes, SplitScopes);
1935 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
1936
1937 // After splitting, set the biased regions and selects of a scope (a tree
1938 // root) that include those of the subscopes.
1939 classifyBiasedScopes(SplitScopes);
1940 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
1941
1942 // Filter out the scopes that has only one biased region or select (CHR
1943 // isn't useful in such a case).
1944 SmallVector<CHRScope *, 8> FilteredScopes;
1945 filterScopes(SplitScopes, FilteredScopes);
1946 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
1947
1948 // Set the regions to be CHR'ed and their hoist stops for each scope.
1949 SmallVector<CHRScope *, 8> SetScopes;
1950 setCHRRegions(FilteredScopes, SetScopes);
1951 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
1952
1953 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner
1954 // ones. We need to apply CHR from outer to inner so that we apply CHR only
1955 // to the hot path, rather than both hot and cold paths.
1956 SmallVector<CHRScope *, 8> SortedScopes;
1957 sortScopes(SetScopes, SortedScopes);
1958 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
1959
1960 CHR_DEBUG(
1961 dbgs() << "RegionInfo:\n";
1962 RI.print(dbgs()));
1963
1964 // Apply the CHR transformation.
1965 if (!SortedScopes.empty()) {
1966 transformScopes(SortedScopes);
1967 Changed = true;
1968 }
1969 }
1970
1971 if (Changed)
1972 CHR_DEBUG(dumpIR(F, "after", &Stats));
1973
1974 return Changed;
1975}
1976
1977bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) {
1978 BlockFrequencyInfo &BFI =
1979 getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
1980 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1981 ProfileSummaryInfo &PSI =
1982 *getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
1983 RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
1984 return CHR(F, BFI, DT, PSI, RI).run();
1985}
1986
1987namespace llvm {
1988
1989ControlHeightReductionPass::ControlHeightReductionPass() {
1990 ParseCHRFilterFiles();
1991}
1992
1993PreservedAnalyses ControlHeightReductionPass::run(
1994 Function &F,
1995 FunctionAnalysisManager &FAM) {
1996 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
1997 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
1998 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
1999 auto &MAM = MAMProxy.getManager();
2000 auto &PSI = *MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
2001 auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
2002 bool Changed = CHR(F, BFI, DT, PSI, RI).run();
2003 if (!Changed)
2004 return PreservedAnalyses::all();
2005 auto PA = PreservedAnalyses();
2006 PA.preserve<GlobalsAA>();
2007 return PA;
2008}
2009
2010} // namespace llvm