blob: 2e01047bc7da5a4ef82c4a85e8b0910e96c5c396 [file] [log] [blame]
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +00001//===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass merges conditional blocks of code and reduces the number of
11// conditional branches in the hot paths based on profiles.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
16#include "llvm/Transforms/Utils.h"
17#include "llvm/Transforms/Utils/BasicBlockUtils.h"
18#include "llvm/Transforms/Utils/Cloning.h"
19#include "llvm/Transforms/Utils/ValueMapper.h"
20#include "llvm/ADT/DenseMap.h"
21#include "llvm/ADT/DenseSet.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/StringSet.h"
24#include "llvm/Analysis/BlockFrequencyInfo.h"
25#include "llvm/Analysis/ProfileSummaryInfo.h"
26#include "llvm/Analysis/RegionInfo.h"
27#include "llvm/Analysis/RegionIterator.h"
28#include "llvm/Analysis/ValueTracking.h"
29#include "llvm/IR/CFG.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/IRBuilder.h"
32#include "llvm/IR/MDBuilder.h"
33#include "llvm/Support/BranchProbability.h"
34#include "llvm/Support/MemoryBuffer.h"
35#include "llvm/Transforms/Scalar.h"
36
Hiroshi Yamauchi72ee6d62018-09-04 18:10:54 +000037#if !defined(_MSC_VER)
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +000038#include <cxxabi.h>
Hiroshi Yamauchi72ee6d62018-09-04 18:10:54 +000039#endif
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +000040#include <set>
41#include <sstream>
42
43using namespace llvm;
44
45#define DEBUG_TYPE "chr"
46
47#define CHR_DEBUG(X) LLVM_DEBUG(X)
48
49static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
50 cl::desc("Apply CHR for all functions"));
51
52static cl::opt<double> CHRBiasThreshold(
53 "chr-bias-threshold", cl::init(0.99), cl::Hidden,
54 cl::desc("CHR considers a branch bias greater than this ratio as biased"));
55
56static cl::opt<unsigned> CHRMergeThreshold(
57 "chr-merge-threshold", cl::init(2), cl::Hidden,
58 cl::desc("CHR merges a group of N branches/selects where N >= this value"));
59
60static cl::opt<std::string> CHRModuleList(
61 "chr-module-list", cl::init(""), cl::Hidden,
62 cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
63
64static cl::opt<std::string> CHRFunctionList(
65 "chr-function-list", cl::init(""), cl::Hidden,
66 cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
67
68static StringSet<> CHRModules;
69static StringSet<> CHRFunctions;
70
71static void ParseCHRFilterFiles() {
72 if (!CHRModuleList.empty()) {
73 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
74 if (!FileOrErr) {
75 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
76 std::exit(1);
77 }
78 StringRef Buf = FileOrErr->get()->getBuffer();
79 SmallVector<StringRef, 0> Lines;
80 Buf.split(Lines, '\n');
81 for (StringRef Line : Lines) {
82 Line = Line.trim();
83 if (!Line.empty())
84 CHRModules.insert(Line);
85 }
86 }
87 if (!CHRFunctionList.empty()) {
88 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
89 if (!FileOrErr) {
90 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
91 std::exit(1);
92 }
93 StringRef Buf = FileOrErr->get()->getBuffer();
94 SmallVector<StringRef, 0> Lines;
95 Buf.split(Lines, '\n');
96 for (StringRef Line : Lines) {
97 Line = Line.trim();
98 if (!Line.empty())
99 CHRFunctions.insert(Line);
100 }
101 }
102}
103
104namespace {
105class ControlHeightReductionLegacyPass : public FunctionPass {
106public:
107 static char ID;
108
109 ControlHeightReductionLegacyPass() : FunctionPass(ID) {
110 initializeControlHeightReductionLegacyPassPass(
111 *PassRegistry::getPassRegistry());
112 ParseCHRFilterFiles();
113 }
114
115 bool runOnFunction(Function &F) override;
116 void getAnalysisUsage(AnalysisUsage &AU) const override {
117 AU.addRequired<BlockFrequencyInfoWrapperPass>();
118 AU.addRequired<DominatorTreeWrapperPass>();
119 AU.addRequired<ProfileSummaryInfoWrapperPass>();
120 AU.addRequired<RegionInfoPass>();
121 AU.addPreserved<GlobalsAAWrapperPass>();
122 }
123};
124} // end anonymous namespace
125
126char ControlHeightReductionLegacyPass::ID = 0;
127
128INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass,
129 "chr",
130 "Reduce control height in the hot paths",
131 false, false)
132INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
133INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
134INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
135INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
136INITIALIZE_PASS_END(ControlHeightReductionLegacyPass,
137 "chr",
138 "Reduce control height in the hot paths",
139 false, false)
140
141FunctionPass *llvm::createControlHeightReductionLegacyPass() {
142 return new ControlHeightReductionLegacyPass();
143}
144
145namespace {
146
147struct CHRStats {
148 CHRStats() : NumBranches(0), NumBranchesDelta(0),
149 WeightedNumBranchesDelta(0) {}
150 void print(raw_ostream &OS) const {
151 OS << "CHRStats: NumBranches " << NumBranches
152 << " NumBranchesDelta " << NumBranchesDelta
153 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
154 }
155 uint64_t NumBranches; // The original number of conditional branches /
156 // selects
157 uint64_t NumBranchesDelta; // The decrease of the number of conditional
158 // branches / selects in the hot paths due to CHR.
159 uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile
160 // count at the scope entry.
161};
162
163inline raw_ostream &operator<<(raw_ostream &OS, const CHRStats &Stats) {
164 Stats.print(OS);
165 return OS;
166}
167
168// RegInfo - some properties of a Region.
169struct RegInfo {
170 RegInfo() : R(nullptr), HasBranch(false) {}
171 RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {}
172 Region *R;
173 bool HasBranch;
174 SmallVector<SelectInst *, 8> Selects;
175};
176
177typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
178
179// CHRScope - a sequence of regions to CHR together. It corresponds to a
180// sequence of conditional blocks. It can have subscopes which correspond to
181// nested conditional blocks. Nested CHRScopes form a tree.
182class CHRScope {
183 public:
184 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
185 assert(RI.R && "Null RegionIn");
186 RegInfos.push_back(RI);
187 }
188
189 Region *getParentRegion() {
190 assert(RegInfos.size() > 0 && "Empty CHRScope");
191 Region *Parent = RegInfos[0].R->getParent();
192 assert(Parent && "Unexpected to call this on the top-level region");
193 return Parent;
194 }
195
196 BasicBlock *getEntryBlock() {
197 assert(RegInfos.size() > 0 && "Empty CHRScope");
198 return RegInfos.front().R->getEntry();
199 }
200
201 BasicBlock *getExitBlock() {
202 assert(RegInfos.size() > 0 && "Empty CHRScope");
203 return RegInfos.back().R->getExit();
204 }
205
206 bool appendable(CHRScope *Next) {
207 // The next scope is appendable only if this scope is directly connected to
208 // it (which implies it post-dominates this scope) and this scope dominates
209 // it (no edge to the next scope outside this scope).
210 BasicBlock *NextEntry = Next->getEntryBlock();
211 if (getExitBlock() != NextEntry)
212 // Not directly connected.
213 return false;
214 Region *LastRegion = RegInfos.back().R;
215 for (BasicBlock *Pred : predecessors(NextEntry))
216 if (!LastRegion->contains(Pred))
217 // There's an edge going into the entry of the next scope from outside
218 // of this scope.
219 return false;
220 return true;
221 }
222
223 void append(CHRScope *Next) {
224 assert(RegInfos.size() > 0 && "Empty CHRScope");
225 assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
226 assert(getParentRegion() == Next->getParentRegion() &&
227 "Must be siblings");
228 assert(getExitBlock() == Next->getEntryBlock() &&
229 "Must be adjacent");
230 for (RegInfo &RI : Next->RegInfos)
231 RegInfos.push_back(RI);
232 for (CHRScope *Sub : Next->Subs)
233 Subs.push_back(Sub);
234 }
235
236 void addSub(CHRScope *SubIn) {
237#ifndef NDEBUG
238 bool is_child = false;
239 for (RegInfo &RI : RegInfos)
240 if (RI.R == SubIn->getParentRegion()) {
241 is_child = true;
242 break;
243 }
244 assert(is_child && "Must be a child");
245#endif
246 Subs.push_back(SubIn);
247 }
248
249 // Split this scope at the boundary region into two, which will belong to the
250 // tail and returns the tail.
251 CHRScope *split(Region *Boundary) {
252 assert(Boundary && "Boundary null");
253 assert(RegInfos.begin()->R != Boundary &&
254 "Can't be split at beginning");
255 auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(),
256 [&Boundary](const RegInfo& RI) {
257 return Boundary == RI.R;
258 });
259 if (BoundaryIt == RegInfos.end())
260 return nullptr;
261 SmallVector<RegInfo, 8> TailRegInfos;
262 SmallVector<CHRScope *, 8> TailSubs;
263 TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end());
264 RegInfos.resize(BoundaryIt - RegInfos.begin());
265 DenseSet<Region *> TailRegionSet;
266 for (RegInfo &RI : TailRegInfos)
267 TailRegionSet.insert(RI.R);
268 for (auto It = Subs.begin(); It != Subs.end(); ) {
269 CHRScope *Sub = *It;
270 assert(Sub && "null Sub");
271 Region *Parent = Sub->getParentRegion();
272 if (TailRegionSet.count(Parent)) {
273 TailSubs.push_back(Sub);
274 It = Subs.erase(It);
275 } else {
276 assert(std::find_if(RegInfos.begin(), RegInfos.end(),
277 [&Parent](const RegInfo& RI) {
278 return Parent == RI.R;
279 }) != RegInfos.end() &&
280 "Must be in head");
281 ++It;
282 }
283 }
284 assert(HoistStopMap.empty() && "MapHoistStops must be empty");
285 return new CHRScope(TailRegInfos, TailSubs);
286 }
287
288 bool contains(Instruction *I) const {
289 BasicBlock *Parent = I->getParent();
290 for (const RegInfo &RI : RegInfos)
291 if (RI.R->contains(Parent))
292 return true;
293 return false;
294 }
295
296 void print(raw_ostream &OS) const;
297
298 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
299 SmallVector<CHRScope *, 8> Subs; // Subscopes.
300
301 // The instruction at which to insert the CHR conditional branch (and hoist
302 // the dependent condition values).
303 Instruction *BranchInsertPoint;
304
305 // True-biased and false-biased regions (conditional blocks),
306 // respectively. Used only for the outermost scope and includes regions in
307 // subscopes. The rest are unbiased.
308 DenseSet<Region *> TrueBiasedRegions;
309 DenseSet<Region *> FalseBiasedRegions;
310 // Among the biased regions, the regions that get CHRed.
311 SmallVector<RegInfo, 8> CHRRegions;
312
313 // True-biased and false-biased selects, respectively. Used only for the
314 // outermost scope and includes ones in subscopes.
315 DenseSet<SelectInst *> TrueBiasedSelects;
316 DenseSet<SelectInst *> FalseBiasedSelects;
317
318 // Map from one of the above regions to the instructions to stop
319 // hoisting instructions at through use-def chains.
320 HoistStopMapTy HoistStopMap;
321
322 private:
323 CHRScope(SmallVector<RegInfo, 8> &RegInfosIn,
324 SmallVector<CHRScope *, 8> &SubsIn)
325 : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {}
326};
327
328inline raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
329 Scope.print(OS);
330 return OS;
331}
332
333class CHR {
334 public:
335 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
336 ProfileSummaryInfo &PSIin, RegionInfo &RIin)
337 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin) {}
338
339 ~CHR() {
340 for (CHRScope *Scope : Scopes) {
341 delete Scope;
342 }
343 }
344
345 bool run();
346
347 private:
348 // See the comments in CHR::run() for the high level flow of the algorithm and
349 // what the following functions do.
350
351 void findScopes(SmallVectorImpl<CHRScope *> &Output) {
352 Region *R = RI.getTopLevelRegion();
353 CHRScope *Scope = findScopes(R, nullptr, nullptr, Output);
354 if (Scope) {
355 Output.push_back(Scope);
356 }
357 }
358 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
359 SmallVectorImpl<CHRScope *> &Scopes);
360 CHRScope *findScope(Region *R);
361 void checkScopeHoistable(CHRScope *Scope);
362
363 void splitScopes(SmallVectorImpl<CHRScope *> &Input,
364 SmallVectorImpl<CHRScope *> &Output);
365 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
366 CHRScope *Outer,
367 DenseSet<Value *> *OuterConditionValues,
368 Instruction *OuterInsertPoint,
369 SmallVectorImpl<CHRScope *> &Output,
370 DenseSet<Instruction *> &Unhoistables);
371
372 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
373 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
374
375 void filterScopes(SmallVectorImpl<CHRScope *> &Input,
376 SmallVectorImpl<CHRScope *> &Output);
377
378 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
379 SmallVectorImpl<CHRScope *> &Output);
380 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
381
382 void sortScopes(SmallVectorImpl<CHRScope *> &Input,
383 SmallVectorImpl<CHRScope *> &Output);
384
385 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
386 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
387 void cloneScopeBlocks(CHRScope *Scope,
388 BasicBlock *PreEntryBlock,
389 BasicBlock *ExitBlock,
390 Region *LastRegion,
391 ValueToValueMapTy &VMap);
392 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
393 BasicBlock *EntryBlock,
394 BasicBlock *NewEntryBlock,
395 ValueToValueMapTy &VMap);
396 void fixupBranchesAndSelects(CHRScope *Scope,
397 BasicBlock *PreEntryBlock,
398 BranchInst *MergedBR,
399 uint64_t ProfileCount);
400 void fixupBranch(Region *R,
401 CHRScope *Scope,
402 IRBuilder<> &IRB,
403 Value *&MergedCondition, BranchProbability &CHRBranchBias);
404 void fixupSelect(SelectInst* SI,
405 CHRScope *Scope,
406 IRBuilder<> &IRB,
407 Value *&MergedCondition, BranchProbability &CHRBranchBias);
408 void addToMergedCondition(bool IsTrueBiased, Value *Cond,
409 Instruction *BranchOrSelect,
410 CHRScope *Scope,
411 IRBuilder<> &IRB,
412 Value *&MergedCondition);
413
414 Function &F;
415 BlockFrequencyInfo &BFI;
416 DominatorTree &DT;
417 ProfileSummaryInfo &PSI;
418 RegionInfo &RI;
419 CHRStats Stats;
420
421 // All the true-biased regions in the function
422 DenseSet<Region *> TrueBiasedRegionsGlobal;
423 // All the false-biased regions in the function
424 DenseSet<Region *> FalseBiasedRegionsGlobal;
425 // All the true-biased selects in the function
426 DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
427 // All the false-biased selects in the function
428 DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
429 // A map from biased regions to their branch bias
430 DenseMap<Region *, BranchProbability> BranchBiasMap;
431 // A map from biased selects to their branch bias
432 DenseMap<SelectInst *, BranchProbability> SelectBiasMap;
433 // All the scopes.
434 DenseSet<CHRScope *> Scopes;
435};
436
437} // end anonymous namespace
438
439static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) {
440 if (ForceCHR)
441 return true;
442
443 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
444 if (CHRModules.count(F.getParent()->getName()))
445 return true;
446 StringRef Name = F.getName();
447 if (CHRFunctions.count(Name))
448 return true;
449 const char* DemangledName = nullptr;
450 int Status = -1;
Hiroshi Yamauchi72ee6d62018-09-04 18:10:54 +0000451#if !defined(_MSC_VER)
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +0000452 DemangledName = abi::__cxa_demangle(Name.str().c_str(),
453 nullptr, nullptr, &Status);
Hiroshi Yamauchi72ee6d62018-09-04 18:10:54 +0000454#endif
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +0000455 return DemangledName && CHRFunctions.count(DemangledName);
456 }
457
458 assert(PSI.hasProfileSummary() && "Empty PSI?");
459 return PSI.isFunctionEntryHot(&F);
460}
461
462static void dumpIR(Function &F, const char *Label, CHRStats *Stats) {
463 std::string Name = F.getName().str();
464 const char *DemangledName = nullptr;
465 int Status = -1;
Hiroshi Yamauchi72ee6d62018-09-04 18:10:54 +0000466#if !defined(_MSC_VER)
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +0000467 DemangledName = abi::__cxa_demangle(Name.c_str(),
468 nullptr, nullptr, &Status);
Hiroshi Yamauchi72ee6d62018-09-04 18:10:54 +0000469#endif
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +0000470 if (DemangledName == nullptr) {
471 DemangledName = "<NOT-MANGLED>";
472 }
473 std::string ModuleName = F.getParent()->getName().str();
474 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
475 << Name);
476 if (Stats)
477 CHR_DEBUG(dbgs() << " " << *Stats);
478 CHR_DEBUG(dbgs() << "\n");
479 CHR_DEBUG(F.dump());
480}
481
482
483void CHRScope::print(raw_ostream &OS) const {
484 assert(RegInfos.size() > 0 && "Empty CHRScope");
485 OS << "CHRScope[";
486 OS << RegInfos.size() << ", Regions[";
487 for (const RegInfo &RI : RegInfos) {
488 OS << RI.R->getNameStr();
489 if (RI.HasBranch)
490 OS << " B";
491 if (RI.Selects.size() > 0)
492 OS << " S" << RI.Selects.size();
493 OS << ", ";
494 }
495 if (RegInfos[0].R->getParent()) {
496 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
497 } else {
498 // top level region
499 OS << "]";
500 }
501 OS << ", Subs[";
502 for (CHRScope *Sub : Subs) {
503 OS << *Sub << ", ";
504 }
505 OS << "]]";
506}
507
508// Return true if the given instruction type can be hoisted by CHR.
509static bool isHoistableInstructionType(Instruction *I) {
510 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) ||
511 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
512 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
513 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
514 isa<InsertValueInst>(I);
515}
516
517// Return true if the given instruction can be hoisted by CHR.
518static bool isHoistable(Instruction *I, DominatorTree &DT) {
519 if (!isHoistableInstructionType(I))
520 return false;
521 return isSafeToSpeculativelyExecute(I, nullptr, &DT);
522}
523
524// Recursively traverse the use-def chains of the given value and return a set
525// of the unhoistable base values defined within the scope (excluding the
526// first-region entry block) or the (hoistable or unhoistable) base values that
527// are defined outside (including the first-region entry block) of the
528// scope. The returned set doesn't include constants.
529static std::set<Value *> getBaseValues(Value *V,
530 DominatorTree &DT) {
531 std::set<Value *> Result;
532 if (auto *I = dyn_cast<Instruction>(V)) {
533 // We don't stop at a block that's not in the Scope because we would miss some
534 // instructions that are based on the same base values if we stop there.
535 if (!isHoistable(I, DT)) {
536 Result.insert(I);
537 return Result;
538 }
539 // I is hoistable above the Scope.
540 for (Value *Op : I->operands()) {
541 std::set<Value *> OpResult = getBaseValues(Op, DT);
542 Result.insert(OpResult.begin(), OpResult.end());
543 }
544 return Result;
545 }
546 if (isa<Argument>(V)) {
547 Result.insert(V);
548 return Result;
549 }
550 // We don't include others like constants because those won't lead to any
551 // chance of folding of conditions (eg two bit checks merged into one check)
552 // after CHR.
553 return Result; // empty
554}
555
556// Return true if V is already hoisted or can be hoisted (along with its
557// operands) above the insert point. When it returns true and HoistStops is
558// non-null, the instructions to stop hoisting at through the use-def chains are
559// inserted into HoistStops.
560static bool
561checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
562 DenseSet<Instruction *> &Unhoistables,
563 DenseSet<Instruction *> *HoistStops) {
564 assert(InsertPoint && "Null InsertPoint");
565 if (auto *I = dyn_cast<Instruction>(V)) {
566 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
567 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
568 if (Unhoistables.count(I)) {
569 // Don't hoist if they are not to be hoisted.
570 return false;
571 }
572 if (DT.dominates(I, InsertPoint)) {
573 // We are already above the insert point. Stop here.
574 if (HoistStops)
575 HoistStops->insert(I);
576 return true;
577 }
578 // We aren't not above the insert point, check if we can hoist it above the
579 // insert point.
580 if (isHoistable(I, DT)) {
581 // Check operands first.
582 DenseSet<Instruction *> OpsHoistStops;
583 bool AllOpsHoisted = true;
584 for (Value *Op : I->operands()) {
585 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops)) {
586 AllOpsHoisted = false;
587 break;
588 }
589 }
590 if (AllOpsHoisted) {
591 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
592 if (HoistStops)
593 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
594 return true;
595 }
596 }
597 return false;
598 }
599 // Non-instructions are considered hoistable.
600 return true;
601}
602
603// Returns true and sets the true probability and false probability of an
604// MD_prof metadata if it's well-formed.
605static bool CheckMDProf(MDNode *MD, BranchProbability &TrueProb,
606 BranchProbability &FalseProb) {
607 if (!MD) return false;
608 MDString *MDName = cast<MDString>(MD->getOperand(0));
609 if (MDName->getString() != "branch_weights" ||
610 MD->getNumOperands() != 3)
611 return false;
612 ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1));
613 ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2));
614 if (!TrueWeight || !FalseWeight)
615 return false;
616 APInt TrueWt = TrueWeight->getValue();
617 APInt FalseWt = FalseWeight->getValue();
618 APInt SumWt = TrueWt + FalseWt;
619 TrueProb = BranchProbability::getBranchProbability(TrueWt.getZExtValue(),
620 SumWt.getZExtValue());
621 FalseProb = BranchProbability::getBranchProbability(FalseWt.getZExtValue(),
622 SumWt.getZExtValue());
623 return true;
624}
625
626static BranchProbability getCHRBiasThreshold() {
627 return BranchProbability::getBranchProbability(
628 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
629}
630
631// A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >=
632// CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
633// CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
634// false.
635template<typename K, typename S, typename M>
636bool CheckBias(K *Key, BranchProbability TrueProb, BranchProbability FalseProb,
637 S &TrueSet, S &FalseSet, M &BiasMap) {
638 BranchProbability Threshold = getCHRBiasThreshold();
639 if (TrueProb >= Threshold) {
640 TrueSet.insert(Key);
641 BiasMap[Key] = TrueProb;
642 return true;
643 } else if (FalseProb >= Threshold) {
644 FalseSet.insert(Key);
645 BiasMap[Key] = FalseProb;
646 return true;
647 }
648 return false;
649}
650
651// Returns true and insert a region into the right biased set and the map if the
652// branch of the region is biased.
653static bool CheckBiasedBranch(BranchInst *BI, Region *R,
654 DenseSet<Region *> &TrueBiasedRegionsGlobal,
655 DenseSet<Region *> &FalseBiasedRegionsGlobal,
656 DenseMap<Region *, BranchProbability> &BranchBiasMap) {
657 if (!BI->isConditional())
658 return false;
659 BranchProbability ThenProb, ElseProb;
660 if (!CheckMDProf(BI->getMetadata(LLVMContext::MD_prof),
661 ThenProb, ElseProb))
662 return false;
663 BasicBlock *IfThen = BI->getSuccessor(0);
664 BasicBlock *IfElse = BI->getSuccessor(1);
665 assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
666 IfThen != IfElse &&
667 "Invariant from findScopes");
668 if (IfThen == R->getExit()) {
669 // Swap them so that IfThen/ThenProb means going into the conditional code
670 // and IfElse/ElseProb means skipping it.
671 std::swap(IfThen, IfElse);
672 std::swap(ThenProb, ElseProb);
673 }
674 CHR_DEBUG(dbgs() << "BI " << *BI << " ");
675 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
676 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
677 return CheckBias(R, ThenProb, ElseProb,
678 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
679 BranchBiasMap);
680}
681
682// Returns true and insert a select into the right biased set and the map if the
683// select is biased.
684static bool CheckBiasedSelect(
685 SelectInst *SI, Region *R,
686 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
687 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
688 DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) {
689 BranchProbability TrueProb, FalseProb;
690 if (!CheckMDProf(SI->getMetadata(LLVMContext::MD_prof),
691 TrueProb, FalseProb))
692 return false;
693 CHR_DEBUG(dbgs() << "SI " << *SI << " ");
694 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
695 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
696 return CheckBias(SI, TrueProb, FalseProb,
697 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
698 SelectBiasMap);
699}
700
701// Returns the instruction at which to hoist the dependent condition values and
702// insert the CHR branch for a region. This is the terminator branch in the
703// entry block or the first select in the entry block, if any.
704static Instruction* getBranchInsertPoint(RegInfo &RI) {
705 Region *R = RI.R;
706 BasicBlock *EntryBB = R->getEntry();
707 // The hoist point is by default the terminator of the entry block, which is
708 // the same as the branch instruction if RI.HasBranch is true.
709 Instruction *HoistPoint = EntryBB->getTerminator();
710 for (SelectInst *SI : RI.Selects) {
711 if (SI->getParent() == EntryBB) {
712 // Pick the first select in Selects in the entry block. Note Selects is
713 // sorted in the instruction order within a block (asserted below).
714 HoistPoint = SI;
715 break;
716 }
717 }
718 assert(HoistPoint && "Null HoistPoint");
719#ifndef NDEBUG
720 // Check that HoistPoint is the first one in Selects in the entry block,
721 // if any.
722 DenseSet<Instruction *> EntryBlockSelectSet;
723 for (SelectInst *SI : RI.Selects) {
724 if (SI->getParent() == EntryBB) {
725 EntryBlockSelectSet.insert(SI);
726 }
727 }
728 for (Instruction &I : *EntryBB) {
729 if (EntryBlockSelectSet.count(&I) > 0) {
730 assert(&I == HoistPoint &&
731 "HoistPoint must be the first one in Selects");
732 break;
733 }
734 }
735#endif
736 return HoistPoint;
737}
738
739// Find a CHR scope in the given region.
740CHRScope * CHR::findScope(Region *R) {
741 CHRScope *Result = nullptr;
742 BasicBlock *Entry = R->getEntry();
743 BasicBlock *Exit = R->getExit(); // null if top level.
744 assert(Entry && "Entry must not be null");
745 assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
746 "Only top level region has a null exit");
747 if (Entry)
748 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
749 else
750 CHR_DEBUG(dbgs() << "Entry null\n");
751 if (Exit)
752 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
753 else
754 CHR_DEBUG(dbgs() << "Exit null\n");
755 // Exclude cases where Entry is part of a subregion (hence it doesn't belong
756 // to this region).
757 bool EntryInSubregion = RI.getRegionFor(Entry) != R;
758 if (EntryInSubregion)
759 return nullptr;
760 // Exclude loops
761 for (BasicBlock *Pred : predecessors(Entry))
762 if (R->contains(Pred))
763 return nullptr;
764 if (Exit) {
765 // Try to find an if-then block (check if R is an if-then).
766 // if (cond) {
767 // ...
768 // }
769 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
770 if (BI)
771 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
772 else
773 CHR_DEBUG(dbgs() << "BI null\n");
774 if (BI && BI->isConditional()) {
775 BasicBlock *S0 = BI->getSuccessor(0);
776 BasicBlock *S1 = BI->getSuccessor(1);
777 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
778 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
779 if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
780 RegInfo RI(R);
781 RI.HasBranch = CheckBiasedBranch(
782 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
783 BranchBiasMap);
784 Result = new CHRScope(RI);
785 Scopes.insert(Result);
786 CHR_DEBUG(dbgs() << "Found a region with a branch\n");
787 ++Stats.NumBranches;
788 }
789 }
790 }
791 {
792 // Try to look for selects in the direct child blocks (as opposed to in
793 // subregions) of R.
794 // ...
795 // if (..) { // Some subregion
796 // ...
797 // }
798 // if (..) { // Some subregion
799 // ...
800 // }
801 // ...
802 // a = cond ? b : c;
803 // ...
804 SmallVector<SelectInst *, 8> Selects;
805 for (RegionNode *E : R->elements()) {
806 if (E->isSubRegion())
807 continue;
808 // This returns the basic block of E if E is a direct child of R (not a
809 // subregion.)
810 BasicBlock *BB = E->getEntry();
811 // Need to push in the order to make it easier to find the first Select
812 // later.
813 for (Instruction &I : *BB) {
814 if (auto *SI = dyn_cast<SelectInst>(&I)) {
815 Selects.push_back(SI);
816 ++Stats.NumBranches;
817 }
818 }
819 }
820 if (Selects.size() > 0) {
821 auto AddSelects = [&](RegInfo &RI) {
822 for (auto *SI : Selects)
823 if (CheckBiasedSelect(SI, RI.R,
824 TrueBiasedSelectsGlobal,
825 FalseBiasedSelectsGlobal,
826 SelectBiasMap))
827 RI.Selects.push_back(SI);
828 };
829 if (!Result) {
830 CHR_DEBUG(dbgs() << "Found a select-only region\n");
831 RegInfo RI(R);
832 AddSelects(RI);
833 Result = new CHRScope(RI);
834 Scopes.insert(Result);
835 } else {
836 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
837 AddSelects(Result->RegInfos[0]);
838 }
839 }
840 }
841
842 if (Result) {
843 checkScopeHoistable(Result);
844 }
845 return Result;
846}
847
848// Check that any of the branch and the selects in the region could be
849// hoisted above the the CHR branch insert point (the most dominating of
850// them, either the branch (at the end of the first block) or the first
851// select in the first block). If the branch can't be hoisted, drop the
852// selects in the first blocks.
853//
854// For example, for the following scope/region with selects, we want to insert
855// the merged branch right before the first select in the first/entry block by
856// hoisting c1, c2, c3, and c4.
857//
858// // Branch insert point here.
859// a = c1 ? b : c; // Select 1
860// d = c2 ? e : f; // Select 2
861// if (c3) { // Branch
862// ...
863// c4 = foo() // A call.
864// g = c4 ? h : i; // Select 3
865// }
866//
867// But suppose we can't hoist c4 because it's dependent on the preceding
868// call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
869// Select 2. If we can't hoist c3, we drop Selects 1 & 2.
870void CHR::checkScopeHoistable(CHRScope *Scope) {
871 RegInfo &RI = Scope->RegInfos[0];
872 Region *R = RI.R;
873 BasicBlock *EntryBB = R->getEntry();
874 auto *Branch = RI.HasBranch ?
875 cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
876 SmallVector<SelectInst *, 8> &Selects = RI.Selects;
877 if (RI.HasBranch || !Selects.empty()) {
878 Instruction *InsertPoint = getBranchInsertPoint(RI);
879 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
880 // Avoid a data dependence from a select or a branch to a(nother)
881 // select. Note no instruction can't data-depend on a branch (a branch
882 // instruction doesn't produce a value).
883 DenseSet<Instruction *> Unhoistables;
884 // Initialize Unhoistables with the selects.
885 for (SelectInst *SI : Selects) {
886 Unhoistables.insert(SI);
887 }
888 // Remove Selects that can't be hoisted.
889 for (auto it = Selects.begin(); it != Selects.end(); ) {
890 SelectInst *SI = *it;
891 if (SI == InsertPoint) {
892 ++it;
893 continue;
894 }
895 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
896 DT, Unhoistables, nullptr);
897 if (!IsHoistable) {
898 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
899 it = Selects.erase(it);
900 // Since we are dropping the select here, we also drop it from
901 // Unhoistables.
902 Unhoistables.erase(SI);
903 } else
904 ++it;
905 }
906 // Update InsertPoint after potentially removing selects.
907 InsertPoint = getBranchInsertPoint(RI);
908 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
909 if (RI.HasBranch && InsertPoint != Branch) {
910 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
911 DT, Unhoistables, nullptr);
912 if (!IsHoistable) {
913 // If the branch isn't hoistable, drop the selects in the entry
914 // block, preferring the branch, which makes the branch the hoist
915 // point.
916 assert(InsertPoint != Branch && "Branch must not be the hoist point");
917 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
918 CHR_DEBUG(
919 for (SelectInst *SI : Selects) {
920 dbgs() << "SI " << *SI << "\n";
921 });
922 Selects.erase(std::remove_if(Selects.begin(), Selects.end(),
923 [EntryBB](SelectInst *SI) {
924 return SI->getParent() == EntryBB;
925 }), Selects.end());
926 Unhoistables.clear();
927 InsertPoint = Branch;
928 }
929 }
930 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
931#ifndef NDEBUG
932 if (RI.HasBranch) {
933 assert(!DT.dominates(Branch, InsertPoint) &&
934 "Branch can't be already above the hoist point");
935 assert(checkHoistValue(Branch->getCondition(), InsertPoint,
936 DT, Unhoistables, nullptr) &&
937 "checkHoistValue for branch");
938 }
939 for (auto *SI : Selects) {
940 assert(!DT.dominates(SI, InsertPoint) &&
941 "SI can't be already above the hoist point");
942 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
943 Unhoistables, nullptr) &&
944 "checkHoistValue for selects");
945 }
946 CHR_DEBUG(dbgs() << "Result\n");
947 if (RI.HasBranch) {
948 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
949 }
950 for (auto *SI : Selects) {
951 CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
952 }
953#endif
954 }
955}
956
957// Traverse the region tree, find all nested scopes and merge them if possible.
958CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
959 SmallVectorImpl<CHRScope *> &Scopes) {
960 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
961 CHRScope *Result = findScope(R);
962 // Visit subscopes.
963 CHRScope *ConsecutiveSubscope = nullptr;
964 SmallVector<CHRScope *, 8> Subscopes;
965 for (auto It = R->begin(); It != R->end(); ++It) {
966 const std::unique_ptr<Region> &SubR = *It;
967 auto Next_It = std::next(It);
968 Region *NextSubR = Next_It != R->end() ? Next_It->get() : nullptr;
969 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
970 << "\n");
971 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
972 if (SubCHRScope) {
973 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
974 } else {
975 CHR_DEBUG(dbgs() << "Subregion Scope null\n");
976 }
977 if (SubCHRScope) {
978 if (!ConsecutiveSubscope)
979 ConsecutiveSubscope = SubCHRScope;
980 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
981 Subscopes.push_back(ConsecutiveSubscope);
982 ConsecutiveSubscope = SubCHRScope;
983 } else
984 ConsecutiveSubscope->append(SubCHRScope);
985 } else {
986 if (ConsecutiveSubscope) {
987 Subscopes.push_back(ConsecutiveSubscope);
988 }
989 ConsecutiveSubscope = nullptr;
990 }
991 }
992 if (ConsecutiveSubscope) {
993 Subscopes.push_back(ConsecutiveSubscope);
994 }
995 for (CHRScope *Sub : Subscopes) {
996 if (Result) {
997 // Combine it with the parent.
998 Result->addSub(Sub);
999 } else {
1000 // Push Subscopes as they won't be combined with the parent.
1001 Scopes.push_back(Sub);
1002 }
1003 }
1004 return Result;
1005}
1006
1007static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) {
1008 DenseSet<Value *> ConditionValues;
1009 if (RI.HasBranch) {
1010 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
1011 ConditionValues.insert(BI->getCondition());
1012 }
1013 for (SelectInst *SI : RI.Selects) {
1014 ConditionValues.insert(SI->getCondition());
1015 }
1016 return ConditionValues;
1017}
1018
1019
1020// Determine whether to split a scope depending on the sets of the branch
1021// condition values of the previous region and the current region. We split
1022// (return true) it if 1) the condition values of the inner/lower scope can't be
1023// hoisted up to the outer/upper scope, or 2) the two sets of the condition
1024// values have an empty intersection (because the combined branch conditions
1025// won't probably lead to a simpler combined condition).
1026static bool shouldSplit(Instruction *InsertPoint,
1027 DenseSet<Value *> &PrevConditionValues,
1028 DenseSet<Value *> &ConditionValues,
1029 DominatorTree &DT,
1030 DenseSet<Instruction *> &Unhoistables) {
1031 CHR_DEBUG(
1032 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
1033 for (Value *V : PrevConditionValues) {
1034 dbgs() << *V << ", ";
1035 }
1036 dbgs() << " ConditionValues ";
1037 for (Value *V : ConditionValues) {
1038 dbgs() << *V << ", ";
1039 }
1040 dbgs() << "\n");
1041 assert(InsertPoint && "Null InsertPoint");
1042 // If any of Bases isn't hoistable to the hoist point, split.
1043 for (Value *V : ConditionValues) {
1044 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr)) {
1045 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
1046 return true; // Not hoistable, split.
1047 }
1048 }
1049 // If PrevConditionValues or ConditionValues is empty, don't split to avoid
1050 // unnecessary splits at scopes with no branch/selects. If
1051 // PrevConditionValues and ConditionValues don't intersect at all, split.
1052 if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
1053 // Use std::set as DenseSet doesn't work with set_intersection.
1054 std::set<Value *> PrevBases, Bases;
1055 for (Value *V : PrevConditionValues) {
1056 std::set<Value *> BaseValues = getBaseValues(V, DT);
1057 PrevBases.insert(BaseValues.begin(), BaseValues.end());
1058 }
1059 for (Value *V : ConditionValues) {
1060 std::set<Value *> BaseValues = getBaseValues(V, DT);
1061 Bases.insert(BaseValues.begin(), BaseValues.end());
1062 }
1063 CHR_DEBUG(
1064 dbgs() << "PrevBases ";
1065 for (Value *V : PrevBases) {
1066 dbgs() << *V << ", ";
1067 }
1068 dbgs() << " Bases ";
1069 for (Value *V : Bases) {
1070 dbgs() << *V << ", ";
1071 }
1072 dbgs() << "\n");
1073 std::set<Value *> Intersection;
1074 std::set_intersection(PrevBases.begin(), PrevBases.end(),
1075 Bases.begin(), Bases.end(),
1076 std::inserter(Intersection, Intersection.begin()));
1077 if (Intersection.empty()) {
1078 // Empty intersection, split.
1079 CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
1080 return true;
1081 }
1082 }
1083 CHR_DEBUG(dbgs() << "No split\n");
1084 return false; // Don't split.
1085}
1086
1087static void GetSelectsInScope(CHRScope *Scope,
1088 DenseSet<Instruction *> &Output) {
1089 for (RegInfo &RI : Scope->RegInfos) {
1090 for (SelectInst *SI : RI.Selects) {
1091 Output.insert(SI);
1092 }
1093 }
1094 for (CHRScope *Sub : Scope->Subs) {
1095 GetSelectsInScope(Sub, Output);
1096 }
1097}
1098
1099void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
1100 SmallVectorImpl<CHRScope *> &Output) {
1101 for (CHRScope *Scope : Input) {
1102 assert(!Scope->BranchInsertPoint &&
1103 "BranchInsertPoint must not be set");
1104 DenseSet<Instruction *> Unhoistables;
1105 GetSelectsInScope(Scope, Unhoistables);
1106 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
1107 }
1108#ifndef NDEBUG
1109 for (CHRScope *Scope : Output) {
1110 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
1111 }
1112#endif
1113}
1114
1115SmallVector<CHRScope *, 8> CHR::splitScope(
1116 CHRScope *Scope,
1117 CHRScope *Outer,
1118 DenseSet<Value *> *OuterConditionValues,
1119 Instruction *OuterInsertPoint,
1120 SmallVectorImpl<CHRScope *> &Output,
1121 DenseSet<Instruction *> &Unhoistables) {
1122 if (Outer) {
1123 assert(OuterConditionValues && "Null OuterConditionValues");
1124 assert(OuterInsertPoint && "Null OuterInsertPoint");
1125 }
1126 bool PrevSplitFromOuter = true;
1127 DenseSet<Value *> PrevConditionValues;
1128 Instruction *PrevInsertPoint = nullptr;
1129 SmallVector<CHRScope *, 8> Splits;
1130 SmallVector<bool, 8> SplitsSplitFromOuter;
1131 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
1132 SmallVector<Instruction *, 8> SplitsInsertPoints;
1133 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy
1134 for (RegInfo &RI : RegInfos) {
1135 Instruction *InsertPoint = getBranchInsertPoint(RI);
1136 DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI);
1137 CHR_DEBUG(
1138 dbgs() << "ConditionValues ";
1139 for (Value *V : ConditionValues) {
1140 dbgs() << *V << ", ";
1141 }
1142 dbgs() << "\n");
1143 if (RI.R == RegInfos[0].R) {
1144 // First iteration. Check to see if we should split from the outer.
1145 if (Outer) {
1146 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
1147 CHR_DEBUG(dbgs() << "Should split from outer at "
1148 << RI.R->getNameStr() << "\n");
1149 if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
1150 ConditionValues, DT, Unhoistables)) {
1151 PrevConditionValues = ConditionValues;
1152 PrevInsertPoint = InsertPoint;
1153 } else {
1154 // Not splitting from the outer. Use the outer bases and insert
1155 // point. Union the bases.
1156 PrevSplitFromOuter = false;
1157 PrevConditionValues = *OuterConditionValues;
1158 PrevConditionValues.insert(ConditionValues.begin(),
1159 ConditionValues.end());
1160 PrevInsertPoint = OuterInsertPoint;
1161 }
1162 } else {
1163 CHR_DEBUG(dbgs() << "Outer null\n");
1164 PrevConditionValues = ConditionValues;
1165 PrevInsertPoint = InsertPoint;
1166 }
1167 } else {
1168 CHR_DEBUG(dbgs() << "Should split from prev at "
1169 << RI.R->getNameStr() << "\n");
1170 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
1171 DT, Unhoistables)) {
1172 CHRScope *Tail = Scope->split(RI.R);
1173 Scopes.insert(Tail);
1174 Splits.push_back(Scope);
1175 SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1176 SplitsConditionValues.push_back(PrevConditionValues);
1177 SplitsInsertPoints.push_back(PrevInsertPoint);
1178 Scope = Tail;
1179 PrevConditionValues = ConditionValues;
1180 PrevInsertPoint = InsertPoint;
1181 PrevSplitFromOuter = true;
1182 } else {
1183 // Not splitting. Union the bases. Keep the hoist point.
1184 PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
1185 }
1186 }
1187 }
1188 Splits.push_back(Scope);
1189 SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1190 SplitsConditionValues.push_back(PrevConditionValues);
1191 assert(PrevInsertPoint && "Null PrevInsertPoint");
1192 SplitsInsertPoints.push_back(PrevInsertPoint);
1193 assert(Splits.size() == SplitsConditionValues.size() &&
1194 Splits.size() == SplitsSplitFromOuter.size() &&
1195 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
1196 for (size_t I = 0; I < Splits.size(); ++I) {
1197 CHRScope *Split = Splits[I];
1198 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
1199 Instruction *SplitInsertPoint = SplitsInsertPoints[I];
1200 SmallVector<CHRScope *, 8> NewSubs;
1201 DenseSet<Instruction *> SplitUnhoistables;
1202 GetSelectsInScope(Split, SplitUnhoistables);
1203 for (CHRScope *Sub : Split->Subs) {
1204 SmallVector<CHRScope *, 8> SubSplits = splitScope(
1205 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
1206 SplitUnhoistables);
1207 NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end());
1208 }
1209 Split->Subs = NewSubs;
1210 }
1211 SmallVector<CHRScope *, 8> Result;
1212 for (size_t I = 0; I < Splits.size(); ++I) {
1213 CHRScope *Split = Splits[I];
1214 if (SplitsSplitFromOuter[I]) {
1215 // Split from the outer.
1216 Output.push_back(Split);
1217 Split->BranchInsertPoint = SplitsInsertPoints[I];
1218 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
1219 << "\n");
1220 } else {
1221 // Connected to the outer.
1222 Result.push_back(Split);
1223 }
1224 }
1225 if (!Outer)
1226 assert(Result.empty() &&
1227 "If no outer (top-level), must return no nested ones");
1228 return Result;
1229}
1230
1231void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
1232 for (CHRScope *Scope : Scopes) {
1233 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
1234 classifyBiasedScopes(Scope, Scope);
1235 CHR_DEBUG(
1236 dbgs() << "classifyBiasedScopes " << *Scope << "\n";
1237 dbgs() << "TrueBiasedRegions ";
1238 for (Region *R : Scope->TrueBiasedRegions) {
1239 dbgs() << R->getNameStr() << ", ";
1240 }
1241 dbgs() << "\n";
1242 dbgs() << "FalseBiasedRegions ";
1243 for (Region *R : Scope->FalseBiasedRegions) {
1244 dbgs() << R->getNameStr() << ", ";
1245 }
1246 dbgs() << "\n";
1247 dbgs() << "TrueBiasedSelects ";
1248 for (SelectInst *SI : Scope->TrueBiasedSelects) {
1249 dbgs() << *SI << ", ";
1250 }
1251 dbgs() << "\n";
1252 dbgs() << "FalseBiasedSelects ";
1253 for (SelectInst *SI : Scope->FalseBiasedSelects) {
1254 dbgs() << *SI << ", ";
1255 }
1256 dbgs() << "\n";);
1257 }
1258}
1259
1260void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
1261 for (RegInfo &RI : Scope->RegInfos) {
1262 if (RI.HasBranch) {
1263 Region *R = RI.R;
1264 if (TrueBiasedRegionsGlobal.count(R) > 0)
1265 OutermostScope->TrueBiasedRegions.insert(R);
1266 else if (FalseBiasedRegionsGlobal.count(R) > 0)
1267 OutermostScope->FalseBiasedRegions.insert(R);
1268 else
1269 llvm_unreachable("Must be biased");
1270 }
1271 for (SelectInst *SI : RI.Selects) {
1272 if (TrueBiasedSelectsGlobal.count(SI) > 0)
1273 OutermostScope->TrueBiasedSelects.insert(SI);
1274 else if (FalseBiasedSelectsGlobal.count(SI) > 0)
1275 OutermostScope->FalseBiasedSelects.insert(SI);
1276 else
1277 llvm_unreachable("Must be biased");
1278 }
1279 }
1280 for (CHRScope *Sub : Scope->Subs) {
1281 classifyBiasedScopes(Sub, OutermostScope);
1282 }
1283}
1284
1285static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
1286 unsigned NumBiased = Scope->TrueBiasedRegions.size() +
1287 Scope->FalseBiasedRegions.size() +
1288 Scope->TrueBiasedSelects.size() +
1289 Scope->FalseBiasedSelects.size();
1290 return NumBiased >= CHRMergeThreshold;
1291}
1292
1293void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
1294 SmallVectorImpl<CHRScope *> &Output) {
1295 for (CHRScope *Scope : Input) {
1296 // Filter out the ones with only one region and no subs.
1297 if (!hasAtLeastTwoBiasedBranches(Scope)) {
1298 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
1299 << Scope->TrueBiasedRegions.size()
1300 << " falsy-regions " << Scope->FalseBiasedRegions.size()
1301 << " true-selects " << Scope->TrueBiasedSelects.size()
1302 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
1303 continue;
1304 }
1305 Output.push_back(Scope);
1306 }
1307}
1308
1309void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
1310 SmallVectorImpl<CHRScope *> &Output) {
1311 for (CHRScope *Scope : Input) {
1312 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
1313 "Empty");
1314 setCHRRegions(Scope, Scope);
1315 Output.push_back(Scope);
1316 CHR_DEBUG(
1317 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
1318 for (auto pair : Scope->HoistStopMap) {
1319 Region *R = pair.first;
1320 dbgs() << "Region " << R->getNameStr() << "\n";
1321 for (Instruction *I : pair.second) {
1322 dbgs() << "HoistStop " << *I << "\n";
1323 }
1324 }
1325 dbgs() << "CHRRegions" << "\n";
1326 for (RegInfo &RI : Scope->CHRRegions) {
1327 dbgs() << RI.R->getNameStr() << "\n";
1328 });
1329 }
1330}
1331
1332void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
1333 DenseSet<Instruction *> Unhoistables;
1334 // Put the biased selects in Unhoistables because they should stay where they
1335 // are and constant-folded after CHR (in case one biased select or a branch
1336 // can depend on another biased select.)
1337 for (RegInfo &RI : Scope->RegInfos) {
1338 for (SelectInst *SI : RI.Selects) {
1339 Unhoistables.insert(SI);
1340 }
1341 }
1342 Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
1343 for (RegInfo &RI : Scope->RegInfos) {
1344 Region *R = RI.R;
1345 DenseSet<Instruction *> HoistStops;
1346 bool IsHoisted = false;
1347 if (RI.HasBranch) {
1348 assert((OutermostScope->TrueBiasedRegions.count(R) > 0 ||
1349 OutermostScope->FalseBiasedRegions.count(R) > 0) &&
1350 "Must be truthy or falsy");
1351 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1352 // Note checkHoistValue fills in HoistStops.
1353 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
1354 Unhoistables, &HoistStops);
1355 assert(IsHoistable && "Must be hoistable");
1356 (void)(IsHoistable); // Unused in release build
1357 IsHoisted = true;
1358 }
1359 for (SelectInst *SI : RI.Selects) {
1360 assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 ||
1361 OutermostScope->FalseBiasedSelects.count(SI) > 0) &&
1362 "Must be true or false biased");
1363 // Note checkHoistValue fills in HoistStops.
1364 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
1365 Unhoistables, &HoistStops);
1366 assert(IsHoistable && "Must be hoistable");
1367 (void)(IsHoistable); // Unused in release build
1368 IsHoisted = true;
1369 }
1370 if (IsHoisted) {
1371 OutermostScope->CHRRegions.push_back(RI);
1372 OutermostScope->HoistStopMap[R] = HoistStops;
1373 }
1374 }
1375 for (CHRScope *Sub : Scope->Subs)
1376 setCHRRegions(Sub, OutermostScope);
1377}
1378
1379bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
1380 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
1381}
1382
1383void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
1384 SmallVectorImpl<CHRScope *> &Output) {
1385 Output.resize(Input.size());
1386 std::copy(Input.begin(), Input.end(), Output.begin());
1387 std::stable_sort(Output.begin(), Output.end(), CHRScopeSorter);
1388}
1389
1390// Return true if V is already hoisted or was hoisted (along with its operands)
1391// to the insert point.
1392static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
1393 HoistStopMapTy &HoistStopMap,
1394 DenseSet<Instruction *> &HoistedSet,
1395 DenseSet<PHINode *> &TrivialPHIs) {
1396 auto IT = HoistStopMap.find(R);
1397 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
1398 DenseSet<Instruction *> &HoistStops = IT->second;
1399 if (auto *I = dyn_cast<Instruction>(V)) {
1400 if (I == HoistPoint)
1401 return;
1402 if (HoistStops.count(I))
1403 return;
1404 if (auto *PN = dyn_cast<PHINode>(I))
1405 if (TrivialPHIs.count(PN))
1406 // The trivial phi inserted by the previous CHR scope could replace a
1407 // non-phi in HoistStops. Note that since this phi is at the exit of a
1408 // previous CHR scope, which dominates this scope, it's safe to stop
1409 // hoisting there.
1410 return;
1411 if (HoistedSet.count(I))
1412 // Already hoisted, return.
1413 return;
1414 assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
1415 for (Value *Op : I->operands()) {
1416 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs);
1417 }
1418 I->moveBefore(HoistPoint);
1419 HoistedSet.insert(I);
1420 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
1421 }
1422}
1423
1424// Hoist the dependent condition values of the branches and the selects in the
1425// scope to the insert point.
1426static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
1427 DenseSet<PHINode *> &TrivialPHIs) {
1428 DenseSet<Instruction *> HoistedSet;
1429 for (const RegInfo &RI : Scope->CHRRegions) {
1430 Region *R = RI.R;
1431 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1432 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1433 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1434 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1435 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1436 HoistedSet, TrivialPHIs);
1437 }
1438 for (SelectInst *SI : RI.Selects) {
1439 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1440 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1441 if (!(IsTrueBiased || IsFalseBiased))
1442 continue;
1443 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1444 HoistedSet, TrivialPHIs);
1445 }
1446 }
1447}
1448
1449// Negate the predicate if an ICmp if it's used only by branches or selects by
1450// swapping the operands of the branches or the selects. Returns true if success.
1451static bool NegateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
1452 Instruction *ExcludedUser,
1453 CHRScope *Scope) {
1454 for (User *U : ICmp->users()) {
1455 if (U == ExcludedUser)
1456 continue;
1457 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
1458 continue;
1459 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
1460 continue;
1461 return false;
1462 }
1463 for (User *U : ICmp->users()) {
1464 if (U == ExcludedUser)
1465 continue;
1466 if (auto *BI = dyn_cast<BranchInst>(U)) {
1467 assert(BI->isConditional() && "Must be conditional");
1468 BI->swapSuccessors();
1469 // Don't need to swap this in terms of
1470 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
1471 // mean whehter the branch is likely go into the if-then rather than
1472 // successor0/successor1 and because we can tell which edge is the then or
1473 // the else one by comparing the destination to the region exit block.
1474 continue;
1475 }
1476 if (auto *SI = dyn_cast<SelectInst>(U)) {
1477 // Swap operands
1478 Value *TrueValue = SI->getTrueValue();
1479 Value *FalseValue = SI->getFalseValue();
1480 SI->setTrueValue(FalseValue);
1481 SI->setFalseValue(TrueValue);
1482 SI->swapProfMetadata();
1483 if (Scope->TrueBiasedSelects.count(SI)) {
1484 assert(Scope->FalseBiasedSelects.count(SI) == 0 &&
1485 "Must not be already in");
1486 Scope->FalseBiasedSelects.insert(SI);
1487 } else if (Scope->FalseBiasedSelects.count(SI)) {
1488 assert(Scope->TrueBiasedSelects.count(SI) == 0 &&
1489 "Must not be already in");
1490 Scope->TrueBiasedSelects.insert(SI);
1491 }
1492 continue;
1493 }
1494 llvm_unreachable("Must be a branch or a select");
1495 }
1496 ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate()));
1497 return true;
1498}
1499
1500// A helper for transformScopes. Insert a trivial phi at the scope exit block
1501// for a value that's defined in the scope but used outside it (meaning it's
1502// alive at the exit block).
1503static void insertTrivialPHIs(CHRScope *Scope,
1504 BasicBlock *EntryBlock, BasicBlock *ExitBlock,
1505 DenseSet<PHINode *> &TrivialPHIs) {
1506 DenseSet<BasicBlock *> BlocksInScopeSet;
1507 SmallVector<BasicBlock *, 8> BlocksInScopeVec;
1508 for (RegInfo &RI : Scope->RegInfos) {
1509 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1510 // sub-Scopes.
1511 BlocksInScopeSet.insert(BB);
1512 BlocksInScopeVec.push_back(BB);
1513 }
1514 }
1515 CHR_DEBUG(
1516 dbgs() << "Inserting redudant phis\n";
1517 for (BasicBlock *BB : BlocksInScopeVec) {
1518 dbgs() << "BlockInScope " << BB->getName() << "\n";
1519 });
1520 for (BasicBlock *BB : BlocksInScopeVec) {
1521 for (Instruction &I : *BB) {
1522 SmallVector<Instruction *, 8> Users;
1523 for (User *U : I.users()) {
1524 if (auto *UI = dyn_cast<Instruction>(U)) {
1525 if (BlocksInScopeSet.count(UI->getParent()) == 0 &&
1526 // Unless there's already a phi for I at the exit block.
1527 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
1528 CHR_DEBUG(dbgs() << "V " << I << "\n");
1529 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
1530 Users.push_back(UI);
1531 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
1532 // There's a loop backedge from a block that's dominated by this
1533 // scope to the entry block.
1534 CHR_DEBUG(dbgs() << "V " << I << "\n");
1535 CHR_DEBUG(dbgs()
1536 << "Used at entry block (for a back edge) by a phi user "
1537 << *UI << "\n");
1538 Users.push_back(UI);
1539 }
1540 }
1541 }
1542 if (Users.size() > 0) {
1543 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
1544 // ExitBlock. Replace I with the new phi in UI unless UI is another
1545 // phi at ExitBlock.
1546 unsigned PredCount = std::distance(pred_begin(ExitBlock),
1547 pred_end(ExitBlock));
1548 PHINode *PN = PHINode::Create(I.getType(), PredCount, "",
1549 &ExitBlock->front());
1550 for (BasicBlock *Pred : predecessors(ExitBlock)) {
1551 PN->addIncoming(&I, Pred);
1552 }
1553 TrivialPHIs.insert(PN);
1554 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
1555 for (Instruction *UI : Users) {
1556 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
1557 if (UI->getOperand(J) == &I) {
1558 UI->setOperand(J, PN);
1559 }
1560 }
1561 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
1562 }
1563 }
1564 }
1565 }
1566}
1567
1568// Assert that all the CHR regions of the scope have a biased branch or select.
1569static void assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
1570#ifndef NDEBUG
1571 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
1572 if (Scope->TrueBiasedRegions.count(RI.R) ||
1573 Scope->FalseBiasedRegions.count(RI.R))
1574 return true;
1575 for (SelectInst *SI : RI.Selects)
1576 if (Scope->TrueBiasedSelects.count(SI) ||
1577 Scope->FalseBiasedSelects.count(SI))
1578 return true;
1579 return false;
1580 };
1581 for (RegInfo &RI : Scope->CHRRegions) {
1582 assert(HasBiasedBranchOrSelect(RI, Scope) &&
1583 "Must have biased branch or select");
1584 }
1585#endif
1586}
1587
1588// Assert that all the condition values of the biased branches and selects have
1589// been hoisted to the pre-entry block or outside of the scope.
1590static void assertBranchOrSelectConditionHoisted(CHRScope *Scope,
1591 BasicBlock *PreEntryBlock) {
1592 CHR_DEBUG(dbgs() << "Biased regions condition values \n");
1593 for (RegInfo &RI : Scope->CHRRegions) {
1594 Region *R = RI.R;
1595 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1596 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1597 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1598 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1599 Value *V = BI->getCondition();
1600 CHR_DEBUG(dbgs() << *V << "\n");
1601 if (auto *I = dyn_cast<Instruction>(V)) {
Hiroshi Yamauchi72ee6d62018-09-04 18:10:54 +00001602 (void)(I); // Unused in release build.
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +00001603 assert((I->getParent() == PreEntryBlock ||
1604 !Scope->contains(I)) &&
1605 "Must have been hoisted to PreEntryBlock or outside the scope");
1606 }
1607 }
1608 for (SelectInst *SI : RI.Selects) {
1609 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1610 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1611 if (!(IsTrueBiased || IsFalseBiased))
1612 continue;
1613 Value *V = SI->getCondition();
1614 CHR_DEBUG(dbgs() << *V << "\n");
1615 if (auto *I = dyn_cast<Instruction>(V)) {
Hiroshi Yamauchi72ee6d62018-09-04 18:10:54 +00001616 (void)(I); // Unused in release build.
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +00001617 assert((I->getParent() == PreEntryBlock ||
1618 !Scope->contains(I)) &&
1619 "Must have been hoisted to PreEntryBlock or outside the scope");
1620 }
1621 }
1622 }
1623}
1624
1625void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
1626 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
1627
1628 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
1629 Region *FirstRegion = Scope->RegInfos[0].R;
1630 BasicBlock *EntryBlock = FirstRegion->getEntry();
1631 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
1632 BasicBlock *ExitBlock = LastRegion->getExit();
1633 Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
1634
1635 if (ExitBlock) {
1636 // Insert a trivial phi at the exit block (where the CHR hot path and the
1637 // cold path merges) for a value that's defined in the scope but used
1638 // outside it (meaning it's alive at the exit block). We will add the
1639 // incoming values for the CHR cold paths to it below. Without this, we'd
1640 // miss updating phi's for such values unless there happens to already be a
1641 // phi for that value there.
1642 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
1643 }
1644
1645 // Split the entry block of the first region. The new block becomes the new
1646 // entry block of the first region. The old entry block becomes the block to
1647 // insert the CHR branch into. Note DT gets updated. Since DT gets updated
1648 // through the split, we update the entry of the first region after the split,
1649 // and Region only points to the entry and the exit blocks, rather than
1650 // keeping everything in a list or set, the blocks membership and the
1651 // entry/exit blocks of the region are still valid after the split.
1652 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
1653 << " at " << *Scope->BranchInsertPoint << "\n");
1654 BasicBlock *NewEntryBlock =
1655 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
1656 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1657 "NewEntryBlock's only pred must be EntryBlock");
1658 FirstRegion->replaceEntryRecursive(NewEntryBlock);
1659 BasicBlock *PreEntryBlock = EntryBlock;
1660
1661 ValueToValueMapTy VMap;
1662 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
1663 // hot path (originals) and a cold path (clones) and update the PHIs at the
1664 // exit block.
1665 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
1666
1667 // Replace the old (placeholder) branch with the new (merged) conditional
1668 // branch.
1669 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
1670 NewEntryBlock, VMap);
1671
1672#ifndef NDEBUG
1673 assertCHRRegionsHaveBiasedBranchOrSelect(Scope);
1674#endif
1675
1676 // Hoist the conditional values of the branches/selects.
1677 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs);
1678
1679#ifndef NDEBUG
1680 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
1681#endif
1682
1683 // Create the combined branch condition and constant-fold the branches/selects
1684 // in the hot path.
1685 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
1686 ProfileCount ? ProfileCount.getValue() : 0);
1687}
1688
1689// A helper for transformScopes. Clone the blocks in the scope (excluding the
1690// PreEntryBlock) to split into a hot path and a cold path and update the PHIs
1691// at the exit block.
1692void CHR::cloneScopeBlocks(CHRScope *Scope,
1693 BasicBlock *PreEntryBlock,
1694 BasicBlock *ExitBlock,
1695 Region *LastRegion,
1696 ValueToValueMapTy &VMap) {
1697 // Clone all the blocks. The original blocks will be the hot-path
1698 // CHR-optimized code and the cloned blocks will be the original unoptimized
1699 // code. This is so that the block pointers from the
1700 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
1701 // which CHR should apply to.
1702 SmallVector<BasicBlock*, 8> NewBlocks;
1703 for (RegInfo &RI : Scope->RegInfos)
1704 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1705 // sub-Scopes.
1706 assert(BB != PreEntryBlock && "Don't copy the preetntry block");
1707 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
1708 NewBlocks.push_back(NewBB);
1709 VMap[BB] = NewBB;
1710 }
1711
1712 // Place the cloned blocks right after the original blocks (right before the
1713 // exit block of.)
1714 if (ExitBlock)
1715 F.getBasicBlockList().splice(ExitBlock->getIterator(),
1716 F.getBasicBlockList(),
1717 NewBlocks[0]->getIterator(), F.end());
1718
1719 // Update the cloned blocks/instructions to refer to themselves.
1720 for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
1721 for (Instruction &I : *NewBlocks[i])
1722 RemapInstruction(&I, VMap,
1723 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1724
1725 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
1726 // the top-level region but we don't need to add PHIs. The trivial PHIs
1727 // inserted above will be updated here.
1728 if (ExitBlock)
1729 for (PHINode &PN : ExitBlock->phis())
1730 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
1731 ++I) {
1732 BasicBlock *Pred = PN.getIncomingBlock(I);
1733 if (LastRegion->contains(Pred)) {
1734 Value *V = PN.getIncomingValue(I);
1735 auto It = VMap.find(V);
1736 if (It != VMap.end()) V = It->second;
1737 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
1738 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
1739 }
1740 }
1741}
1742
1743// A helper for transformScope. Replace the old (placeholder) branch with the
1744// new (merged) conditional branch.
1745BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
1746 BasicBlock *EntryBlock,
1747 BasicBlock *NewEntryBlock,
1748 ValueToValueMapTy &VMap) {
1749 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
1750 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
1751 "SplitBlock did not work correctly!");
1752 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1753 "NewEntryBlock's only pred must be EntryBlock");
1754 assert(VMap.find(NewEntryBlock) != VMap.end() &&
1755 "NewEntryBlock must have been copied");
1756 OldBR->removeFromParent();
1757 OldBR->dropAllReferences();
1758 // The true predicate is a placeholder. It will be replaced later in
1759 // fixupBranchesAndSelects().
1760 BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
1761 cast<BasicBlock>(VMap[NewEntryBlock]),
1762 ConstantInt::getTrue(F.getContext()));
1763 PreEntryBlock->getInstList().push_back(NewBR);
1764 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1765 "NewEntryBlock's only pred must be EntryBlock");
1766 return NewBR;
1767}
1768
1769// A helper for transformScopes. Create the combined branch condition and
1770// constant-fold the branches/selects in the hot path.
1771void CHR::fixupBranchesAndSelects(CHRScope *Scope,
1772 BasicBlock *PreEntryBlock,
1773 BranchInst *MergedBR,
1774 uint64_t ProfileCount) {
1775 Value *MergedCondition = ConstantInt::getTrue(F.getContext());
1776 BranchProbability CHRBranchBias(1, 1);
1777 uint64_t NumCHRedBranches = 0;
1778 IRBuilder<> IRB(PreEntryBlock->getTerminator());
1779 for (RegInfo &RI : Scope->CHRRegions) {
1780 Region *R = RI.R;
1781 if (RI.HasBranch) {
1782 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
1783 ++NumCHRedBranches;
1784 }
1785 for (SelectInst *SI : RI.Selects) {
1786 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
1787 ++NumCHRedBranches;
1788 }
1789 }
1790 Stats.NumBranchesDelta += NumCHRedBranches - 1;
1791 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
1792 MergedBR->setCondition(MergedCondition);
1793 SmallVector<uint32_t, 2> Weights;
1794 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.scale(1000)));
1795 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)));
1796 MDBuilder MDB(F.getContext());
1797 MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1798 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
1799 << "\n");
1800}
1801
1802// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1803// and constant-fold a branch in the hot path.
1804void CHR::fixupBranch(Region *R, CHRScope *Scope,
1805 IRBuilder<> &IRB,
1806 Value *&MergedCondition,
1807 BranchProbability &CHRBranchBias) {
1808 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1809 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
1810 "Must be truthy or falsy");
1811 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1812 assert(BranchBiasMap.find(R) != BranchBiasMap.end() &&
1813 "Must be in the bias map");
1814 BranchProbability Bias = BranchBiasMap[R];
1815 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1816 // Take the min.
1817 if (CHRBranchBias > Bias)
1818 CHRBranchBias = Bias;
1819 BasicBlock *IfThen = BI->getSuccessor(1);
1820 BasicBlock *IfElse = BI->getSuccessor(0);
1821 BasicBlock *RegionExitBlock = R->getExit();
1822 assert(RegionExitBlock && "Null ExitBlock");
1823 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
1824 IfThen != IfElse && "Invariant from findScopes");
1825 if (IfThen == RegionExitBlock) {
1826 // Swap them so that IfThen means going into it and IfElse means skipping
1827 // it.
1828 std::swap(IfThen, IfElse);
1829 }
1830 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
1831 << " IfElse " << IfElse->getName() << "\n");
1832 Value *Cond = BI->getCondition();
1833 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
1834 bool ConditionTrue = HotTarget == BI->getSuccessor(0);
1835 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
1836 MergedCondition);
1837 // Constant-fold the branch at ClonedEntryBlock.
1838 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
1839 "The successor shouldn't change");
1840 Value *NewCondition = ConditionTrue ?
1841 ConstantInt::getTrue(F.getContext()) :
1842 ConstantInt::getFalse(F.getContext());
1843 BI->setCondition(NewCondition);
1844}
1845
1846// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1847// and constant-fold a select in the hot path.
1848void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
1849 IRBuilder<> &IRB,
1850 Value *&MergedCondition,
1851 BranchProbability &CHRBranchBias) {
1852 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1853 assert((IsTrueBiased ||
1854 Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
1855 assert(SelectBiasMap.find(SI) != SelectBiasMap.end() &&
1856 "Must be in the bias map");
1857 BranchProbability Bias = SelectBiasMap[SI];
1858 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1859 // Take the min.
1860 if (CHRBranchBias > Bias)
1861 CHRBranchBias = Bias;
1862 Value *Cond = SI->getCondition();
1863 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
1864 MergedCondition);
1865 Value *NewCondition = IsTrueBiased ?
1866 ConstantInt::getTrue(F.getContext()) :
1867 ConstantInt::getFalse(F.getContext());
1868 SI->setCondition(NewCondition);
1869}
1870
1871// A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
1872// condition.
1873void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
1874 Instruction *BranchOrSelect,
1875 CHRScope *Scope,
1876 IRBuilder<> &IRB,
1877 Value *&MergedCondition) {
1878 if (IsTrueBiased) {
1879 MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
1880 } else {
1881 // If Cond is an icmp and all users of V except for BranchOrSelect is a
1882 // branch, negate the icmp predicate and swap the branch targets and avoid
1883 // inserting an Xor to negate Cond.
1884 bool Done = false;
1885 if (auto *ICmp = dyn_cast<ICmpInst>(Cond))
1886 if (NegateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) {
1887 MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
1888 Done = true;
1889 }
1890 if (!Done) {
1891 Value *Negate = IRB.CreateXor(
1892 ConstantInt::getTrue(F.getContext()), Cond);
1893 MergedCondition = IRB.CreateAnd(MergedCondition, Negate);
1894 }
1895 }
1896}
1897
1898void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
1899 unsigned i = 0;
1900 (void)(i); // Unused in release build.
1901 DenseSet<PHINode *> TrivialPHIs;
1902 for (CHRScope *Scope : CHRScopes) {
1903 transformScopes(Scope, TrivialPHIs);
1904 CHR_DEBUG(
1905 std::ostringstream oss;
1906 oss << " after transformScopes " << i++;
1907 dumpIR(F, oss.str().c_str(), nullptr));
1908 }
1909}
1910
1911static void dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char * Label) {
1912 dbgs() << Label << " " << Scopes.size() << "\n";
1913 for (CHRScope *Scope : Scopes) {
1914 dbgs() << *Scope << "\n";
1915 }
1916}
1917
1918bool CHR::run() {
1919 if (!shouldApply(F, PSI))
1920 return false;
1921
1922 CHR_DEBUG(dumpIR(F, "before", nullptr));
1923
1924 bool Changed = false;
1925 {
1926 CHR_DEBUG(
1927 dbgs() << "RegionInfo:\n";
1928 RI.print(dbgs()));
1929
1930 // Recursively traverse the region tree and find regions that have biased
1931 // branches and/or selects and create scopes.
1932 SmallVector<CHRScope *, 8> AllScopes;
1933 findScopes(AllScopes);
1934 CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
1935
1936 // Split the scopes if 1) the conditiona values of the biased
1937 // branches/selects of the inner/lower scope can't be hoisted up to the
1938 // outermost/uppermost scope entry, or 2) the condition values of the biased
1939 // branches/selects in a scope (including subscopes) don't share at least
1940 // one common value.
1941 SmallVector<CHRScope *, 8> SplitScopes;
1942 splitScopes(AllScopes, SplitScopes);
1943 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
1944
1945 // After splitting, set the biased regions and selects of a scope (a tree
1946 // root) that include those of the subscopes.
1947 classifyBiasedScopes(SplitScopes);
1948 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
1949
1950 // Filter out the scopes that has only one biased region or select (CHR
1951 // isn't useful in such a case).
1952 SmallVector<CHRScope *, 8> FilteredScopes;
1953 filterScopes(SplitScopes, FilteredScopes);
1954 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
1955
1956 // Set the regions to be CHR'ed and their hoist stops for each scope.
1957 SmallVector<CHRScope *, 8> SetScopes;
1958 setCHRRegions(FilteredScopes, SetScopes);
1959 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
1960
1961 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner
1962 // ones. We need to apply CHR from outer to inner so that we apply CHR only
1963 // to the hot path, rather than both hot and cold paths.
1964 SmallVector<CHRScope *, 8> SortedScopes;
1965 sortScopes(SetScopes, SortedScopes);
1966 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
1967
1968 CHR_DEBUG(
1969 dbgs() << "RegionInfo:\n";
1970 RI.print(dbgs()));
1971
1972 // Apply the CHR transformation.
1973 if (!SortedScopes.empty()) {
1974 transformScopes(SortedScopes);
1975 Changed = true;
1976 }
1977 }
1978
1979 if (Changed)
1980 CHR_DEBUG(dumpIR(F, "after", &Stats));
1981
1982 return Changed;
1983}
1984
1985bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) {
1986 BlockFrequencyInfo &BFI =
1987 getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
1988 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1989 ProfileSummaryInfo &PSI =
1990 *getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
1991 RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
1992 return CHR(F, BFI, DT, PSI, RI).run();
1993}
1994
1995namespace llvm {
1996
1997ControlHeightReductionPass::ControlHeightReductionPass() {
1998 ParseCHRFilterFiles();
1999}
2000
2001PreservedAnalyses ControlHeightReductionPass::run(
2002 Function &F,
2003 FunctionAnalysisManager &FAM) {
2004 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
2005 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
2006 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
2007 auto &MAM = MAMProxy.getManager();
2008 auto &PSI = *MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
2009 auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
2010 bool Changed = CHR(F, BFI, DT, PSI, RI).run();
2011 if (!Changed)
2012 return PreservedAnalyses::all();
2013 auto PA = PreservedAnalyses();
2014 PA.preserve<GlobalsAA>();
2015 return PA;
2016}
2017
2018} // namespace llvm