blob: 4da8a7113f6983ce8877dfd2cb353fef1c8996dd [file] [log] [blame]
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +00001//===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass merges conditional blocks of code and reduces the number of
11// conditional branches in the hot paths based on profiles.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
16#include "llvm/Transforms/Utils.h"
17#include "llvm/Transforms/Utils/BasicBlockUtils.h"
18#include "llvm/Transforms/Utils/Cloning.h"
19#include "llvm/Transforms/Utils/ValueMapper.h"
20#include "llvm/ADT/DenseMap.h"
21#include "llvm/ADT/DenseSet.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/StringSet.h"
24#include "llvm/Analysis/BlockFrequencyInfo.h"
25#include "llvm/Analysis/ProfileSummaryInfo.h"
26#include "llvm/Analysis/RegionInfo.h"
27#include "llvm/Analysis/RegionIterator.h"
28#include "llvm/Analysis/ValueTracking.h"
29#include "llvm/IR/CFG.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/IRBuilder.h"
32#include "llvm/IR/MDBuilder.h"
33#include "llvm/Support/BranchProbability.h"
34#include "llvm/Support/MemoryBuffer.h"
35#include "llvm/Transforms/Scalar.h"
36
Hiroshi Yamauchi72ee6d62018-09-04 18:10:54 +000037#if !defined(_MSC_VER)
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +000038#include <cxxabi.h>
Hiroshi Yamauchi72ee6d62018-09-04 18:10:54 +000039#endif
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +000040#include <set>
41#include <sstream>
42
43using namespace llvm;
44
45#define DEBUG_TYPE "chr"
46
47#define CHR_DEBUG(X) LLVM_DEBUG(X)
48
49static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
50 cl::desc("Apply CHR for all functions"));
51
52static cl::opt<double> CHRBiasThreshold(
53 "chr-bias-threshold", cl::init(0.99), cl::Hidden,
54 cl::desc("CHR considers a branch bias greater than this ratio as biased"));
55
56static cl::opt<unsigned> CHRMergeThreshold(
57 "chr-merge-threshold", cl::init(2), cl::Hidden,
58 cl::desc("CHR merges a group of N branches/selects where N >= this value"));
59
60static cl::opt<std::string> CHRModuleList(
61 "chr-module-list", cl::init(""), cl::Hidden,
62 cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
63
64static cl::opt<std::string> CHRFunctionList(
65 "chr-function-list", cl::init(""), cl::Hidden,
66 cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
67
68static StringSet<> CHRModules;
69static StringSet<> CHRFunctions;
70
71static void ParseCHRFilterFiles() {
72 if (!CHRModuleList.empty()) {
73 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
74 if (!FileOrErr) {
75 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
76 std::exit(1);
77 }
78 StringRef Buf = FileOrErr->get()->getBuffer();
79 SmallVector<StringRef, 0> Lines;
80 Buf.split(Lines, '\n');
81 for (StringRef Line : Lines) {
82 Line = Line.trim();
83 if (!Line.empty())
84 CHRModules.insert(Line);
85 }
86 }
87 if (!CHRFunctionList.empty()) {
88 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
89 if (!FileOrErr) {
90 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
91 std::exit(1);
92 }
93 StringRef Buf = FileOrErr->get()->getBuffer();
94 SmallVector<StringRef, 0> Lines;
95 Buf.split(Lines, '\n');
96 for (StringRef Line : Lines) {
97 Line = Line.trim();
98 if (!Line.empty())
99 CHRFunctions.insert(Line);
100 }
101 }
102}
103
104namespace {
105class ControlHeightReductionLegacyPass : public FunctionPass {
106public:
107 static char ID;
108
109 ControlHeightReductionLegacyPass() : FunctionPass(ID) {
110 initializeControlHeightReductionLegacyPassPass(
111 *PassRegistry::getPassRegistry());
112 ParseCHRFilterFiles();
113 }
114
115 bool runOnFunction(Function &F) override;
116 void getAnalysisUsage(AnalysisUsage &AU) const override {
117 AU.addRequired<BlockFrequencyInfoWrapperPass>();
118 AU.addRequired<DominatorTreeWrapperPass>();
119 AU.addRequired<ProfileSummaryInfoWrapperPass>();
120 AU.addRequired<RegionInfoPass>();
121 AU.addPreserved<GlobalsAAWrapperPass>();
122 }
123};
124} // end anonymous namespace
125
126char ControlHeightReductionLegacyPass::ID = 0;
127
128INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass,
129 "chr",
130 "Reduce control height in the hot paths",
131 false, false)
132INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
133INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
134INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
135INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
136INITIALIZE_PASS_END(ControlHeightReductionLegacyPass,
137 "chr",
138 "Reduce control height in the hot paths",
139 false, false)
140
141FunctionPass *llvm::createControlHeightReductionLegacyPass() {
142 return new ControlHeightReductionLegacyPass();
143}
144
145namespace {
146
147struct CHRStats {
148 CHRStats() : NumBranches(0), NumBranchesDelta(0),
149 WeightedNumBranchesDelta(0) {}
150 void print(raw_ostream &OS) const {
151 OS << "CHRStats: NumBranches " << NumBranches
152 << " NumBranchesDelta " << NumBranchesDelta
153 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
154 }
155 uint64_t NumBranches; // The original number of conditional branches /
156 // selects
157 uint64_t NumBranchesDelta; // The decrease of the number of conditional
158 // branches / selects in the hot paths due to CHR.
159 uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile
160 // count at the scope entry.
161};
162
Fangrui Songc8f348c2018-09-05 03:10:20 +0000163inline raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS,
164 const CHRStats &Stats) {
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +0000165 Stats.print(OS);
166 return OS;
167}
168
169// RegInfo - some properties of a Region.
170struct RegInfo {
171 RegInfo() : R(nullptr), HasBranch(false) {}
172 RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {}
173 Region *R;
174 bool HasBranch;
175 SmallVector<SelectInst *, 8> Selects;
176};
177
178typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
179
180// CHRScope - a sequence of regions to CHR together. It corresponds to a
181// sequence of conditional blocks. It can have subscopes which correspond to
182// nested conditional blocks. Nested CHRScopes form a tree.
183class CHRScope {
184 public:
185 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
186 assert(RI.R && "Null RegionIn");
187 RegInfos.push_back(RI);
188 }
189
190 Region *getParentRegion() {
191 assert(RegInfos.size() > 0 && "Empty CHRScope");
192 Region *Parent = RegInfos[0].R->getParent();
193 assert(Parent && "Unexpected to call this on the top-level region");
194 return Parent;
195 }
196
197 BasicBlock *getEntryBlock() {
198 assert(RegInfos.size() > 0 && "Empty CHRScope");
199 return RegInfos.front().R->getEntry();
200 }
201
202 BasicBlock *getExitBlock() {
203 assert(RegInfos.size() > 0 && "Empty CHRScope");
204 return RegInfos.back().R->getExit();
205 }
206
207 bool appendable(CHRScope *Next) {
208 // The next scope is appendable only if this scope is directly connected to
209 // it (which implies it post-dominates this scope) and this scope dominates
210 // it (no edge to the next scope outside this scope).
211 BasicBlock *NextEntry = Next->getEntryBlock();
212 if (getExitBlock() != NextEntry)
213 // Not directly connected.
214 return false;
215 Region *LastRegion = RegInfos.back().R;
216 for (BasicBlock *Pred : predecessors(NextEntry))
217 if (!LastRegion->contains(Pred))
218 // There's an edge going into the entry of the next scope from outside
219 // of this scope.
220 return false;
221 return true;
222 }
223
224 void append(CHRScope *Next) {
225 assert(RegInfos.size() > 0 && "Empty CHRScope");
226 assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
227 assert(getParentRegion() == Next->getParentRegion() &&
228 "Must be siblings");
229 assert(getExitBlock() == Next->getEntryBlock() &&
230 "Must be adjacent");
231 for (RegInfo &RI : Next->RegInfos)
232 RegInfos.push_back(RI);
233 for (CHRScope *Sub : Next->Subs)
234 Subs.push_back(Sub);
235 }
236
237 void addSub(CHRScope *SubIn) {
238#ifndef NDEBUG
239 bool is_child = false;
240 for (RegInfo &RI : RegInfos)
241 if (RI.R == SubIn->getParentRegion()) {
242 is_child = true;
243 break;
244 }
245 assert(is_child && "Must be a child");
246#endif
247 Subs.push_back(SubIn);
248 }
249
250 // Split this scope at the boundary region into two, which will belong to the
251 // tail and returns the tail.
252 CHRScope *split(Region *Boundary) {
253 assert(Boundary && "Boundary null");
254 assert(RegInfos.begin()->R != Boundary &&
255 "Can't be split at beginning");
256 auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(),
257 [&Boundary](const RegInfo& RI) {
258 return Boundary == RI.R;
259 });
260 if (BoundaryIt == RegInfos.end())
261 return nullptr;
262 SmallVector<RegInfo, 8> TailRegInfos;
263 SmallVector<CHRScope *, 8> TailSubs;
264 TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end());
265 RegInfos.resize(BoundaryIt - RegInfos.begin());
266 DenseSet<Region *> TailRegionSet;
267 for (RegInfo &RI : TailRegInfos)
268 TailRegionSet.insert(RI.R);
269 for (auto It = Subs.begin(); It != Subs.end(); ) {
270 CHRScope *Sub = *It;
271 assert(Sub && "null Sub");
272 Region *Parent = Sub->getParentRegion();
273 if (TailRegionSet.count(Parent)) {
274 TailSubs.push_back(Sub);
275 It = Subs.erase(It);
276 } else {
277 assert(std::find_if(RegInfos.begin(), RegInfos.end(),
278 [&Parent](const RegInfo& RI) {
279 return Parent == RI.R;
280 }) != RegInfos.end() &&
281 "Must be in head");
282 ++It;
283 }
284 }
285 assert(HoistStopMap.empty() && "MapHoistStops must be empty");
286 return new CHRScope(TailRegInfos, TailSubs);
287 }
288
289 bool contains(Instruction *I) const {
290 BasicBlock *Parent = I->getParent();
291 for (const RegInfo &RI : RegInfos)
292 if (RI.R->contains(Parent))
293 return true;
294 return false;
295 }
296
297 void print(raw_ostream &OS) const;
298
299 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
300 SmallVector<CHRScope *, 8> Subs; // Subscopes.
301
302 // The instruction at which to insert the CHR conditional branch (and hoist
303 // the dependent condition values).
304 Instruction *BranchInsertPoint;
305
306 // True-biased and false-biased regions (conditional blocks),
307 // respectively. Used only for the outermost scope and includes regions in
308 // subscopes. The rest are unbiased.
309 DenseSet<Region *> TrueBiasedRegions;
310 DenseSet<Region *> FalseBiasedRegions;
311 // Among the biased regions, the regions that get CHRed.
312 SmallVector<RegInfo, 8> CHRRegions;
313
314 // True-biased and false-biased selects, respectively. Used only for the
315 // outermost scope and includes ones in subscopes.
316 DenseSet<SelectInst *> TrueBiasedSelects;
317 DenseSet<SelectInst *> FalseBiasedSelects;
318
319 // Map from one of the above regions to the instructions to stop
320 // hoisting instructions at through use-def chains.
321 HoistStopMapTy HoistStopMap;
322
323 private:
324 CHRScope(SmallVector<RegInfo, 8> &RegInfosIn,
325 SmallVector<CHRScope *, 8> &SubsIn)
326 : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {}
327};
328
329inline raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
330 Scope.print(OS);
331 return OS;
332}
333
334class CHR {
335 public:
336 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
337 ProfileSummaryInfo &PSIin, RegionInfo &RIin)
338 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin) {}
339
340 ~CHR() {
341 for (CHRScope *Scope : Scopes) {
342 delete Scope;
343 }
344 }
345
346 bool run();
347
348 private:
349 // See the comments in CHR::run() for the high level flow of the algorithm and
350 // what the following functions do.
351
352 void findScopes(SmallVectorImpl<CHRScope *> &Output) {
353 Region *R = RI.getTopLevelRegion();
354 CHRScope *Scope = findScopes(R, nullptr, nullptr, Output);
355 if (Scope) {
356 Output.push_back(Scope);
357 }
358 }
359 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
360 SmallVectorImpl<CHRScope *> &Scopes);
361 CHRScope *findScope(Region *R);
362 void checkScopeHoistable(CHRScope *Scope);
363
364 void splitScopes(SmallVectorImpl<CHRScope *> &Input,
365 SmallVectorImpl<CHRScope *> &Output);
366 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
367 CHRScope *Outer,
368 DenseSet<Value *> *OuterConditionValues,
369 Instruction *OuterInsertPoint,
370 SmallVectorImpl<CHRScope *> &Output,
371 DenseSet<Instruction *> &Unhoistables);
372
373 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
374 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
375
376 void filterScopes(SmallVectorImpl<CHRScope *> &Input,
377 SmallVectorImpl<CHRScope *> &Output);
378
379 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
380 SmallVectorImpl<CHRScope *> &Output);
381 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
382
383 void sortScopes(SmallVectorImpl<CHRScope *> &Input,
384 SmallVectorImpl<CHRScope *> &Output);
385
386 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
387 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
388 void cloneScopeBlocks(CHRScope *Scope,
389 BasicBlock *PreEntryBlock,
390 BasicBlock *ExitBlock,
391 Region *LastRegion,
392 ValueToValueMapTy &VMap);
393 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
394 BasicBlock *EntryBlock,
395 BasicBlock *NewEntryBlock,
396 ValueToValueMapTy &VMap);
397 void fixupBranchesAndSelects(CHRScope *Scope,
398 BasicBlock *PreEntryBlock,
399 BranchInst *MergedBR,
400 uint64_t ProfileCount);
401 void fixupBranch(Region *R,
402 CHRScope *Scope,
403 IRBuilder<> &IRB,
404 Value *&MergedCondition, BranchProbability &CHRBranchBias);
405 void fixupSelect(SelectInst* SI,
406 CHRScope *Scope,
407 IRBuilder<> &IRB,
408 Value *&MergedCondition, BranchProbability &CHRBranchBias);
409 void addToMergedCondition(bool IsTrueBiased, Value *Cond,
410 Instruction *BranchOrSelect,
411 CHRScope *Scope,
412 IRBuilder<> &IRB,
413 Value *&MergedCondition);
414
415 Function &F;
416 BlockFrequencyInfo &BFI;
417 DominatorTree &DT;
418 ProfileSummaryInfo &PSI;
419 RegionInfo &RI;
420 CHRStats Stats;
421
422 // All the true-biased regions in the function
423 DenseSet<Region *> TrueBiasedRegionsGlobal;
424 // All the false-biased regions in the function
425 DenseSet<Region *> FalseBiasedRegionsGlobal;
426 // All the true-biased selects in the function
427 DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
428 // All the false-biased selects in the function
429 DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
430 // A map from biased regions to their branch bias
431 DenseMap<Region *, BranchProbability> BranchBiasMap;
432 // A map from biased selects to their branch bias
433 DenseMap<SelectInst *, BranchProbability> SelectBiasMap;
434 // All the scopes.
435 DenseSet<CHRScope *> Scopes;
436};
437
438} // end anonymous namespace
439
440static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) {
441 if (ForceCHR)
442 return true;
443
444 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
445 if (CHRModules.count(F.getParent()->getName()))
446 return true;
447 StringRef Name = F.getName();
448 if (CHRFunctions.count(Name))
449 return true;
450 const char* DemangledName = nullptr;
Hiroshi Yamauchi72ee6d62018-09-04 18:10:54 +0000451#if !defined(_MSC_VER)
Reid Kleckner792a4f82018-09-04 20:34:47 +0000452 int Status = -1;
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +0000453 DemangledName = abi::__cxa_demangle(Name.str().c_str(),
454 nullptr, nullptr, &Status);
Hiroshi Yamauchi72ee6d62018-09-04 18:10:54 +0000455#endif
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +0000456 return DemangledName && CHRFunctions.count(DemangledName);
457 }
458
459 assert(PSI.hasProfileSummary() && "Empty PSI?");
460 return PSI.isFunctionEntryHot(&F);
461}
462
Fangrui Songc8f348c2018-09-05 03:10:20 +0000463static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label,
464 CHRStats *Stats) {
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +0000465 std::string Name = F.getName().str();
466 const char *DemangledName = nullptr;
Hiroshi Yamauchi72ee6d62018-09-04 18:10:54 +0000467#if !defined(_MSC_VER)
Reid Kleckner792a4f82018-09-04 20:34:47 +0000468 int Status = -1;
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +0000469 DemangledName = abi::__cxa_demangle(Name.c_str(),
470 nullptr, nullptr, &Status);
Hiroshi Yamauchi72ee6d62018-09-04 18:10:54 +0000471#endif
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +0000472 if (DemangledName == nullptr) {
473 DemangledName = "<NOT-MANGLED>";
474 }
475 std::string ModuleName = F.getParent()->getName().str();
476 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
477 << Name);
478 if (Stats)
479 CHR_DEBUG(dbgs() << " " << *Stats);
480 CHR_DEBUG(dbgs() << "\n");
481 CHR_DEBUG(F.dump());
482}
483
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +0000484void CHRScope::print(raw_ostream &OS) const {
485 assert(RegInfos.size() > 0 && "Empty CHRScope");
486 OS << "CHRScope[";
487 OS << RegInfos.size() << ", Regions[";
488 for (const RegInfo &RI : RegInfos) {
489 OS << RI.R->getNameStr();
490 if (RI.HasBranch)
491 OS << " B";
492 if (RI.Selects.size() > 0)
493 OS << " S" << RI.Selects.size();
494 OS << ", ";
495 }
496 if (RegInfos[0].R->getParent()) {
497 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
498 } else {
499 // top level region
500 OS << "]";
501 }
502 OS << ", Subs[";
503 for (CHRScope *Sub : Subs) {
504 OS << *Sub << ", ";
505 }
506 OS << "]]";
507}
508
509// Return true if the given instruction type can be hoisted by CHR.
510static bool isHoistableInstructionType(Instruction *I) {
511 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) ||
512 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
513 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
514 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
515 isa<InsertValueInst>(I);
516}
517
518// Return true if the given instruction can be hoisted by CHR.
519static bool isHoistable(Instruction *I, DominatorTree &DT) {
520 if (!isHoistableInstructionType(I))
521 return false;
522 return isSafeToSpeculativelyExecute(I, nullptr, &DT);
523}
524
525// Recursively traverse the use-def chains of the given value and return a set
526// of the unhoistable base values defined within the scope (excluding the
527// first-region entry block) or the (hoistable or unhoistable) base values that
528// are defined outside (including the first-region entry block) of the
529// scope. The returned set doesn't include constants.
530static std::set<Value *> getBaseValues(Value *V,
531 DominatorTree &DT) {
532 std::set<Value *> Result;
533 if (auto *I = dyn_cast<Instruction>(V)) {
534 // We don't stop at a block that's not in the Scope because we would miss some
535 // instructions that are based on the same base values if we stop there.
536 if (!isHoistable(I, DT)) {
537 Result.insert(I);
538 return Result;
539 }
540 // I is hoistable above the Scope.
541 for (Value *Op : I->operands()) {
542 std::set<Value *> OpResult = getBaseValues(Op, DT);
543 Result.insert(OpResult.begin(), OpResult.end());
544 }
545 return Result;
546 }
547 if (isa<Argument>(V)) {
548 Result.insert(V);
549 return Result;
550 }
551 // We don't include others like constants because those won't lead to any
552 // chance of folding of conditions (eg two bit checks merged into one check)
553 // after CHR.
554 return Result; // empty
555}
556
557// Return true if V is already hoisted or can be hoisted (along with its
558// operands) above the insert point. When it returns true and HoistStops is
559// non-null, the instructions to stop hoisting at through the use-def chains are
560// inserted into HoistStops.
561static bool
562checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
563 DenseSet<Instruction *> &Unhoistables,
564 DenseSet<Instruction *> *HoistStops) {
565 assert(InsertPoint && "Null InsertPoint");
566 if (auto *I = dyn_cast<Instruction>(V)) {
567 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
568 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
569 if (Unhoistables.count(I)) {
570 // Don't hoist if they are not to be hoisted.
571 return false;
572 }
573 if (DT.dominates(I, InsertPoint)) {
574 // We are already above the insert point. Stop here.
575 if (HoistStops)
576 HoistStops->insert(I);
577 return true;
578 }
579 // We aren't not above the insert point, check if we can hoist it above the
580 // insert point.
581 if (isHoistable(I, DT)) {
582 // Check operands first.
583 DenseSet<Instruction *> OpsHoistStops;
584 bool AllOpsHoisted = true;
585 for (Value *Op : I->operands()) {
586 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops)) {
587 AllOpsHoisted = false;
588 break;
589 }
590 }
591 if (AllOpsHoisted) {
592 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
593 if (HoistStops)
594 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
595 return true;
596 }
597 }
598 return false;
599 }
600 // Non-instructions are considered hoistable.
601 return true;
602}
603
604// Returns true and sets the true probability and false probability of an
605// MD_prof metadata if it's well-formed.
606static bool CheckMDProf(MDNode *MD, BranchProbability &TrueProb,
607 BranchProbability &FalseProb) {
608 if (!MD) return false;
609 MDString *MDName = cast<MDString>(MD->getOperand(0));
610 if (MDName->getString() != "branch_weights" ||
611 MD->getNumOperands() != 3)
612 return false;
613 ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1));
614 ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2));
615 if (!TrueWeight || !FalseWeight)
616 return false;
617 APInt TrueWt = TrueWeight->getValue();
618 APInt FalseWt = FalseWeight->getValue();
619 APInt SumWt = TrueWt + FalseWt;
620 TrueProb = BranchProbability::getBranchProbability(TrueWt.getZExtValue(),
621 SumWt.getZExtValue());
622 FalseProb = BranchProbability::getBranchProbability(FalseWt.getZExtValue(),
623 SumWt.getZExtValue());
624 return true;
625}
626
627static BranchProbability getCHRBiasThreshold() {
628 return BranchProbability::getBranchProbability(
629 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
630}
631
632// A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >=
633// CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
634// CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
635// false.
636template<typename K, typename S, typename M>
637bool CheckBias(K *Key, BranchProbability TrueProb, BranchProbability FalseProb,
638 S &TrueSet, S &FalseSet, M &BiasMap) {
639 BranchProbability Threshold = getCHRBiasThreshold();
640 if (TrueProb >= Threshold) {
641 TrueSet.insert(Key);
642 BiasMap[Key] = TrueProb;
643 return true;
644 } else if (FalseProb >= Threshold) {
645 FalseSet.insert(Key);
646 BiasMap[Key] = FalseProb;
647 return true;
648 }
649 return false;
650}
651
652// Returns true and insert a region into the right biased set and the map if the
653// branch of the region is biased.
654static bool CheckBiasedBranch(BranchInst *BI, Region *R,
655 DenseSet<Region *> &TrueBiasedRegionsGlobal,
656 DenseSet<Region *> &FalseBiasedRegionsGlobal,
657 DenseMap<Region *, BranchProbability> &BranchBiasMap) {
658 if (!BI->isConditional())
659 return false;
660 BranchProbability ThenProb, ElseProb;
661 if (!CheckMDProf(BI->getMetadata(LLVMContext::MD_prof),
662 ThenProb, ElseProb))
663 return false;
664 BasicBlock *IfThen = BI->getSuccessor(0);
665 BasicBlock *IfElse = BI->getSuccessor(1);
666 assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
667 IfThen != IfElse &&
668 "Invariant from findScopes");
669 if (IfThen == R->getExit()) {
670 // Swap them so that IfThen/ThenProb means going into the conditional code
671 // and IfElse/ElseProb means skipping it.
672 std::swap(IfThen, IfElse);
673 std::swap(ThenProb, ElseProb);
674 }
675 CHR_DEBUG(dbgs() << "BI " << *BI << " ");
676 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
677 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
678 return CheckBias(R, ThenProb, ElseProb,
679 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
680 BranchBiasMap);
681}
682
683// Returns true and insert a select into the right biased set and the map if the
684// select is biased.
685static bool CheckBiasedSelect(
686 SelectInst *SI, Region *R,
687 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
688 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
689 DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) {
690 BranchProbability TrueProb, FalseProb;
691 if (!CheckMDProf(SI->getMetadata(LLVMContext::MD_prof),
692 TrueProb, FalseProb))
693 return false;
694 CHR_DEBUG(dbgs() << "SI " << *SI << " ");
695 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
696 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
697 return CheckBias(SI, TrueProb, FalseProb,
698 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
699 SelectBiasMap);
700}
701
702// Returns the instruction at which to hoist the dependent condition values and
703// insert the CHR branch for a region. This is the terminator branch in the
704// entry block or the first select in the entry block, if any.
705static Instruction* getBranchInsertPoint(RegInfo &RI) {
706 Region *R = RI.R;
707 BasicBlock *EntryBB = R->getEntry();
708 // The hoist point is by default the terminator of the entry block, which is
709 // the same as the branch instruction if RI.HasBranch is true.
710 Instruction *HoistPoint = EntryBB->getTerminator();
711 for (SelectInst *SI : RI.Selects) {
712 if (SI->getParent() == EntryBB) {
713 // Pick the first select in Selects in the entry block. Note Selects is
714 // sorted in the instruction order within a block (asserted below).
715 HoistPoint = SI;
716 break;
717 }
718 }
719 assert(HoistPoint && "Null HoistPoint");
720#ifndef NDEBUG
721 // Check that HoistPoint is the first one in Selects in the entry block,
722 // if any.
723 DenseSet<Instruction *> EntryBlockSelectSet;
724 for (SelectInst *SI : RI.Selects) {
725 if (SI->getParent() == EntryBB) {
726 EntryBlockSelectSet.insert(SI);
727 }
728 }
729 for (Instruction &I : *EntryBB) {
730 if (EntryBlockSelectSet.count(&I) > 0) {
731 assert(&I == HoistPoint &&
732 "HoistPoint must be the first one in Selects");
733 break;
734 }
735 }
736#endif
737 return HoistPoint;
738}
739
740// Find a CHR scope in the given region.
741CHRScope * CHR::findScope(Region *R) {
742 CHRScope *Result = nullptr;
743 BasicBlock *Entry = R->getEntry();
744 BasicBlock *Exit = R->getExit(); // null if top level.
745 assert(Entry && "Entry must not be null");
746 assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
747 "Only top level region has a null exit");
748 if (Entry)
749 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
750 else
751 CHR_DEBUG(dbgs() << "Entry null\n");
752 if (Exit)
753 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
754 else
755 CHR_DEBUG(dbgs() << "Exit null\n");
756 // Exclude cases where Entry is part of a subregion (hence it doesn't belong
757 // to this region).
758 bool EntryInSubregion = RI.getRegionFor(Entry) != R;
759 if (EntryInSubregion)
760 return nullptr;
761 // Exclude loops
762 for (BasicBlock *Pred : predecessors(Entry))
763 if (R->contains(Pred))
764 return nullptr;
765 if (Exit) {
766 // Try to find an if-then block (check if R is an if-then).
767 // if (cond) {
768 // ...
769 // }
770 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
771 if (BI)
772 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
773 else
774 CHR_DEBUG(dbgs() << "BI null\n");
775 if (BI && BI->isConditional()) {
776 BasicBlock *S0 = BI->getSuccessor(0);
777 BasicBlock *S1 = BI->getSuccessor(1);
778 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
779 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
780 if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
781 RegInfo RI(R);
782 RI.HasBranch = CheckBiasedBranch(
783 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
784 BranchBiasMap);
785 Result = new CHRScope(RI);
786 Scopes.insert(Result);
787 CHR_DEBUG(dbgs() << "Found a region with a branch\n");
788 ++Stats.NumBranches;
789 }
790 }
791 }
792 {
793 // Try to look for selects in the direct child blocks (as opposed to in
794 // subregions) of R.
795 // ...
796 // if (..) { // Some subregion
797 // ...
798 // }
799 // if (..) { // Some subregion
800 // ...
801 // }
802 // ...
803 // a = cond ? b : c;
804 // ...
805 SmallVector<SelectInst *, 8> Selects;
806 for (RegionNode *E : R->elements()) {
807 if (E->isSubRegion())
808 continue;
809 // This returns the basic block of E if E is a direct child of R (not a
810 // subregion.)
811 BasicBlock *BB = E->getEntry();
812 // Need to push in the order to make it easier to find the first Select
813 // later.
814 for (Instruction &I : *BB) {
815 if (auto *SI = dyn_cast<SelectInst>(&I)) {
816 Selects.push_back(SI);
817 ++Stats.NumBranches;
818 }
819 }
820 }
821 if (Selects.size() > 0) {
822 auto AddSelects = [&](RegInfo &RI) {
823 for (auto *SI : Selects)
824 if (CheckBiasedSelect(SI, RI.R,
825 TrueBiasedSelectsGlobal,
826 FalseBiasedSelectsGlobal,
827 SelectBiasMap))
828 RI.Selects.push_back(SI);
829 };
830 if (!Result) {
831 CHR_DEBUG(dbgs() << "Found a select-only region\n");
832 RegInfo RI(R);
833 AddSelects(RI);
834 Result = new CHRScope(RI);
835 Scopes.insert(Result);
836 } else {
837 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
838 AddSelects(Result->RegInfos[0]);
839 }
840 }
841 }
842
843 if (Result) {
844 checkScopeHoistable(Result);
845 }
846 return Result;
847}
848
849// Check that any of the branch and the selects in the region could be
850// hoisted above the the CHR branch insert point (the most dominating of
851// them, either the branch (at the end of the first block) or the first
852// select in the first block). If the branch can't be hoisted, drop the
853// selects in the first blocks.
854//
855// For example, for the following scope/region with selects, we want to insert
856// the merged branch right before the first select in the first/entry block by
857// hoisting c1, c2, c3, and c4.
858//
859// // Branch insert point here.
860// a = c1 ? b : c; // Select 1
861// d = c2 ? e : f; // Select 2
862// if (c3) { // Branch
863// ...
864// c4 = foo() // A call.
865// g = c4 ? h : i; // Select 3
866// }
867//
868// But suppose we can't hoist c4 because it's dependent on the preceding
869// call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
870// Select 2. If we can't hoist c3, we drop Selects 1 & 2.
871void CHR::checkScopeHoistable(CHRScope *Scope) {
872 RegInfo &RI = Scope->RegInfos[0];
873 Region *R = RI.R;
874 BasicBlock *EntryBB = R->getEntry();
875 auto *Branch = RI.HasBranch ?
876 cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
877 SmallVector<SelectInst *, 8> &Selects = RI.Selects;
878 if (RI.HasBranch || !Selects.empty()) {
879 Instruction *InsertPoint = getBranchInsertPoint(RI);
880 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
881 // Avoid a data dependence from a select or a branch to a(nother)
882 // select. Note no instruction can't data-depend on a branch (a branch
883 // instruction doesn't produce a value).
884 DenseSet<Instruction *> Unhoistables;
885 // Initialize Unhoistables with the selects.
886 for (SelectInst *SI : Selects) {
887 Unhoistables.insert(SI);
888 }
889 // Remove Selects that can't be hoisted.
890 for (auto it = Selects.begin(); it != Selects.end(); ) {
891 SelectInst *SI = *it;
892 if (SI == InsertPoint) {
893 ++it;
894 continue;
895 }
896 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
897 DT, Unhoistables, nullptr);
898 if (!IsHoistable) {
899 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
900 it = Selects.erase(it);
901 // Since we are dropping the select here, we also drop it from
902 // Unhoistables.
903 Unhoistables.erase(SI);
904 } else
905 ++it;
906 }
907 // Update InsertPoint after potentially removing selects.
908 InsertPoint = getBranchInsertPoint(RI);
909 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
910 if (RI.HasBranch && InsertPoint != Branch) {
911 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
912 DT, Unhoistables, nullptr);
913 if (!IsHoistable) {
914 // If the branch isn't hoistable, drop the selects in the entry
915 // block, preferring the branch, which makes the branch the hoist
916 // point.
917 assert(InsertPoint != Branch && "Branch must not be the hoist point");
918 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
919 CHR_DEBUG(
920 for (SelectInst *SI : Selects) {
921 dbgs() << "SI " << *SI << "\n";
922 });
923 Selects.erase(std::remove_if(Selects.begin(), Selects.end(),
924 [EntryBB](SelectInst *SI) {
925 return SI->getParent() == EntryBB;
926 }), Selects.end());
927 Unhoistables.clear();
928 InsertPoint = Branch;
929 }
930 }
931 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
932#ifndef NDEBUG
933 if (RI.HasBranch) {
934 assert(!DT.dominates(Branch, InsertPoint) &&
935 "Branch can't be already above the hoist point");
936 assert(checkHoistValue(Branch->getCondition(), InsertPoint,
937 DT, Unhoistables, nullptr) &&
938 "checkHoistValue for branch");
939 }
940 for (auto *SI : Selects) {
941 assert(!DT.dominates(SI, InsertPoint) &&
942 "SI can't be already above the hoist point");
943 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
944 Unhoistables, nullptr) &&
945 "checkHoistValue for selects");
946 }
947 CHR_DEBUG(dbgs() << "Result\n");
948 if (RI.HasBranch) {
949 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
950 }
951 for (auto *SI : Selects) {
952 CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
953 }
954#endif
955 }
956}
957
958// Traverse the region tree, find all nested scopes and merge them if possible.
959CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
960 SmallVectorImpl<CHRScope *> &Scopes) {
961 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
962 CHRScope *Result = findScope(R);
963 // Visit subscopes.
964 CHRScope *ConsecutiveSubscope = nullptr;
965 SmallVector<CHRScope *, 8> Subscopes;
966 for (auto It = R->begin(); It != R->end(); ++It) {
967 const std::unique_ptr<Region> &SubR = *It;
968 auto Next_It = std::next(It);
969 Region *NextSubR = Next_It != R->end() ? Next_It->get() : nullptr;
970 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
971 << "\n");
972 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
973 if (SubCHRScope) {
974 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
975 } else {
976 CHR_DEBUG(dbgs() << "Subregion Scope null\n");
977 }
978 if (SubCHRScope) {
979 if (!ConsecutiveSubscope)
980 ConsecutiveSubscope = SubCHRScope;
981 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
982 Subscopes.push_back(ConsecutiveSubscope);
983 ConsecutiveSubscope = SubCHRScope;
984 } else
985 ConsecutiveSubscope->append(SubCHRScope);
986 } else {
987 if (ConsecutiveSubscope) {
988 Subscopes.push_back(ConsecutiveSubscope);
989 }
990 ConsecutiveSubscope = nullptr;
991 }
992 }
993 if (ConsecutiveSubscope) {
994 Subscopes.push_back(ConsecutiveSubscope);
995 }
996 for (CHRScope *Sub : Subscopes) {
997 if (Result) {
998 // Combine it with the parent.
999 Result->addSub(Sub);
1000 } else {
1001 // Push Subscopes as they won't be combined with the parent.
1002 Scopes.push_back(Sub);
1003 }
1004 }
1005 return Result;
1006}
1007
1008static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) {
1009 DenseSet<Value *> ConditionValues;
1010 if (RI.HasBranch) {
1011 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
1012 ConditionValues.insert(BI->getCondition());
1013 }
1014 for (SelectInst *SI : RI.Selects) {
1015 ConditionValues.insert(SI->getCondition());
1016 }
1017 return ConditionValues;
1018}
1019
1020
1021// Determine whether to split a scope depending on the sets of the branch
1022// condition values of the previous region and the current region. We split
1023// (return true) it if 1) the condition values of the inner/lower scope can't be
1024// hoisted up to the outer/upper scope, or 2) the two sets of the condition
1025// values have an empty intersection (because the combined branch conditions
1026// won't probably lead to a simpler combined condition).
1027static bool shouldSplit(Instruction *InsertPoint,
1028 DenseSet<Value *> &PrevConditionValues,
1029 DenseSet<Value *> &ConditionValues,
1030 DominatorTree &DT,
1031 DenseSet<Instruction *> &Unhoistables) {
1032 CHR_DEBUG(
1033 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
1034 for (Value *V : PrevConditionValues) {
1035 dbgs() << *V << ", ";
1036 }
1037 dbgs() << " ConditionValues ";
1038 for (Value *V : ConditionValues) {
1039 dbgs() << *V << ", ";
1040 }
1041 dbgs() << "\n");
1042 assert(InsertPoint && "Null InsertPoint");
1043 // If any of Bases isn't hoistable to the hoist point, split.
1044 for (Value *V : ConditionValues) {
1045 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr)) {
1046 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
1047 return true; // Not hoistable, split.
1048 }
1049 }
1050 // If PrevConditionValues or ConditionValues is empty, don't split to avoid
1051 // unnecessary splits at scopes with no branch/selects. If
1052 // PrevConditionValues and ConditionValues don't intersect at all, split.
1053 if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
1054 // Use std::set as DenseSet doesn't work with set_intersection.
1055 std::set<Value *> PrevBases, Bases;
1056 for (Value *V : PrevConditionValues) {
1057 std::set<Value *> BaseValues = getBaseValues(V, DT);
1058 PrevBases.insert(BaseValues.begin(), BaseValues.end());
1059 }
1060 for (Value *V : ConditionValues) {
1061 std::set<Value *> BaseValues = getBaseValues(V, DT);
1062 Bases.insert(BaseValues.begin(), BaseValues.end());
1063 }
1064 CHR_DEBUG(
1065 dbgs() << "PrevBases ";
1066 for (Value *V : PrevBases) {
1067 dbgs() << *V << ", ";
1068 }
1069 dbgs() << " Bases ";
1070 for (Value *V : Bases) {
1071 dbgs() << *V << ", ";
1072 }
1073 dbgs() << "\n");
1074 std::set<Value *> Intersection;
1075 std::set_intersection(PrevBases.begin(), PrevBases.end(),
1076 Bases.begin(), Bases.end(),
1077 std::inserter(Intersection, Intersection.begin()));
1078 if (Intersection.empty()) {
1079 // Empty intersection, split.
1080 CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
1081 return true;
1082 }
1083 }
1084 CHR_DEBUG(dbgs() << "No split\n");
1085 return false; // Don't split.
1086}
1087
1088static void GetSelectsInScope(CHRScope *Scope,
1089 DenseSet<Instruction *> &Output) {
1090 for (RegInfo &RI : Scope->RegInfos) {
1091 for (SelectInst *SI : RI.Selects) {
1092 Output.insert(SI);
1093 }
1094 }
1095 for (CHRScope *Sub : Scope->Subs) {
1096 GetSelectsInScope(Sub, Output);
1097 }
1098}
1099
1100void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
1101 SmallVectorImpl<CHRScope *> &Output) {
1102 for (CHRScope *Scope : Input) {
1103 assert(!Scope->BranchInsertPoint &&
1104 "BranchInsertPoint must not be set");
1105 DenseSet<Instruction *> Unhoistables;
1106 GetSelectsInScope(Scope, Unhoistables);
1107 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
1108 }
1109#ifndef NDEBUG
1110 for (CHRScope *Scope : Output) {
1111 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
1112 }
1113#endif
1114}
1115
1116SmallVector<CHRScope *, 8> CHR::splitScope(
1117 CHRScope *Scope,
1118 CHRScope *Outer,
1119 DenseSet<Value *> *OuterConditionValues,
1120 Instruction *OuterInsertPoint,
1121 SmallVectorImpl<CHRScope *> &Output,
1122 DenseSet<Instruction *> &Unhoistables) {
1123 if (Outer) {
1124 assert(OuterConditionValues && "Null OuterConditionValues");
1125 assert(OuterInsertPoint && "Null OuterInsertPoint");
1126 }
1127 bool PrevSplitFromOuter = true;
1128 DenseSet<Value *> PrevConditionValues;
1129 Instruction *PrevInsertPoint = nullptr;
1130 SmallVector<CHRScope *, 8> Splits;
1131 SmallVector<bool, 8> SplitsSplitFromOuter;
1132 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
1133 SmallVector<Instruction *, 8> SplitsInsertPoints;
1134 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy
1135 for (RegInfo &RI : RegInfos) {
1136 Instruction *InsertPoint = getBranchInsertPoint(RI);
1137 DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI);
1138 CHR_DEBUG(
1139 dbgs() << "ConditionValues ";
1140 for (Value *V : ConditionValues) {
1141 dbgs() << *V << ", ";
1142 }
1143 dbgs() << "\n");
1144 if (RI.R == RegInfos[0].R) {
1145 // First iteration. Check to see if we should split from the outer.
1146 if (Outer) {
1147 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
1148 CHR_DEBUG(dbgs() << "Should split from outer at "
1149 << RI.R->getNameStr() << "\n");
1150 if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
1151 ConditionValues, DT, Unhoistables)) {
1152 PrevConditionValues = ConditionValues;
1153 PrevInsertPoint = InsertPoint;
1154 } else {
1155 // Not splitting from the outer. Use the outer bases and insert
1156 // point. Union the bases.
1157 PrevSplitFromOuter = false;
1158 PrevConditionValues = *OuterConditionValues;
1159 PrevConditionValues.insert(ConditionValues.begin(),
1160 ConditionValues.end());
1161 PrevInsertPoint = OuterInsertPoint;
1162 }
1163 } else {
1164 CHR_DEBUG(dbgs() << "Outer null\n");
1165 PrevConditionValues = ConditionValues;
1166 PrevInsertPoint = InsertPoint;
1167 }
1168 } else {
1169 CHR_DEBUG(dbgs() << "Should split from prev at "
1170 << RI.R->getNameStr() << "\n");
1171 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
1172 DT, Unhoistables)) {
1173 CHRScope *Tail = Scope->split(RI.R);
1174 Scopes.insert(Tail);
1175 Splits.push_back(Scope);
1176 SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1177 SplitsConditionValues.push_back(PrevConditionValues);
1178 SplitsInsertPoints.push_back(PrevInsertPoint);
1179 Scope = Tail;
1180 PrevConditionValues = ConditionValues;
1181 PrevInsertPoint = InsertPoint;
1182 PrevSplitFromOuter = true;
1183 } else {
1184 // Not splitting. Union the bases. Keep the hoist point.
1185 PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
1186 }
1187 }
1188 }
1189 Splits.push_back(Scope);
1190 SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1191 SplitsConditionValues.push_back(PrevConditionValues);
1192 assert(PrevInsertPoint && "Null PrevInsertPoint");
1193 SplitsInsertPoints.push_back(PrevInsertPoint);
1194 assert(Splits.size() == SplitsConditionValues.size() &&
1195 Splits.size() == SplitsSplitFromOuter.size() &&
1196 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
1197 for (size_t I = 0; I < Splits.size(); ++I) {
1198 CHRScope *Split = Splits[I];
1199 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
1200 Instruction *SplitInsertPoint = SplitsInsertPoints[I];
1201 SmallVector<CHRScope *, 8> NewSubs;
1202 DenseSet<Instruction *> SplitUnhoistables;
1203 GetSelectsInScope(Split, SplitUnhoistables);
1204 for (CHRScope *Sub : Split->Subs) {
1205 SmallVector<CHRScope *, 8> SubSplits = splitScope(
1206 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
1207 SplitUnhoistables);
1208 NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end());
1209 }
1210 Split->Subs = NewSubs;
1211 }
1212 SmallVector<CHRScope *, 8> Result;
1213 for (size_t I = 0; I < Splits.size(); ++I) {
1214 CHRScope *Split = Splits[I];
1215 if (SplitsSplitFromOuter[I]) {
1216 // Split from the outer.
1217 Output.push_back(Split);
1218 Split->BranchInsertPoint = SplitsInsertPoints[I];
1219 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
1220 << "\n");
1221 } else {
1222 // Connected to the outer.
1223 Result.push_back(Split);
1224 }
1225 }
1226 if (!Outer)
1227 assert(Result.empty() &&
1228 "If no outer (top-level), must return no nested ones");
1229 return Result;
1230}
1231
1232void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
1233 for (CHRScope *Scope : Scopes) {
1234 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
1235 classifyBiasedScopes(Scope, Scope);
1236 CHR_DEBUG(
1237 dbgs() << "classifyBiasedScopes " << *Scope << "\n";
1238 dbgs() << "TrueBiasedRegions ";
1239 for (Region *R : Scope->TrueBiasedRegions) {
1240 dbgs() << R->getNameStr() << ", ";
1241 }
1242 dbgs() << "\n";
1243 dbgs() << "FalseBiasedRegions ";
1244 for (Region *R : Scope->FalseBiasedRegions) {
1245 dbgs() << R->getNameStr() << ", ";
1246 }
1247 dbgs() << "\n";
1248 dbgs() << "TrueBiasedSelects ";
1249 for (SelectInst *SI : Scope->TrueBiasedSelects) {
1250 dbgs() << *SI << ", ";
1251 }
1252 dbgs() << "\n";
1253 dbgs() << "FalseBiasedSelects ";
1254 for (SelectInst *SI : Scope->FalseBiasedSelects) {
1255 dbgs() << *SI << ", ";
1256 }
1257 dbgs() << "\n";);
1258 }
1259}
1260
1261void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
1262 for (RegInfo &RI : Scope->RegInfos) {
1263 if (RI.HasBranch) {
1264 Region *R = RI.R;
1265 if (TrueBiasedRegionsGlobal.count(R) > 0)
1266 OutermostScope->TrueBiasedRegions.insert(R);
1267 else if (FalseBiasedRegionsGlobal.count(R) > 0)
1268 OutermostScope->FalseBiasedRegions.insert(R);
1269 else
1270 llvm_unreachable("Must be biased");
1271 }
1272 for (SelectInst *SI : RI.Selects) {
1273 if (TrueBiasedSelectsGlobal.count(SI) > 0)
1274 OutermostScope->TrueBiasedSelects.insert(SI);
1275 else if (FalseBiasedSelectsGlobal.count(SI) > 0)
1276 OutermostScope->FalseBiasedSelects.insert(SI);
1277 else
1278 llvm_unreachable("Must be biased");
1279 }
1280 }
1281 for (CHRScope *Sub : Scope->Subs) {
1282 classifyBiasedScopes(Sub, OutermostScope);
1283 }
1284}
1285
1286static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
1287 unsigned NumBiased = Scope->TrueBiasedRegions.size() +
1288 Scope->FalseBiasedRegions.size() +
1289 Scope->TrueBiasedSelects.size() +
1290 Scope->FalseBiasedSelects.size();
1291 return NumBiased >= CHRMergeThreshold;
1292}
1293
1294void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
1295 SmallVectorImpl<CHRScope *> &Output) {
1296 for (CHRScope *Scope : Input) {
1297 // Filter out the ones with only one region and no subs.
1298 if (!hasAtLeastTwoBiasedBranches(Scope)) {
1299 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
1300 << Scope->TrueBiasedRegions.size()
1301 << " falsy-regions " << Scope->FalseBiasedRegions.size()
1302 << " true-selects " << Scope->TrueBiasedSelects.size()
1303 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
1304 continue;
1305 }
1306 Output.push_back(Scope);
1307 }
1308}
1309
1310void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
1311 SmallVectorImpl<CHRScope *> &Output) {
1312 for (CHRScope *Scope : Input) {
1313 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
1314 "Empty");
1315 setCHRRegions(Scope, Scope);
1316 Output.push_back(Scope);
1317 CHR_DEBUG(
1318 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
1319 for (auto pair : Scope->HoistStopMap) {
1320 Region *R = pair.first;
1321 dbgs() << "Region " << R->getNameStr() << "\n";
1322 for (Instruction *I : pair.second) {
1323 dbgs() << "HoistStop " << *I << "\n";
1324 }
1325 }
1326 dbgs() << "CHRRegions" << "\n";
1327 for (RegInfo &RI : Scope->CHRRegions) {
1328 dbgs() << RI.R->getNameStr() << "\n";
1329 });
1330 }
1331}
1332
1333void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
1334 DenseSet<Instruction *> Unhoistables;
1335 // Put the biased selects in Unhoistables because they should stay where they
1336 // are and constant-folded after CHR (in case one biased select or a branch
1337 // can depend on another biased select.)
1338 for (RegInfo &RI : Scope->RegInfos) {
1339 for (SelectInst *SI : RI.Selects) {
1340 Unhoistables.insert(SI);
1341 }
1342 }
1343 Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
1344 for (RegInfo &RI : Scope->RegInfos) {
1345 Region *R = RI.R;
1346 DenseSet<Instruction *> HoistStops;
1347 bool IsHoisted = false;
1348 if (RI.HasBranch) {
1349 assert((OutermostScope->TrueBiasedRegions.count(R) > 0 ||
1350 OutermostScope->FalseBiasedRegions.count(R) > 0) &&
1351 "Must be truthy or falsy");
1352 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1353 // Note checkHoistValue fills in HoistStops.
1354 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
1355 Unhoistables, &HoistStops);
1356 assert(IsHoistable && "Must be hoistable");
1357 (void)(IsHoistable); // Unused in release build
1358 IsHoisted = true;
1359 }
1360 for (SelectInst *SI : RI.Selects) {
1361 assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 ||
1362 OutermostScope->FalseBiasedSelects.count(SI) > 0) &&
1363 "Must be true or false biased");
1364 // Note checkHoistValue fills in HoistStops.
1365 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
1366 Unhoistables, &HoistStops);
1367 assert(IsHoistable && "Must be hoistable");
1368 (void)(IsHoistable); // Unused in release build
1369 IsHoisted = true;
1370 }
1371 if (IsHoisted) {
1372 OutermostScope->CHRRegions.push_back(RI);
1373 OutermostScope->HoistStopMap[R] = HoistStops;
1374 }
1375 }
1376 for (CHRScope *Sub : Scope->Subs)
1377 setCHRRegions(Sub, OutermostScope);
1378}
1379
1380bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
1381 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
1382}
1383
1384void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
1385 SmallVectorImpl<CHRScope *> &Output) {
1386 Output.resize(Input.size());
1387 std::copy(Input.begin(), Input.end(), Output.begin());
1388 std::stable_sort(Output.begin(), Output.end(), CHRScopeSorter);
1389}
1390
1391// Return true if V is already hoisted or was hoisted (along with its operands)
1392// to the insert point.
1393static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
1394 HoistStopMapTy &HoistStopMap,
1395 DenseSet<Instruction *> &HoistedSet,
1396 DenseSet<PHINode *> &TrivialPHIs) {
1397 auto IT = HoistStopMap.find(R);
1398 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
1399 DenseSet<Instruction *> &HoistStops = IT->second;
1400 if (auto *I = dyn_cast<Instruction>(V)) {
1401 if (I == HoistPoint)
1402 return;
1403 if (HoistStops.count(I))
1404 return;
1405 if (auto *PN = dyn_cast<PHINode>(I))
1406 if (TrivialPHIs.count(PN))
1407 // The trivial phi inserted by the previous CHR scope could replace a
1408 // non-phi in HoistStops. Note that since this phi is at the exit of a
1409 // previous CHR scope, which dominates this scope, it's safe to stop
1410 // hoisting there.
1411 return;
1412 if (HoistedSet.count(I))
1413 // Already hoisted, return.
1414 return;
1415 assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
1416 for (Value *Op : I->operands()) {
1417 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs);
1418 }
1419 I->moveBefore(HoistPoint);
1420 HoistedSet.insert(I);
1421 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
1422 }
1423}
1424
1425// Hoist the dependent condition values of the branches and the selects in the
1426// scope to the insert point.
1427static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
1428 DenseSet<PHINode *> &TrivialPHIs) {
1429 DenseSet<Instruction *> HoistedSet;
1430 for (const RegInfo &RI : Scope->CHRRegions) {
1431 Region *R = RI.R;
1432 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1433 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1434 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1435 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1436 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1437 HoistedSet, TrivialPHIs);
1438 }
1439 for (SelectInst *SI : RI.Selects) {
1440 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1441 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1442 if (!(IsTrueBiased || IsFalseBiased))
1443 continue;
1444 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1445 HoistedSet, TrivialPHIs);
1446 }
1447 }
1448}
1449
1450// Negate the predicate if an ICmp if it's used only by branches or selects by
1451// swapping the operands of the branches or the selects. Returns true if success.
1452static bool NegateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
1453 Instruction *ExcludedUser,
1454 CHRScope *Scope) {
1455 for (User *U : ICmp->users()) {
1456 if (U == ExcludedUser)
1457 continue;
1458 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
1459 continue;
1460 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
1461 continue;
1462 return false;
1463 }
1464 for (User *U : ICmp->users()) {
1465 if (U == ExcludedUser)
1466 continue;
1467 if (auto *BI = dyn_cast<BranchInst>(U)) {
1468 assert(BI->isConditional() && "Must be conditional");
1469 BI->swapSuccessors();
1470 // Don't need to swap this in terms of
1471 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
1472 // mean whehter the branch is likely go into the if-then rather than
1473 // successor0/successor1 and because we can tell which edge is the then or
1474 // the else one by comparing the destination to the region exit block.
1475 continue;
1476 }
1477 if (auto *SI = dyn_cast<SelectInst>(U)) {
1478 // Swap operands
1479 Value *TrueValue = SI->getTrueValue();
1480 Value *FalseValue = SI->getFalseValue();
1481 SI->setTrueValue(FalseValue);
1482 SI->setFalseValue(TrueValue);
1483 SI->swapProfMetadata();
1484 if (Scope->TrueBiasedSelects.count(SI)) {
1485 assert(Scope->FalseBiasedSelects.count(SI) == 0 &&
1486 "Must not be already in");
1487 Scope->FalseBiasedSelects.insert(SI);
1488 } else if (Scope->FalseBiasedSelects.count(SI)) {
1489 assert(Scope->TrueBiasedSelects.count(SI) == 0 &&
1490 "Must not be already in");
1491 Scope->TrueBiasedSelects.insert(SI);
1492 }
1493 continue;
1494 }
1495 llvm_unreachable("Must be a branch or a select");
1496 }
1497 ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate()));
1498 return true;
1499}
1500
1501// A helper for transformScopes. Insert a trivial phi at the scope exit block
1502// for a value that's defined in the scope but used outside it (meaning it's
1503// alive at the exit block).
1504static void insertTrivialPHIs(CHRScope *Scope,
1505 BasicBlock *EntryBlock, BasicBlock *ExitBlock,
1506 DenseSet<PHINode *> &TrivialPHIs) {
1507 DenseSet<BasicBlock *> BlocksInScopeSet;
1508 SmallVector<BasicBlock *, 8> BlocksInScopeVec;
1509 for (RegInfo &RI : Scope->RegInfos) {
1510 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1511 // sub-Scopes.
1512 BlocksInScopeSet.insert(BB);
1513 BlocksInScopeVec.push_back(BB);
1514 }
1515 }
1516 CHR_DEBUG(
1517 dbgs() << "Inserting redudant phis\n";
1518 for (BasicBlock *BB : BlocksInScopeVec) {
1519 dbgs() << "BlockInScope " << BB->getName() << "\n";
1520 });
1521 for (BasicBlock *BB : BlocksInScopeVec) {
1522 for (Instruction &I : *BB) {
1523 SmallVector<Instruction *, 8> Users;
1524 for (User *U : I.users()) {
1525 if (auto *UI = dyn_cast<Instruction>(U)) {
1526 if (BlocksInScopeSet.count(UI->getParent()) == 0 &&
1527 // Unless there's already a phi for I at the exit block.
1528 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
1529 CHR_DEBUG(dbgs() << "V " << I << "\n");
1530 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
1531 Users.push_back(UI);
1532 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
1533 // There's a loop backedge from a block that's dominated by this
1534 // scope to the entry block.
1535 CHR_DEBUG(dbgs() << "V " << I << "\n");
1536 CHR_DEBUG(dbgs()
1537 << "Used at entry block (for a back edge) by a phi user "
1538 << *UI << "\n");
1539 Users.push_back(UI);
1540 }
1541 }
1542 }
1543 if (Users.size() > 0) {
1544 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
1545 // ExitBlock. Replace I with the new phi in UI unless UI is another
1546 // phi at ExitBlock.
1547 unsigned PredCount = std::distance(pred_begin(ExitBlock),
1548 pred_end(ExitBlock));
1549 PHINode *PN = PHINode::Create(I.getType(), PredCount, "",
1550 &ExitBlock->front());
1551 for (BasicBlock *Pred : predecessors(ExitBlock)) {
1552 PN->addIncoming(&I, Pred);
1553 }
1554 TrivialPHIs.insert(PN);
1555 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
1556 for (Instruction *UI : Users) {
1557 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
1558 if (UI->getOperand(J) == &I) {
1559 UI->setOperand(J, PN);
1560 }
1561 }
1562 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
1563 }
1564 }
1565 }
1566 }
1567}
1568
1569// Assert that all the CHR regions of the scope have a biased branch or select.
Fangrui Songc8f348c2018-09-05 03:10:20 +00001570static void LLVM_ATTRIBUTE_UNUSED
1571assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +00001572#ifndef NDEBUG
1573 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
1574 if (Scope->TrueBiasedRegions.count(RI.R) ||
1575 Scope->FalseBiasedRegions.count(RI.R))
1576 return true;
1577 for (SelectInst *SI : RI.Selects)
1578 if (Scope->TrueBiasedSelects.count(SI) ||
1579 Scope->FalseBiasedSelects.count(SI))
1580 return true;
1581 return false;
1582 };
1583 for (RegInfo &RI : Scope->CHRRegions) {
1584 assert(HasBiasedBranchOrSelect(RI, Scope) &&
1585 "Must have biased branch or select");
1586 }
1587#endif
1588}
1589
1590// Assert that all the condition values of the biased branches and selects have
1591// been hoisted to the pre-entry block or outside of the scope.
Fangrui Songc8f348c2018-09-05 03:10:20 +00001592static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted(
1593 CHRScope *Scope, BasicBlock *PreEntryBlock) {
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +00001594 CHR_DEBUG(dbgs() << "Biased regions condition values \n");
1595 for (RegInfo &RI : Scope->CHRRegions) {
1596 Region *R = RI.R;
1597 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1598 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1599 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1600 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1601 Value *V = BI->getCondition();
1602 CHR_DEBUG(dbgs() << *V << "\n");
1603 if (auto *I = dyn_cast<Instruction>(V)) {
Hiroshi Yamauchi72ee6d62018-09-04 18:10:54 +00001604 (void)(I); // Unused in release build.
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +00001605 assert((I->getParent() == PreEntryBlock ||
1606 !Scope->contains(I)) &&
1607 "Must have been hoisted to PreEntryBlock or outside the scope");
1608 }
1609 }
1610 for (SelectInst *SI : RI.Selects) {
1611 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1612 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1613 if (!(IsTrueBiased || IsFalseBiased))
1614 continue;
1615 Value *V = SI->getCondition();
1616 CHR_DEBUG(dbgs() << *V << "\n");
1617 if (auto *I = dyn_cast<Instruction>(V)) {
Hiroshi Yamauchi72ee6d62018-09-04 18:10:54 +00001618 (void)(I); // Unused in release build.
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +00001619 assert((I->getParent() == PreEntryBlock ||
1620 !Scope->contains(I)) &&
1621 "Must have been hoisted to PreEntryBlock or outside the scope");
1622 }
1623 }
1624 }
1625}
1626
1627void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
1628 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
1629
1630 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
1631 Region *FirstRegion = Scope->RegInfos[0].R;
1632 BasicBlock *EntryBlock = FirstRegion->getEntry();
1633 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
1634 BasicBlock *ExitBlock = LastRegion->getExit();
1635 Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
1636
1637 if (ExitBlock) {
1638 // Insert a trivial phi at the exit block (where the CHR hot path and the
1639 // cold path merges) for a value that's defined in the scope but used
1640 // outside it (meaning it's alive at the exit block). We will add the
1641 // incoming values for the CHR cold paths to it below. Without this, we'd
1642 // miss updating phi's for such values unless there happens to already be a
1643 // phi for that value there.
1644 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
1645 }
1646
1647 // Split the entry block of the first region. The new block becomes the new
1648 // entry block of the first region. The old entry block becomes the block to
1649 // insert the CHR branch into. Note DT gets updated. Since DT gets updated
1650 // through the split, we update the entry of the first region after the split,
1651 // and Region only points to the entry and the exit blocks, rather than
1652 // keeping everything in a list or set, the blocks membership and the
1653 // entry/exit blocks of the region are still valid after the split.
1654 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
1655 << " at " << *Scope->BranchInsertPoint << "\n");
1656 BasicBlock *NewEntryBlock =
1657 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
1658 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1659 "NewEntryBlock's only pred must be EntryBlock");
1660 FirstRegion->replaceEntryRecursive(NewEntryBlock);
1661 BasicBlock *PreEntryBlock = EntryBlock;
1662
1663 ValueToValueMapTy VMap;
1664 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
1665 // hot path (originals) and a cold path (clones) and update the PHIs at the
1666 // exit block.
1667 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
1668
1669 // Replace the old (placeholder) branch with the new (merged) conditional
1670 // branch.
1671 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
1672 NewEntryBlock, VMap);
1673
1674#ifndef NDEBUG
1675 assertCHRRegionsHaveBiasedBranchOrSelect(Scope);
1676#endif
1677
1678 // Hoist the conditional values of the branches/selects.
1679 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs);
1680
1681#ifndef NDEBUG
1682 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
1683#endif
1684
1685 // Create the combined branch condition and constant-fold the branches/selects
1686 // in the hot path.
1687 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
1688 ProfileCount ? ProfileCount.getValue() : 0);
1689}
1690
1691// A helper for transformScopes. Clone the blocks in the scope (excluding the
1692// PreEntryBlock) to split into a hot path and a cold path and update the PHIs
1693// at the exit block.
1694void CHR::cloneScopeBlocks(CHRScope *Scope,
1695 BasicBlock *PreEntryBlock,
1696 BasicBlock *ExitBlock,
1697 Region *LastRegion,
1698 ValueToValueMapTy &VMap) {
1699 // Clone all the blocks. The original blocks will be the hot-path
1700 // CHR-optimized code and the cloned blocks will be the original unoptimized
1701 // code. This is so that the block pointers from the
1702 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
1703 // which CHR should apply to.
1704 SmallVector<BasicBlock*, 8> NewBlocks;
1705 for (RegInfo &RI : Scope->RegInfos)
1706 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1707 // sub-Scopes.
1708 assert(BB != PreEntryBlock && "Don't copy the preetntry block");
1709 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
1710 NewBlocks.push_back(NewBB);
1711 VMap[BB] = NewBB;
1712 }
1713
1714 // Place the cloned blocks right after the original blocks (right before the
1715 // exit block of.)
1716 if (ExitBlock)
1717 F.getBasicBlockList().splice(ExitBlock->getIterator(),
1718 F.getBasicBlockList(),
1719 NewBlocks[0]->getIterator(), F.end());
1720
1721 // Update the cloned blocks/instructions to refer to themselves.
1722 for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
1723 for (Instruction &I : *NewBlocks[i])
1724 RemapInstruction(&I, VMap,
1725 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1726
1727 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
1728 // the top-level region but we don't need to add PHIs. The trivial PHIs
1729 // inserted above will be updated here.
1730 if (ExitBlock)
1731 for (PHINode &PN : ExitBlock->phis())
1732 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
1733 ++I) {
1734 BasicBlock *Pred = PN.getIncomingBlock(I);
1735 if (LastRegion->contains(Pred)) {
1736 Value *V = PN.getIncomingValue(I);
1737 auto It = VMap.find(V);
1738 if (It != VMap.end()) V = It->second;
1739 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
1740 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
1741 }
1742 }
1743}
1744
1745// A helper for transformScope. Replace the old (placeholder) branch with the
1746// new (merged) conditional branch.
1747BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
1748 BasicBlock *EntryBlock,
1749 BasicBlock *NewEntryBlock,
1750 ValueToValueMapTy &VMap) {
1751 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
1752 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
1753 "SplitBlock did not work correctly!");
1754 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1755 "NewEntryBlock's only pred must be EntryBlock");
1756 assert(VMap.find(NewEntryBlock) != VMap.end() &&
1757 "NewEntryBlock must have been copied");
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +00001758 OldBR->dropAllReferences();
Hiroshi Yamauchibd897a02018-09-04 21:28:22 +00001759 OldBR->eraseFromParent();
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +00001760 // The true predicate is a placeholder. It will be replaced later in
1761 // fixupBranchesAndSelects().
1762 BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
1763 cast<BasicBlock>(VMap[NewEntryBlock]),
1764 ConstantInt::getTrue(F.getContext()));
1765 PreEntryBlock->getInstList().push_back(NewBR);
1766 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1767 "NewEntryBlock's only pred must be EntryBlock");
1768 return NewBR;
1769}
1770
1771// A helper for transformScopes. Create the combined branch condition and
1772// constant-fold the branches/selects in the hot path.
1773void CHR::fixupBranchesAndSelects(CHRScope *Scope,
1774 BasicBlock *PreEntryBlock,
1775 BranchInst *MergedBR,
1776 uint64_t ProfileCount) {
1777 Value *MergedCondition = ConstantInt::getTrue(F.getContext());
1778 BranchProbability CHRBranchBias(1, 1);
1779 uint64_t NumCHRedBranches = 0;
1780 IRBuilder<> IRB(PreEntryBlock->getTerminator());
1781 for (RegInfo &RI : Scope->CHRRegions) {
1782 Region *R = RI.R;
1783 if (RI.HasBranch) {
1784 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
1785 ++NumCHRedBranches;
1786 }
1787 for (SelectInst *SI : RI.Selects) {
1788 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
1789 ++NumCHRedBranches;
1790 }
1791 }
1792 Stats.NumBranchesDelta += NumCHRedBranches - 1;
1793 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
1794 MergedBR->setCondition(MergedCondition);
1795 SmallVector<uint32_t, 2> Weights;
1796 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.scale(1000)));
1797 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)));
1798 MDBuilder MDB(F.getContext());
1799 MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1800 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
1801 << "\n");
1802}
1803
1804// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1805// and constant-fold a branch in the hot path.
1806void CHR::fixupBranch(Region *R, CHRScope *Scope,
1807 IRBuilder<> &IRB,
1808 Value *&MergedCondition,
1809 BranchProbability &CHRBranchBias) {
1810 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1811 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
1812 "Must be truthy or falsy");
1813 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1814 assert(BranchBiasMap.find(R) != BranchBiasMap.end() &&
1815 "Must be in the bias map");
1816 BranchProbability Bias = BranchBiasMap[R];
1817 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1818 // Take the min.
1819 if (CHRBranchBias > Bias)
1820 CHRBranchBias = Bias;
1821 BasicBlock *IfThen = BI->getSuccessor(1);
1822 BasicBlock *IfElse = BI->getSuccessor(0);
1823 BasicBlock *RegionExitBlock = R->getExit();
1824 assert(RegionExitBlock && "Null ExitBlock");
1825 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
1826 IfThen != IfElse && "Invariant from findScopes");
1827 if (IfThen == RegionExitBlock) {
1828 // Swap them so that IfThen means going into it and IfElse means skipping
1829 // it.
1830 std::swap(IfThen, IfElse);
1831 }
1832 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
1833 << " IfElse " << IfElse->getName() << "\n");
1834 Value *Cond = BI->getCondition();
1835 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
1836 bool ConditionTrue = HotTarget == BI->getSuccessor(0);
1837 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
1838 MergedCondition);
1839 // Constant-fold the branch at ClonedEntryBlock.
1840 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
1841 "The successor shouldn't change");
1842 Value *NewCondition = ConditionTrue ?
1843 ConstantInt::getTrue(F.getContext()) :
1844 ConstantInt::getFalse(F.getContext());
1845 BI->setCondition(NewCondition);
1846}
1847
1848// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1849// and constant-fold a select in the hot path.
1850void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
1851 IRBuilder<> &IRB,
1852 Value *&MergedCondition,
1853 BranchProbability &CHRBranchBias) {
1854 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1855 assert((IsTrueBiased ||
1856 Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
1857 assert(SelectBiasMap.find(SI) != SelectBiasMap.end() &&
1858 "Must be in the bias map");
1859 BranchProbability Bias = SelectBiasMap[SI];
1860 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1861 // Take the min.
1862 if (CHRBranchBias > Bias)
1863 CHRBranchBias = Bias;
1864 Value *Cond = SI->getCondition();
1865 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
1866 MergedCondition);
1867 Value *NewCondition = IsTrueBiased ?
1868 ConstantInt::getTrue(F.getContext()) :
1869 ConstantInt::getFalse(F.getContext());
1870 SI->setCondition(NewCondition);
1871}
1872
1873// A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
1874// condition.
1875void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
1876 Instruction *BranchOrSelect,
1877 CHRScope *Scope,
1878 IRBuilder<> &IRB,
1879 Value *&MergedCondition) {
1880 if (IsTrueBiased) {
1881 MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
1882 } else {
1883 // If Cond is an icmp and all users of V except for BranchOrSelect is a
1884 // branch, negate the icmp predicate and swap the branch targets and avoid
1885 // inserting an Xor to negate Cond.
1886 bool Done = false;
1887 if (auto *ICmp = dyn_cast<ICmpInst>(Cond))
1888 if (NegateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) {
1889 MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
1890 Done = true;
1891 }
1892 if (!Done) {
1893 Value *Negate = IRB.CreateXor(
1894 ConstantInt::getTrue(F.getContext()), Cond);
1895 MergedCondition = IRB.CreateAnd(MergedCondition, Negate);
1896 }
1897 }
1898}
1899
1900void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
1901 unsigned i = 0;
1902 (void)(i); // Unused in release build.
1903 DenseSet<PHINode *> TrivialPHIs;
1904 for (CHRScope *Scope : CHRScopes) {
1905 transformScopes(Scope, TrivialPHIs);
1906 CHR_DEBUG(
1907 std::ostringstream oss;
1908 oss << " after transformScopes " << i++;
1909 dumpIR(F, oss.str().c_str(), nullptr));
1910 }
1911}
1912
Fangrui Songc8f348c2018-09-05 03:10:20 +00001913static void LLVM_ATTRIBUTE_UNUSED
1914dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
Hiroshi Yamauchi9775a622018-09-04 17:19:13 +00001915 dbgs() << Label << " " << Scopes.size() << "\n";
1916 for (CHRScope *Scope : Scopes) {
1917 dbgs() << *Scope << "\n";
1918 }
1919}
1920
1921bool CHR::run() {
1922 if (!shouldApply(F, PSI))
1923 return false;
1924
1925 CHR_DEBUG(dumpIR(F, "before", nullptr));
1926
1927 bool Changed = false;
1928 {
1929 CHR_DEBUG(
1930 dbgs() << "RegionInfo:\n";
1931 RI.print(dbgs()));
1932
1933 // Recursively traverse the region tree and find regions that have biased
1934 // branches and/or selects and create scopes.
1935 SmallVector<CHRScope *, 8> AllScopes;
1936 findScopes(AllScopes);
1937 CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
1938
1939 // Split the scopes if 1) the conditiona values of the biased
1940 // branches/selects of the inner/lower scope can't be hoisted up to the
1941 // outermost/uppermost scope entry, or 2) the condition values of the biased
1942 // branches/selects in a scope (including subscopes) don't share at least
1943 // one common value.
1944 SmallVector<CHRScope *, 8> SplitScopes;
1945 splitScopes(AllScopes, SplitScopes);
1946 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
1947
1948 // After splitting, set the biased regions and selects of a scope (a tree
1949 // root) that include those of the subscopes.
1950 classifyBiasedScopes(SplitScopes);
1951 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
1952
1953 // Filter out the scopes that has only one biased region or select (CHR
1954 // isn't useful in such a case).
1955 SmallVector<CHRScope *, 8> FilteredScopes;
1956 filterScopes(SplitScopes, FilteredScopes);
1957 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
1958
1959 // Set the regions to be CHR'ed and their hoist stops for each scope.
1960 SmallVector<CHRScope *, 8> SetScopes;
1961 setCHRRegions(FilteredScopes, SetScopes);
1962 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
1963
1964 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner
1965 // ones. We need to apply CHR from outer to inner so that we apply CHR only
1966 // to the hot path, rather than both hot and cold paths.
1967 SmallVector<CHRScope *, 8> SortedScopes;
1968 sortScopes(SetScopes, SortedScopes);
1969 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
1970
1971 CHR_DEBUG(
1972 dbgs() << "RegionInfo:\n";
1973 RI.print(dbgs()));
1974
1975 // Apply the CHR transformation.
1976 if (!SortedScopes.empty()) {
1977 transformScopes(SortedScopes);
1978 Changed = true;
1979 }
1980 }
1981
1982 if (Changed)
1983 CHR_DEBUG(dumpIR(F, "after", &Stats));
1984
1985 return Changed;
1986}
1987
1988bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) {
1989 BlockFrequencyInfo &BFI =
1990 getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
1991 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1992 ProfileSummaryInfo &PSI =
1993 *getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
1994 RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
1995 return CHR(F, BFI, DT, PSI, RI).run();
1996}
1997
1998namespace llvm {
1999
2000ControlHeightReductionPass::ControlHeightReductionPass() {
2001 ParseCHRFilterFiles();
2002}
2003
2004PreservedAnalyses ControlHeightReductionPass::run(
2005 Function &F,
2006 FunctionAnalysisManager &FAM) {
2007 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
2008 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
2009 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
2010 auto &MAM = MAMProxy.getManager();
2011 auto &PSI = *MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
2012 auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
2013 bool Changed = CHR(F, BFI, DT, PSI, RI).run();
2014 if (!Changed)
2015 return PreservedAnalyses::all();
2016 auto PA = PreservedAnalyses();
2017 PA.preserve<GlobalsAA>();
2018 return PA;
2019}
2020
2021} // namespace llvm