Easwaran Raman | bdf2026 | 2018-01-09 19:39:35 +0000 | [diff] [blame] | 1 | //=- SyntheticCountsPropagation.cpp - Propagate function counts --*- C++ -*-=// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
// This file implements a transformation that synthesizes entry counts for
// functions and attaches !prof metadata to functions with the synthesized
// counts. The presence of !prof metadata with counter name set to
// 'synthesized_function_entry_count' indicates that the value of the counter
// is an estimation of the likely execution count of the function. This
// transform is applied only in non-PGO mode as functions get 'real'
// profile-based function entry counts in the PGO mode.
| 17 | // |
// The transformation works by first assigning some initial values to the entry
// counts of all functions and then doing a top-down traversal of the
// callgraph-scc to propagate the counts. For each function the set of callsites
// and their relative block frequency is gathered. The relative block frequency
// is multiplied by the entry count of the caller and added to the callee's
// entry count. For non-trivial SCCs, the new counts are computed from the
// previous counts and updated in one shot.
| 25 | // |
| 26 | //===----------------------------------------------------------------------===// |
| 27 | |
| 28 | #include "llvm/Transforms/IPO/SyntheticCountsPropagation.h" |
| 29 | #include "llvm/ADT/DenseSet.h" |
| 30 | #include "llvm/ADT/STLExtras.h" |
| 31 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
| 32 | #include "llvm/Analysis/CallGraph.h" |
| 33 | #include "llvm/Analysis/SyntheticCountsUtils.h" |
| 34 | #include "llvm/IR/CallSite.h" |
| 35 | #include "llvm/IR/Function.h" |
| 36 | #include "llvm/IR/Instructions.h" |
| 37 | #include "llvm/IR/Module.h" |
| 38 | #include "llvm/Support/CommandLine.h" |
| 39 | #include "llvm/Support/Debug.h" |
| 40 | #include "llvm/Support/raw_ostream.h" |
| 41 | |
| 42 | using namespace llvm; |
| 43 | using Scaled64 = ScaledNumber<uint64_t>; |
Easwaran Raman | e5b8de2 | 2018-01-17 22:24:23 +0000 | [diff] [blame] | 44 | using ProfileCount = Function::ProfileCount; |
Easwaran Raman | bdf2026 | 2018-01-09 19:39:35 +0000 | [diff] [blame] | 45 | |
| 46 | #define DEBUG_TYPE "synthetic-counts-propagation" |
| 47 | |
| 48 | /// Initial synthetic count assigned to functions. |
| 49 | static cl::opt<int> |
| 50 | InitialSyntheticCount("initial-synthetic-count", cl::Hidden, cl::init(10), |
| 51 | cl::ZeroOrMore, |
| 52 | cl::desc("Initial value of synthetic entry count.")); |
| 53 | |
| 54 | /// Initial synthetic count assigned to inline functions. |
| 55 | static cl::opt<int> InlineSyntheticCount( |
| 56 | "inline-synthetic-count", cl::Hidden, cl::init(15), cl::ZeroOrMore, |
| 57 | cl::desc("Initial synthetic entry count for inline functions.")); |
| 58 | |
| 59 | /// Initial synthetic count assigned to cold functions. |
| 60 | static cl::opt<int> ColdSyntheticCount( |
| 61 | "cold-synthetic-count", cl::Hidden, cl::init(5), cl::ZeroOrMore, |
| 62 | cl::desc("Initial synthetic entry count for cold functions.")); |
| 63 | |
| 64 | // Assign initial synthetic entry counts to functions. |
| 65 | static void |
| 66 | initializeCounts(Module &M, function_ref<void(Function *, uint64_t)> SetCount) { |
| 67 | auto MayHaveIndirectCalls = [](Function &F) { |
| 68 | for (auto *U : F.users()) { |
| 69 | if (!isa<CallInst>(U) && !isa<InvokeInst>(U)) |
| 70 | return true; |
| 71 | } |
| 72 | return false; |
| 73 | }; |
| 74 | |
| 75 | for (Function &F : M) { |
| 76 | uint64_t InitialCount = InitialSyntheticCount; |
| 77 | if (F.isDeclaration()) |
| 78 | continue; |
| 79 | if (F.hasFnAttribute(Attribute::AlwaysInline) || |
| 80 | F.hasFnAttribute(Attribute::InlineHint)) { |
| 81 | // Use a higher value for inline functions to account for the fact that |
| 82 | // these are usually beneficial to inline. |
| 83 | InitialCount = InlineSyntheticCount; |
| 84 | } else if (F.hasLocalLinkage() && !MayHaveIndirectCalls(F)) { |
| 85 | // Local functions without inline hints get counts only through |
| 86 | // propagation. |
| 87 | InitialCount = 0; |
| 88 | } else if (F.hasFnAttribute(Attribute::Cold) || |
| 89 | F.hasFnAttribute(Attribute::NoInline)) { |
| 90 | // Use a lower value for noinline and cold functions. |
| 91 | InitialCount = ColdSyntheticCount; |
| 92 | } |
| 93 | SetCount(&F, InitialCount); |
| 94 | } |
| 95 | } |
| 96 | |
| 97 | PreservedAnalyses SyntheticCountsPropagation::run(Module &M, |
| 98 | ModuleAnalysisManager &MAM) { |
| 99 | FunctionAnalysisManager &FAM = |
| 100 | MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); |
| 101 | DenseMap<Function *, uint64_t> Counts; |
| 102 | // Set initial entry counts. |
| 103 | initializeCounts(M, [&](Function *F, uint64_t Count) { Counts[F] = Count; }); |
| 104 | |
Easwaran Raman | 8410c37 | 2018-01-25 22:02:29 +0000 | [diff] [blame] | 105 | // Compute the relative block frequency for a call edge. Use scaled numbers |
Easwaran Raman | bdf2026 | 2018-01-09 19:39:35 +0000 | [diff] [blame] | 106 | // and not integers since the relative block frequency could be less than 1. |
Easwaran Raman | 8410c37 | 2018-01-25 22:02:29 +0000 | [diff] [blame] | 107 | auto GetCallSiteRelFreq = [&](const CallGraphNode::CallRecord &Edge) { |
| 108 | Optional<Scaled64> Res = None; |
| 109 | if (!Edge.first) |
| 110 | return Res; |
| 111 | assert(isa<Instruction>(Edge.first)); |
| 112 | CallSite CS(cast<Instruction>(Edge.first)); |
Easwaran Raman | bdf2026 | 2018-01-09 19:39:35 +0000 | [diff] [blame] | 113 | Function *Caller = CS.getCaller(); |
| 114 | auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller); |
| 115 | BasicBlock *CSBB = CS.getInstruction()->getParent(); |
| 116 | Scaled64 EntryFreq(BFI.getEntryFreq(), 0); |
| 117 | Scaled64 BBFreq(BFI.getBlockFreq(CSBB).getFrequency(), 0); |
| 118 | BBFreq /= EntryFreq; |
Easwaran Raman | 8410c37 | 2018-01-25 22:02:29 +0000 | [diff] [blame] | 119 | return Optional<Scaled64>(BBFreq); |
Easwaran Raman | bdf2026 | 2018-01-09 19:39:35 +0000 | [diff] [blame] | 120 | }; |
| 121 | |
| 122 | CallGraph CG(M); |
| 123 | // Propgate the entry counts on the callgraph. |
Easwaran Raman | 8410c37 | 2018-01-25 22:02:29 +0000 | [diff] [blame] | 124 | SyntheticCountsUtils<const CallGraph *>::propagate( |
| 125 | &CG, GetCallSiteRelFreq, |
| 126 | [&](const CallGraphNode *N) { return Counts[N->getFunction()]; }, |
| 127 | [&](const CallGraphNode *N, uint64_t New) { |
| 128 | auto F = N->getFunction(); |
| 129 | if (!F || F->isDeclaration()) |
| 130 | return; |
| 131 | Counts[F] += New; |
| 132 | }); |
Easwaran Raman | bdf2026 | 2018-01-09 19:39:35 +0000 | [diff] [blame] | 133 | |
| 134 | // Set the counts as metadata. |
| 135 | for (auto Entry : Counts) |
Easwaran Raman | e5b8de2 | 2018-01-17 22:24:23 +0000 | [diff] [blame] | 136 | Entry.first->setEntryCount( |
| 137 | ProfileCount(Entry.second, Function::PCT_Synthetic)); |
Easwaran Raman | bdf2026 | 2018-01-09 19:39:35 +0000 | [diff] [blame] | 138 | |
| 139 | return PreservedAnalyses::all(); |
| 140 | } |