blob: 113ae5736a26e39c560065da498a44089fd424d2 [file] [log] [blame]
Michael J. Spencerb8427252018-04-17 23:30:05 +00001//===- CallGraphSort.cpp --------------------------------------------------===//
2//
3// The LLVM Linker
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// Implementation of Call-Chain Clustering from: Optimizing Function Placement
11/// for Large-Scale Data-Center Applications
12/// https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf
13///
14/// The goal of this algorithm is to improve runtime performance of the final
15/// executable by arranging code sections such that page table and i-cache
16/// misses are minimized.
17///
18/// Definitions:
19/// * Cluster
20/// * An ordered list of input sections which are layed out as a unit. At the
21/// beginning of the algorithm each input section has its own cluster and
22/// the weight of the cluster is the sum of the weight of all incomming
23/// edges.
24/// * Call-Chain Clustering (C³) Heuristic
25/// * Defines when and how clusters are combined. Pick the highest weighted
26/// input section then add it to its most likely predecessor if it wouldn't
27/// penalize it too much.
28/// * Density
29/// * The weight of the cluster divided by the size of the cluster. This is a
30/// proxy for the ammount of execution time spent per byte of the cluster.
31///
32/// It does so given a call graph profile by the following:
33/// * Build a weighted call graph from the call graph profile
34/// * Sort input sections by weight
35/// * For each input section starting with the highest weight
36/// * Find its most likely predecessor cluster
37/// * Check if the combined cluster would be too large, or would have too low
38/// a density.
39/// * If not, then combine the clusters.
40/// * Sort non-empty clusters by density
41///
42//===----------------------------------------------------------------------===//
43
44#include "CallGraphSort.h"
45#include "OutputSections.h"
46#include "SymbolTable.h"
47#include "Symbols.h"
48
49using namespace llvm;
50using namespace lld;
51using namespace lld::elf;
52
53namespace {
54struct Edge {
55 int From;
56 uint64_t Weight;
57};
58
59struct Cluster {
60 Cluster(int Sec, size_t S) {
61 Sections.push_back(Sec);
62 Size = S;
63 }
64
65 double getDensity() const {
66 if (Size == 0)
67 return 0;
68 return double(Weight) / double(Size);
69 }
70
71 std::vector<int> Sections;
72 size_t Size = 0;
73 uint64_t Weight = 0;
74 uint64_t InitialWeight = 0;
George Rimarf0eedbc2018-08-28 08:49:40 +000075 Edge BestPred = {-1, 0};
Michael J. Spencerb8427252018-04-17 23:30:05 +000076};
77
78class CallGraphSort {
79public:
80 CallGraphSort();
81
82 DenseMap<const InputSectionBase *, int> run();
83
84private:
85 std::vector<Cluster> Clusters;
86 std::vector<const InputSectionBase *> Sections;
87
88 void groupClusters();
89};
90
91// Maximum ammount the combined cluster density can be worse than the original
92// cluster to consider merging.
93constexpr int MAX_DENSITY_DEGRADATION = 8;
94
95// Maximum cluster size in bytes.
96constexpr uint64_t MAX_CLUSTER_SIZE = 1024 * 1024;
97} // end anonymous namespace
98
99// Take the edge list in Config->CallGraphProfile, resolve symbol names to
100// Symbols, and generate a graph between InputSections with the provided
101// weights.
102CallGraphSort::CallGraphSort() {
103 llvm::MapVector<std::pair<const InputSectionBase *, const InputSectionBase *>,
104 uint64_t> &Profile = Config->CallGraphProfile;
105 DenseMap<const InputSectionBase *, int> SecToCluster;
106
107 auto GetOrCreateNode = [&](const InputSectionBase *IS) -> int {
108 auto Res = SecToCluster.insert(std::make_pair(IS, Clusters.size()));
109 if (Res.second) {
110 Sections.push_back(IS);
111 Clusters.emplace_back(Clusters.size(), IS->getSize());
112 }
113 return Res.first->second;
114 };
115
116 // Create the graph.
117 for (const auto &C : Profile) {
118 const auto *FromSB = cast<InputSectionBase>(C.first.first->Repl);
119 const auto *ToSB = cast<InputSectionBase>(C.first.second->Repl);
120 uint64_t Weight = C.second;
121
122 // Ignore edges between input sections belonging to different output
123 // sections. This is done because otherwise we would end up with clusters
124 // containing input sections that can't actually be placed adjacently in the
125 // output. This messes with the cluster size and density calculations. We
126 // would also end up moving input sections in other output sections without
127 // moving them closer to what calls them.
128 if (FromSB->getOutputSection() != ToSB->getOutputSection())
129 continue;
130
131 int From = GetOrCreateNode(FromSB);
132 int To = GetOrCreateNode(ToSB);
133
134 Clusters[To].Weight += Weight;
135
136 if (From == To)
137 continue;
138
George Rimarf0eedbc2018-08-28 08:49:40 +0000139 // Remember the best edge.
140 Cluster &ToC = Clusters[To];
141 if (ToC.BestPred.From == -1 || ToC.BestPred.Weight < Weight) {
142 ToC.BestPred.From = From;
143 ToC.BestPred.Weight = Weight;
144 }
Michael J. Spencerb8427252018-04-17 23:30:05 +0000145 }
146 for (Cluster &C : Clusters)
147 C.InitialWeight = C.Weight;
148}
149
150// It's bad to merge clusters which would degrade the density too much.
151static bool isNewDensityBad(Cluster &A, Cluster &B) {
152 double NewDensity = double(A.Weight + B.Weight) / double(A.Size + B.Size);
153 if (NewDensity < A.getDensity() / MAX_DENSITY_DEGRADATION)
154 return true;
155 return false;
156}
157
158static void mergeClusters(Cluster &Into, Cluster &From) {
159 Into.Sections.insert(Into.Sections.end(), From.Sections.begin(),
160 From.Sections.end());
161 Into.Size += From.Size;
162 Into.Weight += From.Weight;
163 From.Sections.clear();
164 From.Size = 0;
165 From.Weight = 0;
166}
167
168// Group InputSections into clusters using the Call-Chain Clustering heuristic
169// then sort the clusters by density.
170void CallGraphSort::groupClusters() {
171 std::vector<int> SortedSecs(Clusters.size());
172 std::vector<Cluster *> SecToCluster(Clusters.size());
173
George Rimara5cf8da2018-08-12 07:52:24 +0000174 for (size_t I = 0; I < Clusters.size(); ++I) {
175 SortedSecs[I] = I;
176 SecToCluster[I] = &Clusters[I];
Michael J. Spencerb8427252018-04-17 23:30:05 +0000177 }
178
179 std::stable_sort(SortedSecs.begin(), SortedSecs.end(), [&](int A, int B) {
180 return Clusters[B].getDensity() < Clusters[A].getDensity();
181 });
182
183 for (int SI : SortedSecs) {
184 // Clusters[SI] is the same as SecToClusters[SI] here because it has not
185 // been merged into another cluster yet.
186 Cluster &C = Clusters[SI];
187
George Rimarf0eedbc2018-08-28 08:49:40 +0000188 // Don't consider merging if the edge is unlikely.
189 if (C.BestPred.From == -1 || C.BestPred.Weight * 10 <= C.InitialWeight)
Michael J. Spencerb8427252018-04-17 23:30:05 +0000190 continue;
191
George Rimarf0eedbc2018-08-28 08:49:40 +0000192 Cluster *PredC = SecToCluster[C.BestPred.From];
Michael J. Spencerb8427252018-04-17 23:30:05 +0000193 if (PredC == &C)
194 continue;
195
196 if (C.Size + PredC->Size > MAX_CLUSTER_SIZE)
197 continue;
198
199 if (isNewDensityBad(*PredC, C))
200 continue;
201
202 // NOTE: Consider using a disjoint-set to track section -> cluster mapping
203 // if this is ever slow.
204 for (int SI : C.Sections)
205 SecToCluster[SI] = PredC;
206
207 mergeClusters(*PredC, C);
208 }
209
210 // Remove empty or dead nodes. Invalidates all cluster indices.
George Rimar97df22f2018-04-23 14:41:49 +0000211 llvm::erase_if(Clusters, [](const Cluster &C) {
212 return C.Size == 0 || C.Sections.empty();
213 });
Michael J. Spencerb8427252018-04-17 23:30:05 +0000214
215 // Sort by density.
George Rimarde83cbf2018-04-24 09:55:39 +0000216 std::stable_sort(Clusters.begin(), Clusters.end(),
217 [](const Cluster &A, const Cluster &B) {
218 return A.getDensity() > B.getDensity();
219 });
Michael J. Spencerb8427252018-04-17 23:30:05 +0000220}
221
222DenseMap<const InputSectionBase *, int> CallGraphSort::run() {
223 groupClusters();
224
225 // Generate order.
226 llvm::DenseMap<const InputSectionBase *, int> OrderMap;
227 ssize_t CurOrder = 1;
228
229 for (const Cluster &C : Clusters)
230 for (int SecIndex : C.Sections)
231 OrderMap[Sections[SecIndex]] = CurOrder++;
232
233 return OrderMap;
234}
235
236// Sort sections by the profile data provided by -callgraph-profile-file
237//
238// This first builds a call graph based on the profile data then merges sections
239// according to the C³ huristic. All clusters are then sorted by a density
240// metric to further improve locality.
241DenseMap<const InputSectionBase *, int> elf::computeCallGraphProfileOrder() {
242 return CallGraphSort().run();
243}