blob: 2a7d78664b8e4368c70cace1871b029d47bfe283 [file] [log] [blame]
Michael J. Spencerb8427252018-04-17 23:30:05 +00001//===- CallGraphSort.cpp --------------------------------------------------===//
2//
3// The LLVM Linker
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// Implementation of Call-Chain Clustering from: Optimizing Function Placement
11/// for Large-Scale Data-Center Applications
12/// https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf
13///
14/// The goal of this algorithm is to improve runtime performance of the final
15/// executable by arranging code sections such that page table and i-cache
16/// misses are minimized.
17///
18/// Definitions:
19/// * Cluster
20/// * An ordered list of input sections which are layed out as a unit. At the
21/// beginning of the algorithm each input section has its own cluster and
22/// the weight of the cluster is the sum of the weight of all incomming
23/// edges.
24/// * Call-Chain Clustering (C³) Heuristic
25/// * Defines when and how clusters are combined. Pick the highest weighted
26/// input section then add it to its most likely predecessor if it wouldn't
27/// penalize it too much.
28/// * Density
29/// * The weight of the cluster divided by the size of the cluster. This is a
30/// proxy for the ammount of execution time spent per byte of the cluster.
31///
32/// It does so given a call graph profile by the following:
33/// * Build a weighted call graph from the call graph profile
34/// * Sort input sections by weight
35/// * For each input section starting with the highest weight
36/// * Find its most likely predecessor cluster
37/// * Check if the combined cluster would be too large, or would have too low
38/// a density.
39/// * If not, then combine the clusters.
40/// * Sort non-empty clusters by density
41///
42//===----------------------------------------------------------------------===//
43
44#include "CallGraphSort.h"
45#include "OutputSections.h"
46#include "SymbolTable.h"
47#include "Symbols.h"
48
49using namespace llvm;
50using namespace lld;
51using namespace lld::elf;
52
53namespace {
54struct Edge {
55 int From;
56 uint64_t Weight;
57};
58
59struct Cluster {
Rui Ueyama3d354082018-10-12 22:44:06 +000060 Cluster(int Sec, size_t S) : Sections{Sec}, Size(S) {}
Michael J. Spencerb8427252018-04-17 23:30:05 +000061
62 double getDensity() const {
63 if (Size == 0)
64 return 0;
65 return double(Weight) / double(Size);
66 }
67
68 std::vector<int> Sections;
69 size_t Size = 0;
70 uint64_t Weight = 0;
71 uint64_t InitialWeight = 0;
George Rimarf0eedbc2018-08-28 08:49:40 +000072 Edge BestPred = {-1, 0};
Michael J. Spencerb8427252018-04-17 23:30:05 +000073};
74
75class CallGraphSort {
76public:
77 CallGraphSort();
78
79 DenseMap<const InputSectionBase *, int> run();
80
81private:
82 std::vector<Cluster> Clusters;
83 std::vector<const InputSectionBase *> Sections;
84
85 void groupClusters();
86};
87
88// Maximum ammount the combined cluster density can be worse than the original
89// cluster to consider merging.
90constexpr int MAX_DENSITY_DEGRADATION = 8;
91
92// Maximum cluster size in bytes.
93constexpr uint64_t MAX_CLUSTER_SIZE = 1024 * 1024;
94} // end anonymous namespace
95
Rui Ueyama3d354082018-10-12 22:44:06 +000096typedef std::pair<const InputSectionBase *, const InputSectionBase *>
97 SectionPair;
98
Michael J. Spencerb8427252018-04-17 23:30:05 +000099// Take the edge list in Config->CallGraphProfile, resolve symbol names to
100// Symbols, and generate a graph between InputSections with the provided
101// weights.
102CallGraphSort::CallGraphSort() {
Rui Ueyama3d354082018-10-12 22:44:06 +0000103 MapVector<SectionPair, uint64_t> &Profile = Config->CallGraphProfile;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000104 DenseMap<const InputSectionBase *, int> SecToCluster;
105
106 auto GetOrCreateNode = [&](const InputSectionBase *IS) -> int {
107 auto Res = SecToCluster.insert(std::make_pair(IS, Clusters.size()));
108 if (Res.second) {
109 Sections.push_back(IS);
110 Clusters.emplace_back(Clusters.size(), IS->getSize());
111 }
112 return Res.first->second;
113 };
114
115 // Create the graph.
Rui Ueyama3d354082018-10-12 22:44:06 +0000116 for (std::pair<SectionPair, uint64_t> &C : Profile) {
Michael J. Spencerb8427252018-04-17 23:30:05 +0000117 const auto *FromSB = cast<InputSectionBase>(C.first.first->Repl);
118 const auto *ToSB = cast<InputSectionBase>(C.first.second->Repl);
119 uint64_t Weight = C.second;
120
121 // Ignore edges between input sections belonging to different output
122 // sections. This is done because otherwise we would end up with clusters
123 // containing input sections that can't actually be placed adjacently in the
124 // output. This messes with the cluster size and density calculations. We
125 // would also end up moving input sections in other output sections without
126 // moving them closer to what calls them.
127 if (FromSB->getOutputSection() != ToSB->getOutputSection())
128 continue;
129
130 int From = GetOrCreateNode(FromSB);
131 int To = GetOrCreateNode(ToSB);
132
133 Clusters[To].Weight += Weight;
134
135 if (From == To)
136 continue;
137
George Rimarf0eedbc2018-08-28 08:49:40 +0000138 // Remember the best edge.
139 Cluster &ToC = Clusters[To];
140 if (ToC.BestPred.From == -1 || ToC.BestPred.Weight < Weight) {
141 ToC.BestPred.From = From;
142 ToC.BestPred.Weight = Weight;
143 }
Michael J. Spencerb8427252018-04-17 23:30:05 +0000144 }
145 for (Cluster &C : Clusters)
146 C.InitialWeight = C.Weight;
147}
148
149// It's bad to merge clusters which would degrade the density too much.
150static bool isNewDensityBad(Cluster &A, Cluster &B) {
151 double NewDensity = double(A.Weight + B.Weight) / double(A.Size + B.Size);
Rui Ueyama3d354082018-10-12 22:44:06 +0000152 return NewDensity < A.getDensity() / MAX_DENSITY_DEGRADATION;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000153}
154
155static void mergeClusters(Cluster &Into, Cluster &From) {
156 Into.Sections.insert(Into.Sections.end(), From.Sections.begin(),
157 From.Sections.end());
158 Into.Size += From.Size;
159 Into.Weight += From.Weight;
160 From.Sections.clear();
161 From.Size = 0;
162 From.Weight = 0;
163}
164
165// Group InputSections into clusters using the Call-Chain Clustering heuristic
166// then sort the clusters by density.
167void CallGraphSort::groupClusters() {
168 std::vector<int> SortedSecs(Clusters.size());
169 std::vector<Cluster *> SecToCluster(Clusters.size());
170
George Rimara5cf8da2018-08-12 07:52:24 +0000171 for (size_t I = 0; I < Clusters.size(); ++I) {
172 SortedSecs[I] = I;
173 SecToCluster[I] = &Clusters[I];
Michael J. Spencerb8427252018-04-17 23:30:05 +0000174 }
175
176 std::stable_sort(SortedSecs.begin(), SortedSecs.end(), [&](int A, int B) {
177 return Clusters[B].getDensity() < Clusters[A].getDensity();
178 });
179
180 for (int SI : SortedSecs) {
181 // Clusters[SI] is the same as SecToClusters[SI] here because it has not
182 // been merged into another cluster yet.
183 Cluster &C = Clusters[SI];
184
George Rimarf0eedbc2018-08-28 08:49:40 +0000185 // Don't consider merging if the edge is unlikely.
186 if (C.BestPred.From == -1 || C.BestPred.Weight * 10 <= C.InitialWeight)
Michael J. Spencerb8427252018-04-17 23:30:05 +0000187 continue;
188
George Rimarf0eedbc2018-08-28 08:49:40 +0000189 Cluster *PredC = SecToCluster[C.BestPred.From];
Michael J. Spencerb8427252018-04-17 23:30:05 +0000190 if (PredC == &C)
191 continue;
192
193 if (C.Size + PredC->Size > MAX_CLUSTER_SIZE)
194 continue;
195
196 if (isNewDensityBad(*PredC, C))
197 continue;
198
199 // NOTE: Consider using a disjoint-set to track section -> cluster mapping
200 // if this is ever slow.
201 for (int SI : C.Sections)
202 SecToCluster[SI] = PredC;
203
204 mergeClusters(*PredC, C);
205 }
206
207 // Remove empty or dead nodes. Invalidates all cluster indices.
George Rimar97df22f2018-04-23 14:41:49 +0000208 llvm::erase_if(Clusters, [](const Cluster &C) {
209 return C.Size == 0 || C.Sections.empty();
210 });
Michael J. Spencerb8427252018-04-17 23:30:05 +0000211
212 // Sort by density.
George Rimarde83cbf2018-04-24 09:55:39 +0000213 std::stable_sort(Clusters.begin(), Clusters.end(),
214 [](const Cluster &A, const Cluster &B) {
215 return A.getDensity() > B.getDensity();
216 });
Michael J. Spencerb8427252018-04-17 23:30:05 +0000217}
218
219DenseMap<const InputSectionBase *, int> CallGraphSort::run() {
220 groupClusters();
221
222 // Generate order.
Rui Ueyama3d354082018-10-12 22:44:06 +0000223 DenseMap<const InputSectionBase *, int> OrderMap;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000224 ssize_t CurOrder = 1;
225
226 for (const Cluster &C : Clusters)
227 for (int SecIndex : C.Sections)
228 OrderMap[Sections[SecIndex]] = CurOrder++;
229
230 return OrderMap;
231}
232
233// Sort sections by the profile data provided by -callgraph-profile-file
234//
235// This first builds a call graph based on the profile data then merges sections
236// according to the C³ huristic. All clusters are then sorted by a density
237// metric to further improve locality.
238DenseMap<const InputSectionBase *, int> elf::computeCallGraphProfileOrder() {
239 return CallGraphSort().run();
240}