blob: 2e50c50559beb8a5deafba9b85c4a616c802b84d [file] [log] [blame]
Michael J. Spencerb8427252018-04-17 23:30:05 +00001//===- CallGraphSort.cpp --------------------------------------------------===//
2//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Michael J. Spencerb8427252018-04-17 23:30:05 +00006//
7//===----------------------------------------------------------------------===//
8///
9/// Implementation of Call-Chain Clustering from: Optimizing Function Placement
10/// for Large-Scale Data-Center Applications
11/// https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf
12///
13/// The goal of this algorithm is to improve runtime performance of the final
14/// executable by arranging code sections such that page table and i-cache
15/// misses are minimized.
16///
17/// Definitions:
18/// * Cluster
19/// * An ordered list of input sections which are layed out as a unit. At the
20/// beginning of the algorithm each input section has its own cluster and
21/// the weight of the cluster is the sum of the weight of all incomming
22/// edges.
23/// * Call-Chain Clustering (C³) Heuristic
24/// * Defines when and how clusters are combined. Pick the highest weighted
25/// input section then add it to its most likely predecessor if it wouldn't
26/// penalize it too much.
27/// * Density
28/// * The weight of the cluster divided by the size of the cluster. This is a
29/// proxy for the ammount of execution time spent per byte of the cluster.
30///
31/// It does so given a call graph profile by the following:
32/// * Build a weighted call graph from the call graph profile
33/// * Sort input sections by weight
34/// * For each input section starting with the highest weight
35/// * Find its most likely predecessor cluster
36/// * Check if the combined cluster would be too large, or would have too low
37/// a density.
38/// * If not, then combine the clusters.
39/// * Sort non-empty clusters by density
40///
41//===----------------------------------------------------------------------===//
42
43#include "CallGraphSort.h"
44#include "OutputSections.h"
45#include "SymbolTable.h"
46#include "Symbols.h"
47
48using namespace llvm;
49using namespace lld;
50using namespace lld::elf;
51
52namespace {
53struct Edge {
54 int From;
55 uint64_t Weight;
56};
57
58struct Cluster {
Rui Ueyama3d354082018-10-12 22:44:06 +000059 Cluster(int Sec, size_t S) : Sections{Sec}, Size(S) {}
Michael J. Spencerb8427252018-04-17 23:30:05 +000060
61 double getDensity() const {
62 if (Size == 0)
63 return 0;
64 return double(Weight) / double(Size);
65 }
66
67 std::vector<int> Sections;
68 size_t Size = 0;
69 uint64_t Weight = 0;
70 uint64_t InitialWeight = 0;
George Rimarf0eedbc2018-08-28 08:49:40 +000071 Edge BestPred = {-1, 0};
Michael J. Spencerb8427252018-04-17 23:30:05 +000072};
73
74class CallGraphSort {
75public:
76 CallGraphSort();
77
78 DenseMap<const InputSectionBase *, int> run();
79
80private:
81 std::vector<Cluster> Clusters;
82 std::vector<const InputSectionBase *> Sections;
83
84 void groupClusters();
85};
86
87// Maximum ammount the combined cluster density can be worse than the original
88// cluster to consider merging.
89constexpr int MAX_DENSITY_DEGRADATION = 8;
90
91// Maximum cluster size in bytes.
92constexpr uint64_t MAX_CLUSTER_SIZE = 1024 * 1024;
93} // end anonymous namespace
94
Rui Ueyama3d354082018-10-12 22:44:06 +000095typedef std::pair<const InputSectionBase *, const InputSectionBase *>
96 SectionPair;
97
Michael J. Spencerb8427252018-04-17 23:30:05 +000098// Take the edge list in Config->CallGraphProfile, resolve symbol names to
99// Symbols, and generate a graph between InputSections with the provided
100// weights.
101CallGraphSort::CallGraphSort() {
Rui Ueyama3d354082018-10-12 22:44:06 +0000102 MapVector<SectionPair, uint64_t> &Profile = Config->CallGraphProfile;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000103 DenseMap<const InputSectionBase *, int> SecToCluster;
104
105 auto GetOrCreateNode = [&](const InputSectionBase *IS) -> int {
106 auto Res = SecToCluster.insert(std::make_pair(IS, Clusters.size()));
107 if (Res.second) {
108 Sections.push_back(IS);
109 Clusters.emplace_back(Clusters.size(), IS->getSize());
110 }
111 return Res.first->second;
112 };
113
114 // Create the graph.
Rui Ueyama3d354082018-10-12 22:44:06 +0000115 for (std::pair<SectionPair, uint64_t> &C : Profile) {
Michael J. Spencerb8427252018-04-17 23:30:05 +0000116 const auto *FromSB = cast<InputSectionBase>(C.first.first->Repl);
117 const auto *ToSB = cast<InputSectionBase>(C.first.second->Repl);
118 uint64_t Weight = C.second;
119
120 // Ignore edges between input sections belonging to different output
121 // sections. This is done because otherwise we would end up with clusters
122 // containing input sections that can't actually be placed adjacently in the
123 // output. This messes with the cluster size and density calculations. We
124 // would also end up moving input sections in other output sections without
125 // moving them closer to what calls them.
126 if (FromSB->getOutputSection() != ToSB->getOutputSection())
127 continue;
128
129 int From = GetOrCreateNode(FromSB);
130 int To = GetOrCreateNode(ToSB);
131
132 Clusters[To].Weight += Weight;
133
134 if (From == To)
135 continue;
136
George Rimarf0eedbc2018-08-28 08:49:40 +0000137 // Remember the best edge.
138 Cluster &ToC = Clusters[To];
139 if (ToC.BestPred.From == -1 || ToC.BestPred.Weight < Weight) {
140 ToC.BestPred.From = From;
141 ToC.BestPred.Weight = Weight;
142 }
Michael J. Spencerb8427252018-04-17 23:30:05 +0000143 }
144 for (Cluster &C : Clusters)
145 C.InitialWeight = C.Weight;
146}
147
148// It's bad to merge clusters which would degrade the density too much.
149static bool isNewDensityBad(Cluster &A, Cluster &B) {
150 double NewDensity = double(A.Weight + B.Weight) / double(A.Size + B.Size);
Rui Ueyama3d354082018-10-12 22:44:06 +0000151 return NewDensity < A.getDensity() / MAX_DENSITY_DEGRADATION;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000152}
153
154static void mergeClusters(Cluster &Into, Cluster &From) {
155 Into.Sections.insert(Into.Sections.end(), From.Sections.begin(),
156 From.Sections.end());
157 Into.Size += From.Size;
158 Into.Weight += From.Weight;
159 From.Sections.clear();
160 From.Size = 0;
161 From.Weight = 0;
162}
163
164// Group InputSections into clusters using the Call-Chain Clustering heuristic
165// then sort the clusters by density.
166void CallGraphSort::groupClusters() {
167 std::vector<int> SortedSecs(Clusters.size());
168 std::vector<Cluster *> SecToCluster(Clusters.size());
169
George Rimara5cf8da2018-08-12 07:52:24 +0000170 for (size_t I = 0; I < Clusters.size(); ++I) {
171 SortedSecs[I] = I;
172 SecToCluster[I] = &Clusters[I];
Michael J. Spencerb8427252018-04-17 23:30:05 +0000173 }
174
175 std::stable_sort(SortedSecs.begin(), SortedSecs.end(), [&](int A, int B) {
176 return Clusters[B].getDensity() < Clusters[A].getDensity();
177 });
178
179 for (int SI : SortedSecs) {
180 // Clusters[SI] is the same as SecToClusters[SI] here because it has not
181 // been merged into another cluster yet.
182 Cluster &C = Clusters[SI];
183
George Rimarf0eedbc2018-08-28 08:49:40 +0000184 // Don't consider merging if the edge is unlikely.
185 if (C.BestPred.From == -1 || C.BestPred.Weight * 10 <= C.InitialWeight)
Michael J. Spencerb8427252018-04-17 23:30:05 +0000186 continue;
187
George Rimarf0eedbc2018-08-28 08:49:40 +0000188 Cluster *PredC = SecToCluster[C.BestPred.From];
Michael J. Spencerb8427252018-04-17 23:30:05 +0000189 if (PredC == &C)
190 continue;
191
192 if (C.Size + PredC->Size > MAX_CLUSTER_SIZE)
193 continue;
194
195 if (isNewDensityBad(*PredC, C))
196 continue;
197
198 // NOTE: Consider using a disjoint-set to track section -> cluster mapping
199 // if this is ever slow.
200 for (int SI : C.Sections)
201 SecToCluster[SI] = PredC;
202
203 mergeClusters(*PredC, C);
204 }
205
206 // Remove empty or dead nodes. Invalidates all cluster indices.
George Rimar97df22f2018-04-23 14:41:49 +0000207 llvm::erase_if(Clusters, [](const Cluster &C) {
208 return C.Size == 0 || C.Sections.empty();
209 });
Michael J. Spencerb8427252018-04-17 23:30:05 +0000210
211 // Sort by density.
George Rimarde83cbf2018-04-24 09:55:39 +0000212 std::stable_sort(Clusters.begin(), Clusters.end(),
213 [](const Cluster &A, const Cluster &B) {
214 return A.getDensity() > B.getDensity();
215 });
Michael J. Spencerb8427252018-04-17 23:30:05 +0000216}
217
218DenseMap<const InputSectionBase *, int> CallGraphSort::run() {
219 groupClusters();
220
221 // Generate order.
Rui Ueyama3d354082018-10-12 22:44:06 +0000222 DenseMap<const InputSectionBase *, int> OrderMap;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000223 ssize_t CurOrder = 1;
224
225 for (const Cluster &C : Clusters)
226 for (int SecIndex : C.Sections)
227 OrderMap[Sections[SecIndex]] = CurOrder++;
228
229 return OrderMap;
230}
231
232// Sort sections by the profile data provided by -callgraph-profile-file
233//
234// This first builds a call graph based on the profile data then merges sections
235// according to the C³ huristic. All clusters are then sorted by a density
236// metric to further improve locality.
237DenseMap<const InputSectionBase *, int> elf::computeCallGraphProfileOrder() {
238 return CallGraphSort().run();
239}