blob: 6dad7c965f1a7cd7477ce0663f4671635be30cb9 [file] [log] [blame]
Michael J. Spencerb8427252018-04-17 23:30:05 +00001//===- CallGraphSort.cpp --------------------------------------------------===//
2//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Michael J. Spencerb8427252018-04-17 23:30:05 +00006//
7//===----------------------------------------------------------------------===//
8///
9/// Implementation of Call-Chain Clustering from: Optimizing Function Placement
10/// for Large-Scale Data-Center Applications
11/// https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf
12///
13/// The goal of this algorithm is to improve runtime performance of the final
14/// executable by arranging code sections such that page table and i-cache
15/// misses are minimized.
16///
17/// Definitions:
18/// * Cluster
Nico Weber5976a3f2019-10-28 21:41:38 -040019/// * An ordered list of input sections which are laid out as a unit. At the
Michael J. Spencerb8427252018-04-17 23:30:05 +000020/// beginning of the algorithm each input section has its own cluster and
Nico Weber5976a3f2019-10-28 21:41:38 -040021/// the weight of the cluster is the sum of the weight of all incoming
Michael J. Spencerb8427252018-04-17 23:30:05 +000022/// edges.
23/// * Call-Chain Clustering (C³) Heuristic
24/// * Defines when and how clusters are combined. Pick the highest weighted
25/// input section then add it to its most likely predecessor if it wouldn't
26/// penalize it too much.
27/// * Density
28/// * The weight of the cluster divided by the size of the cluster. This is a
Nico Weber5976a3f2019-10-28 21:41:38 -040029/// proxy for the amount of execution time spent per byte of the cluster.
Michael J. Spencerb8427252018-04-17 23:30:05 +000030///
31/// It does so given a call graph profile by the following:
32/// * Build a weighted call graph from the call graph profile
33/// * Sort input sections by weight
34/// * For each input section starting with the highest weight
35/// * Find its most likely predecessor cluster
36/// * Check if the combined cluster would be too large, or would have too low
37/// a density.
38/// * If not, then combine the clusters.
39/// * Sort non-empty clusters by density
40///
41//===----------------------------------------------------------------------===//
42
43#include "CallGraphSort.h"
44#include "OutputSections.h"
45#include "SymbolTable.h"
46#include "Symbols.h"
47
Fangrui Song7588cf02019-10-04 07:56:54 +000048#include <numeric>
49
Michael J. Spencerb8427252018-04-17 23:30:05 +000050using namespace llvm;
Fangrui Songbd8cfe62019-10-07 08:31:18 +000051
52namespace lld {
53namespace elf {
Michael J. Spencerb8427252018-04-17 23:30:05 +000054
55namespace {
56struct Edge {
Rui Ueyama3837f422019-07-10 05:00:37 +000057 int from;
58 uint64_t weight;
Michael J. Spencerb8427252018-04-17 23:30:05 +000059};
60
61struct Cluster {
Fangrui Song7588cf02019-10-04 07:56:54 +000062 Cluster(int sec, size_t s) : next(sec), prev(sec), size(s) {}
Michael J. Spencerb8427252018-04-17 23:30:05 +000063
64 double getDensity() const {
Rui Ueyama3837f422019-07-10 05:00:37 +000065 if (size == 0)
Michael J. Spencerb8427252018-04-17 23:30:05 +000066 return 0;
Rui Ueyama3837f422019-07-10 05:00:37 +000067 return double(weight) / double(size);
Michael J. Spencerb8427252018-04-17 23:30:05 +000068 }
69
Fangrui Song7588cf02019-10-04 07:56:54 +000070 int next;
71 int prev;
Rui Ueyama3837f422019-07-10 05:00:37 +000072 size_t size = 0;
73 uint64_t weight = 0;
74 uint64_t initialWeight = 0;
75 Edge bestPred = {-1, 0};
Michael J. Spencerb8427252018-04-17 23:30:05 +000076};
77
78class CallGraphSort {
79public:
80 CallGraphSort();
81
82 DenseMap<const InputSectionBase *, int> run();
83
84private:
Rui Ueyama3837f422019-07-10 05:00:37 +000085 std::vector<Cluster> clusters;
86 std::vector<const InputSectionBase *> sections;
Michael J. Spencerb8427252018-04-17 23:30:05 +000087};
88
Nico Weber5976a3f2019-10-28 21:41:38 -040089// Maximum amount the combined cluster density can be worse than the original
Michael J. Spencerb8427252018-04-17 23:30:05 +000090// cluster to consider merging.
91constexpr int MAX_DENSITY_DEGRADATION = 8;
92
93// Maximum cluster size in bytes.
94constexpr uint64_t MAX_CLUSTER_SIZE = 1024 * 1024;
95} // end anonymous namespace
96
Rui Ueyama68b9f452019-04-01 00:11:24 +000097using SectionPair =
98 std::pair<const InputSectionBase *, const InputSectionBase *>;
Rui Ueyama3d354082018-10-12 22:44:06 +000099
Michael J. Spencerb8427252018-04-17 23:30:05 +0000100// Take the edge list in Config->CallGraphProfile, resolve symbol names to
101// Symbols, and generate a graph between InputSections with the provided
102// weights.
103CallGraphSort::CallGraphSort() {
Rui Ueyama3837f422019-07-10 05:00:37 +0000104 MapVector<SectionPair, uint64_t> &profile = config->callGraphProfile;
105 DenseMap<const InputSectionBase *, int> secToCluster;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000106
Rui Ueyama3837f422019-07-10 05:00:37 +0000107 auto getOrCreateNode = [&](const InputSectionBase *isec) -> int {
Fangrui Song7588cf02019-10-04 07:56:54 +0000108 auto res = secToCluster.try_emplace(isec, clusters.size());
Rui Ueyama3837f422019-07-10 05:00:37 +0000109 if (res.second) {
110 sections.push_back(isec);
111 clusters.emplace_back(clusters.size(), isec->getSize());
Michael J. Spencerb8427252018-04-17 23:30:05 +0000112 }
Rui Ueyama3837f422019-07-10 05:00:37 +0000113 return res.first->second;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000114 };
115
116 // Create the graph.
Rui Ueyama3837f422019-07-10 05:00:37 +0000117 for (std::pair<SectionPair, uint64_t> &c : profile) {
118 const auto *fromSB = cast<InputSectionBase>(c.first.first->repl);
119 const auto *toSB = cast<InputSectionBase>(c.first.second->repl);
120 uint64_t weight = c.second;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000121
122 // Ignore edges between input sections belonging to different output
123 // sections. This is done because otherwise we would end up with clusters
124 // containing input sections that can't actually be placed adjacently in the
125 // output. This messes with the cluster size and density calculations. We
126 // would also end up moving input sections in other output sections without
127 // moving them closer to what calls them.
Rui Ueyama3837f422019-07-10 05:00:37 +0000128 if (fromSB->getOutputSection() != toSB->getOutputSection())
Michael J. Spencerb8427252018-04-17 23:30:05 +0000129 continue;
130
Rui Ueyama3837f422019-07-10 05:00:37 +0000131 int from = getOrCreateNode(fromSB);
132 int to = getOrCreateNode(toSB);
Michael J. Spencerb8427252018-04-17 23:30:05 +0000133
Rui Ueyama3837f422019-07-10 05:00:37 +0000134 clusters[to].weight += weight;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000135
Rui Ueyama3837f422019-07-10 05:00:37 +0000136 if (from == to)
Michael J. Spencerb8427252018-04-17 23:30:05 +0000137 continue;
138
George Rimarf0eedbc2018-08-28 08:49:40 +0000139 // Remember the best edge.
Rui Ueyama3837f422019-07-10 05:00:37 +0000140 Cluster &toC = clusters[to];
141 if (toC.bestPred.from == -1 || toC.bestPred.weight < weight) {
142 toC.bestPred.from = from;
143 toC.bestPred.weight = weight;
George Rimarf0eedbc2018-08-28 08:49:40 +0000144 }
Michael J. Spencerb8427252018-04-17 23:30:05 +0000145 }
Rui Ueyama3837f422019-07-10 05:00:37 +0000146 for (Cluster &c : clusters)
147 c.initialWeight = c.weight;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000148}
149
150// It's bad to merge clusters which would degrade the density too much.
Rui Ueyama3837f422019-07-10 05:00:37 +0000151static bool isNewDensityBad(Cluster &a, Cluster &b) {
152 double newDensity = double(a.weight + b.weight) / double(a.size + b.size);
153 return newDensity < a.getDensity() / MAX_DENSITY_DEGRADATION;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000154}
155
Fangrui Song7588cf02019-10-04 07:56:54 +0000156// Find the leader of V's belonged cluster (represented as an equivalence
157// class). We apply union-find path-halving technique (simple to implement) in
158// the meantime as it decreases depths and the time complexity.
159static int getLeader(std::vector<int> &leaders, int v) {
160 while (leaders[v] != v) {
161 leaders[v] = leaders[leaders[v]];
162 v = leaders[v];
163 }
164 return v;
165}
166
167static void mergeClusters(std::vector<Cluster> &cs, Cluster &into, int intoIdx,
168 Cluster &from, int fromIdx) {
169 int tail1 = into.prev, tail2 = from.prev;
170 into.prev = tail2;
171 cs[tail2].next = intoIdx;
172 from.prev = tail1;
173 cs[tail1].next = fromIdx;
Rui Ueyama3837f422019-07-10 05:00:37 +0000174 into.size += from.size;
175 into.weight += from.weight;
Rui Ueyama3837f422019-07-10 05:00:37 +0000176 from.size = 0;
177 from.weight = 0;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000178}
179
180// Group InputSections into clusters using the Call-Chain Clustering heuristic
181// then sort the clusters by density.
Fangrui Song7588cf02019-10-04 07:56:54 +0000182DenseMap<const InputSectionBase *, int> CallGraphSort::run() {
183 std::vector<int> sorted(clusters.size());
184 std::vector<int> leaders(clusters.size());
Michael J. Spencerb8427252018-04-17 23:30:05 +0000185
Fangrui Song7588cf02019-10-04 07:56:54 +0000186 std::iota(leaders.begin(), leaders.end(), 0);
187 std::iota(sorted.begin(), sorted.end(), 0);
188 llvm::stable_sort(sorted, [&](int a, int b) {
Rui Ueyama3837f422019-07-10 05:00:37 +0000189 return clusters[a].getDensity() > clusters[b].getDensity();
Michael J. Spencerb8427252018-04-17 23:30:05 +0000190 });
191
Fangrui Song7588cf02019-10-04 07:56:54 +0000192 for (int l : sorted) {
193 // The cluster index is the same as the index of its leader here because
194 // clusters[L] has not been merged into another cluster yet.
195 Cluster &c = clusters[l];
Michael J. Spencerb8427252018-04-17 23:30:05 +0000196
George Rimarf0eedbc2018-08-28 08:49:40 +0000197 // Don't consider merging if the edge is unlikely.
Rui Ueyama3837f422019-07-10 05:00:37 +0000198 if (c.bestPred.from == -1 || c.bestPred.weight * 10 <= c.initialWeight)
Michael J. Spencerb8427252018-04-17 23:30:05 +0000199 continue;
200
Fangrui Song7588cf02019-10-04 07:56:54 +0000201 int predL = getLeader(leaders, c.bestPred.from);
202 if (l == predL)
Michael J. Spencerb8427252018-04-17 23:30:05 +0000203 continue;
204
Fangrui Song7588cf02019-10-04 07:56:54 +0000205 Cluster *predC = &clusters[predL];
Rui Ueyama3837f422019-07-10 05:00:37 +0000206 if (c.size + predC->size > MAX_CLUSTER_SIZE)
Michael J. Spencerb8427252018-04-17 23:30:05 +0000207 continue;
208
Rui Ueyama3837f422019-07-10 05:00:37 +0000209 if (isNewDensityBad(*predC, c))
Michael J. Spencerb8427252018-04-17 23:30:05 +0000210 continue;
211
Fangrui Song7588cf02019-10-04 07:56:54 +0000212 leaders[l] = predL;
213 mergeClusters(clusters, *predC, predL, c, l);
Michael J. Spencerb8427252018-04-17 23:30:05 +0000214 }
215
Fangrui Song7588cf02019-10-04 07:56:54 +0000216 // Sort remaining non-empty clusters by density.
217 sorted.clear();
218 for (int i = 0, e = (int)clusters.size(); i != e; ++i)
219 if (clusters[i].size > 0)
220 sorted.push_back(i);
221 llvm::stable_sort(sorted, [&](int a, int b) {
222 return clusters[a].getDensity() > clusters[b].getDensity();
George Rimar97df22f2018-04-23 14:41:49 +0000223 });
Michael J. Spencerb8427252018-04-17 23:30:05 +0000224
Rui Ueyama3837f422019-07-10 05:00:37 +0000225 DenseMap<const InputSectionBase *, int> orderMap;
Fangrui Song7588cf02019-10-04 07:56:54 +0000226 int curOrder = 1;
227 for (int leader : sorted)
228 for (int i = leader;;) {
229 orderMap[sections[i]] = curOrder++;
230 i = clusters[i].next;
231 if (i == leader)
232 break;
233 }
Michael J. Spencerb8427252018-04-17 23:30:05 +0000234
Rui Ueyama3837f422019-07-10 05:00:37 +0000235 if (!config->printSymbolOrder.empty()) {
236 std::error_code ec;
Fangrui Songd9b948b2019-08-05 05:43:48 +0000237 raw_fd_ostream os(config->printSymbolOrder, ec, sys::fs::OF_None);
Rui Ueyama3837f422019-07-10 05:00:37 +0000238 if (ec) {
239 error("cannot open " + config->printSymbolOrder + ": " + ec.message());
240 return orderMap;
Rui Ueyama432030e2019-03-27 23:52:22 +0000241 }
242
Fangrui Song47cfe8f2019-07-16 05:50:45 +0000243 // Print the symbols ordered by C3, in the order of increasing curOrder
244 // Instead of sorting all the orderMap, just repeat the loops above.
Fangrui Song7588cf02019-10-04 07:56:54 +0000245 for (int leader : sorted)
246 for (int i = leader;;) {
Rui Ueyama432030e2019-03-27 23:52:22 +0000247 // Search all the symbols in the file of the section
248 // and find out a Defined symbol with name that is within the section.
Fangrui Song7588cf02019-10-04 07:56:54 +0000249 for (Symbol *sym : sections[i]->file->getSymbols())
Rui Ueyama3837f422019-07-10 05:00:37 +0000250 if (!sym->isSection()) // Filter out section-type symbols here.
251 if (auto *d = dyn_cast<Defined>(sym))
Fangrui Song7588cf02019-10-04 07:56:54 +0000252 if (sections[i] == d->section)
Rui Ueyama3837f422019-07-10 05:00:37 +0000253 os << sym->getName() << "\n";
Fangrui Song7588cf02019-10-04 07:56:54 +0000254 i = clusters[i].next;
255 if (i == leader)
256 break;
257 }
Rui Ueyama432030e2019-03-27 23:52:22 +0000258 }
259
Rui Ueyama3837f422019-07-10 05:00:37 +0000260 return orderMap;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000261}
262
263// Sort sections by the profile data provided by -callgraph-profile-file
264//
265// This first builds a call graph based on the profile data then merges sections
266// according to the C³ huristic. All clusters are then sorted by a density
267// metric to further improve locality.
Fangrui Songbd8cfe62019-10-07 08:31:18 +0000268DenseMap<const InputSectionBase *, int> computeCallGraphProfileOrder() {
Michael J. Spencerb8427252018-04-17 23:30:05 +0000269 return CallGraphSort().run();
270}
Fangrui Songbd8cfe62019-10-07 08:31:18 +0000271
272} // namespace elf
273} // namespace lld