blob: 459aa3c01b5ce1d7acf54ccf53b322f8a1021d48 [file] [log] [blame]
Michael J. Spencerb8427252018-04-17 23:30:05 +00001//===- CallGraphSort.cpp --------------------------------------------------===//
2//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Michael J. Spencerb8427252018-04-17 23:30:05 +00006//
7//===----------------------------------------------------------------------===//
8///
9/// Implementation of Call-Chain Clustering from: Optimizing Function Placement
10/// for Large-Scale Data-Center Applications
11/// https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf
12///
13/// The goal of this algorithm is to improve runtime performance of the final
14/// executable by arranging code sections such that page table and i-cache
15/// misses are minimized.
16///
17/// Definitions:
18/// * Cluster
19/// * An ordered list of input sections which are layed out as a unit. At the
20/// beginning of the algorithm each input section has its own cluster and
21/// the weight of the cluster is the sum of the weight of all incomming
22/// edges.
23/// * Call-Chain Clustering (C³) Heuristic
24/// * Defines when and how clusters are combined. Pick the highest weighted
25/// input section then add it to its most likely predecessor if it wouldn't
26/// penalize it too much.
27/// * Density
28/// * The weight of the cluster divided by the size of the cluster. This is a
29/// proxy for the ammount of execution time spent per byte of the cluster.
30///
31/// It does so given a call graph profile by the following:
32/// * Build a weighted call graph from the call graph profile
33/// * Sort input sections by weight
34/// * For each input section starting with the highest weight
35/// * Find its most likely predecessor cluster
36/// * Check if the combined cluster would be too large, or would have too low
37/// a density.
38/// * If not, then combine the clusters.
39/// * Sort non-empty clusters by density
40///
41//===----------------------------------------------------------------------===//
42
43#include "CallGraphSort.h"
44#include "OutputSections.h"
45#include "SymbolTable.h"
46#include "Symbols.h"
47
Fangrui Song7588cf02019-10-04 07:56:54 +000048#include <numeric>
49
Michael J. Spencerb8427252018-04-17 23:30:05 +000050using namespace llvm;
51using namespace lld;
52using namespace lld::elf;
53
54namespace {
55struct Edge {
Rui Ueyama3837f422019-07-10 05:00:37 +000056 int from;
57 uint64_t weight;
Michael J. Spencerb8427252018-04-17 23:30:05 +000058};
59
60struct Cluster {
Fangrui Song7588cf02019-10-04 07:56:54 +000061 Cluster(int sec, size_t s) : next(sec), prev(sec), size(s) {}
Michael J. Spencerb8427252018-04-17 23:30:05 +000062
63 double getDensity() const {
Rui Ueyama3837f422019-07-10 05:00:37 +000064 if (size == 0)
Michael J. Spencerb8427252018-04-17 23:30:05 +000065 return 0;
Rui Ueyama3837f422019-07-10 05:00:37 +000066 return double(weight) / double(size);
Michael J. Spencerb8427252018-04-17 23:30:05 +000067 }
68
Fangrui Song7588cf02019-10-04 07:56:54 +000069 int next;
70 int prev;
Rui Ueyama3837f422019-07-10 05:00:37 +000071 size_t size = 0;
72 uint64_t weight = 0;
73 uint64_t initialWeight = 0;
74 Edge bestPred = {-1, 0};
Michael J. Spencerb8427252018-04-17 23:30:05 +000075};
76
77class CallGraphSort {
78public:
79 CallGraphSort();
80
81 DenseMap<const InputSectionBase *, int> run();
82
83private:
Rui Ueyama3837f422019-07-10 05:00:37 +000084 std::vector<Cluster> clusters;
85 std::vector<const InputSectionBase *> sections;
Michael J. Spencerb8427252018-04-17 23:30:05 +000086};
87
88// Maximum ammount the combined cluster density can be worse than the original
89// cluster to consider merging.
90constexpr int MAX_DENSITY_DEGRADATION = 8;
91
92// Maximum cluster size in bytes.
93constexpr uint64_t MAX_CLUSTER_SIZE = 1024 * 1024;
94} // end anonymous namespace
95
Rui Ueyama68b9f452019-04-01 00:11:24 +000096using SectionPair =
97 std::pair<const InputSectionBase *, const InputSectionBase *>;
Rui Ueyama3d354082018-10-12 22:44:06 +000098
Michael J. Spencerb8427252018-04-17 23:30:05 +000099// Take the edge list in Config->CallGraphProfile, resolve symbol names to
100// Symbols, and generate a graph between InputSections with the provided
101// weights.
102CallGraphSort::CallGraphSort() {
Rui Ueyama3837f422019-07-10 05:00:37 +0000103 MapVector<SectionPair, uint64_t> &profile = config->callGraphProfile;
104 DenseMap<const InputSectionBase *, int> secToCluster;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000105
Rui Ueyama3837f422019-07-10 05:00:37 +0000106 auto getOrCreateNode = [&](const InputSectionBase *isec) -> int {
Fangrui Song7588cf02019-10-04 07:56:54 +0000107 auto res = secToCluster.try_emplace(isec, clusters.size());
Rui Ueyama3837f422019-07-10 05:00:37 +0000108 if (res.second) {
109 sections.push_back(isec);
110 clusters.emplace_back(clusters.size(), isec->getSize());
Michael J. Spencerb8427252018-04-17 23:30:05 +0000111 }
Rui Ueyama3837f422019-07-10 05:00:37 +0000112 return res.first->second;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000113 };
114
115 // Create the graph.
Rui Ueyama3837f422019-07-10 05:00:37 +0000116 for (std::pair<SectionPair, uint64_t> &c : profile) {
117 const auto *fromSB = cast<InputSectionBase>(c.first.first->repl);
118 const auto *toSB = cast<InputSectionBase>(c.first.second->repl);
119 uint64_t weight = c.second;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000120
121 // Ignore edges between input sections belonging to different output
122 // sections. This is done because otherwise we would end up with clusters
123 // containing input sections that can't actually be placed adjacently in the
124 // output. This messes with the cluster size and density calculations. We
125 // would also end up moving input sections in other output sections without
126 // moving them closer to what calls them.
Rui Ueyama3837f422019-07-10 05:00:37 +0000127 if (fromSB->getOutputSection() != toSB->getOutputSection())
Michael J. Spencerb8427252018-04-17 23:30:05 +0000128 continue;
129
Rui Ueyama3837f422019-07-10 05:00:37 +0000130 int from = getOrCreateNode(fromSB);
131 int to = getOrCreateNode(toSB);
Michael J. Spencerb8427252018-04-17 23:30:05 +0000132
Rui Ueyama3837f422019-07-10 05:00:37 +0000133 clusters[to].weight += weight;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000134
Rui Ueyama3837f422019-07-10 05:00:37 +0000135 if (from == to)
Michael J. Spencerb8427252018-04-17 23:30:05 +0000136 continue;
137
George Rimarf0eedbc2018-08-28 08:49:40 +0000138 // Remember the best edge.
Rui Ueyama3837f422019-07-10 05:00:37 +0000139 Cluster &toC = clusters[to];
140 if (toC.bestPred.from == -1 || toC.bestPred.weight < weight) {
141 toC.bestPred.from = from;
142 toC.bestPred.weight = weight;
George Rimarf0eedbc2018-08-28 08:49:40 +0000143 }
Michael J. Spencerb8427252018-04-17 23:30:05 +0000144 }
Rui Ueyama3837f422019-07-10 05:00:37 +0000145 for (Cluster &c : clusters)
146 c.initialWeight = c.weight;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000147}
148
149// It's bad to merge clusters which would degrade the density too much.
Rui Ueyama3837f422019-07-10 05:00:37 +0000150static bool isNewDensityBad(Cluster &a, Cluster &b) {
151 double newDensity = double(a.weight + b.weight) / double(a.size + b.size);
152 return newDensity < a.getDensity() / MAX_DENSITY_DEGRADATION;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000153}
154
Fangrui Song7588cf02019-10-04 07:56:54 +0000155// Find the leader of V's belonged cluster (represented as an equivalence
156// class). We apply union-find path-halving technique (simple to implement) in
157// the meantime as it decreases depths and the time complexity.
158static int getLeader(std::vector<int> &leaders, int v) {
159 while (leaders[v] != v) {
160 leaders[v] = leaders[leaders[v]];
161 v = leaders[v];
162 }
163 return v;
164}
165
166static void mergeClusters(std::vector<Cluster> &cs, Cluster &into, int intoIdx,
167 Cluster &from, int fromIdx) {
168 int tail1 = into.prev, tail2 = from.prev;
169 into.prev = tail2;
170 cs[tail2].next = intoIdx;
171 from.prev = tail1;
172 cs[tail1].next = fromIdx;
Rui Ueyama3837f422019-07-10 05:00:37 +0000173 into.size += from.size;
174 into.weight += from.weight;
Rui Ueyama3837f422019-07-10 05:00:37 +0000175 from.size = 0;
176 from.weight = 0;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000177}
178
179// Group InputSections into clusters using the Call-Chain Clustering heuristic
180// then sort the clusters by density.
Fangrui Song7588cf02019-10-04 07:56:54 +0000181DenseMap<const InputSectionBase *, int> CallGraphSort::run() {
182 std::vector<int> sorted(clusters.size());
183 std::vector<int> leaders(clusters.size());
Michael J. Spencerb8427252018-04-17 23:30:05 +0000184
Fangrui Song7588cf02019-10-04 07:56:54 +0000185 std::iota(leaders.begin(), leaders.end(), 0);
186 std::iota(sorted.begin(), sorted.end(), 0);
187 llvm::stable_sort(sorted, [&](int a, int b) {
Rui Ueyama3837f422019-07-10 05:00:37 +0000188 return clusters[a].getDensity() > clusters[b].getDensity();
Michael J. Spencerb8427252018-04-17 23:30:05 +0000189 });
190
Fangrui Song7588cf02019-10-04 07:56:54 +0000191 for (int l : sorted) {
192 // The cluster index is the same as the index of its leader here because
193 // clusters[L] has not been merged into another cluster yet.
194 Cluster &c = clusters[l];
Michael J. Spencerb8427252018-04-17 23:30:05 +0000195
George Rimarf0eedbc2018-08-28 08:49:40 +0000196 // Don't consider merging if the edge is unlikely.
Rui Ueyama3837f422019-07-10 05:00:37 +0000197 if (c.bestPred.from == -1 || c.bestPred.weight * 10 <= c.initialWeight)
Michael J. Spencerb8427252018-04-17 23:30:05 +0000198 continue;
199
Fangrui Song7588cf02019-10-04 07:56:54 +0000200 int predL = getLeader(leaders, c.bestPred.from);
201 if (l == predL)
Michael J. Spencerb8427252018-04-17 23:30:05 +0000202 continue;
203
Fangrui Song7588cf02019-10-04 07:56:54 +0000204 Cluster *predC = &clusters[predL];
Rui Ueyama3837f422019-07-10 05:00:37 +0000205 if (c.size + predC->size > MAX_CLUSTER_SIZE)
Michael J. Spencerb8427252018-04-17 23:30:05 +0000206 continue;
207
Rui Ueyama3837f422019-07-10 05:00:37 +0000208 if (isNewDensityBad(*predC, c))
Michael J. Spencerb8427252018-04-17 23:30:05 +0000209 continue;
210
Fangrui Song7588cf02019-10-04 07:56:54 +0000211 leaders[l] = predL;
212 mergeClusters(clusters, *predC, predL, c, l);
Michael J. Spencerb8427252018-04-17 23:30:05 +0000213 }
214
Fangrui Song7588cf02019-10-04 07:56:54 +0000215 // Sort remaining non-empty clusters by density.
216 sorted.clear();
217 for (int i = 0, e = (int)clusters.size(); i != e; ++i)
218 if (clusters[i].size > 0)
219 sorted.push_back(i);
220 llvm::stable_sort(sorted, [&](int a, int b) {
221 return clusters[a].getDensity() > clusters[b].getDensity();
George Rimar97df22f2018-04-23 14:41:49 +0000222 });
Michael J. Spencerb8427252018-04-17 23:30:05 +0000223
Rui Ueyama3837f422019-07-10 05:00:37 +0000224 DenseMap<const InputSectionBase *, int> orderMap;
Fangrui Song7588cf02019-10-04 07:56:54 +0000225 int curOrder = 1;
226 for (int leader : sorted)
227 for (int i = leader;;) {
228 orderMap[sections[i]] = curOrder++;
229 i = clusters[i].next;
230 if (i == leader)
231 break;
232 }
Michael J. Spencerb8427252018-04-17 23:30:05 +0000233
Rui Ueyama3837f422019-07-10 05:00:37 +0000234 if (!config->printSymbolOrder.empty()) {
235 std::error_code ec;
Fangrui Songd9b948b2019-08-05 05:43:48 +0000236 raw_fd_ostream os(config->printSymbolOrder, ec, sys::fs::OF_None);
Rui Ueyama3837f422019-07-10 05:00:37 +0000237 if (ec) {
238 error("cannot open " + config->printSymbolOrder + ": " + ec.message());
239 return orderMap;
Rui Ueyama432030e2019-03-27 23:52:22 +0000240 }
241
Fangrui Song47cfe8f2019-07-16 05:50:45 +0000242 // Print the symbols ordered by C3, in the order of increasing curOrder
243 // Instead of sorting all the orderMap, just repeat the loops above.
Fangrui Song7588cf02019-10-04 07:56:54 +0000244 for (int leader : sorted)
245 for (int i = leader;;) {
Rui Ueyama432030e2019-03-27 23:52:22 +0000246 // Search all the symbols in the file of the section
247 // and find out a Defined symbol with name that is within the section.
Fangrui Song7588cf02019-10-04 07:56:54 +0000248 for (Symbol *sym : sections[i]->file->getSymbols())
Rui Ueyama3837f422019-07-10 05:00:37 +0000249 if (!sym->isSection()) // Filter out section-type symbols here.
250 if (auto *d = dyn_cast<Defined>(sym))
Fangrui Song7588cf02019-10-04 07:56:54 +0000251 if (sections[i] == d->section)
Rui Ueyama3837f422019-07-10 05:00:37 +0000252 os << sym->getName() << "\n";
Fangrui Song7588cf02019-10-04 07:56:54 +0000253 i = clusters[i].next;
254 if (i == leader)
255 break;
256 }
Rui Ueyama432030e2019-03-27 23:52:22 +0000257 }
258
Rui Ueyama3837f422019-07-10 05:00:37 +0000259 return orderMap;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000260}
261
262// Sort sections by the profile data provided by -callgraph-profile-file
263//
264// This first builds a call graph based on the profile data then merges sections
265// according to the C³ huristic. All clusters are then sorted by a density
266// metric to further improve locality.
267DenseMap<const InputSectionBase *, int> elf::computeCallGraphProfileOrder() {
268 return CallGraphSort().run();
269}