blob: c9a62f69accad6e2978effdcd25394064ae01f83 [file] [log] [blame]
Michael J. Spencerb8427252018-04-17 23:30:05 +00001//===- CallGraphSort.cpp --------------------------------------------------===//
2//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Michael J. Spencerb8427252018-04-17 23:30:05 +00006//
7//===----------------------------------------------------------------------===//
8///
9/// Implementation of Call-Chain Clustering from: Optimizing Function Placement
10/// for Large-Scale Data-Center Applications
11/// https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf
12///
13/// The goal of this algorithm is to improve runtime performance of the final
14/// executable by arranging code sections such that page table and i-cache
15/// misses are minimized.
16///
17/// Definitions:
18/// * Cluster
19/// * An ordered list of input sections which are layed out as a unit. At the
20/// beginning of the algorithm each input section has its own cluster and
21/// the weight of the cluster is the sum of the weight of all incomming
22/// edges.
23/// * Call-Chain Clustering (C³) Heuristic
24/// * Defines when and how clusters are combined. Pick the highest weighted
25/// input section then add it to its most likely predecessor if it wouldn't
26/// penalize it too much.
27/// * Density
28/// * The weight of the cluster divided by the size of the cluster. This is a
29/// proxy for the ammount of execution time spent per byte of the cluster.
30///
31/// It does so given a call graph profile by the following:
32/// * Build a weighted call graph from the call graph profile
33/// * Sort input sections by weight
34/// * For each input section starting with the highest weight
35/// * Find its most likely predecessor cluster
36/// * Check if the combined cluster would be too large, or would have too low
37/// a density.
38/// * If not, then combine the clusters.
39/// * Sort non-empty clusters by density
40///
41//===----------------------------------------------------------------------===//
42
43#include "CallGraphSort.h"
44#include "OutputSections.h"
45#include "SymbolTable.h"
46#include "Symbols.h"
47
48using namespace llvm;
49using namespace lld;
50using namespace lld::elf;
51
52namespace {
53struct Edge {
Rui Ueyama3837f422019-07-10 05:00:37 +000054 int from;
55 uint64_t weight;
Michael J. Spencerb8427252018-04-17 23:30:05 +000056};
57
58struct Cluster {
Rui Ueyama3837f422019-07-10 05:00:37 +000059 Cluster(int sec, size_t s) : sections{sec}, size(s) {}
Michael J. Spencerb8427252018-04-17 23:30:05 +000060
61 double getDensity() const {
Rui Ueyama3837f422019-07-10 05:00:37 +000062 if (size == 0)
Michael J. Spencerb8427252018-04-17 23:30:05 +000063 return 0;
Rui Ueyama3837f422019-07-10 05:00:37 +000064 return double(weight) / double(size);
Michael J. Spencerb8427252018-04-17 23:30:05 +000065 }
66
Rui Ueyama3837f422019-07-10 05:00:37 +000067 std::vector<int> sections;
68 size_t size = 0;
69 uint64_t weight = 0;
70 uint64_t initialWeight = 0;
71 Edge bestPred = {-1, 0};
Michael J. Spencerb8427252018-04-17 23:30:05 +000072};
73
74class CallGraphSort {
75public:
76 CallGraphSort();
77
78 DenseMap<const InputSectionBase *, int> run();
79
80private:
Rui Ueyama3837f422019-07-10 05:00:37 +000081 std::vector<Cluster> clusters;
82 std::vector<const InputSectionBase *> sections;
Michael J. Spencerb8427252018-04-17 23:30:05 +000083
84 void groupClusters();
85};
86
87// Maximum ammount the combined cluster density can be worse than the original
88// cluster to consider merging.
89constexpr int MAX_DENSITY_DEGRADATION = 8;
90
91// Maximum cluster size in bytes.
92constexpr uint64_t MAX_CLUSTER_SIZE = 1024 * 1024;
93} // end anonymous namespace
94
Rui Ueyama68b9f452019-04-01 00:11:24 +000095using SectionPair =
96 std::pair<const InputSectionBase *, const InputSectionBase *>;
Rui Ueyama3d354082018-10-12 22:44:06 +000097
Michael J. Spencerb8427252018-04-17 23:30:05 +000098// Take the edge list in Config->CallGraphProfile, resolve symbol names to
99// Symbols, and generate a graph between InputSections with the provided
100// weights.
101CallGraphSort::CallGraphSort() {
Rui Ueyama3837f422019-07-10 05:00:37 +0000102 MapVector<SectionPair, uint64_t> &profile = config->callGraphProfile;
103 DenseMap<const InputSectionBase *, int> secToCluster;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000104
Rui Ueyama3837f422019-07-10 05:00:37 +0000105 auto getOrCreateNode = [&](const InputSectionBase *isec) -> int {
106 auto res = secToCluster.insert(std::make_pair(isec, clusters.size()));
107 if (res.second) {
108 sections.push_back(isec);
109 clusters.emplace_back(clusters.size(), isec->getSize());
Michael J. Spencerb8427252018-04-17 23:30:05 +0000110 }
Rui Ueyama3837f422019-07-10 05:00:37 +0000111 return res.first->second;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000112 };
113
114 // Create the graph.
Rui Ueyama3837f422019-07-10 05:00:37 +0000115 for (std::pair<SectionPair, uint64_t> &c : profile) {
116 const auto *fromSB = cast<InputSectionBase>(c.first.first->repl);
117 const auto *toSB = cast<InputSectionBase>(c.first.second->repl);
118 uint64_t weight = c.second;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000119
120 // Ignore edges between input sections belonging to different output
121 // sections. This is done because otherwise we would end up with clusters
122 // containing input sections that can't actually be placed adjacently in the
123 // output. This messes with the cluster size and density calculations. We
124 // would also end up moving input sections in other output sections without
125 // moving them closer to what calls them.
Rui Ueyama3837f422019-07-10 05:00:37 +0000126 if (fromSB->getOutputSection() != toSB->getOutputSection())
Michael J. Spencerb8427252018-04-17 23:30:05 +0000127 continue;
128
Rui Ueyama3837f422019-07-10 05:00:37 +0000129 int from = getOrCreateNode(fromSB);
130 int to = getOrCreateNode(toSB);
Michael J. Spencerb8427252018-04-17 23:30:05 +0000131
Rui Ueyama3837f422019-07-10 05:00:37 +0000132 clusters[to].weight += weight;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000133
Rui Ueyama3837f422019-07-10 05:00:37 +0000134 if (from == to)
Michael J. Spencerb8427252018-04-17 23:30:05 +0000135 continue;
136
George Rimarf0eedbc2018-08-28 08:49:40 +0000137 // Remember the best edge.
Rui Ueyama3837f422019-07-10 05:00:37 +0000138 Cluster &toC = clusters[to];
139 if (toC.bestPred.from == -1 || toC.bestPred.weight < weight) {
140 toC.bestPred.from = from;
141 toC.bestPred.weight = weight;
George Rimarf0eedbc2018-08-28 08:49:40 +0000142 }
Michael J. Spencerb8427252018-04-17 23:30:05 +0000143 }
Rui Ueyama3837f422019-07-10 05:00:37 +0000144 for (Cluster &c : clusters)
145 c.initialWeight = c.weight;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000146}
147
148// It's bad to merge clusters which would degrade the density too much.
Rui Ueyama3837f422019-07-10 05:00:37 +0000149static bool isNewDensityBad(Cluster &a, Cluster &b) {
150 double newDensity = double(a.weight + b.weight) / double(a.size + b.size);
151 return newDensity < a.getDensity() / MAX_DENSITY_DEGRADATION;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000152}
153
Rui Ueyama3837f422019-07-10 05:00:37 +0000154static void mergeClusters(Cluster &into, Cluster &from) {
155 into.sections.insert(into.sections.end(), from.sections.begin(),
156 from.sections.end());
157 into.size += from.size;
158 into.weight += from.weight;
159 from.sections.clear();
160 from.size = 0;
161 from.weight = 0;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000162}
163
164// Group InputSections into clusters using the Call-Chain Clustering heuristic
165// then sort the clusters by density.
166void CallGraphSort::groupClusters() {
Rui Ueyama3837f422019-07-10 05:00:37 +0000167 std::vector<int> sortedSecs(clusters.size());
168 std::vector<Cluster *> secToCluster(clusters.size());
Michael J. Spencerb8427252018-04-17 23:30:05 +0000169
Rui Ueyama3837f422019-07-10 05:00:37 +0000170 for (size_t i = 0; i < clusters.size(); ++i) {
171 sortedSecs[i] = i;
172 secToCluster[i] = &clusters[i];
Michael J. Spencerb8427252018-04-17 23:30:05 +0000173 }
174
Rui Ueyama3837f422019-07-10 05:00:37 +0000175 llvm::stable_sort(sortedSecs, [&](int a, int b) {
176 return clusters[a].getDensity() > clusters[b].getDensity();
Michael J. Spencerb8427252018-04-17 23:30:05 +0000177 });
178
Rui Ueyama3837f422019-07-10 05:00:37 +0000179 for (int si : sortedSecs) {
Michael J. Spencerb8427252018-04-17 23:30:05 +0000180 // Clusters[SI] is the same as SecToClusters[SI] here because it has not
181 // been merged into another cluster yet.
Rui Ueyama3837f422019-07-10 05:00:37 +0000182 Cluster &c = clusters[si];
Michael J. Spencerb8427252018-04-17 23:30:05 +0000183
George Rimarf0eedbc2018-08-28 08:49:40 +0000184 // Don't consider merging if the edge is unlikely.
Rui Ueyama3837f422019-07-10 05:00:37 +0000185 if (c.bestPred.from == -1 || c.bestPred.weight * 10 <= c.initialWeight)
Michael J. Spencerb8427252018-04-17 23:30:05 +0000186 continue;
187
Rui Ueyama3837f422019-07-10 05:00:37 +0000188 Cluster *predC = secToCluster[c.bestPred.from];
189 if (predC == &c)
Michael J. Spencerb8427252018-04-17 23:30:05 +0000190 continue;
191
Rui Ueyama3837f422019-07-10 05:00:37 +0000192 if (c.size + predC->size > MAX_CLUSTER_SIZE)
Michael J. Spencerb8427252018-04-17 23:30:05 +0000193 continue;
194
Rui Ueyama3837f422019-07-10 05:00:37 +0000195 if (isNewDensityBad(*predC, c))
Michael J. Spencerb8427252018-04-17 23:30:05 +0000196 continue;
197
198 // NOTE: Consider using a disjoint-set to track section -> cluster mapping
199 // if this is ever slow.
Rui Ueyama3837f422019-07-10 05:00:37 +0000200 for (int si : c.sections)
201 secToCluster[si] = predC;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000202
Rui Ueyama3837f422019-07-10 05:00:37 +0000203 mergeClusters(*predC, c);
Michael J. Spencerb8427252018-04-17 23:30:05 +0000204 }
205
206 // Remove empty or dead nodes. Invalidates all cluster indices.
Rui Ueyama3837f422019-07-10 05:00:37 +0000207 llvm::erase_if(clusters, [](const Cluster &c) {
208 return c.size == 0 || c.sections.empty();
George Rimar97df22f2018-04-23 14:41:49 +0000209 });
Michael J. Spencerb8427252018-04-17 23:30:05 +0000210
211 // Sort by density.
Rui Ueyama3837f422019-07-10 05:00:37 +0000212 llvm::stable_sort(clusters, [](const Cluster &a, const Cluster &b) {
213 return a.getDensity() > b.getDensity();
Fangrui Song32c0ebe2019-04-23 02:42:06 +0000214 });
Michael J. Spencerb8427252018-04-17 23:30:05 +0000215}
216
217DenseMap<const InputSectionBase *, int> CallGraphSort::run() {
218 groupClusters();
219
220 // Generate order.
Rui Ueyama3837f422019-07-10 05:00:37 +0000221 DenseMap<const InputSectionBase *, int> orderMap;
222 ssize_t curOrder = 1;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000223
Rui Ueyama3837f422019-07-10 05:00:37 +0000224 for (const Cluster &c : clusters)
225 for (int secIndex : c.sections)
226 orderMap[sections[secIndex]] = curOrder++;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000227
Rui Ueyama3837f422019-07-10 05:00:37 +0000228 if (!config->printSymbolOrder.empty()) {
229 std::error_code ec;
230 raw_fd_ostream os(config->printSymbolOrder, ec, sys::fs::F_None);
231 if (ec) {
232 error("cannot open " + config->printSymbolOrder + ": " + ec.message());
233 return orderMap;
Rui Ueyama432030e2019-03-27 23:52:22 +0000234 }
235
236 // Print the symbols ordered by C3, in the order of increasing CurOrder
237 // Instead of sorting all the OrderMap, just repeat the loops above.
Rui Ueyama3837f422019-07-10 05:00:37 +0000238 for (const Cluster &c : clusters)
239 for (int secIndex : c.sections)
Rui Ueyama432030e2019-03-27 23:52:22 +0000240 // Search all the symbols in the file of the section
241 // and find out a Defined symbol with name that is within the section.
Rui Ueyama3837f422019-07-10 05:00:37 +0000242 for (Symbol *sym: sections[secIndex]->file->getSymbols())
243 if (!sym->isSection()) // Filter out section-type symbols here.
244 if (auto *d = dyn_cast<Defined>(sym))
245 if (sections[secIndex] == d->section)
246 os << sym->getName() << "\n";
Rui Ueyama432030e2019-03-27 23:52:22 +0000247 }
248
Rui Ueyama3837f422019-07-10 05:00:37 +0000249 return orderMap;
Michael J. Spencerb8427252018-04-17 23:30:05 +0000250}
251
252// Sort sections by the profile data provided by -callgraph-profile-file
253//
254// This first builds a call graph based on the profile data then merges sections
255// according to the C³ huristic. All clusters are then sorted by a density
256// metric to further improve locality.
257DenseMap<const InputSectionBase *, int> elf::computeCallGraphProfileOrder() {
258 return CallGraphSort().run();
259}