blob: a0806052f3f3df833d360a8063aa1dc7ccec4016 [file] [log] [blame]
Rui Ueyama0b289522016-02-25 18:43:51 +00001//===- ICF.cpp ------------------------------------------------------------===//
2//
3// The LLVM Linker
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
Rui Ueyama91ae8612016-12-01 19:45:22 +000010// ICF is short for Identical Code Folding. That is a size optimization to
11// identify and merge two or more read-only sections (typically functions)
12// that happened to have the same contents. It usually reduces output size
13// by a few percent.
Rui Ueyama0b289522016-02-25 18:43:51 +000014//
Rui Ueyama91ae8612016-12-01 19:45:22 +000015// In ICF, two sections are considered identical if they have the same
16// section flags, section data, and relocations. Relocations are tricky,
17// because two relocations are considered the same if they have the same
18// relocation types, values, and if they point to the same sections *in
19// terms of ICF*.
Rui Ueyama0b289522016-02-25 18:43:51 +000020//
Rui Ueyama91ae8612016-12-01 19:45:22 +000021// Here is an example. If foo and bar defined below are compiled to the
22// same machine instructions, ICF can and should merge the two, although
23// their relocations point to each other.
Rui Ueyama0b289522016-02-25 18:43:51 +000024//
25// void foo() { bar(); }
26// void bar() { foo(); }
27//
Rui Ueyama91ae8612016-12-01 19:45:22 +000028// If you merge the two, their relocations point to the same section and
29// thus you know they are mergeable, but how do we know they are mergeable
30// in the first place? This is not an easy problem to solve.
31//
32// What we are doing in LLD is some sort of coloring algorithm.
33//
34// We color non-identical sections in different colors repeatedly.
35// Sections in the same color when the algorithm terminates are considered
36// identical. Here are the details:
37//
38// 1. First, we color all sections using their hash values of section
39// types, section contents, and numbers of relocations. At this moment,
40// relocation targets are not taken into account. We just color
41// sections that apparently differ in different colors.
42//
43// 2. Next, for each color C, we visit sections in color C to compare
44// relocation target colors. We recolor sections A and B in different
45// colors if A's and B's relocations are different in terms of target
46// colors.
47//
48// 3. If we recolor some section in step 2, relocations that were
49// previously pointing to the same color targets may now be pointing to
50// different colors. Therefore, repeat 2 until a convergence is
51// obtained.
52//
53// 4. For each color C, pick an arbitrary section in color C, and merges
54// other sections in color C with it.
55//
56// For small programs, this algorithm needs 3-5 iterations. For large
57// programs such as Chromium, it takes more than 20 iterations.
58//
59// We parallelize each step so that multiple threads can work on different
60// colors concurrently. That gave us a large performance boost when
61// applying ICF on large programs. For example, MSVC link.exe or GNU gold
62// takes 10-20 seconds to apply ICF on Chromium, whose output size is
63// about 1.5 GB, but LLD can finish it in less than 2 seconds on a 2.8 GHz
64// 40 core machine. Even without threading, LLD's ICF is still faster than
65// MSVC or gold though.
Rui Ueyama0b289522016-02-25 18:43:51 +000066//
67//===----------------------------------------------------------------------===//
68
69#include "ICF.h"
70#include "Config.h"
Rui Ueyama0b289522016-02-25 18:43:51 +000071#include "SymbolTable.h"
72
Rui Ueyamac1835312016-12-01 17:09:04 +000073#include "lld/Core/Parallel.h"
Rui Ueyama0b289522016-02-25 18:43:51 +000074#include "llvm/ADT/Hashing.h"
75#include "llvm/Object/ELF.h"
76#include "llvm/Support/ELF.h"
Rui Ueyamaa05134e2016-11-19 20:15:55 +000077#include <algorithm>
Rui Ueyama1b6bab02016-12-02 05:35:46 +000078#include <atomic>
Rui Ueyama0b289522016-02-25 18:43:51 +000079
80using namespace lld;
Rafael Espindolae0df00b2016-02-28 00:25:54 +000081using namespace lld::elf;
Rui Ueyama0b289522016-02-25 18:43:51 +000082using namespace llvm;
83using namespace llvm::ELF;
84using namespace llvm::object;
85
Rui Ueyamabd1f0632016-11-20 02:39:59 +000086namespace {
Rui Ueyama0b289522016-02-25 18:43:51 +000087template <class ELFT> class ICF {
Rui Ueyama0b289522016-02-25 18:43:51 +000088public:
Rui Ueyama4f8d21f2016-05-02 19:30:42 +000089 void run();
Rui Ueyama0b289522016-02-25 18:43:51 +000090
91private:
Rui Ueyama1b6bab02016-12-02 05:35:46 +000092 void segregate(size_t Begin, size_t End, bool Constant);
Rui Ueyama0b289522016-02-25 18:43:51 +000093
Rui Ueyama9dedfb12016-11-30 01:50:03 +000094 template <class RelTy>
95 bool constantEq(ArrayRef<RelTy> RelsA, ArrayRef<RelTy> RelsB);
Rui Ueyama0b289522016-02-25 18:43:51 +000096
Rui Ueyama9dedfb12016-11-30 01:50:03 +000097 template <class RelTy>
98 bool variableEq(const InputSection<ELFT> *A, ArrayRef<RelTy> RelsA,
99 const InputSection<ELFT> *B, ArrayRef<RelTy> RelsB);
Rui Ueyama0b289522016-02-25 18:43:51 +0000100
Rui Ueyama9dedfb12016-11-30 01:50:03 +0000101 bool equalsConstant(const InputSection<ELFT> *A, const InputSection<ELFT> *B);
102 bool equalsVariable(const InputSection<ELFT> *A, const InputSection<ELFT> *B);
103
Rui Ueyama1b6bab02016-12-02 05:35:46 +0000104 size_t findBoundary(size_t Begin, size_t End);
Rui Ueyama9dedfb12016-11-30 01:50:03 +0000105
Rui Ueyama1b6bab02016-12-02 05:35:46 +0000106 void forEachColorRange(size_t Begin, size_t End,
107 std::function<void(size_t, size_t)> Fn);
108
109 void forEachColor(std::function<void(size_t, size_t)> Fn);
110
111 std::vector<InputSection<ELFT> *> Sections;
Rui Ueyamac1835312016-12-01 17:09:04 +0000112 int Cnt = 0;
Rui Ueyama1b6bab02016-12-02 05:35:46 +0000113 std::atomic<bool> Repeat = {false};
Rui Ueyama0b289522016-02-25 18:43:51 +0000114};
115}
Rui Ueyama0b289522016-02-25 18:43:51 +0000116
Rui Ueyama0b289522016-02-25 18:43:51 +0000117// Returns a hash value for S. Note that the information about
118// relocation targets is not included in the hash value.
Rui Ueyamac1835312016-12-01 17:09:04 +0000119template <class ELFT> static uint32_t getHash(InputSection<ELFT> *S) {
Rui Ueyamaa05134e2016-11-19 20:15:55 +0000120 return hash_combine(S->Flags, S->getSize(), S->NumRelocations);
Rui Ueyama0b289522016-02-25 18:43:51 +0000121}
122
Rui Ueyamabd1f0632016-11-20 02:39:59 +0000123// Returns true if section S is subject of ICF.
124template <class ELFT> static bool isEligible(InputSection<ELFT> *S) {
Rui Ueyama0b289522016-02-25 18:43:51 +0000125 // .init and .fini contains instructions that must be executed to
126 // initialize and finalize the process. They cannot and should not
127 // be merged.
Rui Ueyamabd1f0632016-11-20 02:39:59 +0000128 return S->Live && (S->Flags & SHF_ALLOC) && !(S->Flags & SHF_WRITE) &&
129 S->Name != ".init" && S->Name != ".fini";
Rui Ueyama0b289522016-02-25 18:43:51 +0000130}
131
Rui Ueyama1b6bab02016-12-02 05:35:46 +0000132// Split a range into smaller ranges by recoloring sections
133// in a given range.
134template <class ELFT>
135void ICF<ELFT>::segregate(size_t Begin, size_t End, bool Constant) {
136 // This loop rearranges sections in [Begin, End) so that all sections
Rui Ueyama9dedfb12016-11-30 01:50:03 +0000137 // that are equal in terms of equals{Constant,Variable} are contiguous
Rui Ueyama1b6bab02016-12-02 05:35:46 +0000138 // in [Begin, End).
Rui Ueyama9dedfb12016-11-30 01:50:03 +0000139 //
140 // The algorithm is quadratic in the worst case, but that is not an
141 // issue in practice because the number of the distinct sections in
Rui Ueyama1b6bab02016-12-02 05:35:46 +0000142 // each range is usually very small.
Rui Ueyamac1835312016-12-01 17:09:04 +0000143
Rui Ueyama1b6bab02016-12-02 05:35:46 +0000144 while (Begin < End) {
145 // Divide [Begin, End) into two. Let Mid be the start index of the
Rui Ueyama9dedfb12016-11-30 01:50:03 +0000146 // second group.
Rui Ueyama0b289522016-02-25 18:43:51 +0000147 auto Bound = std::stable_partition(
Rui Ueyamac1835312016-12-01 17:09:04 +0000148 Sections.begin() + Begin + 1, Sections.begin() + End,
Rui Ueyama9dedfb12016-11-30 01:50:03 +0000149 [&](InputSection<ELFT> *S) {
150 if (Constant)
Rui Ueyamac1835312016-12-01 17:09:04 +0000151 return equalsConstant(Sections[Begin], S);
152 return equalsVariable(Sections[Begin], S);
Rui Ueyama9dedfb12016-11-30 01:50:03 +0000153 });
154 size_t Mid = Bound - Sections.begin();
Rui Ueyama0b289522016-02-25 18:43:51 +0000155
Rui Ueyama1b6bab02016-12-02 05:35:46 +0000156 // Now we split [Begin, End) into [Begin, Mid) and [Mid, End) by
157 // updating the section colors in [Begin, End). We use Mid as a
158 // color ID because every group ends with a unique index.
Rui Ueyamac1835312016-12-01 17:09:04 +0000159 //
Rui Ueyama91ae8612016-12-01 19:45:22 +0000160 // Note on Color[0] and Color[1]: we have two storages for colors.
161 // At the beginning of each iteration of the main loop, both have
162 // the same color. Color[0] contains the current color, and Color[1]
Rui Ueyama1b6bab02016-12-02 05:35:46 +0000163 // contains the next color which will be used on the next iteration.
Rui Ueyamac1835312016-12-01 17:09:04 +0000164 //
165 // Recall that other threads may be working on other ranges. They
Rui Ueyama91ae8612016-12-01 19:45:22 +0000166 // may be reading colors that we are about to update. We cannot
167 // update colors in place because it breaks the invariance that
168 // all sections in the same group must have the same color. In
169 // other words, the following for loop is not an atomic operation,
170 // and that is observable from other threads.
Rui Ueyamac1835312016-12-01 17:09:04 +0000171 //
Rui Ueyama91ae8612016-12-01 19:45:22 +0000172 // By writing new colors to write-only places, we can keep the invariance.
Rui Ueyama1b6bab02016-12-02 05:35:46 +0000173 for (size_t I = Begin; I < Mid; ++I)
174 Sections[I]->Color[(Cnt + 1) % 2] = Mid;
Rui Ueyama9dedfb12016-11-30 01:50:03 +0000175
Rui Ueyama1b6bab02016-12-02 05:35:46 +0000176 // If we created a group, we need to iterate the main loop again.
177 if (Mid != End)
178 Repeat = true;
179
180 Begin = Mid;
Rui Ueyama0b289522016-02-25 18:43:51 +0000181 }
182}
183
184// Compare two lists of relocations.
Rui Ueyama9dedfb12016-11-30 01:50:03 +0000185template <class ELFT>
186template <class RelTy>
187bool ICF<ELFT>::constantEq(ArrayRef<RelTy> RelsA, ArrayRef<RelTy> RelsB) {
Rui Ueyamaa05134e2016-11-19 20:15:55 +0000188 auto Eq = [](const RelTy &A, const RelTy &B) {
189 return A.r_offset == B.r_offset &&
190 A.getType(Config->Mips64EL) == B.getType(Config->Mips64EL) &&
191 getAddend<ELFT>(A) == getAddend<ELFT>(B);
192 };
193
194 return RelsA.size() == RelsB.size() &&
195 std::equal(RelsA.begin(), RelsA.end(), RelsB.begin(), Eq);
Rui Ueyama0b289522016-02-25 18:43:51 +0000196}
197
198// Compare "non-moving" part of two InputSections, namely everything
199// except relocation targets.
200template <class ELFT>
Rui Ueyama9dedfb12016-11-30 01:50:03 +0000201bool ICF<ELFT>::equalsConstant(const InputSection<ELFT> *A,
202 const InputSection<ELFT> *B) {
Rui Ueyamabd1f0632016-11-20 02:39:59 +0000203 if (A->NumRelocations != B->NumRelocations || A->Flags != B->Flags ||
204 A->getSize() != B->getSize() || A->Data != B->Data)
Rui Ueyama0b289522016-02-25 18:43:51 +0000205 return false;
206
Rui Ueyamabd1f0632016-11-20 02:39:59 +0000207 if (A->AreRelocsRela)
Rui Ueyama9dedfb12016-11-30 01:50:03 +0000208 return constantEq(A->relas(), B->relas());
209 return constantEq(A->rels(), B->rels());
Rui Ueyama0b289522016-02-25 18:43:51 +0000210}
211
Rui Ueyama7bed9ee2016-11-20 23:15:54 +0000212// Compare two lists of relocations. Returns true if all pairs of
213// relocations point to the same section in terms of ICF.
Rui Ueyama9dedfb12016-11-30 01:50:03 +0000214template <class ELFT>
215template <class RelTy>
216bool ICF<ELFT>::variableEq(const InputSection<ELFT> *A, ArrayRef<RelTy> RelsA,
217 const InputSection<ELFT> *B, ArrayRef<RelTy> RelsB) {
Rui Ueyamaa05134e2016-11-19 20:15:55 +0000218 auto Eq = [&](const RelTy &RA, const RelTy &RB) {
Rui Ueyama91ae8612016-12-01 19:45:22 +0000219 // The two sections must be identical.
Rui Ueyamabd1f0632016-11-20 02:39:59 +0000220 SymbolBody &SA = A->getFile()->getRelocTargetSym(RA);
221 SymbolBody &SB = B->getFile()->getRelocTargetSym(RB);
Rafael Espindola67d72c02016-03-11 12:06:30 +0000222 if (&SA == &SB)
Rui Ueyamaa05134e2016-11-19 20:15:55 +0000223 return true;
Rui Ueyama0b289522016-02-25 18:43:51 +0000224
Rui Ueyama91ae8612016-12-01 19:45:22 +0000225 // Or, the two sections must have the same color.
Rafael Espindola67d72c02016-03-11 12:06:30 +0000226 auto *DA = dyn_cast<DefinedRegular<ELFT>>(&SA);
227 auto *DB = dyn_cast<DefinedRegular<ELFT>>(&SB);
Rui Ueyama0b289522016-02-25 18:43:51 +0000228 if (!DA || !DB)
229 return false;
Rafael Espindolaccfe3cb2016-04-04 14:04:16 +0000230 if (DA->Value != DB->Value)
Rui Ueyama0b289522016-02-25 18:43:51 +0000231 return false;
Rui Ueyama9dedfb12016-11-30 01:50:03 +0000232
Rui Ueyama9f8cb732016-11-20 02:43:44 +0000233 auto *X = dyn_cast<InputSection<ELFT>>(DA->Section);
234 auto *Y = dyn_cast<InputSection<ELFT>>(DB->Section);
Rui Ueyama9dedfb12016-11-30 01:50:03 +0000235 if (!X || !Y)
236 return false;
Rui Ueyamac1835312016-12-01 17:09:04 +0000237
238 // Performance hack for single-thread. If no other threads are
Rui Ueyama91ae8612016-12-01 19:45:22 +0000239 // running, we can safely read next colors as there is no race
Rui Ueyamac1835312016-12-01 17:09:04 +0000240 // condition. This optimization may reduce the number of
241 // iterations of the main loop because we can see results of the
242 // same iteration.
243 size_t Idx = (Config->Threads ? Cnt : Cnt + 1) % 2;
Rui Ueyamaa6cd5fe2016-12-01 21:41:06 +0000244
Rui Ueyama83ec6812016-12-02 17:23:58 +0000245 // Ineligible sections have the special color 0.
246 // They can never be the same in terms of section colors.
247 if (X->Color[Idx] == 0)
248 return false;
Rui Ueyamaa6cd5fe2016-12-01 21:41:06 +0000249
Rui Ueyama91ae8612016-12-01 19:45:22 +0000250 return X->Color[Idx] == Y->Color[Idx];
Rui Ueyamaa05134e2016-11-19 20:15:55 +0000251 };
252
253 return std::equal(RelsA.begin(), RelsA.end(), RelsB.begin(), Eq);
Rui Ueyama0b289522016-02-25 18:43:51 +0000254}
255
256// Compare "moving" part of two InputSections, namely relocation targets.
257template <class ELFT>
Rui Ueyama9dedfb12016-11-30 01:50:03 +0000258bool ICF<ELFT>::equalsVariable(const InputSection<ELFT> *A,
259 const InputSection<ELFT> *B) {
Rafael Espindola9f0c4bb2016-11-10 14:53:24 +0000260 if (A->AreRelocsRela)
Rui Ueyamaa05134e2016-11-19 20:15:55 +0000261 return variableEq(A, A->relas(), B, B->relas());
262 return variableEq(A, A->rels(), B, B->rels());
Rui Ueyama0b289522016-02-25 18:43:51 +0000263}
264
Rui Ueyama1b6bab02016-12-02 05:35:46 +0000265template <class ELFT> size_t ICF<ELFT>::findBoundary(size_t Begin, size_t End) {
266 for (size_t I = Begin + 1; I < End; ++I)
267 if (Sections[Begin]->Color[Cnt % 2] != Sections[I]->Color[Cnt % 2])
268 return I;
269 return End;
270}
271
272// Sections in the same color are contiguous in Sections vector.
273// Therefore, Sections vector can be considered as contiguous groups
274// of sections, grouped by colors.
275//
276// This function calls Fn on every group that starts within [Begin, End).
277// Note that a group must starts in that range but doesn't necessarily
278// have to end before End.
279template <class ELFT>
280void ICF<ELFT>::forEachColorRange(size_t Begin, size_t End,
281 std::function<void(size_t, size_t)> Fn) {
282 if (Begin > 0)
283 Begin = findBoundary(Begin - 1, End);
284
285 while (Begin < End) {
286 size_t Mid = findBoundary(Begin, Sections.size());
287 Fn(Begin, Mid);
288 Begin = Mid;
289 }
290}
291
292// Call Fn on each color group.
293template <class ELFT>
294void ICF<ELFT>::forEachColor(std::function<void(size_t, size_t)> Fn) {
295 // If threading is disabled or the number of sections are
296 // too small to use threading, call Fn sequentially.
297 if (!Config->Threads || Sections.size() < 1024) {
298 forEachColorRange(0, Sections.size(), Fn);
299 return;
300 }
301
302 // Split sections into 256 shards and call Fn in parallel.
303 size_t NumShards = 256;
304 size_t Step = Sections.size() / NumShards;
305 parallel_for(size_t(0), NumShards, [&](size_t I) {
306 forEachColorRange(I * Step, (I + 1) * Step, Fn);
307 });
308 forEachColorRange(Step * NumShards, Sections.size(), Fn);
Rui Ueyamac1835312016-12-01 17:09:04 +0000309}
310
Rui Ueyama0b289522016-02-25 18:43:51 +0000311// The main function of ICF.
Rui Ueyama4f8d21f2016-05-02 19:30:42 +0000312template <class ELFT> void ICF<ELFT>::run() {
Rui Ueyama9dedfb12016-11-30 01:50:03 +0000313 // Collect sections to merge.
314 for (InputSectionBase<ELFT> *Sec : Symtab<ELFT>::X->Sections)
315 if (auto *S = dyn_cast<InputSection<ELFT>>(Sec))
316 if (isEligible(S))
317 Sections.push_back(S);
318
Rui Ueyama91ae8612016-12-01 19:45:22 +0000319 // Initially, we use hash values to color sections. Therefore, if
320 // two sections have the same color, they are likely (but not
Rui Ueyama0b289522016-02-25 18:43:51 +0000321 // guaranteed) to have the same static contents in terms of ICF.
Rui Ueyamae2dfbc12016-11-19 23:14:23 +0000322 for (InputSection<ELFT> *S : Sections)
Rui Ueyama91ae8612016-12-01 19:45:22 +0000323 // Set MSB to 1 to avoid collisions with non-hash colors.
Rui Ueyama1b6bab02016-12-02 05:35:46 +0000324 S->Color[0] = getHash(S) | (1 << 31);
Rui Ueyama0b289522016-02-25 18:43:51 +0000325
Rui Ueyama7bed9ee2016-11-20 23:15:54 +0000326 // From now on, sections in Sections are ordered so that sections in
Rui Ueyama91ae8612016-12-01 19:45:22 +0000327 // the same color are consecutive in the vector.
Rui Ueyamae2dfbc12016-11-19 23:14:23 +0000328 std::stable_sort(Sections.begin(), Sections.end(),
Rui Ueyama0b289522016-02-25 18:43:51 +0000329 [](InputSection<ELFT> *A, InputSection<ELFT> *B) {
Rui Ueyama91ae8612016-12-01 19:45:22 +0000330 if (A->Color[0] != B->Color[0])
331 return A->Color[0] < B->Color[0];
Petr Hosek901948a2016-08-22 18:53:09 +0000332 // Within a group, put the highest alignment
333 // requirement first, so that's the one we'll keep.
334 return B->Alignment < A->Alignment;
Rui Ueyama0b289522016-02-25 18:43:51 +0000335 });
336
Rui Ueyama9dedfb12016-11-30 01:50:03 +0000337 // Compare static contents and assign unique IDs for each static content.
Rui Ueyama1b6bab02016-12-02 05:35:46 +0000338 forEachColor([&](size_t Begin, size_t End) { segregate(Begin, End, true); });
Rui Ueyama9dedfb12016-11-30 01:50:03 +0000339 ++Cnt;
340
Rui Ueyama1b6bab02016-12-02 05:35:46 +0000341 // Split groups by comparing relocations until convergence is obtained.
342 do {
343 Repeat = false;
344 forEachColor(
345 [&](size_t Begin, size_t End) { segregate(Begin, End, false); });
Rui Ueyama9dedfb12016-11-30 01:50:03 +0000346 ++Cnt;
Rui Ueyama1b6bab02016-12-02 05:35:46 +0000347 } while (Repeat);
Rui Ueyama9dedfb12016-11-30 01:50:03 +0000348
349 log("ICF needed " + Twine(Cnt) + " iterations");
Rui Ueyama0b289522016-02-25 18:43:51 +0000350
Rui Ueyama91ae8612016-12-01 19:45:22 +0000351 // Merge sections in the same colors.
Rui Ueyama1b6bab02016-12-02 05:35:46 +0000352 forEachColor([&](size_t Begin, size_t End) {
353 if (End - Begin == 1)
354 return;
Rui Ueyama9dedfb12016-11-30 01:50:03 +0000355
Rui Ueyama1b6bab02016-12-02 05:35:46 +0000356 log("selected " + Sections[Begin]->Name);
357 for (size_t I = Begin + 1; I < End; ++I) {
Rui Ueyama9dedfb12016-11-30 01:50:03 +0000358 log(" removed " + Sections[I]->Name);
Rui Ueyama1b6bab02016-12-02 05:35:46 +0000359 Sections[Begin]->replace(Sections[I]);
Rui Ueyama0b289522016-02-25 18:43:51 +0000360 }
Rui Ueyama1b6bab02016-12-02 05:35:46 +0000361 });
Rui Ueyama0b289522016-02-25 18:43:51 +0000362}
363
364// ICF entry point function.
Rui Ueyama4f8d21f2016-05-02 19:30:42 +0000365template <class ELFT> void elf::doIcf() { ICF<ELFT>().run(); }
Rui Ueyama0b289522016-02-25 18:43:51 +0000366
Rui Ueyama4f8d21f2016-05-02 19:30:42 +0000367template void elf::doIcf<ELF32LE>();
368template void elf::doIcf<ELF32BE>();
369template void elf::doIcf<ELF64LE>();
370template void elf::doIcf<ELF64BE>();