blob: 69725dad78a0f8c87ac19ef545e18d7ce7a1f197 [file] [log] [blame]
Rui Ueyama49560c72015-06-24 20:40:03 +00001//===- ICF.cpp ------------------------------------------------------------===//
2//
3// The LLVM Linker
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
Rui Ueyama5b93aa52015-09-11 04:29:03 +000010// Identical COMDAT Folding is a feature to merge COMDAT sections not by
11// name (which is regular COMDAT handling) but by contents. If two COMDAT
12// sections have the same data, relocations, attributes, etc., then the two
13// are considered identical and merged by the linker. This optimization
14// makes outputs smaller.
15//
16// ICF is theoretically a problem of reducing graphs by merging as many
Rui Ueyama9cb28702015-09-16 03:26:31 +000017// identical subgraphs as possible, if we consider sections as vertices and
Rui Ueyama5b93aa52015-09-11 04:29:03 +000018// relocations as edges. This may be a bit more complicated problem than you
19// might think. The order of processing sections matters since merging two
Rui Ueyamac48f78c2015-09-15 00:35:41 +000020// sections can make other sections, whose relocations now point to the same
Rui Ueyama5b93aa52015-09-11 04:29:03 +000021// section, mergeable. Graphs may contain cycles, which is common in COFF.
22// We need a sophisticated algorithm to do this properly and efficiently.
23//
Rui Ueyama9cb28702015-09-16 03:26:31 +000024// What we do in this file is this. We split sections into groups. Sections
25// in the same group are considered identical.
Rui Ueyama5b93aa52015-09-11 04:29:03 +000026//
Rui Ueyama9cb28702015-09-16 03:26:31 +000027// First, all sections are grouped by their "constant" values. Constant
Rui Ueyama92298d52015-09-16 14:19:10 +000028// values are values that are never changed by ICF, such as section contents,
Rui Ueyama9cb28702015-09-16 03:26:31 +000029// section name, number of relocations, type and offset of each relocation,
Rui Ueyama92298d52015-09-16 14:19:10 +000030// etc. Because we do not care about some relocation targets in this step,
31// two sections in the same group may not be identical, but at least two
32// sections in different groups can never be identical.
Rui Ueyama9cb28702015-09-16 03:26:31 +000033//
34// Then, we try to split each group by relocation targets. Relocations are
35// considered identical if and only if the relocation targets are in the
36// same group. Splitting a group may make more groups to be splittable,
37// because two relocations that were previously considered identical might
38// now point to different groups. We repeat this step until the convergence
39// is obtained.
40//
41// This algorithm is so-called "optimistic" algorithm described in
42// http://research.google.com/pubs/pub36912.html.
Rui Ueyama49560c72015-06-24 20:40:03 +000043//
44//===----------------------------------------------------------------------===//
45
46#include "Chunks.h"
Rui Ueyama7b276e22015-07-30 22:57:21 +000047#include "Symbols.h"
48#include "llvm/ADT/Hashing.h"
Rui Ueyama5b93aa52015-09-11 04:29:03 +000049#include "llvm/Support/Debug.h"
50#include "llvm/Support/raw_ostream.h"
51#include <algorithm>
Rui Ueyama49560c72015-06-24 20:40:03 +000052#include <vector>
53
Rui Ueyama7b276e22015-07-30 22:57:21 +000054using namespace llvm;
55
Rui Ueyama49560c72015-06-24 20:40:03 +000056namespace lld {
57namespace coff {
Rui Ueyama5b93aa52015-09-11 04:29:03 +000058
Rui Ueyama92298d52015-09-16 14:19:10 +000059typedef std::vector<SectionChunk *>::iterator ChunkIterator;
60typedef bool (*Comparator)(const SectionChunk *, const SectionChunk *);
61
62class ICF {
63public:
64 ICF(const std::vector<Chunk *> &V) : Chunks(V) {}
65 void run();
66
67private:
68 static uint64_t getHash(SectionChunk *C);
69 static bool equalsConstant(const SectionChunk *A, const SectionChunk *B);
70 static bool equalsVariable(const SectionChunk *A, const SectionChunk *B);
71 bool forEachGroup(std::vector<SectionChunk *> &SChunks, Comparator Eq);
72 bool partition(ChunkIterator Begin, ChunkIterator End, Comparator Eq);
73
74 const std::vector<Chunk *> &Chunks;
75 uint64_t NextID = 0;
76};
77
78// Entry point to ICF.
79void doICF(const std::vector<Chunk *> &Chunks) {
Rui Ueyama92298d52015-09-16 14:19:10 +000080 ICF(Chunks).run();
Rui Ueyama5b93aa52015-09-11 04:29:03 +000081}
82
Rui Ueyama92298d52015-09-16 14:19:10 +000083uint64_t ICF::getHash(SectionChunk *C) {
84 return hash_combine(C->getPermissions(),
85 hash_value(C->SectionName),
86 C->NumRelocs,
87 uint32_t(C->Header->SizeOfRawData),
88 std::distance(C->Relocs.end(), C->Relocs.begin()),
89 C->Checksum);
Rui Ueyama9cb28702015-09-16 03:26:31 +000090}
Rui Ueyama5b93aa52015-09-11 04:29:03 +000091
Rui Ueyama92298d52015-09-16 14:19:10 +000092bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) {
Rui Ueyama9cb28702015-09-16 03:26:31 +000093 if (A->getPermissions() != B->getPermissions() ||
94 A->SectionName != B->SectionName ||
95 A->Header->SizeOfRawData != B->Header->SizeOfRawData ||
96 A->NumRelocs != B->NumRelocs ||
97 A->Checksum != B->Checksum ||
Rui Ueyama7d8263b2015-09-18 01:30:56 +000098 A->AssocChildren.size() != B->AssocChildren.size()) {
Rui Ueyama7b276e22015-07-30 22:57:21 +000099 return false;
Rui Ueyama9cb28702015-09-16 03:26:31 +0000100 }
Rui Ueyama7b276e22015-07-30 22:57:21 +0000101
Rui Ueyama7d8263b2015-09-18 01:30:56 +0000102 // Compare associative sections.
Rui Ueyama9cb28702015-09-16 03:26:31 +0000103 for (size_t I = 0, E = A->AssocChildren.size(); I != E; ++I)
104 if (A->AssocChildren[I]->GroupID != B->AssocChildren[I]->GroupID)
Rui Ueyama7b276e22015-07-30 22:57:21 +0000105 return false;
106
Rui Ueyama7d8263b2015-09-18 01:30:56 +0000107 // Compare relocations.
Rui Ueyama7b276e22015-07-30 22:57:21 +0000108 auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) {
Rui Ueyama9cb28702015-09-16 03:26:31 +0000109 if (R1.Type != R2.Type ||
110 R1.VirtualAddress != R2.VirtualAddress) {
Rui Ueyama7b276e22015-07-30 22:57:21 +0000111 return false;
Rui Ueyama9cb28702015-09-16 03:26:31 +0000112 }
113 SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex)->repl();
114 SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex)->repl();
Rui Ueyama7b276e22015-07-30 22:57:21 +0000115 if (B1 == B2)
116 return true;
117 auto *D1 = dyn_cast<DefinedRegular>(B1);
118 auto *D2 = dyn_cast<DefinedRegular>(B2);
Rui Ueyama9cb28702015-09-16 03:26:31 +0000119 return D1 && D2 &&
120 D1->getValue() == D2->getValue() &&
121 D1->getChunk()->GroupID == D2->getChunk()->GroupID;
Rui Ueyama7b276e22015-07-30 22:57:21 +0000122 };
Rui Ueyama7d8263b2015-09-18 01:30:56 +0000123 if (!std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq))
124 return false;
125
126 // Compare section contents.
127 return A->getContents() == B->getContents();
Rui Ueyama9cb28702015-09-16 03:26:31 +0000128}
129
Rui Ueyama92298d52015-09-16 14:19:10 +0000130bool ICF::equalsVariable(const SectionChunk *A, const SectionChunk *B) {
Rui Ueyamac9a6e822015-09-18 01:51:37 +0000131 // Compare associative sections.
132 for (size_t I = 0, E = A->AssocChildren.size(); I != E; ++I)
133 if (A->AssocChildren[I]->GroupID != B->AssocChildren[I]->GroupID)
134 return false;
135
136 // Compare relocations.
137 auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) {
138 SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex)->repl();
139 SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex)->repl();
140 if (B1 == B2)
141 return true;
142 auto *D1 = dyn_cast<DefinedRegular>(B1);
143 auto *D2 = dyn_cast<DefinedRegular>(B2);
144 return D1 && D2 && D1->getChunk()->GroupID == D2->getChunk()->GroupID;
145 };
146 return std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq);
Rui Ueyama9cb28702015-09-16 03:26:31 +0000147}
148
Rui Ueyama92298d52015-09-16 14:19:10 +0000149bool ICF::partition(ChunkIterator Begin, ChunkIterator End, Comparator Eq) {
Rui Ueyama9cb28702015-09-16 03:26:31 +0000150 bool R = false;
151 for (auto It = Begin;;) {
152 SectionChunk *Head = *It;
153 auto Bound = std::partition(It + 1, End, [&](SectionChunk *SC) {
154 return Eq(Head, SC);
155 });
156 if (Bound == End)
157 return R;
158 size_t ID = NextID++;
159 std::for_each(It, Bound, [&](SectionChunk *SC) { SC->GroupID = ID; });
160 It = Bound;
161 R = true;
162 }
163}
164
Rui Ueyama92298d52015-09-16 14:19:10 +0000165bool ICF::forEachGroup(std::vector<SectionChunk *> &SChunks, Comparator Eq) {
Rui Ueyama9cb28702015-09-16 03:26:31 +0000166 bool R = false;
167 for (auto It = SChunks.begin(), End = SChunks.end(); It != End;) {
168 SectionChunk *Head = *It;
169 auto Bound = std::find_if(It + 1, End, [&](SectionChunk *SC) {
170 return SC->GroupID != Head->GroupID;
171 });
172 if (partition(It, Bound, Eq))
173 R = true;
174 It = Bound;
175 }
176 return R;
Rui Ueyama7b276e22015-07-30 22:57:21 +0000177}
178
Rui Ueyama49560c72015-06-24 20:40:03 +0000179// Merge identical COMDAT sections.
Rui Ueyamaef907ec2015-09-04 21:35:54 +0000180// Two sections are considered the same if their section headers,
Rui Ueyama49560c72015-06-24 20:40:03 +0000181// contents and relocations are all the same.
Rui Ueyama92298d52015-09-16 14:19:10 +0000182void ICF::run() {
183 // Collect only mergeable sections and group by hash value.
Rui Ueyamaef907ec2015-09-04 21:35:54 +0000184 std::vector<SectionChunk *> SChunks;
Rui Ueyama9cb28702015-09-16 03:26:31 +0000185 for (Chunk *C : Chunks) {
186 if (auto *SC = dyn_cast<SectionChunk>(C)) {
187 bool Writable = SC->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_WRITE;
188 if (SC->isCOMDAT() && SC->isLive() && !Writable) {
Rui Ueyamaef907ec2015-09-04 21:35:54 +0000189 SChunks.push_back(SC);
Rui Ueyama92298d52015-09-16 14:19:10 +0000190 SC->GroupID = getHash(SC) | (uint64_t(1) << 63);
Rui Ueyama9cb28702015-09-16 03:26:31 +0000191 } else {
192 SC->GroupID = NextID++;
193 }
Rui Ueyama5b93aa52015-09-11 04:29:03 +0000194 }
Rui Ueyama9cb28702015-09-16 03:26:31 +0000195 }
196
Rui Ueyama92298d52015-09-16 14:19:10 +0000197 // From now on, sections in SChunks are ordered so that sections in
198 // the same group are consecutive in the vector.
Rui Ueyama9cb28702015-09-16 03:26:31 +0000199 std::sort(SChunks.begin(), SChunks.end(),
200 [](SectionChunk *A, SectionChunk *B) {
201 return A->GroupID < B->GroupID;
202 });
203
204 // Split groups until we get a convergence.
Rui Ueyama66c06ce2015-09-16 23:55:39 +0000205 int Cnt = 1;
Rui Ueyama92298d52015-09-16 14:19:10 +0000206 forEachGroup(SChunks, equalsConstant);
Rui Ueyama66c06ce2015-09-16 23:55:39 +0000207 while (forEachGroup(SChunks, equalsVariable))
208 ++Cnt;
209 if (Config->Verbose)
210 llvm::outs() << "\nICF needed " << Cnt << " iterations.\n";
Rui Ueyama9cb28702015-09-16 03:26:31 +0000211
212 // Merge sections in the same group.
213 for (auto It = SChunks.begin(), End = SChunks.end(); It != End;) {
214 SectionChunk *Head = *It;
215 auto Bound = std::find_if(It + 1, End, [&](SectionChunk *SC) {
216 return Head->GroupID != SC->GroupID;
217 });
218 if (std::distance(It, Bound) == 1) {
219 It = Bound;
220 continue;
221 }
222 if (Config->Verbose)
223 llvm::outs() << "Selected " << Head->getDebugName() << "\n";
224 for (++It; It != Bound; ++It) {
225 SectionChunk *SC = *It;
226 if (Config->Verbose)
227 llvm::outs() << " Removed " << SC->getDebugName() << "\n";
228 SC->replaceWith(Head);
229 }
Rui Ueyamaef907ec2015-09-04 21:35:54 +0000230 }
Rui Ueyama49560c72015-06-24 20:40:03 +0000231}
232
233} // namespace coff
234} // namespace lld