blob: 5ba0fc11b90f3cfa27589b774fb71224ebe2cdad [file] [log] [blame]
//===- IdenticalCodeFolding.cpp -------------------------------------------===//
//
// The MCLinker Project
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
Stephen Hines37b74a32014-11-26 18:48:20 -08009#include "mcld/LD/IdenticalCodeFolding.h"
Pete Choub3323082014-08-06 21:29:36 +080010
Stephen Hines37b74a32014-11-26 18:48:20 -080011#include "mcld/GeneralOptions.h"
12#include "mcld/Module.h"
13#include "mcld/Fragment/RegionFragment.h"
14#include "mcld/LD/LDContext.h"
15#include "mcld/LD/LDSection.h"
16#include "mcld/LD/RelocData.h"
17#include "mcld/LD/Relocator.h"
18#include "mcld/LD/ResolveInfo.h"
19#include "mcld/LD/SectionData.h"
20#include "mcld/LinkerConfig.h"
21#include "mcld/MC/Input.h"
22#include "mcld/Support/Demangle.h"
23#include "mcld/Support/MsgHandling.h"
24#include "mcld/Target/GNULDBackend.h"
Pete Choub3323082014-08-06 21:29:36 +080025
26#include <llvm/ADT/StringRef.h>
27#include <llvm/Support/Casting.h>
28#include <llvm/Support/Format.h>
29
30#include <cassert>
31#include <map>
32#include <set>
33
34#include <zlib.h>
Pete Choub3323082014-08-06 21:29:36 +080035
Stephen Hines37b74a32014-11-26 18:48:20 -080036namespace mcld {
37
38static bool isSymCtorOrDtor(const ResolveInfo& pSym) {
Pete Choub3323082014-08-06 21:29:36 +080039 // We can always fold ctors and dtors since accessing function pointer in C++
40 // is forbidden.
41 llvm::StringRef name(pSym.name(), pSym.nameSize());
42 if (!name.startswith("_ZZ") && !name.startswith("_ZN")) {
43 return false;
44 }
45 return isCtorOrDtor(pSym.name(), pSym.nameSize());
46}
47
48IdenticalCodeFolding::IdenticalCodeFolding(const LinkerConfig& pConfig,
49 const TargetLDBackend& pBackend,
50 Module& pModule)
Stephen Hines37b74a32014-11-26 18:48:20 -080051 : m_Config(pConfig), m_Backend(pBackend), m_Module(pModule) {
Pete Choub3323082014-08-06 21:29:36 +080052}
53
Stephen Hines37b74a32014-11-26 18:48:20 -080054void IdenticalCodeFolding::foldIdenticalCode() {
Pete Choub3323082014-08-06 21:29:36 +080055 // 1. Find folding candidates.
56 FoldingCandidates candidate_list;
57 findCandidates(candidate_list);
58
59 // 2. Initialize constant section content
60 for (size_t i = 0; i < candidate_list.size(); ++i) {
61 candidate_list[i].initConstantContent(m_Backend, m_KeptSections);
62 }
63
64 // 3. Find identical code until convergence
65 bool converged = false;
66 size_t iterations = 0;
67 while (!converged && (iterations < m_Config.options().getICFIterations())) {
68 converged = matchCandidates(candidate_list);
69 ++iterations;
70 }
71 if (m_Config.options().printICFSections()) {
72 debug(diag::debug_icf_iterations) << iterations;
73 }
74
75 // 4. Fold the identical code
76 typedef std::set<Input*> FoldedObjects;
77 FoldedObjects folded_objs;
78 KeptSections::iterator kept, keptEnd = m_KeptSections.end();
79 size_t index = 0;
80 for (kept = m_KeptSections.begin(); kept != keptEnd; ++kept, ++index) {
81 LDSection* sect = (*kept).first;
82 Input* obj = (*kept).second.first;
83 size_t kept_index = (*kept).second.second;
84 if (index != kept_index) {
85 sect->setKind(LDFileFormat::Folded);
86 folded_objs.insert(obj);
87
88 if (m_Config.options().printICFSections()) {
89 KeptSections::iterator it = m_KeptSections.begin() + kept_index;
90 LDSection* kept_sect = (*it).first;
91 Input* kept_obj = (*it).second.first;
Stephen Hines37b74a32014-11-26 18:48:20 -080092 debug(diag::debug_icf_folded_section) << sect->name() << obj->name()
Pete Choub3323082014-08-06 21:29:36 +080093 << kept_sect->name()
94 << kept_obj->name();
95 }
96 }
97 }
98
99 // Adjust the fragment reference of the folded symbols.
100 FoldedObjects::iterator fobj, fobjEnd = folded_objs.end();
101 for (fobj = folded_objs.begin(); fobj != fobjEnd; ++fobj) {
102 LDContext::sym_iterator sym, symEnd = (*fobj)->context()->symTabEnd();
103 for (sym = (*fobj)->context()->symTabBegin(); sym != symEnd; ++sym) {
104 if ((*sym)->hasFragRef() && ((*sym)->type() == ResolveInfo::Function)) {
105 LDSymbol* out_sym = (*sym)->resolveInfo()->outSymbol();
106 FragmentRef* frag_ref = out_sym->fragRef();
107 LDSection* sect = &(frag_ref->frag()->getParent()->getSection());
108 if (sect->kind() == LDFileFormat::Folded) {
109 size_t kept_index = m_KeptSections[sect].second;
Stephen Hines37b74a32014-11-26 18:48:20 -0800110 LDSection* kept_sect = (*(m_KeptSections.begin() + kept_index)).first;
Pete Choub3323082014-08-06 21:29:36 +0800111 frag_ref->assign(kept_sect->getSectionData()->front(),
112 frag_ref->offset());
113 }
114 }
Stephen Hines37b74a32014-11-26 18:48:20 -0800115 } // for each symbol
116 } // for each folded object
Pete Choub3323082014-08-06 21:29:36 +0800117}
118
Stephen Hines37b74a32014-11-26 18:48:20 -0800119void IdenticalCodeFolding::findCandidates(FoldingCandidates& pCandidateList) {
Pete Choub3323082014-08-06 21:29:36 +0800120 Module::obj_iterator obj, objEnd = m_Module.obj_end();
121 for (obj = m_Module.obj_begin(); obj != objEnd; ++obj) {
122 std::set<const LDSection*> funcptr_access_set;
123 typedef std::map<LDSection*, LDSection*> CandidateMap;
124 CandidateMap candidate_map;
125 LDContext::sect_iterator sect, sectEnd = (*obj)->context()->sectEnd();
126 for (sect = (*obj)->context()->sectBegin(); sect != sectEnd; ++sect) {
127 switch ((*sect)->kind()) {
128 case LDFileFormat::TEXT: {
Stephen Hines37b74a32014-11-26 18:48:20 -0800129 candidate_map.insert(
130 std::make_pair(*sect, reinterpret_cast<LDSection*>(NULL)));
Pete Choub3323082014-08-06 21:29:36 +0800131 break;
132 }
133 case LDFileFormat::Relocation: {
134 LDSection* target = (*sect)->getLink();
135 if (target->kind() == LDFileFormat::TEXT) {
136 candidate_map[target] = *sect;
137 }
138
139 // Safe icf
Pirama Arumuga Nainar2bf3f882015-04-21 10:33:13 -0700140 if (m_Config.options().getICFMode() == GeneralOptions::ICF::Safe) {
Pete Choub3323082014-08-06 21:29:36 +0800141 RelocData::iterator rel, relEnd = (*sect)->getRelocData()->end();
142 for (rel = (*sect)->getRelocData()->begin(); rel != relEnd; ++rel) {
143 LDSymbol* sym = rel->symInfo()->outSymbol();
144 if (sym->hasFragRef() && (sym->type() == ResolveInfo::Function)) {
145 const LDSection* def =
146 &sym->fragRef()->frag()->getParent()->getSection();
147 if (!isSymCtorOrDtor(*rel->symInfo()) &&
148 m_Backend.mayHaveUnsafeFunctionPointerAccess(*target) &&
Stephen Hines37b74a32014-11-26 18:48:20 -0800149 m_Backend.getRelocator()
150 ->mayHaveFunctionPointerAccess(*rel)) {
Pete Choub3323082014-08-06 21:29:36 +0800151 funcptr_access_set.insert(def);
152 }
153 }
Stephen Hines37b74a32014-11-26 18:48:20 -0800154 } // for each reloc
Pete Choub3323082014-08-06 21:29:36 +0800155 }
156
157 break;
158 }
159 default: {
160 // skip
Stephen Hines37b74a32014-11-26 18:48:20 -0800161 break;
Pete Choub3323082014-08-06 21:29:36 +0800162 }
Stephen Hines37b74a32014-11-26 18:48:20 -0800163 } // end of switch
164 } // for each section
Pete Choub3323082014-08-06 21:29:36 +0800165
166 CandidateMap::iterator candidate, candidateEnd = candidate_map.end();
167 for (candidate = candidate_map.begin(); candidate != candidateEnd;
168 ++candidate) {
Pirama Arumuga Nainar2bf3f882015-04-21 10:33:13 -0700169 if ((m_Config.options().getICFMode() == GeneralOptions::ICF::All) ||
Pete Choub3323082014-08-06 21:29:36 +0800170 (funcptr_access_set.count(candidate->first) == 0)) {
171 size_t index = m_KeptSections.size();
172 m_KeptSections[candidate->first] = ObjectAndId(*obj, index);
Stephen Hines37b74a32014-11-26 18:48:20 -0800173 pCandidateList.push_back(
174 FoldingCandidate(candidate->first, candidate->second, *obj));
Pete Choub3323082014-08-06 21:29:36 +0800175 }
Stephen Hines37b74a32014-11-26 18:48:20 -0800176 } // for each possible candidate
177 } // for each obj
Pete Choub3323082014-08-06 21:29:36 +0800178}
179
Stephen Hines37b74a32014-11-26 18:48:20 -0800180bool IdenticalCodeFolding::matchCandidates(FoldingCandidates& pCandidateList) {
Pete Choub3323082014-08-06 21:29:36 +0800181 typedef std::multimap<uint32_t, size_t> ChecksumMap;
182 ChecksumMap checksum_map;
183 std::vector<std::string> contents(pCandidateList.size());
184 bool converged = true;
185
186 for (size_t index = 0; index < pCandidateList.size(); ++index) {
Stephen Hines37b74a32014-11-26 18:48:20 -0800187 contents[index] = pCandidateList[index].getContentWithVariables(
188 m_Backend, m_KeptSections);
Pete Choub3323082014-08-06 21:29:36 +0800189 uint32_t checksum = ::crc32(0xFFFFFFFF,
190 (const uint8_t*)contents[index].c_str(),
191 contents[index].length());
192
193 size_t count = checksum_map.count(checksum);
194 if (count == 0) {
195 checksum_map.insert(std::make_pair(checksum, index));
196 } else {
197 std::pair<ChecksumMap::iterator, ChecksumMap::iterator> ret =
198 checksum_map.equal_range(checksum);
199 for (ChecksumMap::iterator it = ret.first; it != ret.second; ++it) {
200 size_t kept_index = (*it).second;
201 if (contents[index].compare(contents[kept_index]) == 0) {
202 m_KeptSections[pCandidateList[index].sect].second = kept_index;
203 converged = false;
204 break;
205 }
206 }
207 }
208 }
209
210 return converged;
211}
212
213void IdenticalCodeFolding::FoldingCandidate::initConstantContent(
214 const TargetLDBackend& pBackend,
Stephen Hines37b74a32014-11-26 18:48:20 -0800215 const IdenticalCodeFolding::KeptSections& pKeptSections) {
Pete Choub3323082014-08-06 21:29:36 +0800216 // Get the static content from text.
217 assert(sect != NULL && sect->hasSectionData());
218 SectionData::const_iterator frag, fragEnd = sect->getSectionData()->end();
219 for (frag = sect->getSectionData()->begin(); frag != fragEnd; ++frag) {
220 switch (frag->getKind()) {
221 case Fragment::Region: {
222 const RegionFragment& region = llvm::cast<RegionFragment>(*frag);
223 content.append(region.getRegion().begin(), region.size());
224 break;
225 }
226 default: {
227 // FIXME: Currently we only take care of RegionFragment.
228 break;
229 }
230 }
231 }
232
233 // Get the static content from relocs.
234 if (reloc_sect != NULL && reloc_sect->hasRelocData()) {
Stephen Hinesb0d0eb22016-03-08 00:18:09 -0800235 for (Relocation& rel : *reloc_sect->getRelocData()) {
Stephen Hinesa6c24df2015-03-18 14:53:18 -0700236 llvm::format_object<Relocation::Type,
237 Relocation::Address,
238 Relocation::Address,
239 Relocation::Address> rel_info("%x%llx%llx%llx",
Stephen Hinesb0d0eb22016-03-08 00:18:09 -0800240 rel.type(),
241 rel.symValue(),
242 rel.addend(),
243 rel.place());
Pete Choub3323082014-08-06 21:29:36 +0800244 char rel_str[48];
245 rel_info.print(rel_str, sizeof(rel_str));
246 content.append(rel_str);
247
248 // Handle the recursive call.
Stephen Hinesb0d0eb22016-03-08 00:18:09 -0800249 LDSymbol* sym = rel.symInfo()->outSymbol();
Pete Choub3323082014-08-06 21:29:36 +0800250 if ((sym->type() == ResolveInfo::Function) && sym->hasFragRef()) {
251 LDSection* def = &sym->fragRef()->frag()->getParent()->getSection();
252 if (def == sect) {
253 continue;
254 }
255 }
256
Stephen Hinesb0d0eb22016-03-08 00:18:09 -0800257 if (!pBackend.isSymbolPreemptible(*rel.symInfo()) && sym->hasFragRef() &&
Pete Choub3323082014-08-06 21:29:36 +0800258 (pKeptSections.find(
Stephen Hines37b74a32014-11-26 18:48:20 -0800259 &sym->fragRef()->frag()->getParent()->getSection()) !=
260 pKeptSections.end())) {
Pete Choub3323082014-08-06 21:29:36 +0800261 // Mark this reloc as a variable.
Stephen Hinesb0d0eb22016-03-08 00:18:09 -0800262 variable_relocs.push_back(&rel);
Pete Choub3323082014-08-06 21:29:36 +0800263 } else {
264 // TODO: Support inlining merge sections if possible (target-dependent).
265 if ((sym->binding() == ResolveInfo::Local) ||
266 (sym->binding() == ResolveInfo::Absolute)) {
267 // ABS or Local symbols.
Stephen Hines37b74a32014-11-26 18:48:20 -0800268 content.append(sym->name()).append(obj->name()).append(
269 obj->path().native());
Pete Choub3323082014-08-06 21:29:36 +0800270 } else {
271 content.append(sym->name());
272 }
273 }
274 }
275 }
276}
277
278std::string IdenticalCodeFolding::FoldingCandidate::getContentWithVariables(
279 const TargetLDBackend& pBackend,
Stephen Hines37b74a32014-11-26 18:48:20 -0800280 const IdenticalCodeFolding::KeptSections& pKeptSections) {
Pete Choub3323082014-08-06 21:29:36 +0800281 std::string result(content);
282 // Compute the variable content from relocs.
283 std::vector<Relocation*>::const_iterator rel, relEnd = variable_relocs.end();
284 for (rel = variable_relocs.begin(); rel != relEnd; ++rel) {
285 LDSymbol* sym = (*rel)->symInfo()->outSymbol();
286 LDSection* def = &sym->fragRef()->frag()->getParent()->getSection();
287 // Use the kept section index.
288 KeptSections::const_iterator it = pKeptSections.find(def);
Stephen Hinesa6c24df2015-03-18 14:53:18 -0700289 llvm::format_object<size_t> kept_info("%x", (*it).second.second);
Pete Choub3323082014-08-06 21:29:36 +0800290 char kept_str[8];
291 kept_info.print(kept_str, sizeof(kept_str));
292 result.append(kept_str);
293 }
294
295 return result;
296}
Stephen Hines37b74a32014-11-26 18:48:20 -0800297
298} // namespace mcld