blob: 256c830a44a46c1e05d0289365c6f5a48b5a8db5 [file] [log] [blame]
Seiya Nuta552bcb82019-08-19 21:05:31 +00001//===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "MachOLayoutBuilder.h"
Guillaume Chateletaf11cc72019-09-12 15:20:36 +000010#include "llvm/Support/Alignment.h"
Seiya Nuta552bcb82019-08-19 21:05:31 +000011#include "llvm/Support/Errc.h"
12#include "llvm/Support/ErrorHandling.h"
13
14namespace llvm {
15namespace objcopy {
16namespace macho {
17
18uint32_t MachOLayoutBuilder::computeSizeOfCmds() const {
19 uint32_t Size = 0;
Alexander Shaposhnikovdc046c72020-02-21 13:18:36 -080020 for (const LoadCommand &LC : O.LoadCommands) {
Seiya Nuta12bd4902019-08-19 21:12:02 +000021 const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
Seiya Nuta552bcb82019-08-19 21:05:31 +000022 auto cmd = MLC.load_command_data.cmd;
23 switch (cmd) {
24 case MachO::LC_SEGMENT:
25 Size += sizeof(MachO::segment_command) +
26 sizeof(MachO::section) * LC.Sections.size();
27 continue;
28 case MachO::LC_SEGMENT_64:
29 Size += sizeof(MachO::segment_command_64) +
30 sizeof(MachO::section_64) * LC.Sections.size();
31 continue;
32 }
33
34 switch (cmd) {
35#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
36 case MachO::LCName: \
37 Size += sizeof(MachO::LCStruct) + LC.Payload.size(); \
38 break;
39#include "llvm/BinaryFormat/MachO.def"
40#undef HANDLE_LOAD_COMMAND
41 }
42 }
43
44 return Size;
45}
46
47void MachOLayoutBuilder::constructStringTable() {
48 for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols)
49 StrTableBuilder.add(Sym->Name);
50 StrTableBuilder.finalize();
51}
52
53void MachOLayoutBuilder::updateSymbolIndexes() {
54 uint32_t Index = 0;
55 for (auto &Symbol : O.SymTable.Symbols)
56 Symbol->Index = Index++;
57}
58
59// Updates the index and the number of local/external/undefined symbols.
60void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) {
61 assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB);
62 // Make sure that nlist entries in the symbol table are sorted by the those
63 // types. The order is: local < defined external < undefined external.
Georgii Rymar1647ff62020-04-13 14:46:41 +030064 assert(llvm::is_sorted(O.SymTable.Symbols,
65 [](const std::unique_ptr<SymbolEntry> &A,
66 const std::unique_ptr<SymbolEntry> &B) {
67 bool AL = A->isLocalSymbol(),
68 BL = B->isLocalSymbol();
69 if (AL != BL)
70 return AL;
71 return !AL && !A->isUndefinedSymbol() &&
72 B->isUndefinedSymbol();
73 }) &&
Seiya Nuta552bcb82019-08-19 21:05:31 +000074 "Symbols are not sorted by their types.");
75
76 uint32_t NumLocalSymbols = 0;
77 auto Iter = O.SymTable.Symbols.begin();
78 auto End = O.SymTable.Symbols.end();
79 for (; Iter != End; ++Iter) {
80 if ((*Iter)->isExternalSymbol())
81 break;
82
83 ++NumLocalSymbols;
84 }
85
86 uint32_t NumExtDefSymbols = 0;
87 for (; Iter != End; ++Iter) {
88 if ((*Iter)->isUndefinedSymbol())
89 break;
90
91 ++NumExtDefSymbols;
92 }
93
94 MLC.dysymtab_command_data.ilocalsym = 0;
95 MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols;
96 MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols;
97 MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols;
98 MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols;
99 MLC.dysymtab_command_data.nundefsym =
100 O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols);
101}
102
103// Recomputes and updates offset and size fields in load commands and sections
104// since they could be modified.
105uint64_t MachOLayoutBuilder::layoutSegments() {
106 auto HeaderSize =
107 Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
Seiya Nuta12bd4902019-08-19 21:12:02 +0000108 const bool IsObjectFile =
109 O.Header.FileType == MachO::HeaderFileType::MH_OBJECT;
110 uint64_t Offset = IsObjectFile ? (HeaderSize + O.Header.SizeOfCmds) : 0;
Alexander Shaposhnikovdc046c72020-02-21 13:18:36 -0800111 for (LoadCommand &LC : O.LoadCommands) {
Seiya Nuta552bcb82019-08-19 21:05:31 +0000112 auto &MLC = LC.MachOLoadCommand;
113 StringRef Segname;
Seiya Nuta12bd4902019-08-19 21:12:02 +0000114 uint64_t SegmentVmAddr;
115 uint64_t SegmentVmSize;
Seiya Nuta552bcb82019-08-19 21:05:31 +0000116 switch (MLC.load_command_data.cmd) {
117 case MachO::LC_SEGMENT:
Seiya Nuta12bd4902019-08-19 21:12:02 +0000118 SegmentVmAddr = MLC.segment_command_data.vmaddr;
119 SegmentVmSize = MLC.segment_command_data.vmsize;
Seiya Nuta552bcb82019-08-19 21:05:31 +0000120 Segname = StringRef(MLC.segment_command_data.segname,
121 strnlen(MLC.segment_command_data.segname,
122 sizeof(MLC.segment_command_data.segname)));
123 break;
124 case MachO::LC_SEGMENT_64:
Seiya Nuta12bd4902019-08-19 21:12:02 +0000125 SegmentVmAddr = MLC.segment_command_64_data.vmaddr;
126 SegmentVmSize = MLC.segment_command_64_data.vmsize;
Seiya Nuta552bcb82019-08-19 21:05:31 +0000127 Segname = StringRef(MLC.segment_command_64_data.segname,
128 strnlen(MLC.segment_command_64_data.segname,
129 sizeof(MLC.segment_command_64_data.segname)));
130 break;
131 default:
132 continue;
133 }
134
135 if (Segname == "__LINKEDIT") {
136 // We update the __LINKEDIT segment later (in layoutTail).
137 assert(LC.Sections.empty() && "__LINKEDIT segment has sections");
138 LinkEditLoadCommand = &MLC;
139 continue;
140 }
141
142 // Update file offsets and sizes of sections.
Seiya Nuta12bd4902019-08-19 21:12:02 +0000143 uint64_t SegOffset = Offset;
144 uint64_t SegFileSize = 0;
Seiya Nuta552bcb82019-08-19 21:05:31 +0000145 uint64_t VMSize = 0;
Alexander Shaposhnikovdc046c72020-02-21 13:18:36 -0800146 for (std::unique_ptr<Section> &Sec : LC.Sections) {
Alexander Shaposhnikov84eff8c2020-04-24 00:58:27 -0700147 assert(SegmentVmAddr <= Sec->Addr &&
148 "Section's address cannot be smaller than Segment's one");
149 uint32_t SectOffset = Sec->Addr - SegmentVmAddr;
Seiya Nuta12bd4902019-08-19 21:12:02 +0000150 if (IsObjectFile) {
Alexander Shaposhnikovdc046c72020-02-21 13:18:36 -0800151 if (Sec->isVirtualSection()) {
152 Sec->Offset = 0;
Seiya Nuta12bd4902019-08-19 21:12:02 +0000153 } else {
Simon Pilgrim5a28f0a2019-08-20 10:25:57 +0000154 uint64_t PaddingSize =
Alexander Shaposhnikovdc046c72020-02-21 13:18:36 -0800155 offsetToAlignment(SegFileSize, Align(1ull << Sec->Align));
156 Sec->Offset = SegOffset + SegFileSize + PaddingSize;
157 Sec->Size = Sec->Content.size();
158 SegFileSize += PaddingSize + Sec->Size;
Seiya Nuta12bd4902019-08-19 21:12:02 +0000159 }
Seiya Nuta12bd4902019-08-19 21:12:02 +0000160 } else {
Alexander Shaposhnikovdc046c72020-02-21 13:18:36 -0800161 if (Sec->isVirtualSection()) {
162 Sec->Offset = 0;
Seiya Nuta12bd4902019-08-19 21:12:02 +0000163 } else {
Alexander Shaposhnikovdc046c72020-02-21 13:18:36 -0800164 Sec->Offset = SegOffset + SectOffset;
165 Sec->Size = Sec->Content.size();
166 SegFileSize = std::max(SegFileSize, SectOffset + Sec->Size);
Seiya Nuta12bd4902019-08-19 21:12:02 +0000167 }
Seiya Nuta552bcb82019-08-19 21:05:31 +0000168 }
Alexander Shaposhnikov84eff8c2020-04-24 00:58:27 -0700169 VMSize = std::max(VMSize, SectOffset + Sec->Size);
Seiya Nuta552bcb82019-08-19 21:05:31 +0000170 }
171
Seiya Nuta12bd4902019-08-19 21:12:02 +0000172 if (IsObjectFile) {
173 Offset += SegFileSize;
174 } else {
175 Offset = alignTo(Offset + SegFileSize, PageSize);
176 SegFileSize = alignTo(SegFileSize, PageSize);
177 // Use the original vmsize if the segment is __PAGEZERO.
178 VMSize =
179 Segname == "__PAGEZERO" ? SegmentVmSize : alignTo(VMSize, PageSize);
180 }
181
Seiya Nuta552bcb82019-08-19 21:05:31 +0000182 switch (MLC.load_command_data.cmd) {
183 case MachO::LC_SEGMENT:
184 MLC.segment_command_data.cmdsize =
185 sizeof(MachO::segment_command) +
186 sizeof(MachO::section) * LC.Sections.size();
187 MLC.segment_command_data.nsects = LC.Sections.size();
Seiya Nuta12bd4902019-08-19 21:12:02 +0000188 MLC.segment_command_data.fileoff = SegOffset;
Seiya Nuta552bcb82019-08-19 21:05:31 +0000189 MLC.segment_command_data.vmsize = VMSize;
Seiya Nuta12bd4902019-08-19 21:12:02 +0000190 MLC.segment_command_data.filesize = SegFileSize;
Seiya Nuta552bcb82019-08-19 21:05:31 +0000191 break;
192 case MachO::LC_SEGMENT_64:
193 MLC.segment_command_64_data.cmdsize =
194 sizeof(MachO::segment_command_64) +
195 sizeof(MachO::section_64) * LC.Sections.size();
196 MLC.segment_command_64_data.nsects = LC.Sections.size();
Seiya Nuta12bd4902019-08-19 21:12:02 +0000197 MLC.segment_command_64_data.fileoff = SegOffset;
Seiya Nuta552bcb82019-08-19 21:05:31 +0000198 MLC.segment_command_64_data.vmsize = VMSize;
Seiya Nuta12bd4902019-08-19 21:12:02 +0000199 MLC.segment_command_64_data.filesize = SegFileSize;
Seiya Nuta552bcb82019-08-19 21:05:31 +0000200 break;
201 }
Seiya Nuta552bcb82019-08-19 21:05:31 +0000202 }
203
204 return Offset;
205}
206
207uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) {
Alexander Shaposhnikovdc046c72020-02-21 13:18:36 -0800208 for (LoadCommand &LC : O.LoadCommands)
209 for (std::unique_ptr<Section> &Sec : LC.Sections) {
210 Sec->RelOff = Sec->Relocations.empty() ? 0 : Offset;
211 Sec->NReloc = Sec->Relocations.size();
212 Offset += sizeof(MachO::any_relocation_info) * Sec->NReloc;
Seiya Nuta552bcb82019-08-19 21:05:31 +0000213 }
214
215 return Offset;
216}
217
218Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
Alexander Shaposhnikov913bc312020-06-15 18:55:59 -0700219 // If we are building the layout of an executable or dynamic library
220 // which does not have any segments other than __LINKEDIT,
221 // the Offset can be equal to zero by this time. It happens because of the
Kirill Bobyrev45e4c9d2020-06-16 08:29:52 +0200222 // convention that in such cases the file offsets specified by LC_SEGMENT
223 // start with zero (unlike the case of a relocatable object file).
Alexander Shaposhnikov913bc312020-06-15 18:55:59 -0700224 const uint64_t HeaderSize =
225 Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
Kirill Bobyrev45e4c9d2020-06-16 08:29:52 +0200226 assert((!(O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) ||
227 Offset >= HeaderSize + O.Header.SizeOfCmds) &&
Alexander Shaposhnikov913bc312020-06-15 18:55:59 -0700228 "Incorrect tail offset");
229 Offset = std::max(Offset, HeaderSize + O.Header.SizeOfCmds);
230
Seiya Nuta552bcb82019-08-19 21:05:31 +0000231 // The order of LINKEDIT elements is as follows:
232 // rebase info, binding info, weak binding info, lazy binding info, export
233 // trie, data-in-code, symbol table, indirect symbol table, symbol table
Alexander Shaposhnikov913bc312020-06-15 18:55:59 -0700234 // strings, code signature.
Seiya Nuta552bcb82019-08-19 21:05:31 +0000235 uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
236 uint64_t StartOfLinkEdit = Offset;
237 uint64_t StartOfRebaseInfo = StartOfLinkEdit;
238 uint64_t StartOfBindingInfo = StartOfRebaseInfo + O.Rebases.Opcodes.size();
239 uint64_t StartOfWeakBindingInfo = StartOfBindingInfo + O.Binds.Opcodes.size();
240 uint64_t StartOfLazyBindingInfo =
241 StartOfWeakBindingInfo + O.WeakBinds.Opcodes.size();
242 uint64_t StartOfExportTrie =
243 StartOfLazyBindingInfo + O.LazyBinds.Opcodes.size();
244 uint64_t StartOfFunctionStarts = StartOfExportTrie + O.Exports.Trie.size();
245 uint64_t StartOfDataInCode =
246 StartOfFunctionStarts + O.FunctionStarts.Data.size();
247 uint64_t StartOfSymbols = StartOfDataInCode + O.DataInCode.Data.size();
248 uint64_t StartOfIndirectSymbols =
249 StartOfSymbols + NListSize * O.SymTable.Symbols.size();
250 uint64_t StartOfSymbolStrings =
251 StartOfIndirectSymbols +
252 sizeof(uint32_t) * O.IndirectSymTable.Symbols.size();
Alexander Shaposhnikov913bc312020-06-15 18:55:59 -0700253 uint64_t StartOfCodeSignature =
254 StartOfSymbolStrings + StrTableBuilder.getSize();
Seiya Nuta552bcb82019-08-19 21:05:31 +0000255 uint64_t LinkEditSize =
Alexander Shaposhnikov913bc312020-06-15 18:55:59 -0700256 (StartOfCodeSignature + O.CodeSignature.Data.size()) - StartOfLinkEdit;
Seiya Nuta552bcb82019-08-19 21:05:31 +0000257
258 // Now we have determined the layout of the contents of the __LINKEDIT
259 // segment. Update its load command.
260 if (LinkEditLoadCommand) {
261 MachO::macho_load_command *MLC = LinkEditLoadCommand;
262 switch (LinkEditLoadCommand->load_command_data.cmd) {
263 case MachO::LC_SEGMENT:
264 MLC->segment_command_data.cmdsize = sizeof(MachO::segment_command);
265 MLC->segment_command_data.fileoff = StartOfLinkEdit;
266 MLC->segment_command_data.vmsize = alignTo(LinkEditSize, PageSize);
267 MLC->segment_command_data.filesize = LinkEditSize;
268 break;
269 case MachO::LC_SEGMENT_64:
270 MLC->segment_command_64_data.cmdsize = sizeof(MachO::segment_command_64);
271 MLC->segment_command_64_data.fileoff = StartOfLinkEdit;
272 MLC->segment_command_64_data.vmsize = alignTo(LinkEditSize, PageSize);
273 MLC->segment_command_64_data.filesize = LinkEditSize;
274 break;
275 }
276 }
277
Alexander Shaposhnikovdc046c72020-02-21 13:18:36 -0800278 for (LoadCommand &LC : O.LoadCommands) {
Seiya Nuta552bcb82019-08-19 21:05:31 +0000279 auto &MLC = LC.MachOLoadCommand;
280 auto cmd = MLC.load_command_data.cmd;
281 switch (cmd) {
Alexander Shaposhnikov913bc312020-06-15 18:55:59 -0700282 case MachO::LC_CODE_SIGNATURE:
283 MLC.linkedit_data_command_data.dataoff = StartOfCodeSignature;
284 MLC.linkedit_data_command_data.datasize = O.CodeSignature.Data.size();
285 break;
Seiya Nuta552bcb82019-08-19 21:05:31 +0000286 case MachO::LC_SYMTAB:
287 MLC.symtab_command_data.symoff = StartOfSymbols;
288 MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size();
289 MLC.symtab_command_data.stroff = StartOfSymbolStrings;
290 MLC.symtab_command_data.strsize = StrTableBuilder.getSize();
291 break;
292 case MachO::LC_DYSYMTAB: {
293 if (MLC.dysymtab_command_data.ntoc != 0 ||
294 MLC.dysymtab_command_data.nmodtab != 0 ||
295 MLC.dysymtab_command_data.nextrefsyms != 0 ||
296 MLC.dysymtab_command_data.nlocrel != 0 ||
297 MLC.dysymtab_command_data.nextrel != 0)
298 return createStringError(llvm::errc::not_supported,
299 "shared library is not yet supported");
300
301 if (!O.IndirectSymTable.Symbols.empty()) {
302 MLC.dysymtab_command_data.indirectsymoff = StartOfIndirectSymbols;
303 MLC.dysymtab_command_data.nindirectsyms =
304 O.IndirectSymTable.Symbols.size();
305 }
306
307 updateDySymTab(MLC);
308 break;
309 }
310 case MachO::LC_DATA_IN_CODE:
311 MLC.linkedit_data_command_data.dataoff = StartOfDataInCode;
312 MLC.linkedit_data_command_data.datasize = O.DataInCode.Data.size();
313 break;
314 case MachO::LC_FUNCTION_STARTS:
315 MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts;
316 MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size();
317 break;
318 case MachO::LC_DYLD_INFO:
319 case MachO::LC_DYLD_INFO_ONLY:
320 MLC.dyld_info_command_data.rebase_off =
321 O.Rebases.Opcodes.empty() ? 0 : StartOfRebaseInfo;
322 MLC.dyld_info_command_data.rebase_size = O.Rebases.Opcodes.size();
323 MLC.dyld_info_command_data.bind_off =
324 O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo;
325 MLC.dyld_info_command_data.bind_size = O.Binds.Opcodes.size();
326 MLC.dyld_info_command_data.weak_bind_off =
327 O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo;
328 MLC.dyld_info_command_data.weak_bind_size = O.WeakBinds.Opcodes.size();
329 MLC.dyld_info_command_data.lazy_bind_off =
330 O.LazyBinds.Opcodes.empty() ? 0 : StartOfLazyBindingInfo;
331 MLC.dyld_info_command_data.lazy_bind_size = O.LazyBinds.Opcodes.size();
332 MLC.dyld_info_command_data.export_off =
333 O.Exports.Trie.empty() ? 0 : StartOfExportTrie;
334 MLC.dyld_info_command_data.export_size = O.Exports.Trie.size();
335 break;
Alexander Shaposhnikovd17d50e2020-04-20 16:33:18 -0700336 // Note that LC_ENCRYPTION_INFO.cryptoff despite its name and the comment in
337 // <mach-o/loader.h> is not an offset in the binary file, instead, it is a
338 // relative virtual address. At the moment modification of the __TEXT
339 // segment of executables isn't supported anyway (e.g. data in code entries
340 // are not recalculated). Moreover, in general
341 // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 are nontrivial to update because
342 // without making additional assumptions (e.g. that the entire __TEXT
343 // segment should be encrypted) we do not know how to recalculate the
344 // boundaries of the encrypted part. For now just copy over these load
345 // commands until we encounter a real world usecase where
346 // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 need to be adjusted.
347 case MachO::LC_ENCRYPTION_INFO:
348 case MachO::LC_ENCRYPTION_INFO_64:
Seiya Nuta552bcb82019-08-19 21:05:31 +0000349 case MachO::LC_LOAD_DYLINKER:
350 case MachO::LC_MAIN:
351 case MachO::LC_RPATH:
352 case MachO::LC_SEGMENT:
353 case MachO::LC_SEGMENT_64:
354 case MachO::LC_VERSION_MIN_MACOSX:
Alexander Shaposhnikov074af2d2019-10-24 17:35:10 -0700355 case MachO::LC_VERSION_MIN_IPHONEOS:
356 case MachO::LC_VERSION_MIN_TVOS:
357 case MachO::LC_VERSION_MIN_WATCHOS:
Seiya Nuta552bcb82019-08-19 21:05:31 +0000358 case MachO::LC_BUILD_VERSION:
359 case MachO::LC_ID_DYLIB:
360 case MachO::LC_LOAD_DYLIB:
Alexander Shaposhnikovd987eed2020-04-23 11:37:39 -0700361 case MachO::LC_LOAD_WEAK_DYLIB:
Seiya Nuta552bcb82019-08-19 21:05:31 +0000362 case MachO::LC_UUID:
363 case MachO::LC_SOURCE_VERSION:
364 // Nothing to update.
365 break;
366 default:
367 // Abort if it's unsupported in order to prevent corrupting the object.
368 return createStringError(llvm::errc::not_supported,
369 "unsupported load command (cmd=0x%x)", cmd);
370 }
371 }
372
373 return Error::success();
374}
375
376Error MachOLayoutBuilder::layout() {
377 O.Header.NCmds = O.LoadCommands.size();
378 O.Header.SizeOfCmds = computeSizeOfCmds();
379 constructStringTable();
380 updateSymbolIndexes();
381 uint64_t Offset = layoutSegments();
382 Offset = layoutRelocations(Offset);
383 return layoutTail(Offset);
384}
385
386} // end namespace macho
387} // end namespace objcopy
388} // end namespace llvm