Seiya Nuta | 552bcb8 | 2019-08-19 21:05:31 +0000 | [diff] [blame] | 1 | //===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "MachOLayoutBuilder.h" |
Guillaume Chatelet | af11cc7 | 2019-09-12 15:20:36 +0000 | [diff] [blame] | 10 | #include "llvm/Support/Alignment.h" |
Seiya Nuta | 552bcb8 | 2019-08-19 21:05:31 +0000 | [diff] [blame] | 11 | #include "llvm/Support/Errc.h" |
| 12 | #include "llvm/Support/ErrorHandling.h" |
| 13 | |
| 14 | namespace llvm { |
| 15 | namespace objcopy { |
| 16 | namespace macho { |
| 17 | |
| 18 | uint32_t MachOLayoutBuilder::computeSizeOfCmds() const { |
| 19 | uint32_t Size = 0; |
| 20 | for (const auto &LC : O.LoadCommands) { |
Seiya Nuta | 12bd490 | 2019-08-19 21:12:02 +0000 | [diff] [blame] | 21 | const MachO::macho_load_command &MLC = LC.MachOLoadCommand; |
Seiya Nuta | 552bcb8 | 2019-08-19 21:05:31 +0000 | [diff] [blame] | 22 | auto cmd = MLC.load_command_data.cmd; |
| 23 | switch (cmd) { |
| 24 | case MachO::LC_SEGMENT: |
| 25 | Size += sizeof(MachO::segment_command) + |
| 26 | sizeof(MachO::section) * LC.Sections.size(); |
| 27 | continue; |
| 28 | case MachO::LC_SEGMENT_64: |
| 29 | Size += sizeof(MachO::segment_command_64) + |
| 30 | sizeof(MachO::section_64) * LC.Sections.size(); |
| 31 | continue; |
| 32 | } |
| 33 | |
| 34 | switch (cmd) { |
| 35 | #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ |
| 36 | case MachO::LCName: \ |
| 37 | Size += sizeof(MachO::LCStruct) + LC.Payload.size(); \ |
| 38 | break; |
| 39 | #include "llvm/BinaryFormat/MachO.def" |
| 40 | #undef HANDLE_LOAD_COMMAND |
| 41 | } |
| 42 | } |
| 43 | |
| 44 | return Size; |
| 45 | } |
| 46 | |
| 47 | void MachOLayoutBuilder::constructStringTable() { |
| 48 | for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols) |
| 49 | StrTableBuilder.add(Sym->Name); |
| 50 | StrTableBuilder.finalize(); |
| 51 | } |
| 52 | |
| 53 | void MachOLayoutBuilder::updateSymbolIndexes() { |
| 54 | uint32_t Index = 0; |
| 55 | for (auto &Symbol : O.SymTable.Symbols) |
| 56 | Symbol->Index = Index++; |
| 57 | } |
| 58 | |
| 59 | // Updates the index and the number of local/external/undefined symbols. |
| 60 | void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) { |
| 61 | assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB); |
| 62 | // Make sure that nlist entries in the symbol table are sorted by the those |
| 63 | // types. The order is: local < defined external < undefined external. |
| 64 | assert(std::is_sorted(O.SymTable.Symbols.begin(), O.SymTable.Symbols.end(), |
| 65 | [](const std::unique_ptr<SymbolEntry> &A, |
| 66 | const std::unique_ptr<SymbolEntry> &B) { |
| 67 | return (A->isLocalSymbol() && !B->isLocalSymbol()) || |
| 68 | (!A->isUndefinedSymbol() && |
| 69 | B->isUndefinedSymbol()); |
| 70 | }) && |
| 71 | "Symbols are not sorted by their types."); |
| 72 | |
| 73 | uint32_t NumLocalSymbols = 0; |
| 74 | auto Iter = O.SymTable.Symbols.begin(); |
| 75 | auto End = O.SymTable.Symbols.end(); |
| 76 | for (; Iter != End; ++Iter) { |
| 77 | if ((*Iter)->isExternalSymbol()) |
| 78 | break; |
| 79 | |
| 80 | ++NumLocalSymbols; |
| 81 | } |
| 82 | |
| 83 | uint32_t NumExtDefSymbols = 0; |
| 84 | for (; Iter != End; ++Iter) { |
| 85 | if ((*Iter)->isUndefinedSymbol()) |
| 86 | break; |
| 87 | |
| 88 | ++NumExtDefSymbols; |
| 89 | } |
| 90 | |
| 91 | MLC.dysymtab_command_data.ilocalsym = 0; |
| 92 | MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols; |
| 93 | MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols; |
| 94 | MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols; |
| 95 | MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols; |
| 96 | MLC.dysymtab_command_data.nundefsym = |
| 97 | O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols); |
| 98 | } |
| 99 | |
| 100 | // Recomputes and updates offset and size fields in load commands and sections |
| 101 | // since they could be modified. |
| 102 | uint64_t MachOLayoutBuilder::layoutSegments() { |
| 103 | auto HeaderSize = |
| 104 | Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); |
Seiya Nuta | 12bd490 | 2019-08-19 21:12:02 +0000 | [diff] [blame] | 105 | const bool IsObjectFile = |
| 106 | O.Header.FileType == MachO::HeaderFileType::MH_OBJECT; |
| 107 | uint64_t Offset = IsObjectFile ? (HeaderSize + O.Header.SizeOfCmds) : 0; |
Seiya Nuta | 552bcb8 | 2019-08-19 21:05:31 +0000 | [diff] [blame] | 108 | for (auto &LC : O.LoadCommands) { |
Seiya Nuta | 552bcb8 | 2019-08-19 21:05:31 +0000 | [diff] [blame] | 109 | auto &MLC = LC.MachOLoadCommand; |
| 110 | StringRef Segname; |
Seiya Nuta | 12bd490 | 2019-08-19 21:12:02 +0000 | [diff] [blame] | 111 | uint64_t SegmentVmAddr; |
| 112 | uint64_t SegmentVmSize; |
Seiya Nuta | 552bcb8 | 2019-08-19 21:05:31 +0000 | [diff] [blame] | 113 | switch (MLC.load_command_data.cmd) { |
| 114 | case MachO::LC_SEGMENT: |
Seiya Nuta | 12bd490 | 2019-08-19 21:12:02 +0000 | [diff] [blame] | 115 | SegmentVmAddr = MLC.segment_command_data.vmaddr; |
| 116 | SegmentVmSize = MLC.segment_command_data.vmsize; |
Seiya Nuta | 552bcb8 | 2019-08-19 21:05:31 +0000 | [diff] [blame] | 117 | Segname = StringRef(MLC.segment_command_data.segname, |
| 118 | strnlen(MLC.segment_command_data.segname, |
| 119 | sizeof(MLC.segment_command_data.segname))); |
| 120 | break; |
| 121 | case MachO::LC_SEGMENT_64: |
Seiya Nuta | 12bd490 | 2019-08-19 21:12:02 +0000 | [diff] [blame] | 122 | SegmentVmAddr = MLC.segment_command_64_data.vmaddr; |
| 123 | SegmentVmSize = MLC.segment_command_64_data.vmsize; |
Seiya Nuta | 552bcb8 | 2019-08-19 21:05:31 +0000 | [diff] [blame] | 124 | Segname = StringRef(MLC.segment_command_64_data.segname, |
| 125 | strnlen(MLC.segment_command_64_data.segname, |
| 126 | sizeof(MLC.segment_command_64_data.segname))); |
| 127 | break; |
| 128 | default: |
| 129 | continue; |
| 130 | } |
| 131 | |
| 132 | if (Segname == "__LINKEDIT") { |
| 133 | // We update the __LINKEDIT segment later (in layoutTail). |
| 134 | assert(LC.Sections.empty() && "__LINKEDIT segment has sections"); |
| 135 | LinkEditLoadCommand = &MLC; |
| 136 | continue; |
| 137 | } |
| 138 | |
| 139 | // Update file offsets and sizes of sections. |
Seiya Nuta | 12bd490 | 2019-08-19 21:12:02 +0000 | [diff] [blame] | 140 | uint64_t SegOffset = Offset; |
| 141 | uint64_t SegFileSize = 0; |
Seiya Nuta | 552bcb8 | 2019-08-19 21:05:31 +0000 | [diff] [blame] | 142 | uint64_t VMSize = 0; |
Seiya Nuta | 552bcb8 | 2019-08-19 21:05:31 +0000 | [diff] [blame] | 143 | for (auto &Sec : LC.Sections) { |
Seiya Nuta | 12bd490 | 2019-08-19 21:12:02 +0000 | [diff] [blame] | 144 | if (IsObjectFile) { |
| 145 | if (Sec.isVirtualSection()) { |
| 146 | Sec.Offset = 0; |
| 147 | } else { |
Simon Pilgrim | 5a28f0a | 2019-08-20 10:25:57 +0000 | [diff] [blame] | 148 | uint64_t PaddingSize = |
Guillaume Chatelet | 18f805a | 2019-09-27 12:54:21 +0000 | [diff] [blame] | 149 | offsetToAlignment(SegFileSize, Align(1ull << Sec.Align)); |
Seiya Nuta | 12bd490 | 2019-08-19 21:12:02 +0000 | [diff] [blame] | 150 | Sec.Offset = SegOffset + SegFileSize + PaddingSize; |
| 151 | Sec.Size = Sec.Content.size(); |
| 152 | SegFileSize += PaddingSize + Sec.Size; |
| 153 | } |
| 154 | VMSize = std::max(VMSize, Sec.Addr + Sec.Size); |
| 155 | } else { |
| 156 | if (Sec.isVirtualSection()) { |
| 157 | Sec.Offset = 0; |
| 158 | VMSize += Sec.Size; |
| 159 | } else { |
| 160 | uint32_t SectOffset = Sec.Addr - SegmentVmAddr; |
| 161 | Sec.Offset = SegOffset + SectOffset; |
| 162 | Sec.Size = Sec.Content.size(); |
| 163 | SegFileSize = std::max(SegFileSize, SectOffset + Sec.Size); |
| 164 | VMSize = std::max(VMSize, SegFileSize); |
| 165 | } |
Seiya Nuta | 552bcb8 | 2019-08-19 21:05:31 +0000 | [diff] [blame] | 166 | } |
Seiya Nuta | 552bcb8 | 2019-08-19 21:05:31 +0000 | [diff] [blame] | 167 | } |
| 168 | |
Seiya Nuta | 12bd490 | 2019-08-19 21:12:02 +0000 | [diff] [blame] | 169 | if (IsObjectFile) { |
| 170 | Offset += SegFileSize; |
| 171 | } else { |
| 172 | Offset = alignTo(Offset + SegFileSize, PageSize); |
| 173 | SegFileSize = alignTo(SegFileSize, PageSize); |
| 174 | // Use the original vmsize if the segment is __PAGEZERO. |
| 175 | VMSize = |
| 176 | Segname == "__PAGEZERO" ? SegmentVmSize : alignTo(VMSize, PageSize); |
| 177 | } |
| 178 | |
Seiya Nuta | 552bcb8 | 2019-08-19 21:05:31 +0000 | [diff] [blame] | 179 | switch (MLC.load_command_data.cmd) { |
| 180 | case MachO::LC_SEGMENT: |
| 181 | MLC.segment_command_data.cmdsize = |
| 182 | sizeof(MachO::segment_command) + |
| 183 | sizeof(MachO::section) * LC.Sections.size(); |
| 184 | MLC.segment_command_data.nsects = LC.Sections.size(); |
Seiya Nuta | 12bd490 | 2019-08-19 21:12:02 +0000 | [diff] [blame] | 185 | MLC.segment_command_data.fileoff = SegOffset; |
Seiya Nuta | 552bcb8 | 2019-08-19 21:05:31 +0000 | [diff] [blame] | 186 | MLC.segment_command_data.vmsize = VMSize; |
Seiya Nuta | 12bd490 | 2019-08-19 21:12:02 +0000 | [diff] [blame] | 187 | MLC.segment_command_data.filesize = SegFileSize; |
Seiya Nuta | 552bcb8 | 2019-08-19 21:05:31 +0000 | [diff] [blame] | 188 | break; |
| 189 | case MachO::LC_SEGMENT_64: |
| 190 | MLC.segment_command_64_data.cmdsize = |
| 191 | sizeof(MachO::segment_command_64) + |
| 192 | sizeof(MachO::section_64) * LC.Sections.size(); |
| 193 | MLC.segment_command_64_data.nsects = LC.Sections.size(); |
Seiya Nuta | 12bd490 | 2019-08-19 21:12:02 +0000 | [diff] [blame] | 194 | MLC.segment_command_64_data.fileoff = SegOffset; |
Seiya Nuta | 552bcb8 | 2019-08-19 21:05:31 +0000 | [diff] [blame] | 195 | MLC.segment_command_64_data.vmsize = VMSize; |
Seiya Nuta | 12bd490 | 2019-08-19 21:12:02 +0000 | [diff] [blame] | 196 | MLC.segment_command_64_data.filesize = SegFileSize; |
Seiya Nuta | 552bcb8 | 2019-08-19 21:05:31 +0000 | [diff] [blame] | 197 | break; |
| 198 | } |
Seiya Nuta | 552bcb8 | 2019-08-19 21:05:31 +0000 | [diff] [blame] | 199 | } |
| 200 | |
| 201 | return Offset; |
| 202 | } |
| 203 | |
| 204 | uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) { |
| 205 | for (auto &LC : O.LoadCommands) |
| 206 | for (auto &Sec : LC.Sections) { |
| 207 | Sec.RelOff = Sec.Relocations.empty() ? 0 : Offset; |
| 208 | Sec.NReloc = Sec.Relocations.size(); |
| 209 | Offset += sizeof(MachO::any_relocation_info) * Sec.NReloc; |
| 210 | } |
| 211 | |
| 212 | return Offset; |
| 213 | } |
| 214 | |
| 215 | Error MachOLayoutBuilder::layoutTail(uint64_t Offset) { |
| 216 | // The order of LINKEDIT elements is as follows: |
| 217 | // rebase info, binding info, weak binding info, lazy binding info, export |
| 218 | // trie, data-in-code, symbol table, indirect symbol table, symbol table |
| 219 | // strings. |
| 220 | uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist); |
| 221 | uint64_t StartOfLinkEdit = Offset; |
| 222 | uint64_t StartOfRebaseInfo = StartOfLinkEdit; |
| 223 | uint64_t StartOfBindingInfo = StartOfRebaseInfo + O.Rebases.Opcodes.size(); |
| 224 | uint64_t StartOfWeakBindingInfo = StartOfBindingInfo + O.Binds.Opcodes.size(); |
| 225 | uint64_t StartOfLazyBindingInfo = |
| 226 | StartOfWeakBindingInfo + O.WeakBinds.Opcodes.size(); |
| 227 | uint64_t StartOfExportTrie = |
| 228 | StartOfLazyBindingInfo + O.LazyBinds.Opcodes.size(); |
| 229 | uint64_t StartOfFunctionStarts = StartOfExportTrie + O.Exports.Trie.size(); |
| 230 | uint64_t StartOfDataInCode = |
| 231 | StartOfFunctionStarts + O.FunctionStarts.Data.size(); |
| 232 | uint64_t StartOfSymbols = StartOfDataInCode + O.DataInCode.Data.size(); |
| 233 | uint64_t StartOfIndirectSymbols = |
| 234 | StartOfSymbols + NListSize * O.SymTable.Symbols.size(); |
| 235 | uint64_t StartOfSymbolStrings = |
| 236 | StartOfIndirectSymbols + |
| 237 | sizeof(uint32_t) * O.IndirectSymTable.Symbols.size(); |
| 238 | uint64_t LinkEditSize = |
| 239 | (StartOfSymbolStrings + StrTableBuilder.getSize()) - StartOfLinkEdit; |
| 240 | |
| 241 | // Now we have determined the layout of the contents of the __LINKEDIT |
| 242 | // segment. Update its load command. |
| 243 | if (LinkEditLoadCommand) { |
| 244 | MachO::macho_load_command *MLC = LinkEditLoadCommand; |
| 245 | switch (LinkEditLoadCommand->load_command_data.cmd) { |
| 246 | case MachO::LC_SEGMENT: |
| 247 | MLC->segment_command_data.cmdsize = sizeof(MachO::segment_command); |
| 248 | MLC->segment_command_data.fileoff = StartOfLinkEdit; |
| 249 | MLC->segment_command_data.vmsize = alignTo(LinkEditSize, PageSize); |
| 250 | MLC->segment_command_data.filesize = LinkEditSize; |
| 251 | break; |
| 252 | case MachO::LC_SEGMENT_64: |
| 253 | MLC->segment_command_64_data.cmdsize = sizeof(MachO::segment_command_64); |
| 254 | MLC->segment_command_64_data.fileoff = StartOfLinkEdit; |
| 255 | MLC->segment_command_64_data.vmsize = alignTo(LinkEditSize, PageSize); |
| 256 | MLC->segment_command_64_data.filesize = LinkEditSize; |
| 257 | break; |
| 258 | } |
| 259 | } |
| 260 | |
| 261 | for (auto &LC : O.LoadCommands) { |
| 262 | auto &MLC = LC.MachOLoadCommand; |
| 263 | auto cmd = MLC.load_command_data.cmd; |
| 264 | switch (cmd) { |
| 265 | case MachO::LC_SYMTAB: |
| 266 | MLC.symtab_command_data.symoff = StartOfSymbols; |
| 267 | MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size(); |
| 268 | MLC.symtab_command_data.stroff = StartOfSymbolStrings; |
| 269 | MLC.symtab_command_data.strsize = StrTableBuilder.getSize(); |
| 270 | break; |
| 271 | case MachO::LC_DYSYMTAB: { |
| 272 | if (MLC.dysymtab_command_data.ntoc != 0 || |
| 273 | MLC.dysymtab_command_data.nmodtab != 0 || |
| 274 | MLC.dysymtab_command_data.nextrefsyms != 0 || |
| 275 | MLC.dysymtab_command_data.nlocrel != 0 || |
| 276 | MLC.dysymtab_command_data.nextrel != 0) |
| 277 | return createStringError(llvm::errc::not_supported, |
| 278 | "shared library is not yet supported"); |
| 279 | |
| 280 | if (!O.IndirectSymTable.Symbols.empty()) { |
| 281 | MLC.dysymtab_command_data.indirectsymoff = StartOfIndirectSymbols; |
| 282 | MLC.dysymtab_command_data.nindirectsyms = |
| 283 | O.IndirectSymTable.Symbols.size(); |
| 284 | } |
| 285 | |
| 286 | updateDySymTab(MLC); |
| 287 | break; |
| 288 | } |
| 289 | case MachO::LC_DATA_IN_CODE: |
| 290 | MLC.linkedit_data_command_data.dataoff = StartOfDataInCode; |
| 291 | MLC.linkedit_data_command_data.datasize = O.DataInCode.Data.size(); |
| 292 | break; |
| 293 | case MachO::LC_FUNCTION_STARTS: |
| 294 | MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts; |
| 295 | MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size(); |
| 296 | break; |
| 297 | case MachO::LC_DYLD_INFO: |
| 298 | case MachO::LC_DYLD_INFO_ONLY: |
| 299 | MLC.dyld_info_command_data.rebase_off = |
| 300 | O.Rebases.Opcodes.empty() ? 0 : StartOfRebaseInfo; |
| 301 | MLC.dyld_info_command_data.rebase_size = O.Rebases.Opcodes.size(); |
| 302 | MLC.dyld_info_command_data.bind_off = |
| 303 | O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo; |
| 304 | MLC.dyld_info_command_data.bind_size = O.Binds.Opcodes.size(); |
| 305 | MLC.dyld_info_command_data.weak_bind_off = |
| 306 | O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo; |
| 307 | MLC.dyld_info_command_data.weak_bind_size = O.WeakBinds.Opcodes.size(); |
| 308 | MLC.dyld_info_command_data.lazy_bind_off = |
| 309 | O.LazyBinds.Opcodes.empty() ? 0 : StartOfLazyBindingInfo; |
| 310 | MLC.dyld_info_command_data.lazy_bind_size = O.LazyBinds.Opcodes.size(); |
| 311 | MLC.dyld_info_command_data.export_off = |
| 312 | O.Exports.Trie.empty() ? 0 : StartOfExportTrie; |
| 313 | MLC.dyld_info_command_data.export_size = O.Exports.Trie.size(); |
| 314 | break; |
| 315 | case MachO::LC_LOAD_DYLINKER: |
| 316 | case MachO::LC_MAIN: |
| 317 | case MachO::LC_RPATH: |
| 318 | case MachO::LC_SEGMENT: |
| 319 | case MachO::LC_SEGMENT_64: |
| 320 | case MachO::LC_VERSION_MIN_MACOSX: |
| 321 | case MachO::LC_BUILD_VERSION: |
| 322 | case MachO::LC_ID_DYLIB: |
| 323 | case MachO::LC_LOAD_DYLIB: |
| 324 | case MachO::LC_UUID: |
| 325 | case MachO::LC_SOURCE_VERSION: |
| 326 | // Nothing to update. |
| 327 | break; |
| 328 | default: |
| 329 | // Abort if it's unsupported in order to prevent corrupting the object. |
| 330 | return createStringError(llvm::errc::not_supported, |
| 331 | "unsupported load command (cmd=0x%x)", cmd); |
| 332 | } |
| 333 | } |
| 334 | |
| 335 | return Error::success(); |
| 336 | } |
| 337 | |
| 338 | Error MachOLayoutBuilder::layout() { |
| 339 | O.Header.NCmds = O.LoadCommands.size(); |
| 340 | O.Header.SizeOfCmds = computeSizeOfCmds(); |
| 341 | constructStringTable(); |
| 342 | updateSymbolIndexes(); |
| 343 | uint64_t Offset = layoutSegments(); |
| 344 | Offset = layoutRelocations(Offset); |
| 345 | return layoutTail(Offset); |
| 346 | } |
| 347 | |
| 348 | } // end namespace macho |
| 349 | } // end namespace objcopy |
| 350 | } // end namespace llvm |