//===- lib/ReaderWriter/MachO/MachONormalizedFile.h -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

///
/// \file These data structures comprise the "normalized" view of
/// mach-o object files. The normalized view is an in-memory only data structure
/// which is always in native endianness and pointer size.
///
/// The normalized view easily converts to and from YAML using YAML I/O.
///
/// The normalized view converts to and from binary mach-o object files using
/// the writeBinary() and readBinary() functions.
///
/// The normalized view converts to and from lld::Atoms using the
/// normalizedToAtoms() and normalizedFromAtoms() functions.
///
/// Overall, the conversion paths available look like:
///
///                 +---------------+
///                 | binary mach-o |
///                 +---------------+
///                         ^
///                         |
///                         v
///                  +------------+         +------+
///                  | normalized |   <->   | yaml |
///                  +------------+         +------+
///                         ^
///                         |
///                         v
///                     +-------+
///                     | Atoms |
///                     +-------+
///

#ifndef LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H
#define LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H

#include "DebugInfo.h"
#include "lld/Common/LLVM.h"
#include "lld/Core/Error.h"
#include "lld/ReaderWriter/MachOLinkingContext.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/YAMLTraits.h"
#include <cstdint>
#include <memory>
#include <system_error>
#include <vector>

Nick Kledzik6edd7222014-01-11 01:07:43 +000056using llvm::BumpPtrAllocator;
Nick Kledzik30332b12013-10-08 00:43:34 +000057using llvm::yaml::Hex64;
58using llvm::yaml::Hex32;
Nick Kledzik21921372014-07-24 23:06:56 +000059using llvm::yaml::Hex16;
Nick Kledzik30332b12013-10-08 00:43:34 +000060using llvm::yaml::Hex8;
61using llvm::yaml::SequenceTraits;
62using llvm::MachO::HeaderFileType;
63using llvm::MachO::BindType;
64using llvm::MachO::RebaseType;
65using llvm::MachO::NListType;
66using llvm::MachO::RelocationInfoType;
67using llvm::MachO::SectionType;
68using llvm::MachO::LoadCommandType;
69using llvm::MachO::ExportSymbolKind;
Nick Kledzik21921372014-07-24 23:06:56 +000070using llvm::MachO::DataRegionType;
Nick Kledzik30332b12013-10-08 00:43:34 +000071
72namespace lld {
73namespace mach_o {
74namespace normalized {
75
76
77/// The real mach-o relocation record is 8-bytes on disk and is
78/// encoded in one of two different bit-field patterns. This
Nick Kledzik369ffd12013-10-08 02:07:19 +000079/// normalized form has the union of all possible fields.
Nick Kledzik30332b12013-10-08 00:43:34 +000080struct Relocation {
Shankar Easwaran3d8de472014-01-27 03:09:26 +000081 Relocation() : offset(0), scattered(false),
82 type(llvm::MachO::GENERIC_RELOC_VANILLA),
83 length(0), pcRel(false), isExtern(false), value(0),
Nick Kledzik30332b12013-10-08 00:43:34 +000084 symbol(0) { }
85
86 Hex32 offset;
87 bool scattered;
88 RelocationInfoType type;
89 uint8_t length;
90 bool pcRel;
91 bool isExtern;
92 Hex32 value;
93 uint32_t symbol;
94};
95
96/// A typedef so that YAML I/O can treat this vector as a sequence.
97typedef std::vector<Relocation> Relocations;
98
99/// A typedef so that YAML I/O can process the raw bytes in a section.
100typedef std::vector<Hex8> ContentBytes;
101
102/// A typedef so that YAML I/O can treat indirect symbols as a flow sequence.
103typedef std::vector<uint32_t> IndirectSymbols;
104
105/// A typedef so that YAML I/O can encode/decode section attributes.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000106LLVM_YAML_STRONG_TYPEDEF(uint32_t, SectionAttr)
Nick Kledzik30332b12013-10-08 00:43:34 +0000107
Pete Cooper3f564a52016-03-24 00:36:37 +0000108/// A typedef so that YAML I/O can encode/decode section alignment.
109LLVM_YAML_STRONG_TYPEDEF(uint16_t, SectionAlignment)
110
Nick Kledzik30332b12013-10-08 00:43:34 +0000111/// Mach-O has a 32-bit and 64-bit section record. This normalized form
112/// can support either kind.
113struct Section {
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000114 Section() : type(llvm::MachO::S_REGULAR),
Rui Ueyamaf006f4d2015-03-26 01:44:01 +0000115 attributes(0), alignment(1), address(0) { }
Nick Kledzik30332b12013-10-08 00:43:34 +0000116
117 StringRef segmentName;
118 StringRef sectionName;
119 SectionType type;
120 SectionAttr attributes;
Pete Cooper3f564a52016-03-24 00:36:37 +0000121 SectionAlignment alignment;
Nick Kledzik30332b12013-10-08 00:43:34 +0000122 Hex64 address;
Nick Kledzik6edd7222014-01-11 01:07:43 +0000123 ArrayRef<uint8_t> content;
Nick Kledzik30332b12013-10-08 00:43:34 +0000124 Relocations relocations;
125 IndirectSymbols indirectSymbols;
126};
127
128
129/// A typedef so that YAML I/O can encode/decode the scope bits of an nlist.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000130LLVM_YAML_STRONG_TYPEDEF(uint8_t, SymbolScope)
Nick Kledzik30332b12013-10-08 00:43:34 +0000131
132/// A typedef so that YAML I/O can encode/decode the desc bits of an nlist.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000133LLVM_YAML_STRONG_TYPEDEF(uint16_t, SymbolDesc)
Nick Kledzik30332b12013-10-08 00:43:34 +0000134
135/// Mach-O has a 32-bit and 64-bit symbol table entry (nlist), and the symbol
136/// type and scope and mixed in the same n_type field. This normalized form
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000137/// works for any pointer size and separates out the type and scope.
Nick Kledzik30332b12013-10-08 00:43:34 +0000138struct Symbol {
139 Symbol() : type(llvm::MachO::N_UNDF), scope(0), sect(0), desc(0), value(0) { }
140
141 StringRef name;
142 NListType type;
143 SymbolScope scope;
144 uint8_t sect;
145 SymbolDesc desc;
146 Hex64 value;
147};
148
Lang Hamesac2adce2015-12-11 23:25:09 +0000149/// Check whether the given section type indicates a zero-filled section.
150// FIXME: Utility functions of this kind should probably be moved into
151// llvm/Support.
152inline bool isZeroFillSection(SectionType T) {
153 return (T == llvm::MachO::S_ZEROFILL ||
154 T == llvm::MachO::S_THREAD_LOCAL_ZEROFILL);
155}
156
Nick Kledzik30332b12013-10-08 00:43:34 +0000157/// A typedef so that YAML I/O can (de/en)code the protection bits of a segment.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000158LLVM_YAML_STRONG_TYPEDEF(uint32_t, VMProtect)
Nick Kledzik30332b12013-10-08 00:43:34 +0000159
Nick Kledzik5b9e48b2014-11-19 02:21:53 +0000160/// A typedef to hold verions X.Y.X packed into 32-bit xxxx.yy.zz
161LLVM_YAML_STRONG_TYPEDEF(uint32_t, PackedVersion)
162
Nick Kledzik30332b12013-10-08 00:43:34 +0000163/// Segments are only used in normalized final linked images (not in relocatable
164/// object files). They specify how a range of the file is loaded.
165struct Segment {
166 StringRef name;
167 Hex64 address;
168 Hex64 size;
Pete Cooperb8fec3e2016-02-06 00:51:16 +0000169 VMProtect init_access;
170 VMProtect max_access;
Nick Kledzik30332b12013-10-08 00:43:34 +0000171};
172
173/// Only used in normalized final linked images to specify on which dylibs
174/// it depends.
175struct DependentDylib {
176 StringRef path;
177 LoadCommandType kind;
Nick Kledzik5b9e48b2014-11-19 02:21:53 +0000178 PackedVersion compatVersion;
179 PackedVersion currentVersion;
Nick Kledzik30332b12013-10-08 00:43:34 +0000180};
181
182/// A normalized rebasing entry. Only used in normalized final linked images.
183struct RebaseLocation {
184 Hex32 segOffset;
185 uint8_t segIndex;
186 RebaseType kind;
187};
188
189/// A normalized binding entry. Only used in normalized final linked images.
190struct BindLocation {
191 Hex32 segOffset;
192 uint8_t segIndex;
193 BindType kind;
194 bool canBeNull;
195 int ordinal;
196 StringRef symbolName;
197 Hex64 addend;
198};
199
200/// A typedef so that YAML I/O can encode/decode export flags.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000201LLVM_YAML_STRONG_TYPEDEF(uint32_t, ExportFlags)
Nick Kledzik30332b12013-10-08 00:43:34 +0000202
203/// A normalized export entry. Only used in normalized final linked images.
204struct Export {
205 StringRef name;
206 Hex64 offset;
207 ExportSymbolKind kind;
208 ExportFlags flags;
209 Hex32 otherOffset;
210 StringRef otherName;
211};
212
Nick Kledzik21921372014-07-24 23:06:56 +0000213/// A normalized data-in-code entry.
214struct DataInCode {
215 Hex32 offset;
216 Hex16 length;
217 DataRegionType kind;
218};
219
Nick Kledzik30332b12013-10-08 00:43:34 +0000220/// A typedef so that YAML I/O can encode/decode mach_header.flags.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000221LLVM_YAML_STRONG_TYPEDEF(uint32_t, FileFlags)
Nick Kledzik30332b12013-10-08 00:43:34 +0000222
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000223///
Nick Kledzik30332b12013-10-08 00:43:34 +0000224struct NormalizedFile {
Benjamin Kramercfacc9d2015-06-23 19:55:04 +0000225 MachOLinkingContext::Arch arch = MachOLinkingContext::arch_unknown;
226 HeaderFileType fileType = llvm::MachO::MH_OBJECT;
227 FileFlags flags = 0;
Nick Kledzik30332b12013-10-08 00:43:34 +0000228 std::vector<Segment> segments; // Not used in object files.
229 std::vector<Section> sections;
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000230
Nick Kledzik30332b12013-10-08 00:43:34 +0000231 // Symbols sorted by kind.
232 std::vector<Symbol> localSymbols;
233 std::vector<Symbol> globalSymbols;
234 std::vector<Symbol> undefinedSymbols;
Lang Hames436f7d62016-07-27 22:55:30 +0000235 std::vector<Symbol> stabsSymbols;
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000236
Nick Kledzik30332b12013-10-08 00:43:34 +0000237 // Maps to load commands with no LINKEDIT content (final linked images only).
238 std::vector<DependentDylib> dependentDylibs;
Benjamin Kramercfacc9d2015-06-23 19:55:04 +0000239 StringRef installName; // dylibs only
240 PackedVersion compatVersion = 0; // dylibs only
241 PackedVersion currentVersion = 0; // dylibs only
242 bool hasUUID = false;
Pete Cooper354809e2016-02-03 22:28:29 +0000243 bool hasMinVersionLoadCommand = false;
Pete Cooper9b28a452016-02-09 02:10:39 +0000244 bool generateDataInCodeLoadCommand = false;
Nick Kledzik30332b12013-10-08 00:43:34 +0000245 std::vector<StringRef> rpaths;
Benjamin Kramercfacc9d2015-06-23 19:55:04 +0000246 Hex64 entryAddress = 0;
247 Hex64 stackSize = 0;
248 MachOLinkingContext::OS os = MachOLinkingContext::OS::unknown;
249 Hex64 sourceVersion = 0;
250 PackedVersion minOSverson = 0;
251 PackedVersion sdkVersion = 0;
Pete Cooperceee5de2016-02-04 02:16:08 +0000252 LoadCommandType minOSVersionKind = (LoadCommandType)0;
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000253
Nick Kledzik30332b12013-10-08 00:43:34 +0000254 // Maps to load commands with LINKEDIT content (final linked images only).
Benjamin Kramercfacc9d2015-06-23 19:55:04 +0000255 Hex32 pageSize = 0;
Nick Kledzik30332b12013-10-08 00:43:34 +0000256 std::vector<RebaseLocation> rebasingInfo;
257 std::vector<BindLocation> bindingInfo;
258 std::vector<BindLocation> weakBindingInfo;
259 std::vector<BindLocation> lazyBindingInfo;
260 std::vector<Export> exportInfo;
Pete Cooper41f3e8e2016-02-09 01:38:13 +0000261 std::vector<uint8_t> functionStarts;
Nick Kledzik21921372014-07-24 23:06:56 +0000262 std::vector<DataInCode> dataInCode;
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000263
Nick Kledzik30332b12013-10-08 00:43:34 +0000264 // TODO:
265 // code-signature
266 // split-seg-info
267 // function-starts
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000268
Nick Kledzik6edd7222014-01-11 01:07:43 +0000269 // For any allocations in this struct which need to be owned by this struct.
270 BumpPtrAllocator ownedAllocations;
Nick Kledzik30332b12013-10-08 00:43:34 +0000271};
272
Nick Kledzik635f9c72014-09-04 20:08:30 +0000273/// Tests if a file is a non-fat mach-o object file.
274bool isThinObjectFile(StringRef path, MachOLinkingContext::Arch &arch);
Nick Kledzik30332b12013-10-08 00:43:34 +0000275
Nick Kledzik14b5d202014-10-08 01:48:10 +0000276/// If the buffer is a fat file with the request arch, then this function
277/// returns true with 'offset' and 'size' set to location of the arch slice
278/// within the buffer. Otherwise returns false;
Rafael Espindolaed48e532015-04-27 22:48:51 +0000279bool sliceFromFatFile(MemoryBufferRef mb, MachOLinkingContext::Arch arch,
280 uint32_t &offset, uint32_t &size);
Nick Kledzik14b5d202014-10-08 01:48:10 +0000281
Nick Kledzik30332b12013-10-08 00:43:34 +0000282/// Reads a mach-o file and produces an in-memory normalized view.
Pete Cooperc6e7b812016-03-30 23:58:24 +0000283llvm::Expected<std::unique_ptr<NormalizedFile>>
Joey Gouly010b3762014-01-14 22:32:38 +0000284readBinary(std::unique_ptr<MemoryBuffer> &mb,
285 const MachOLinkingContext::Arch arch);
Nick Kledzik30332b12013-10-08 00:43:34 +0000286
287/// Takes in-memory normalized view and writes a mach-o object file.
Pete Cooperfefbd222016-03-30 23:10:39 +0000288llvm::Error writeBinary(const NormalizedFile &file, StringRef path);
Nick Kledzik30332b12013-10-08 00:43:34 +0000289
Rui Ueyama7f8ca6e2019-04-17 01:47:16 +0000290size_t headerAndLoadCommandsSize(const NormalizedFile &file,
291 bool includeFunctionStarts);
Nick Kledzik30332b12013-10-08 00:43:34 +0000292
293
294/// Parses a yaml encoded mach-o file to produce an in-memory normalized view.
Pete Cooper2f6216c2016-03-31 01:13:04 +0000295llvm::Expected<std::unique_ptr<NormalizedFile>>
Nick Kledzik30332b12013-10-08 00:43:34 +0000296readYaml(std::unique_ptr<MemoryBuffer> &mb);
297
298/// Writes a yaml encoded mach-o files given an in-memory normalized view.
Rafael Espindolab1a4d3a2014-06-12 14:53:47 +0000299std::error_code writeYaml(const NormalizedFile &file, raw_ostream &out);
Nick Kledzik30332b12013-10-08 00:43:34 +0000300
Pete Cooperec4e1662016-03-30 23:43:27 +0000301llvm::Error
Rui Ueyama1d510422014-12-12 07:31:09 +0000302normalizedObjectToAtoms(MachOFile *file,
303 const NormalizedFile &normalizedFile,
304 bool copyRefs);
305
Pete Cooperec4e1662016-03-30 23:43:27 +0000306llvm::Error
Rui Ueyama1d510422014-12-12 07:31:09 +0000307normalizedDylibToAtoms(MachODylibFile *file,
308 const NormalizedFile &normalizedFile,
309 bool copyRefs);
310
Nick Kledzik30332b12013-10-08 00:43:34 +0000311/// Takes in-memory normalized dylib or object and parses it into lld::File
Pete Cooperec4e1662016-03-30 23:43:27 +0000312llvm::Expected<std::unique_ptr<lld::File>>
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000313normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path,
Nick Kledzik6edd7222014-01-11 01:07:43 +0000314 bool copyRefs);
Nick Kledzik30332b12013-10-08 00:43:34 +0000315
316/// Takes atoms and generates a normalized macho-o view.
Pete Cooperfefbd222016-03-30 23:10:39 +0000317llvm::Expected<std::unique_ptr<NormalizedFile>>
Nick Kledzik30332b12013-10-08 00:43:34 +0000318normalizedFromAtoms(const lld::File &atomFile, const MachOLinkingContext &ctxt);
319
320
} // namespace normalized

323/// Class for interfacing mach-o yaml files into generic yaml parsing
324class MachOYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler {
Nick Kledzik378066c2014-06-30 22:57:33 +0000325public:
326 MachOYamlIOTaggedDocumentHandler(MachOLinkingContext::Arch arch)
327 : _arch(arch) { }
Rui Ueyamabc69bce2014-03-28 21:36:33 +0000328 bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override;
Nick Kledzik378066c2014-06-30 22:57:33 +0000329private:
330 const MachOLinkingContext::Arch _arch;
Nick Kledzik6edd7222014-01-11 01:07:43 +0000331};
332
Nick Kledzik30332b12013-10-08 00:43:34 +0000333} // namespace mach_o
334} // namespace lld
335
Rui Ueyama014192db2013-11-15 03:09:26 +0000336#endif // LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H