blob: c2d0ac3a2028d0f1abeb14853f1ff4a0f69bff93 [file] [log] [blame]
Nick Kledzik5b9e48b2014-11-19 02:21:53 +00001//===- lib/ReaderWriter/MachO/MachONormalizedFile.h -----------------------===//
Nick Kledzik30332b12013-10-08 00:43:34 +00002//
3// The LLVM Linker
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10///
11/// \file These data structures comprise the "normalized" view of
12/// mach-o object files. The normalized view is an in-memory only data structure
Shankar Easwaran3d8de472014-01-27 03:09:26 +000013/// which is always in native endianness and pointer size.
14///
15/// The normalized view easily converts to and from YAML using YAML I/O.
Nick Kledzik30332b12013-10-08 00:43:34 +000016///
17/// The normalized view converts to and from binary mach-o object files using
18/// the writeBinary() and readBinary() functions.
19///
/// The normalized view converts to and from lld::Atoms using the
/// normalizedToAtoms() and normalizedFromAtoms() functions.
22///
23/// Overall, the conversion paths available look like:
24///
///                 +---------------+
///                 | binary mach-o |
///                 +---------------+
///                         ^
///                         |
///                         v
///                   +------------+         +------+
///                   | normalized |   <->   | yaml |
///                   +------------+         +------+
///                         ^
///                         |
///                         v
///                     +-------+
///                     | Atoms |
///                     +-------+
40///
Nick Kledzik30332b12013-10-08 00:43:34 +000041
Pete Cooperd75b7182016-02-08 21:50:45 +000042#ifndef LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H
43#define LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H
44
Lang Hames436f7d62016-07-27 22:55:30 +000045#include "DebugInfo.h"
Nick Kledzik30332b12013-10-08 00:43:34 +000046#include "lld/Core/Error.h"
47#include "lld/Core/LLVM.h"
48#include "lld/ReaderWriter/MachOLinkingContext.h"
Nick Kledzik30332b12013-10-08 00:43:34 +000049#include "llvm/ADT/SmallString.h"
50#include "llvm/ADT/StringRef.h"
Nick Kledzik6edd7222014-01-11 01:07:43 +000051#include "llvm/Support/Allocator.h"
Pete Coopere5fa5a32015-12-16 22:03:21 +000052#include "llvm/Support/Debug.h"
Nick Kledzik30332b12013-10-08 00:43:34 +000053#include "llvm/Support/ErrorOr.h"
54#include "llvm/Support/MachO.h"
55#include "llvm/Support/YAMLTraits.h"
56
Nick Kledzik6edd7222014-01-11 01:07:43 +000057using llvm::BumpPtrAllocator;
Nick Kledzik30332b12013-10-08 00:43:34 +000058using llvm::yaml::Hex64;
59using llvm::yaml::Hex32;
Nick Kledzik21921372014-07-24 23:06:56 +000060using llvm::yaml::Hex16;
Nick Kledzik30332b12013-10-08 00:43:34 +000061using llvm::yaml::Hex8;
62using llvm::yaml::SequenceTraits;
63using llvm::MachO::HeaderFileType;
64using llvm::MachO::BindType;
65using llvm::MachO::RebaseType;
66using llvm::MachO::NListType;
67using llvm::MachO::RelocationInfoType;
68using llvm::MachO::SectionType;
69using llvm::MachO::LoadCommandType;
70using llvm::MachO::ExportSymbolKind;
Nick Kledzik21921372014-07-24 23:06:56 +000071using llvm::MachO::DataRegionType;
Nick Kledzik30332b12013-10-08 00:43:34 +000072
73namespace lld {
74namespace mach_o {
75namespace normalized {
76
77
78/// The real mach-o relocation record is 8-bytes on disk and is
79/// encoded in one of two different bit-field patterns. This
Nick Kledzik369ffd12013-10-08 02:07:19 +000080/// normalized form has the union of all possible fields.
Nick Kledzik30332b12013-10-08 00:43:34 +000081struct Relocation {
Shankar Easwaran3d8de472014-01-27 03:09:26 +000082 Relocation() : offset(0), scattered(false),
83 type(llvm::MachO::GENERIC_RELOC_VANILLA),
84 length(0), pcRel(false), isExtern(false), value(0),
Nick Kledzik30332b12013-10-08 00:43:34 +000085 symbol(0) { }
86
87 Hex32 offset;
88 bool scattered;
89 RelocationInfoType type;
90 uint8_t length;
91 bool pcRel;
92 bool isExtern;
93 Hex32 value;
94 uint32_t symbol;
Lang Hames436f7d62016-07-27 22:55:30 +000095
96#ifndef NDEBUG
97 raw_ostream& operator<<(raw_ostream &OS) const {
98 dump(OS);
99 return OS;
100 }
101
102 void dump(raw_ostream &OS = llvm::dbgs()) const;
103#endif
Nick Kledzik30332b12013-10-08 00:43:34 +0000104};
105
Lang Hames436f7d62016-07-27 22:55:30 +0000106inline raw_ostream& operator<<(raw_ostream &OS, const Relocation &R) {
107 R.dump(OS);
108 return OS;
109}
110
Nick Kledzik30332b12013-10-08 00:43:34 +0000111/// A typedef so that YAML I/O can treat this vector as a sequence.
112typedef std::vector<Relocation> Relocations;
113
114/// A typedef so that YAML I/O can process the raw bytes in a section.
115typedef std::vector<Hex8> ContentBytes;
116
117/// A typedef so that YAML I/O can treat indirect symbols as a flow sequence.
118typedef std::vector<uint32_t> IndirectSymbols;
119
120/// A typedef so that YAML I/O can encode/decode section attributes.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000121LLVM_YAML_STRONG_TYPEDEF(uint32_t, SectionAttr)
Nick Kledzik30332b12013-10-08 00:43:34 +0000122
Pete Cooper3f564a52016-03-24 00:36:37 +0000123/// A typedef so that YAML I/O can encode/decode section alignment.
124LLVM_YAML_STRONG_TYPEDEF(uint16_t, SectionAlignment)
125
Nick Kledzik30332b12013-10-08 00:43:34 +0000126/// Mach-O has a 32-bit and 64-bit section record. This normalized form
127/// can support either kind.
128struct Section {
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000129 Section() : type(llvm::MachO::S_REGULAR),
Rui Ueyamaf006f4d2015-03-26 01:44:01 +0000130 attributes(0), alignment(1), address(0) { }
Nick Kledzik30332b12013-10-08 00:43:34 +0000131
132 StringRef segmentName;
133 StringRef sectionName;
134 SectionType type;
135 SectionAttr attributes;
Pete Cooper3f564a52016-03-24 00:36:37 +0000136 SectionAlignment alignment;
Nick Kledzik30332b12013-10-08 00:43:34 +0000137 Hex64 address;
Nick Kledzik6edd7222014-01-11 01:07:43 +0000138 ArrayRef<uint8_t> content;
Nick Kledzik30332b12013-10-08 00:43:34 +0000139 Relocations relocations;
140 IndirectSymbols indirectSymbols;
Pete Coopere5fa5a32015-12-16 22:03:21 +0000141
142#ifndef NDEBUG
143 raw_ostream& operator<<(raw_ostream &OS) const {
144 dump(OS);
145 return OS;
146 }
147
148 void dump(raw_ostream &OS = llvm::dbgs()) const;
149#endif
Nick Kledzik30332b12013-10-08 00:43:34 +0000150};
151
152
153/// A typedef so that YAML I/O can encode/decode the scope bits of an nlist.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000154LLVM_YAML_STRONG_TYPEDEF(uint8_t, SymbolScope)
Nick Kledzik30332b12013-10-08 00:43:34 +0000155
156/// A typedef so that YAML I/O can encode/decode the desc bits of an nlist.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000157LLVM_YAML_STRONG_TYPEDEF(uint16_t, SymbolDesc)
Nick Kledzik30332b12013-10-08 00:43:34 +0000158
159/// Mach-O has a 32-bit and 64-bit symbol table entry (nlist), and the symbol
160/// type and scope and mixed in the same n_type field. This normalized form
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000161/// works for any pointer size and separates out the type and scope.
Nick Kledzik30332b12013-10-08 00:43:34 +0000162struct Symbol {
163 Symbol() : type(llvm::MachO::N_UNDF), scope(0), sect(0), desc(0), value(0) { }
164
165 StringRef name;
166 NListType type;
167 SymbolScope scope;
168 uint8_t sect;
169 SymbolDesc desc;
170 Hex64 value;
171};
172
Lang Hamesac2adce2015-12-11 23:25:09 +0000173/// Check whether the given section type indicates a zero-filled section.
174// FIXME: Utility functions of this kind should probably be moved into
175// llvm/Support.
176inline bool isZeroFillSection(SectionType T) {
177 return (T == llvm::MachO::S_ZEROFILL ||
178 T == llvm::MachO::S_THREAD_LOCAL_ZEROFILL);
179}
180
Nick Kledzik30332b12013-10-08 00:43:34 +0000181/// A typedef so that YAML I/O can (de/en)code the protection bits of a segment.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000182LLVM_YAML_STRONG_TYPEDEF(uint32_t, VMProtect)
Nick Kledzik30332b12013-10-08 00:43:34 +0000183
Nick Kledzik5b9e48b2014-11-19 02:21:53 +0000184/// A typedef to hold verions X.Y.X packed into 32-bit xxxx.yy.zz
185LLVM_YAML_STRONG_TYPEDEF(uint32_t, PackedVersion)
186
Nick Kledzik30332b12013-10-08 00:43:34 +0000187/// Segments are only used in normalized final linked images (not in relocatable
188/// object files). They specify how a range of the file is loaded.
189struct Segment {
190 StringRef name;
191 Hex64 address;
192 Hex64 size;
Pete Cooperb8fec3e2016-02-06 00:51:16 +0000193 VMProtect init_access;
194 VMProtect max_access;
Nick Kledzik30332b12013-10-08 00:43:34 +0000195};
196
197/// Only used in normalized final linked images to specify on which dylibs
198/// it depends.
199struct DependentDylib {
200 StringRef path;
201 LoadCommandType kind;
Nick Kledzik5b9e48b2014-11-19 02:21:53 +0000202 PackedVersion compatVersion;
203 PackedVersion currentVersion;
Nick Kledzik30332b12013-10-08 00:43:34 +0000204};
205
206/// A normalized rebasing entry. Only used in normalized final linked images.
207struct RebaseLocation {
208 Hex32 segOffset;
209 uint8_t segIndex;
210 RebaseType kind;
211};
212
213/// A normalized binding entry. Only used in normalized final linked images.
214struct BindLocation {
215 Hex32 segOffset;
216 uint8_t segIndex;
217 BindType kind;
218 bool canBeNull;
219 int ordinal;
220 StringRef symbolName;
221 Hex64 addend;
222};
223
224/// A typedef so that YAML I/O can encode/decode export flags.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000225LLVM_YAML_STRONG_TYPEDEF(uint32_t, ExportFlags)
Nick Kledzik30332b12013-10-08 00:43:34 +0000226
227/// A normalized export entry. Only used in normalized final linked images.
228struct Export {
229 StringRef name;
230 Hex64 offset;
231 ExportSymbolKind kind;
232 ExportFlags flags;
233 Hex32 otherOffset;
234 StringRef otherName;
235};
236
Nick Kledzik21921372014-07-24 23:06:56 +0000237/// A normalized data-in-code entry.
238struct DataInCode {
239 Hex32 offset;
240 Hex16 length;
241 DataRegionType kind;
242};
243
Nick Kledzik30332b12013-10-08 00:43:34 +0000244/// A typedef so that YAML I/O can encode/decode mach_header.flags.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000245LLVM_YAML_STRONG_TYPEDEF(uint32_t, FileFlags)
Nick Kledzik30332b12013-10-08 00:43:34 +0000246
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000247///
Nick Kledzik30332b12013-10-08 00:43:34 +0000248struct NormalizedFile {
Benjamin Kramercfacc9d2015-06-23 19:55:04 +0000249 MachOLinkingContext::Arch arch = MachOLinkingContext::arch_unknown;
250 HeaderFileType fileType = llvm::MachO::MH_OBJECT;
251 FileFlags flags = 0;
Nick Kledzik30332b12013-10-08 00:43:34 +0000252 std::vector<Segment> segments; // Not used in object files.
253 std::vector<Section> sections;
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000254
Nick Kledzik30332b12013-10-08 00:43:34 +0000255 // Symbols sorted by kind.
256 std::vector<Symbol> localSymbols;
257 std::vector<Symbol> globalSymbols;
258 std::vector<Symbol> undefinedSymbols;
Lang Hames436f7d62016-07-27 22:55:30 +0000259 std::vector<Symbol> stabsSymbols;
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000260
Nick Kledzik30332b12013-10-08 00:43:34 +0000261 // Maps to load commands with no LINKEDIT content (final linked images only).
262 std::vector<DependentDylib> dependentDylibs;
Benjamin Kramercfacc9d2015-06-23 19:55:04 +0000263 StringRef installName; // dylibs only
264 PackedVersion compatVersion = 0; // dylibs only
265 PackedVersion currentVersion = 0; // dylibs only
266 bool hasUUID = false;
Pete Cooper354809e2016-02-03 22:28:29 +0000267 bool hasMinVersionLoadCommand = false;
Pete Cooper9b28a452016-02-09 02:10:39 +0000268 bool generateDataInCodeLoadCommand = false;
Nick Kledzik30332b12013-10-08 00:43:34 +0000269 std::vector<StringRef> rpaths;
Benjamin Kramercfacc9d2015-06-23 19:55:04 +0000270 Hex64 entryAddress = 0;
271 Hex64 stackSize = 0;
272 MachOLinkingContext::OS os = MachOLinkingContext::OS::unknown;
273 Hex64 sourceVersion = 0;
274 PackedVersion minOSverson = 0;
275 PackedVersion sdkVersion = 0;
Pete Cooperceee5de2016-02-04 02:16:08 +0000276 LoadCommandType minOSVersionKind = (LoadCommandType)0;
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000277
Nick Kledzik30332b12013-10-08 00:43:34 +0000278 // Maps to load commands with LINKEDIT content (final linked images only).
Benjamin Kramercfacc9d2015-06-23 19:55:04 +0000279 Hex32 pageSize = 0;
Nick Kledzik30332b12013-10-08 00:43:34 +0000280 std::vector<RebaseLocation> rebasingInfo;
281 std::vector<BindLocation> bindingInfo;
282 std::vector<BindLocation> weakBindingInfo;
283 std::vector<BindLocation> lazyBindingInfo;
284 std::vector<Export> exportInfo;
Pete Cooper41f3e8e2016-02-09 01:38:13 +0000285 std::vector<uint8_t> functionStarts;
Nick Kledzik21921372014-07-24 23:06:56 +0000286 std::vector<DataInCode> dataInCode;
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000287
Nick Kledzik30332b12013-10-08 00:43:34 +0000288 // TODO:
289 // code-signature
290 // split-seg-info
291 // function-starts
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000292
Nick Kledzik6edd7222014-01-11 01:07:43 +0000293 // For any allocations in this struct which need to be owned by this struct.
294 BumpPtrAllocator ownedAllocations;
Nick Kledzik30332b12013-10-08 00:43:34 +0000295};
296
Nick Kledzik635f9c72014-09-04 20:08:30 +0000297/// Tests if a file is a non-fat mach-o object file.
298bool isThinObjectFile(StringRef path, MachOLinkingContext::Arch &arch);
Nick Kledzik30332b12013-10-08 00:43:34 +0000299
Nick Kledzik14b5d202014-10-08 01:48:10 +0000300/// If the buffer is a fat file with the request arch, then this function
301/// returns true with 'offset' and 'size' set to location of the arch slice
302/// within the buffer. Otherwise returns false;
Rafael Espindolaed48e532015-04-27 22:48:51 +0000303bool sliceFromFatFile(MemoryBufferRef mb, MachOLinkingContext::Arch arch,
304 uint32_t &offset, uint32_t &size);
Nick Kledzik14b5d202014-10-08 01:48:10 +0000305
Nick Kledzik30332b12013-10-08 00:43:34 +0000306/// Reads a mach-o file and produces an in-memory normalized view.
Pete Cooperc6e7b812016-03-30 23:58:24 +0000307llvm::Expected<std::unique_ptr<NormalizedFile>>
Joey Gouly010b3762014-01-14 22:32:38 +0000308readBinary(std::unique_ptr<MemoryBuffer> &mb,
309 const MachOLinkingContext::Arch arch);
Nick Kledzik30332b12013-10-08 00:43:34 +0000310
311/// Takes in-memory normalized view and writes a mach-o object file.
Pete Cooperfefbd222016-03-30 23:10:39 +0000312llvm::Error writeBinary(const NormalizedFile &file, StringRef path);
Nick Kledzik30332b12013-10-08 00:43:34 +0000313
314size_t headerAndLoadCommandsSize(const NormalizedFile &file);
315
316
317/// Parses a yaml encoded mach-o file to produce an in-memory normalized view.
Pete Cooper2f6216c2016-03-31 01:13:04 +0000318llvm::Expected<std::unique_ptr<NormalizedFile>>
Nick Kledzik30332b12013-10-08 00:43:34 +0000319readYaml(std::unique_ptr<MemoryBuffer> &mb);
320
321/// Writes a yaml encoded mach-o files given an in-memory normalized view.
Rafael Espindolab1a4d3a2014-06-12 14:53:47 +0000322std::error_code writeYaml(const NormalizedFile &file, raw_ostream &out);
Nick Kledzik30332b12013-10-08 00:43:34 +0000323
Pete Cooperec4e1662016-03-30 23:43:27 +0000324llvm::Error
Rui Ueyama1d510422014-12-12 07:31:09 +0000325normalizedObjectToAtoms(MachOFile *file,
326 const NormalizedFile &normalizedFile,
327 bool copyRefs);
328
Pete Cooperec4e1662016-03-30 23:43:27 +0000329llvm::Error
Rui Ueyama1d510422014-12-12 07:31:09 +0000330normalizedDylibToAtoms(MachODylibFile *file,
331 const NormalizedFile &normalizedFile,
332 bool copyRefs);
333
Nick Kledzik30332b12013-10-08 00:43:34 +0000334/// Takes in-memory normalized dylib or object and parses it into lld::File
Pete Cooperec4e1662016-03-30 23:43:27 +0000335llvm::Expected<std::unique_ptr<lld::File>>
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000336normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path,
Nick Kledzik6edd7222014-01-11 01:07:43 +0000337 bool copyRefs);
Nick Kledzik30332b12013-10-08 00:43:34 +0000338
339/// Takes atoms and generates a normalized macho-o view.
Pete Cooperfefbd222016-03-30 23:10:39 +0000340llvm::Expected<std::unique_ptr<NormalizedFile>>
Nick Kledzik30332b12013-10-08 00:43:34 +0000341normalizedFromAtoms(const lld::File &atomFile, const MachOLinkingContext &ctxt);
342
343
Nick Kledzik30332b12013-10-08 00:43:34 +0000344} // namespace normalized
Nick Kledzik6edd7222014-01-11 01:07:43 +0000345
346/// Class for interfacing mach-o yaml files into generic yaml parsing
347class MachOYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler {
Nick Kledzik378066c2014-06-30 22:57:33 +0000348public:
349 MachOYamlIOTaggedDocumentHandler(MachOLinkingContext::Arch arch)
350 : _arch(arch) { }
Rui Ueyamabc69bce2014-03-28 21:36:33 +0000351 bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override;
Nick Kledzik378066c2014-06-30 22:57:33 +0000352private:
353 const MachOLinkingContext::Arch _arch;
Nick Kledzik6edd7222014-01-11 01:07:43 +0000354};
355
Nick Kledzik30332b12013-10-08 00:43:34 +0000356} // namespace mach_o
357} // namespace lld
358
Rui Ueyama014192db2013-11-15 03:09:26 +0000359#endif // LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H