blob: eb3294d28883af99c5041bee27e336de7d382b0d [file] [log] [blame]
Nick Kledzik30332b12013-10-08 00:43:34 +00001//===- lib/ReaderWriter/MachO/NormalizedFile.h ----------------------===//
2//
3// The LLVM Linker
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10///
/// \file
/// \brief These data structures comprise the "normalized" view of
/// mach-o object files. The normalized view is an in-memory only data structure
/// which is always in native endianness and pointer size.
14///
15/// The normalized view easily converts to and from YAML using YAML I/O.
Nick Kledzik30332b12013-10-08 00:43:34 +000016///
17/// The normalized view converts to and from binary mach-o object files using
18/// the writeBinary() and readBinary() functions.
19///
Shankar Easwaran3d8de472014-01-27 03:09:26 +000020/// The normalized view converts to and from lld::Atoms using the
Nick Kledzik30332b12013-10-08 00:43:34 +000021/// normalizedToAtoms() and normalizedFromAtoms().
22///
23/// Overall, the conversion paths available look like:
24///
///      +---------------+
///      | binary mach-o |
///      +---------------+
///              ^
///              |
///              v
///        +------------+         +------+
///        | normalized |   <->   | yaml |
///        +------------+         +------+
///              ^
///              |
///              v
///          +-------+
///          | Atoms |
///          +-------+
40///
Nick Kledzik30332b12013-10-08 00:43:34 +000041
42#include "lld/Core/Error.h"
43#include "lld/Core/LLVM.h"
44#include "lld/ReaderWriter/MachOLinkingContext.h"
45
46#include "llvm/ADT/SmallString.h"
47#include "llvm/ADT/StringRef.h"
Nick Kledzik6edd7222014-01-11 01:07:43 +000048#include "llvm/Support/Allocator.h"
Nick Kledzik30332b12013-10-08 00:43:34 +000049#include "llvm/Support/ErrorOr.h"
50#include "llvm/Support/MachO.h"
51#include "llvm/Support/YAMLTraits.h"
52
Rui Ueyama014192db2013-11-15 03:09:26 +000053#ifndef LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H
54#define LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H
Nick Kledzik30332b12013-10-08 00:43:34 +000055
Nick Kledzik6edd7222014-01-11 01:07:43 +000056using llvm::BumpPtrAllocator;
Nick Kledzik30332b12013-10-08 00:43:34 +000057using llvm::yaml::Hex64;
58using llvm::yaml::Hex32;
59using llvm::yaml::Hex8;
60using llvm::yaml::SequenceTraits;
61using llvm::MachO::HeaderFileType;
62using llvm::MachO::BindType;
63using llvm::MachO::RebaseType;
64using llvm::MachO::NListType;
65using llvm::MachO::RelocationInfoType;
66using llvm::MachO::SectionType;
67using llvm::MachO::LoadCommandType;
68using llvm::MachO::ExportSymbolKind;
69
70namespace lld {
71namespace mach_o {
72namespace normalized {
73
74
75/// The real mach-o relocation record is 8-bytes on disk and is
76/// encoded in one of two different bit-field patterns. This
Nick Kledzik369ffd12013-10-08 02:07:19 +000077/// normalized form has the union of all possible fields.
Nick Kledzik30332b12013-10-08 00:43:34 +000078struct Relocation {
Shankar Easwaran3d8de472014-01-27 03:09:26 +000079 Relocation() : offset(0), scattered(false),
80 type(llvm::MachO::GENERIC_RELOC_VANILLA),
81 length(0), pcRel(false), isExtern(false), value(0),
Nick Kledzik30332b12013-10-08 00:43:34 +000082 symbol(0) { }
83
84 Hex32 offset;
85 bool scattered;
86 RelocationInfoType type;
87 uint8_t length;
88 bool pcRel;
89 bool isExtern;
90 Hex32 value;
91 uint32_t symbol;
92};
93
94/// A typedef so that YAML I/O can treat this vector as a sequence.
95typedef std::vector<Relocation> Relocations;
96
97/// A typedef so that YAML I/O can process the raw bytes in a section.
98typedef std::vector<Hex8> ContentBytes;
99
100/// A typedef so that YAML I/O can treat indirect symbols as a flow sequence.
101typedef std::vector<uint32_t> IndirectSymbols;
102
103/// A typedef so that YAML I/O can encode/decode section attributes.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000104LLVM_YAML_STRONG_TYPEDEF(uint32_t, SectionAttr)
Nick Kledzik30332b12013-10-08 00:43:34 +0000105
106/// Mach-O has a 32-bit and 64-bit section record. This normalized form
107/// can support either kind.
108struct Section {
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000109 Section() : type(llvm::MachO::S_REGULAR),
Nick Kledzik30332b12013-10-08 00:43:34 +0000110 attributes(0), alignment(0), address(0) { }
111
112 StringRef segmentName;
113 StringRef sectionName;
114 SectionType type;
115 SectionAttr attributes;
116 uint32_t alignment;
117 Hex64 address;
Nick Kledzik6edd7222014-01-11 01:07:43 +0000118 ArrayRef<uint8_t> content;
Nick Kledzik30332b12013-10-08 00:43:34 +0000119 Relocations relocations;
120 IndirectSymbols indirectSymbols;
121};
122
123
124/// A typedef so that YAML I/O can encode/decode the scope bits of an nlist.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000125LLVM_YAML_STRONG_TYPEDEF(uint8_t, SymbolScope)
Nick Kledzik30332b12013-10-08 00:43:34 +0000126
127/// A typedef so that YAML I/O can encode/decode the desc bits of an nlist.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000128LLVM_YAML_STRONG_TYPEDEF(uint16_t, SymbolDesc)
Nick Kledzik30332b12013-10-08 00:43:34 +0000129
130/// Mach-O has a 32-bit and 64-bit symbol table entry (nlist), and the symbol
131/// type and scope and mixed in the same n_type field. This normalized form
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000132/// works for any pointer size and separates out the type and scope.
Nick Kledzik30332b12013-10-08 00:43:34 +0000133struct Symbol {
134 Symbol() : type(llvm::MachO::N_UNDF), scope(0), sect(0), desc(0), value(0) { }
135
136 StringRef name;
137 NListType type;
138 SymbolScope scope;
139 uint8_t sect;
140 SymbolDesc desc;
141 Hex64 value;
142};
143
144/// A typedef so that YAML I/O can (de/en)code the protection bits of a segment.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000145LLVM_YAML_STRONG_TYPEDEF(uint32_t, VMProtect)
Nick Kledzik30332b12013-10-08 00:43:34 +0000146
147/// Segments are only used in normalized final linked images (not in relocatable
148/// object files). They specify how a range of the file is loaded.
149struct Segment {
150 StringRef name;
151 Hex64 address;
152 Hex64 size;
153 VMProtect access;
154};
155
156/// Only used in normalized final linked images to specify on which dylibs
157/// it depends.
158struct DependentDylib {
159 StringRef path;
160 LoadCommandType kind;
161};
162
163/// A normalized rebasing entry. Only used in normalized final linked images.
164struct RebaseLocation {
165 Hex32 segOffset;
166 uint8_t segIndex;
167 RebaseType kind;
168};
169
170/// A normalized binding entry. Only used in normalized final linked images.
171struct BindLocation {
172 Hex32 segOffset;
173 uint8_t segIndex;
174 BindType kind;
175 bool canBeNull;
176 int ordinal;
177 StringRef symbolName;
178 Hex64 addend;
179};
180
181/// A typedef so that YAML I/O can encode/decode export flags.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000182LLVM_YAML_STRONG_TYPEDEF(uint32_t, ExportFlags)
Nick Kledzik30332b12013-10-08 00:43:34 +0000183
184/// A normalized export entry. Only used in normalized final linked images.
185struct Export {
186 StringRef name;
187 Hex64 offset;
188 ExportSymbolKind kind;
189 ExportFlags flags;
190 Hex32 otherOffset;
191 StringRef otherName;
192};
193
194
195/// A typedef so that YAML I/O can encode/decode mach_header.flags.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000196LLVM_YAML_STRONG_TYPEDEF(uint32_t, FileFlags)
Nick Kledzik30332b12013-10-08 00:43:34 +0000197
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000198///
Nick Kledzik30332b12013-10-08 00:43:34 +0000199struct NormalizedFile {
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000200 NormalizedFile() : arch(MachOLinkingContext::arch_unknown),
Nick Kledzik30332b12013-10-08 00:43:34 +0000201 fileType(llvm::MachO::MH_OBJECT),
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000202 flags(0),
203 hasUUID(false),
Nick Kledzik30332b12013-10-08 00:43:34 +0000204 os(MachOLinkingContext::OS::unknown) { }
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000205
Nick Kledzik30332b12013-10-08 00:43:34 +0000206 MachOLinkingContext::Arch arch;
207 HeaderFileType fileType;
208 FileFlags flags;
209 std::vector<Segment> segments; // Not used in object files.
210 std::vector<Section> sections;
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000211
Nick Kledzik30332b12013-10-08 00:43:34 +0000212 // Symbols sorted by kind.
213 std::vector<Symbol> localSymbols;
214 std::vector<Symbol> globalSymbols;
215 std::vector<Symbol> undefinedSymbols;
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000216
Nick Kledzik30332b12013-10-08 00:43:34 +0000217 // Maps to load commands with no LINKEDIT content (final linked images only).
218 std::vector<DependentDylib> dependentDylibs;
219 StringRef installName;
220 bool hasUUID;
221 std::vector<StringRef> rpaths;
222 Hex64 entryAddress;
223 MachOLinkingContext::OS os;
224 Hex64 sourceVersion;
225 Hex32 minOSverson;
226 Hex32 sdkVersion;
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000227
Nick Kledzik30332b12013-10-08 00:43:34 +0000228 // Maps to load commands with LINKEDIT content (final linked images only).
229 std::vector<RebaseLocation> rebasingInfo;
230 std::vector<BindLocation> bindingInfo;
231 std::vector<BindLocation> weakBindingInfo;
232 std::vector<BindLocation> lazyBindingInfo;
233 std::vector<Export> exportInfo;
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000234
Nick Kledzik30332b12013-10-08 00:43:34 +0000235 // TODO:
236 // code-signature
237 // split-seg-info
238 // function-starts
239 // data-in-code
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000240
Nick Kledzik6edd7222014-01-11 01:07:43 +0000241 // For any allocations in this struct which need to be owned by this struct.
242 BumpPtrAllocator ownedAllocations;
Nick Kledzik30332b12013-10-08 00:43:34 +0000243};
244
245
246/// Reads a mach-o file and produces an in-memory normalized view.
Joey Gouly010b3762014-01-14 22:32:38 +0000247ErrorOr<std::unique_ptr<NormalizedFile>>
248readBinary(std::unique_ptr<MemoryBuffer> &mb,
249 const MachOLinkingContext::Arch arch);
Nick Kledzik30332b12013-10-08 00:43:34 +0000250
251/// Takes in-memory normalized view and writes a mach-o object file.
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000252error_code
Nick Kledzik30332b12013-10-08 00:43:34 +0000253writeBinary(const NormalizedFile &file, StringRef path);
254
255size_t headerAndLoadCommandsSize(const NormalizedFile &file);
256
257
258/// Parses a yaml encoded mach-o file to produce an in-memory normalized view.
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000259ErrorOr<std::unique_ptr<NormalizedFile>>
Nick Kledzik30332b12013-10-08 00:43:34 +0000260readYaml(std::unique_ptr<MemoryBuffer> &mb);
261
262/// Writes a yaml encoded mach-o files given an in-memory normalized view.
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000263error_code
Rui Ueyamac1800be2013-11-05 01:37:40 +0000264writeYaml(const NormalizedFile &file, raw_ostream &out);
Nick Kledzik30332b12013-10-08 00:43:34 +0000265
266
267/// Takes in-memory normalized dylib or object and parses it into lld::File
Rui Ueyama170a1a82013-12-20 07:48:29 +0000268ErrorOr<std::unique_ptr<lld::File>>
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000269normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path,
Nick Kledzik6edd7222014-01-11 01:07:43 +0000270 bool copyRefs);
Nick Kledzik30332b12013-10-08 00:43:34 +0000271
272/// Takes atoms and generates a normalized macho-o view.
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000273ErrorOr<std::unique_ptr<NormalizedFile>>
Nick Kledzik30332b12013-10-08 00:43:34 +0000274normalizedFromAtoms(const lld::File &atomFile, const MachOLinkingContext &ctxt);
275
276
Nick Kledzik30332b12013-10-08 00:43:34 +0000277} // namespace normalized
Nick Kledzik6edd7222014-01-11 01:07:43 +0000278
279/// Class for interfacing mach-o yaml files into generic yaml parsing
280class MachOYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler {
281 bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const;
282};
283
284
Nick Kledzik30332b12013-10-08 00:43:34 +0000285} // namespace mach_o
286} // namespace lld
287
Rui Ueyama014192db2013-11-15 03:09:26 +0000288#endif // LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H