blob: c6e2a3a9b75f29695b4b6220b727b6d4263ea167 [file] [log] [blame]
Nick Kledzik30332b12013-10-08 00:43:34 +00001//===- lib/ReaderWriter/MachO/NormalizedFile.h ----------------------===//
2//
3// The LLVM Linker
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10///
11/// \file These data structures comprise the "normalized" view of
12/// mach-o object files. The normalized view is an in-memory only data structure
Shankar Easwaran3d8de472014-01-27 03:09:26 +000013/// which is always in native endianness and pointer size.
14///
15/// The normalized view easily converts to and from YAML using YAML I/O.
Nick Kledzik30332b12013-10-08 00:43:34 +000016///
17/// The normalized view converts to and from binary mach-o object files using
18/// the writeBinary() and readBinary() functions.
19///
/// The normalized view converts to and from lld::Atoms using the
/// normalizedToAtoms() and normalizedFromAtoms() functions.
22///
23/// Overall, the conversion paths available look like:
24///
///                 +---------------+
///                 | binary mach-o |
///                 +---------------+
///                        ^
///                        |
///                        v
///                  +------------+         +------+
///                  | normalized |   <->   | yaml |
///                  +------------+         +------+
///                        ^
///                        |
///                        v
///                    +-------+
///                    | Atoms |
///                    +-------+
40///
Nick Kledzik30332b12013-10-08 00:43:34 +000041
42#include "lld/Core/Error.h"
43#include "lld/Core/LLVM.h"
44#include "lld/ReaderWriter/MachOLinkingContext.h"
45
46#include "llvm/ADT/SmallString.h"
47#include "llvm/ADT/StringRef.h"
Nick Kledzik6edd7222014-01-11 01:07:43 +000048#include "llvm/Support/Allocator.h"
Nick Kledzik30332b12013-10-08 00:43:34 +000049#include "llvm/Support/ErrorOr.h"
50#include "llvm/Support/MachO.h"
51#include "llvm/Support/YAMLTraits.h"
52
Rui Ueyama014192db2013-11-15 03:09:26 +000053#ifndef LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H
54#define LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H
Nick Kledzik30332b12013-10-08 00:43:34 +000055
Nick Kledzik6edd7222014-01-11 01:07:43 +000056using llvm::BumpPtrAllocator;
Nick Kledzik30332b12013-10-08 00:43:34 +000057using llvm::yaml::Hex64;
58using llvm::yaml::Hex32;
Nick Kledzik21921372014-07-24 23:06:56 +000059using llvm::yaml::Hex16;
Nick Kledzik30332b12013-10-08 00:43:34 +000060using llvm::yaml::Hex8;
61using llvm::yaml::SequenceTraits;
62using llvm::MachO::HeaderFileType;
63using llvm::MachO::BindType;
64using llvm::MachO::RebaseType;
65using llvm::MachO::NListType;
66using llvm::MachO::RelocationInfoType;
67using llvm::MachO::SectionType;
68using llvm::MachO::LoadCommandType;
69using llvm::MachO::ExportSymbolKind;
Nick Kledzik21921372014-07-24 23:06:56 +000070using llvm::MachO::DataRegionType;
Nick Kledzik30332b12013-10-08 00:43:34 +000071
72namespace lld {
73namespace mach_o {
74namespace normalized {
75
76
77/// The real mach-o relocation record is 8-bytes on disk and is
78/// encoded in one of two different bit-field patterns. This
Nick Kledzik369ffd12013-10-08 02:07:19 +000079/// normalized form has the union of all possible fields.
Nick Kledzik30332b12013-10-08 00:43:34 +000080struct Relocation {
Shankar Easwaran3d8de472014-01-27 03:09:26 +000081 Relocation() : offset(0), scattered(false),
82 type(llvm::MachO::GENERIC_RELOC_VANILLA),
83 length(0), pcRel(false), isExtern(false), value(0),
Nick Kledzik30332b12013-10-08 00:43:34 +000084 symbol(0) { }
85
86 Hex32 offset;
87 bool scattered;
88 RelocationInfoType type;
89 uint8_t length;
90 bool pcRel;
91 bool isExtern;
92 Hex32 value;
93 uint32_t symbol;
94};
95
96/// A typedef so that YAML I/O can treat this vector as a sequence.
97typedef std::vector<Relocation> Relocations;
98
99/// A typedef so that YAML I/O can process the raw bytes in a section.
100typedef std::vector<Hex8> ContentBytes;
101
/// Named alias so that YAML I/O can treat indirect symbols as a flow
/// sequence.
using IndirectSymbols = std::vector<uint32_t>;
104
105/// A typedef so that YAML I/O can encode/decode section attributes.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000106LLVM_YAML_STRONG_TYPEDEF(uint32_t, SectionAttr)
Nick Kledzik30332b12013-10-08 00:43:34 +0000107
108/// Mach-O has a 32-bit and 64-bit section record. This normalized form
109/// can support either kind.
110struct Section {
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000111 Section() : type(llvm::MachO::S_REGULAR),
Nick Kledzik30332b12013-10-08 00:43:34 +0000112 attributes(0), alignment(0), address(0) { }
113
114 StringRef segmentName;
115 StringRef sectionName;
116 SectionType type;
117 SectionAttr attributes;
118 uint32_t alignment;
119 Hex64 address;
Nick Kledzik6edd7222014-01-11 01:07:43 +0000120 ArrayRef<uint8_t> content;
Nick Kledzik30332b12013-10-08 00:43:34 +0000121 Relocations relocations;
122 IndirectSymbols indirectSymbols;
123};
124
125
126/// A typedef so that YAML I/O can encode/decode the scope bits of an nlist.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000127LLVM_YAML_STRONG_TYPEDEF(uint8_t, SymbolScope)
Nick Kledzik30332b12013-10-08 00:43:34 +0000128
129/// A typedef so that YAML I/O can encode/decode the desc bits of an nlist.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000130LLVM_YAML_STRONG_TYPEDEF(uint16_t, SymbolDesc)
Nick Kledzik30332b12013-10-08 00:43:34 +0000131
132/// Mach-O has a 32-bit and 64-bit symbol table entry (nlist), and the symbol
133/// type and scope and mixed in the same n_type field. This normalized form
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000134/// works for any pointer size and separates out the type and scope.
Nick Kledzik30332b12013-10-08 00:43:34 +0000135struct Symbol {
136 Symbol() : type(llvm::MachO::N_UNDF), scope(0), sect(0), desc(0), value(0) { }
137
138 StringRef name;
139 NListType type;
140 SymbolScope scope;
141 uint8_t sect;
142 SymbolDesc desc;
143 Hex64 value;
144};
145
146/// A typedef so that YAML I/O can (de/en)code the protection bits of a segment.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000147LLVM_YAML_STRONG_TYPEDEF(uint32_t, VMProtect)
Nick Kledzik30332b12013-10-08 00:43:34 +0000148
149/// Segments are only used in normalized final linked images (not in relocatable
150/// object files). They specify how a range of the file is loaded.
151struct Segment {
152 StringRef name;
153 Hex64 address;
154 Hex64 size;
155 VMProtect access;
156};
157
158/// Only used in normalized final linked images to specify on which dylibs
159/// it depends.
160struct DependentDylib {
161 StringRef path;
162 LoadCommandType kind;
163};
164
165/// A normalized rebasing entry. Only used in normalized final linked images.
166struct RebaseLocation {
167 Hex32 segOffset;
168 uint8_t segIndex;
169 RebaseType kind;
170};
171
172/// A normalized binding entry. Only used in normalized final linked images.
173struct BindLocation {
174 Hex32 segOffset;
175 uint8_t segIndex;
176 BindType kind;
177 bool canBeNull;
178 int ordinal;
179 StringRef symbolName;
180 Hex64 addend;
181};
182
183/// A typedef so that YAML I/O can encode/decode export flags.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000184LLVM_YAML_STRONG_TYPEDEF(uint32_t, ExportFlags)
Nick Kledzik30332b12013-10-08 00:43:34 +0000185
186/// A normalized export entry. Only used in normalized final linked images.
187struct Export {
188 StringRef name;
189 Hex64 offset;
190 ExportSymbolKind kind;
191 ExportFlags flags;
192 Hex32 otherOffset;
193 StringRef otherName;
194};
195
Nick Kledzik21921372014-07-24 23:06:56 +0000196/// A normalized data-in-code entry.
197struct DataInCode {
198 Hex32 offset;
199 Hex16 length;
200 DataRegionType kind;
201};
202
Nick Kledzik30332b12013-10-08 00:43:34 +0000203
204/// A typedef so that YAML I/O can encode/decode mach_header.flags.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000205LLVM_YAML_STRONG_TYPEDEF(uint32_t, FileFlags)
Nick Kledzik30332b12013-10-08 00:43:34 +0000206
Nick Kledzik21921372014-07-24 23:06:56 +0000207
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000208///
Nick Kledzik30332b12013-10-08 00:43:34 +0000209struct NormalizedFile {
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000210 NormalizedFile() : arch(MachOLinkingContext::arch_unknown),
Nick Kledzik30332b12013-10-08 00:43:34 +0000211 fileType(llvm::MachO::MH_OBJECT),
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000212 flags(0),
213 hasUUID(false),
Nick Kledzik30332b12013-10-08 00:43:34 +0000214 os(MachOLinkingContext::OS::unknown) { }
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000215
Nick Kledzik30332b12013-10-08 00:43:34 +0000216 MachOLinkingContext::Arch arch;
217 HeaderFileType fileType;
218 FileFlags flags;
219 std::vector<Segment> segments; // Not used in object files.
220 std::vector<Section> sections;
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000221
Nick Kledzik30332b12013-10-08 00:43:34 +0000222 // Symbols sorted by kind.
223 std::vector<Symbol> localSymbols;
224 std::vector<Symbol> globalSymbols;
225 std::vector<Symbol> undefinedSymbols;
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000226
Nick Kledzik30332b12013-10-08 00:43:34 +0000227 // Maps to load commands with no LINKEDIT content (final linked images only).
228 std::vector<DependentDylib> dependentDylibs;
229 StringRef installName;
230 bool hasUUID;
231 std::vector<StringRef> rpaths;
232 Hex64 entryAddress;
233 MachOLinkingContext::OS os;
234 Hex64 sourceVersion;
235 Hex32 minOSverson;
236 Hex32 sdkVersion;
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000237
Nick Kledzik30332b12013-10-08 00:43:34 +0000238 // Maps to load commands with LINKEDIT content (final linked images only).
239 std::vector<RebaseLocation> rebasingInfo;
240 std::vector<BindLocation> bindingInfo;
241 std::vector<BindLocation> weakBindingInfo;
242 std::vector<BindLocation> lazyBindingInfo;
243 std::vector<Export> exportInfo;
Nick Kledzik21921372014-07-24 23:06:56 +0000244 std::vector<DataInCode> dataInCode;
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000245
Nick Kledzik30332b12013-10-08 00:43:34 +0000246 // TODO:
247 // code-signature
248 // split-seg-info
249 // function-starts
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000250
Nick Kledzik6edd7222014-01-11 01:07:43 +0000251 // For any allocations in this struct which need to be owned by this struct.
252 BumpPtrAllocator ownedAllocations;
Nick Kledzik30332b12013-10-08 00:43:34 +0000253};
254
255
256/// Reads a mach-o file and produces an in-memory normalized view.
Joey Gouly010b3762014-01-14 22:32:38 +0000257ErrorOr<std::unique_ptr<NormalizedFile>>
258readBinary(std::unique_ptr<MemoryBuffer> &mb,
259 const MachOLinkingContext::Arch arch);
Nick Kledzik30332b12013-10-08 00:43:34 +0000260
261/// Takes in-memory normalized view and writes a mach-o object file.
Rafael Espindolab1a4d3a2014-06-12 14:53:47 +0000262std::error_code writeBinary(const NormalizedFile &file, StringRef path);
Nick Kledzik30332b12013-10-08 00:43:34 +0000263
264size_t headerAndLoadCommandsSize(const NormalizedFile &file);
265
266
267/// Parses a yaml encoded mach-o file to produce an in-memory normalized view.
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000268ErrorOr<std::unique_ptr<NormalizedFile>>
Nick Kledzik30332b12013-10-08 00:43:34 +0000269readYaml(std::unique_ptr<MemoryBuffer> &mb);
270
271/// Writes a yaml encoded mach-o files given an in-memory normalized view.
Rafael Espindolab1a4d3a2014-06-12 14:53:47 +0000272std::error_code writeYaml(const NormalizedFile &file, raw_ostream &out);
Nick Kledzik30332b12013-10-08 00:43:34 +0000273
274/// Takes in-memory normalized dylib or object and parses it into lld::File
Rui Ueyama170a1a82013-12-20 07:48:29 +0000275ErrorOr<std::unique_ptr<lld::File>>
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000276normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path,
Nick Kledzik6edd7222014-01-11 01:07:43 +0000277 bool copyRefs);
Nick Kledzik30332b12013-10-08 00:43:34 +0000278
279/// Takes atoms and generates a normalized macho-o view.
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000280ErrorOr<std::unique_ptr<NormalizedFile>>
Nick Kledzik30332b12013-10-08 00:43:34 +0000281normalizedFromAtoms(const lld::File &atomFile, const MachOLinkingContext &ctxt);
282
283
Nick Kledzik30332b12013-10-08 00:43:34 +0000284} // namespace normalized
Nick Kledzik6edd7222014-01-11 01:07:43 +0000285
286/// Class for interfacing mach-o yaml files into generic yaml parsing
287class MachOYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler {
Nick Kledzik378066c2014-06-30 22:57:33 +0000288public:
289 MachOYamlIOTaggedDocumentHandler(MachOLinkingContext::Arch arch)
290 : _arch(arch) { }
Rui Ueyamabc69bce2014-03-28 21:36:33 +0000291 bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override;
Nick Kledzik378066c2014-06-30 22:57:33 +0000292private:
293 const MachOLinkingContext::Arch _arch;
Nick Kledzik6edd7222014-01-11 01:07:43 +0000294};
295
Nick Kledzik30332b12013-10-08 00:43:34 +0000296} // namespace mach_o
297} // namespace lld
298
Rui Ueyama014192db2013-11-15 03:09:26 +0000299#endif // LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H