blob: 100ea9d6a91168dcfca8a4f16076026e25bbb34f [file] [log] [blame]
Nick Kledzik5b9e48b2014-11-19 02:21:53 +00001//===- lib/ReaderWriter/MachO/MachONormalizedFile.h -----------------------===//
Nick Kledzik30332b12013-10-08 00:43:34 +00002//
3// The LLVM Linker
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10///
11/// \file These data structures comprise the "normalized" view of
12/// mach-o object files. The normalized view is an in-memory only data structure
Shankar Easwaran3d8de472014-01-27 03:09:26 +000013/// which is always in native endianness and pointer size.
14///
15/// The normalized view easily converts to and from YAML using YAML I/O.
Nick Kledzik30332b12013-10-08 00:43:34 +000016///
17/// The normalized view converts to and from binary mach-o object files using
18/// the writeBinary() and readBinary() functions.
19///
Shankar Easwaran3d8de472014-01-27 03:09:26 +000020/// The normalized view converts to and from lld::Atoms using the
Nick Kledzik30332b12013-10-08 00:43:34 +000021/// normalizedToAtoms() and normalizedFromAtoms().
22///
23/// Overall, the conversion paths available look like:
24///
///                 +---------------+
///                 | binary mach-o |
///                 +---------------+
///                        ^
///                        |
///                        v
///                  +------------+         +------+
///                  | normalized |   <->   | yaml |
///                  +------------+         +------+
///                        ^
///                        |
///                        v
///                    +-------+
///                    | Atoms |
///                    +-------+
40///
Nick Kledzik30332b12013-10-08 00:43:34 +000041
42#include "lld/Core/Error.h"
43#include "lld/Core/LLVM.h"
44#include "lld/ReaderWriter/MachOLinkingContext.h"
Nick Kledzik30332b12013-10-08 00:43:34 +000045#include "llvm/ADT/SmallString.h"
46#include "llvm/ADT/StringRef.h"
Nick Kledzik6edd7222014-01-11 01:07:43 +000047#include "llvm/Support/Allocator.h"
Nick Kledzik30332b12013-10-08 00:43:34 +000048#include "llvm/Support/ErrorOr.h"
49#include "llvm/Support/MachO.h"
50#include "llvm/Support/YAMLTraits.h"
51
Rui Ueyama014192db2013-11-15 03:09:26 +000052#ifndef LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H
53#define LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H
Nick Kledzik30332b12013-10-08 00:43:34 +000054
Nick Kledzik6edd7222014-01-11 01:07:43 +000055using llvm::BumpPtrAllocator;
Nick Kledzik30332b12013-10-08 00:43:34 +000056using llvm::yaml::Hex64;
57using llvm::yaml::Hex32;
Nick Kledzik21921372014-07-24 23:06:56 +000058using llvm::yaml::Hex16;
Nick Kledzik30332b12013-10-08 00:43:34 +000059using llvm::yaml::Hex8;
60using llvm::yaml::SequenceTraits;
61using llvm::MachO::HeaderFileType;
62using llvm::MachO::BindType;
63using llvm::MachO::RebaseType;
64using llvm::MachO::NListType;
65using llvm::MachO::RelocationInfoType;
66using llvm::MachO::SectionType;
67using llvm::MachO::LoadCommandType;
68using llvm::MachO::ExportSymbolKind;
Nick Kledzik21921372014-07-24 23:06:56 +000069using llvm::MachO::DataRegionType;
Nick Kledzik30332b12013-10-08 00:43:34 +000070
71namespace lld {
72namespace mach_o {
73namespace normalized {
74
75
76/// The real mach-o relocation record is 8-bytes on disk and is
77/// encoded in one of two different bit-field patterns. This
Nick Kledzik369ffd12013-10-08 02:07:19 +000078/// normalized form has the union of all possible fields.
Nick Kledzik30332b12013-10-08 00:43:34 +000079struct Relocation {
Shankar Easwaran3d8de472014-01-27 03:09:26 +000080 Relocation() : offset(0), scattered(false),
81 type(llvm::MachO::GENERIC_RELOC_VANILLA),
82 length(0), pcRel(false), isExtern(false), value(0),
Nick Kledzik30332b12013-10-08 00:43:34 +000083 symbol(0) { }
84
85 Hex32 offset;
86 bool scattered;
87 RelocationInfoType type;
88 uint8_t length;
89 bool pcRel;
90 bool isExtern;
91 Hex32 value;
92 uint32_t symbol;
93};
94
95/// A typedef so that YAML I/O can treat this vector as a sequence.
96typedef std::vector<Relocation> Relocations;
97
98/// A typedef so that YAML I/O can process the raw bytes in a section.
99typedef std::vector<Hex8> ContentBytes;
100
101/// A typedef so that YAML I/O can treat indirect symbols as a flow sequence.
102typedef std::vector<uint32_t> IndirectSymbols;
103
104/// A typedef so that YAML I/O can encode/decode section attributes.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000105LLVM_YAML_STRONG_TYPEDEF(uint32_t, SectionAttr)
Nick Kledzik30332b12013-10-08 00:43:34 +0000106
107/// Mach-O has a 32-bit and 64-bit section record. This normalized form
108/// can support either kind.
109struct Section {
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000110 Section() : type(llvm::MachO::S_REGULAR),
Rui Ueyamaf006f4d2015-03-26 01:44:01 +0000111 attributes(0), alignment(1), address(0) { }
Nick Kledzik30332b12013-10-08 00:43:34 +0000112
113 StringRef segmentName;
114 StringRef sectionName;
115 SectionType type;
116 SectionAttr attributes;
Rui Ueyama629f9642015-03-26 02:20:25 +0000117 uint16_t alignment;
Nick Kledzik30332b12013-10-08 00:43:34 +0000118 Hex64 address;
Nick Kledzik6edd7222014-01-11 01:07:43 +0000119 ArrayRef<uint8_t> content;
Nick Kledzik30332b12013-10-08 00:43:34 +0000120 Relocations relocations;
121 IndirectSymbols indirectSymbols;
122};
123
124
125/// A typedef so that YAML I/O can encode/decode the scope bits of an nlist.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000126LLVM_YAML_STRONG_TYPEDEF(uint8_t, SymbolScope)
Nick Kledzik30332b12013-10-08 00:43:34 +0000127
128/// A typedef so that YAML I/O can encode/decode the desc bits of an nlist.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000129LLVM_YAML_STRONG_TYPEDEF(uint16_t, SymbolDesc)
Nick Kledzik30332b12013-10-08 00:43:34 +0000130
131/// Mach-O has a 32-bit and 64-bit symbol table entry (nlist), and the symbol
132/// type and scope and mixed in the same n_type field. This normalized form
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000133/// works for any pointer size and separates out the type and scope.
Nick Kledzik30332b12013-10-08 00:43:34 +0000134struct Symbol {
135 Symbol() : type(llvm::MachO::N_UNDF), scope(0), sect(0), desc(0), value(0) { }
136
137 StringRef name;
138 NListType type;
139 SymbolScope scope;
140 uint8_t sect;
141 SymbolDesc desc;
142 Hex64 value;
143};
144
145/// A typedef so that YAML I/O can (de/en)code the protection bits of a segment.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000146LLVM_YAML_STRONG_TYPEDEF(uint32_t, VMProtect)
Nick Kledzik30332b12013-10-08 00:43:34 +0000147
Nick Kledzik5b9e48b2014-11-19 02:21:53 +0000148/// A typedef to hold verions X.Y.X packed into 32-bit xxxx.yy.zz
149LLVM_YAML_STRONG_TYPEDEF(uint32_t, PackedVersion)
150
Nick Kledzik30332b12013-10-08 00:43:34 +0000151/// Segments are only used in normalized final linked images (not in relocatable
152/// object files). They specify how a range of the file is loaded.
153struct Segment {
154 StringRef name;
155 Hex64 address;
156 Hex64 size;
157 VMProtect access;
158};
159
160/// Only used in normalized final linked images to specify on which dylibs
161/// it depends.
162struct DependentDylib {
163 StringRef path;
164 LoadCommandType kind;
Nick Kledzik5b9e48b2014-11-19 02:21:53 +0000165 PackedVersion compatVersion;
166 PackedVersion currentVersion;
Nick Kledzik30332b12013-10-08 00:43:34 +0000167};
168
169/// A normalized rebasing entry. Only used in normalized final linked images.
170struct RebaseLocation {
171 Hex32 segOffset;
172 uint8_t segIndex;
173 RebaseType kind;
174};
175
176/// A normalized binding entry. Only used in normalized final linked images.
177struct BindLocation {
178 Hex32 segOffset;
179 uint8_t segIndex;
180 BindType kind;
181 bool canBeNull;
182 int ordinal;
183 StringRef symbolName;
184 Hex64 addend;
185};
186
187/// A typedef so that YAML I/O can encode/decode export flags.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000188LLVM_YAML_STRONG_TYPEDEF(uint32_t, ExportFlags)
Nick Kledzik30332b12013-10-08 00:43:34 +0000189
190/// A normalized export entry. Only used in normalized final linked images.
191struct Export {
192 StringRef name;
193 Hex64 offset;
194 ExportSymbolKind kind;
195 ExportFlags flags;
196 Hex32 otherOffset;
197 StringRef otherName;
198};
199
Nick Kledzik21921372014-07-24 23:06:56 +0000200/// A normalized data-in-code entry.
201struct DataInCode {
202 Hex32 offset;
203 Hex16 length;
204 DataRegionType kind;
205};
206
Nick Kledzik30332b12013-10-08 00:43:34 +0000207
208/// A typedef so that YAML I/O can encode/decode mach_header.flags.
Alexey Samsonov8e6829e2014-03-19 09:38:31 +0000209LLVM_YAML_STRONG_TYPEDEF(uint32_t, FileFlags)
Nick Kledzik30332b12013-10-08 00:43:34 +0000210
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000211///
Nick Kledzik30332b12013-10-08 00:43:34 +0000212struct NormalizedFile {
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000213 NormalizedFile() : arch(MachOLinkingContext::arch_unknown),
Nick Kledzik30332b12013-10-08 00:43:34 +0000214 fileType(llvm::MachO::MH_OBJECT),
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000215 flags(0),
216 hasUUID(false),
Nick Kledzik30332b12013-10-08 00:43:34 +0000217 os(MachOLinkingContext::OS::unknown) { }
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000218
Nick Kledzik30332b12013-10-08 00:43:34 +0000219 MachOLinkingContext::Arch arch;
220 HeaderFileType fileType;
221 FileFlags flags;
222 std::vector<Segment> segments; // Not used in object files.
223 std::vector<Section> sections;
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000224
Nick Kledzik30332b12013-10-08 00:43:34 +0000225 // Symbols sorted by kind.
226 std::vector<Symbol> localSymbols;
227 std::vector<Symbol> globalSymbols;
228 std::vector<Symbol> undefinedSymbols;
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000229
Nick Kledzik30332b12013-10-08 00:43:34 +0000230 // Maps to load commands with no LINKEDIT content (final linked images only).
231 std::vector<DependentDylib> dependentDylibs;
Nick Kledzik5b9e48b2014-11-19 02:21:53 +0000232 StringRef installName; // dylibs only
233 PackedVersion compatVersion; // dylibs only
234 PackedVersion currentVersion; // dylibs only
Nick Kledzik30332b12013-10-08 00:43:34 +0000235 bool hasUUID;
236 std::vector<StringRef> rpaths;
237 Hex64 entryAddress;
Lang Hames65a64c92015-05-20 22:10:50 +0000238 Hex64 stackSize;
Nick Kledzik30332b12013-10-08 00:43:34 +0000239 MachOLinkingContext::OS os;
240 Hex64 sourceVersion;
Nick Kledzik5b9e48b2014-11-19 02:21:53 +0000241 PackedVersion minOSverson;
242 PackedVersion sdkVersion;
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000243
Nick Kledzik30332b12013-10-08 00:43:34 +0000244 // Maps to load commands with LINKEDIT content (final linked images only).
Nick Kledzik1bebb282014-09-09 23:52:59 +0000245 Hex32 pageSize;
Nick Kledzik30332b12013-10-08 00:43:34 +0000246 std::vector<RebaseLocation> rebasingInfo;
247 std::vector<BindLocation> bindingInfo;
248 std::vector<BindLocation> weakBindingInfo;
249 std::vector<BindLocation> lazyBindingInfo;
250 std::vector<Export> exportInfo;
Nick Kledzik21921372014-07-24 23:06:56 +0000251 std::vector<DataInCode> dataInCode;
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000252
Nick Kledzik30332b12013-10-08 00:43:34 +0000253 // TODO:
254 // code-signature
255 // split-seg-info
256 // function-starts
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000257
Nick Kledzik6edd7222014-01-11 01:07:43 +0000258 // For any allocations in this struct which need to be owned by this struct.
259 BumpPtrAllocator ownedAllocations;
Nick Kledzik30332b12013-10-08 00:43:34 +0000260};
261
Nick Kledzik635f9c72014-09-04 20:08:30 +0000262/// Tests if a file is a non-fat mach-o object file.
263bool isThinObjectFile(StringRef path, MachOLinkingContext::Arch &arch);
Nick Kledzik30332b12013-10-08 00:43:34 +0000264
Nick Kledzik14b5d202014-10-08 01:48:10 +0000265/// If the buffer is a fat file with the request arch, then this function
266/// returns true with 'offset' and 'size' set to location of the arch slice
267/// within the buffer. Otherwise returns false;
Rafael Espindolaed48e532015-04-27 22:48:51 +0000268bool sliceFromFatFile(MemoryBufferRef mb, MachOLinkingContext::Arch arch,
269 uint32_t &offset, uint32_t &size);
Nick Kledzik14b5d202014-10-08 01:48:10 +0000270
Nick Kledzik30332b12013-10-08 00:43:34 +0000271/// Reads a mach-o file and produces an in-memory normalized view.
Joey Gouly010b3762014-01-14 22:32:38 +0000272ErrorOr<std::unique_ptr<NormalizedFile>>
273readBinary(std::unique_ptr<MemoryBuffer> &mb,
274 const MachOLinkingContext::Arch arch);
Nick Kledzik30332b12013-10-08 00:43:34 +0000275
276/// Takes in-memory normalized view and writes a mach-o object file.
Rafael Espindolab1a4d3a2014-06-12 14:53:47 +0000277std::error_code writeBinary(const NormalizedFile &file, StringRef path);
Nick Kledzik30332b12013-10-08 00:43:34 +0000278
279size_t headerAndLoadCommandsSize(const NormalizedFile &file);
280
281
282/// Parses a yaml encoded mach-o file to produce an in-memory normalized view.
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000283ErrorOr<std::unique_ptr<NormalizedFile>>
Nick Kledzik30332b12013-10-08 00:43:34 +0000284readYaml(std::unique_ptr<MemoryBuffer> &mb);
285
286/// Writes a yaml encoded mach-o files given an in-memory normalized view.
Rafael Espindolab1a4d3a2014-06-12 14:53:47 +0000287std::error_code writeYaml(const NormalizedFile &file, raw_ostream &out);
Nick Kledzik30332b12013-10-08 00:43:34 +0000288
Rui Ueyama1d510422014-12-12 07:31:09 +0000289std::error_code
290normalizedObjectToAtoms(MachOFile *file,
291 const NormalizedFile &normalizedFile,
292 bool copyRefs);
293
294std::error_code
295normalizedDylibToAtoms(MachODylibFile *file,
296 const NormalizedFile &normalizedFile,
297 bool copyRefs);
298
Nick Kledzik30332b12013-10-08 00:43:34 +0000299/// Takes in-memory normalized dylib or object and parses it into lld::File
Rui Ueyama170a1a82013-12-20 07:48:29 +0000300ErrorOr<std::unique_ptr<lld::File>>
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000301normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path,
Nick Kledzik6edd7222014-01-11 01:07:43 +0000302 bool copyRefs);
Nick Kledzik30332b12013-10-08 00:43:34 +0000303
304/// Takes atoms and generates a normalized macho-o view.
Shankar Easwaran3d8de472014-01-27 03:09:26 +0000305ErrorOr<std::unique_ptr<NormalizedFile>>
Nick Kledzik30332b12013-10-08 00:43:34 +0000306normalizedFromAtoms(const lld::File &atomFile, const MachOLinkingContext &ctxt);
307
308
Nick Kledzik30332b12013-10-08 00:43:34 +0000309} // namespace normalized
Nick Kledzik6edd7222014-01-11 01:07:43 +0000310
311/// Class for interfacing mach-o yaml files into generic yaml parsing
312class MachOYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler {
Nick Kledzik378066c2014-06-30 22:57:33 +0000313public:
314 MachOYamlIOTaggedDocumentHandler(MachOLinkingContext::Arch arch)
315 : _arch(arch) { }
Rui Ueyamabc69bce2014-03-28 21:36:33 +0000316 bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override;
Nick Kledzik378066c2014-06-30 22:57:33 +0000317private:
318 const MachOLinkingContext::Arch _arch;
Nick Kledzik6edd7222014-01-11 01:07:43 +0000319};
320
Nick Kledzik30332b12013-10-08 00:43:34 +0000321} // namespace mach_o
322} // namespace lld
323
Rui Ueyama014192db2013-11-15 03:09:26 +0000324#endif // LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H