//=== MachO.h - Mach-O structures and constants -----------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the structures and constants used to describe the pieces
// of a Mach-O file: symbols, the file header, segments, sections and the
// dynamic symbol table load command.
//
//===----------------------------------------------------------------------===//
13
14#ifndef MACHO_H
15#define MACHO_H
16
17#include "llvm/Constants.h"
18#include "llvm/DerivedTypes.h"
19#include "llvm/CodeGen/MachineRelocation.h"
Bruno Cardoso Lopesac57e6e2009-07-06 05:09:34 +000020#include "llvm/CodeGen/BinaryObject.h"
Bruno Cardoso Lopesa321dcd2009-06-03 03:43:31 +000021#include "llvm/Target/TargetAsmInfo.h"
22#include <string>
23#include <vector>
24
25namespace llvm {
26
/// DataBuffer - A growable buffer of raw bytes.
typedef std::vector<unsigned char> DataBuffer;
28
29/// MachOSym - This struct contains information about each symbol that is
30/// added to logical symbol table for the module. This is eventually
31/// turned into a real symbol table in the file.
32struct MachOSym {
33 const GlobalValue *GV; // The global value this corresponds to.
34 std::string GVName; // The mangled name of the global value.
35 uint32_t n_strx; // index into the string table
36 uint8_t n_type; // type flag
37 uint8_t n_sect; // section number or NO_SECT
38 int16_t n_desc; // see <mach-o/stab.h>
39 uint64_t n_value; // value for this symbol (or stab offset)
40
41 // Constants for the n_sect field
42 // see <mach-o/nlist.h>
43 enum { NO_SECT = 0 }; // symbol is not in any section
44
45 // Constants for the n_type field
46 // see <mach-o/nlist.h>
47 enum { N_UNDF = 0x0, // undefined, n_sect == NO_SECT
48 N_ABS = 0x2, // absolute, n_sect == NO_SECT
49 N_SECT = 0xe, // defined in section number n_sect
50 N_PBUD = 0xc, // prebound undefined (defined in a dylib)
51 N_INDR = 0xa // indirect
52 };
53 // The following bits are OR'd into the types above. For example, a type
54 // of 0x0f would be an external N_SECT symbol (0x0e | 0x01).
55 enum { N_EXT = 0x01, // external symbol bit
56 N_PEXT = 0x10 // private external symbol bit
57 };
58
59 // Constants for the n_desc field
60 // see <mach-o/loader.h>
61 enum { REFERENCE_FLAG_UNDEFINED_NON_LAZY = 0,
62 REFERENCE_FLAG_UNDEFINED_LAZY = 1,
63 REFERENCE_FLAG_DEFINED = 2,
64 REFERENCE_FLAG_PRIVATE_DEFINED = 3,
65 REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY = 4,
66 REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY = 5
67 };
68 enum { N_NO_DEAD_STRIP = 0x0020, // symbol is not to be dead stripped
69 N_WEAK_REF = 0x0040, // symbol is weak referenced
70 N_WEAK_DEF = 0x0080 // coalesced symbol is a weak definition
71 };
72
73 MachOSym(const GlobalValue *gv, std::string name, uint8_t sect,
74 const TargetAsmInfo *TAI);
75
76 struct SymCmp {
77 // FIXME: this does not appear to be sorting 'f' after 'F'
78 bool operator()(const MachOSym &LHS, const MachOSym &RHS) {
79 return LHS.GVName < RHS.GVName;
80 }
81 };
82
83
84 /// PartitionByLocal - Simple boolean predicate that returns true if Sym is
85 /// a local symbol rather than an external symbol.
86
87 static inline bool PartitionByLocal(const MachOSym &Sym) {
88 return (Sym.n_type & (MachOSym::N_EXT | MachOSym::N_PEXT)) == 0;
89 }
90
91 /// PartitionByDefined - Simple boolean predicate that returns true if Sym is
92 /// defined in this module.
93
94 static inline bool PartitionByDefined(const MachOSym &Sym) {
95 // FIXME: Do N_ABS or N_INDR count as defined?
96 return (Sym.n_type & MachOSym::N_SECT) == MachOSym::N_SECT;
97 }
98
99}; // end struct MachOSym
100
101/// MachOHeader - This struct contains the header information about a
102/// specific architecture type/subtype pair that is emitted to the file.
103
104struct MachOHeader {
105 uint32_t magic; // mach magic number identifier
106 uint32_t filetype; // type of file
107 uint32_t ncmds; // number of load commands
108 uint32_t sizeofcmds; // the size of all the load commands
109 uint32_t flags; // flags
110 uint32_t reserved; // 64-bit only
111
112 /// HeaderData - The actual data for the header which we are building
113 /// up for emission to the file.
114 DataBuffer HeaderData;
115
116 // Constants for the filetype field
117 // see <mach-o/loader.h> for additional info on the various types
118 enum { MH_OBJECT = 1, // relocatable object file
119 MH_EXECUTE = 2, // demand paged executable file
120 MH_FVMLIB = 3, // fixed VM shared library file
121 MH_CORE = 4, // core file
122 MH_PRELOAD = 5, // preloaded executable file
123 MH_DYLIB = 6, // dynamically bound shared library
124 MH_DYLINKER = 7, // dynamic link editor
125 MH_BUNDLE = 8, // dynamically bound bundle file
126 MH_DYLIB_STUB = 9, // shared library stub for static linking only
127 MH_DSYM = 10 // companion file wiht only debug sections
128 };
129
130 // Constants for the flags field
131 enum { MH_NOUNDEFS = 1 << 0,
132 // the object file has no undefined references
133 MH_INCRLINK = 1 << 1,
134 // the object file is the output of an incremental link against
135 // a base file and cannot be link edited again
136 MH_DYLDLINK = 1 << 2,
137 // the object file is input for the dynamic linker and cannot be
138 // statically link edited again.
139 MH_BINDATLOAD = 1 << 3,
140 // the object file's undefined references are bound by the
141 // dynamic linker when loaded.
142 MH_PREBOUND = 1 << 4,
143 // the file has its dynamic undefined references prebound
144 MH_SPLIT_SEGS = 1 << 5,
145 // the file has its read-only and read-write segments split
146 // see <mach/shared_memory_server.h>
147 MH_LAZY_INIT = 1 << 6,
148 // the shared library init routine is to be run lazily via
149 // catching memory faults to its writable segments (obsolete)
150 MH_TWOLEVEL = 1 << 7,
151 // the image is using two-level namespace bindings
152 MH_FORCE_FLAT = 1 << 8,
153 // the executable is forcing all images to use flat namespace
154 // bindings.
155 MH_NOMULTIDEFS = 1 << 8,
156 // this umbrella guarantees no multiple definitions of symbols
157 // in its sub-images so the two-level namespace hints can
158 // always be used.
159 MH_NOFIXPREBINDING = 1 << 10,
160 // do not have dyld notify the prebidning agent about this
161 // executable.
162 MH_PREBINDABLE = 1 << 11,
163 // the binary is not prebound but can have its prebinding
164 // redone. only used when MH_PREBOUND is not set.
165 MH_ALLMODSBOUND = 1 << 12,
166 // indicates that this binary binds to all two-level namespace
167 // modules of its dependent libraries. Only used when
168 // MH_PREBINDABLE and MH_TWOLEVEL are both set.
169 MH_SUBSECTIONS_VIA_SYMBOLS = 1 << 13,
170 // safe to divide up the sections into sub-sections via symbols
171 // for dead code stripping.
172 MH_CANONICAL = 1 << 14,
173 // the binary has been canonicalized via the unprebind operation
174 MH_WEAK_DEFINES = 1 << 15,
175 // the final linked image contains external weak symbols
176 MH_BINDS_TO_WEAK = 1 << 16,
177 // the final linked image uses weak symbols
178 MH_ALLOW_STACK_EXECUTION = 1 << 17
179 // When this bit is set, all stacks in the task will be given
180 // stack execution privilege. Only used in MH_EXECUTE filetype
181 };
182
183 MachOHeader() : magic(0), filetype(0), ncmds(0), sizeofcmds(0), flags(0),
184 reserved(0) { }
185
186 /// cmdSize - This routine returns the size of the MachOSection as written
187 /// to disk, depending on whether the destination is a 64 bit Mach-O file.
188 unsigned cmdSize(bool is64Bit) const {
189 if (is64Bit)
190 return 8 * sizeof(uint32_t);
191 else
192 return 7 * sizeof(uint32_t);
193 }
194
195 /// setMagic - This routine sets the appropriate value for the 'magic'
196 /// field based on pointer size and endianness.
197 void setMagic(bool isLittleEndian, bool is64Bit) {
198 if (isLittleEndian)
199 if (is64Bit) magic = 0xcffaedfe;
200 else magic = 0xcefaedfe;
201 else
202 if (is64Bit) magic = 0xfeedfacf;
203 else magic = 0xfeedface;
204 }
205
206}; // end struct MachOHeader
207
/// MachOSegment - This struct contains the necessary information to
/// emit the load command for a segment in the file.
struct MachOSegment {
  uint32_t    cmd;      // LC_SEGMENT or LC_SEGMENT_64
  uint32_t    cmdsize;  // Total size of this struct and section commands
  std::string segname;  // segment name
  uint64_t    vmaddr;   // address of this segment
  uint64_t    vmsize;   // size of this segment, may be larger than filesize
  uint64_t    fileoff;  // offset in file
  uint64_t    filesize; // amount to read from file
  uint32_t    maxprot;  // maximum VM protection
  uint32_t    initprot; // initial VM protection
  uint32_t    nsects;   // number of sections in this segment
  uint32_t    flags;    // flags

  // The following constants are getting pulled in by one of the
  // system headers, which creates a neat clash with the enum.
  // They are therefore only defined here when not already provided, and the
  // enumerators below carry a SEG_ prefix.
#if !defined(VM_PROT_NONE)
#define VM_PROT_NONE    0x00
#endif
#if !defined(VM_PROT_READ)
#define VM_PROT_READ    0x01
#endif
#if !defined(VM_PROT_WRITE)
#define VM_PROT_WRITE   0x02
#endif
#if !defined(VM_PROT_EXECUTE)
#define VM_PROT_EXECUTE 0x04
#endif
#if !defined(VM_PROT_ALL)
#define VM_PROT_ALL     0x07
#endif

  // Constants for the vm protection fields
  // see <mach/vm_prot.h>
  enum { SEG_VM_PROT_NONE     = VM_PROT_NONE,
         SEG_VM_PROT_READ     = VM_PROT_READ,    // read permission
         SEG_VM_PROT_WRITE    = VM_PROT_WRITE,   // write permission
         SEG_VM_PROT_EXECUTE  = VM_PROT_EXECUTE, // execute permission
         SEG_VM_PROT_ALL      = VM_PROT_ALL
  };

  // Constants for the cmd field
  // see <mach-o/loader.h>
  enum { LC_SEGMENT    = 0x01,  // segment of this file to be mapped
         LC_SEGMENT_64 = 0x19   // 64-bit segment of this file to be mapped
  };

  /// cmdSize - This routine returns the size of the MachOSegment load command
  /// as written to disk (excluding the section commands that follow it),
  /// depending on whether the destination is a 64 bit Mach-O file.
  /// The trailing 16 accounts for the fixed-width segment name field.
  unsigned cmdSize(bool is64Bit) const {
    if (is64Bit)
      return 6 * sizeof(uint32_t) + 4 * sizeof(uint64_t) + 16;
    else
      return 10 * sizeof(uint32_t) + 16;  // addresses only 32 bits
  }

  /// Constructor - creates an empty segment named 'seg'.  The load command
  /// type is chosen from is64Bit, both protection fields start out with all
  /// permissions, and every other numeric field is zero.
  MachOSegment(const std::string &seg, bool is64Bit)
    : cmd(is64Bit ? LC_SEGMENT_64 : LC_SEGMENT), cmdsize(0), segname(seg),
      vmaddr(0), vmsize(0), fileoff(0), filesize(0),
      maxprot(SEG_VM_PROT_ALL), initprot(SEG_VM_PROT_ALL), nsects(0),
      flags(0) { }
};
270
271/// MachOSection - This struct contains information about each section in a
272/// particular segment that is emitted to the file. This is eventually
273/// turned into the SectionCommand in the load command for a particlar
274/// segment.
275
Bruno Cardoso Lopesac57e6e2009-07-06 05:09:34 +0000276struct MachOSection : public BinaryObject {
Bruno Cardoso Lopesa321dcd2009-06-03 03:43:31 +0000277 std::string sectname; // name of this section,
278 std::string segname; // segment this section goes in
279 uint64_t addr; // memory address of this section
Bruno Cardoso Lopesa321dcd2009-06-03 03:43:31 +0000280 uint32_t offset; // file offset of this section
281 uint32_t align; // section alignment (power of 2)
282 uint32_t reloff; // file offset of relocation entries
283 uint32_t nreloc; // number of relocation entries
284 uint32_t flags; // flags (section type and attributes)
285 uint32_t reserved1; // reserved (for offset or index)
286 uint32_t reserved2; // reserved (for count or sizeof)
287 uint32_t reserved3; // reserved (64 bit only)
288
289 /// A unique number for this section, which will be used to match symbols
290 /// to the correct section.
291 uint32_t Index;
292
Bruno Cardoso Lopesa321dcd2009-06-03 03:43:31 +0000293 /// RelocBuffer - A buffer to hold the mach-o relocations before we write
294 /// them out at the appropriate location in the file.
295 DataBuffer RelocBuffer;
296
Bruno Cardoso Lopesa321dcd2009-06-03 03:43:31 +0000297 // Constants for the section types (low 8 bits of flags field)
298 // see <mach-o/loader.h>
299 enum { S_REGULAR = 0,
300 // regular section
301 S_ZEROFILL = 1,
302 // zero fill on demand section
303 S_CSTRING_LITERALS = 2,
304 // section with only literal C strings
305 S_4BYTE_LITERALS = 3,
306 // section with only 4 byte literals
307 S_8BYTE_LITERALS = 4,
308 // section with only 8 byte literals
309 S_LITERAL_POINTERS = 5,
310 // section with only pointers to literals
311 S_NON_LAZY_SYMBOL_POINTERS = 6,
312 // section with only non-lazy symbol pointers
313 S_LAZY_SYMBOL_POINTERS = 7,
314 // section with only lazy symbol pointers
315 S_SYMBOL_STUBS = 8,
316 // section with only symbol stubs
317 // byte size of stub in the reserved2 field
318 S_MOD_INIT_FUNC_POINTERS = 9,
319 // section with only function pointers for initialization
320 S_MOD_TERM_FUNC_POINTERS = 10,
321 // section with only function pointers for termination
322 S_COALESCED = 11,
323 // section contains symbols that are coalesced
324 S_GB_ZEROFILL = 12,
325 // zero fill on demand section (that can be larger than 4GB)
326 S_INTERPOSING = 13,
327 // section with only pairs of function pointers for interposing
328 S_16BYTE_LITERALS = 14
329 // section with only 16 byte literals
330 };
331
332 // Constants for the section flags (high 24 bits of flags field)
333 // see <mach-o/loader.h>
334 enum { S_ATTR_PURE_INSTRUCTIONS = 1 << 31,
335 // section contains only true machine instructions
336 S_ATTR_NO_TOC = 1 << 30,
337 // section contains coalesced symbols that are not to be in a
338 // ranlib table of contents
339 S_ATTR_STRIP_STATIC_SYMS = 1 << 29,
340 // ok to strip static symbols in this section in files with the
341 // MY_DYLDLINK flag
342 S_ATTR_NO_DEAD_STRIP = 1 << 28,
343 // no dead stripping
344 S_ATTR_LIVE_SUPPORT = 1 << 27,
345 // blocks are live if they reference live blocks
346 S_ATTR_SELF_MODIFYING_CODE = 1 << 26,
347 // used with i386 code stubs written on by dyld
348 S_ATTR_DEBUG = 1 << 25,
349 // a debug section
350 S_ATTR_SOME_INSTRUCTIONS = 1 << 10,
351 // section contains some machine instructions
352 S_ATTR_EXT_RELOC = 1 << 9,
353 // section has external relocation entries
354 S_ATTR_LOC_RELOC = 1 << 8
355 // section has local relocation entries
356 };
357
358 /// cmdSize - This routine returns the size of the MachOSection as written
359 /// to disk, depending on whether the destination is a 64 bit Mach-O file.
360 unsigned cmdSize(bool is64Bit) const {
361 if (is64Bit)
362 return 7 * sizeof(uint32_t) + 2 * sizeof(uint64_t) + 32;
363 else
364 return 9 * sizeof(uint32_t) + 32; // addresses only 32 bits
365 }
366
367 MachOSection(const std::string &seg, const std::string &sect)
Bruno Cardoso Lopesac57e6e2009-07-06 05:09:34 +0000368 : BinaryObject(), sectname(sect), segname(seg), addr(0), offset(0),
369 align(2), reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0),
Bruno Cardoso Lopesa321dcd2009-06-03 03:43:31 +0000370 reserved3(0) { }
371
372}; // end struct MachOSection
373
/// MachODySymTab - This struct contains information about the offsets and
/// sizes of the dynamic symbol table information (the LC_DYSYMTAB load
/// command).
struct MachODySymTab {
  uint32_t cmd;             // LC_DYSYMTAB
  uint32_t cmdsize;         // sizeof(MachODySymTab)
  uint32_t ilocalsym;       // index to local symbols
  uint32_t nlocalsym;       // number of local symbols
  uint32_t iextdefsym;      // index to externally defined symbols
  uint32_t nextdefsym;      // number of externally defined symbols
  uint32_t iundefsym;       // index to undefined symbols
  uint32_t nundefsym;       // number of undefined symbols
  uint32_t tocoff;          // file offset to table of contents
  uint32_t ntoc;            // number of entries in table of contents
  uint32_t modtaboff;       // file offset to module table
  uint32_t nmodtab;         // number of module table entries
  uint32_t extrefsymoff;    // offset to referenced symbol table
  uint32_t nextrefsyms;     // number of referenced symbol table entries
  uint32_t indirectsymoff;  // file offset to the indirect symbol table
  uint32_t nindirectsyms;   // number of indirect symbol table entries
  uint32_t extreloff;       // offset to external relocation entries
  uint32_t nextrel;         // number of external relocation entries
  uint32_t locreloff;       // offset to local relocation entries
  uint32_t nlocrel;         // number of local relocation entries

  // Constants for the cmd field
  // see <mach-o/loader.h>
  enum { LC_DYSYMTAB = 0x0B  // dynamic link-edit symbol table info
  };

  /// Default constructor - sets cmd to LC_DYSYMTAB and cmdsize to the size of
  /// the twenty 32-bit fields above; every index, count and offset is zero.
  MachODySymTab() : cmd(LC_DYSYMTAB), cmdsize(20 * sizeof(uint32_t)),
    ilocalsym(0), nlocalsym(0), iextdefsym(0), nextdefsym(0),
    iundefsym(0), nundefsym(0), tocoff(0), ntoc(0), modtaboff(0),
    nmodtab(0), extrefsymoff(0), nextrefsyms(0), indirectsymoff(0),
    nindirectsyms(0), extreloff(0), nextrel(0), locreloff(0), nlocrel(0) { }

}; // end struct MachODySymTab
Bruno Cardoso Lopesa321dcd2009-06-03 03:43:31 +0000411
412} // end namespace llvm
413
414#endif
415