Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 1 | //=== MachOWriter.h - Target-independent Mach-O writer support --*- C++ -*-===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
Chris Lattner | 4ee451d | 2007-12-29 20:36:04 +0000 | [diff] [blame] | 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This file defines the MachOWriter class. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
Bill Wendling | 4b2ca1a | 2007-02-08 01:30:50 +0000 | [diff] [blame] | 14 | #ifndef MACHOWRITER_H |
| 15 | #define MACHOWRITER_H |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 16 | |
Nate Begeman | 1257c85 | 2007-01-29 21:20:42 +0000 | [diff] [blame] | 17 | #include "llvm/Constants.h" |
Nate Begeman | f8f2c5a | 2006-08-25 06:36:58 +0000 | [diff] [blame] | 18 | #include "llvm/DerivedTypes.h" |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 19 | #include "llvm/CodeGen/MachineFunctionPass.h" |
Nate Begeman | 94be248 | 2006-09-08 22:42:09 +0000 | [diff] [blame] | 20 | #include "llvm/CodeGen/MachineRelocation.h" |
Nate Begeman | f8f2c5a | 2006-08-25 06:36:58 +0000 | [diff] [blame] | 21 | #include "llvm/Target/TargetData.h" |
| 22 | #include "llvm/Target/TargetMachine.h" |
Bill Wendling | 40fab40 | 2007-01-24 03:37:18 +0000 | [diff] [blame] | 23 | #include "llvm/Target/TargetMachOWriterInfo.h" |
Dan Gohman | c9235d2 | 2008-03-21 23:51:57 +0000 | [diff] [blame^] | 24 | #include <map> |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 25 | |
| 26 | namespace llvm { |
| 27 | class GlobalVariable; |
| 28 | class Mangler; |
| 29 | class MachineCodeEmitter; |
| 30 | class MachOCodeEmitter; |
Bill Wendling | 0f43b22 | 2007-02-03 02:37:51 +0000 | [diff] [blame] | 31 | class OutputBuffer; |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 32 | |
Nate Begeman | 94be248 | 2006-09-08 22:42:09 +0000 | [diff] [blame] | 33 | /// MachOSym - This struct contains information about each symbol that is |
| 34 | /// added to logical symbol table for the module. This is eventually |
| 35 | /// turned into a real symbol table in the file. |
| 36 | struct MachOSym { |
| 37 | const GlobalValue *GV; // The global value this corresponds to. |
| 38 | std::string GVName; // The mangled name of the global value. |
| 39 | uint32_t n_strx; // index into the string table |
| 40 | uint8_t n_type; // type flag |
| 41 | uint8_t n_sect; // section number or NO_SECT |
| 42 | int16_t n_desc; // see <mach-o/stab.h> |
| 43 | uint64_t n_value; // value for this symbol (or stab offset) |
| 44 | |
| 45 | // Constants for the n_sect field |
| 46 | // see <mach-o/nlist.h> |
| 47 | enum { NO_SECT = 0 }; // symbol is not in any section |
| 48 | |
| 49 | // Constants for the n_type field |
| 50 | // see <mach-o/nlist.h> |
| 51 | enum { N_UNDF = 0x0, // undefined, n_sect == NO_SECT |
| 52 | N_ABS = 0x2, // absolute, n_sect == NO_SECT |
| 53 | N_SECT = 0xe, // defined in section number n_sect |
| 54 | N_PBUD = 0xc, // prebound undefined (defined in a dylib) |
| 55 | N_INDR = 0xa // indirect |
| 56 | }; |
| 57 | // The following bits are OR'd into the types above. For example, a type |
| 58 | // of 0x0f would be an external N_SECT symbol (0x0e | 0x01). |
| 59 | enum { N_EXT = 0x01, // external symbol bit |
| 60 | N_PEXT = 0x10 // private external symbol bit |
| 61 | }; |
| 62 | |
| 63 | // Constants for the n_desc field |
| 64 | // see <mach-o/loader.h> |
| 65 | enum { REFERENCE_FLAG_UNDEFINED_NON_LAZY = 0, |
| 66 | REFERENCE_FLAG_UNDEFINED_LAZY = 1, |
| 67 | REFERENCE_FLAG_DEFINED = 2, |
| 68 | REFERENCE_FLAG_PRIVATE_DEFINED = 3, |
| 69 | REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY = 4, |
| 70 | REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY = 5 |
| 71 | }; |
| 72 | enum { N_NO_DEAD_STRIP = 0x0020, // symbol is not to be dead stripped |
| 73 | N_WEAK_REF = 0x0040, // symbol is weak referenced |
| 74 | N_WEAK_DEF = 0x0080 // coalesced symbol is a weak definition |
| 75 | }; |
| 76 | |
Nate Begeman | bfaaaa6 | 2006-12-11 02:20:45 +0000 | [diff] [blame] | 77 | MachOSym(const GlobalValue *gv, std::string name, uint8_t sect, |
| 78 | TargetMachine &TM); |
Nate Begeman | 94be248 | 2006-09-08 22:42:09 +0000 | [diff] [blame] | 79 | }; |
| 80 | |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 81 | /// MachOWriter - This class implements the common target-independent code for |
| 82 | /// writing Mach-O files. Targets should derive a class from this to |
| 83 | /// parameterize the output format. |
| 84 | /// |
| 85 | class MachOWriter : public MachineFunctionPass { |
| 86 | friend class MachOCodeEmitter; |
| 87 | public: |
Devang Patel | 1997473 | 2007-05-03 01:11:54 +0000 | [diff] [blame] | 88 | static char ID; |
/// getMachineCodeEmitter - Return the object held in MCE, viewed through the
/// MachineCodeEmitter interface.
/// NOTE(review): MachOCodeEmitter is only forward-declared in this header, so
/// the C-style cast below is applied to an incomplete type.  Presumably
/// MachOCodeEmitter derives from MachineCodeEmitter -- confirm where it is
/// defined.
MachineCodeEmitter &getMachineCodeEmitter() const {
  return *(MachineCodeEmitter*)MCE;
}
Bill Wendling | 4b2ca1a | 2007-02-08 01:30:50 +0000 | [diff] [blame] | 92 | |
| 93 | MachOWriter(std::ostream &O, TargetMachine &TM); |
Bill Wendling | 2b72182 | 2007-01-24 07:13:56 +0000 | [diff] [blame] | 94 | virtual ~MachOWriter(); |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 95 | |
/// getPassName - Return the human-readable name of this pass.
virtual const char *getPassName() const {
  return "Mach-O Writer";
}
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 99 | |
| 100 | typedef std::vector<unsigned char> DataBuffer; |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 101 | protected: |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 102 | /// Output stream to send the resultant object file to. |
| 103 | /// |
| 104 | std::ostream &O; |
| 105 | |
| 106 | /// Target machine description. |
| 107 | /// |
| 108 | TargetMachine &TM; |
| 109 | |
| 110 | /// Mang - The object used to perform name mangling for this module. |
| 111 | /// |
| 112 | Mangler *Mang; |
Nate Begeman | 94be248 | 2006-09-08 22:42:09 +0000 | [diff] [blame] | 113 | |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 114 | /// MCE - The MachineCodeEmitter object that we are exposing to emit machine |
| 115 | /// code for functions to the .o file. |
| 116 | MachOCodeEmitter *MCE; |
| 117 | |
| 118 | /// is64Bit/isLittleEndian - This information is inferred from the target |
| 119 | /// machine directly, indicating what header values and flags to set. |
| 120 | bool is64Bit, isLittleEndian; |
| 121 | |
| 122 | /// doInitialization - Emit the file header and all of the global variables |
| 123 | /// for the module to the Mach-O file. |
| 124 | bool doInitialization(Module &M); |
| 125 | |
| 126 | bool runOnMachineFunction(MachineFunction &MF); |
| 127 | |
| 128 | /// doFinalization - Now that the module has been completely processed, emit |
| 129 | /// the Mach-O file to 'O'. |
| 130 | bool doFinalization(Module &M); |
| 131 | |
| 132 | /// MachOHeader - This struct contains the header information about a |
| 133 | /// specific architecture type/subtype pair that is emitted to the file. |
| 134 | struct MachOHeader { |
Nate Begeman | ea7b8cf | 2006-08-23 21:33:27 +0000 | [diff] [blame] | 135 | uint32_t magic; // mach magic number identifier |
Nate Begeman | ea7b8cf | 2006-08-23 21:33:27 +0000 | [diff] [blame] | 136 | uint32_t filetype; // type of file |
| 137 | uint32_t ncmds; // number of load commands |
| 138 | uint32_t sizeofcmds; // the size of all the load commands |
| 139 | uint32_t flags; // flags |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 140 | uint32_t reserved; // 64-bit only |
| 141 | |
| 142 | /// HeaderData - The actual data for the header which we are building |
| 143 | /// up for emission to the file. |
| 144 | DataBuffer HeaderData; |
| 145 | |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 146 | // Constants for the filetype field |
| 147 | // see <mach-o/loader.h> for additional info on the various types |
| 148 | enum { MH_OBJECT = 1, // relocatable object file |
| 149 | MH_EXECUTE = 2, // demand paged executable file |
| 150 | MH_FVMLIB = 3, // fixed VM shared library file |
| 151 | MH_CORE = 4, // core file |
| 152 | MH_PRELOAD = 5, // preloaded executable file |
| 153 | MH_DYLIB = 6, // dynamically bound shared library |
| 154 | MH_DYLINKER = 7, // dynamic link editor |
| 155 | MH_BUNDLE = 8, // dynamically bound bundle file |
| 156 | MH_DYLIB_STUB = 9, // shared library stub for static linking only |
| 157 | MH_DSYM = 10 // companion file wiht only debug sections |
| 158 | }; |
| 159 | |
| 160 | // Constants for the flags field |
| 161 | enum { MH_NOUNDEFS = 1 << 0, |
| 162 | // the object file has no undefined references |
| 163 | MH_INCRLINK = 1 << 1, |
| 164 | // the object file is the output of an incremental link against |
| 165 | // a base file and cannot be link edited again |
| 166 | MH_DYLDLINK = 1 << 2, |
| 167 | // the object file is input for the dynamic linker and cannot be |
| 168 | // statically link edited again. |
| 169 | MH_BINDATLOAD = 1 << 3, |
| 170 | // the object file's undefined references are bound by the |
| 171 | // dynamic linker when loaded. |
| 172 | MH_PREBOUND = 1 << 4, |
| 173 | // the file has its dynamic undefined references prebound |
| 174 | MH_SPLIT_SEGS = 1 << 5, |
| 175 | // the file has its read-only and read-write segments split |
| 176 | // see <mach/shared_memory_server.h> |
| 177 | MH_LAZY_INIT = 1 << 6, |
| 178 | // the shared library init routine is to be run lazily via |
| 179 | // catching memory faults to its writable segments (obsolete) |
| 180 | MH_TWOLEVEL = 1 << 7, |
| 181 | // the image is using two-level namespace bindings |
| 182 | MH_FORCE_FLAT = 1 << 8, |
| 183 | // the executable is forcing all images to use flat namespace |
| 184 | // bindings. |
| 185 | MH_NOMULTIDEFS = 1 << 8, |
| 186 | // this umbrella guarantees no multiple definitions of symbols |
| 187 | // in its sub-images so the two-level namespace hints can |
| 188 | // always be used. |
| 189 | MH_NOFIXPREBINDING = 1 << 10, |
| 190 | // do not have dyld notify the prebidning agent about this |
| 191 | // executable. |
| 192 | MH_PREBINDABLE = 1 << 11, |
| 193 | // the binary is not prebound but can have its prebinding |
| 194 | // redone. only used when MH_PREBOUND is not set. |
| 195 | MH_ALLMODSBOUND = 1 << 12, |
| 196 | // indicates that this binary binds to all two-level namespace |
| 197 | // modules of its dependent libraries. Only used when |
| 198 | // MH_PREBINDABLE and MH_TWOLEVEL are both set. |
| 199 | MH_SUBSECTIONS_VIA_SYMBOLS = 1 << 13, |
| 200 | // safe to divide up the sections into sub-sections via symbols |
| 201 | // for dead code stripping. |
| 202 | MH_CANONICAL = 1 << 14, |
| 203 | // the binary has been canonicalized via the unprebind operation |
| 204 | MH_WEAK_DEFINES = 1 << 15, |
| 205 | // the final linked image contains external weak symbols |
| 206 | MH_BINDS_TO_WEAK = 1 << 16, |
| 207 | // the final linked image uses weak symbols |
| 208 | MH_ALLOW_STACK_EXECUTION = 1 << 17 |
| 209 | // When this bit is set, all stacks in the task will be given |
| 210 | // stack execution privilege. Only used in MH_EXECUTE filetype |
| 211 | }; |
| 212 | |
Bill Wendling | 40fab40 | 2007-01-24 03:37:18 +0000 | [diff] [blame] | 213 | MachOHeader() : magic(0), filetype(0), ncmds(0), sizeofcmds(0), flags(0), |
| 214 | reserved(0) { } |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 215 | |
| 216 | /// cmdSize - This routine returns the size of the MachOSection as written |
| 217 | /// to disk, depending on whether the destination is a 64 bit Mach-O file. |
| 218 | unsigned cmdSize(bool is64Bit) const { |
| 219 | if (is64Bit) |
| 220 | return 8 * sizeof(uint32_t); |
| 221 | else |
| 222 | return 7 * sizeof(uint32_t); |
| 223 | } |
| 224 | |
| 225 | /// setMagic - This routine sets the appropriate value for the 'magic' |
| 226 | /// field based on pointer size and endianness. |
| 227 | void setMagic(bool isLittleEndian, bool is64Bit) { |
| 228 | if (isLittleEndian) |
| 229 | if (is64Bit) magic = 0xcffaedfe; |
| 230 | else magic = 0xcefaedfe; |
| 231 | else |
| 232 | if (is64Bit) magic = 0xfeedfacf; |
| 233 | else magic = 0xfeedface; |
| 234 | } |
| 235 | }; |
| 236 | |
| 237 | /// Header - An instance of MachOHeader that we will update while we build |
| 238 | /// the file, and then emit during finalization. |
| 239 | MachOHeader Header; |
| 240 | |
/// MachOSegment - Holds the values needed to emit the segment load command
/// that describes a group of sections in the file.
struct MachOSegment {
  uint32_t    cmd;      // LC_SEGMENT or LC_SEGMENT_64
  uint32_t    cmdsize;  // Total size of this struct and section commands
  std::string segname;  // segment name
  uint64_t    vmaddr;   // address of this segment
  uint64_t    vmsize;   // size of this segment, may be larger than filesize
  uint64_t    fileoff;  // offset in file
  uint64_t    filesize; // amount to read from file
  uint32_t    maxprot;  // maximum VM protection
  uint32_t    initprot; // initial VM protection
  uint32_t    nsects;   // number of sections in this segment
  uint32_t    flags;    // flags

  // A system header may already provide these names as macros, which would
  // clash with enumerators of the same name.  Define each one only when it
  // is missing and expose the values through the SEG_-prefixed enum below.
#if !defined(VM_PROT_NONE)
#define VM_PROT_NONE    0x00
#endif
#if !defined(VM_PROT_READ)
#define VM_PROT_READ    0x01
#endif
#if !defined(VM_PROT_WRITE)
#define VM_PROT_WRITE   0x02
#endif
#if !defined(VM_PROT_EXECUTE)
#define VM_PROT_EXECUTE 0x04
#endif
#if !defined(VM_PROT_ALL)
#define VM_PROT_ALL     0x07
#endif

  // Constants for the vm protection fields
  // see <mach/vm_prot.h>
  enum {
    SEG_VM_PROT_NONE    = VM_PROT_NONE,
    SEG_VM_PROT_READ    = VM_PROT_READ,    // read permission
    SEG_VM_PROT_WRITE   = VM_PROT_WRITE,   // write permission
    SEG_VM_PROT_EXECUTE = VM_PROT_EXECUTE,
    SEG_VM_PROT_ALL     = VM_PROT_ALL
  };

  // Constants for the cmd field
  // see <mach-o/loader.h>
  enum {
    LC_SEGMENT    = 0x01,  // segment of this file to be mapped
    LC_SEGMENT_64 = 0x19   // 64-bit segment of this file to be mapped
  };

  /// cmdSize - Size of the segment load command as written to disk,
  /// excluding any section commands that follow it.  The 16 accounts for the
  /// fixed-width segment name; the four address/size fields are 64 bits wide
  /// only in a 64 bit Mach-O file.
  unsigned cmdSize(bool is64Bit) const {
    return is64Bit ? 6 * sizeof(uint32_t) + 4 * sizeof(uint64_t) + 16
                   : 10 * sizeof(uint32_t) + 16;  // addresses only 32 bits
  }

  /// Create a segment named 'seg'.  The command kind follows is64Bit, both
  /// protection fields start as VM_PROT_ALL and everything else is zero.
  MachOSegment(const std::string &seg, bool is64Bit)
    : cmd(is64Bit ? LC_SEGMENT_64 : LC_SEGMENT),
      cmdsize(0),
      segname(seg),
      vmaddr(0),
      vmsize(0),
      fileoff(0),
      filesize(0),
      maxprot(VM_PROT_ALL),
      initprot(VM_PROT_ALL),
      nsects(0),
      flags(0) { }
};
| 303 | |
| 304 | /// MachOSection - This struct contains information about each section in a |
| 305 | /// particular segment that is emitted to the file. This is eventually |
| 306 | /// turned into the SectionCommand in the load command for a particlar |
| 307 | /// segment. |
| 308 | struct MachOSection { |
Nate Begeman | ea7b8cf | 2006-08-23 21:33:27 +0000 | [diff] [blame] | 309 | std::string sectname; // name of this section, |
| 310 | std::string segname; // segment this section goes in |
| 311 | uint64_t addr; // memory address of this section |
| 312 | uint64_t size; // size in bytes of this section |
| 313 | uint32_t offset; // file offset of this section |
| 314 | uint32_t align; // section alignment (power of 2) |
| 315 | uint32_t reloff; // file offset of relocation entries |
| 316 | uint32_t nreloc; // number of relocation entries |
| 317 | uint32_t flags; // flags (section type and attributes) |
| 318 | uint32_t reserved1; // reserved (for offset or index) |
| 319 | uint32_t reserved2; // reserved (for count or sizeof) |
| 320 | uint32_t reserved3; // reserved (64 bit only) |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 321 | |
| 322 | /// A unique number for this section, which will be used to match symbols |
| 323 | /// to the correct section. |
| 324 | uint32_t Index; |
| 325 | |
| 326 | /// SectionData - The actual data for this section which we are building |
| 327 | /// up for emission to the file. |
| 328 | DataBuffer SectionData; |
Nate Begeman | 019f851 | 2006-09-10 23:03:44 +0000 | [diff] [blame] | 329 | |
| 330 | /// RelocBuffer - A buffer to hold the mach-o relocations before we write |
| 331 | /// them out at the appropriate location in the file. |
| 332 | DataBuffer RelocBuffer; |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 333 | |
Nate Begeman | 94be248 | 2006-09-08 22:42:09 +0000 | [diff] [blame] | 334 | /// Relocations - The relocations that we have encountered so far in this |
| 335 | /// section that we will need to convert to MachORelocation entries when |
| 336 | /// the file is written. |
| 337 | std::vector<MachineRelocation> Relocations; |
| 338 | |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 339 | // Constants for the section types (low 8 bits of flags field) |
| 340 | // see <mach-o/loader.h> |
| 341 | enum { S_REGULAR = 0, |
| 342 | // regular section |
| 343 | S_ZEROFILL = 1, |
| 344 | // zero fill on demand section |
| 345 | S_CSTRING_LITERALS = 2, |
| 346 | // section with only literal C strings |
| 347 | S_4BYTE_LITERALS = 3, |
| 348 | // section with only 4 byte literals |
| 349 | S_8BYTE_LITERALS = 4, |
| 350 | // section with only 8 byte literals |
| 351 | S_LITERAL_POINTERS = 5, |
| 352 | // section with only pointers to literals |
| 353 | S_NON_LAZY_SYMBOL_POINTERS = 6, |
| 354 | // section with only non-lazy symbol pointers |
| 355 | S_LAZY_SYMBOL_POINTERS = 7, |
| 356 | // section with only lazy symbol pointers |
| 357 | S_SYMBOL_STUBS = 8, |
| 358 | // section with only symbol stubs |
| 359 | // byte size of stub in the reserved2 field |
| 360 | S_MOD_INIT_FUNC_POINTERS = 9, |
| 361 | // section with only function pointers for initialization |
| 362 | S_MOD_TERM_FUNC_POINTERS = 10, |
| 363 | // section with only function pointers for termination |
| 364 | S_COALESCED = 11, |
| 365 | // section contains symbols that are coalesced |
| 366 | S_GB_ZEROFILL = 12, |
| 367 | // zero fill on demand section (that can be larger than 4GB) |
| 368 | S_INTERPOSING = 13, |
| 369 | // section with only pairs of function pointers for interposing |
| 370 | S_16BYTE_LITERALS = 14 |
| 371 | // section with only 16 byte literals |
| 372 | }; |
| 373 | |
| 374 | // Constants for the section flags (high 24 bits of flags field) |
| 375 | // see <mach-o/loader.h> |
| 376 | enum { S_ATTR_PURE_INSTRUCTIONS = 1 << 31, |
| 377 | // section contains only true machine instructions |
| 378 | S_ATTR_NO_TOC = 1 << 30, |
| 379 | // section contains coalesced symbols that are not to be in a |
| 380 | // ranlib table of contents |
| 381 | S_ATTR_STRIP_STATIC_SYMS = 1 << 29, |
| 382 | // ok to strip static symbols in this section in files with the |
| 383 | // MY_DYLDLINK flag |
| 384 | S_ATTR_NO_DEAD_STRIP = 1 << 28, |
| 385 | // no dead stripping |
| 386 | S_ATTR_LIVE_SUPPORT = 1 << 27, |
| 387 | // blocks are live if they reference live blocks |
| 388 | S_ATTR_SELF_MODIFYING_CODE = 1 << 26, |
| 389 | // used with i386 code stubs written on by dyld |
| 390 | S_ATTR_DEBUG = 1 << 25, |
| 391 | // a debug section |
| 392 | S_ATTR_SOME_INSTRUCTIONS = 1 << 10, |
| 393 | // section contains some machine instructions |
| 394 | S_ATTR_EXT_RELOC = 1 << 9, |
| 395 | // section has external relocation entries |
| 396 | S_ATTR_LOC_RELOC = 1 << 8 |
| 397 | // section has local relocation entries |
| 398 | }; |
| 399 | |
| 400 | /// cmdSize - This routine returns the size of the MachOSection as written |
| 401 | /// to disk, depending on whether the destination is a 64 bit Mach-O file. |
| 402 | unsigned cmdSize(bool is64Bit) const { |
| 403 | if (is64Bit) |
| 404 | return 7 * sizeof(uint32_t) + 2 * sizeof(uint64_t) + 32; |
| 405 | else |
| 406 | return 9 * sizeof(uint32_t) + 32; // addresses only 32 bits |
| 407 | } |
| 408 | |
| 409 | MachOSection(const std::string &seg, const std::string §) |
Nate Begeman | 019f851 | 2006-09-10 23:03:44 +0000 | [diff] [blame] | 410 | : sectname(sect), segname(seg), addr(0), size(0), offset(0), align(2), |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 411 | reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0), |
| 412 | reserved3(0) { } |
| 413 | }; |
| 414 | |
Nate Begeman | d2030e6 | 2006-08-26 15:46:34 +0000 | [diff] [blame] | 415 | private: |
| 416 | |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 417 | /// SectionList - This is the list of sections that we have emitted to the |
| 418 | /// file. Once the file has been completely built, the segment load command |
| 419 | /// SectionCommands are constructed from this info. |
Nate Begeman | bfaaaa6 | 2006-12-11 02:20:45 +0000 | [diff] [blame] | 420 | std::vector<MachOSection*> SectionList; |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 421 | |
| 422 | /// SectionLookup - This is a mapping from section name to SectionList entry |
| 423 | std::map<std::string, MachOSection*> SectionLookup; |
Nate Begeman | bfaaaa6 | 2006-12-11 02:20:45 +0000 | [diff] [blame] | 424 | |
| 425 | /// GVSection - This is a mapping from a GlobalValue to a MachOSection, |
| 426 | /// to aid in emitting relocations. |
| 427 | std::map<GlobalValue*, MachOSection*> GVSection; |
| 428 | |
| 429 | /// GVOffset - This is a mapping from a GlobalValue to an offset from the |
| 430 | /// start of the section in which the GV resides, to aid in emitting |
| 431 | /// relocations. |
| 432 | std::map<GlobalValue*, intptr_t> GVOffset; |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 433 | |
| 434 | /// getSection - Return the section with the specified name, creating a new |
| 435 | /// section if one does not already exist. |
Nate Begeman | bfaaaa6 | 2006-12-11 02:20:45 +0000 | [diff] [blame] | 436 | MachOSection *getSection(const std::string &seg, const std::string §, |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 437 | unsigned Flags = 0) { |
Nate Begeman | bfaaaa6 | 2006-12-11 02:20:45 +0000 | [diff] [blame] | 438 | MachOSection *MOS = SectionLookup[seg+sect]; |
| 439 | if (MOS) return MOS; |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 440 | |
Nate Begeman | bfaaaa6 | 2006-12-11 02:20:45 +0000 | [diff] [blame] | 441 | MOS = new MachOSection(seg, sect); |
| 442 | SectionList.push_back(MOS); |
| 443 | MOS->Index = SectionList.size(); |
| 444 | MOS->flags = MachOSection::S_REGULAR | Flags; |
| 445 | SectionLookup[seg+sect] = MOS; |
| 446 | return MOS; |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 447 | } |
Nate Begeman | bfaaaa6 | 2006-12-11 02:20:45 +0000 | [diff] [blame] | 448 | MachOSection *getTextSection(bool isCode = true) { |
Nate Begeman | 019f851 | 2006-09-10 23:03:44 +0000 | [diff] [blame] | 449 | if (isCode) |
| 450 | return getSection("__TEXT", "__text", |
| 451 | MachOSection::S_ATTR_PURE_INSTRUCTIONS | |
| 452 | MachOSection::S_ATTR_SOME_INSTRUCTIONS); |
| 453 | else |
| 454 | return getSection("__TEXT", "__text"); |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 455 | } |
/// getBSSSection - Return the __DATA,__bss section, requesting the zero-fill
/// section type for it.
MachOSection *getBSSSection() {
  return getSection("__DATA", "__bss", MachOSection::S_ZEROFILL);
}
/// getDataSection - Return the __DATA,__data section, with no extra flags.
MachOSection *getDataSection() {
  return getSection("__DATA", "__data");
}
Nate Begeman | 1257c85 | 2007-01-29 21:20:42 +0000 | [diff] [blame] | 462 | MachOSection *getConstSection(Constant *C) { |
| 463 | const ConstantArray *CVA = dyn_cast<ConstantArray>(C); |
| 464 | if (CVA && CVA->isCString()) |
| 465 | return getSection("__TEXT", "__cstring", |
| 466 | MachOSection::S_CSTRING_LITERALS); |
| 467 | |
| 468 | const Type *Ty = C->getType(); |
Chris Lattner | 42a7551 | 2007-01-15 02:27:26 +0000 | [diff] [blame] | 469 | if (Ty->isPrimitiveType() || Ty->isInteger()) { |
Duncan Sands | ca0ed74 | 2007-11-05 00:04:43 +0000 | [diff] [blame] | 470 | unsigned Size = TM.getTargetData()->getABITypeSize(Ty); |
Nate Begeman | f8f2c5a | 2006-08-25 06:36:58 +0000 | [diff] [blame] | 471 | switch(Size) { |
| 472 | default: break; // Fall through to __TEXT,__const |
| 473 | case 4: |
| 474 | return getSection("__TEXT", "__literal4", |
| 475 | MachOSection::S_4BYTE_LITERALS); |
| 476 | case 8: |
| 477 | return getSection("__TEXT", "__literal8", |
| 478 | MachOSection::S_8BYTE_LITERALS); |
| 479 | case 16: |
| 480 | return getSection("__TEXT", "__literal16", |
| 481 | MachOSection::S_16BYTE_LITERALS); |
| 482 | } |
| 483 | } |
| 484 | return getSection("__TEXT", "__const"); |
| 485 | } |
Nate Begeman | bfaaaa6 | 2006-12-11 02:20:45 +0000 | [diff] [blame] | 486 | MachOSection *getJumpTableSection() { |
Nate Begeman | 019f851 | 2006-09-10 23:03:44 +0000 | [diff] [blame] | 487 | if (TM.getRelocationModel() == Reloc::PIC_) |
| 488 | return getTextSection(false); |
| 489 | else |
| 490 | return getSection("__TEXT", "__const"); |
| 491 | } |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 492 | |
/// MachOSymTab - The fields of the LC_SYMTAB load command: where the symbol
/// table and the string table live in the file and how large they are.
struct MachOSymTab {
  uint32_t cmd;     // LC_SYMTAB
  uint32_t cmdsize; // sizeof( MachOSymTab )
  uint32_t symoff;  // symbol table offset
  uint32_t nsyms;   // number of symbol table entries
  uint32_t stroff;  // string table offset
  uint32_t strsize; // string table size in bytes

  // Constants for the cmd field
  // see <mach-o/loader.h>
  enum {
    LC_SYMTAB = 0x02  // link-edit stab symbol table info
  };

  /// The command id and size are fixed (six 32-bit words); all offsets and
  /// counts start at zero.
  MachOSymTab()
    : cmd(LC_SYMTAB),
      cmdsize(6 * sizeof(uint32_t)),
      symoff(0),
      nsyms(0),
      stroff(0),
      strsize(0) { }
};
| 512 | |
/// MachODySymTab - The fields of the LC_DYSYMTAB load command: index ranges
/// into the symbol table plus the offsets and sizes of the auxiliary tables
/// listed below.
struct MachODySymTab {
  uint32_t cmd;             // LC_DYSYMTAB
  uint32_t cmdsize;         // sizeof( MachODySymTab )
  uint32_t ilocalsym;       // index to local symbols
  uint32_t nlocalsym;       // number of local symbols
  uint32_t iextdefsym;      // index to externally defined symbols
  uint32_t nextdefsym;      // number of externally defined symbols
  uint32_t iundefsym;       // index to undefined symbols
  uint32_t nundefsym;       // number of undefined symbols
  uint32_t tocoff;          // file offset to table of contents
  uint32_t ntoc;            // number of entries in table of contents
  uint32_t modtaboff;       // file offset to module table
  uint32_t nmodtab;         // number of module table entries
  uint32_t extrefsymoff;    // offset to referenced symbol table
  uint32_t nextrefsyms;     // number of referenced symbol table entries
  uint32_t indirectsymoff;  // file offset to the indirect symbol table
  uint32_t nindirectsyms;   // number of indirect symbol table entries
  uint32_t extreloff;       // offset to external relocation entries
  uint32_t nextrel;         // number of external relocation entries
  uint32_t locreloff;       // offset to local relocation entries
  uint32_t nlocrel;         // number of local relocation entries

  // Constants for the cmd field
  // see <mach-o/loader.h>
  enum {
    LC_DYSYMTAB = 0x0B  // dynamic link-edit symbol table info
  };

  /// The command id and size are fixed (twenty 32-bit words); every index,
  /// offset and count starts at zero.
  MachODySymTab()
    : cmd(LC_DYSYMTAB),
      cmdsize(20 * sizeof(uint32_t)),
      ilocalsym(0), nlocalsym(0),
      iextdefsym(0), nextdefsym(0),
      iundefsym(0), nundefsym(0),
      tocoff(0), ntoc(0),
      modtaboff(0), nmodtab(0),
      extrefsymoff(0), nextrefsyms(0),
      indirectsymoff(0), nindirectsyms(0),
      extreloff(0), nextrel(0),
      locreloff(0), nlocrel(0) { }
};
| 549 | |
| 550 | /// SymTab - The "stab" style symbol table information |
| 551 | MachOSymTab SymTab; |
| 552 | /// DySymTab - symbol table info for the dynamic link editor |
| 553 | MachODySymTab DySymTab; |
| 554 | |
Nate Begeman | d2030e6 | 2006-08-26 15:46:34 +0000 | [diff] [blame] | 555 | struct MachOSymCmp { |
Nate Begeman | bfaaaa6 | 2006-12-11 02:20:45 +0000 | [diff] [blame] | 556 | // FIXME: this does not appear to be sorting 'f' after 'F' |
Nate Begeman | d2030e6 | 2006-08-26 15:46:34 +0000 | [diff] [blame] | 557 | bool operator()(const MachOSym &LHS, const MachOSym &RHS) { |
| 558 | return LHS.GVName < RHS.GVName; |
| 559 | } |
| 560 | }; |
| 561 | |
| 562 | /// PartitionByLocal - Simple boolean predicate that returns true if Sym is |
| 563 | /// a local symbol rather than an external symbol. |
| 564 | static bool PartitionByLocal(const MachOSym &Sym); |
| 565 | |
| 566 | /// PartitionByDefined - Simple boolean predicate that returns true if Sym |
| 567 | /// is defined in this module. |
Nate Begeman | 94be248 | 2006-09-08 22:42:09 +0000 | [diff] [blame] | 568 | static bool PartitionByDefined(const MachOSym &Sym); |
Nate Begeman | d2030e6 | 2006-08-26 15:46:34 +0000 | [diff] [blame] | 569 | |
Nate Begeman | 94be248 | 2006-09-08 22:42:09 +0000 | [diff] [blame] | 570 | protected: |
| 571 | |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 572 | /// SymbolTable - This is the list of symbols we have emitted to the file. |
| 573 | /// This actually gets rearranged before emission to the file (to put the |
| 574 | /// local symbols first in the list). |
| 575 | std::vector<MachOSym> SymbolTable; |
| 576 | |
Nate Begeman | d2030e6 | 2006-08-26 15:46:34 +0000 | [diff] [blame] | 577 | /// SymT - A buffer to hold the symbol table before we write it out at the |
| 578 | /// appropriate location in the file. |
| 579 | DataBuffer SymT; |
| 580 | |
| 581 | /// StrT - A buffer to hold the string table before we write it out at the |
| 582 | /// appropriate location in the file. |
| 583 | DataBuffer StrT; |
| 584 | |
/// PendingGlobals - This is a list of externally defined symbols that we
/// have been asked to emit, but have not seen a reference to.  When a
/// reference is seen, the symbol will move from this list to the SymbolTable.
Nate Begeman | fec910c | 2007-02-28 07:40:50 +0000 | [diff] [blame] | 588 | std::vector<GlobalValue*> PendingGlobals; |
Nate Begeman | f8f2c5a | 2006-08-25 06:36:58 +0000 | [diff] [blame] | 589 | |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 590 | /// DynamicSymbolTable - This is just a vector of indices into |
| 591 | /// SymbolTable to aid in emitting the DYSYMTAB load command. |
| 592 | std::vector<unsigned> DynamicSymbolTable; |
| 593 | |
Nate Begeman | bfaaaa6 | 2006-12-11 02:20:45 +0000 | [diff] [blame] | 594 | static void InitMem(const Constant *C, void *Addr, intptr_t Offset, |
| 595 | const TargetData *TD, |
| 596 | std::vector<MachineRelocation> &MRs); |
Nate Begeman | 019f851 | 2006-09-10 23:03:44 +0000 | [diff] [blame] | 597 | |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 598 | private: |
Nate Begeman | bfaaaa6 | 2006-12-11 02:20:45 +0000 | [diff] [blame] | 599 | void AddSymbolToSection(MachOSection *MOS, GlobalVariable *GV); |
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 600 | void EmitGlobal(GlobalVariable *GV); |
| 601 | void EmitHeaderAndLoadCommands(); |
| 602 | void EmitSections(); |
Nate Begeman | d2030e6 | 2006-08-26 15:46:34 +0000 | [diff] [blame] | 603 | void BufferSymbolAndStringTable(); |
Nate Begeman | bfaaaa6 | 2006-12-11 02:20:45 +0000 | [diff] [blame] | 604 | void CalculateRelocations(MachOSection &MOS); |
Nate Begeman | 94be248 | 2006-09-08 22:42:09 +0000 | [diff] [blame] | 605 | |
/// GetJTRelocation - Forward to the target's MachOWriterInfo and return the
/// relocation it builds for the given offset and basic block.
MachineRelocation GetJTRelocation(unsigned Offset,
                                  MachineBasicBlock *MBB) const {
  return TM.getMachOWriterInfo()->GetJTRelocation(Offset, MBB);
}
Bill Wendling | 0f43b22 | 2007-02-03 02:37:51 +0000 | [diff] [blame] | 610 | |
/// GetTargetRelocation - Returns the number of relocations.  All arguments
/// are passed through unchanged to the target's MachOWriterInfo, which does
/// the actual work.
unsigned GetTargetRelocation(MachineRelocation &MR,
                             unsigned FromIdx,
                             unsigned ToAddr,
                             unsigned ToIndex,
                             OutputBuffer &RelocOut,
                             OutputBuffer &SecOut,
                             bool Scattered,
                             bool Extern) {
  return TM.getMachOWriterInfo()->GetTargetRelocation(MR, FromIdx, ToAddr,
                                                      ToIndex, RelocOut,
                                                      SecOut, Scattered,
                                                      Extern);
}
Nate Begeman | eb883af | 2006-08-23 21:08:52 +0000 | [diff] [blame] | 625 | }; |
| 626 | } |
| 627 | |
| 628 | #endif |