| |
| /* Derived from Valgrind sources, coregrind/m_debuginfo/readmacho.c. |
| GPL 2+ therefore. |
| |
| Can be compiled as either a 32- or 64-bit program (doesn't matter). |
| */ |
| |
| /* What does this program do? In short it postprocesses tool |
| executables on MacOSX, after linking using /usr/bin/ld. |
| |
| This is to deal with two separate and entirely unrelated problems. |
| Problem (1) is a bug in the linker in Xcode 4.0.0. Problem (2) is |
| much newer and concerns linking 64-bit tool executables for |
| Yosemite (10.10). |
| |
| --- Problem (1) ------------------------------------------------ |
| |
| This is a bug in the linker on Xcode 4.0.0 and Xcode 4.0.1. Xcode |
| versions prior to 4.0.0 are unaffected. |
| |
| The tracking bug is https://bugs.kde.org/show_bug.cgi?id=267997 |
| |
| The bug causes 64-bit tool executables to segfault at startup, |
| because: |
| |
| Comparing the MachO load commands vs a (working) tool executable |
| that was created by Xcode 3.2.x, it appears that the new linker has |
| partially ignored the build system's request to place the tool |
| executable's stack at a non standard location. The build system |
| tells the linker "-stack_addr 0x134000000 -stack_size 0x800000". |
| |
| With the Xcode 3.2 linker those flags produce two results: |
| |
| (1) A load command to allocate the stack at the said location: |
| Load command 3 |
| cmd LC_SEGMENT_64 |
| cmdsize 72 |
| segname __UNIXSTACK |
| vmaddr 0x0000000133800000 |
| vmsize 0x0000000000800000 |
| fileoff 2285568 |
| filesize 0 |
| maxprot 0x00000007 |
| initprot 0x00000003 |
| nsects 0 |
| flags 0x0 |
| |
| (2) A request (in LC_UNIXTHREAD) to set %rsp to the correct value |
| at process startup, 0x134000000. |
| |
| With Xcode 4.0.1, (1) is missing but (2) is still present. The |
| tool executable therefore starts up with %rsp pointing to unmapped |
| memory and faults almost instantly. |
| |
| The workaround implemented by this program is documented in comment |
| 8 of bug 267997, viz: |
| |
| One really sick workaround is to observe that the executables |
| contain a redundant MachO load command: |
| |
| Load command 2 |
| cmd LC_SEGMENT_64 |
| cmdsize 72 |
| segname __LINKEDIT |
| vmaddr 0x0000000138dea000 |
| vmsize 0x00000000000ad000 |
| fileoff 2658304 |
| filesize 705632 |
| maxprot 0x00000007 |
| initprot 0x00000001 |
| nsects 0 |
| flags 0x0 |
| |
| The described section presumably contains information intended for |
| the dynamic linker, but is irrelevant because this is a statically |
| linked executable. Hence it might be possible to postprocess the |
| executables after linking, to overwrite this entry with the |
| information that would have been in the missing __UNIXSTACK entry. |
| I tried this by hand (with a binary editor) earlier and got |
| something that worked. |
| |
| --- Problem (2) ------------------------------------------------ |
| |
| On MacOSX 10.10 (Yosemite), the kernel requires all valid |
| executables to have a __PAGEZERO segment with SVMA (the vmaddr |
| field of its load command) of zero and size of at least one page. |
| However, our tool executables have a __PAGEZERO segment with SVMA |
| set to the requested Valgrind load address (typically |
| 0x1'3800'0000), and the kernel won't start those. So we take the |
| opportunity to "fix" this by setting the SVMA to zero. This seems |
| to work and to have no obvious bad side effects. |
| */ |
| |
| #define DEBUGPRINTING 0 |
| |
| #include <assert.h> |
| #include <stdlib.h> |
| #include <stdio.h> |
| #include <string.h> |
| #include <sys/mman.h> |
| #include <sys/stat.h> |
| #include <unistd.h> |
| #include <fcntl.h> |
| |
| #undef PLAT_x86_darwin |
| #undef PLAT_amd64_darwin |
| |
| #if defined(__APPLE__) && defined(__i386__) |
| # define PLAT_x86_darwin 1 |
| #elif defined(__APPLE__) && defined(__x86_64__) |
| # define PLAT_amd64_darwin 1 |
| #else |
| # error "Can't be compiled on this platform" |
| #endif |
| |
| #include <mach-o/loader.h> |
| #include <mach-o/nlist.h> |
| #include <mach-o/fat.h> |
| #include <mach/i386/thread_status.h> |
| |
| /* Get hold of DARWIN_VERS, and check it has a sane value. */ |
| #include "config.h" |
| #if DARWIN_VERS != DARWIN_10_5 && DARWIN_VERS != DARWIN_10_6 \ |
| && DARWIN_VERS != DARWIN_10_7 && DARWIN_VERS != DARWIN_10_8 \ |
| && DARWIN_VERS != DARWIN_10_9 && DARWIN_VERS != DARWIN_10_10 \ |
| && DARWIN_VERS != DARWIN_10_11 |
| # error "Unknown DARWIN_VERS value. This file only compiles on Darwin." |
| #endif |
| |
| |
/* Short names for the basic scalar types used throughout this file. */

/* Byte-sized types. */
typedef signed char    Char;
typedef unsigned char  UChar;
typedef char           HChar;   /* plain char; signedness is the host's */

/* Boolean, held in one unsigned byte. */
typedef unsigned char  Bool;
#define False  ((Bool)0)
#define True   ((Bool)1)

/* int-sized types. */
typedef int            Int;
typedef unsigned int   UInt;

/* unsigned long, and the size/address types built on it. */
typedef unsigned long  UWord;
typedef UWord          Addr;
typedef UWord          SizeT;

/* long-long-sized types. */
typedef long long           Long;
typedef unsigned long long  ULong;
| |
| |
| |
| __attribute__((noreturn)) |
| void fail ( HChar* msg ) |
| { |
| fprintf(stderr, "fixup_macho_loadcmds: fail: %s\n", msg); |
| exit(1); |
| } |
| |
| |
| /*------------------------------------------------------------*/ |
| /*--- ---*/ |
| /*--- Mach-O file mapping/unmapping helpers ---*/ |
| /*--- ---*/ |
| /*------------------------------------------------------------*/ |
| |
| typedef |
| struct { |
| /* These two describe the entire mapped-in ("primary") image, |
| fat headers, kitchen sink, whatnot: the entire file. The |
| image is mapped into img[0 .. img_szB-1]. */ |
| UChar* img; |
| SizeT img_szB; |
| /* These two describe the Mach-O object of interest, which is |
| presumably somewhere inside the primary image. |
| map_image_aboard() below, which generates this info, will |
| carefully check that the macho_ fields denote a section of |
| memory that falls entirely inside img[0 .. img_szB-1]. */ |
| UChar* macho_img; |
| SizeT macho_img_szB; |
| } |
| ImageInfo; |
| |
| |
| Bool is_macho_object_file( const void* buf, SizeT szB ) |
| { |
| /* (JRS: the Mach-O headers might not be in this mapped data, |
| because we only mapped a page for this initial check, |
| or at least not very much, and what's at the start of the file |
| is in general a so-called fat header. The Mach-O object we're |
| interested in could be arbitrarily far along the image, and so |
| we can't assume its header will fall within this page.) */ |
| |
| /* But we can say that either it's a fat object, in which case it |
| begins with a fat header, or it's unadorned Mach-O, in which |
| case it starts with a normal header. At least do what checks we |
| can to establish whether or not we're looking at something |
| sane. */ |
| |
| const struct fat_header* fh_be = buf; |
| const struct mach_header_64* mh = buf; |
| |
| assert(buf); |
| if (szB < sizeof(struct fat_header)) |
| return False; |
| if (ntohl(fh_be->magic) == FAT_MAGIC) |
| return True; |
| |
| if (szB < sizeof(struct mach_header_64)) |
| return False; |
| if (mh->magic == MH_MAGIC_64) |
| return True; |
| |
| return False; |
| } |
| |
| |
| /* Unmap an image mapped in by map_image_aboard. */ |
| static void unmap_image ( /*MOD*/ImageInfo* ii ) |
| { |
| Int r; |
| assert(ii->img); |
| assert(ii->img_szB > 0); |
| r = munmap( ii->img, ii->img_szB ); |
| /* Do we care if this fails? I suppose so; it would indicate |
| some fairly serious snafu with the mapping of the file. */ |
| assert( !r ); |
| memset(ii, 0, sizeof(*ii)); |
| } |
| |
| |
| /* Map a given fat or thin object aboard, find the thin part if |
| necessary, do some checks, and write details of both the fat and |
| thin parts into *ii. Returns 32 (and leaves the file unmapped) if |
| the thin part is a 32 bit file. Returns 64 if it's a 64 bit file. |
| Does not return on failure. Guarantees to return pointers to a |
| valid(ish) Mach-O image if it succeeds. */ |
| static Int map_image_aboard ( /*OUT*/ImageInfo* ii, HChar* filename ) |
| { |
| memset(ii, 0, sizeof(*ii)); |
| |
| /* First off, try to map the thing in. */ |
| { SizeT size; |
| Int r, fd; |
| struct stat stat_buf; |
| |
| r = stat(filename, &stat_buf); |
| if (r) |
| fail("Can't stat image (to determine its size)?!"); |
| size = stat_buf.st_size; |
| |
| fd = open(filename, O_RDWR, 0); |
| if (fd == -1) |
| fail("Can't open image for possible modification!"); |
| if (DEBUGPRINTING) |
| printf("size %lu fd %d\n", size, fd); |
| void* v = mmap ( NULL, size, PROT_READ|PROT_WRITE, |
| MAP_FILE|MAP_SHARED, fd, 0 ); |
| if (v == MAP_FAILED) { |
| perror("mmap failed"); |
| fail("Can't mmap image for possible modification!"); |
| } |
| |
| close(fd); |
| |
| ii->img = (UChar*)v; |
| ii->img_szB = size; |
| } |
| |
| /* Now it's mapped in and we have .img and .img_szB set. Look for |
| the embedded Mach-O object. If not findable, unmap and fail. */ |
| { struct fat_header* fh_be; |
| struct fat_header fh; |
| struct mach_header_64* mh; |
| |
| // Assume initially that we have a thin image, and update |
| // these if it turns out to be fat. |
| ii->macho_img = ii->img; |
| ii->macho_img_szB = ii->img_szB; |
| |
| // Check for fat header. |
| if (ii->img_szB < sizeof(struct fat_header)) |
| fail("Invalid Mach-O file (0 too small)."); |
| |
| // Fat header is always BIG-ENDIAN |
| fh_be = (struct fat_header *)ii->img; |
| fh.magic = ntohl(fh_be->magic); |
| fh.nfat_arch = ntohl(fh_be->nfat_arch); |
| if (fh.magic == FAT_MAGIC) { |
| // Look for a good architecture. |
| struct fat_arch *arch_be; |
| struct fat_arch arch; |
| Int f; |
| if (ii->img_szB < sizeof(struct fat_header) |
| + fh.nfat_arch * sizeof(struct fat_arch)) |
| fail("Invalid Mach-O file (1 too small)."); |
| |
| for (f = 0, arch_be = (struct fat_arch *)(fh_be+1); |
| f < fh.nfat_arch; |
| f++, arch_be++) { |
| Int cputype; |
| # if defined(PLAT_x86_darwin) |
| cputype = CPU_TYPE_X86; |
| # elif defined(PLAT_amd64_darwin) |
| cputype = CPU_TYPE_X86_64; |
| # else |
| # error "unknown architecture" |
| # endif |
| arch.cputype = ntohl(arch_be->cputype); |
| arch.cpusubtype = ntohl(arch_be->cpusubtype); |
| arch.offset = ntohl(arch_be->offset); |
| arch.size = ntohl(arch_be->size); |
| if (arch.cputype == cputype) { |
| if (ii->img_szB < arch.offset + arch.size) |
| fail("Invalid Mach-O file (2 too small)."); |
| ii->macho_img = ii->img + arch.offset; |
| ii->macho_img_szB = arch.size; |
| break; |
| } |
| } |
| if (f == fh.nfat_arch) |
| fail("No acceptable architecture found in fat file."); |
| } |
| |
| /* Sanity check what we found. */ |
| |
| /* assured by logic above */ |
| assert(ii->img_szB >= sizeof(struct fat_header)); |
| |
| if (ii->macho_img_szB < sizeof(struct mach_header_64)) |
| fail("Invalid Mach-O file (3 too small)."); |
| |
| if (ii->macho_img_szB > ii->img_szB) |
| fail("Invalid Mach-O file (thin bigger than fat)."); |
| |
| if (ii->macho_img >= ii->img |
| && ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB) { |
| /* thin entirely within fat, as expected */ |
| } else { |
| fail("Invalid Mach-O file (thin not inside fat)."); |
| } |
| |
| mh = (struct mach_header_64 *)ii->macho_img; |
| if (mh->magic == MH_MAGIC) { |
| assert(ii->img); |
| assert(ii->macho_img); |
| assert(ii->img_szB > 0); |
| assert(ii->macho_img_szB > 0); |
| assert(ii->macho_img >= ii->img); |
| assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB); |
| return 32; |
| } |
| if (mh->magic != MH_MAGIC_64) |
| fail("Invalid Mach-O file (bad magic)."); |
| |
| if (ii->macho_img_szB < sizeof(struct mach_header_64) + mh->sizeofcmds) |
| fail("Invalid Mach-O file (4 too small)."); |
| } |
| |
| assert(ii->img); |
| assert(ii->macho_img); |
| assert(ii->img_szB > 0); |
| assert(ii->macho_img_szB > 0); |
| assert(ii->macho_img >= ii->img); |
| assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB); |
| return 64; |
| } |
| |
| |
| /*------------------------------------------------------------*/ |
| /*--- ---*/ |
| /*--- Mach-O top-level processing ---*/ |
| /*--- ---*/ |
| /*------------------------------------------------------------*/ |
| |
| void modify_macho_loadcmds ( HChar* filename, |
| ULong expected_stack_start, |
| ULong expected_stack_size ) |
| { |
| ImageInfo ii; |
| memset(&ii, 0, sizeof(ii)); |
| |
| Int size = map_image_aboard( &ii, filename ); |
| if (size == 32) { |
| fprintf(stderr, "fixup_macho_loadcmds: Is 32-bit MachO file;" |
| " no modifications needed.\n"); |
| goto out; |
| } |
| |
| assert(size == 64); |
| |
| assert(ii.macho_img != NULL && ii.macho_img_szB > 0); |
| |
| /* Poke around in the Mach-O header, to find some important |
| stuff. |
| * the location of the __UNIXSTACK load command, if any |
| * the location of the __LINKEDIT load command, if any |
| * the initial RSP value as stated in the LC_UNIXTHREAD |
| */ |
| |
| /* The collected data */ |
| ULong init_rsp = 0; |
| Bool have_rsp = False; |
| struct segment_command_64* seg__unixstack = NULL; |
| struct segment_command_64* seg__linkedit = NULL; |
| struct segment_command_64* seg__pagezero = NULL; |
| |
| /* Loop over the load commands and fill in the above 4 variables. */ |
| |
| { struct mach_header_64 *mh = (struct mach_header_64 *)ii.macho_img; |
| struct load_command *cmd; |
| Int c; |
| |
| for (c = 0, cmd = (struct load_command *)(mh+1); |
| c < mh->ncmds; |
| c++, cmd = (struct load_command *)(cmd->cmdsize |
| + (unsigned long)cmd)) { |
| if (DEBUGPRINTING) |
| printf("load cmd: offset %4lu size %3d kind %2d = ", |
| (unsigned long)((UChar*)cmd - (UChar*)ii.macho_img), |
| cmd->cmdsize, cmd->cmd); |
| |
| switch (cmd->cmd) { |
| case LC_SEGMENT_64: |
| if (DEBUGPRINTING) |
| printf("LC_SEGMENT_64"); |
| break; |
| case LC_SYMTAB: |
| if (DEBUGPRINTING) |
| printf("LC_SYMTAB"); |
| break; |
| case LC_DYSYMTAB: |
| if (DEBUGPRINTING) |
| printf("LC_DYSYMTAB"); |
| break; |
| case LC_UUID: |
| if (DEBUGPRINTING) |
| printf("LC_UUID"); |
| break; |
| case LC_UNIXTHREAD: |
| if (DEBUGPRINTING) |
| printf("LC_UNIXTHREAD"); |
| break; |
| default: |
| if (DEBUGPRINTING) |
| printf("???"); |
| fail("unexpected load command in Mach header"); |
| break; |
| } |
| if (DEBUGPRINTING) |
| printf("\n"); |
| |
| /* Note what the stated initial RSP value is, so we can |
| check it is as expected. */ |
| if (cmd->cmd == LC_UNIXTHREAD) { |
| struct thread_command* tcmd = (struct thread_command*)cmd; |
| UInt* w32s = (UInt*)( (UChar*)tcmd + sizeof(*tcmd) ); |
| if (DEBUGPRINTING) |
| printf("UnixThread: flavor %u = ", w32s[0]); |
| if (w32s[0] == x86_THREAD_STATE64 && !have_rsp) { |
| if (DEBUGPRINTING) |
| printf("x86_THREAD_STATE64\n"); |
| x86_thread_state64_t* state64 |
| = (x86_thread_state64_t*)(&w32s[2]); |
| have_rsp = True; |
| init_rsp = state64->__rsp; |
| if (DEBUGPRINTING) |
| printf("rsp = 0x%llx\n", init_rsp); |
| } else { |
| if (DEBUGPRINTING) |
| printf("???"); |
| } |
| if (DEBUGPRINTING) |
| printf("\n"); |
| } |
| |
| if (cmd->cmd == LC_SEGMENT_64) { |
| struct segment_command_64 *seg = (struct segment_command_64 *)cmd; |
| if (0 == strcmp(seg->segname, "__LINKEDIT")) |
| seg__linkedit = seg; |
| if (0 == strcmp(seg->segname, "__UNIXSTACK")) |
| seg__unixstack = seg; |
| if (0 == strcmp(seg->segname, "__PAGEZERO")) |
| seg__pagezero = seg; |
| } |
| |
| } |
| } |
| |
| /* |
| Actions are then as follows: |
| |
| * (always) check the RSP value is as expected, and abort if not |
| |
| * if there's a UNIXSTACK load command, check it is as expected. |
| If not abort, if yes, do nothing more. |
| |
| * (so there's no UNIXSTACK load command). if there's a LINKEDIT |
| load command, check if it is minimally usable (has 0 for |
| nsects and flags). If yes, convert it to a UNIXSTACK load |
| command. If there is none, or is unusable, then we're out of |
| options and have to abort. |
| */ |
| if (!have_rsp) |
| fail("Can't find / check initial RSP setting"); |
| if (init_rsp != expected_stack_start + expected_stack_size) |
| fail("Initial RSP value not as expected"); |
| |
| fprintf(stderr, "fixup_macho_loadcmds: " |
| "initial RSP is as expected (0x%llx)\n", |
| expected_stack_start + expected_stack_size ); |
| |
| if (seg__unixstack) { |
| struct segment_command_64 *seg = seg__unixstack; |
| if (seg->vmaddr != expected_stack_start) |
| fail("has __UNIXSTACK, but wrong ::vmaddr"); |
| if (seg->vmsize != expected_stack_size) |
| fail("has __UNIXSTACK, but wrong ::vmsize"); |
| if (seg->maxprot != 7) |
| fail("has __UNIXSTACK, but wrong ::maxprot (should be 7)"); |
| if (seg->initprot != 3) |
| fail("has __UNIXSTACK, but wrong ::initprot (should be 3)"); |
| if (seg->nsects != 0) |
| fail("has __UNIXSTACK, but wrong ::nsects (should be 0)"); |
| if (seg->flags != 0) |
| fail("has __UNIXSTACK, but wrong ::flags (should be 0)"); |
| /* looks ok */ |
| fprintf(stderr, "fixup_macho_loadcmds: " |
| "acceptable __UNIXSTACK present; no modifications.\n" ); |
| goto maybe_mash_pagezero; |
| } |
| |
| if (seg__linkedit) { |
| struct segment_command_64 *seg = seg__linkedit; |
| if (seg->nsects != 0) |
| fail("has __LINKEDIT, but wrong ::nsects (should be 0)"); |
| if (seg->flags != 0) |
| fail("has __LINKEDIT, but wrong ::flags (should be 0)"); |
| fprintf(stderr, "fixup_macho_loadcmds: " |
| "no __UNIXSTACK present.\n" ); |
| fprintf(stderr, "fixup_macho_loadcmds: " |
| "converting __LINKEDIT to __UNIXSTACK.\n" ); |
| strcpy(seg->segname, "__UNIXSTACK"); |
| seg->vmaddr = expected_stack_start; |
| seg->vmsize = expected_stack_size; |
| seg->fileoff = 0; |
| seg->filesize = 0; |
| seg->maxprot = 7; |
| seg->initprot = 3; |
| /* success */ |
| goto maybe_mash_pagezero; |
| } |
| |
| /* out of options */ |
| fail("no __UNIXSTACK found and no usable __LINKEDIT found; " |
| "out of options."); |
| /* NOTREACHED */ |
| |
| maybe_mash_pagezero: |
| /* Deal with Problem (2) as documented above. */ |
| # if DARWIN_VERS >= DARWIN_10_10 |
| assert(size == 64); |
| if (!seg__pagezero) { |
| fail("Can't find __PAGEZERO to modify; can't continue."); |
| } |
| fprintf(stderr, "fixup_macho_loadcmds: " |
| "changing __PAGEZERO.vmaddr from %p to 0x0.\n", |
| (void*)seg__pagezero->vmaddr); |
| seg__pagezero->vmaddr = 0; |
| # endif |
| |
| out: |
| if (ii.img) |
| unmap_image(&ii); |
| } |
| |
| |
| static Bool is_plausible_tool_exe_name ( HChar* nm ) |
| { |
| HChar* p; |
| if (!nm) |
| return False; |
| |
| // Does it end with this string? |
| p = strstr(nm, "-x86-darwin"); |
| if (p && 0 == strcmp(p, "-x86-darwin")) |
| return True; |
| |
| p = strstr(nm, "-amd64-darwin"); |
| if (p && 0 == strcmp(p, "-amd64-darwin")) |
| return True; |
| |
| return False; |
| } |
| |
| |
| int main ( int argc, char** argv ) |
| { |
| Int r; |
| ULong req_stack_addr = 0; |
| ULong req_stack_size = 0; |
| |
| if (argc != 4) |
| fail("args: -stack_addr-arg -stack_size-arg " |
| "name-of-tool-executable-to-modify"); |
| |
| r= sscanf(argv[1], "0x%llx", &req_stack_addr); |
| if (r != 1) fail("invalid stack_addr arg"); |
| |
| r= sscanf(argv[2], "0x%llx", &req_stack_size); |
| if (r != 1) fail("invalid stack_size arg"); |
| |
| fprintf(stderr, "fixup_macho_loadcmds: " |
| "requested stack_addr (top) 0x%llx, " |
| "stack_size 0x%llx\n", req_stack_addr, req_stack_size ); |
| |
| if (!is_plausible_tool_exe_name(argv[3])) |
| fail("implausible tool exe name -- not of the form *-{x86,amd64}-darwin"); |
| |
| fprintf(stderr, "fixup_macho_loadcmds: examining tool exe: %s\n", |
| argv[3] ); |
| modify_macho_loadcmds( argv[3], req_stack_addr - req_stack_size, |
| req_stack_size ); |
| |
| return 0; |
| } |
| |
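| /* For reference: an example __LINKEDIT load command of the kind |
| this program overwrites (same values as the example in the |
| comment at the top of this file). |
| |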
| cmd LC_SEGMENT_64 |
| cmdsize 72 |
| segname __LINKEDIT |
| vmaddr 0x0000000138dea000 |
| vmsize 0x00000000000ad000 |
| fileoff 2658304 |
| filesize 705632 |
| maxprot 0x00000007 |
| initprot 0x00000001 |
| nsects 0 |
| flags 0x0 |
| */ |
| |
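| /* For reference: an example __UNIXSTACK load command with the |
| vmaddr, vmsize, maxprot, initprot, nsects and flags values that |
| this program accepts for a 0x134000000 / 0x800000 stack. |
| |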
| cmd LC_SEGMENT_64 |
| cmdsize 72 |
| segname __UNIXSTACK |
| vmaddr 0x0000000133800000 |
| vmsize 0x0000000000800000 |
| fileoff 2498560 |
| filesize 0 |
| maxprot 0x00000007 |
| initprot 0x00000003 |
| nsects 0 |
| flags 0x0 |
| */ |