blob: 77524f8820007d44f859a39dad19aa3e82cc6174 [file] [log] [blame]
sewardj08f5a272011-04-06 11:17:16 +00001
2/* Derived from Valgrind sources, coregrind/m_debuginfo/readmacho.c.
3 GPL 2+ therefore.
4
5 Can be compiled as either a 32- or 64-bit program (doesn't matter).
6*/
7
8/* What does this program do? In short it postprocesses tool
9 executables on MacOSX, after linking using /usr/bin/ld. This is so
10 as to work around a bug in the linker on Xcode 4.0.0 and Xcode
11 4.0.1. Xcode versions prior to 4.0.0 are unaffected.
12
13 The tracking bug is https://bugs.kde.org/show_bug.cgi?id=267997
14
15 The bug causes 64-bit tool executables to segfault at startup,
16 because:
17
18 Comparing the MachO load commands vs a (working) tool executable
19 that was created by Xcode 3.2.x, it appears that the new linker has
20 partially ignored the build system's request to place the tool
21 executable's stack at a non standard location. The build system
22 tells the linker "-stack_addr 0x134000000 -stack_size 0x800000".
23
24 With the Xcode 3.2 linker those flags produce two results:
25
26 (1) A load command to allocate the stack at the said location:
27 Load command 3
28 cmd LC_SEGMENT_64
29 cmdsize 72
30 segname __UNIXSTACK
31 vmaddr 0x0000000133800000
32 vmsize 0x0000000000800000
33 fileoff 2285568
34 filesize 0
35 maxprot 0x00000007
36 initprot 0x00000003
37 nsects 0
38 flags 0x0
39
40 (2) A request (in LC_UNIXTHREAD) to set %rsp to the correct value
41 at process startup, 0x134000000.
42
43 With Xcode 4.0.1, (1) is missing but (2) is still present. The
44 tool executable therefore starts up with %rsp pointing to unmapped
45 memory and faults almost instantly.
46
47 The workaround implemented by this program is documented in comment
48 8 of bug 267997, viz:
49
50 One really sick workaround is to observe that the executables
51 contain a redundant MachO load command:
52
53 Load command 2
54 cmd LC_SEGMENT_64
55 cmdsize 72
56 segname __LINKEDIT
57 vmaddr 0x0000000138dea000
58 vmsize 0x00000000000ad000
59 fileoff 2658304
60 filesize 705632
61 maxprot 0x00000007
62 initprot 0x00000001
63 nsects 0
64 flags 0x0
65
66 The described section presumably contains information intended for
67 the dynamic linker, but is irrelevant because this is a statically
68 linked executable. Hence it might be possible to postprocess the
69 executables after linking, to overwrite this entry with the
70 information that would have been in the missing __UNIXSTACK entry.
71 I tried this by hand (with a binary editor) earlier and got
72 something that worked.
73*/
74
75#define DEBUGPRINTING 0
76
77#include <assert.h>
78#include <stdlib.h>
79#include <stdio.h>
80#include <string.h>
81#include <sys/mman.h>
82#include <sys/stat.h>
83#include <unistd.h>
84#include <fcntl.h>
85
86
87#undef PLAT_x86_darwin
88#undef PLAT_amd64_darwin
89
90#if defined(__APPLE__) && defined(__i386__)
91# define PLAT_x86_darwin 1
92#elif defined(__APPLE__) && defined(__x86_64__)
93# define PLAT_amd64_darwin 1
94#else
95# error "Can't be compiled on this platform"
96#endif
97
98#include <mach-o/loader.h>
99#include <mach-o/nlist.h>
100#include <mach-o/fat.h>
101#include <mach/i386/thread_status.h>
102
103
/* Basic scalar type names used throughout this file. */

/* Character types. */
typedef unsigned char UChar;
typedef signed char   Char;
typedef char          HChar;   /* signedness depends on host */

/* Plain integers. */
typedef unsigned int UInt;
typedef signed int   Int;

/* Booleans occupy a single unsigned char. */
typedef unsigned char Bool;
#define True  ((Bool)1)
#define False ((Bool)0)

/* UWord is an unsigned long; sizes and addresses are both UWords. */
typedef unsigned long UWord;
typedef UWord SizeT, Addr;

/* long long integers. */
typedef unsigned long long int ULong;
typedef signed long long int   Long;
122
123
124
125__attribute__((noreturn))
126void fail ( HChar* msg )
127{
128 fprintf(stderr, "fixup_macho_loadcmds: fail: %s\n", msg);
129 exit(1);
130}
131
132
133/*------------------------------------------------------------*/
134/*--- ---*/
135/*--- Mach-O file mapping/unmapping helpers ---*/
136/*--- ---*/
137/*------------------------------------------------------------*/
138
139typedef
140 struct {
141 /* These two describe the entire mapped-in ("primary") image,
142 fat headers, kitchen sink, whatnot: the entire file. The
143 image is mapped into img[0 .. img_szB-1]. */
144 UChar* img;
145 SizeT img_szB;
146 /* These two describe the Mach-O object of interest, which is
147 presumably somewhere inside the primary image.
148 map_image_aboard() below, which generates this info, will
149 carefully check that the macho_ fields denote a section of
150 memory that falls entirely inside img[0 .. img_szB-1]. */
151 UChar* macho_img;
152 SizeT macho_img_szB;
153 }
154 ImageInfo;
155
156
157Bool is_macho_object_file( const void* buf, SizeT szB )
158{
159 /* (JRS: the Mach-O headers might not be in this mapped data,
160 because we only mapped a page for this initial check,
161 or at least not very much, and what's at the start of the file
162 is in general a so-called fat header. The Mach-O object we're
163 interested in could be arbitrarily far along the image, and so
164 we can't assume its header will fall within this page.) */
165
166 /* But we can say that either it's a fat object, in which case it
167 begins with a fat header, or it's unadorned Mach-O, in which
168 case it starts with a normal header. At least do what checks we
169 can to establish whether or not we're looking at something
170 sane. */
171
172 const struct fat_header* fh_be = buf;
173 const struct mach_header_64* mh = buf;
174
175 assert(buf);
176 if (szB < sizeof(struct fat_header))
177 return False;
178 if (ntohl(fh_be->magic) == FAT_MAGIC)
179 return True;
180
181 if (szB < sizeof(struct mach_header_64))
182 return False;
183 if (mh->magic == MH_MAGIC_64)
184 return True;
185
186 return False;
187}
188
189
190/* Unmap an image mapped in by map_image_aboard. */
191static void unmap_image ( /*MOD*/ImageInfo* ii )
192{
193 Int r;
194 assert(ii->img);
195 assert(ii->img_szB > 0);
196 r = munmap( ii->img, ii->img_szB );
197 /* Do we care if this fails? I suppose so; it would indicate
198 some fairly serious snafu with the mapping of the file. */
199 assert( !r );
200 memset(ii, 0, sizeof(*ii));
201}
202
203
204/* Map a given fat or thin object aboard, find the thin part if
205 necessary, do some checks, and write details of both the fat and
206 thin parts into *ii. Returns 32 (and leaves the file unmapped) if
207 the thin part is a 32 bit file. Returns 64 if it's a 64 bit file.
208 Does not return on failure. Guarantees to return pointers to a
209 valid(ish) Mach-O image if it succeeds. */
210static Int map_image_aboard ( /*OUT*/ImageInfo* ii, HChar* filename )
211{
212 memset(ii, 0, sizeof(*ii));
213
214 /* First off, try to map the thing in. */
215 { SizeT size;
216 Int r, fd;
217 struct stat stat_buf;
218
219 r = stat(filename, &stat_buf);
220 if (r)
221 fail("Can't stat image (to determine its size)?!");
222 size = stat_buf.st_size;
223
224 fd = open(filename, O_RDWR, 0);
225 if (fd == -1)
226 fail("Can't open image for possible modification!");
227 if (DEBUGPRINTING)
228 printf("size %lu fd %d\n", size, fd);
229 void* v = mmap ( NULL, size, PROT_READ|PROT_WRITE,
230 MAP_FILE|MAP_SHARED, fd, 0 );
231 if (v == MAP_FAILED) {
232 perror("mmap failed");
233 fail("Can't mmap image for possible modification!");
234 }
235
236 close(fd);
237
238 ii->img = (UChar*)v;
239 ii->img_szB = size;
240 }
241
242 /* Now it's mapped in and we have .img and .img_szB set. Look for
243 the embedded Mach-O object. If not findable, unmap and fail. */
244 { struct fat_header* fh_be;
245 struct fat_header fh;
246 struct mach_header_64* mh;
247
248 // Assume initially that we have a thin image, and update
249 // these if it turns out to be fat.
250 ii->macho_img = ii->img;
251 ii->macho_img_szB = ii->img_szB;
252
253 // Check for fat header.
254 if (ii->img_szB < sizeof(struct fat_header))
255 fail("Invalid Mach-O file (0 too small).");
256
257 // Fat header is always BIG-ENDIAN
258 fh_be = (struct fat_header *)ii->img;
259 fh.magic = ntohl(fh_be->magic);
260 fh.nfat_arch = ntohl(fh_be->nfat_arch);
261 if (fh.magic == FAT_MAGIC) {
262 // Look for a good architecture.
263 struct fat_arch *arch_be;
264 struct fat_arch arch;
265 Int f;
266 if (ii->img_szB < sizeof(struct fat_header)
267 + fh.nfat_arch * sizeof(struct fat_arch))
268 fail("Invalid Mach-O file (1 too small).");
269
270 for (f = 0, arch_be = (struct fat_arch *)(fh_be+1);
271 f < fh.nfat_arch;
272 f++, arch_be++) {
273 Int cputype;
274# if defined(PLAT_x86_darwin)
275 cputype = CPU_TYPE_X86;
276# elif defined(PLAT_amd64_darwin)
277 cputype = CPU_TYPE_X86_64;
278# else
279# error "unknown architecture"
280# endif
281 arch.cputype = ntohl(arch_be->cputype);
282 arch.cpusubtype = ntohl(arch_be->cpusubtype);
283 arch.offset = ntohl(arch_be->offset);
284 arch.size = ntohl(arch_be->size);
285 if (arch.cputype == cputype) {
286 if (ii->img_szB < arch.offset + arch.size)
287 fail("Invalid Mach-O file (2 too small).");
288 ii->macho_img = ii->img + arch.offset;
289 ii->macho_img_szB = arch.size;
290 break;
291 }
292 }
293 if (f == fh.nfat_arch)
294 fail("No acceptable architecture found in fat file.");
295 }
296
297 /* Sanity check what we found. */
298
299 /* assured by logic above */
300 assert(ii->img_szB >= sizeof(struct fat_header));
301
302 if (ii->macho_img_szB < sizeof(struct mach_header_64))
303 fail("Invalid Mach-O file (3 too small).");
304
305 if (ii->macho_img_szB > ii->img_szB)
306 fail("Invalid Mach-O file (thin bigger than fat).");
307
308 if (ii->macho_img >= ii->img
309 && ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB) {
310 /* thin entirely within fat, as expected */
311 } else {
312 fail("Invalid Mach-O file (thin not inside fat).");
313 }
314
315 mh = (struct mach_header_64 *)ii->macho_img;
316 if (mh->magic == MH_MAGIC) {
317 assert(ii->img);
318 assert(ii->macho_img);
319 assert(ii->img_szB > 0);
320 assert(ii->macho_img_szB > 0);
321 assert(ii->macho_img >= ii->img);
322 assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB);
323 return 32;
324 }
325 if (mh->magic != MH_MAGIC_64)
326 fail("Invalid Mach-O file (bad magic).");
327
328 if (ii->macho_img_szB < sizeof(struct mach_header_64) + mh->sizeofcmds)
329 fail("Invalid Mach-O file (4 too small).");
330 }
331
332 assert(ii->img);
333 assert(ii->macho_img);
334 assert(ii->img_szB > 0);
335 assert(ii->macho_img_szB > 0);
336 assert(ii->macho_img >= ii->img);
337 assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB);
338 return 64;
339}
340
341
342/*------------------------------------------------------------*/
343/*--- ---*/
344/*--- Mach-O top-level processing ---*/
345/*--- ---*/
346/*------------------------------------------------------------*/
347
348void modify_macho_loadcmds ( HChar* filename,
349 ULong expected_stack_start,
350 ULong expected_stack_size )
351{
352 ImageInfo ii;
353 memset(&ii, 0, sizeof(ii));
354
355 Int size = map_image_aboard( &ii, filename );
356 if (size == 32) {
357 fprintf(stderr, "fixup_macho_loadcmds: Is 32-bit MachO file;"
358 " no modifications needed.\n");
359 goto out;
360 }
361
362 assert(size == 64);
363
364 assert(ii.macho_img != NULL && ii.macho_img_szB > 0);
365
366 /* Poke around in the Mach-O header, to find some important
367 stuff.
368 * the location of the __UNIXSTACK load command, if any
369 * the location of the __LINKEDIT load command, if any
370 * the initial RSP value as stated in the LC_UNIXTHREAD
371 */
372
373 /* The collected data */
374 ULong init_rsp = 0;
375 Bool have_rsp = False;
376 struct segment_command_64* seg__unixstack = NULL;
377 struct segment_command_64* seg__linkedit = NULL;
378
379 /* Loop over the load commands and fill in the above 4 variables. */
380
381 { struct mach_header_64 *mh = (struct mach_header_64 *)ii.macho_img;
382 struct load_command *cmd;
383 Int c;
384
385 for (c = 0, cmd = (struct load_command *)(mh+1);
386 c < mh->ncmds;
387 c++, cmd = (struct load_command *)(cmd->cmdsize
388 + (unsigned long)cmd)) {
389 if (DEBUGPRINTING)
390 printf("load cmd: offset %4lu size %3d kind %2d = ",
391 (unsigned long)((UChar*)cmd - (UChar*)ii.macho_img),
392 cmd->cmdsize, cmd->cmd);
393
394 switch (cmd->cmd) {
395 case LC_SEGMENT_64:
396 if (DEBUGPRINTING)
397 printf("LC_SEGMENT_64");
398 break;
399 case LC_SYMTAB:
400 if (DEBUGPRINTING)
401 printf("LC_SYMTAB");
402 break;
sewardj627c6492011-05-10 08:42:14 +0000403 case LC_DYSYMTAB:
404 if (DEBUGPRINTING)
405 printf("LC_DYSYMTAB");
406 break;
sewardj08f5a272011-04-06 11:17:16 +0000407 case LC_UUID:
408 if (DEBUGPRINTING)
409 printf("LC_UUID");
410 break;
411 case LC_UNIXTHREAD:
412 if (DEBUGPRINTING)
413 printf("LC_UNIXTHREAD");
414 break;
415 default:
416 printf("???");
417 fail("unexpected load command in Mach header");
418 break;
419 }
420 if (DEBUGPRINTING)
421 printf("\n");
422
423 /* Note what the stated initial RSP value is, so we can
424 check it is as expected. */
425 if (cmd->cmd == LC_UNIXTHREAD) {
426 struct thread_command* tcmd = (struct thread_command*)cmd;
427 UInt* w32s = (UInt*)( (UChar*)tcmd + sizeof(*tcmd) );
428 if (DEBUGPRINTING)
429 printf("UnixThread: flavor %u = ", w32s[0]);
430 if (w32s[0] == x86_THREAD_STATE64 && !have_rsp) {
431 if (DEBUGPRINTING)
432 printf("x86_THREAD_STATE64\n");
433 x86_thread_state64_t* state64
434 = (x86_thread_state64_t*)(&w32s[2]);
435 have_rsp = True;
436 init_rsp = state64->__rsp;
437 if (DEBUGPRINTING)
438 printf("rsp = 0x%llx\n", init_rsp);
439 } else {
440 if (DEBUGPRINTING)
441 printf("???");
442 }
443 if (DEBUGPRINTING)
444 printf("\n");
445 }
446
447 if (cmd->cmd == LC_SEGMENT_64) {
448 struct segment_command_64 *seg = (struct segment_command_64 *)cmd;
449 if (0 == strcmp(seg->segname, "__LINKEDIT"))
450 seg__linkedit = seg;
451 if (0 == strcmp(seg->segname, "__UNIXSTACK"))
452 seg__unixstack = seg;
453 }
454
455 }
456 }
457
458 /*
459 Actions are then as follows:
460
461 * (always) check the RSP value is as expected, and abort if not
462
463 * if there's a UNIXSTACK load command, check it is as expected.
464 If not abort, if yes, do nothing more.
465
466 * (so there's no UNIXSTACK load command). if there's a LINKEDIT
467 load command, check if it is minimally usable (has 0 for
468 nsects and flags). If yes, convert it to a UNIXSTACK load
469 command. If there is none, or is unusable, then we're out of
470 options and have to abort.
471 */
472 if (!have_rsp)
473 fail("Can't find / check initial RSP setting");
474 if (init_rsp != expected_stack_start + expected_stack_size)
475 fail("Initial RSP value not as expected");
476
477 fprintf(stderr, "fixup_macho_loadcmds: "
478 "initial RSP is as expected (0x%llx)\n",
479 expected_stack_start + expected_stack_size );
480
481 if (seg__unixstack) {
482 struct segment_command_64 *seg = seg__unixstack;
483 if (seg->vmaddr != expected_stack_start)
484 fail("has __UNIXSTACK, but wrong ::vmaddr");
485 if (seg->vmsize != expected_stack_size)
486 fail("has __UNIXSTACK, but wrong ::vmsize");
487 if (seg->maxprot != 7)
488 fail("has __UNIXSTACK, but wrong ::maxprot (should be 7)");
489 if (seg->initprot != 3)
490 fail("has __UNIXSTACK, but wrong ::initprot (should be 3)");
491 if (seg->nsects != 0)
492 fail("has __UNIXSTACK, but wrong ::nsects (should be 0)");
493 if (seg->flags != 0)
494 fail("has __UNIXSTACK, but wrong ::flags (should be 0)");
495 /* looks ok */
496 fprintf(stderr, "fixup_macho_loadcmds: "
497 "acceptable __UNIXSTACK present; no modifications.\n" );
498 goto out;
499 }
500
501 if (seg__linkedit) {
502 struct segment_command_64 *seg = seg__linkedit;
503 if (seg->nsects != 0)
504 fail("has __LINKEDIT, but wrong ::nsects (should be 0)");
505 if (seg->flags != 0)
506 fail("has __LINKEDIT, but wrong ::flags (should be 0)");
507 fprintf(stderr, "fixup_macho_loadcmds: "
508 "no __UNIXSTACK present.\n" );
509 fprintf(stderr, "fixup_macho_loadcmds: "
510 "converting __LINKEDIT to __UNIXSTACK.\n" );
511 strcpy(seg->segname, "__UNIXSTACK");
512 seg->vmaddr = expected_stack_start;
513 seg->vmsize = expected_stack_size;
514 seg->fileoff = 0;
515 seg->filesize = 0;
516 seg->maxprot = 7;
517 seg->initprot = 3;
518 /* success */
519 goto out;
520 }
521
522 /* out of options */
523 fail("no __UNIXSTACK found and no usable __LINKEDIT found; "
524 "out of options.");
525 /* NOTREACHED */
526
527 out:
528 if (ii.img)
529 unmap_image(&ii);
530}
531
532
533static Bool is_plausible_tool_exe_name ( HChar* nm )
534{
535 HChar* p;
536 if (!nm)
537 return False;
538
539 // Does it end with this string?
540 p = strstr(nm, "-x86-darwin");
541 if (p && 0 == strcmp(p, "-x86-darwin"))
542 return True;
543
544 p = strstr(nm, "-amd64-darwin");
545 if (p && 0 == strcmp(p, "-amd64-darwin"))
546 return True;
547
548 return False;
549}
550
551
552int main ( int argc, char** argv )
553{
554 Int r;
555 ULong req_stack_addr = 0;
556 ULong req_stack_size = 0;
557
558 if (argc != 4)
559 fail("args: -stack_addr-arg -stack_size-arg "
560 "name-of-tool-executable-to-modify");
561
562 r= sscanf(argv[1], "0x%llx", &req_stack_addr);
563 if (r != 1) fail("invalid stack_addr arg");
564
565 r= sscanf(argv[2], "0x%llx", &req_stack_size);
566 if (r != 1) fail("invalid stack_size arg");
567
568 fprintf(stderr, "fixup_macho_loadcmds: "
569 "requested stack_addr (top) 0x%llx, "
570 "stack_size 0x%llx\n", req_stack_addr, req_stack_size );
571
572 if (!is_plausible_tool_exe_name(argv[3]))
573 fail("implausible tool exe name -- not of the form *-{x86,amd64}-darwin");
574
575 fprintf(stderr, "fixup_macho_loadcmds: examining tool exe: %s\n",
576 argv[3] );
577 modify_macho_loadcmds( argv[3], req_stack_addr - req_stack_size,
578 req_stack_size );
579
580 return 0;
581}
582
/*
   For reference: an example __LINKEDIT load command of the kind this
   program overwrites (same values as the example quoted in the
   comment at the top of this file).

      cmd LC_SEGMENT_64
      cmdsize 72
      segname __LINKEDIT
      vmaddr 0x0000000138dea000
      vmsize 0x00000000000ad000
      fileoff 2658304
      filesize 705632
      maxprot 0x00000007
      initprot 0x00000001
      nsects 0
      flags 0x0
*/
596
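/*
   For reference: an example __UNIXSTACK load command, for comparison
   with the __LINKEDIT one.  Note that when this program converts a
   __LINKEDIT entry it sets fileoff to 0, not to a value like the one
   shown here.

      cmd LC_SEGMENT_64
      cmdsize 72
      segname __UNIXSTACK
      vmaddr 0x0000000133800000
      vmsize 0x0000000000800000
      fileoff 2498560
      filesize 0
      maxprot 0x00000007
      initprot 0x00000003
      nsects 0
      flags 0x0
*/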