blob: 6fca1bb89148c7ac46047413db6c2c26442234bb [file] [log] [blame]
njnf76d27a2009-05-28 01:53:07 +00001
2/*--------------------------------------------------------------------*/
3/*--- Reading of syms & debug info from Mach-O files. ---*/
4/*--- readmacho.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
10
sewardj0f157dd2013-10-18 14:27:36 +000011 Copyright (C) 2005-2013 Apple Inc.
njnf76d27a2009-05-28 01:53:07 +000012 Greg Parker gparker@apple.com
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30*/
31
njn8b68b642009-06-24 00:37:09 +000032#if defined(VGO_darwin)
33
njnf76d27a2009-05-28 01:53:07 +000034#include "pub_core_basics.h"
35#include "pub_core_vki.h"
36#include "pub_core_libcbase.h"
37#include "pub_core_libcprint.h"
38#include "pub_core_libcassert.h"
39#include "pub_core_libcfile.h"
40#include "pub_core_libcproc.h"
41#include "pub_core_aspacemgr.h" /* for mmaping debuginfo files */
42#include "pub_core_machine.h" /* VG_ELF_CLASS */
43#include "pub_core_options.h"
44#include "pub_core_oset.h"
45#include "pub_core_tooliface.h" /* VG_(needs) */
46#include "pub_core_xarray.h"
47#include "pub_core_clientstate.h"
48#include "pub_core_debuginfo.h"
49
njnf76d27a2009-05-28 01:53:07 +000050#include "priv_misc.h"
sewardj5d616df2013-07-02 08:07:15 +000051#include "priv_image.h"
52#include "priv_d3basics.h"
njnf76d27a2009-05-28 01:53:07 +000053#include "priv_tytypes.h"
54#include "priv_storage.h"
55#include "priv_readmacho.h"
56#include "priv_readdwarf.h"
57#include "priv_readdwarf3.h"
58#include "priv_readstabs.h"
59
60/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
61#include <mach-o/loader.h>
62#include <mach-o/nlist.h>
63#include <mach-o/fat.h>
64/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
65
66#if VG_WORDSIZE == 4
67# define MAGIC MH_MAGIC
68# define MACH_HEADER mach_header
69# define LC_SEGMENT_CMD LC_SEGMENT
70# define SEGMENT_COMMAND segment_command
71# define SECTION section
72# define NLIST nlist
73#else
74# define MAGIC MH_MAGIC_64
75# define MACH_HEADER mach_header_64
76# define LC_SEGMENT_CMD LC_SEGMENT_64
77# define SEGMENT_COMMAND segment_command_64
78# define SECTION section_64
79# define NLIST nlist_64
80#endif
81
82
83/*------------------------------------------------------------*/
84/*--- ---*/
85/*--- Mach-O file mapping/unmapping helpers ---*/
86/*--- ---*/
87/*------------------------------------------------------------*/
88
/* A DiSlice is used to handle the thin/fat distinction for Mach-O
   images.  It ties together two things:
   (1) the entire mapped-in ("primary") image, fat headers, kitchen sink,
       whatnot: the entire file.  This is the DiImage* that is the backing
       for the DiSlice.
   (2) the Mach-O object of interest, which is presumably somewhere inside
       the primary image.  map_image_aboard() below, which generates this
       info, will carefully check that the slice's offset and size denote
       a range that falls entirely inside the primary image.
*/
njnf76d27a2009-05-28 01:53:07 +000098
99Bool ML_(is_macho_object_file)( const void* buf, SizeT szB )
100{
101 /* (JRS: the Mach-O headers might not be in this mapped data,
102 because we only mapped a page for this initial check,
103 or at least not very much, and what's at the start of the file
104 is in general a so-called fat header. The Mach-O object we're
105 interested in could be arbitrarily far along the image, and so
106 we can't assume its header will fall within this page.) */
107
108 /* But we can say that either it's a fat object, in which case it
109 begins with a fat header, or it's unadorned Mach-O, in which
110 case it starts with a normal header. At least do what checks we
111 can to establish whether or not we're looking at something
112 sane. */
113
114 const struct fat_header* fh_be = buf;
115 const struct MACH_HEADER* mh = buf;
116
117 vg_assert(buf);
118 if (szB < sizeof(struct fat_header))
119 return False;
120 if (VG_(ntohl)(fh_be->magic) == FAT_MAGIC)
121 return True;
122
123 if (szB < sizeof(struct MACH_HEADER))
124 return False;
125 if (mh->magic == MAGIC)
126 return True;
127
128 return False;
129}
130
131
132/* Unmap an image mapped in by map_image_aboard. */
sewardj5d616df2013-07-02 08:07:15 +0000133static void unmap_image ( /*MOD*/DiSlice* sli )
njnf76d27a2009-05-28 01:53:07 +0000134{
sewardj5d616df2013-07-02 08:07:15 +0000135 vg_assert(sli);
136 if (ML_(sli_is_valid)(*sli)) {
137 ML_(img_done)(sli->img);
138 *sli = DiSlice_INVALID;
139 }
njnf76d27a2009-05-28 01:53:07 +0000140}
141
142
sewardj5d616df2013-07-02 08:07:15 +0000143/* Open the given file, find the thin part if necessary, do some
144 checks, and return a DiSlice containing details of both the thin
145 part and (implicitly, via the contained DiImage*) the fat part.
146 returns DiSlice_INVALID if it fails. If it succeeds, the returned
147 slice is guaranteed to refer to a valid(ish) Mach-O image. */
148static DiSlice map_image_aboard ( DebugInfo* di, /* only for err msgs */
149 const HChar* filename )
njnf76d27a2009-05-28 01:53:07 +0000150{
sewardj5d616df2013-07-02 08:07:15 +0000151 DiSlice sli = DiSlice_INVALID;
njnf76d27a2009-05-28 01:53:07 +0000152
153 /* First off, try to map the thing in. */
sewardj5d616df2013-07-02 08:07:15 +0000154 DiImage* mimg = ML_(img_from_local_file)(filename);
155 if (mimg == NULL) {
156 VG_(message)(Vg_UserMsg, "warning: connection to image %s failed\n",
157 filename );
158 VG_(message)(Vg_UserMsg, " no symbols or debug info loaded\n" );
159 return DiSlice_INVALID;
njnf76d27a2009-05-28 01:53:07 +0000160 }
161
sewardj5d616df2013-07-02 08:07:15 +0000162 /* Now we have a viable DiImage* for it. Look for the embedded
163 Mach-O object. If not findable, close the image and fail. */
164 DiOffT fh_be_ioff = 0;
165 struct fat_header fh_be;
166 struct fat_header fh;
njnf76d27a2009-05-28 01:53:07 +0000167
sewardj5d616df2013-07-02 08:07:15 +0000168 // Assume initially that we have a thin image, and narrow
169 // the bounds if it turns out to be fat. This stores |mimg| as
170 // |sli.img|, so NULL out |mimg| after this point, for the sake of
171 // clarity.
172 sli = ML_(sli_from_img)(mimg);
173 mimg = NULL;
njnf76d27a2009-05-28 01:53:07 +0000174
sewardj5d616df2013-07-02 08:07:15 +0000175 // Check for fat header.
176 if (ML_(img_size)(sli.img) < sizeof(struct fat_header)) {
177 ML_(symerr)(di, True, "Invalid Mach-O file (0 too small).");
178 goto close_and_fail;
njnf76d27a2009-05-28 01:53:07 +0000179 }
180
sewardj5d616df2013-07-02 08:07:15 +0000181 // Fat header is always BIG-ENDIAN
182 ML_(img_get)(&fh_be, sli.img, fh_be_ioff, sizeof(fh_be));
183 VG_(memset)(&fh, 0, sizeof(fh));
184 fh.magic = VG_(ntohl)(fh_be.magic);
185 fh.nfat_arch = VG_(ntohl)(fh_be.nfat_arch);
186 if (fh.magic == FAT_MAGIC) {
187 // Look for a good architecture.
188 if (ML_(img_size)(sli.img) < sizeof(struct fat_header)
189 + fh.nfat_arch * sizeof(struct fat_arch)) {
190 ML_(symerr)(di, True, "Invalid Mach-O file (1 too small).");
191 goto close_and_fail;
192 }
193 DiOffT arch_be_ioff;
194 Int f;
195 for (f = 0, arch_be_ioff = sizeof(struct fat_header);
196 f < fh.nfat_arch;
197 f++, arch_be_ioff += sizeof(struct fat_arch)) {
198# if defined(VGA_ppc)
199 Int cputype = CPU_TYPE_POWERPC;
200# elif defined(VGA_ppc64)
201 Int cputype = CPU_TYPE_POWERPC64;
202# elif defined(VGA_x86)
203 Int cputype = CPU_TYPE_X86;
204# elif defined(VGA_amd64)
205 Int cputype = CPU_TYPE_X86_64;
206# else
207# error "unknown architecture"
208# endif
209 struct fat_arch arch_be;
210 struct fat_arch arch;
211 ML_(img_get)(&arch_be, sli.img, arch_be_ioff, sizeof(arch_be));
212 VG_(memset)(&arch, 0, sizeof(arch));
213 arch.cputype = VG_(ntohl)(arch_be.cputype);
214 arch.cpusubtype = VG_(ntohl)(arch_be.cpusubtype);
215 arch.offset = VG_(ntohl)(arch_be.offset);
216 arch.size = VG_(ntohl)(arch_be.size);
217 if (arch.cputype == cputype) {
218 if (ML_(img_size)(sli.img) < arch.offset + arch.size) {
219 ML_(symerr)(di, True, "Invalid Mach-O file (2 too small).");
220 goto close_and_fail;
221 }
222 /* Found a suitable arch. Narrow down the slice accordingly. */
223 sli.ioff = arch.offset;
224 sli.szB = arch.size;
225 break;
226 }
227 }
228 if (f == fh.nfat_arch) {
229 ML_(symerr)(di, True,
230 "No acceptable architecture found in fat file.");
231 goto close_and_fail;
232 }
233 }
234
235 /* Sanity check what we found. */
236
237 /* assured by logic above */
238 vg_assert(ML_(img_size)(sli.img) >= sizeof(struct fat_header));
239
240 if (sli.szB < sizeof(struct MACH_HEADER)) {
241 ML_(symerr)(di, True, "Invalid Mach-O file (3 too small).");
242 goto close_and_fail;
243 }
244
245 if (sli.szB > ML_(img_size)(sli.img)) {
246 ML_(symerr)(di, True, "Invalid Mach-O file (thin bigger than fat).");
247 goto close_and_fail;
248 }
249
250 if (sli.ioff >= 0 && sli.ioff + sli.szB <= ML_(img_size)(sli.img)) {
251 /* thin entirely within fat, as expected */
252 } else {
253 ML_(symerr)(di, True, "Invalid Mach-O file (thin not inside fat).");
254 goto close_and_fail;
255 }
256
257 /* Peer at the Mach header for the thin object, starting at the
258 beginning of the slice, to check it's at least marginally
259 sane. */
260 struct MACH_HEADER mh;
261 ML_(cur_read_get)(&mh, ML_(cur_from_sli)(sli), sizeof(mh));
262 if (mh.magic != MAGIC) {
263 ML_(symerr)(di, True, "Invalid Mach-O file (bad magic).");
264 goto close_and_fail;
265 }
266
267 if (sli.szB < sizeof(struct MACH_HEADER) + mh.sizeofcmds) {
268 ML_(symerr)(di, True, "Invalid Mach-O file (4 too small).");
269 goto close_and_fail;
270 }
271
272 /* "main image is plausible" */
273 vg_assert(sli.img);
274 vg_assert(ML_(img_size)(sli.img) > 0);
275 /* "thin image exists and is a sub-part (or all) of main image" */
276 vg_assert(sli.ioff >= 0);
277 vg_assert(sli.szB > 0);
278 vg_assert(sli.ioff + sli.szB <= ML_(img_size)(sli.img));
279 return sli; /* success */
njnf76d27a2009-05-28 01:53:07 +0000280 /*NOTREACHED*/
281
sewardj5d616df2013-07-02 08:07:15 +0000282 close_and_fail:
283 unmap_image(&sli);
284 return DiSlice_INVALID; /* bah! */
njnf76d27a2009-05-28 01:53:07 +0000285}
286
287
288/*------------------------------------------------------------*/
289/*--- ---*/
290/*--- Mach-O symbol table reading ---*/
291/*--- ---*/
292/*------------------------------------------------------------*/
293
294/* Read a symbol table (nlist). Add the resulting candidate symbols
295 to 'syms'; the caller will post-process them and hand them off to
296 ML_(addSym) itself. */
297static
298void read_symtab( /*OUT*/XArray* /* DiSym */ syms,
299 struct _DebugInfo* di,
sewardj5d616df2013-07-02 08:07:15 +0000300 DiCursor symtab_cur, UInt symtab_count,
301 DiCursor strtab_cur, UInt strtab_sz )
njnf76d27a2009-05-28 01:53:07 +0000302{
303 Int i;
sewardjc448a642011-08-15 10:07:56 +0000304 DiSym disym;
njnf76d27a2009-05-28 01:53:07 +0000305
sewardj5d616df2013-07-02 08:07:15 +0000306 // "start_according_to_valgrind"
307 static HChar* s_a_t_v = NULL; /* do not make non-static */
njnf76d27a2009-05-28 01:53:07 +0000308
sewardj5d616df2013-07-02 08:07:15 +0000309 for (i = 0; i < symtab_count; i++) {
310 struct NLIST nl;
311 ML_(cur_read_get)(&nl,
312 ML_(cur_plus)(symtab_cur, i * sizeof(struct NLIST)),
313 sizeof(nl));
314
315 Addr sym_addr = 0;
316 if ((nl.n_type & N_TYPE) == N_SECT) {
317 sym_addr = di->text_bias + nl.n_value;
318 /*} else if ((nl.n_type & N_TYPE) == N_ABS) {
njnf76d27a2009-05-28 01:53:07 +0000319 GrP fixme don't ignore absolute symbols?
sewardj5d616df2013-07-02 08:07:15 +0000320 sym_addr = nl.n_value; */
njnf76d27a2009-05-28 01:53:07 +0000321 } else {
322 continue;
323 }
324
sewardj5d616df2013-07-02 08:07:15 +0000325 if (di->trace_symtab) {
326 HChar* str = ML_(cur_read_strdup)(
327 ML_(cur_plus)(strtab_cur, nl.n_un.n_strx),
328 "di.read_symtab.1");
329 VG_(printf)("nlist raw: avma %010lx %s\n", sym_addr, str );
330 ML_(dinfo_free)(str);
331 }
njnf76d27a2009-05-28 01:53:07 +0000332
333 /* If no part of the symbol falls within the mapped range,
334 ignore it. */
335 if (sym_addr <= di->text_avma
336 || sym_addr >= di->text_avma+di->text_size) {
337 continue;
338 }
339
340 /* skip names which point outside the string table;
341 following these risks segfaulting Valgrind */
sewardj5d616df2013-07-02 08:07:15 +0000342 if (nl.n_un.n_strx < 0 || nl.n_un.n_strx >= strtab_sz) {
njnf76d27a2009-05-28 01:53:07 +0000343 continue;
sewardj5d616df2013-07-02 08:07:15 +0000344 }
345
346 HChar* name
347 = ML_(cur_read_strdup)( ML_(cur_plus)(strtab_cur, nl.n_un.n_strx),
348 "di.read_symtab.2");
njnf76d27a2009-05-28 01:53:07 +0000349
350 /* skip nameless symbols; these appear to be common, but
351 useless */
sewardj5d616df2013-07-02 08:07:15 +0000352 if (*name == 0) {
353 ML_(dinfo_free)(name);
njnf76d27a2009-05-28 01:53:07 +0000354 continue;
sewardj5d616df2013-07-02 08:07:15 +0000355 }
njnf76d27a2009-05-28 01:53:07 +0000356
sewardjc448a642011-08-15 10:07:56 +0000357 disym.addr = sym_addr;
358 disym.tocptr = 0;
359 disym.pri_name = ML_(addStr)(di, name, -1);
360 disym.sec_names = NULL;
361 disym.size = // let canonicalize fix it
362 di->text_avma+di->text_size - sym_addr;
363 disym.isText = True;
364 disym.isIFunc = False;
njnf76d27a2009-05-28 01:53:07 +0000365 // Lots of user function names get prepended with an underscore. Eg. the
366 // function 'f' becomes the symbol '_f'. And the "below main"
367 // function is called "start". So we skip the leading underscore, and
368 // if we see 'start' and --show-below-main=no, we rename it as
369 // "start_according_to_valgrind", which makes it easy to spot later
370 // and display as "(below main)".
sewardjc448a642011-08-15 10:07:56 +0000371 if (disym.pri_name[0] == '_') {
372 disym.pri_name++;
373 }
374 else if (!VG_(clo_show_below_main) && VG_STREQ(disym.pri_name, "start")) {
njnf76d27a2009-05-28 01:53:07 +0000375 if (s_a_t_v == NULL)
376 s_a_t_v = ML_(addStr)(di, "start_according_to_valgrind", -1);
377 vg_assert(s_a_t_v);
sewardjc448a642011-08-15 10:07:56 +0000378 disym.pri_name = s_a_t_v;
njnf76d27a2009-05-28 01:53:07 +0000379 }
380
sewardjc448a642011-08-15 10:07:56 +0000381 vg_assert(disym.pri_name);
382 VG_(addToXA)( syms, &disym );
sewardj5d616df2013-07-02 08:07:15 +0000383 ML_(dinfo_free)(name);
njnf76d27a2009-05-28 01:53:07 +0000384 }
385}
386
387
388/* Compare DiSyms by their start address, and for equal addresses, use
sewardjc448a642011-08-15 10:07:56 +0000389 the primary name as a secondary sort key. */
floriane07cbb32013-01-15 03:19:54 +0000390static Int cmp_DiSym_by_start_then_name ( const void* v1, const void* v2 )
njnf76d27a2009-05-28 01:53:07 +0000391{
floriane07cbb32013-01-15 03:19:54 +0000392 const DiSym* s1 = (DiSym*)v1;
393 const DiSym* s2 = (DiSym*)v2;
njnf76d27a2009-05-28 01:53:07 +0000394 if (s1->addr < s2->addr) return -1;
395 if (s1->addr > s2->addr) return 1;
sewardjc448a642011-08-15 10:07:56 +0000396 return VG_(strcmp)(s1->pri_name, s2->pri_name);
njnf76d27a2009-05-28 01:53:07 +0000397}
398
399/* 'cand' is a bunch of candidate symbols obtained by reading
400 nlist-style symbol table entries. Their ends may overlap, so sort
401 them and truncate them accordingly. The code in this routine is
402 copied almost verbatim from read_symbol_table() in readxcoff.c. */
403static void tidy_up_cand_syms ( /*MOD*/XArray* /* of DiSym */ syms,
404 Bool trace_symtab )
405{
406 Word nsyms, i, j, k, m;
407
408 nsyms = VG_(sizeXA)(syms);
409
410 VG_(setCmpFnXA)(syms, cmp_DiSym_by_start_then_name);
411 VG_(sortXA)(syms);
412
413 /* We only know for sure the start addresses (actual VMAs) of
414 symbols, and an overestimation of their end addresses. So sort
415 by start address, then clip each symbol so that its end address
416 does not overlap with the next one along.
417
418 There is a small refinement: if a group of symbols have the same
419 address, treat them as a group: find the next symbol along that
420 has a higher start address, and clip all of the group
421 accordingly. This clips the group as a whole so as not to
422 overlap following symbols. This leaves prefersym() in
423 storage.c, which is not nlist-specific, to later decide which of
424 the symbols in the group to keep.
425
426 Another refinement is that we need to get rid of symbols which,
427 after clipping, have identical starts, ends, and names. So the
428 sorting uses the name as a secondary key.
429 */
430
431 for (i = 0; i < nsyms; i++) {
432 for (k = i+1;
433 k < nsyms
434 && ((DiSym*)VG_(indexXA)(syms,i))->addr
435 == ((DiSym*)VG_(indexXA)(syms,k))->addr;
436 k++)
437 ;
438 /* So now [i .. k-1] is a group all with the same start address.
439 Clip their ending addresses so they don't overlap [k]. In
440 the normal case (no overlaps), k == i+1. */
441 if (k < nsyms) {
442 DiSym* next = (DiSym*)VG_(indexXA)(syms,k);
443 for (m = i; m < k; m++) {
444 DiSym* here = (DiSym*)VG_(indexXA)(syms,m);
445 vg_assert(here->addr < next->addr);
446 if (here->addr + here->size > next->addr)
447 here->size = next->addr - here->addr;
448 }
449 }
450 i = k-1;
451 vg_assert(i <= nsyms);
452 }
453
454 j = 0;
455 if (nsyms > 0) {
456 j = 1;
457 for (i = 1; i < nsyms; i++) {
458 DiSym *s_j1, *s_j, *s_i;
459 vg_assert(j <= i);
460 s_j1 = (DiSym*)VG_(indexXA)(syms, j-1);
461 s_j = (DiSym*)VG_(indexXA)(syms, j);
462 s_i = (DiSym*)VG_(indexXA)(syms, i);
463 if (s_i->addr != s_j1->addr
464 || s_i->size != s_j1->size
sewardjc448a642011-08-15 10:07:56 +0000465 || 0 != VG_(strcmp)(s_i->pri_name, s_j1->pri_name)) {
njnf76d27a2009-05-28 01:53:07 +0000466 *s_j = *s_i;
467 j++;
468 } else {
469 if (trace_symtab)
470 VG_(printf)("nlist cleanup: dump duplicate avma %010lx %s\n",
sewardjc448a642011-08-15 10:07:56 +0000471 s_i->addr, s_i->pri_name );
njnf76d27a2009-05-28 01:53:07 +0000472 }
473 }
474 }
475 vg_assert(j >= 0 && j <= nsyms);
476 VG_(dropTailXA)(syms, nsyms - j);
477}
478
479
480/*------------------------------------------------------------*/
481/*--- ---*/
482/*--- Mach-O top-level processing ---*/
483/*--- ---*/
484/*------------------------------------------------------------*/
485
486#if !defined(APPLE_DSYM_EXT_AND_SUBDIRECTORY)
487#define APPLE_DSYM_EXT_AND_SUBDIRECTORY ".dSYM/Contents/Resources/DWARF/"
488#endif
489
490
sewardj5d616df2013-07-02 08:07:15 +0000491static Bool file_exists_p(const HChar *path)
njnf76d27a2009-05-28 01:53:07 +0000492{
493 struct vg_stat sbuf;
494 SysRes res = VG_(stat)(path, &sbuf);
495 return sr_isError(res) ? False : True;
496}
497
498
499/* Search for an existing dSYM file as a possible separate debug file.
500 Adapted from gdb. */
sewardj5d616df2013-07-02 08:07:15 +0000501static HChar *
502find_separate_debug_file (const HChar *executable_name)
njnf76d27a2009-05-28 01:53:07 +0000503{
sewardj5d616df2013-07-02 08:07:15 +0000504 const HChar *basename_str;
505 HChar *dot_ptr;
506 HChar *slash_ptr;
507 HChar *dsymfile;
njnf76d27a2009-05-28 01:53:07 +0000508
509 /* Make sure the object file name itself doesn't contain ".dSYM" in it or we
510 will end up with an infinite loop where after we add a dSYM symbol file,
511 it will then enter this function asking if there is a debug file for the
512 dSYM file itself. */
513 if (VG_(strcasestr) (executable_name, ".dSYM") == NULL)
514 {
515 /* Check for the existence of a .dSYM file for a given executable. */
516 basename_str = VG_(basename) (executable_name);
517 dsymfile = ML_(dinfo_zalloc)("di.readmacho.dsymfile",
518 VG_(strlen) (executable_name)
519 + VG_(strlen) (APPLE_DSYM_EXT_AND_SUBDIRECTORY)
520 + VG_(strlen) (basename_str)
521 + 1
522 );
523
524 /* First try for the dSYM in the same directory as the original file. */
525 VG_(strcpy) (dsymfile, executable_name);
526 VG_(strcat) (dsymfile, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
527 VG_(strcat) (dsymfile, basename_str);
528
529 if (file_exists_p (dsymfile))
530 return dsymfile;
531
532 /* Now search for any parent directory that has a '.' in it so we can find
533 Mac OS X applications, bundles, plugins, and any other kinds of files.
534 Mac OS X application bundles wil have their program in
535 "/some/path/MyApp.app/Contents/MacOS/MyApp" (or replace ".app" with
536 ".bundle" or ".plugin" for other types of bundles). So we look for any
537 prior '.' character and try appending the apple dSYM extension and
538 subdirectory and see if we find an existing dSYM file (in the above
539 MyApp example the dSYM would be at either:
540 "/some/path/MyApp.app.dSYM/Contents/Resources/DWARF/MyApp" or
541 "/some/path/MyApp.dSYM/Contents/Resources/DWARF/MyApp". */
542 VG_(strcpy) (dsymfile, VG_(dirname) (executable_name));
543 while ((dot_ptr = VG_(strrchr) (dsymfile, '.')))
544 {
545 /* Find the directory delimiter that follows the '.' character since
546 we now look for a .dSYM that follows any bundle extension. */
547 slash_ptr = VG_(strchr) (dot_ptr, '/');
548 if (slash_ptr)
549 {
550 /* NULL terminate the string at the '/' character and append
551 the path down to the dSYM file. */
552 *slash_ptr = '\0';
553 VG_(strcat) (slash_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
554 VG_(strcat) (slash_ptr, basename_str);
555 if (file_exists_p (dsymfile))
556 return dsymfile;
557 }
558
559 /* NULL terminate the string at the '.' character and append
560 the path down to the dSYM file. */
561 *dot_ptr = '\0';
562 VG_(strcat) (dot_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
563 VG_(strcat) (dot_ptr, basename_str);
564 if (file_exists_p (dsymfile))
565 return dsymfile;
566
567 /* NULL terminate the string at the '.' locatated by the strrchr()
568 function again. */
569 *dot_ptr = '\0';
570
571 /* We found a previous extension '.' character and did not find a
572 dSYM file so now find previous directory delimiter so we don't
573 try multiple times on a file name that may have a version number
574 in it such as "/some/path/MyApp.6.0.4.app". */
575 slash_ptr = VG_(strrchr) (dsymfile, '/');
576 if (!slash_ptr)
577 break;
578 /* NULL terminate the string at the previous directory character
579 and search again. */
580 *slash_ptr = '\0';
581 }
582 }
583
584 return NULL;
585}
586
587
sewardj5d616df2013-07-02 08:07:15 +0000588/* Given a DiSlice covering the entire Mach-O thin image, find the
589 DiSlice for the specified (segname, sectname) pairing, if
590 possible. */
591static DiSlice getsectdata ( DiSlice img,
592 const HChar *segname, const HChar *sectname )
njnf76d27a2009-05-28 01:53:07 +0000593{
sewardj5d616df2013-07-02 08:07:15 +0000594 DiCursor cur = ML_(cur_from_sli)(img);
njnf76d27a2009-05-28 01:53:07 +0000595
sewardj5d616df2013-07-02 08:07:15 +0000596 struct MACH_HEADER mh;
597 ML_(cur_step_get)(&mh, &cur, sizeof(mh));
598
599 Int c;
600 for (c = 0; c < mh.ncmds; c++) {
601 struct load_command cmd;
602 ML_(cur_read_get)(&cmd, cur, sizeof(cmd));
603 if (cmd.cmd == LC_SEGMENT_CMD) {
604 struct SEGMENT_COMMAND seg;
605 ML_(cur_read_get)(&seg, cur, sizeof(seg));
606 if (0 == VG_(strncmp(&seg.segname[0],
607 segname, sizeof(seg.segname)))) {
608 DiCursor sects_cur = ML_(cur_plus)(cur, sizeof(seg));
njnf76d27a2009-05-28 01:53:07 +0000609 Int s;
sewardj5d616df2013-07-02 08:07:15 +0000610 for (s = 0; s < seg.nsects; s++) {
611 struct SECTION sect;
612 ML_(cur_step_get)(&sect, &sects_cur, sizeof(sect));
613 if (0 == VG_(strncmp(sect.sectname, sectname,
614 sizeof(sect.sectname)))) {
615 DiSlice res = img;
616 res.ioff = sect.offset;
617 res.szB = sect.size;
618 return res;
njnf76d27a2009-05-28 01:53:07 +0000619 }
620 }
sewardj5d616df2013-07-02 08:07:15 +0000621
njnf76d27a2009-05-28 01:53:07 +0000622 }
623 }
sewardj5d616df2013-07-02 08:07:15 +0000624 cur = ML_(cur_plus)(cur, cmd.cmdsize);
njnf76d27a2009-05-28 01:53:07 +0000625 }
626
sewardj5d616df2013-07-02 08:07:15 +0000627 return DiSlice_INVALID;
njnf76d27a2009-05-28 01:53:07 +0000628}
629
630
sewardj5d616df2013-07-02 08:07:15 +0000631/* Brute force just simply search for uuid[0..15] in |sli| */
632static Bool check_uuid_matches ( DiSlice sli, UChar* uuid )
njnf76d27a2009-05-28 01:53:07 +0000633{
sewardj5d616df2013-07-02 08:07:15 +0000634 if (sli.szB < 16)
njnf76d27a2009-05-28 01:53:07 +0000635 return False;
sewardj5d616df2013-07-02 08:07:15 +0000636
637 /* Work through the slice in 1 KB chunks. */
638 UChar first = uuid[0];
639 DiOffT min_off = sli.ioff;
640 DiOffT max1_off = sli.ioff + sli.szB;
641 DiOffT curr_off = min_off;
642 vg_assert(min_off < max1_off);
643 while (1) {
644 vg_assert(curr_off >= min_off && curr_off <= max1_off);
645 if (curr_off == max1_off) break;
646 DiOffT avail = max1_off - curr_off;
647 vg_assert(avail > 0 && avail <= max1_off);
648 if (avail > 1024) avail = 1024;
649 UChar buf[1024];
650 SizeT nGot = ML_(img_get_some)(buf, sli.img, curr_off, avail);
651 vg_assert(nGot >= 1 && nGot <= avail);
652 UInt i;
653 /* Scan through the 1K chunk we got, looking for the start char. */
654 for (i = 0; i < (UInt)nGot; i++) {
sewardjd6daf232013-09-04 06:17:44 +0000655 if (LIKELY(buf[i] != first))
sewardj5d616df2013-07-02 08:07:15 +0000656 continue;
657 /* first char matches. See if we can get 16 bytes at this
658 offset, and compare. */
659 if (curr_off + i < max1_off && max1_off - (curr_off + i) >= 16) {
660 UChar buff16[16];
661 ML_(img_get)(&buff16[0], sli.img, curr_off + i, 16);
662 if (0 == VG_(memcmp)(&buff16[0], &uuid[0], 16))
663 return True;
664 }
sewardj5d616df2013-07-02 08:07:15 +0000665 }
sewardjd6daf232013-09-04 06:17:44 +0000666 curr_off += nGot;
njnf76d27a2009-05-28 01:53:07 +0000667 }
668 return False;
669}
670
671
672/* Heuristic kludge: return True if this looks like an installed
673 standard library; hence we shouldn't consider automagically running
674 dsymutil on it. */
sewardj5d616df2013-07-02 08:07:15 +0000675static Bool is_systemish_library_name ( HChar* name )
njnf76d27a2009-05-28 01:53:07 +0000676{
677 vg_assert(name);
678 if (0 == VG_(strncasecmp)(name, "/usr/", 5)
679 || 0 == VG_(strncasecmp)(name, "/bin/", 5)
680 || 0 == VG_(strncasecmp)(name, "/sbin/", 6)
sewardjca04aa62009-05-31 08:31:06 +0000681 || 0 == VG_(strncasecmp)(name, "/opt/", 5)
sewardj166dc7d2009-05-31 19:59:29 +0000682 || 0 == VG_(strncasecmp)(name, "/sw/", 4)
njnf76d27a2009-05-28 01:53:07 +0000683 || 0 == VG_(strncasecmp)(name, "/System/", 8)
sewardja9269e32009-08-03 01:04:18 +0000684 || 0 == VG_(strncasecmp)(name, "/Library/", 9)
685 || 0 == VG_(strncasecmp)(name, "/Applications/", 14)) {
njnf76d27a2009-05-28 01:53:07 +0000686 return True;
687 } else {
688 return False;
689 }
690}
691
692
693Bool ML_(read_macho_debug_info)( struct _DebugInfo* di )
694{
sewardj5d616df2013-07-02 08:07:15 +0000695 DiSlice msli = DiSlice_INVALID; // the main image
696 DiSlice dsli = DiSlice_INVALID; // the debuginfo image
697 DiCursor sym_cur = DiCursor_INVALID;
698 DiCursor dysym_cur = DiCursor_INVALID;
699 HChar* dsymfilename = NULL;
700 Bool have_uuid = False;
701 UChar uuid[16];
702 Word i;
sewardj6b5625b2012-07-13 11:24:05 +0000703 struct _DebugInfoMapping* rx_map = NULL;
704 struct _DebugInfoMapping* rw_map = NULL;
njnf76d27a2009-05-28 01:53:07 +0000705
706 /* mmap the object file to look for di->soname and di->text_bias
707 and uuid and nlist and STABS */
708
sewardj94bb7722011-09-20 22:36:26 +0000709 /* This should be ensured by our caller (that we're in the accept
710 state). */
711 vg_assert(di->fsm.have_rx_map);
712 vg_assert(di->fsm.have_rw_map);
njnf76d27a2009-05-28 01:53:07 +0000713
sewardj6b5625b2012-07-13 11:24:05 +0000714 for (i = 0; i < VG_(sizeXA)(di->fsm.maps); i++) {
715 struct _DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, i);
716 if (map->rx && !rx_map)
717 rx_map = map;
718 if (map->rw && !rw_map)
719 rw_map = map;
720 if (rx_map && rw_map)
721 break;
722 }
723 vg_assert(rx_map);
724 vg_assert(rw_map);
725
726 if (VG_(clo_verbosity) > 1)
727 VG_(message)(Vg_DebugMsg,
728 "%s (rx at %#lx, rw at %#lx)\n", di->fsm.filename,
729 rx_map->avma, rw_map->avma );
730
njnf76d27a2009-05-28 01:53:07 +0000731 VG_(memset)(&uuid, 0, sizeof(uuid));
732
sewardj5d616df2013-07-02 08:07:15 +0000733 msli = map_image_aboard( di, di->fsm.filename );
734 if (!ML_(sli_is_valid)(msli)) {
735 ML_(symerr)(di, False, "Connect to main image failed.");
736 goto fail;
737 }
njnf76d27a2009-05-28 01:53:07 +0000738
sewardj5d616df2013-07-02 08:07:15 +0000739 vg_assert(msli.img != NULL && msli.szB > 0);
njnf76d27a2009-05-28 01:53:07 +0000740
741 /* Poke around in the Mach-O header, to find some important
742 stuff. */
743 // Find LC_SYMTAB and LC_DYSYMTAB, if present.
744 // Read di->soname from LC_ID_DYLIB if present,
745 // or from LC_ID_DYLINKER if present,
746 // or use "NONE".
747 // Get di->text_bias (aka slide) based on the corresponding LC_SEGMENT
748 // Get uuid for later dsym search
749
750 di->text_bias = 0;
751
sewardj5d616df2013-07-02 08:07:15 +0000752 {
753 DiCursor cmd_cur = ML_(cur_from_sli)(msli);
njnf76d27a2009-05-28 01:53:07 +0000754
sewardj5d616df2013-07-02 08:07:15 +0000755 struct MACH_HEADER mh;
756 ML_(cur_step_get)(&mh, &cmd_cur, sizeof(mh));
757
758 /* Now cur_cmd points just after the Mach header, right at the
759 start of the load commands, which is where we need it to start
760 the following loop. */
761
762 Int c;
763 for (c = 0; c < mh.ncmds; c++) {
764 struct load_command cmd;
765 ML_(cur_read_get)(&cmd, cmd_cur, sizeof(cmd));
766
767 if (cmd.cmd == LC_SYMTAB) {
768 sym_cur = cmd_cur;
njnf76d27a2009-05-28 01:53:07 +0000769 }
sewardj5d616df2013-07-02 08:07:15 +0000770 else if (cmd.cmd == LC_DYSYMTAB) {
771 dysym_cur = cmd_cur;
njnf76d27a2009-05-28 01:53:07 +0000772 }
sewardj5d616df2013-07-02 08:07:15 +0000773 else if (cmd.cmd == LC_ID_DYLIB && mh.filetype == MH_DYLIB) {
njnf76d27a2009-05-28 01:53:07 +0000774 // GrP fixme bundle?
sewardj5d616df2013-07-02 08:07:15 +0000775 struct dylib_command dcmd;
776 ML_(cur_read_get)(&dcmd, cmd_cur, sizeof(dcmd));
777 DiCursor dylibname_cur
778 = ML_(cur_plus)(cmd_cur, dcmd.dylib.name.offset);
779 HChar* dylibname
780 = ML_(cur_read_strdup)(dylibname_cur, "di.rmdi.1");
781 HChar* soname = VG_(strrchr)(dylibname, '/');
njnf76d27a2009-05-28 01:53:07 +0000782 if (!soname) soname = dylibname;
783 else soname++;
784 di->soname = ML_(dinfo_strdup)("di.readmacho.dylibname",
785 soname);
sewardj5d616df2013-07-02 08:07:15 +0000786 ML_(dinfo_free)(dylibname);
njnf76d27a2009-05-28 01:53:07 +0000787 }
sewardj5d616df2013-07-02 08:07:15 +0000788 else if (cmd.cmd==LC_ID_DYLINKER && mh.filetype==MH_DYLINKER) {
789 struct dylinker_command dcmd;
790 ML_(cur_read_get)(&dcmd, cmd_cur, sizeof(dcmd));
791 DiCursor dylinkername_cur
792 = ML_(cur_plus)(cmd_cur, dcmd.name.offset);
793 HChar* dylinkername
794 = ML_(cur_read_strdup)(dylinkername_cur, "di.rmdi.2");
795 HChar* soname = VG_(strrchr)(dylinkername, '/');
njnf76d27a2009-05-28 01:53:07 +0000796 if (!soname) soname = dylinkername;
797 else soname++;
798 di->soname = ML_(dinfo_strdup)("di.readmacho.dylinkername",
799 soname);
sewardj5d616df2013-07-02 08:07:15 +0000800 ML_(dinfo_free)(dylinkername);
njnf76d27a2009-05-28 01:53:07 +0000801 }
802
803 // A comment from Julian about why varinfo[35] fail:
804 //
805 // My impression is, from comparing the output of otool -l for these
806 // executables with the logic in ML_(read_macho_debug_info),
807 // specifically the part that begins "else if (cmd->cmd ==
808 // LC_SEGMENT_CMD) {", that it's a complete hack which just happens
809 // to work ok for text symbols. In particular, it appears to assume
810 // that in a "struct load_command" of type LC_SEGMENT_CMD, the first
811 // "struct SEGMENT_COMMAND" inside it is going to contain the info we
812 // need. However, otool -l shows, and also the Apple docs state,
813 // that a struct load_command may contain an arbitrary number of
814 // struct SEGMENT_COMMANDs, so I'm not sure why it's OK to merely
815 // snarf the first. But I'm not sure about this.
816 //
817 // The "Try for __DATA" block below simply adds acquisition of data
818 // svma/bias values using the same assumption. It also needs
819 // (probably) to deal with bss sections, but I don't understand how
820 // this all ties together really, so it requires further study.
821 //
822 // If you can get your head around the relationship between MachO
823 // segments, sections and load commands, this might be relatively
824 // easy to fix properly.
825 //
826 // Basically we need to come up with plausible numbers for di->
827 // {text,data,bss}_{avma,svma}, from which the _bias numbers are
828 // then trivially derived. Then I think the debuginfo reader should
829 // work pretty well.
sewardj5d616df2013-07-02 08:07:15 +0000830 else if (cmd.cmd == LC_SEGMENT_CMD) {
831 struct SEGMENT_COMMAND seg;
832 ML_(cur_read_get)(&seg, cmd_cur, sizeof(seg));
njnf76d27a2009-05-28 01:53:07 +0000833 /* Try for __TEXT */
834 if (!di->text_present
sewardj5d616df2013-07-02 08:07:15 +0000835 && 0 == VG_(strcmp)(&seg.segname[0], "__TEXT")
njnf76d27a2009-05-28 01:53:07 +0000836 /* DDD: is the next line a kludge? -- JRS */
sewardj5d616df2013-07-02 08:07:15 +0000837 && seg.fileoff == 0 && seg.filesize != 0) {
njnf76d27a2009-05-28 01:53:07 +0000838 di->text_present = True;
sewardj5d616df2013-07-02 08:07:15 +0000839 di->text_svma = (Addr)seg.vmaddr;
sewardj6b5625b2012-07-13 11:24:05 +0000840 di->text_avma = rx_map->avma;
sewardj5d616df2013-07-02 08:07:15 +0000841 di->text_size = seg.vmsize;
njnf76d27a2009-05-28 01:53:07 +0000842 di->text_bias = di->text_avma - di->text_svma;
843 /* Make the _debug_ values be the same as the
844 svma/bias for the primary object, since there is
845 no secondary (debuginfo) object, but nevertheless
846 downstream biasing of Dwarf3 relies on the
847 _debug_ values. */
848 di->text_debug_svma = di->text_svma;
849 di->text_debug_bias = di->text_bias;
850 }
851 /* Try for __DATA */
852 if (!di->data_present
sewardj5d616df2013-07-02 08:07:15 +0000853 && 0 == VG_(strcmp)(&seg.segname[0], "__DATA")
854 /* && DDD:seg->fileoff == 0 */ && seg.filesize != 0) {
njnf76d27a2009-05-28 01:53:07 +0000855 di->data_present = True;
sewardj5d616df2013-07-02 08:07:15 +0000856 di->data_svma = (Addr)seg.vmaddr;
sewardj6b5625b2012-07-13 11:24:05 +0000857 di->data_avma = rw_map->avma;
sewardj5d616df2013-07-02 08:07:15 +0000858 di->data_size = seg.vmsize;
njnf76d27a2009-05-28 01:53:07 +0000859 di->data_bias = di->data_avma - di->data_svma;
860 di->data_debug_svma = di->data_svma;
861 di->data_debug_bias = di->data_bias;
862 }
863 }
sewardj5d616df2013-07-02 08:07:15 +0000864 else if (cmd.cmd == LC_UUID) {
865 ML_(cur_read_get)(&uuid, cmd_cur, sizeof(uuid));
njnf76d27a2009-05-28 01:53:07 +0000866 have_uuid = True;
867 }
sewardj5d616df2013-07-02 08:07:15 +0000868 // Move the cursor along
869 cmd_cur = ML_(cur_plus)(cmd_cur, cmd.cmdsize);
njnf76d27a2009-05-28 01:53:07 +0000870 }
871 }
872
873 if (!di->soname) {
874 di->soname = ML_(dinfo_strdup)("di.readmacho.noname", "NONE");
875 }
876
sewardjc5077b92011-09-06 11:26:31 +0000877 if (di->trace_symtab) {
878 VG_(printf)("\n");
879 VG_(printf)("SONAME = %s\n", di->soname);
880 VG_(printf)("\n");
881 }
882
njnf76d27a2009-05-28 01:53:07 +0000883 /* Now we have the base object to hand. Read symbols from it. */
884
sewardj5d616df2013-07-02 08:07:15 +0000885 // We already asserted that ..
886 vg_assert(msli.img != NULL && msli.szB > 0);
887
888 if (ML_(cur_is_valid)(sym_cur) && ML_(cur_is_valid)(dysym_cur)) {
889
890 struct symtab_command symcmd;
891 struct dysymtab_command dysymcmd;
892
893 ML_(cur_read_get)(&symcmd, sym_cur, sizeof(symcmd));
894 ML_(cur_read_get)(&dysymcmd, dysym_cur, sizeof(dysymcmd));
njnf76d27a2009-05-28 01:53:07 +0000895
896 /* Read nlist symbol table */
sewardj5d616df2013-07-02 08:07:15 +0000897 DiCursor syms = DiCursor_INVALID;
898 DiCursor strs = DiCursor_INVALID;
njnf76d27a2009-05-28 01:53:07 +0000899 XArray* /* DiSym */ candSyms = NULL;
sewardj6b5625b2012-07-13 11:24:05 +0000900 Word nCandSyms;
njnf76d27a2009-05-28 01:53:07 +0000901
sewardj5d616df2013-07-02 08:07:15 +0000902 if (msli.szB < symcmd.stroff + symcmd.strsize
903 || msli.szB < symcmd.symoff + symcmd.nsyms
904 * sizeof(struct NLIST)) {
njnf76d27a2009-05-28 01:53:07 +0000905 ML_(symerr)(di, False, "Invalid Mach-O file (5 too small).");
906 goto fail;
907 }
sewardj5d616df2013-07-02 08:07:15 +0000908 if (dysymcmd.ilocalsym + dysymcmd.nlocalsym > symcmd.nsyms
909 || dysymcmd.iextdefsym + dysymcmd.nextdefsym > symcmd.nsyms) {
njnf76d27a2009-05-28 01:53:07 +0000910 ML_(symerr)(di, False, "Invalid Mach-O file (bad symbol table).");
911 goto fail;
912 }
sewardj5d616df2013-07-02 08:07:15 +0000913
914 syms = ML_(cur_plus)(ML_(cur_from_sli)(msli), symcmd.symoff);
915 strs = ML_(cur_plus)(ML_(cur_from_sli)(msli), symcmd.stroff);
njnf76d27a2009-05-28 01:53:07 +0000916
917 if (VG_(clo_verbosity) > 1)
918 VG_(message)(Vg_DebugMsg,
njn6ff700c2009-07-20 05:39:50 +0000919 " reading syms from primary file (%d %d)\n",
sewardj5d616df2013-07-02 08:07:15 +0000920 dysymcmd.nextdefsym, dysymcmd.nlocalsym );
njnf76d27a2009-05-28 01:53:07 +0000921
922 /* Read candidate symbols into 'candSyms', so we can truncate
923 overlapping ends and generally tidy up, before presenting
924 them to ML_(addSym). */
925 candSyms = VG_(newXA)(
926 ML_(dinfo_zalloc), "di.readmacho.candsyms.1",
927 ML_(dinfo_free), sizeof(DiSym)
928 );
929 vg_assert(candSyms);
930
931 // extern symbols
932 read_symtab(candSyms,
sewardj5d616df2013-07-02 08:07:15 +0000933 di,
934 ML_(cur_plus)(syms,
935 dysymcmd.iextdefsym * sizeof(struct NLIST)),
936 dysymcmd.nextdefsym, strs, symcmd.strsize);
njnf76d27a2009-05-28 01:53:07 +0000937 // static and private_extern symbols
938 read_symtab(candSyms,
sewardj5d616df2013-07-02 08:07:15 +0000939 di,
940 ML_(cur_plus)(syms,
941 dysymcmd.ilocalsym * sizeof(struct NLIST)),
942 dysymcmd.nlocalsym, strs, symcmd.strsize);
njnf76d27a2009-05-28 01:53:07 +0000943
944 /* tidy up the cand syms -- trim overlapping ends. May resize
945 candSyms. */
946 tidy_up_cand_syms( candSyms, di->trace_symtab );
947
948 /* and finally present them to ML_(addSym) */
949 nCandSyms = VG_(sizeXA)( candSyms );
950 for (i = 0; i < nCandSyms; i++) {
951 DiSym* cand = (DiSym*) VG_(indexXA)( candSyms, i );
sewardjc448a642011-08-15 10:07:56 +0000952 vg_assert(cand->pri_name != NULL);
953 vg_assert(cand->sec_names == NULL);
njnf76d27a2009-05-28 01:53:07 +0000954 if (di->trace_symtab)
955 VG_(printf)("nlist final: acquire avma %010lx-%010lx %s\n",
sewardjc448a642011-08-15 10:07:56 +0000956 cand->addr, cand->addr + cand->size - 1,
957 cand->pri_name );
njnf76d27a2009-05-28 01:53:07 +0000958 ML_(addSym)( di, cand );
959 }
960 VG_(deleteXA)( candSyms );
961 }
962
963 /* If there's no UUID in the primary, don't even bother to try and
964 read any DWARF, since we won't be able to verify it matches.
965 Our policy is not to load debug info unless we can verify that
966 it matches the primary. Just declare success at this point.
967 And don't complain to the user, since that would cause us to
968 complain on objects compiled without -g. (Some versions of
969 XCode are observed to omit a UUID entry for objects linked(?)
970 without -g. Others don't appear to omit it.) */
971 if (!have_uuid)
972 goto success;
973
974 /* mmap the dSYM file to look for DWARF debug info. If successful,
sewardj5d616df2013-07-02 08:07:15 +0000975 use the .img and .szB fields of dsli. */
njnf76d27a2009-05-28 01:53:07 +0000976
sewardj94bb7722011-09-20 22:36:26 +0000977 dsymfilename = find_separate_debug_file( di->fsm.filename );
njnf76d27a2009-05-28 01:53:07 +0000978
979 /* Try to load it. */
980 if (dsymfilename) {
981 Bool valid;
982
983 if (VG_(clo_verbosity) > 1)
njn6ff700c2009-07-20 05:39:50 +0000984 VG_(message)(Vg_DebugMsg, " dSYM= %s\n", dsymfilename);
njnf76d27a2009-05-28 01:53:07 +0000985
sewardj5d616df2013-07-02 08:07:15 +0000986 dsli = map_image_aboard( di, dsymfilename );
987 if (!ML_(sli_is_valid)(dsli)) {
988 ML_(symerr)(di, False, "Connect to debuginfo image failed "
989 "(first attempt).");
990 goto fail;
991 }
njnf76d27a2009-05-28 01:53:07 +0000992
993 /* check it has the right uuid. */
994 vg_assert(have_uuid);
sewardj5d616df2013-07-02 08:07:15 +0000995 valid = dsli.img && dsli.szB > 0 && check_uuid_matches( dsli, uuid );
njnf76d27a2009-05-28 01:53:07 +0000996 if (valid)
997 goto read_the_dwarf;
998
999 if (VG_(clo_verbosity) > 1)
1000 VG_(message)(Vg_DebugMsg, " dSYM does not have "
njn6ff700c2009-07-20 05:39:50 +00001001 "correct UUID (out of date?)\n");
njnf76d27a2009-05-28 01:53:07 +00001002 }
1003
1004 /* There was no dsym file, or it doesn't match. We'll have to try
njn97db7612009-08-04 02:32:55 +00001005 regenerating it, unless --dsymutil=no, in which case just complain
1006 instead. */
njnf76d27a2009-05-28 01:53:07 +00001007
1008 /* If this looks like a lib that we shouldn't run dsymutil on, just
1009 give up. (possible reasons: is system lib, or in /usr etc, or
1010 the dsym dir would not be writable by the user, or we're running
1011 as root) */
sewardj94bb7722011-09-20 22:36:26 +00001012 vg_assert(di->fsm.filename);
1013 if (is_systemish_library_name(di->fsm.filename))
njnf76d27a2009-05-28 01:53:07 +00001014 goto success;
1015
njn97db7612009-08-04 02:32:55 +00001016 if (!VG_(clo_dsymutil)) {
njnf76d27a2009-05-28 01:53:07 +00001017 if (VG_(clo_verbosity) == 1) {
sewardj94bb7722011-09-20 22:36:26 +00001018 VG_(message)(Vg_DebugMsg, "%s:\n", di->fsm.filename);
njnf76d27a2009-05-28 01:53:07 +00001019 }
1020 if (VG_(clo_verbosity) > 0)
1021 VG_(message)(Vg_DebugMsg, "%sdSYM directory %s; consider using "
njn97db7612009-08-04 02:32:55 +00001022 "--dsymutil=yes\n",
njnf76d27a2009-05-28 01:53:07 +00001023 VG_(clo_verbosity) > 1 ? " " : "",
1024 dsymfilename ? "has wrong UUID" : "is missing");
1025 goto success;
1026 }
1027
1028 /* Run dsymutil */
1029
1030 { Int r;
floriane07cbb32013-01-15 03:19:54 +00001031 const HChar* dsymutil = "/usr/bin/dsymutil ";
njnf76d27a2009-05-28 01:53:07 +00001032 HChar* cmd = ML_(dinfo_zalloc)( "di.readmacho.tmp1",
1033 VG_(strlen)(dsymutil)
sewardj94bb7722011-09-20 22:36:26 +00001034 + VG_(strlen)(di->fsm.filename)
sewardjf7ee6fb2010-07-21 16:00:08 +00001035 + 32 /* misc */ );
njnf76d27a2009-05-28 01:53:07 +00001036 VG_(strcpy)(cmd, dsymutil);
1037 if (0) VG_(strcat)(cmd, "--verbose ");
sewardjf7ee6fb2010-07-21 16:00:08 +00001038 VG_(strcat)(cmd, "\"");
sewardj94bb7722011-09-20 22:36:26 +00001039 VG_(strcat)(cmd, di->fsm.filename);
sewardjf7ee6fb2010-07-21 16:00:08 +00001040 VG_(strcat)(cmd, "\"");
njn6ff700c2009-07-20 05:39:50 +00001041 VG_(message)(Vg_DebugMsg, "run: %s\n", cmd);
njnf76d27a2009-05-28 01:53:07 +00001042 r = VG_(system)( cmd );
1043 if (r)
njn6ff700c2009-07-20 05:39:50 +00001044 VG_(message)(Vg_DebugMsg, "run: %s FAILED\n", dsymutil);
njnf76d27a2009-05-28 01:53:07 +00001045 ML_(dinfo_free)(cmd);
sewardj94bb7722011-09-20 22:36:26 +00001046 dsymfilename = find_separate_debug_file(di->fsm.filename);
njnf76d27a2009-05-28 01:53:07 +00001047 }
1048
1049 /* Try again to load it. */
1050 if (dsymfilename) {
1051 Bool valid;
1052
1053 if (VG_(clo_verbosity) > 1)
njn6ff700c2009-07-20 05:39:50 +00001054 VG_(message)(Vg_DebugMsg, " dsyms= %s\n", dsymfilename);
njnf76d27a2009-05-28 01:53:07 +00001055
sewardj5d616df2013-07-02 08:07:15 +00001056 dsli = map_image_aboard( di, dsymfilename );
1057 if (!ML_(sli_is_valid)(dsli)) {
1058 ML_(symerr)(di, False, "Connect to debuginfo image failed "
1059 "(second attempt).");
1060 goto fail;
1061 }
njnf76d27a2009-05-28 01:53:07 +00001062
1063 /* check it has the right uuid. */
1064 vg_assert(have_uuid);
sewardj5d616df2013-07-02 08:07:15 +00001065 vg_assert(have_uuid);
1066 valid = dsli.img && dsli.szB > 0 && check_uuid_matches( dsli, uuid );
njnf76d27a2009-05-28 01:53:07 +00001067 if (!valid) {
1068 if (VG_(clo_verbosity) > 0) {
1069 VG_(message)(Vg_DebugMsg,
1070 "WARNING: did not find expected UUID %02X%02X%02X%02X"
1071 "-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X"
njn6ff700c2009-07-20 05:39:50 +00001072 " in dSYM dir\n",
njnf76d27a2009-05-28 01:53:07 +00001073 (UInt)uuid[0], (UInt)uuid[1], (UInt)uuid[2], (UInt)uuid[3],
1074 (UInt)uuid[4], (UInt)uuid[5], (UInt)uuid[6], (UInt)uuid[7],
1075 (UInt)uuid[8], (UInt)uuid[9], (UInt)uuid[10],
1076 (UInt)uuid[11], (UInt)uuid[12], (UInt)uuid[13],
1077 (UInt)uuid[14], (UInt)uuid[15] );
1078 VG_(message)(Vg_DebugMsg,
sewardj94bb7722011-09-20 22:36:26 +00001079 "WARNING: for %s\n", di->fsm.filename);
njnf76d27a2009-05-28 01:53:07 +00001080 }
sewardj5d616df2013-07-02 08:07:15 +00001081 unmap_image( &dsli );
1082 /* unmap_image zeroes out dsli, so it's safe for "fail:" to
1083 re-try unmap_image. */
njnf76d27a2009-05-28 01:53:07 +00001084 goto fail;
1085 }
1086 }
1087
1088 /* Right. Finally we have our best try at the dwarf image, so go
1089 on to reading stuff out of it. */
1090
1091 read_the_dwarf:
sewardj5d616df2013-07-02 08:07:15 +00001092 if (ML_(sli_is_valid)(msli) && msli.szB > 0) {
1093 // "_mscn" is "mach-o section"
1094 DiSlice debug_info_mscn
1095 = getsectdata(dsli, "__DWARF", "__debug_info");
1096 DiSlice debug_abbv_mscn
1097 = getsectdata(dsli, "__DWARF", "__debug_abbrev");
1098 DiSlice debug_line_mscn
1099 = getsectdata(dsli, "__DWARF", "__debug_line");
1100 DiSlice debug_str_mscn
1101 = getsectdata(dsli, "__DWARF", "__debug_str");
1102 DiSlice debug_ranges_mscn
1103 = getsectdata(dsli, "__DWARF", "__debug_ranges");
1104 DiSlice debug_loc_mscn
1105 = getsectdata(dsli, "__DWARF", "__debug_loc");
njnf76d27a2009-05-28 01:53:07 +00001106
sewardj5d616df2013-07-02 08:07:15 +00001107 if (ML_(sli_is_valid)(debug_info_mscn)) {
njnf76d27a2009-05-28 01:53:07 +00001108 if (VG_(clo_verbosity) > 1) {
1109 if (0)
1110 VG_(message)(Vg_DebugMsg,
1111 "Reading dwarf3 for %s (%#lx) from %s"
sewardj5d616df2013-07-02 08:07:15 +00001112 " (%lld %lld %lld %lld %lld %lld)\n",
sewardj94bb7722011-09-20 22:36:26 +00001113 di->fsm.filename, di->text_avma, dsymfilename,
sewardj5d616df2013-07-02 08:07:15 +00001114 debug_info_mscn.szB, debug_abbv_mscn.szB,
1115 debug_line_mscn.szB, debug_str_mscn.szB,
1116 debug_ranges_mscn.szB, debug_loc_mscn.szB
njnf76d27a2009-05-28 01:53:07 +00001117 );
1118 VG_(message)(Vg_DebugMsg,
njn6ff700c2009-07-20 05:39:50 +00001119 " reading dwarf3 from dsyms file\n");
njnf76d27a2009-05-28 01:53:07 +00001120 }
1121 /* The old reader: line numbers and unwind info only */
1122 ML_(read_debuginfo_dwarf3) ( di,
sewardj5d616df2013-07-02 08:07:15 +00001123 debug_info_mscn,
1124 DiSlice_INVALID, /* .debug_types */
1125 debug_abbv_mscn,
1126 debug_line_mscn,
1127 debug_str_mscn,
1128 DiSlice_INVALID /* ALT .debug_str */ );
njnf76d27a2009-05-28 01:53:07 +00001129
1130 /* The new reader: read the DIEs in .debug_info to acquire
1131 information on variable types and locations. But only if
1132 the tool asks for it, or the user requests it on the
1133 command line. */
1134 if (VG_(needs).var_info /* the tool requires it */
1135 || VG_(clo_read_var_info) /* the user asked for it */) {
1136 ML_(new_dwarf3_reader)(
sewardj5d616df2013-07-02 08:07:15 +00001137 di, debug_info_mscn,
1138 DiSlice_INVALID, /* .debug_types */
1139 debug_abbv_mscn,
1140 debug_line_mscn,
1141 debug_str_mscn,
1142 debug_ranges_mscn,
1143 debug_loc_mscn,
1144 DiSlice_INVALID, /* ALT .debug_info */
1145 DiSlice_INVALID, /* ALT .debug_abbv */
1146 DiSlice_INVALID, /* ALT .debug_line */
1147 DiSlice_INVALID /* ALT .debug_str */
njnf76d27a2009-05-28 01:53:07 +00001148 );
1149 }
1150 }
1151 }
1152
1153 if (dsymfilename) ML_(dinfo_free)(dsymfilename);
1154
1155 success:
sewardj5d616df2013-07-02 08:07:15 +00001156 unmap_image(&msli);
1157 unmap_image(&dsli);
njnf76d27a2009-05-28 01:53:07 +00001158 return True;
1159
1160 /* NOTREACHED */
1161
1162 fail:
1163 ML_(symerr)(di, True, "Error reading Mach-O object.");
sewardj5d616df2013-07-02 08:07:15 +00001164 unmap_image(&msli);
1165 unmap_image(&dsli);
njnf76d27a2009-05-28 01:53:07 +00001166 return False;
1167}
1168
njn8b68b642009-06-24 00:37:09 +00001169#endif // defined(VGO_darwin)
1170
njnf76d27a2009-05-28 01:53:07 +00001171/*--------------------------------------------------------------------*/
njn8b68b642009-06-24 00:37:09 +00001172/*--- end ---*/
njnf76d27a2009-05-28 01:53:07 +00001173/*--------------------------------------------------------------------*/