blob: eb3b39428de2ad6c398a1780e2662dcd48cb7e1f [file] [log] [blame]
sewardjde4a1d02002-03-22 01:27:54 +00001/*--------------------------------------------------------------------*/
2/*--- Management of symbols and debugging information. ---*/
3/*--- vg_symtab2.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7 This file is part of Valgrind, an x86 protected-mode emulator
8 designed for debugging and profiling binaries on x86-Unixes.
9
10 Copyright (C) 2000-2002 Julian Seward
11 jseward@acm.org
sewardjde4a1d02002-03-22 01:27:54 +000012
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file LICENSE.
29*/
30
31#include "vg_include.h"
sewardjde4a1d02002-03-22 01:27:54 +000032
33#include <elf.h> /* ELF defns */
34#include <a.out.h> /* stabs defns */
35
njn9aae6742002-04-30 13:44:01 +000036
sewardjde4a1d02002-03-22 01:27:54 +000037/* Majorly rewritten Sun 3 Feb 02 to enable loading symbols from
38 dlopen()ed libraries, which is something that KDE3 does a lot.
sewardjde4a1d02002-03-22 01:27:54 +000039
   Stabs reader greatly improved by Nick Nethercote, Apr 02.
sewardjde4a1d02002-03-22 01:27:54 +000041
sewardj18d75132002-05-16 11:06:21 +000042 16 May 02: when notified about munmap, return a Bool indicating
43 whether or not the area being munmapped had executable permissions.
44 This is then used to determine whether or not
45 VG_(invalid_translations) should be called for that area. In order
46 that this work even if --instrument=no, in this case we still keep
47 track of the mapped executable segments, but do not load any debug
48 info or symbols.
sewardjde4a1d02002-03-22 01:27:54 +000049*/
50
51/*------------------------------------------------------------*/
52/*--- Structs n stuff ---*/
53/*------------------------------------------------------------*/
54
/* Stabs entry type codes used by the debug-info reader, taken from:
 *    The "stabs" debug format
 *    Menapace, Kingdon and MacKenzie
 *    Cygnus Support
 */
typedef
   enum {
      N_GSYM  = 0x20,   /* Global symbol                       */
      N_FUN   = 0x24,   /* Function start or end               */
      N_STSYM = 0x26,   /* Data segment file-scope variable    */
      N_LCSYM = 0x28,   /* BSS segment file-scope variable     */
      N_RSYM  = 0x40,   /* Register variable                   */
      N_SLINE = 0x44,   /* Source line number                  */
      N_SO    = 0x64,   /* Source file path and name           */
      N_LSYM  = 0x80,   /* Stack variable or type              */
      N_SOL   = 0x84,   /* Include file name                   */
      N_LBRAC = 0xC0,   /* Start of lexical block              */
      N_RBRAC = 0xE0    /* End of lexical block                */
   }
   stab_types;
72
sewardjde4a1d02002-03-22 01:27:54 +000073/* A structure to hold an ELF symbol (very crudely). */
74typedef
75 struct {
76 Addr addr; /* lowest address of entity */
77 UInt size; /* size in bytes */
78 Int nmoff; /* offset of name in this SegInfo's str tab */
79 }
80 RiSym;
81
/* Line count at which overflow happens, due to line numbers being stored as
 * shorts in `struct nlist' in a.out.h. */
#define LINENO_OVERFLOW (1 << (sizeof(short) * 8))

/* RiLoc packs a line number and a byte size into bitfields whose widths
 * add up to 32: LINENO_BITS for the line number, and the remaining
 * LOC_SIZE_BITS for the size.  MAX_LINENO and MAX_LOC_SIZE are the
 * largest values that fit in those fields. */
#define LINENO_BITS     20
#define LOC_SIZE_BITS  (32 - LINENO_BITS)
#define MAX_LINENO     ((1 << LINENO_BITS) - 1)

/* Unlikely to have any lines with instruction ranges > 4096 bytes */
#define MAX_LOC_SIZE   ((1 << LOC_SIZE_BITS) - 1)

/* Number used to detect line number overflows; if one line is 60000-odd
 * smaller than the previous, it was probably an overflow.
 */
#define OVERFLOW_DIFFERENCE     (LINENO_OVERFLOW - 5000)
97
98/* A structure to hold addr-to-source info for a single line. There can be a
99 * lot of these, hence the dense packing. */
sewardjde4a1d02002-03-22 01:27:54 +0000100typedef
101 struct {
njne0ee0712002-05-03 16:41:05 +0000102 /* Word 1 */
103 Addr addr; /* lowest address for this line */
104 /* Word 2 */
105 UShort size:LOC_SIZE_BITS; /* byte size; we catch overflows of this */
106 UInt lineno:LINENO_BITS; /* source line number, or zero */
107 /* Word 3 */
108 UInt fnmoff; /* source filename; offset in this
109 SegInfo's str tab */
sewardjde4a1d02002-03-22 01:27:54 +0000110 }
111 RiLoc;
112
113
114/* A structure which contains information pertaining to one mapped
115 text segment. */
116typedef
117 struct _SegInfo {
118 struct _SegInfo* next;
119 /* Description of the mapped segment. */
120 Addr start;
121 UInt size;
122 UChar* filename; /* in mallocville */
123 UInt foffset;
124 /* An expandable array of symbols. */
125 RiSym* symtab;
126 UInt symtab_used;
127 UInt symtab_size;
128 /* An expandable array of locations. */
129 RiLoc* loctab;
130 UInt loctab_used;
131 UInt loctab_size;
132 /* An expandable array of characters -- the string table. */
133 Char* strtab;
134 UInt strtab_used;
135 UInt strtab_size;
136 /* offset is what we need to add to symbol table entries
137 to get the real location of that symbol in memory.
138 For executables, offset is zero.
139 For .so's, offset == base_addr.
140 This seems like a giant kludge to me.
141 */
142 UInt offset;
143 }
144 SegInfo;
145
146
147/* -- debug helper -- */
148static void ppSegInfo ( SegInfo* si )
149{
150 VG_(printf)("name: %s\n"
151 "start %p, size %d, foffset %d\n",
152 si->filename?si->filename : (UChar*)"NULL",
153 si->start, si->size, si->foffset );
154}
155
156static void freeSegInfo ( SegInfo* si )
157{
158 vg_assert(si != NULL);
159 if (si->filename) VG_(free)(VG_AR_SYMTAB, si->filename);
160 if (si->symtab) VG_(free)(VG_AR_SYMTAB, si->symtab);
161 if (si->loctab) VG_(free)(VG_AR_SYMTAB, si->loctab);
162 if (si->strtab) VG_(free)(VG_AR_SYMTAB, si->strtab);
163 VG_(free)(VG_AR_SYMTAB, si);
164}
165
166
167/*------------------------------------------------------------*/
168/*--- Adding stuff ---*/
169/*------------------------------------------------------------*/
170
171/* Add a str to the string table, including terminating zero, and
172 return offset of the string in vg_strtab. */
173
174static __inline__
175Int addStr ( SegInfo* si, Char* str )
176{
177 Char* new_tab;
178 Int new_sz, i, space_needed;
179
180 space_needed = 1 + VG_(strlen)(str);
181 if (si->strtab_used + space_needed > si->strtab_size) {
182 new_sz = 2 * si->strtab_size;
183 if (new_sz == 0) new_sz = 5000;
184 new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz);
185 if (si->strtab != NULL) {
186 for (i = 0; i < si->strtab_used; i++)
187 new_tab[i] = si->strtab[i];
188 VG_(free)(VG_AR_SYMTAB, si->strtab);
189 }
190 si->strtab = new_tab;
191 si->strtab_size = new_sz;
192 }
193
194 for (i = 0; i < space_needed; i++)
195 si->strtab[si->strtab_used+i] = str[i];
196
197 si->strtab_used += space_needed;
198 vg_assert(si->strtab_used <= si->strtab_size);
199 return si->strtab_used - space_needed;
200}
201
202/* Add a symbol to the symbol table. */
203
204static __inline__
205void addSym ( SegInfo* si, RiSym* sym )
206{
207 Int new_sz, i;
208 RiSym* new_tab;
209
210 /* Ignore zero-sized syms. */
211 if (sym->size == 0) return;
212
213 if (si->symtab_used == si->symtab_size) {
214 new_sz = 2 * si->symtab_size;
215 if (new_sz == 0) new_sz = 500;
216 new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiSym) );
217 if (si->symtab != NULL) {
218 for (i = 0; i < si->symtab_used; i++)
219 new_tab[i] = si->symtab[i];
220 VG_(free)(VG_AR_SYMTAB, si->symtab);
221 }
222 si->symtab = new_tab;
223 si->symtab_size = new_sz;
224 }
225
226 si->symtab[si->symtab_used] = *sym;
227 si->symtab_used++;
228 vg_assert(si->symtab_used <= si->symtab_size);
229}
230
231/* Add a location to the location table. */
232
233static __inline__
234void addLoc ( SegInfo* si, RiLoc* loc )
235{
236 Int new_sz, i;
237 RiLoc* new_tab;
238
njne0ee0712002-05-03 16:41:05 +0000239 /* Zero-sized locs should have been ignored earlier */
240 vg_assert(loc->size > 0);
sewardjde4a1d02002-03-22 01:27:54 +0000241
242 if (si->loctab_used == si->loctab_size) {
243 new_sz = 2 * si->loctab_size;
244 if (new_sz == 0) new_sz = 500;
245 new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiLoc) );
246 if (si->loctab != NULL) {
247 for (i = 0; i < si->loctab_used; i++)
248 new_tab[i] = si->loctab[i];
249 VG_(free)(VG_AR_SYMTAB, si->loctab);
250 }
251 si->loctab = new_tab;
252 si->loctab_size = new_sz;
253 }
254
255 si->loctab[si->loctab_used] = *loc;
256 si->loctab_used++;
257 vg_assert(si->loctab_used <= si->loctab_size);
258}
259
260
261
262/*------------------------------------------------------------*/
263/*--- Helpers ---*/
264/*------------------------------------------------------------*/
265
266/* Non-fatal -- use vg_panic if terminal. */
267static
268void vg_symerr ( Char* msg )
269{
270 if (VG_(clo_verbosity) > 1)
271 VG_(message)(Vg_UserMsg,"%s", msg );
272}
273
274
275/* Print a symbol. */
276static
277void printSym ( SegInfo* si, Int i )
278{
279 VG_(printf)( "%5d: %8p .. %8p (%d) %s\n",
280 i,
281 si->symtab[i].addr,
282 si->symtab[i].addr + si->symtab[i].size - 1, si->symtab[i].size,
283 &si->strtab[si->symtab[i].nmoff] );
284}
285
286
#if 0
/* Print the entire sym tab. */
/* NOTE(review): this disabled code looks stale and would need updating
   before being re-enabled:
     - printSym is defined in this file as taking (SegInfo*, Int), but
       is called here with a single argument;
     - vg_symtab_used is not declared anywhere in the visible part of
       this file -- presumably it predates the per-SegInfo tables;
     - the closing banner also says BEGIN; presumably END was meant. */
static __attribute__ ((unused))
void printSymtab ( void )
{
   Int i;
   VG_(printf)("\n------ BEGIN vg_symtab ------\n");
   for (i = 0; i < vg_symtab_used; i++)
      printSym(i);
   VG_(printf)("------ BEGIN vg_symtab ------\n");
}
#endif
299
#if 0
/* Paranoid strcat: append `src' to the zero-terminated string already
   in `dst', never touching dst[maxlen] or beyond.

   If no terminating zero is found within the first `maxlen' bytes of
   `dst', nothing is copied.  If `src' does not fit, the copy stops at
   the limit without writing a terminating zero, so `dst' is left
   unterminated in that case. */
static
void safeCopy ( UChar* dst, UInt maxlen, UChar* src )
{
   UInt i = 0, j = 0;
   /* Find the end of the existing string in dst. */
   while (True) {
      if (i >= maxlen) return;
      if (dst[i] == 0) break;
      i++;
   }
   /* Copy src, terminator included, for as long as there is room. */
   while (True) {
      if (i >= maxlen) return;
      dst[i] = src[j];
      if (src[j] == 0) return;
      i++; j++;
   }
}
#endif
319
320/*------------------------------------------------------------*/
321/*--- Canonicalisers ---*/
322/*------------------------------------------------------------*/
323
/* Sort the symtab by starting address, and emit warnings if any
   symbols have overlapping address ranges.  We use that old chestnut,
   shellsort.  Mash the table around so as to establish the property
   that addresses are in order and the ranges do not overlap.  This
   facilitates using binary search to map addresses to symbols when we
   come to query the table.

   The overlap messages are only printed when VG_(clo_trace_symtab) is
   set.  Tables with fewer than two entries are left untouched.
*/
static
void canonicaliseSymtab ( SegInfo* si )
{
   /* Magic numbers due to Janet Incerpi and Robert Sedgewick. */
   Int   incs[16] = { 1, 3, 7, 21, 48, 112, 336, 861, 1968,
                      4592, 13776, 33936, 86961, 198768,
                      463792, 1391376 };
   Int   lo = 0;
   Int   hi = si->symtab_used-1;
   Int   i, j, h, bigN, hp, n_merged, n_truncated;
   RiSym v;
   Addr  s1, s2, e1, e2;

#  define SWAP(ty,aa,bb) \
      do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0)

   /* Nothing to do for 0 or 1 entries.  Otherwise pick the largest
      increment that is smaller than the number of entries. */
   bigN = hi - lo + 1; if (bigN < 2) return;
   hp = 0; while (hp < 16 && incs[hp] < bigN) hp++; hp--;
   vg_assert(0 <= hp && hp < 16);

   /* Shellsort into ascending order of .addr: one insertion-sort pass
      per increment, from the largest chosen increment down to 1. */
   for (; hp >= 0; hp--) {
      h = incs[hp];
      i = lo + h;
      while (1) {
         if (i > hi) break;
         v = si->symtab[i];
         j = i;
         while (si->symtab[j-h].addr > v.addr) {
            si->symtab[j] = si->symtab[j-h];
            j = j - h;
            if (j <= (lo + h - 1)) break;
         }
         si->symtab[j] = v;
         i++;
      }
   }

   /* The merge and truncate phases below are repeated (via this
      label) until a truncate pass changes nothing. */
  cleanup_more:

   /* If two symbols have identical address ranges, favour the
      one with the longer name.  The table is compacted in place,
      and the pass is repeated until it merges nothing.
   */
   do {
      n_merged = 0;
      j = si->symtab_used;
      si->symtab_used = 0;
      for (i = 0; i < j; i++) {
         if (i < j-1
             && si->symtab[i].addr   == si->symtab[i+1].addr
             && si->symtab[i].size   == si->symtab[i+1].size) {
            n_merged++;
            /* merge the two into one */
            if (VG_(strlen)(&si->strtab[si->symtab[i].nmoff])
                > VG_(strlen)(&si->strtab[si->symtab[i+1].nmoff])) {
               si->symtab[si->symtab_used++] = si->symtab[i];
            } else {
               si->symtab[si->symtab_used++] = si->symtab[i+1];
            }
            /* Skip the partner that was just merged away. */
            i++;
         } else {
            si->symtab[si->symtab_used++] = si->symtab[i];
         }
      }
      if (VG_(clo_trace_symtab))
         VG_(printf)( "%d merged\n", n_merged);
   }
   while (n_merged > 0);

   /* Detect and "fix" overlapping address ranges. */
   n_truncated = 0;

   for (i = 0; i < si->symtab_used-1; i++) {

      vg_assert(si->symtab[i].addr <= si->symtab[i+1].addr);

      /* Check for common (no overlap) case. */
      if (si->symtab[i].addr + si->symtab[i].size
          <= si->symtab[i+1].addr)
         continue;

      /* There's an overlap.  Truncate one or the other. */
      if (VG_(clo_trace_symtab)) {
         VG_(printf)("overlapping address ranges in symbol table\n\t");
         printSym(si,i);
         VG_(printf)("\t");
         printSym(si,i+1);
         VG_(printf)("\n");
      }

      /* Truncate one or the other.  s/e are the first and last
         addresses of entries i (1) and i+1 (2). */
      s1 = si->symtab[i].addr;
      s2 = si->symtab[i+1].addr;
      e1 = s1 + si->symtab[i].size - 1;
      e2 = s2 + si->symtab[i+1].size - 1;
      if (s1 < s2) {
         /* Entry i starts first: cut it off just before entry i+1. */
         e1 = s2-1;
      } else {
         vg_assert(s1 == s2);
         if (e1 > e2) {
            /* Same start, entry i is longer.  After the swaps entry i
               covers the shorter range and entry i+1 covers the
               remainder beyond it.  Only addr and size are rewritten
               below; the nmoff fields are not exchanged. */
            s1 = e2+1; SWAP(Addr,s1,s2); SWAP(Addr,e1,e2);
         } else
         if (e1 < e2) {
            /* Same start, entry i+1 is longer: start it just after
               the end of entry i. */
            s2 = e1+1;
         } else {
	   /* e1 == e2.  Identical addr ranges.  We'll eventually wind
              up back at cleanup_more, which will take care of it. */
	 }
      }
      si->symtab[i].addr   = s1;
      si->symtab[i+1].addr = s2;
      si->symtab[i].size   = e1 - s1 + 1;
      si->symtab[i+1].size = e2 - s2 + 1;
      vg_assert(s1 <= s2);
      vg_assert(si->symtab[i].size > 0);
      vg_assert(si->symtab[i+1].size > 0);
      /* It may be that the i+1 entry now needs to be moved further
         along to maintain the address order requirement. */
      j = i+1;
      while (j < si->symtab_used-1
             && si->symtab[j].addr > si->symtab[j+1].addr) {
         SWAP(RiSym,si->symtab[j],si->symtab[j+1]);
         j++;
      }
      n_truncated++;
   }

   /* Any truncation may have created new identical or overlapping
      ranges, so go round again. */
   if (n_truncated > 0) goto cleanup_more;

   /* Ensure relevant postconditions hold. */
   for (i = 0; i < si->symtab_used-1; i++) {
      /* No zero-sized symbols. */
      vg_assert(si->symtab[i].size > 0);
      /* In order. */
      vg_assert(si->symtab[i].addr < si->symtab[i+1].addr);
      /* No overlaps. */
      vg_assert(si->symtab[i].addr + si->symtab[i].size - 1
                < si->symtab[i+1].addr);
   }
#  undef SWAP
}
471
472
473
474/* Sort the location table by starting address. Mash the table around
475 so as to establish the property that addresses are in order and the
476 ranges do not overlap. This facilitates using binary search to map
477 addresses to locations when we come to query the table. */
478static
479void canonicaliseLoctab ( SegInfo* si )
480{
481 /* Magic numbers due to Janet Incerpi and Robert Sedgewick. */
482 Int incs[16] = { 1, 3, 7, 21, 48, 112, 336, 861, 1968,
483 4592, 13776, 33936, 86961, 198768,
484 463792, 1391376 };
485 Int lo = 0;
486 Int hi = si->loctab_used-1;
487 Int i, j, h, bigN, hp;
488 RiLoc v;
489
490# define SWAP(ty,aa,bb) \
491 do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0);
492
493 /* Sort by start address. */
494
495 bigN = hi - lo + 1; if (bigN < 2) return;
496 hp = 0; while (hp < 16 && incs[hp] < bigN) hp++; hp--;
497 vg_assert(0 <= hp && hp < 16);
498
499 for (; hp >= 0; hp--) {
500 h = incs[hp];
501 i = lo + h;
502 while (1) {
503 if (i > hi) break;
504 v = si->loctab[i];
505 j = i;
506 while (si->loctab[j-h].addr > v.addr) {
507 si->loctab[j] = si->loctab[j-h];
508 j = j - h;
509 if (j <= (lo + h - 1)) break;
510 }
511 si->loctab[j] = v;
512 i++;
513 }
514 }
515
516 /* If two adjacent entries overlap, truncate the first. */
517 for (i = 0; i < si->loctab_used-1; i++) {
518 vg_assert(si->loctab[i].size < 10000);
519 if (si->loctab[i].addr + si->loctab[i].size > si->loctab[i+1].addr) {
520 /* Do this in signed int32 because the actual .size fields
521 are unsigned 16s. */
522 Int new_size = si->loctab[i+1].addr - si->loctab[i].addr;
523 if (new_size < 0) {
524 si->loctab[i].size = 0;
525 } else
526 if (new_size >= 65536) {
527 si->loctab[i].size = 65535;
528 } else {
529 si->loctab[i].size = (UShort)new_size;
530 }
531 }
532 }
533
534 /* Zap any zero-sized entries resulting from the truncation
535 process. */
536 j = 0;
537 for (i = 0; i < si->loctab_used; i++) {
538 if (si->loctab[i].size > 0) {
539 si->loctab[j] = si->loctab[i];
540 j++;
541 }
542 }
543 si->loctab_used = j;
544
545 /* Ensure relevant postconditions hold. */
546 for (i = 0; i < si->loctab_used-1; i++) {
547 /*
548 VG_(printf)("%d (%d) %d 0x%x\n",
549 i, si->loctab[i+1].confident,
550 si->loctab[i+1].size, si->loctab[i+1].addr );
551 */
552 /* No zero-sized symbols. */
553 vg_assert(si->loctab[i].size > 0);
554 /* In order. */
555 vg_assert(si->loctab[i].addr < si->loctab[i+1].addr);
556 /* No overlaps. */
557 vg_assert(si->loctab[i].addr + si->loctab[i].size - 1
558 < si->loctab[i+1].addr);
559 }
560# undef SWAP
561}
562
563
564/*------------------------------------------------------------*/
565/*--- Read info from a .so/exe file. ---*/
566/*------------------------------------------------------------*/
567
568static __inline__
569void addLineInfo ( SegInfo* si,
570 Int fnmoff,
njne0ee0712002-05-03 16:41:05 +0000571 Addr this,
572 Addr next,
573 Int lineno,
574 Int entry )
sewardjde4a1d02002-03-22 01:27:54 +0000575{
576 RiLoc loc;
njne0ee0712002-05-03 16:41:05 +0000577 Int size = next - this;
njn4f9c9342002-04-29 16:03:24 +0000578
njne0ee0712002-05-03 16:41:05 +0000579 /* Ignore zero-sized locs */
580 if (this == next) return;
sewardjde4a1d02002-03-22 01:27:54 +0000581
njne0ee0712002-05-03 16:41:05 +0000582 /* Maximum sanity checking. Some versions of GNU as do a shabby job with
583 * stabs entries; if anything looks suspicious, revert to a size of 1.
584 * This should catch the instruction of interest (since if using asm-level
585 * debug info, one instruction will correspond to one line, unlike with
586 * C-level debug info where multiple instructions can map to the one line),
587 * but avoid catching any other instructions bogusly. */
588 if (this > next) {
589 VG_(message)(Vg_DebugMsg,
590 "warning: stabs addresses out of order "
591 "at entry %d: 0x%x 0x%x", entry, this, next);
592 size = 1;
593 }
sewardjde4a1d02002-03-22 01:27:54 +0000594
njne0ee0712002-05-03 16:41:05 +0000595 if (size > MAX_LOC_SIZE) {
596 VG_(message)(Vg_DebugMsg,
597 "warning: stabs line address range too large "
598 "at entry %d: %d", entry, size);
599 size = 1;
600 }
601
sewardj573a1e62002-05-09 11:03:57 +0000602 vg_assert(this < si->start + si->size && next-1 >= si->start);
603 vg_assert(lineno >= 0 && lineno <= MAX_LINENO);
njne0ee0712002-05-03 16:41:05 +0000604
605 loc.addr = this;
sewardjde4a1d02002-03-22 01:27:54 +0000606 loc.size = (UShort)size;
607 loc.lineno = lineno;
608 loc.fnmoff = fnmoff;
609 addLoc ( si, &loc );
610}
611
612
613/* Read the symbols from the object/exe specified by the SegInfo into
614 the tables within the supplied SegInfo. */
615static
616void vg_read_lib_symbols ( SegInfo* si )
617{
618 Elf32_Ehdr* ehdr; /* The ELF header */
619 Elf32_Shdr* shdr; /* The section table */
620 UChar* sh_strtab; /* The section table's string table */
621 struct nlist* stab; /* The .stab table */
622 UChar* stabstr; /* The .stab string table */
623 Int stab_sz; /* Size in bytes of the .stab table */
624 Int stabstr_sz; /* Size in bytes of the .stab string table */
625 Int fd;
626 Int i;
627 Bool ok;
628 Addr oimage;
629 Int n_oimage;
sewardjb3586202002-05-09 17:38:13 +0000630 struct vki_stat stat_buf;
sewardjde4a1d02002-03-22 01:27:54 +0000631
632 /* for the .stabs reader */
633 Int curr_filenmoff;
634 Addr curr_fnbaseaddr;
njne0ee0712002-05-03 16:41:05 +0000635 Char *curr_file_name, *curr_fn_name;
njn4f9c9342002-04-29 16:03:24 +0000636 Int n_stab_entries;
njne0ee0712002-05-03 16:41:05 +0000637 Int prev_lineno, lineno;
638 Int lineno_overflows;
639 Bool same_file;
sewardjde4a1d02002-03-22 01:27:54 +0000640
641 oimage = (Addr)NULL;
642 if (VG_(clo_verbosity) > 1)
njne0ee0712002-05-03 16:41:05 +0000643 VG_(message)(Vg_UserMsg, "Reading syms from %s", si->filename );
sewardjde4a1d02002-03-22 01:27:54 +0000644
645 /* mmap the object image aboard, so that we can read symbols and
646 line number info out of it. It will be munmapped immediately
647 thereafter; it is only aboard transiently. */
648
sewardjb3586202002-05-09 17:38:13 +0000649 i = VG_(stat)(si->filename, &stat_buf);
sewardjde4a1d02002-03-22 01:27:54 +0000650 if (i != 0) {
651 vg_symerr("Can't stat .so/.exe (to determine its size)?!");
652 return;
653 }
654 n_oimage = stat_buf.st_size;
655
656 fd = VG_(open_read)(si->filename);
657 if (fd == -1) {
658 vg_symerr("Can't open .so/.exe to read symbols?!");
659 return;
660 }
661
sewardjb3586202002-05-09 17:38:13 +0000662 oimage = (Addr)VG_(mmap)( NULL, n_oimage,
663 VKI_PROT_READ, VKI_MAP_PRIVATE, fd, 0 );
sewardjde4a1d02002-03-22 01:27:54 +0000664 if (oimage == ((Addr)(-1))) {
665 VG_(message)(Vg_UserMsg,
666 "mmap failed on %s", si->filename );
667 VG_(close)(fd);
668 return;
669 }
670
671 VG_(close)(fd);
672
673 /* Ok, the object image is safely in oimage[0 .. n_oimage-1].
674 Now verify that it is a valid ELF .so or executable image.
675 */
676 ok = (n_oimage >= sizeof(Elf32_Ehdr));
677 ehdr = (Elf32_Ehdr*)oimage;
678
679 if (ok) {
680 ok &= (ehdr->e_ident[EI_MAG0] == 0x7F
681 && ehdr->e_ident[EI_MAG1] == 'E'
682 && ehdr->e_ident[EI_MAG2] == 'L'
683 && ehdr->e_ident[EI_MAG3] == 'F');
684 ok &= (ehdr->e_ident[EI_CLASS] == ELFCLASS32
685 && ehdr->e_ident[EI_DATA] == ELFDATA2LSB
686 && ehdr->e_ident[EI_VERSION] == EV_CURRENT);
687 ok &= (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_DYN);
688 ok &= (ehdr->e_machine == EM_386);
689 ok &= (ehdr->e_version == EV_CURRENT);
690 ok &= (ehdr->e_shstrndx != SHN_UNDEF);
691 ok &= (ehdr->e_shoff != 0 && ehdr->e_shnum != 0);
692 }
693
694 if (!ok) {
695 vg_symerr("Invalid ELF header, or missing stringtab/sectiontab.");
696 VG_(munmap) ( (void*)oimage, n_oimage );
697 return;
698 }
699
700 if (VG_(clo_trace_symtab))
701 VG_(printf)(
702 "shoff = %d, shnum = %d, size = %d, n_vg_oimage = %d\n",
703 ehdr->e_shoff, ehdr->e_shnum, sizeof(Elf32_Shdr), n_oimage );
704
705 if (ehdr->e_shoff + ehdr->e_shnum*sizeof(Elf32_Shdr) > n_oimage) {
706 vg_symerr("ELF section header is beyond image end?!");
707 VG_(munmap) ( (void*)oimage, n_oimage );
708 return;
709 }
710
711 shdr = (Elf32_Shdr*)(oimage + ehdr->e_shoff);
712 sh_strtab = (UChar*)(oimage + shdr[ehdr->e_shstrndx].sh_offset);
713
714 /* try and read the object's symbol table */
715 {
716 UChar* o_strtab = NULL;
717 Elf32_Sym* o_symtab = NULL;
718 UInt o_strtab_sz = 0;
719 UInt o_symtab_sz = 0;
720
721 UChar* o_got = NULL;
722 UChar* o_plt = NULL;
723 UInt o_got_sz = 0;
724 UInt o_plt_sz = 0;
725
726 Bool snaffle_it;
727 Addr sym_addr;
728
729 /* find the .stabstr and .stab sections */
730 for (i = 0; i < ehdr->e_shnum; i++) {
731 if (0 == VG_(strcmp)(".symtab",sh_strtab + shdr[i].sh_name)) {
732 o_symtab = (Elf32_Sym*)(oimage + shdr[i].sh_offset);
733 o_symtab_sz = shdr[i].sh_size;
734 vg_assert((o_symtab_sz % sizeof(Elf32_Sym)) == 0);
735 /* check image overrun here */
736 }
737 if (0 == VG_(strcmp)(".strtab",sh_strtab + shdr[i].sh_name)) {
738 o_strtab = (UChar*)(oimage + shdr[i].sh_offset);
739 o_strtab_sz = shdr[i].sh_size;
740 /* check image overrun here */
741 }
742
743 /* find out where the .got and .plt sections will be in the
744 executable image, not in the object image transiently loaded.
745 */
746 if (0 == VG_(strcmp)(".got",sh_strtab + shdr[i].sh_name)) {
747 o_got = (UChar*)(si->offset
748 + shdr[i].sh_offset);
749 o_got_sz = shdr[i].sh_size;
750 /* check image overrun here */
751 }
752 if (0 == VG_(strcmp)(".plt",sh_strtab + shdr[i].sh_name)) {
753 o_plt = (UChar*)(si->offset
754 + shdr[i].sh_offset);
755 o_plt_sz = shdr[i].sh_size;
756 /* check image overrun here */
757 }
758
759 }
760
761 if (VG_(clo_trace_symtab)) {
762 if (o_plt) VG_(printf)( "PLT: %p .. %p\n",
763 o_plt, o_plt + o_plt_sz - 1 );
764 if (o_got) VG_(printf)( "GOT: %p .. %p\n",
765 o_got, o_got + o_got_sz - 1 );
766 }
767
768 if (o_strtab == NULL || o_symtab == NULL) {
769 vg_symerr(" object doesn't have a symbol table");
770 } else {
771 /* Perhaps should start at i = 1; ELF docs suggest that entry
772 0 always denotes `unknown symbol'. */
773 for (i = 1; i < o_symtab_sz/sizeof(Elf32_Sym); i++){
774# if 0
775 VG_(printf)("raw symbol: ");
776 switch (ELF32_ST_BIND(o_symtab[i].st_info)) {
777 case STB_LOCAL: VG_(printf)("LOC "); break;
778 case STB_GLOBAL: VG_(printf)("GLO "); break;
779 case STB_WEAK: VG_(printf)("WEA "); break;
780 case STB_LOPROC: VG_(printf)("lop "); break;
781 case STB_HIPROC: VG_(printf)("hip "); break;
782 default: VG_(printf)("??? "); break;
783 }
784 switch (ELF32_ST_TYPE(o_symtab[i].st_info)) {
785 case STT_NOTYPE: VG_(printf)("NOT "); break;
786 case STT_OBJECT: VG_(printf)("OBJ "); break;
787 case STT_FUNC: VG_(printf)("FUN "); break;
788 case STT_SECTION: VG_(printf)("SEC "); break;
789 case STT_FILE: VG_(printf)("FIL "); break;
790 case STT_LOPROC: VG_(printf)("lop "); break;
791 case STT_HIPROC: VG_(printf)("hip "); break;
792 default: VG_(printf)("??? "); break;
793 }
794 VG_(printf)(
795 ": value %p, size %d, name %s\n",
796 si->offset+(UChar*)o_symtab[i].st_value,
797 o_symtab[i].st_size,
798 o_symtab[i].st_name
799 ? ((Char*)o_strtab+o_symtab[i].st_name)
800 : (Char*)"NONAME");
801# endif
802
803 /* Figure out if we're interested in the symbol.
804 Firstly, is it of the right flavour?
805 */
806 snaffle_it
807 = ( (ELF32_ST_BIND(o_symtab[i].st_info) == STB_GLOBAL ||
808 ELF32_ST_BIND(o_symtab[i].st_info) == STB_LOCAL /* ||
809 ELF32_ST_BIND(o_symtab[i].st_info) == STB_WEAK */)
810 &&
811 (ELF32_ST_TYPE(o_symtab[i].st_info) == STT_FUNC /*||
812 ELF32_ST_TYPE(o_symtab[i].st_info) == STT_OBJECT*/)
813 );
814
815 /* Secondly, if it's apparently in a GOT or PLT, it's really
816 a reference to a symbol defined elsewhere, so ignore it.
817 */
818 sym_addr = si->offset
819 + (UInt)o_symtab[i].st_value;
820 if (o_got != NULL
821 && sym_addr >= (Addr)o_got
822 && sym_addr < (Addr)(o_got+o_got_sz)) {
823 snaffle_it = False;
824 if (VG_(clo_trace_symtab)) {
825 VG_(printf)( "in GOT: %s\n",
826 o_strtab+o_symtab[i].st_name);
827 }
828 }
829 if (o_plt != NULL
830 && sym_addr >= (Addr)o_plt
831 && sym_addr < (Addr)(o_plt+o_plt_sz)) {
832 snaffle_it = False;
833 if (VG_(clo_trace_symtab)) {
834 VG_(printf)( "in PLT: %s\n",
835 o_strtab+o_symtab[i].st_name);
836 }
837 }
838
839 /* Don't bother if nameless, or zero-sized. */
840 if (snaffle_it
841 && (o_symtab[i].st_name == (Elf32_Word)NULL
842 || /* VG_(strlen)(o_strtab+o_symtab[i].st_name) == 0 */
843 /* equivalent but cheaper ... */
844 * ((UChar*)(o_strtab+o_symtab[i].st_name)) == 0
845 || o_symtab[i].st_size == 0)) {
846 snaffle_it = False;
847 if (VG_(clo_trace_symtab)) {
848 VG_(printf)( "size=0: %s\n",
849 o_strtab+o_symtab[i].st_name);
850 }
851 }
852
853# if 0
854 /* Avoid _dl_ junk. (Why?) */
855 /* 01-02-24: disabled until I find out if it really helps. */
856 if (snaffle_it
857 && (VG_(strncmp)("_dl_", o_strtab+o_symtab[i].st_name, 4) == 0
858 || VG_(strncmp)("_r_debug",
859 o_strtab+o_symtab[i].st_name, 8) == 0)) {
860 snaffle_it = False;
861 if (VG_(clo_trace_symtab)) {
862 VG_(printf)( "_dl_ junk: %s\n",
863 o_strtab+o_symtab[i].st_name);
864 }
865 }
866# endif
867
868 /* This seems to significantly reduce the number of junk
869 symbols, and particularly reduces the number of
870 overlapping address ranges. Don't ask me why ... */
871 if (snaffle_it && (Int)o_symtab[i].st_value == 0) {
872 snaffle_it = False;
873 if (VG_(clo_trace_symtab)) {
874 VG_(printf)( "valu=0: %s\n",
875 o_strtab+o_symtab[i].st_name);
876 }
877 }
878
879 /* If no part of the symbol falls within the mapped range,
880 ignore it. */
881 if (sym_addr+o_symtab[i].st_size <= si->start
882 || sym_addr >= si->start+si->size) {
883 snaffle_it = False;
884 }
885
886 if (snaffle_it) {
887 /* it's an interesting symbol; record ("snaffle") it. */
888 RiSym sym;
889 Char* t0 = o_symtab[i].st_name
890 ? (Char*)(o_strtab+o_symtab[i].st_name)
891 : (Char*)"NONAME";
892 Int nmoff = addStr ( si, t0 );
893 vg_assert(nmoff >= 0
894 /* && 0==VG_(strcmp)(t0,&vg_strtab[nmoff]) */ );
895 vg_assert( (Int)o_symtab[i].st_value >= 0);
896 /* VG_(printf)("%p + %d: %s\n", si->addr,
897 (Int)o_symtab[i].st_value, t0 ); */
898 sym.addr = sym_addr;
899 sym.size = o_symtab[i].st_size;
900 sym.nmoff = nmoff;
901 addSym ( si, &sym );
902 }
903 }
904 }
905 }
906
907 /* Reading of the "stabs" debug format information, if any. */
908 stabstr = NULL;
909 stab = NULL;
910 stabstr_sz = 0;
911 stab_sz = 0;
912 /* find the .stabstr and .stab sections */
913 for (i = 0; i < ehdr->e_shnum; i++) {
914 if (0 == VG_(strcmp)(".stab",sh_strtab + shdr[i].sh_name)) {
915 stab = (struct nlist *)(oimage + shdr[i].sh_offset);
916 stab_sz = shdr[i].sh_size;
917 }
918 if (0 == VG_(strcmp)(".stabstr",sh_strtab + shdr[i].sh_name)) {
919 stabstr = (UChar*)(oimage + shdr[i].sh_offset);
920 stabstr_sz = shdr[i].sh_size;
921 }
922 }
923
924 if (stab == NULL || stabstr == NULL) {
925 vg_symerr(" object doesn't have any debug info");
926 VG_(munmap) ( (void*)oimage, n_oimage );
927 return;
928 }
929
930 if ( stab_sz + (UChar*)stab > n_oimage + (UChar*)oimage
931 || stabstr_sz + (UChar*)stabstr
932 > n_oimage + (UChar*)oimage ) {
933 vg_symerr(" ELF debug data is beyond image end?!");
934 VG_(munmap) ( (void*)oimage, n_oimage );
935 return;
936 }
937
938 /* Ok. It all looks plausible. Go on and read debug data.
939 stab kinds: 100 N_SO a source file name
940 68 N_SLINE a source line number
njn4f9c9342002-04-29 16:03:24 +0000941 36 N_FUN start of a function
sewardjde4a1d02002-03-22 01:27:54 +0000942
njn4f9c9342002-04-29 16:03:24 +0000943 In this loop, we maintain a current file name, updated as
944 N_SO/N_SOLs appear, and a current function base address,
945 updated as N_FUNs appear. Based on that, address ranges for
946 N_SLINEs are calculated, and stuffed into the line info table.
sewardjde4a1d02002-03-22 01:27:54 +0000947
njn4f9c9342002-04-29 16:03:24 +0000948 Finding the instruction address range covered by an N_SLINE is
949 complicated; see the N_SLINE case below.
sewardjde4a1d02002-03-22 01:27:54 +0000950 */
951 curr_filenmoff = addStr(si,"???");
952 curr_fnbaseaddr = (Addr)NULL;
njne0ee0712002-05-03 16:41:05 +0000953 curr_file_name = curr_fn_name = (Char*)NULL;
954 lineno = prev_lineno = 0;
955 lineno_overflows = 0;
956 same_file = True;
sewardjde4a1d02002-03-22 01:27:54 +0000957
njn4f9c9342002-04-29 16:03:24 +0000958 n_stab_entries = stab_sz/(int)sizeof(struct nlist);
959
960 for (i = 0; i < n_stab_entries; i++) {
sewardjde4a1d02002-03-22 01:27:54 +0000961# if 0
962 VG_(printf) ( " %2d ", i );
963 VG_(printf) ( "type=0x%x othr=%d desc=%d value=0x%x strx=%d %s",
964 stab[i].n_type, stab[i].n_other, stab[i].n_desc,
965 (int)stab[i].n_value,
966 (int)stab[i].n_un.n_strx,
967 stabstr + stab[i].n_un.n_strx );
968 VG_(printf)("\n");
969# endif
970
njne0ee0712002-05-03 16:41:05 +0000971 Char *no_fn_name = "???";
972
sewardjde4a1d02002-03-22 01:27:54 +0000973 switch (stab[i].n_type) {
njn4f9c9342002-04-29 16:03:24 +0000974 UInt next_addr;
sewardjde4a1d02002-03-22 01:27:54 +0000975
njne0ee0712002-05-03 16:41:05 +0000976 /* Two complicated things here:
977 * 1. the n_desc field in 'struct n_list' in a.out.h is only 16-bits,
978 * which gives a maximum of 65535 lines. We handle files bigger
979 * than this by detecting heuristically overflows -- if the line
980 * count goes from 65000-odd to 0-odd within the same file, we
981 * assume it's an overflow. Once we switch files, we zero the
982 * overflow count
983 *
984 * 2. To compute the instr address range covered by a single line,
985 * find the address of the next thing and compute the difference.
986 * The approach used depends on what kind of entry/entries
987 * follow...
988 */
njn4f9c9342002-04-29 16:03:24 +0000989 case N_SLINE: {
njn4f9c9342002-04-29 16:03:24 +0000990 Int this_addr = (UInt)stab[i].n_value;
991
njne0ee0712002-05-03 16:41:05 +0000992 /* Although stored as a short, neg values really are > 32768, hence
993 * the UShort cast. Then we use an Int to handle overflows. */
994 prev_lineno = lineno;
995 lineno = (Int)((UShort)stab[i].n_desc);
996
997 if (prev_lineno > lineno + OVERFLOW_DIFFERENCE && same_file) {
998 VG_(message)(Vg_DebugMsg,
999 "Line number overflow detected (%d --> %d) in %s",
1000 prev_lineno, lineno, curr_file_name);
1001 lineno_overflows++;
1002 }
1003 same_file = True;
1004
njn4f9c9342002-04-29 16:03:24 +00001005 LOOP:
njn9aae6742002-04-30 13:44:01 +00001006 if (i+1 >= n_stab_entries) {
1007 /* If it's the last entry, just guess the range is four; can't
1008 * do any better */
njne0ee0712002-05-03 16:41:05 +00001009 next_addr = this_addr + 4;
njn9aae6742002-04-30 13:44:01 +00001010 } else {
1011 switch (stab[i+1].n_type) {
1012 /* Easy, common case: use address of next entry */
1013 case N_SLINE: case N_SO:
njn4f9c9342002-04-29 16:03:24 +00001014 next_addr = (UInt)stab[i+1].n_value;
njn4f9c9342002-04-29 16:03:24 +00001015 break;
njn4f9c9342002-04-29 16:03:24 +00001016
njn9aae6742002-04-30 13:44:01 +00001017 /* Boring one: skip, look for something more useful. */
1018 case N_RSYM: case N_LSYM: case N_LBRAC: case N_RBRAC:
njn9885df02002-05-01 08:25:03 +00001019 case N_STSYM: case N_LCSYM: case N_GSYM:
njn9aae6742002-04-30 13:44:01 +00001020 i++;
1021 goto LOOP;
1022
1023 /* Should be an end of fun entry, use its address */
1024 case N_FUN:
1025 if ('\0' == * (stabstr + stab[i+1].n_un.n_strx) ) {
1026 next_addr = (UInt)stab[i+1].n_value;
1027 } else {
njne0ee0712002-05-03 16:41:05 +00001028 VG_(message)(Vg_DebugMsg,
1029 "warning: function %s missing closing "
1030 "N_FUN stab at entry %d",
1031 curr_fn_name, i );
1032 next_addr = this_addr; /* assume zero-size loc */
njn9aae6742002-04-30 13:44:01 +00001033 }
1034 break;
1035
1036 /* N_SOL should be followed by an N_SLINE which can be used */
1037 case N_SOL:
1038 if (i+2 < n_stab_entries && N_SLINE == stab[i+2].n_type) {
1039 next_addr = (UInt)stab[i+2].n_value;
1040 break;
1041 } else {
1042 VG_(printf)("unhandled N_SOL stabs case: %d %d %d",
1043 stab[i+1].n_type, i, n_stab_entries);
sewardj177d3232002-05-01 09:25:56 +00001044 VG_(panic)("unhandled N_SOL stabs case");
njn9aae6742002-04-30 13:44:01 +00001045 }
1046
1047 default:
1048 VG_(printf)("unhandled stabs case: %d %d",
1049 stab[i+1].n_type,i);
sewardj177d3232002-05-01 09:25:56 +00001050 VG_(panic)("unhandled (other) stabs case");
njn9aae6742002-04-30 13:44:01 +00001051 }
sewardjde4a1d02002-03-22 01:27:54 +00001052 }
njn4f9c9342002-04-29 16:03:24 +00001053
njn4f9c9342002-04-29 16:03:24 +00001054 addLineInfo ( si, curr_filenmoff, curr_fnbaseaddr + this_addr,
njne0ee0712002-05-03 16:41:05 +00001055 curr_fnbaseaddr + next_addr,
1056 lineno + lineno_overflows * LINENO_OVERFLOW, i);
sewardjde4a1d02002-03-22 01:27:54 +00001057 break;
1058 }
1059
njn4f9c9342002-04-29 16:03:24 +00001060 case N_FUN: {
1061 if ('\0' != (stabstr + stab[i].n_un.n_strx)[0] ) {
sewardjde4a1d02002-03-22 01:27:54 +00001062 /* N_FUN with a name -- indicates the start of a fn. */
njn4f9c9342002-04-29 16:03:24 +00001063 curr_fnbaseaddr = si->offset + (Addr)stab[i].n_value;
njne0ee0712002-05-03 16:41:05 +00001064 curr_fn_name = stabstr + stab[i].n_un.n_strx;
1065 } else {
1066 curr_fn_name = no_fn_name;
sewardjde4a1d02002-03-22 01:27:54 +00001067 }
1068 break;
1069 }
1070
njne0ee0712002-05-03 16:41:05 +00001071 case N_SOL:
1072 if (lineno_overflows != 0) {
njn7efaa112002-05-07 10:26:57 +00001073 VG_(message)(Vg_UserMsg,
1074 "Warning: file %s is very big (> 65535 lines) "
1075 "Line numbers and annotation for this file might "
1076 "be wrong. Sorry",
1077 curr_file_name);
njne0ee0712002-05-03 16:41:05 +00001078 }
1079 /* fall through! */
1080 case N_SO:
1081 lineno_overflows = 0;
1082
sewardjde4a1d02002-03-22 01:27:54 +00001083 /* seems to give lots of locations in header files */
1084 /* case 130: */ /* BINCL */
1085 {
1086 UChar* nm = stabstr + stab[i].n_un.n_strx;
1087 UInt len = VG_(strlen)(nm);
njn4f9c9342002-04-29 16:03:24 +00001088
1089 if (len > 0 && nm[len-1] != '/') {
sewardjde4a1d02002-03-22 01:27:54 +00001090 curr_filenmoff = addStr ( si, nm );
njn4f9c9342002-04-29 16:03:24 +00001091 curr_file_name = stabstr + stab[i].n_un.n_strx;
1092 }
sewardjde4a1d02002-03-22 01:27:54 +00001093 else
1094 if (len == 0)
1095 curr_filenmoff = addStr ( si, "?1\0" );
njn4f9c9342002-04-29 16:03:24 +00001096
sewardjde4a1d02002-03-22 01:27:54 +00001097 break;
1098 }
1099
1100# if 0
1101 case 162: /* EINCL */
1102 curr_filenmoff = addStr ( si, "?2\0" );
1103 break;
1104# endif
1105
1106 default:
1107 break;
1108 }
1109 } /* for (i = 0; i < stab_sz/(int)sizeof(struct nlist); i++) */
1110
1111 /* Last, but not least, heave the oimage back overboard. */
1112 VG_(munmap) ( (void*)oimage, n_oimage );
1113}
1114
1115
1116/*------------------------------------------------------------*/
1117/*--- Main entry point for symbols table reading. ---*/
1118/*------------------------------------------------------------*/
1119
1120/* The root structure for the entire symbol table system. It is a
1121 linked list of SegInfos. Note that this entire mechanism assumes
1122 that what we read from /proc/self/maps doesn't contain overlapping
1123 address ranges, and as a result the SegInfos in this list describe
1124 disjoint address ranges.
1125*/
1126static SegInfo* segInfo = NULL;
1127
1128
1129static
1130void read_symtab_callback (
1131 Addr start, UInt size,
1132 Char rr, Char ww, Char xx,
1133 UInt foffset, UChar* filename )
1134{
1135 SegInfo* si;
1136
1137 /* Stay sane ... */
1138 if (size == 0)
1139 return;
1140
1141 /* We're only interested in collecting symbols in executable
1142 segments which are associated with a real file. Hence: */
1143 if (filename == NULL || xx != 'x')
1144 return;
1145 if (0 == VG_(strcmp)(filename, "/dev/zero"))
1146 return;
1147
1148 /* Perhaps we already have this one? If so, skip. */
1149 for (si = segInfo; si != NULL; si = si->next) {
1150 /*
1151 if (0==VG_(strcmp)(si->filename, filename))
1152 VG_(printf)("same fnames: %c%c%c (%p, %d) (%p, %d) %s\n",
1153 rr,ww,xx,si->start,si->size,start,size,filename);
1154 */
1155 /* For some reason the observed size of a mapping can change, so
1156 we don't use that to determine uniqueness. */
1157 if (si->start == start
1158 /* && si->size == size */
1159 && 0==VG_(strcmp)(si->filename, filename)) {
1160 return;
1161 }
1162 }
1163
1164 /* Get the record initialised right. */
1165 si = VG_(malloc)(VG_AR_SYMTAB, sizeof(SegInfo));
1166 si->next = segInfo;
1167 segInfo = si;
1168
1169 si->start = start;
1170 si->size = size;
1171 si->foffset = foffset;
1172 si->filename = VG_(malloc)(VG_AR_SYMTAB, 1 + VG_(strlen)(filename));
1173 VG_(strcpy)(si->filename, filename);
1174
1175 si->symtab = NULL;
1176 si->symtab_size = si->symtab_used = 0;
1177 si->loctab = NULL;
1178 si->loctab_size = si->loctab_used = 0;
1179 si->strtab = NULL;
1180 si->strtab_size = si->strtab_used = 0;
1181
1182 /* Kludge ... */
1183 si->offset
1184 = si->start==VG_ASSUMED_EXE_BASE ? 0 : si->start;
1185
1186 /* And actually fill it up. */
sewardj18d75132002-05-16 11:06:21 +00001187 if (VG_(clo_instrument) || VG_(clo_cachesim)) {
1188 vg_read_lib_symbols ( si );
1189 canonicaliseSymtab ( si );
1190 canonicaliseLoctab ( si );
1191 }
sewardjde4a1d02002-03-22 01:27:54 +00001192}
1193
1194
1195/* This one really is the Head Honcho. Update the symbol tables to
1196 reflect the current state of /proc/self/maps. Rather than re-read
1197 everything, just read the entries which are not already in segInfo.
1198 So we can call here repeatedly, after every mmap of a non-anonymous
1199 segment with execute permissions, for example, to pick up new
1200 libraries as they are dlopen'd. Conversely, when the client does
1201 munmap(), vg_symtab_notify_munmap() throws away any symbol tables
1202 which happen to correspond to the munmap()d area. */
1203void VG_(read_symbols) ( void )
1204{
sewardjde4a1d02002-03-22 01:27:54 +00001205 VG_(read_procselfmaps) ( read_symtab_callback );
1206
1207 /* Do a sanity check on the symbol tables: ensure that the address
1208 space pieces they cover do not overlap (otherwise we are severely
1209 hosed). This is a quadratic algorithm, but there shouldn't be
1210 many of them.
1211 */
1212 { SegInfo *si, *si2;
1213 for (si = segInfo; si != NULL; si = si->next) {
1214 /* Check no overlap between *si and those in the rest of the
1215 list. */
1216 for (si2 = si->next; si2 != NULL; si2 = si2->next) {
1217 Addr lo = si->start;
1218 Addr hi = si->start + si->size - 1;
1219 Addr lo2 = si2->start;
1220 Addr hi2 = si2->start + si2->size - 1;
1221 Bool overlap;
1222 vg_assert(lo < hi);
1223 vg_assert(lo2 < hi2);
1224 /* the main assertion */
1225 overlap = (lo <= lo2 && lo2 <= hi)
1226 || (lo <= hi2 && hi2 <= hi);
sewardjde4a1d02002-03-22 01:27:54 +00001227 if (overlap) {
1228 VG_(printf)("\n\nOVERLAPPING SEGMENTS\n" );
1229 ppSegInfo ( si );
1230 ppSegInfo ( si2 );
1231 VG_(printf)("\n\n");
1232 vg_assert(! overlap);
1233 }
1234 }
1235 }
1236 }
1237}
1238
1239
1240/* When an munmap() call happens, check to see whether it corresponds
1241 to a segment for a .so, and if so discard the relevant SegInfo.
1242 This might not be a very clever idea from the point of view of
1243 accuracy of error messages, but we need to do it in order to
sewardj18d75132002-05-16 11:06:21 +00001244 maintain the no-overlapping invariant.
1245
1246 16 May 02: Returns a Bool indicating whether or not the discarded
1247 range falls inside a known executable segment. See comment at top
1248 of file for why.
sewardjde4a1d02002-03-22 01:27:54 +00001249*/
sewardj18d75132002-05-16 11:06:21 +00001250Bool VG_(symtab_notify_munmap) ( Addr start, UInt length )
sewardjde4a1d02002-03-22 01:27:54 +00001251{
1252 SegInfo *prev, *curr;
1253
sewardjde4a1d02002-03-22 01:27:54 +00001254 prev = NULL;
1255 curr = segInfo;
1256 while (True) {
1257 if (curr == NULL) break;
1258 if (start == curr->start) break;
1259 prev = curr;
1260 curr = curr->next;
1261 }
sewardj18d75132002-05-16 11:06:21 +00001262 if (curr == NULL)
1263 return False;
sewardjde4a1d02002-03-22 01:27:54 +00001264
1265 VG_(message)(Vg_UserMsg,
1266 "discard syms in %s due to munmap()",
1267 curr->filename ? curr->filename : (UChar*)"???");
1268
1269 vg_assert(prev == NULL || prev->next == curr);
1270
1271 if (prev == NULL) {
1272 segInfo = curr->next;
1273 } else {
1274 prev->next = curr->next;
1275 }
1276
1277 freeSegInfo(curr);
sewardj18d75132002-05-16 11:06:21 +00001278 return True;
sewardjde4a1d02002-03-22 01:27:54 +00001279}
1280
1281
1282/*------------------------------------------------------------*/
1283/*--- Use of symbol table & location info to create ---*/
1284/*--- plausible-looking stack dumps. ---*/
1285/*------------------------------------------------------------*/
1286
1287/* Find a symbol-table index containing the specified pointer, or -1
1288 if not found. Binary search. */
1289
1290static Int search_one_symtab ( SegInfo* si, Addr ptr )
1291{
1292 Addr a_mid_lo, a_mid_hi;
1293 Int mid,
1294 lo = 0,
1295 hi = si->symtab_used-1;
1296 while (True) {
1297 /* current unsearched space is from lo to hi, inclusive. */
1298 if (lo > hi) return -1; /* not found */
1299 mid = (lo + hi) / 2;
1300 a_mid_lo = si->symtab[mid].addr;
1301 a_mid_hi = ((Addr)si->symtab[mid].addr) + si->symtab[mid].size - 1;
1302
1303 if (ptr < a_mid_lo) { hi = mid-1; continue; }
1304 if (ptr > a_mid_hi) { lo = mid+1; continue; }
1305 vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
1306 return mid;
1307 }
1308}
1309
1310
1311/* Search all symtabs that we know about to locate ptr. If found, set
1312 *psi to the relevant SegInfo, and *symno to the symtab entry number
1313 within that. If not found, *psi is set to NULL. */
1314
1315static void search_all_symtabs ( Addr ptr, SegInfo** psi, Int* symno )
1316{
1317 Int sno;
1318 SegInfo* si;
1319 for (si = segInfo; si != NULL; si = si->next) {
1320 if (si->start <= ptr && ptr < si->start+si->size) {
1321 sno = search_one_symtab ( si, ptr );
1322 if (sno == -1) goto not_found;
1323 *symno = sno;
1324 *psi = si;
1325 return;
1326 }
1327 }
1328 not_found:
1329 *psi = NULL;
1330}
1331
1332
1333/* Find a location-table index containing the specified pointer, or -1
1334 if not found. Binary search. */
1335
1336static Int search_one_loctab ( SegInfo* si, Addr ptr )
1337{
1338 Addr a_mid_lo, a_mid_hi;
1339 Int mid,
1340 lo = 0,
1341 hi = si->loctab_used-1;
1342 while (True) {
1343 /* current unsearched space is from lo to hi, inclusive. */
1344 if (lo > hi) return -1; /* not found */
1345 mid = (lo + hi) / 2;
1346 a_mid_lo = si->loctab[mid].addr;
1347 a_mid_hi = ((Addr)si->loctab[mid].addr) + si->loctab[mid].size - 1;
1348
1349 if (ptr < a_mid_lo) { hi = mid-1; continue; }
1350 if (ptr > a_mid_hi) { lo = mid+1; continue; }
1351 vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
1352 return mid;
1353 }
1354}
1355
1356
1357/* Search all loctabs that we know about to locate ptr. If found, set
1358 *psi to the relevant SegInfo, and *locno to the loctab entry number
1359 within that. If not found, *psi is set to NULL.
1360*/
1361static void search_all_loctabs ( Addr ptr, SegInfo** psi, Int* locno )
1362{
1363 Int lno;
1364 SegInfo* si;
1365 for (si = segInfo; si != NULL; si = si->next) {
1366 if (si->start <= ptr && ptr < si->start+si->size) {
1367 lno = search_one_loctab ( si, ptr );
1368 if (lno == -1) goto not_found;
1369 *locno = lno;
1370 *psi = si;
1371 return;
1372 }
1373 }
1374 not_found:
1375 *psi = NULL;
1376}
1377
1378
1379/* The whole point of this whole big deal: map a code address to a
1380 plausible symbol name. Returns False if no idea; otherwise True.
1381 Caller supplies buf and nbuf. If no_demangle is True, don't do
1382 demangling, regardless of vg_clo_demangle -- probably because the
1383 call has come from vg_what_fn_or_object_is_this. */
njn4f9c9342002-04-29 16:03:24 +00001384Bool VG_(what_fn_is_this) ( Bool no_demangle, Addr a,
1385 Char* buf, Int nbuf )
sewardjde4a1d02002-03-22 01:27:54 +00001386{
1387 SegInfo* si;
1388 Int sno;
1389 search_all_symtabs ( a, &si, &sno );
1390 if (si == NULL)
1391 return False;
1392 if (no_demangle) {
1393 VG_(strncpy_safely)
1394 ( buf, & si->strtab[si->symtab[sno].nmoff], nbuf );
1395 } else {
1396 VG_(demangle) ( & si->strtab[si->symtab[sno].nmoff], buf, nbuf );
1397 }
1398 return True;
1399}
1400
1401
1402/* Map a code address to the name of a shared object file. Returns
1403 False if no idea; otherwise False. Caller supplies buf and
1404 nbuf. */
1405static
1406Bool vg_what_object_is_this ( Addr a, Char* buf, Int nbuf )
1407{
1408 SegInfo* si;
1409 for (si = segInfo; si != NULL; si = si->next) {
1410 if (si->start <= a && a < si->start+si->size) {
1411 VG_(strncpy_safely)(buf, si->filename, nbuf);
1412 return True;
1413 }
1414 }
1415 return False;
1416}
1417
1418/* Return the name of an erring fn in a way which is useful
1419 for comparing against the contents of a suppressions file.
1420 Always writes something to buf. Also, doesn't demangle the
1421 name, because we want to refer to mangled names in the
1422 suppressions file.
1423*/
1424void VG_(what_obj_and_fun_is_this) ( Addr a,
1425 Char* obj_buf, Int n_obj_buf,
1426 Char* fun_buf, Int n_fun_buf )
1427{
1428 (void)vg_what_object_is_this ( a, obj_buf, n_obj_buf );
njn4f9c9342002-04-29 16:03:24 +00001429 (void)VG_(what_fn_is_this) ( True, a, fun_buf, n_fun_buf );
sewardjde4a1d02002-03-22 01:27:54 +00001430}
1431
1432
1433/* Map a code address to a (filename, line number) pair.
1434 Returns True if successful.
1435*/
njn4f9c9342002-04-29 16:03:24 +00001436Bool VG_(what_line_is_this)( Addr a,
1437 UChar* filename, Int n_filename,
1438 UInt* lineno )
sewardjde4a1d02002-03-22 01:27:54 +00001439{
1440 SegInfo* si;
1441 Int locno;
1442 search_all_loctabs ( a, &si, &locno );
1443 if (si == NULL)
1444 return False;
1445 VG_(strncpy_safely)(filename, & si->strtab[si->loctab[locno].fnmoff],
1446 n_filename);
1447 *lineno = si->loctab[locno].lineno;
njn4f9c9342002-04-29 16:03:24 +00001448
sewardjde4a1d02002-03-22 01:27:54 +00001449 return True;
1450}
1451
1452
1453/* Print a mini stack dump, showing the current location. */
1454void VG_(mini_stack_dump) ( ExeContext* ec )
1455{
1456
1457#define APPEND(str) \
1458 { UChar* sss; \
1459 for (sss = str; n < M_VG_ERRTXT-1 && *sss != 0; n++,sss++) \
1460 buf[n] = *sss; \
1461 buf[n] = 0; \
1462 }
1463
1464 Bool know_fnname;
1465 Bool know_objname;
1466 Bool know_srcloc;
1467 UInt lineno;
1468 UChar ibuf[20];
1469 UInt i, n, clueless;
1470
1471 UChar buf[M_VG_ERRTXT];
1472 UChar buf_fn[M_VG_ERRTXT];
1473 UChar buf_obj[M_VG_ERRTXT];
1474 UChar buf_srcloc[M_VG_ERRTXT];
1475
1476 Int stop_at = VG_(clo_backtrace_size);
1477
1478 n = 0;
1479
njn4f9c9342002-04-29 16:03:24 +00001480 know_fnname = VG_(what_fn_is_this)(False,ec->eips[0], buf_fn, M_VG_ERRTXT);
sewardjde4a1d02002-03-22 01:27:54 +00001481 know_objname = vg_what_object_is_this(ec->eips[0], buf_obj, M_VG_ERRTXT);
njn4f9c9342002-04-29 16:03:24 +00001482 know_srcloc = VG_(what_line_is_this)(ec->eips[0],
1483 buf_srcloc, M_VG_ERRTXT,
1484 &lineno);
sewardjde4a1d02002-03-22 01:27:54 +00001485
1486 APPEND(" at ");
1487 VG_(sprintf)(ibuf,"0x%x: ", ec->eips[0]);
1488 APPEND(ibuf);
1489 if (know_fnname) {
1490 APPEND(buf_fn);
1491 if (!know_srcloc && know_objname) {
1492 APPEND(" (in ");
1493 APPEND(buf_obj);
1494 APPEND(")");
1495 }
1496 } else if (know_objname && !know_srcloc) {
1497 APPEND("(within ");
1498 APPEND(buf_obj);
1499 APPEND(")");
1500 } else {
1501 APPEND("???");
1502 }
1503 if (know_srcloc) {
1504 APPEND(" (");
1505 APPEND(buf_srcloc);
1506 APPEND(":");
1507 VG_(sprintf)(ibuf,"%d",lineno);
1508 APPEND(ibuf);
1509 APPEND(")");
1510 }
1511 VG_(message)(Vg_UserMsg, "%s", buf);
1512
1513 clueless = 0;
1514 for (i = 1; i < stop_at; i++) {
njn4f9c9342002-04-29 16:03:24 +00001515 know_fnname = VG_(what_fn_is_this)(False,ec->eips[i], buf_fn, M_VG_ERRTXT);
sewardjde4a1d02002-03-22 01:27:54 +00001516 know_objname = vg_what_object_is_this(ec->eips[i],buf_obj, M_VG_ERRTXT);
njn4f9c9342002-04-29 16:03:24 +00001517 know_srcloc = VG_(what_line_is_this)(ec->eips[i],
sewardjde4a1d02002-03-22 01:27:54 +00001518 buf_srcloc, M_VG_ERRTXT,
1519 &lineno);
1520 n = 0;
1521 APPEND(" by ");
1522 if (ec->eips[i] == 0) {
1523 APPEND("<bogus frame pointer> ");
1524 } else {
1525 VG_(sprintf)(ibuf,"0x%x: ",ec->eips[i]);
1526 APPEND(ibuf);
1527 }
1528 if (know_fnname) {
1529 APPEND(buf_fn)
1530 if (!know_srcloc && know_objname) {
1531 APPEND(" (in ");
1532 APPEND(buf_obj);
1533 APPEND(")");
1534 }
1535 } else {
1536 if (know_objname && !know_srcloc) {
1537 APPEND("(within ");
1538 APPEND(buf_obj);
1539 APPEND(")");
1540 } else {
1541 APPEND("???");
1542 }
1543 if (!know_srcloc) clueless++;
1544 if (clueless == 2)
1545 i = stop_at; /* force exit after this iteration */
1546 };
1547 if (know_srcloc) {
1548 APPEND(" (");
1549 APPEND(buf_srcloc);
1550 APPEND(":");
1551 VG_(sprintf)(ibuf,"%d",lineno);
1552 APPEND(ibuf);
1553 APPEND(")");
1554 }
1555 VG_(message)(Vg_UserMsg, "%s", buf);
1556 }
1557}
1558
1559#undef APPEND
1560
1561/*--------------------------------------------------------------------*/
1562/*--- end vg_symtab2.c ---*/
1563/*--------------------------------------------------------------------*/