/*--------------------------------------------------------------------*/
/*--- Management of symbols and debugging information.             ---*/
/*---                                                 vg_symtab2.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, an x86 protected-mode emulator
   designed for debugging and profiling binaries on x86-Unixes.

   Copyright (C) 2000-2002 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file LICENSE.
*/
30
#include "vg_include.h"
#include "vg_unsafe.h"

#include <elf.h>          /* ELF defns                      */
#include <a.out.h>        /* stabs defns                    */
36
njn9aae6742002-04-30 13:44:01 +000037
/* Majorly rewritten Sun 3 Feb 02 to enable loading symbols from
   dlopen()ed libraries, which is something that KDE3 does a lot.
   Still kludgey, though less than before:

   * we don't check whether we should throw away some symbol tables
     when munmap() happens

   * symbol table reading code for ELF binaries is a shambles.
     Use GHC's fptools/ghc/rts/Linker.c as the basis for something better.
*/
48
/*------------------------------------------------------------*/
/*--- Structs n stuff                                      ---*/
/*------------------------------------------------------------*/

/* Stabs entry types, from:
 *   The "stabs" debug format
 *   Menapace, Kingdon and MacKenzie
 *   Cygnus Support
 *
 * The numeric values are the n_type codes matched against stab
 * entries by the stabs reader in this file.
 */
typedef enum { N_GSYM  = 32,    /* Global symbol                    */
               N_FUN   = 36,    /* Function start or end            */
               N_STSYM = 38,    /* Data segment file-scope variable */
               N_LCSYM = 40,    /* BSS segment file-scope variable  */
               N_RSYM  = 64,    /* Register variable                */
               N_SLINE = 68,    /* Source line number               */
               N_SO    = 100,   /* Source file path and name        */
               N_LSYM  = 128,   /* Stack variable or type           */
               N_SOL   = 132,   /* Include file name                */
               N_LBRAC = 192,   /* Start of lexical block           */
               N_RBRAC = 224    /* End of lexical block             */
             } stab_types;
70
sewardjde4a1d02002-03-22 01:27:54 +000071/* A structure to hold an ELF symbol (very crudely). */
72typedef
73 struct {
74 Addr addr; /* lowest address of entity */
75 UInt size; /* size in bytes */
76 Int nmoff; /* offset of name in this SegInfo's str tab */
77 }
78 RiSym;
79
/* Line count at which overflow happens, due to line numbers being stored as
 * shorts in `struct nlist' in a.out.h. */
#define LINENO_OVERFLOW (1 << (sizeof(short) * 8))

/* RiLoc packs the line number and the byte size of a location into one
 * 32-bit word: LINENO_BITS for the line number, the rest for the size. */
#define LINENO_BITS     20
#define LOC_SIZE_BITS  (32 - LINENO_BITS)
#define MAX_LINENO     (1 << LINENO_BITS)

/* Unlikely to have any lines with instruction ranges > 4096 bytes.
 * Note that this value is one more than the largest size that fits in a
 * LOC_SIZE_BITS-wide field, so a size equal to MAX_LOC_SIZE does NOT fit. */
#define MAX_LOC_SIZE   (1 << LOC_SIZE_BITS)

/* Number used to detect line number overflows; if one line is 60000-odd
 * smaller than the previous, it was probably an overflow.
 */
#define OVERFLOW_DIFFERENCE     (LINENO_OVERFLOW - 5000)
95
96/* A structure to hold addr-to-source info for a single line. There can be a
97 * lot of these, hence the dense packing. */
sewardjde4a1d02002-03-22 01:27:54 +000098typedef
99 struct {
njne0ee0712002-05-03 16:41:05 +0000100 /* Word 1 */
101 Addr addr; /* lowest address for this line */
102 /* Word 2 */
103 UShort size:LOC_SIZE_BITS; /* byte size; we catch overflows of this */
104 UInt lineno:LINENO_BITS; /* source line number, or zero */
105 /* Word 3 */
106 UInt fnmoff; /* source filename; offset in this
107 SegInfo's str tab */
sewardjde4a1d02002-03-22 01:27:54 +0000108 }
109 RiLoc;
110
111
112/* A structure which contains information pertaining to one mapped
113 text segment. */
114typedef
115 struct _SegInfo {
116 struct _SegInfo* next;
117 /* Description of the mapped segment. */
118 Addr start;
119 UInt size;
120 UChar* filename; /* in mallocville */
121 UInt foffset;
122 /* An expandable array of symbols. */
123 RiSym* symtab;
124 UInt symtab_used;
125 UInt symtab_size;
126 /* An expandable array of locations. */
127 RiLoc* loctab;
128 UInt loctab_used;
129 UInt loctab_size;
130 /* An expandable array of characters -- the string table. */
131 Char* strtab;
132 UInt strtab_used;
133 UInt strtab_size;
134 /* offset is what we need to add to symbol table entries
135 to get the real location of that symbol in memory.
136 For executables, offset is zero.
137 For .so's, offset == base_addr.
138 This seems like a giant kludge to me.
139 */
140 UInt offset;
141 }
142 SegInfo;
143
144
145/* -- debug helper -- */
146static void ppSegInfo ( SegInfo* si )
147{
148 VG_(printf)("name: %s\n"
149 "start %p, size %d, foffset %d\n",
150 si->filename?si->filename : (UChar*)"NULL",
151 si->start, si->size, si->foffset );
152}
153
154static void freeSegInfo ( SegInfo* si )
155{
156 vg_assert(si != NULL);
157 if (si->filename) VG_(free)(VG_AR_SYMTAB, si->filename);
158 if (si->symtab) VG_(free)(VG_AR_SYMTAB, si->symtab);
159 if (si->loctab) VG_(free)(VG_AR_SYMTAB, si->loctab);
160 if (si->strtab) VG_(free)(VG_AR_SYMTAB, si->strtab);
161 VG_(free)(VG_AR_SYMTAB, si);
162}
163
164
165/*------------------------------------------------------------*/
166/*--- Adding stuff ---*/
167/*------------------------------------------------------------*/
168
169/* Add a str to the string table, including terminating zero, and
170 return offset of the string in vg_strtab. */
171
172static __inline__
173Int addStr ( SegInfo* si, Char* str )
174{
175 Char* new_tab;
176 Int new_sz, i, space_needed;
177
178 space_needed = 1 + VG_(strlen)(str);
179 if (si->strtab_used + space_needed > si->strtab_size) {
180 new_sz = 2 * si->strtab_size;
181 if (new_sz == 0) new_sz = 5000;
182 new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz);
183 if (si->strtab != NULL) {
184 for (i = 0; i < si->strtab_used; i++)
185 new_tab[i] = si->strtab[i];
186 VG_(free)(VG_AR_SYMTAB, si->strtab);
187 }
188 si->strtab = new_tab;
189 si->strtab_size = new_sz;
190 }
191
192 for (i = 0; i < space_needed; i++)
193 si->strtab[si->strtab_used+i] = str[i];
194
195 si->strtab_used += space_needed;
196 vg_assert(si->strtab_used <= si->strtab_size);
197 return si->strtab_used - space_needed;
198}
199
200/* Add a symbol to the symbol table. */
201
202static __inline__
203void addSym ( SegInfo* si, RiSym* sym )
204{
205 Int new_sz, i;
206 RiSym* new_tab;
207
208 /* Ignore zero-sized syms. */
209 if (sym->size == 0) return;
210
211 if (si->symtab_used == si->symtab_size) {
212 new_sz = 2 * si->symtab_size;
213 if (new_sz == 0) new_sz = 500;
214 new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiSym) );
215 if (si->symtab != NULL) {
216 for (i = 0; i < si->symtab_used; i++)
217 new_tab[i] = si->symtab[i];
218 VG_(free)(VG_AR_SYMTAB, si->symtab);
219 }
220 si->symtab = new_tab;
221 si->symtab_size = new_sz;
222 }
223
224 si->symtab[si->symtab_used] = *sym;
225 si->symtab_used++;
226 vg_assert(si->symtab_used <= si->symtab_size);
227}
228
229/* Add a location to the location table. */
230
231static __inline__
232void addLoc ( SegInfo* si, RiLoc* loc )
233{
234 Int new_sz, i;
235 RiLoc* new_tab;
236
njne0ee0712002-05-03 16:41:05 +0000237 /* Zero-sized locs should have been ignored earlier */
238 vg_assert(loc->size > 0);
sewardjde4a1d02002-03-22 01:27:54 +0000239
240 if (si->loctab_used == si->loctab_size) {
241 new_sz = 2 * si->loctab_size;
242 if (new_sz == 0) new_sz = 500;
243 new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiLoc) );
244 if (si->loctab != NULL) {
245 for (i = 0; i < si->loctab_used; i++)
246 new_tab[i] = si->loctab[i];
247 VG_(free)(VG_AR_SYMTAB, si->loctab);
248 }
249 si->loctab = new_tab;
250 si->loctab_size = new_sz;
251 }
252
253 si->loctab[si->loctab_used] = *loc;
254 si->loctab_used++;
255 vg_assert(si->loctab_used <= si->loctab_size);
256}
257
258
259
260/*------------------------------------------------------------*/
261/*--- Helpers ---*/
262/*------------------------------------------------------------*/
263
264/* Non-fatal -- use vg_panic if terminal. */
265static
266void vg_symerr ( Char* msg )
267{
268 if (VG_(clo_verbosity) > 1)
269 VG_(message)(Vg_UserMsg,"%s", msg );
270}
271
272
273/* Print a symbol. */
274static
275void printSym ( SegInfo* si, Int i )
276{
277 VG_(printf)( "%5d: %8p .. %8p (%d) %s\n",
278 i,
279 si->symtab[i].addr,
280 si->symtab[i].addr + si->symtab[i].size - 1, si->symtab[i].size,
281 &si->strtab[si->symtab[i].nmoff] );
282}
283
284
#if 0
/* Print the entire sym tab. */
/* NOTE(review): disabled and stale.  printSym in this file takes
   (SegInfo*, Int), but it is called here with a single argument, and no
   definition of vg_symtab_used is visible in this part of the file --
   confirm before re-enabling.  The closing banner also says BEGIN. */
static __attribute__ ((unused))
void printSymtab ( void )
{
   Int i;
   VG_(printf)("\n------ BEGIN vg_symtab ------\n");
   for (i = 0; i < vg_symtab_used; i++)
      printSym(i);
   VG_(printf)("------ BEGIN vg_symtab ------\n");
}
#endif
297
#if 0
/* Paranoid strcat.  Appends src to the zero-terminated string in dst,
   never touching dst[maxlen] or beyond.  If the limit is hit, the
   function returns without writing a terminating zero. */
static
void safeCopy ( UChar* dst, UInt maxlen, UChar* src )
{
   UInt i = 0, j = 0;
   /* Find the end of the existing string in dst. */
   while (True) {
      if (i >= maxlen) return;
      if (dst[i] == 0) break;
      i++;
   }
   /* Copy src, including its terminating zero, while space remains. */
   while (True) {
      if (i >= maxlen) return;
      dst[i] = src[j];
      if (src[j] == 0) return;
      i++; j++;
   }
}
#endif
317
318/*------------------------------------------------------------*/
319/*--- Canonicalisers ---*/
320/*------------------------------------------------------------*/
321
322/* Sort the symtab by starting address, and emit warnings if any
323 symbols have overlapping address ranges. We use that old chestnut,
324 shellsort. Mash the table around so as to establish the property
325 that addresses are in order and the ranges to not overlap. This
326 facilitates using binary search to map addresses to symbols when we
327 come to query the table.
328*/
329static
330void canonicaliseSymtab ( SegInfo* si )
331{
332 /* Magic numbers due to Janet Incerpi and Robert Sedgewick. */
333 Int incs[16] = { 1, 3, 7, 21, 48, 112, 336, 861, 1968,
334 4592, 13776, 33936, 86961, 198768,
335 463792, 1391376 };
336 Int lo = 0;
337 Int hi = si->symtab_used-1;
338 Int i, j, h, bigN, hp, n_merged, n_truncated;
339 RiSym v;
340 Addr s1, s2, e1, e2;
341
342# define SWAP(ty,aa,bb) \
343 do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0)
344
345 bigN = hi - lo + 1; if (bigN < 2) return;
346 hp = 0; while (hp < 16 && incs[hp] < bigN) hp++; hp--;
347 vg_assert(0 <= hp && hp < 16);
348
349 for (; hp >= 0; hp--) {
350 h = incs[hp];
351 i = lo + h;
352 while (1) {
353 if (i > hi) break;
354 v = si->symtab[i];
355 j = i;
356 while (si->symtab[j-h].addr > v.addr) {
357 si->symtab[j] = si->symtab[j-h];
358 j = j - h;
359 if (j <= (lo + h - 1)) break;
360 }
361 si->symtab[j] = v;
362 i++;
363 }
364 }
365
366 cleanup_more:
367
368 /* If two symbols have identical address ranges, favour the
369 one with the longer name.
370 */
371 do {
372 n_merged = 0;
373 j = si->symtab_used;
374 si->symtab_used = 0;
375 for (i = 0; i < j; i++) {
376 if (i < j-1
377 && si->symtab[i].addr == si->symtab[i+1].addr
378 && si->symtab[i].size == si->symtab[i+1].size) {
379 n_merged++;
380 /* merge the two into one */
381 if (VG_(strlen)(&si->strtab[si->symtab[i].nmoff])
382 > VG_(strlen)(&si->strtab[si->symtab[i+1].nmoff])) {
383 si->symtab[si->symtab_used++] = si->symtab[i];
384 } else {
385 si->symtab[si->symtab_used++] = si->symtab[i+1];
386 }
387 i++;
388 } else {
389 si->symtab[si->symtab_used++] = si->symtab[i];
390 }
391 }
392 if (VG_(clo_trace_symtab))
393 VG_(printf)( "%d merged\n", n_merged);
394 }
395 while (n_merged > 0);
396
397 /* Detect and "fix" overlapping address ranges. */
398 n_truncated = 0;
399
400 for (i = 0; i < si->symtab_used-1; i++) {
401
402 vg_assert(si->symtab[i].addr <= si->symtab[i+1].addr);
403
404 /* Check for common (no overlap) case. */
405 if (si->symtab[i].addr + si->symtab[i].size
406 <= si->symtab[i+1].addr)
407 continue;
408
409 /* There's an overlap. Truncate one or the other. */
410 if (VG_(clo_trace_symtab)) {
411 VG_(printf)("overlapping address ranges in symbol table\n\t");
412 printSym(si,i);
413 VG_(printf)("\t");
414 printSym(si,i+1);
415 VG_(printf)("\n");
416 }
417
418 /* Truncate one or the other. */
419 s1 = si->symtab[i].addr;
420 s2 = si->symtab[i+1].addr;
421 e1 = s1 + si->symtab[i].size - 1;
422 e2 = s2 + si->symtab[i+1].size - 1;
423 if (s1 < s2) {
424 e1 = s2-1;
425 } else {
426 vg_assert(s1 == s2);
427 if (e1 > e2) {
428 s1 = e2+1; SWAP(Addr,s1,s2); SWAP(Addr,e1,e2);
429 } else
430 if (e1 < e2) {
431 s2 = e1+1;
432 } else {
433 /* e1 == e2. Identical addr ranges. We'll eventually wind
434 up back at cleanup_more, which will take care of it. */
435 }
436 }
437 si->symtab[i].addr = s1;
438 si->symtab[i+1].addr = s2;
439 si->symtab[i].size = e1 - s1 + 1;
440 si->symtab[i+1].size = e2 - s2 + 1;
441 vg_assert(s1 <= s2);
442 vg_assert(si->symtab[i].size > 0);
443 vg_assert(si->symtab[i+1].size > 0);
444 /* It may be that the i+1 entry now needs to be moved further
445 along to maintain the address order requirement. */
446 j = i+1;
447 while (j < si->symtab_used-1
448 && si->symtab[j].addr > si->symtab[j+1].addr) {
449 SWAP(RiSym,si->symtab[j],si->symtab[j+1]);
450 j++;
451 }
452 n_truncated++;
453 }
454
455 if (n_truncated > 0) goto cleanup_more;
456
457 /* Ensure relevant postconditions hold. */
458 for (i = 0; i < si->symtab_used-1; i++) {
459 /* No zero-sized symbols. */
460 vg_assert(si->symtab[i].size > 0);
461 /* In order. */
462 vg_assert(si->symtab[i].addr < si->symtab[i+1].addr);
463 /* No overlaps. */
464 vg_assert(si->symtab[i].addr + si->symtab[i].size - 1
465 < si->symtab[i+1].addr);
466 }
467# undef SWAP
468}
469
470
471
472/* Sort the location table by starting address. Mash the table around
473 so as to establish the property that addresses are in order and the
474 ranges do not overlap. This facilitates using binary search to map
475 addresses to locations when we come to query the table. */
476static
477void canonicaliseLoctab ( SegInfo* si )
478{
479 /* Magic numbers due to Janet Incerpi and Robert Sedgewick. */
480 Int incs[16] = { 1, 3, 7, 21, 48, 112, 336, 861, 1968,
481 4592, 13776, 33936, 86961, 198768,
482 463792, 1391376 };
483 Int lo = 0;
484 Int hi = si->loctab_used-1;
485 Int i, j, h, bigN, hp;
486 RiLoc v;
487
488# define SWAP(ty,aa,bb) \
489 do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0);
490
491 /* Sort by start address. */
492
493 bigN = hi - lo + 1; if (bigN < 2) return;
494 hp = 0; while (hp < 16 && incs[hp] < bigN) hp++; hp--;
495 vg_assert(0 <= hp && hp < 16);
496
497 for (; hp >= 0; hp--) {
498 h = incs[hp];
499 i = lo + h;
500 while (1) {
501 if (i > hi) break;
502 v = si->loctab[i];
503 j = i;
504 while (si->loctab[j-h].addr > v.addr) {
505 si->loctab[j] = si->loctab[j-h];
506 j = j - h;
507 if (j <= (lo + h - 1)) break;
508 }
509 si->loctab[j] = v;
510 i++;
511 }
512 }
513
514 /* If two adjacent entries overlap, truncate the first. */
515 for (i = 0; i < si->loctab_used-1; i++) {
516 vg_assert(si->loctab[i].size < 10000);
517 if (si->loctab[i].addr + si->loctab[i].size > si->loctab[i+1].addr) {
518 /* Do this in signed int32 because the actual .size fields
519 are unsigned 16s. */
520 Int new_size = si->loctab[i+1].addr - si->loctab[i].addr;
521 if (new_size < 0) {
522 si->loctab[i].size = 0;
523 } else
524 if (new_size >= 65536) {
525 si->loctab[i].size = 65535;
526 } else {
527 si->loctab[i].size = (UShort)new_size;
528 }
529 }
530 }
531
532 /* Zap any zero-sized entries resulting from the truncation
533 process. */
534 j = 0;
535 for (i = 0; i < si->loctab_used; i++) {
536 if (si->loctab[i].size > 0) {
537 si->loctab[j] = si->loctab[i];
538 j++;
539 }
540 }
541 si->loctab_used = j;
542
543 /* Ensure relevant postconditions hold. */
544 for (i = 0; i < si->loctab_used-1; i++) {
545 /*
546 VG_(printf)("%d (%d) %d 0x%x\n",
547 i, si->loctab[i+1].confident,
548 si->loctab[i+1].size, si->loctab[i+1].addr );
549 */
550 /* No zero-sized symbols. */
551 vg_assert(si->loctab[i].size > 0);
552 /* In order. */
553 vg_assert(si->loctab[i].addr < si->loctab[i+1].addr);
554 /* No overlaps. */
555 vg_assert(si->loctab[i].addr + si->loctab[i].size - 1
556 < si->loctab[i+1].addr);
557 }
558# undef SWAP
559}
560
561
562/*------------------------------------------------------------*/
563/*--- Read info from a .so/exe file. ---*/
564/*------------------------------------------------------------*/
565
566static __inline__
567void addLineInfo ( SegInfo* si,
568 Int fnmoff,
njne0ee0712002-05-03 16:41:05 +0000569 Addr this,
570 Addr next,
571 Int lineno,
572 Int entry )
sewardjde4a1d02002-03-22 01:27:54 +0000573{
574 RiLoc loc;
njne0ee0712002-05-03 16:41:05 +0000575 Int size = next - this;
njn4f9c9342002-04-29 16:03:24 +0000576
njne0ee0712002-05-03 16:41:05 +0000577 /* Ignore zero-sized locs */
578 if (this == next) return;
sewardjde4a1d02002-03-22 01:27:54 +0000579
njne0ee0712002-05-03 16:41:05 +0000580 /* Maximum sanity checking. Some versions of GNU as do a shabby job with
581 * stabs entries; if anything looks suspicious, revert to a size of 1.
582 * This should catch the instruction of interest (since if using asm-level
583 * debug info, one instruction will correspond to one line, unlike with
584 * C-level debug info where multiple instructions can map to the one line),
585 * but avoid catching any other instructions bogusly. */
586 if (this > next) {
587 VG_(message)(Vg_DebugMsg,
588 "warning: stabs addresses out of order "
589 "at entry %d: 0x%x 0x%x", entry, this, next);
590 size = 1;
591 }
sewardjde4a1d02002-03-22 01:27:54 +0000592
njne0ee0712002-05-03 16:41:05 +0000593 if (size > MAX_LOC_SIZE) {
594 VG_(message)(Vg_DebugMsg,
595 "warning: stabs line address range too large "
596 "at entry %d: %d", entry, size);
597 size = 1;
598 }
599
600 vg_assert(this < si->start + si->size && next-1 > si->start);
601 vg_assert(lineno >= 0 && lineno < MAX_LINENO);
602
603 loc.addr = this;
sewardjde4a1d02002-03-22 01:27:54 +0000604 loc.size = (UShort)size;
605 loc.lineno = lineno;
606 loc.fnmoff = fnmoff;
607 addLoc ( si, &loc );
608}
609
610
611/* Read the symbols from the object/exe specified by the SegInfo into
612 the tables within the supplied SegInfo. */
613static
614void vg_read_lib_symbols ( SegInfo* si )
615{
616 Elf32_Ehdr* ehdr; /* The ELF header */
617 Elf32_Shdr* shdr; /* The section table */
618 UChar* sh_strtab; /* The section table's string table */
619 struct nlist* stab; /* The .stab table */
620 UChar* stabstr; /* The .stab string table */
621 Int stab_sz; /* Size in bytes of the .stab table */
622 Int stabstr_sz; /* Size in bytes of the .stab string table */
623 Int fd;
624 Int i;
625 Bool ok;
626 Addr oimage;
627 Int n_oimage;
628 struct stat stat_buf;
629
630 /* for the .stabs reader */
631 Int curr_filenmoff;
632 Addr curr_fnbaseaddr;
njne0ee0712002-05-03 16:41:05 +0000633 Char *curr_file_name, *curr_fn_name;
njn4f9c9342002-04-29 16:03:24 +0000634 Int n_stab_entries;
njne0ee0712002-05-03 16:41:05 +0000635 Int prev_lineno, lineno;
636 Int lineno_overflows;
637 Bool same_file;
sewardjde4a1d02002-03-22 01:27:54 +0000638
639 oimage = (Addr)NULL;
640 if (VG_(clo_verbosity) > 1)
njne0ee0712002-05-03 16:41:05 +0000641 VG_(message)(Vg_UserMsg, "Reading syms from %s", si->filename );
sewardjde4a1d02002-03-22 01:27:54 +0000642
643 /* mmap the object image aboard, so that we can read symbols and
644 line number info out of it. It will be munmapped immediately
645 thereafter; it is only aboard transiently. */
646
647 i = stat(si->filename, &stat_buf);
648 if (i != 0) {
649 vg_symerr("Can't stat .so/.exe (to determine its size)?!");
650 return;
651 }
652 n_oimage = stat_buf.st_size;
653
654 fd = VG_(open_read)(si->filename);
655 if (fd == -1) {
656 vg_symerr("Can't open .so/.exe to read symbols?!");
657 return;
658 }
659
660 oimage = (Addr)VG_(mmap)( NULL, n_oimage, PROT_READ, MAP_PRIVATE, fd, 0 );
661 if (oimage == ((Addr)(-1))) {
662 VG_(message)(Vg_UserMsg,
663 "mmap failed on %s", si->filename );
664 VG_(close)(fd);
665 return;
666 }
667
668 VG_(close)(fd);
669
670 /* Ok, the object image is safely in oimage[0 .. n_oimage-1].
671 Now verify that it is a valid ELF .so or executable image.
672 */
673 ok = (n_oimage >= sizeof(Elf32_Ehdr));
674 ehdr = (Elf32_Ehdr*)oimage;
675
676 if (ok) {
677 ok &= (ehdr->e_ident[EI_MAG0] == 0x7F
678 && ehdr->e_ident[EI_MAG1] == 'E'
679 && ehdr->e_ident[EI_MAG2] == 'L'
680 && ehdr->e_ident[EI_MAG3] == 'F');
681 ok &= (ehdr->e_ident[EI_CLASS] == ELFCLASS32
682 && ehdr->e_ident[EI_DATA] == ELFDATA2LSB
683 && ehdr->e_ident[EI_VERSION] == EV_CURRENT);
684 ok &= (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_DYN);
685 ok &= (ehdr->e_machine == EM_386);
686 ok &= (ehdr->e_version == EV_CURRENT);
687 ok &= (ehdr->e_shstrndx != SHN_UNDEF);
688 ok &= (ehdr->e_shoff != 0 && ehdr->e_shnum != 0);
689 }
690
691 if (!ok) {
692 vg_symerr("Invalid ELF header, or missing stringtab/sectiontab.");
693 VG_(munmap) ( (void*)oimage, n_oimage );
694 return;
695 }
696
697 if (VG_(clo_trace_symtab))
698 VG_(printf)(
699 "shoff = %d, shnum = %d, size = %d, n_vg_oimage = %d\n",
700 ehdr->e_shoff, ehdr->e_shnum, sizeof(Elf32_Shdr), n_oimage );
701
702 if (ehdr->e_shoff + ehdr->e_shnum*sizeof(Elf32_Shdr) > n_oimage) {
703 vg_symerr("ELF section header is beyond image end?!");
704 VG_(munmap) ( (void*)oimage, n_oimage );
705 return;
706 }
707
708 shdr = (Elf32_Shdr*)(oimage + ehdr->e_shoff);
709 sh_strtab = (UChar*)(oimage + shdr[ehdr->e_shstrndx].sh_offset);
710
711 /* try and read the object's symbol table */
712 {
713 UChar* o_strtab = NULL;
714 Elf32_Sym* o_symtab = NULL;
715 UInt o_strtab_sz = 0;
716 UInt o_symtab_sz = 0;
717
718 UChar* o_got = NULL;
719 UChar* o_plt = NULL;
720 UInt o_got_sz = 0;
721 UInt o_plt_sz = 0;
722
723 Bool snaffle_it;
724 Addr sym_addr;
725
726 /* find the .stabstr and .stab sections */
727 for (i = 0; i < ehdr->e_shnum; i++) {
728 if (0 == VG_(strcmp)(".symtab",sh_strtab + shdr[i].sh_name)) {
729 o_symtab = (Elf32_Sym*)(oimage + shdr[i].sh_offset);
730 o_symtab_sz = shdr[i].sh_size;
731 vg_assert((o_symtab_sz % sizeof(Elf32_Sym)) == 0);
732 /* check image overrun here */
733 }
734 if (0 == VG_(strcmp)(".strtab",sh_strtab + shdr[i].sh_name)) {
735 o_strtab = (UChar*)(oimage + shdr[i].sh_offset);
736 o_strtab_sz = shdr[i].sh_size;
737 /* check image overrun here */
738 }
739
740 /* find out where the .got and .plt sections will be in the
741 executable image, not in the object image transiently loaded.
742 */
743 if (0 == VG_(strcmp)(".got",sh_strtab + shdr[i].sh_name)) {
744 o_got = (UChar*)(si->offset
745 + shdr[i].sh_offset);
746 o_got_sz = shdr[i].sh_size;
747 /* check image overrun here */
748 }
749 if (0 == VG_(strcmp)(".plt",sh_strtab + shdr[i].sh_name)) {
750 o_plt = (UChar*)(si->offset
751 + shdr[i].sh_offset);
752 o_plt_sz = shdr[i].sh_size;
753 /* check image overrun here */
754 }
755
756 }
757
758 if (VG_(clo_trace_symtab)) {
759 if (o_plt) VG_(printf)( "PLT: %p .. %p\n",
760 o_plt, o_plt + o_plt_sz - 1 );
761 if (o_got) VG_(printf)( "GOT: %p .. %p\n",
762 o_got, o_got + o_got_sz - 1 );
763 }
764
765 if (o_strtab == NULL || o_symtab == NULL) {
766 vg_symerr(" object doesn't have a symbol table");
767 } else {
768 /* Perhaps should start at i = 1; ELF docs suggest that entry
769 0 always denotes `unknown symbol'. */
770 for (i = 1; i < o_symtab_sz/sizeof(Elf32_Sym); i++){
771# if 0
772 VG_(printf)("raw symbol: ");
773 switch (ELF32_ST_BIND(o_symtab[i].st_info)) {
774 case STB_LOCAL: VG_(printf)("LOC "); break;
775 case STB_GLOBAL: VG_(printf)("GLO "); break;
776 case STB_WEAK: VG_(printf)("WEA "); break;
777 case STB_LOPROC: VG_(printf)("lop "); break;
778 case STB_HIPROC: VG_(printf)("hip "); break;
779 default: VG_(printf)("??? "); break;
780 }
781 switch (ELF32_ST_TYPE(o_symtab[i].st_info)) {
782 case STT_NOTYPE: VG_(printf)("NOT "); break;
783 case STT_OBJECT: VG_(printf)("OBJ "); break;
784 case STT_FUNC: VG_(printf)("FUN "); break;
785 case STT_SECTION: VG_(printf)("SEC "); break;
786 case STT_FILE: VG_(printf)("FIL "); break;
787 case STT_LOPROC: VG_(printf)("lop "); break;
788 case STT_HIPROC: VG_(printf)("hip "); break;
789 default: VG_(printf)("??? "); break;
790 }
791 VG_(printf)(
792 ": value %p, size %d, name %s\n",
793 si->offset+(UChar*)o_symtab[i].st_value,
794 o_symtab[i].st_size,
795 o_symtab[i].st_name
796 ? ((Char*)o_strtab+o_symtab[i].st_name)
797 : (Char*)"NONAME");
798# endif
799
800 /* Figure out if we're interested in the symbol.
801 Firstly, is it of the right flavour?
802 */
803 snaffle_it
804 = ( (ELF32_ST_BIND(o_symtab[i].st_info) == STB_GLOBAL ||
805 ELF32_ST_BIND(o_symtab[i].st_info) == STB_LOCAL /* ||
806 ELF32_ST_BIND(o_symtab[i].st_info) == STB_WEAK */)
807 &&
808 (ELF32_ST_TYPE(o_symtab[i].st_info) == STT_FUNC /*||
809 ELF32_ST_TYPE(o_symtab[i].st_info) == STT_OBJECT*/)
810 );
811
812 /* Secondly, if it's apparently in a GOT or PLT, it's really
813 a reference to a symbol defined elsewhere, so ignore it.
814 */
815 sym_addr = si->offset
816 + (UInt)o_symtab[i].st_value;
817 if (o_got != NULL
818 && sym_addr >= (Addr)o_got
819 && sym_addr < (Addr)(o_got+o_got_sz)) {
820 snaffle_it = False;
821 if (VG_(clo_trace_symtab)) {
822 VG_(printf)( "in GOT: %s\n",
823 o_strtab+o_symtab[i].st_name);
824 }
825 }
826 if (o_plt != NULL
827 && sym_addr >= (Addr)o_plt
828 && sym_addr < (Addr)(o_plt+o_plt_sz)) {
829 snaffle_it = False;
830 if (VG_(clo_trace_symtab)) {
831 VG_(printf)( "in PLT: %s\n",
832 o_strtab+o_symtab[i].st_name);
833 }
834 }
835
836 /* Don't bother if nameless, or zero-sized. */
837 if (snaffle_it
838 && (o_symtab[i].st_name == (Elf32_Word)NULL
839 || /* VG_(strlen)(o_strtab+o_symtab[i].st_name) == 0 */
840 /* equivalent but cheaper ... */
841 * ((UChar*)(o_strtab+o_symtab[i].st_name)) == 0
842 || o_symtab[i].st_size == 0)) {
843 snaffle_it = False;
844 if (VG_(clo_trace_symtab)) {
845 VG_(printf)( "size=0: %s\n",
846 o_strtab+o_symtab[i].st_name);
847 }
848 }
849
850# if 0
851 /* Avoid _dl_ junk. (Why?) */
852 /* 01-02-24: disabled until I find out if it really helps. */
853 if (snaffle_it
854 && (VG_(strncmp)("_dl_", o_strtab+o_symtab[i].st_name, 4) == 0
855 || VG_(strncmp)("_r_debug",
856 o_strtab+o_symtab[i].st_name, 8) == 0)) {
857 snaffle_it = False;
858 if (VG_(clo_trace_symtab)) {
859 VG_(printf)( "_dl_ junk: %s\n",
860 o_strtab+o_symtab[i].st_name);
861 }
862 }
863# endif
864
865 /* This seems to significantly reduce the number of junk
866 symbols, and particularly reduces the number of
867 overlapping address ranges. Don't ask me why ... */
868 if (snaffle_it && (Int)o_symtab[i].st_value == 0) {
869 snaffle_it = False;
870 if (VG_(clo_trace_symtab)) {
871 VG_(printf)( "valu=0: %s\n",
872 o_strtab+o_symtab[i].st_name);
873 }
874 }
875
876 /* If no part of the symbol falls within the mapped range,
877 ignore it. */
878 if (sym_addr+o_symtab[i].st_size <= si->start
879 || sym_addr >= si->start+si->size) {
880 snaffle_it = False;
881 }
882
883 if (snaffle_it) {
884 /* it's an interesting symbol; record ("snaffle") it. */
885 RiSym sym;
886 Char* t0 = o_symtab[i].st_name
887 ? (Char*)(o_strtab+o_symtab[i].st_name)
888 : (Char*)"NONAME";
889 Int nmoff = addStr ( si, t0 );
890 vg_assert(nmoff >= 0
891 /* && 0==VG_(strcmp)(t0,&vg_strtab[nmoff]) */ );
892 vg_assert( (Int)o_symtab[i].st_value >= 0);
893 /* VG_(printf)("%p + %d: %s\n", si->addr,
894 (Int)o_symtab[i].st_value, t0 ); */
895 sym.addr = sym_addr;
896 sym.size = o_symtab[i].st_size;
897 sym.nmoff = nmoff;
898 addSym ( si, &sym );
899 }
900 }
901 }
902 }
903
904 /* Reading of the "stabs" debug format information, if any. */
905 stabstr = NULL;
906 stab = NULL;
907 stabstr_sz = 0;
908 stab_sz = 0;
909 /* find the .stabstr and .stab sections */
910 for (i = 0; i < ehdr->e_shnum; i++) {
911 if (0 == VG_(strcmp)(".stab",sh_strtab + shdr[i].sh_name)) {
912 stab = (struct nlist *)(oimage + shdr[i].sh_offset);
913 stab_sz = shdr[i].sh_size;
914 }
915 if (0 == VG_(strcmp)(".stabstr",sh_strtab + shdr[i].sh_name)) {
916 stabstr = (UChar*)(oimage + shdr[i].sh_offset);
917 stabstr_sz = shdr[i].sh_size;
918 }
919 }
920
921 if (stab == NULL || stabstr == NULL) {
922 vg_symerr(" object doesn't have any debug info");
923 VG_(munmap) ( (void*)oimage, n_oimage );
924 return;
925 }
926
927 if ( stab_sz + (UChar*)stab > n_oimage + (UChar*)oimage
928 || stabstr_sz + (UChar*)stabstr
929 > n_oimage + (UChar*)oimage ) {
930 vg_symerr(" ELF debug data is beyond image end?!");
931 VG_(munmap) ( (void*)oimage, n_oimage );
932 return;
933 }
934
935 /* Ok. It all looks plausible. Go on and read debug data.
936 stab kinds: 100 N_SO a source file name
937 68 N_SLINE a source line number
njn4f9c9342002-04-29 16:03:24 +0000938 36 N_FUN start of a function
sewardjde4a1d02002-03-22 01:27:54 +0000939
njn4f9c9342002-04-29 16:03:24 +0000940 In this loop, we maintain a current file name, updated as
941 N_SO/N_SOLs appear, and a current function base address,
942 updated as N_FUNs appear. Based on that, address ranges for
943 N_SLINEs are calculated, and stuffed into the line info table.
sewardjde4a1d02002-03-22 01:27:54 +0000944
njn4f9c9342002-04-29 16:03:24 +0000945 Finding the instruction address range covered by an N_SLINE is
946 complicated; see the N_SLINE case below.
sewardjde4a1d02002-03-22 01:27:54 +0000947 */
948 curr_filenmoff = addStr(si,"???");
949 curr_fnbaseaddr = (Addr)NULL;
njne0ee0712002-05-03 16:41:05 +0000950 curr_file_name = curr_fn_name = (Char*)NULL;
951 lineno = prev_lineno = 0;
952 lineno_overflows = 0;
953 same_file = True;
sewardjde4a1d02002-03-22 01:27:54 +0000954
njn4f9c9342002-04-29 16:03:24 +0000955 n_stab_entries = stab_sz/(int)sizeof(struct nlist);
956
957 for (i = 0; i < n_stab_entries; i++) {
sewardjde4a1d02002-03-22 01:27:54 +0000958# if 0
959 VG_(printf) ( " %2d ", i );
960 VG_(printf) ( "type=0x%x othr=%d desc=%d value=0x%x strx=%d %s",
961 stab[i].n_type, stab[i].n_other, stab[i].n_desc,
962 (int)stab[i].n_value,
963 (int)stab[i].n_un.n_strx,
964 stabstr + stab[i].n_un.n_strx );
965 VG_(printf)("\n");
966# endif
967
njne0ee0712002-05-03 16:41:05 +0000968 Char *no_fn_name = "???";
969
sewardjde4a1d02002-03-22 01:27:54 +0000970 switch (stab[i].n_type) {
njn4f9c9342002-04-29 16:03:24 +0000971 UInt next_addr;
sewardjde4a1d02002-03-22 01:27:54 +0000972
njne0ee0712002-05-03 16:41:05 +0000973 /* Two complicated things here:
974 * 1. the n_desc field in 'struct n_list' in a.out.h is only 16-bits,
975 * which gives a maximum of 65535 lines. We handle files bigger
976 * than this by detecting heuristically overflows -- if the line
977 * count goes from 65000-odd to 0-odd within the same file, we
978 * assume it's an overflow. Once we switch files, we zero the
979 * overflow count
980 *
981 * 2. To compute the instr address range covered by a single line,
982 * find the address of the next thing and compute the difference.
983 * The approach used depends on what kind of entry/entries
984 * follow...
985 */
njn4f9c9342002-04-29 16:03:24 +0000986 case N_SLINE: {
njn4f9c9342002-04-29 16:03:24 +0000987 Int this_addr = (UInt)stab[i].n_value;
988
njne0ee0712002-05-03 16:41:05 +0000989 /* Although stored as a short, neg values really are > 32768, hence
990 * the UShort cast. Then we use an Int to handle overflows. */
991 prev_lineno = lineno;
992 lineno = (Int)((UShort)stab[i].n_desc);
993
994 if (prev_lineno > lineno + OVERFLOW_DIFFERENCE && same_file) {
995 VG_(message)(Vg_DebugMsg,
996 "Line number overflow detected (%d --> %d) in %s",
997 prev_lineno, lineno, curr_file_name);
998 lineno_overflows++;
999 }
1000 same_file = True;
1001
njn4f9c9342002-04-29 16:03:24 +00001002 LOOP:
njn9aae6742002-04-30 13:44:01 +00001003 if (i+1 >= n_stab_entries) {
1004 /* If it's the last entry, just guess the range is four; can't
1005 * do any better */
njne0ee0712002-05-03 16:41:05 +00001006 next_addr = this_addr + 4;
njn9aae6742002-04-30 13:44:01 +00001007 } else {
1008 switch (stab[i+1].n_type) {
1009 /* Easy, common case: use address of next entry */
1010 case N_SLINE: case N_SO:
njn4f9c9342002-04-29 16:03:24 +00001011 next_addr = (UInt)stab[i+1].n_value;
njn4f9c9342002-04-29 16:03:24 +00001012 break;
njn4f9c9342002-04-29 16:03:24 +00001013
njn9aae6742002-04-30 13:44:01 +00001014 /* Boring one: skip, look for something more useful. */
1015 case N_RSYM: case N_LSYM: case N_LBRAC: case N_RBRAC:
njn9885df02002-05-01 08:25:03 +00001016 case N_STSYM: case N_LCSYM: case N_GSYM:
njn9aae6742002-04-30 13:44:01 +00001017 i++;
1018 goto LOOP;
1019
1020 /* Should be an end of fun entry, use its address */
1021 case N_FUN:
1022 if ('\0' == * (stabstr + stab[i+1].n_un.n_strx) ) {
1023 next_addr = (UInt)stab[i+1].n_value;
1024 } else {
njne0ee0712002-05-03 16:41:05 +00001025 VG_(message)(Vg_DebugMsg,
1026 "warning: function %s missing closing "
1027 "N_FUN stab at entry %d",
1028 curr_fn_name, i );
1029 next_addr = this_addr; /* assume zero-size loc */
njn9aae6742002-04-30 13:44:01 +00001030 }
1031 break;
1032
1033 /* N_SOL should be followed by an N_SLINE which can be used */
1034 case N_SOL:
1035 if (i+2 < n_stab_entries && N_SLINE == stab[i+2].n_type) {
1036 next_addr = (UInt)stab[i+2].n_value;
1037 break;
1038 } else {
1039 VG_(printf)("unhandled N_SOL stabs case: %d %d %d",
1040 stab[i+1].n_type, i, n_stab_entries);
sewardj177d3232002-05-01 09:25:56 +00001041 VG_(panic)("unhandled N_SOL stabs case");
njn9aae6742002-04-30 13:44:01 +00001042 }
1043
1044 default:
1045 VG_(printf)("unhandled stabs case: %d %d",
1046 stab[i+1].n_type,i);
sewardj177d3232002-05-01 09:25:56 +00001047 VG_(panic)("unhandled (other) stabs case");
njn9aae6742002-04-30 13:44:01 +00001048 }
sewardjde4a1d02002-03-22 01:27:54 +00001049 }
njn4f9c9342002-04-29 16:03:24 +00001050
njn4f9c9342002-04-29 16:03:24 +00001051 addLineInfo ( si, curr_filenmoff, curr_fnbaseaddr + this_addr,
njne0ee0712002-05-03 16:41:05 +00001052 curr_fnbaseaddr + next_addr,
1053 lineno + lineno_overflows * LINENO_OVERFLOW, i);
sewardjde4a1d02002-03-22 01:27:54 +00001054 break;
1055 }
1056
njn4f9c9342002-04-29 16:03:24 +00001057 case N_FUN: {
1058 if ('\0' != (stabstr + stab[i].n_un.n_strx)[0] ) {
sewardjde4a1d02002-03-22 01:27:54 +00001059 /* N_FUN with a name -- indicates the start of a fn. */
njn4f9c9342002-04-29 16:03:24 +00001060 curr_fnbaseaddr = si->offset + (Addr)stab[i].n_value;
njne0ee0712002-05-03 16:41:05 +00001061 curr_fn_name = stabstr + stab[i].n_un.n_strx;
1062 } else {
1063 curr_fn_name = no_fn_name;
sewardjde4a1d02002-03-22 01:27:54 +00001064 }
1065 break;
1066 }
1067
njne0ee0712002-05-03 16:41:05 +00001068 case N_SOL:
1069 if (lineno_overflows != 0) {
1070 VG_(panic)("Can't currently handle include files in very long "
1071 "(> 65535 lines) files. Sorry.");
1072 }
1073 /* fall through! */
1074 case N_SO:
1075 lineno_overflows = 0;
1076
sewardjde4a1d02002-03-22 01:27:54 +00001077 /* seems to give lots of locations in header files */
1078 /* case 130: */ /* BINCL */
1079 {
1080 UChar* nm = stabstr + stab[i].n_un.n_strx;
1081 UInt len = VG_(strlen)(nm);
njn4f9c9342002-04-29 16:03:24 +00001082
1083 if (len > 0 && nm[len-1] != '/') {
sewardjde4a1d02002-03-22 01:27:54 +00001084 curr_filenmoff = addStr ( si, nm );
njn4f9c9342002-04-29 16:03:24 +00001085 curr_file_name = stabstr + stab[i].n_un.n_strx;
1086 }
sewardjde4a1d02002-03-22 01:27:54 +00001087 else
1088 if (len == 0)
1089 curr_filenmoff = addStr ( si, "?1\0" );
njn4f9c9342002-04-29 16:03:24 +00001090
sewardjde4a1d02002-03-22 01:27:54 +00001091 break;
1092 }
1093
1094# if 0
1095 case 162: /* EINCL */
1096 curr_filenmoff = addStr ( si, "?2\0" );
1097 break;
1098# endif
1099
1100 default:
1101 break;
1102 }
1103 } /* for (i = 0; i < stab_sz/(int)sizeof(struct nlist); i++) */
1104
1105 /* Last, but not least, heave the oimage back overboard. */
1106 VG_(munmap) ( (void*)oimage, n_oimage );
1107}
1108
1109
1110/*------------------------------------------------------------*/
1111/*--- Main entry point for symbols table reading. ---*/
1112/*------------------------------------------------------------*/
1113
/* The root structure for the entire symbol table system.  It is a
   linked list of SegInfos.  Note that this entire mechanism assumes
   that what we read from /proc/self/maps doesn't contain overlapping
   address ranges, and as a result the SegInfos in this list describe
   disjoint address ranges.

   read_symtab_callback() pushes new entries onto the front of the
   list, VG_(read_symbols)() re-checks the disjointness assumption
   after each scan, and VG_(symtab_notify_munmap)() unlinks entries.
*/
static SegInfo* segInfo = NULL;
1121
1122
1123static
1124void read_symtab_callback (
1125 Addr start, UInt size,
1126 Char rr, Char ww, Char xx,
1127 UInt foffset, UChar* filename )
1128{
1129 SegInfo* si;
1130
1131 /* Stay sane ... */
1132 if (size == 0)
1133 return;
1134
1135 /* We're only interested in collecting symbols in executable
1136 segments which are associated with a real file. Hence: */
1137 if (filename == NULL || xx != 'x')
1138 return;
1139 if (0 == VG_(strcmp)(filename, "/dev/zero"))
1140 return;
1141
1142 /* Perhaps we already have this one? If so, skip. */
1143 for (si = segInfo; si != NULL; si = si->next) {
1144 /*
1145 if (0==VG_(strcmp)(si->filename, filename))
1146 VG_(printf)("same fnames: %c%c%c (%p, %d) (%p, %d) %s\n",
1147 rr,ww,xx,si->start,si->size,start,size,filename);
1148 */
1149 /* For some reason the observed size of a mapping can change, so
1150 we don't use that to determine uniqueness. */
1151 if (si->start == start
1152 /* && si->size == size */
1153 && 0==VG_(strcmp)(si->filename, filename)) {
1154 return;
1155 }
1156 }
1157
1158 /* Get the record initialised right. */
1159 si = VG_(malloc)(VG_AR_SYMTAB, sizeof(SegInfo));
1160 si->next = segInfo;
1161 segInfo = si;
1162
1163 si->start = start;
1164 si->size = size;
1165 si->foffset = foffset;
1166 si->filename = VG_(malloc)(VG_AR_SYMTAB, 1 + VG_(strlen)(filename));
1167 VG_(strcpy)(si->filename, filename);
1168
1169 si->symtab = NULL;
1170 si->symtab_size = si->symtab_used = 0;
1171 si->loctab = NULL;
1172 si->loctab_size = si->loctab_used = 0;
1173 si->strtab = NULL;
1174 si->strtab_size = si->strtab_used = 0;
1175
1176 /* Kludge ... */
1177 si->offset
1178 = si->start==VG_ASSUMED_EXE_BASE ? 0 : si->start;
1179
1180 /* And actually fill it up. */
1181 vg_read_lib_symbols ( si );
1182 canonicaliseSymtab ( si );
1183 canonicaliseLoctab ( si );
1184}
1185
1186
1187/* This one really is the Head Honcho. Update the symbol tables to
1188 reflect the current state of /proc/self/maps. Rather than re-read
1189 everything, just read the entries which are not already in segInfo.
1190 So we can call here repeatedly, after every mmap of a non-anonymous
1191 segment with execute permissions, for example, to pick up new
1192 libraries as they are dlopen'd. Conversely, when the client does
1193 munmap(), vg_symtab_notify_munmap() throws away any symbol tables
1194 which happen to correspond to the munmap()d area. */
1195void VG_(read_symbols) ( void )
1196{
njn4f9c9342002-04-29 16:03:24 +00001197 if (! VG_(clo_instrument) && ! VG_(clo_cachesim))
1198 return;
sewardjde4a1d02002-03-22 01:27:54 +00001199
1200 VG_(read_procselfmaps) ( read_symtab_callback );
1201
1202 /* Do a sanity check on the symbol tables: ensure that the address
1203 space pieces they cover do not overlap (otherwise we are severely
1204 hosed). This is a quadratic algorithm, but there shouldn't be
1205 many of them.
1206 */
1207 { SegInfo *si, *si2;
1208 for (si = segInfo; si != NULL; si = si->next) {
1209 /* Check no overlap between *si and those in the rest of the
1210 list. */
1211 for (si2 = si->next; si2 != NULL; si2 = si2->next) {
1212 Addr lo = si->start;
1213 Addr hi = si->start + si->size - 1;
1214 Addr lo2 = si2->start;
1215 Addr hi2 = si2->start + si2->size - 1;
1216 Bool overlap;
1217 vg_assert(lo < hi);
1218 vg_assert(lo2 < hi2);
1219 /* the main assertion */
1220 overlap = (lo <= lo2 && lo2 <= hi)
1221 || (lo <= hi2 && hi2 <= hi);
1222 //vg_assert(!overlap);
1223 if (overlap) {
1224 VG_(printf)("\n\nOVERLAPPING SEGMENTS\n" );
1225 ppSegInfo ( si );
1226 ppSegInfo ( si2 );
1227 VG_(printf)("\n\n");
1228 vg_assert(! overlap);
1229 }
1230 }
1231 }
1232 }
1233}
1234
1235
1236/* When an munmap() call happens, check to see whether it corresponds
1237 to a segment for a .so, and if so discard the relevant SegInfo.
1238 This might not be a very clever idea from the point of view of
1239 accuracy of error messages, but we need to do it in order to
1240 maintain the no-overlapping invariant.
1241*/
1242void VG_(symtab_notify_munmap) ( Addr start, UInt length )
1243{
1244 SegInfo *prev, *curr;
1245
1246 if (! VG_(clo_instrument))
1247 return;
1248
1249 prev = NULL;
1250 curr = segInfo;
1251 while (True) {
1252 if (curr == NULL) break;
1253 if (start == curr->start) break;
1254 prev = curr;
1255 curr = curr->next;
1256 }
1257 if (curr == NULL) return;
1258
1259 VG_(message)(Vg_UserMsg,
1260 "discard syms in %s due to munmap()",
1261 curr->filename ? curr->filename : (UChar*)"???");
1262
1263 vg_assert(prev == NULL || prev->next == curr);
1264
1265 if (prev == NULL) {
1266 segInfo = curr->next;
1267 } else {
1268 prev->next = curr->next;
1269 }
1270
1271 freeSegInfo(curr);
1272}
1273
1274
1275/*------------------------------------------------------------*/
1276/*--- Use of symbol table & location info to create ---*/
1277/*--- plausible-looking stack dumps. ---*/
1278/*------------------------------------------------------------*/
1279
1280/* Find a symbol-table index containing the specified pointer, or -1
1281 if not found. Binary search. */
1282
1283static Int search_one_symtab ( SegInfo* si, Addr ptr )
1284{
1285 Addr a_mid_lo, a_mid_hi;
1286 Int mid,
1287 lo = 0,
1288 hi = si->symtab_used-1;
1289 while (True) {
1290 /* current unsearched space is from lo to hi, inclusive. */
1291 if (lo > hi) return -1; /* not found */
1292 mid = (lo + hi) / 2;
1293 a_mid_lo = si->symtab[mid].addr;
1294 a_mid_hi = ((Addr)si->symtab[mid].addr) + si->symtab[mid].size - 1;
1295
1296 if (ptr < a_mid_lo) { hi = mid-1; continue; }
1297 if (ptr > a_mid_hi) { lo = mid+1; continue; }
1298 vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
1299 return mid;
1300 }
1301}
1302
1303
1304/* Search all symtabs that we know about to locate ptr. If found, set
1305 *psi to the relevant SegInfo, and *symno to the symtab entry number
1306 within that. If not found, *psi is set to NULL. */
1307
1308static void search_all_symtabs ( Addr ptr, SegInfo** psi, Int* symno )
1309{
1310 Int sno;
1311 SegInfo* si;
1312 for (si = segInfo; si != NULL; si = si->next) {
1313 if (si->start <= ptr && ptr < si->start+si->size) {
1314 sno = search_one_symtab ( si, ptr );
1315 if (sno == -1) goto not_found;
1316 *symno = sno;
1317 *psi = si;
1318 return;
1319 }
1320 }
1321 not_found:
1322 *psi = NULL;
1323}
1324
1325
1326/* Find a location-table index containing the specified pointer, or -1
1327 if not found. Binary search. */
1328
1329static Int search_one_loctab ( SegInfo* si, Addr ptr )
1330{
1331 Addr a_mid_lo, a_mid_hi;
1332 Int mid,
1333 lo = 0,
1334 hi = si->loctab_used-1;
1335 while (True) {
1336 /* current unsearched space is from lo to hi, inclusive. */
1337 if (lo > hi) return -1; /* not found */
1338 mid = (lo + hi) / 2;
1339 a_mid_lo = si->loctab[mid].addr;
1340 a_mid_hi = ((Addr)si->loctab[mid].addr) + si->loctab[mid].size - 1;
1341
1342 if (ptr < a_mid_lo) { hi = mid-1; continue; }
1343 if (ptr > a_mid_hi) { lo = mid+1; continue; }
1344 vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
1345 return mid;
1346 }
1347}
1348
1349
1350/* Search all loctabs that we know about to locate ptr. If found, set
1351 *psi to the relevant SegInfo, and *locno to the loctab entry number
1352 within that. If not found, *psi is set to NULL.
1353*/
1354static void search_all_loctabs ( Addr ptr, SegInfo** psi, Int* locno )
1355{
1356 Int lno;
1357 SegInfo* si;
1358 for (si = segInfo; si != NULL; si = si->next) {
1359 if (si->start <= ptr && ptr < si->start+si->size) {
1360 lno = search_one_loctab ( si, ptr );
1361 if (lno == -1) goto not_found;
1362 *locno = lno;
1363 *psi = si;
1364 return;
1365 }
1366 }
1367 not_found:
1368 *psi = NULL;
1369}
1370
1371
1372/* The whole point of this whole big deal: map a code address to a
1373 plausible symbol name. Returns False if no idea; otherwise True.
1374 Caller supplies buf and nbuf. If no_demangle is True, don't do
1375 demangling, regardless of vg_clo_demangle -- probably because the
1376 call has come from vg_what_fn_or_object_is_this. */
njn4f9c9342002-04-29 16:03:24 +00001377Bool VG_(what_fn_is_this) ( Bool no_demangle, Addr a,
1378 Char* buf, Int nbuf )
sewardjde4a1d02002-03-22 01:27:54 +00001379{
1380 SegInfo* si;
1381 Int sno;
1382 search_all_symtabs ( a, &si, &sno );
1383 if (si == NULL)
1384 return False;
1385 if (no_demangle) {
1386 VG_(strncpy_safely)
1387 ( buf, & si->strtab[si->symtab[sno].nmoff], nbuf );
1388 } else {
1389 VG_(demangle) ( & si->strtab[si->symtab[sno].nmoff], buf, nbuf );
1390 }
1391 return True;
1392}
1393
1394
1395/* Map a code address to the name of a shared object file. Returns
1396 False if no idea; otherwise False. Caller supplies buf and
1397 nbuf. */
1398static
1399Bool vg_what_object_is_this ( Addr a, Char* buf, Int nbuf )
1400{
1401 SegInfo* si;
1402 for (si = segInfo; si != NULL; si = si->next) {
1403 if (si->start <= a && a < si->start+si->size) {
1404 VG_(strncpy_safely)(buf, si->filename, nbuf);
1405 return True;
1406 }
1407 }
1408 return False;
1409}
1410
1411/* Return the name of an erring fn in a way which is useful
1412 for comparing against the contents of a suppressions file.
1413 Always writes something to buf. Also, doesn't demangle the
1414 name, because we want to refer to mangled names in the
1415 suppressions file.
1416*/
1417void VG_(what_obj_and_fun_is_this) ( Addr a,
1418 Char* obj_buf, Int n_obj_buf,
1419 Char* fun_buf, Int n_fun_buf )
1420{
1421 (void)vg_what_object_is_this ( a, obj_buf, n_obj_buf );
njn4f9c9342002-04-29 16:03:24 +00001422 (void)VG_(what_fn_is_this) ( True, a, fun_buf, n_fun_buf );
sewardjde4a1d02002-03-22 01:27:54 +00001423}
1424
1425
1426/* Map a code address to a (filename, line number) pair.
1427 Returns True if successful.
1428*/
njn4f9c9342002-04-29 16:03:24 +00001429Bool VG_(what_line_is_this)( Addr a,
1430 UChar* filename, Int n_filename,
1431 UInt* lineno )
sewardjde4a1d02002-03-22 01:27:54 +00001432{
1433 SegInfo* si;
1434 Int locno;
1435 search_all_loctabs ( a, &si, &locno );
1436 if (si == NULL)
1437 return False;
1438 VG_(strncpy_safely)(filename, & si->strtab[si->loctab[locno].fnmoff],
1439 n_filename);
1440 *lineno = si->loctab[locno].lineno;
njn4f9c9342002-04-29 16:03:24 +00001441
sewardjde4a1d02002-03-22 01:27:54 +00001442 return True;
1443}
1444
1445
1446/* Print a mini stack dump, showing the current location. */
1447void VG_(mini_stack_dump) ( ExeContext* ec )
1448{
1449
1450#define APPEND(str) \
1451 { UChar* sss; \
1452 for (sss = str; n < M_VG_ERRTXT-1 && *sss != 0; n++,sss++) \
1453 buf[n] = *sss; \
1454 buf[n] = 0; \
1455 }
1456
1457 Bool know_fnname;
1458 Bool know_objname;
1459 Bool know_srcloc;
1460 UInt lineno;
1461 UChar ibuf[20];
1462 UInt i, n, clueless;
1463
1464 UChar buf[M_VG_ERRTXT];
1465 UChar buf_fn[M_VG_ERRTXT];
1466 UChar buf_obj[M_VG_ERRTXT];
1467 UChar buf_srcloc[M_VG_ERRTXT];
1468
1469 Int stop_at = VG_(clo_backtrace_size);
1470
1471 n = 0;
1472
njn4f9c9342002-04-29 16:03:24 +00001473 know_fnname = VG_(what_fn_is_this)(False,ec->eips[0], buf_fn, M_VG_ERRTXT);
sewardjde4a1d02002-03-22 01:27:54 +00001474 know_objname = vg_what_object_is_this(ec->eips[0], buf_obj, M_VG_ERRTXT);
njn4f9c9342002-04-29 16:03:24 +00001475 know_srcloc = VG_(what_line_is_this)(ec->eips[0],
1476 buf_srcloc, M_VG_ERRTXT,
1477 &lineno);
sewardjde4a1d02002-03-22 01:27:54 +00001478
1479 APPEND(" at ");
1480 VG_(sprintf)(ibuf,"0x%x: ", ec->eips[0]);
1481 APPEND(ibuf);
1482 if (know_fnname) {
1483 APPEND(buf_fn);
1484 if (!know_srcloc && know_objname) {
1485 APPEND(" (in ");
1486 APPEND(buf_obj);
1487 APPEND(")");
1488 }
1489 } else if (know_objname && !know_srcloc) {
1490 APPEND("(within ");
1491 APPEND(buf_obj);
1492 APPEND(")");
1493 } else {
1494 APPEND("???");
1495 }
1496 if (know_srcloc) {
1497 APPEND(" (");
1498 APPEND(buf_srcloc);
1499 APPEND(":");
1500 VG_(sprintf)(ibuf,"%d",lineno);
1501 APPEND(ibuf);
1502 APPEND(")");
1503 }
1504 VG_(message)(Vg_UserMsg, "%s", buf);
1505
1506 clueless = 0;
1507 for (i = 1; i < stop_at; i++) {
njn4f9c9342002-04-29 16:03:24 +00001508 know_fnname = VG_(what_fn_is_this)(False,ec->eips[i], buf_fn, M_VG_ERRTXT);
sewardjde4a1d02002-03-22 01:27:54 +00001509 know_objname = vg_what_object_is_this(ec->eips[i],buf_obj, M_VG_ERRTXT);
njn4f9c9342002-04-29 16:03:24 +00001510 know_srcloc = VG_(what_line_is_this)(ec->eips[i],
sewardjde4a1d02002-03-22 01:27:54 +00001511 buf_srcloc, M_VG_ERRTXT,
1512 &lineno);
1513 n = 0;
1514 APPEND(" by ");
1515 if (ec->eips[i] == 0) {
1516 APPEND("<bogus frame pointer> ");
1517 } else {
1518 VG_(sprintf)(ibuf,"0x%x: ",ec->eips[i]);
1519 APPEND(ibuf);
1520 }
1521 if (know_fnname) {
1522 APPEND(buf_fn)
1523 if (!know_srcloc && know_objname) {
1524 APPEND(" (in ");
1525 APPEND(buf_obj);
1526 APPEND(")");
1527 }
1528 } else {
1529 if (know_objname && !know_srcloc) {
1530 APPEND("(within ");
1531 APPEND(buf_obj);
1532 APPEND(")");
1533 } else {
1534 APPEND("???");
1535 }
1536 if (!know_srcloc) clueless++;
1537 if (clueless == 2)
1538 i = stop_at; /* force exit after this iteration */
1539 };
1540 if (know_srcloc) {
1541 APPEND(" (");
1542 APPEND(buf_srcloc);
1543 APPEND(":");
1544 VG_(sprintf)(ibuf,"%d",lineno);
1545 APPEND(ibuf);
1546 APPEND(")");
1547 }
1548 VG_(message)(Vg_UserMsg, "%s", buf);
1549 }
1550}
1551
1552#undef APPEND
1553
1554/*--------------------------------------------------------------------*/
1555/*--- end vg_symtab2.c ---*/
1556/*--------------------------------------------------------------------*/