blob: d7257a408584eb59f770dff1a73be2f205149a2d [file] [log] [blame]
sewardjde4a1d02002-03-22 01:27:54 +00001/*--------------------------------------------------------------------*/
2/*--- Management of symbols and debugging information. ---*/
3/*--- vg_symtab2.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7 This file is part of Valgrind, an x86 protected-mode emulator
8 designed for debugging and profiling binaries on x86-Unixes.
9
10 Copyright (C) 2000-2002 Julian Seward
11 jseward@acm.org
sewardjde4a1d02002-03-22 01:27:54 +000012
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file LICENSE.
29*/
30
31#include "vg_include.h"
32#include "vg_unsafe.h"
33
34#include <elf.h> /* ELF defns */
35#include <a.out.h> /* stabs defns */
36
njn9aae6742002-04-30 13:44:01 +000037
sewardjde4a1d02002-03-22 01:27:54 +000038/* Majorly rewritten Sun 3 Feb 02 to enable loading symbols from
39 dlopen()ed libraries, which is something that KDE3 does a lot.
40 Still kludgey, though less than before:
41
42 * we don't check whether we should throw away some symbol tables
43 when munmap() happens
44
45 * symbol table reading code for ELF binaries is a shambles.
46 Use GHC's fptools/ghc/rts/Linker.c as the basis for something better.
47*/
48
49/*------------------------------------------------------------*/
50/*--- Structs n stuff ---*/
51/*------------------------------------------------------------*/
52
/* Stabs entry types, from:
 *   The "stabs" debug format
 *   Menapace, Kingdon and MacKenzie
 *   Cygnus Support
 *
 * These are the codes compared against the n_type field of a stab
 * entry when the .stab section is read.
 */
typedef enum {
   N_GSYM  = 0x20,   /*  32: Global symbol */
   N_FUN   = 0x24,   /*  36: Function start or end */
   N_STSYM = 0x26,   /*  38: Data segment file-scope variable */
   N_LCSYM = 0x28,   /*  40: BSS segment file-scope variable */
   N_RSYM  = 0x40,   /*  64: Register variable */
   N_SLINE = 0x44,   /*  68: Source line number */
   N_SO    = 0x64,   /* 100: Source file path and name */
   N_LSYM  = 0x80,   /* 128: Stack variable or type */
   N_SOL   = 0x84,   /* 132: Include file name */
   N_LBRAC = 0xc0,   /* 192: Start of lexical block */
   N_RBRAC = 0xe0    /* 224: End of lexical block */
} stab_types;
70
sewardjde4a1d02002-03-22 01:27:54 +000071/* A structure to hold an ELF symbol (very crudely). */
72typedef
73 struct {
74 Addr addr; /* lowest address of entity */
75 UInt size; /* size in bytes */
76 Int nmoff; /* offset of name in this SegInfo's str tab */
77 }
78 RiSym;
79
/* Line count at which overflow happens, due to line numbers being stored as
 * shorts in `struct nlist' in a.out.h. */
#define LINENO_OVERFLOW      (1 << (8 * sizeof(short)))

/* RiLoc splits one 32-bit word between a line number (LINENO_BITS bits)
 * and a byte size (the remaining LOC_SIZE_BITS bits). */
#define LINENO_BITS          20
#define LOC_SIZE_BITS        (32 - LINENO_BITS)

/* Largest line number that fits in LINENO_BITS bits. */
#define MAX_LINENO           ((1 << LINENO_BITS) - 1)

/* Largest size that fits in LOC_SIZE_BITS bits (4095).  Unlikely to have
 * any lines with instruction ranges bigger than that. */
#define MAX_LOC_SIZE         ((1 << LOC_SIZE_BITS) - 1)

/* Number used to detect line number overflows; if one line is 60000-odd
 * smaller than the previous, it was probably an overflow.
 */
#define OVERFLOW_DIFFERENCE  (LINENO_OVERFLOW - 5000)
95
96/* A structure to hold addr-to-source info for a single line. There can be a
97 * lot of these, hence the dense packing. */
sewardjde4a1d02002-03-22 01:27:54 +000098typedef
99 struct {
njne0ee0712002-05-03 16:41:05 +0000100 /* Word 1 */
101 Addr addr; /* lowest address for this line */
102 /* Word 2 */
103 UShort size:LOC_SIZE_BITS; /* byte size; we catch overflows of this */
104 UInt lineno:LINENO_BITS; /* source line number, or zero */
105 /* Word 3 */
106 UInt fnmoff; /* source filename; offset in this
107 SegInfo's str tab */
sewardjde4a1d02002-03-22 01:27:54 +0000108 }
109 RiLoc;
110
111
112/* A structure which contains information pertaining to one mapped
113 text segment. */
114typedef
115 struct _SegInfo {
116 struct _SegInfo* next;
117 /* Description of the mapped segment. */
118 Addr start;
119 UInt size;
120 UChar* filename; /* in mallocville */
121 UInt foffset;
122 /* An expandable array of symbols. */
123 RiSym* symtab;
124 UInt symtab_used;
125 UInt symtab_size;
126 /* An expandable array of locations. */
127 RiLoc* loctab;
128 UInt loctab_used;
129 UInt loctab_size;
130 /* An expandable array of characters -- the string table. */
131 Char* strtab;
132 UInt strtab_used;
133 UInt strtab_size;
134 /* offset is what we need to add to symbol table entries
135 to get the real location of that symbol in memory.
136 For executables, offset is zero.
137 For .so's, offset == base_addr.
138 This seems like a giant kludge to me.
139 */
140 UInt offset;
141 }
142 SegInfo;
143
144
145/* -- debug helper -- */
146static void ppSegInfo ( SegInfo* si )
147{
148 VG_(printf)("name: %s\n"
149 "start %p, size %d, foffset %d\n",
150 si->filename?si->filename : (UChar*)"NULL",
151 si->start, si->size, si->foffset );
152}
153
154static void freeSegInfo ( SegInfo* si )
155{
156 vg_assert(si != NULL);
157 if (si->filename) VG_(free)(VG_AR_SYMTAB, si->filename);
158 if (si->symtab) VG_(free)(VG_AR_SYMTAB, si->symtab);
159 if (si->loctab) VG_(free)(VG_AR_SYMTAB, si->loctab);
160 if (si->strtab) VG_(free)(VG_AR_SYMTAB, si->strtab);
161 VG_(free)(VG_AR_SYMTAB, si);
162}
163
164
165/*------------------------------------------------------------*/
166/*--- Adding stuff ---*/
167/*------------------------------------------------------------*/
168
169/* Add a str to the string table, including terminating zero, and
170 return offset of the string in vg_strtab. */
171
172static __inline__
173Int addStr ( SegInfo* si, Char* str )
174{
175 Char* new_tab;
176 Int new_sz, i, space_needed;
177
178 space_needed = 1 + VG_(strlen)(str);
179 if (si->strtab_used + space_needed > si->strtab_size) {
180 new_sz = 2 * si->strtab_size;
181 if (new_sz == 0) new_sz = 5000;
182 new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz);
183 if (si->strtab != NULL) {
184 for (i = 0; i < si->strtab_used; i++)
185 new_tab[i] = si->strtab[i];
186 VG_(free)(VG_AR_SYMTAB, si->strtab);
187 }
188 si->strtab = new_tab;
189 si->strtab_size = new_sz;
190 }
191
192 for (i = 0; i < space_needed; i++)
193 si->strtab[si->strtab_used+i] = str[i];
194
195 si->strtab_used += space_needed;
196 vg_assert(si->strtab_used <= si->strtab_size);
197 return si->strtab_used - space_needed;
198}
199
200/* Add a symbol to the symbol table. */
201
202static __inline__
203void addSym ( SegInfo* si, RiSym* sym )
204{
205 Int new_sz, i;
206 RiSym* new_tab;
207
208 /* Ignore zero-sized syms. */
209 if (sym->size == 0) return;
210
211 if (si->symtab_used == si->symtab_size) {
212 new_sz = 2 * si->symtab_size;
213 if (new_sz == 0) new_sz = 500;
214 new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiSym) );
215 if (si->symtab != NULL) {
216 for (i = 0; i < si->symtab_used; i++)
217 new_tab[i] = si->symtab[i];
218 VG_(free)(VG_AR_SYMTAB, si->symtab);
219 }
220 si->symtab = new_tab;
221 si->symtab_size = new_sz;
222 }
223
224 si->symtab[si->symtab_used] = *sym;
225 si->symtab_used++;
226 vg_assert(si->symtab_used <= si->symtab_size);
227}
228
229/* Add a location to the location table. */
230
231static __inline__
232void addLoc ( SegInfo* si, RiLoc* loc )
233{
234 Int new_sz, i;
235 RiLoc* new_tab;
236
njne0ee0712002-05-03 16:41:05 +0000237 /* Zero-sized locs should have been ignored earlier */
238 vg_assert(loc->size > 0);
sewardjde4a1d02002-03-22 01:27:54 +0000239
240 if (si->loctab_used == si->loctab_size) {
241 new_sz = 2 * si->loctab_size;
242 if (new_sz == 0) new_sz = 500;
243 new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiLoc) );
244 if (si->loctab != NULL) {
245 for (i = 0; i < si->loctab_used; i++)
246 new_tab[i] = si->loctab[i];
247 VG_(free)(VG_AR_SYMTAB, si->loctab);
248 }
249 si->loctab = new_tab;
250 si->loctab_size = new_sz;
251 }
252
253 si->loctab[si->loctab_used] = *loc;
254 si->loctab_used++;
255 vg_assert(si->loctab_used <= si->loctab_size);
256}
257
258
259
260/*------------------------------------------------------------*/
261/*--- Helpers ---*/
262/*------------------------------------------------------------*/
263
264/* Non-fatal -- use vg_panic if terminal. */
265static
266void vg_symerr ( Char* msg )
267{
268 if (VG_(clo_verbosity) > 1)
269 VG_(message)(Vg_UserMsg,"%s", msg );
270}
271
272
273/* Print a symbol. */
274static
275void printSym ( SegInfo* si, Int i )
276{
277 VG_(printf)( "%5d: %8p .. %8p (%d) %s\n",
278 i,
279 si->symtab[i].addr,
280 si->symtab[i].addr + si->symtab[i].size - 1, si->symtab[i].size,
281 &si->strtab[si->symtab[i].nmoff] );
282}
283
284
#if 0
/* Print the entire sym tab, bracketed by BEGIN/END banner lines.

   NOTE: this code is compiled out and is stale.  It reads
   vg_symtab_used and calls printSym with one argument, whereas printSym
   in this file takes (SegInfo*, Int) and the symbol table lives in a
   SegInfo.  It needs porting before it can be re-enabled. */
static __attribute__ ((unused))
void printSymtab ( void )
{
   Int i;
   VG_(printf)("\n------ BEGIN vg_symtab ------\n");
   for (i = 0; i < vg_symtab_used; i++)
      printSym(i);
   VG_(printf)("------ END vg_symtab ------\n");
}
#endif
297
#if 0
/* Paranoid strcat: append src to the zero-terminated string in dst,
   never writing outside dst[0 .. maxlen-1].

   If dst holds no terminating zero within its first maxlen bytes it is
   left untouched.  If src does not fit completely, as much as fits is
   copied and the result is still zero-terminated (in dst[maxlen-1]);
   otherwise the old terminator would have been overwritten and dst
   left unterminated. */
static
void safeCopy ( UChar* dst, UInt maxlen, UChar* src )
{
   UInt i = 0, j = 0;
   /* Find the end of the string already in dst. */
   while (True) {
      if (i >= maxlen) return;
      if (dst[i] == 0) break;
      i++;
   }
   /* Append src, its terminating zero included. */
   while (True) {
      if (i >= maxlen) {
         /* Ran out of room; maxlen >= 1 here because the first loop
            found a terminator at an index below maxlen. */
         dst[maxlen-1] = 0;
         return;
      }
      dst[i] = src[j];
      if (src[j] == 0) return;
      i++; j++;
   }
}
#endif
317
318/*------------------------------------------------------------*/
319/*--- Canonicalisers ---*/
320/*------------------------------------------------------------*/
321
322/* Sort the symtab by starting address, and emit warnings if any
323 symbols have overlapping address ranges. We use that old chestnut,
324 shellsort. Mash the table around so as to establish the property
325 that addresses are in order and the ranges to not overlap. This
326 facilitates using binary search to map addresses to symbols when we
327 come to query the table.
328*/
329static
330void canonicaliseSymtab ( SegInfo* si )
331{
332 /* Magic numbers due to Janet Incerpi and Robert Sedgewick. */
333 Int incs[16] = { 1, 3, 7, 21, 48, 112, 336, 861, 1968,
334 4592, 13776, 33936, 86961, 198768,
335 463792, 1391376 };
336 Int lo = 0;
337 Int hi = si->symtab_used-1;
338 Int i, j, h, bigN, hp, n_merged, n_truncated;
339 RiSym v;
340 Addr s1, s2, e1, e2;
341
342# define SWAP(ty,aa,bb) \
343 do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0)
344
345 bigN = hi - lo + 1; if (bigN < 2) return;
346 hp = 0; while (hp < 16 && incs[hp] < bigN) hp++; hp--;
347 vg_assert(0 <= hp && hp < 16);
348
349 for (; hp >= 0; hp--) {
350 h = incs[hp];
351 i = lo + h;
352 while (1) {
353 if (i > hi) break;
354 v = si->symtab[i];
355 j = i;
356 while (si->symtab[j-h].addr > v.addr) {
357 si->symtab[j] = si->symtab[j-h];
358 j = j - h;
359 if (j <= (lo + h - 1)) break;
360 }
361 si->symtab[j] = v;
362 i++;
363 }
364 }
365
366 cleanup_more:
367
368 /* If two symbols have identical address ranges, favour the
369 one with the longer name.
370 */
371 do {
372 n_merged = 0;
373 j = si->symtab_used;
374 si->symtab_used = 0;
375 for (i = 0; i < j; i++) {
376 if (i < j-1
377 && si->symtab[i].addr == si->symtab[i+1].addr
378 && si->symtab[i].size == si->symtab[i+1].size) {
379 n_merged++;
380 /* merge the two into one */
381 if (VG_(strlen)(&si->strtab[si->symtab[i].nmoff])
382 > VG_(strlen)(&si->strtab[si->symtab[i+1].nmoff])) {
383 si->symtab[si->symtab_used++] = si->symtab[i];
384 } else {
385 si->symtab[si->symtab_used++] = si->symtab[i+1];
386 }
387 i++;
388 } else {
389 si->symtab[si->symtab_used++] = si->symtab[i];
390 }
391 }
392 if (VG_(clo_trace_symtab))
393 VG_(printf)( "%d merged\n", n_merged);
394 }
395 while (n_merged > 0);
396
397 /* Detect and "fix" overlapping address ranges. */
398 n_truncated = 0;
399
400 for (i = 0; i < si->symtab_used-1; i++) {
401
402 vg_assert(si->symtab[i].addr <= si->symtab[i+1].addr);
403
404 /* Check for common (no overlap) case. */
405 if (si->symtab[i].addr + si->symtab[i].size
406 <= si->symtab[i+1].addr)
407 continue;
408
409 /* There's an overlap. Truncate one or the other. */
410 if (VG_(clo_trace_symtab)) {
411 VG_(printf)("overlapping address ranges in symbol table\n\t");
412 printSym(si,i);
413 VG_(printf)("\t");
414 printSym(si,i+1);
415 VG_(printf)("\n");
416 }
417
418 /* Truncate one or the other. */
419 s1 = si->symtab[i].addr;
420 s2 = si->symtab[i+1].addr;
421 e1 = s1 + si->symtab[i].size - 1;
422 e2 = s2 + si->symtab[i+1].size - 1;
423 if (s1 < s2) {
424 e1 = s2-1;
425 } else {
426 vg_assert(s1 == s2);
427 if (e1 > e2) {
428 s1 = e2+1; SWAP(Addr,s1,s2); SWAP(Addr,e1,e2);
429 } else
430 if (e1 < e2) {
431 s2 = e1+1;
432 } else {
433 /* e1 == e2. Identical addr ranges. We'll eventually wind
434 up back at cleanup_more, which will take care of it. */
435 }
436 }
437 si->symtab[i].addr = s1;
438 si->symtab[i+1].addr = s2;
439 si->symtab[i].size = e1 - s1 + 1;
440 si->symtab[i+1].size = e2 - s2 + 1;
441 vg_assert(s1 <= s2);
442 vg_assert(si->symtab[i].size > 0);
443 vg_assert(si->symtab[i+1].size > 0);
444 /* It may be that the i+1 entry now needs to be moved further
445 along to maintain the address order requirement. */
446 j = i+1;
447 while (j < si->symtab_used-1
448 && si->symtab[j].addr > si->symtab[j+1].addr) {
449 SWAP(RiSym,si->symtab[j],si->symtab[j+1]);
450 j++;
451 }
452 n_truncated++;
453 }
454
455 if (n_truncated > 0) goto cleanup_more;
456
457 /* Ensure relevant postconditions hold. */
458 for (i = 0; i < si->symtab_used-1; i++) {
459 /* No zero-sized symbols. */
460 vg_assert(si->symtab[i].size > 0);
461 /* In order. */
462 vg_assert(si->symtab[i].addr < si->symtab[i+1].addr);
463 /* No overlaps. */
464 vg_assert(si->symtab[i].addr + si->symtab[i].size - 1
465 < si->symtab[i+1].addr);
466 }
467# undef SWAP
468}
469
470
471
472/* Sort the location table by starting address. Mash the table around
473 so as to establish the property that addresses are in order and the
474 ranges do not overlap. This facilitates using binary search to map
475 addresses to locations when we come to query the table. */
476static
477void canonicaliseLoctab ( SegInfo* si )
478{
479 /* Magic numbers due to Janet Incerpi and Robert Sedgewick. */
480 Int incs[16] = { 1, 3, 7, 21, 48, 112, 336, 861, 1968,
481 4592, 13776, 33936, 86961, 198768,
482 463792, 1391376 };
483 Int lo = 0;
484 Int hi = si->loctab_used-1;
485 Int i, j, h, bigN, hp;
486 RiLoc v;
487
488# define SWAP(ty,aa,bb) \
489 do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0);
490
491 /* Sort by start address. */
492
493 bigN = hi - lo + 1; if (bigN < 2) return;
494 hp = 0; while (hp < 16 && incs[hp] < bigN) hp++; hp--;
495 vg_assert(0 <= hp && hp < 16);
496
497 for (; hp >= 0; hp--) {
498 h = incs[hp];
499 i = lo + h;
500 while (1) {
501 if (i > hi) break;
502 v = si->loctab[i];
503 j = i;
504 while (si->loctab[j-h].addr > v.addr) {
505 si->loctab[j] = si->loctab[j-h];
506 j = j - h;
507 if (j <= (lo + h - 1)) break;
508 }
509 si->loctab[j] = v;
510 i++;
511 }
512 }
513
514 /* If two adjacent entries overlap, truncate the first. */
515 for (i = 0; i < si->loctab_used-1; i++) {
516 vg_assert(si->loctab[i].size < 10000);
517 if (si->loctab[i].addr + si->loctab[i].size > si->loctab[i+1].addr) {
518 /* Do this in signed int32 because the actual .size fields
519 are unsigned 16s. */
520 Int new_size = si->loctab[i+1].addr - si->loctab[i].addr;
521 if (new_size < 0) {
522 si->loctab[i].size = 0;
523 } else
524 if (new_size >= 65536) {
525 si->loctab[i].size = 65535;
526 } else {
527 si->loctab[i].size = (UShort)new_size;
528 }
529 }
530 }
531
532 /* Zap any zero-sized entries resulting from the truncation
533 process. */
534 j = 0;
535 for (i = 0; i < si->loctab_used; i++) {
536 if (si->loctab[i].size > 0) {
537 si->loctab[j] = si->loctab[i];
538 j++;
539 }
540 }
541 si->loctab_used = j;
542
543 /* Ensure relevant postconditions hold. */
544 for (i = 0; i < si->loctab_used-1; i++) {
545 /*
546 VG_(printf)("%d (%d) %d 0x%x\n",
547 i, si->loctab[i+1].confident,
548 si->loctab[i+1].size, si->loctab[i+1].addr );
549 */
550 /* No zero-sized symbols. */
551 vg_assert(si->loctab[i].size > 0);
552 /* In order. */
553 vg_assert(si->loctab[i].addr < si->loctab[i+1].addr);
554 /* No overlaps. */
555 vg_assert(si->loctab[i].addr + si->loctab[i].size - 1
556 < si->loctab[i+1].addr);
557 }
558# undef SWAP
559}
560
561
562/*------------------------------------------------------------*/
563/*--- Read info from a .so/exe file. ---*/
564/*------------------------------------------------------------*/
565
566static __inline__
567void addLineInfo ( SegInfo* si,
568 Int fnmoff,
njne0ee0712002-05-03 16:41:05 +0000569 Addr this,
570 Addr next,
571 Int lineno,
572 Int entry )
sewardjde4a1d02002-03-22 01:27:54 +0000573{
574 RiLoc loc;
njne0ee0712002-05-03 16:41:05 +0000575 Int size = next - this;
njn4f9c9342002-04-29 16:03:24 +0000576
njne0ee0712002-05-03 16:41:05 +0000577 /* Ignore zero-sized locs */
578 if (this == next) return;
sewardjde4a1d02002-03-22 01:27:54 +0000579
njne0ee0712002-05-03 16:41:05 +0000580 /* Maximum sanity checking. Some versions of GNU as do a shabby job with
581 * stabs entries; if anything looks suspicious, revert to a size of 1.
582 * This should catch the instruction of interest (since if using asm-level
583 * debug info, one instruction will correspond to one line, unlike with
584 * C-level debug info where multiple instructions can map to the one line),
585 * but avoid catching any other instructions bogusly. */
586 if (this > next) {
587 VG_(message)(Vg_DebugMsg,
588 "warning: stabs addresses out of order "
589 "at entry %d: 0x%x 0x%x", entry, this, next);
590 size = 1;
591 }
sewardjde4a1d02002-03-22 01:27:54 +0000592
njne0ee0712002-05-03 16:41:05 +0000593 if (size > MAX_LOC_SIZE) {
594 VG_(message)(Vg_DebugMsg,
595 "warning: stabs line address range too large "
596 "at entry %d: %d", entry, size);
597 size = 1;
598 }
599
600 vg_assert(this < si->start + si->size && next-1 > si->start);
601 vg_assert(lineno >= 0 && lineno < MAX_LINENO);
602
603 loc.addr = this;
sewardjde4a1d02002-03-22 01:27:54 +0000604 loc.size = (UShort)size;
605 loc.lineno = lineno;
606 loc.fnmoff = fnmoff;
607 addLoc ( si, &loc );
608}
609
610
611/* Read the symbols from the object/exe specified by the SegInfo into
612 the tables within the supplied SegInfo. */
613static
614void vg_read_lib_symbols ( SegInfo* si )
615{
616 Elf32_Ehdr* ehdr; /* The ELF header */
617 Elf32_Shdr* shdr; /* The section table */
618 UChar* sh_strtab; /* The section table's string table */
619 struct nlist* stab; /* The .stab table */
620 UChar* stabstr; /* The .stab string table */
621 Int stab_sz; /* Size in bytes of the .stab table */
622 Int stabstr_sz; /* Size in bytes of the .stab string table */
623 Int fd;
624 Int i;
625 Bool ok;
626 Addr oimage;
627 Int n_oimage;
628 struct stat stat_buf;
629
630 /* for the .stabs reader */
631 Int curr_filenmoff;
632 Addr curr_fnbaseaddr;
njne0ee0712002-05-03 16:41:05 +0000633 Char *curr_file_name, *curr_fn_name;
njn4f9c9342002-04-29 16:03:24 +0000634 Int n_stab_entries;
njne0ee0712002-05-03 16:41:05 +0000635 Int prev_lineno, lineno;
636 Int lineno_overflows;
637 Bool same_file;
sewardjde4a1d02002-03-22 01:27:54 +0000638
639 oimage = (Addr)NULL;
640 if (VG_(clo_verbosity) > 1)
njne0ee0712002-05-03 16:41:05 +0000641 VG_(message)(Vg_UserMsg, "Reading syms from %s", si->filename );
sewardjde4a1d02002-03-22 01:27:54 +0000642
643 /* mmap the object image aboard, so that we can read symbols and
644 line number info out of it. It will be munmapped immediately
645 thereafter; it is only aboard transiently. */
646
647 i = stat(si->filename, &stat_buf);
648 if (i != 0) {
649 vg_symerr("Can't stat .so/.exe (to determine its size)?!");
650 return;
651 }
652 n_oimage = stat_buf.st_size;
653
654 fd = VG_(open_read)(si->filename);
655 if (fd == -1) {
656 vg_symerr("Can't open .so/.exe to read symbols?!");
657 return;
658 }
659
660 oimage = (Addr)VG_(mmap)( NULL, n_oimage, PROT_READ, MAP_PRIVATE, fd, 0 );
661 if (oimage == ((Addr)(-1))) {
662 VG_(message)(Vg_UserMsg,
663 "mmap failed on %s", si->filename );
664 VG_(close)(fd);
665 return;
666 }
667
668 VG_(close)(fd);
669
670 /* Ok, the object image is safely in oimage[0 .. n_oimage-1].
671 Now verify that it is a valid ELF .so or executable image.
672 */
673 ok = (n_oimage >= sizeof(Elf32_Ehdr));
674 ehdr = (Elf32_Ehdr*)oimage;
675
676 if (ok) {
677 ok &= (ehdr->e_ident[EI_MAG0] == 0x7F
678 && ehdr->e_ident[EI_MAG1] == 'E'
679 && ehdr->e_ident[EI_MAG2] == 'L'
680 && ehdr->e_ident[EI_MAG3] == 'F');
681 ok &= (ehdr->e_ident[EI_CLASS] == ELFCLASS32
682 && ehdr->e_ident[EI_DATA] == ELFDATA2LSB
683 && ehdr->e_ident[EI_VERSION] == EV_CURRENT);
684 ok &= (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_DYN);
685 ok &= (ehdr->e_machine == EM_386);
686 ok &= (ehdr->e_version == EV_CURRENT);
687 ok &= (ehdr->e_shstrndx != SHN_UNDEF);
688 ok &= (ehdr->e_shoff != 0 && ehdr->e_shnum != 0);
689 }
690
691 if (!ok) {
692 vg_symerr("Invalid ELF header, or missing stringtab/sectiontab.");
693 VG_(munmap) ( (void*)oimage, n_oimage );
694 return;
695 }
696
697 if (VG_(clo_trace_symtab))
698 VG_(printf)(
699 "shoff = %d, shnum = %d, size = %d, n_vg_oimage = %d\n",
700 ehdr->e_shoff, ehdr->e_shnum, sizeof(Elf32_Shdr), n_oimage );
701
702 if (ehdr->e_shoff + ehdr->e_shnum*sizeof(Elf32_Shdr) > n_oimage) {
703 vg_symerr("ELF section header is beyond image end?!");
704 VG_(munmap) ( (void*)oimage, n_oimage );
705 return;
706 }
707
708 shdr = (Elf32_Shdr*)(oimage + ehdr->e_shoff);
709 sh_strtab = (UChar*)(oimage + shdr[ehdr->e_shstrndx].sh_offset);
710
711 /* try and read the object's symbol table */
712 {
713 UChar* o_strtab = NULL;
714 Elf32_Sym* o_symtab = NULL;
715 UInt o_strtab_sz = 0;
716 UInt o_symtab_sz = 0;
717
718 UChar* o_got = NULL;
719 UChar* o_plt = NULL;
720 UInt o_got_sz = 0;
721 UInt o_plt_sz = 0;
722
723 Bool snaffle_it;
724 Addr sym_addr;
725
726 /* find the .stabstr and .stab sections */
727 for (i = 0; i < ehdr->e_shnum; i++) {
728 if (0 == VG_(strcmp)(".symtab",sh_strtab + shdr[i].sh_name)) {
729 o_symtab = (Elf32_Sym*)(oimage + shdr[i].sh_offset);
730 o_symtab_sz = shdr[i].sh_size;
731 vg_assert((o_symtab_sz % sizeof(Elf32_Sym)) == 0);
732 /* check image overrun here */
733 }
734 if (0 == VG_(strcmp)(".strtab",sh_strtab + shdr[i].sh_name)) {
735 o_strtab = (UChar*)(oimage + shdr[i].sh_offset);
736 o_strtab_sz = shdr[i].sh_size;
737 /* check image overrun here */
738 }
739
740 /* find out where the .got and .plt sections will be in the
741 executable image, not in the object image transiently loaded.
742 */
743 if (0 == VG_(strcmp)(".got",sh_strtab + shdr[i].sh_name)) {
744 o_got = (UChar*)(si->offset
745 + shdr[i].sh_offset);
746 o_got_sz = shdr[i].sh_size;
747 /* check image overrun here */
748 }
749 if (0 == VG_(strcmp)(".plt",sh_strtab + shdr[i].sh_name)) {
750 o_plt = (UChar*)(si->offset
751 + shdr[i].sh_offset);
752 o_plt_sz = shdr[i].sh_size;
753 /* check image overrun here */
754 }
755
756 }
757
758 if (VG_(clo_trace_symtab)) {
759 if (o_plt) VG_(printf)( "PLT: %p .. %p\n",
760 o_plt, o_plt + o_plt_sz - 1 );
761 if (o_got) VG_(printf)( "GOT: %p .. %p\n",
762 o_got, o_got + o_got_sz - 1 );
763 }
764
765 if (o_strtab == NULL || o_symtab == NULL) {
766 vg_symerr(" object doesn't have a symbol table");
767 } else {
768 /* Perhaps should start at i = 1; ELF docs suggest that entry
769 0 always denotes `unknown symbol'. */
770 for (i = 1; i < o_symtab_sz/sizeof(Elf32_Sym); i++){
771# if 0
772 VG_(printf)("raw symbol: ");
773 switch (ELF32_ST_BIND(o_symtab[i].st_info)) {
774 case STB_LOCAL: VG_(printf)("LOC "); break;
775 case STB_GLOBAL: VG_(printf)("GLO "); break;
776 case STB_WEAK: VG_(printf)("WEA "); break;
777 case STB_LOPROC: VG_(printf)("lop "); break;
778 case STB_HIPROC: VG_(printf)("hip "); break;
779 default: VG_(printf)("??? "); break;
780 }
781 switch (ELF32_ST_TYPE(o_symtab[i].st_info)) {
782 case STT_NOTYPE: VG_(printf)("NOT "); break;
783 case STT_OBJECT: VG_(printf)("OBJ "); break;
784 case STT_FUNC: VG_(printf)("FUN "); break;
785 case STT_SECTION: VG_(printf)("SEC "); break;
786 case STT_FILE: VG_(printf)("FIL "); break;
787 case STT_LOPROC: VG_(printf)("lop "); break;
788 case STT_HIPROC: VG_(printf)("hip "); break;
789 default: VG_(printf)("??? "); break;
790 }
791 VG_(printf)(
792 ": value %p, size %d, name %s\n",
793 si->offset+(UChar*)o_symtab[i].st_value,
794 o_symtab[i].st_size,
795 o_symtab[i].st_name
796 ? ((Char*)o_strtab+o_symtab[i].st_name)
797 : (Char*)"NONAME");
798# endif
799
800 /* Figure out if we're interested in the symbol.
801 Firstly, is it of the right flavour?
802 */
803 snaffle_it
804 = ( (ELF32_ST_BIND(o_symtab[i].st_info) == STB_GLOBAL ||
805 ELF32_ST_BIND(o_symtab[i].st_info) == STB_LOCAL /* ||
806 ELF32_ST_BIND(o_symtab[i].st_info) == STB_WEAK */)
807 &&
808 (ELF32_ST_TYPE(o_symtab[i].st_info) == STT_FUNC /*||
809 ELF32_ST_TYPE(o_symtab[i].st_info) == STT_OBJECT*/)
810 );
811
812 /* Secondly, if it's apparently in a GOT or PLT, it's really
813 a reference to a symbol defined elsewhere, so ignore it.
814 */
815 sym_addr = si->offset
816 + (UInt)o_symtab[i].st_value;
817 if (o_got != NULL
818 && sym_addr >= (Addr)o_got
819 && sym_addr < (Addr)(o_got+o_got_sz)) {
820 snaffle_it = False;
821 if (VG_(clo_trace_symtab)) {
822 VG_(printf)( "in GOT: %s\n",
823 o_strtab+o_symtab[i].st_name);
824 }
825 }
826 if (o_plt != NULL
827 && sym_addr >= (Addr)o_plt
828 && sym_addr < (Addr)(o_plt+o_plt_sz)) {
829 snaffle_it = False;
830 if (VG_(clo_trace_symtab)) {
831 VG_(printf)( "in PLT: %s\n",
832 o_strtab+o_symtab[i].st_name);
833 }
834 }
835
836 /* Don't bother if nameless, or zero-sized. */
837 if (snaffle_it
838 && (o_symtab[i].st_name == (Elf32_Word)NULL
839 || /* VG_(strlen)(o_strtab+o_symtab[i].st_name) == 0 */
840 /* equivalent but cheaper ... */
841 * ((UChar*)(o_strtab+o_symtab[i].st_name)) == 0
842 || o_symtab[i].st_size == 0)) {
843 snaffle_it = False;
844 if (VG_(clo_trace_symtab)) {
845 VG_(printf)( "size=0: %s\n",
846 o_strtab+o_symtab[i].st_name);
847 }
848 }
849
850# if 0
851 /* Avoid _dl_ junk. (Why?) */
852 /* 01-02-24: disabled until I find out if it really helps. */
853 if (snaffle_it
854 && (VG_(strncmp)("_dl_", o_strtab+o_symtab[i].st_name, 4) == 0
855 || VG_(strncmp)("_r_debug",
856 o_strtab+o_symtab[i].st_name, 8) == 0)) {
857 snaffle_it = False;
858 if (VG_(clo_trace_symtab)) {
859 VG_(printf)( "_dl_ junk: %s\n",
860 o_strtab+o_symtab[i].st_name);
861 }
862 }
863# endif
864
865 /* This seems to significantly reduce the number of junk
866 symbols, and particularly reduces the number of
867 overlapping address ranges. Don't ask me why ... */
868 if (snaffle_it && (Int)o_symtab[i].st_value == 0) {
869 snaffle_it = False;
870 if (VG_(clo_trace_symtab)) {
871 VG_(printf)( "valu=0: %s\n",
872 o_strtab+o_symtab[i].st_name);
873 }
874 }
875
876 /* If no part of the symbol falls within the mapped range,
877 ignore it. */
878 if (sym_addr+o_symtab[i].st_size <= si->start
879 || sym_addr >= si->start+si->size) {
880 snaffle_it = False;
881 }
882
883 if (snaffle_it) {
884 /* it's an interesting symbol; record ("snaffle") it. */
885 RiSym sym;
886 Char* t0 = o_symtab[i].st_name
887 ? (Char*)(o_strtab+o_symtab[i].st_name)
888 : (Char*)"NONAME";
889 Int nmoff = addStr ( si, t0 );
890 vg_assert(nmoff >= 0
891 /* && 0==VG_(strcmp)(t0,&vg_strtab[nmoff]) */ );
892 vg_assert( (Int)o_symtab[i].st_value >= 0);
893 /* VG_(printf)("%p + %d: %s\n", si->addr,
894 (Int)o_symtab[i].st_value, t0 ); */
895 sym.addr = sym_addr;
896 sym.size = o_symtab[i].st_size;
897 sym.nmoff = nmoff;
898 addSym ( si, &sym );
899 }
900 }
901 }
902 }
903
904 /* Reading of the "stabs" debug format information, if any. */
905 stabstr = NULL;
906 stab = NULL;
907 stabstr_sz = 0;
908 stab_sz = 0;
909 /* find the .stabstr and .stab sections */
910 for (i = 0; i < ehdr->e_shnum; i++) {
911 if (0 == VG_(strcmp)(".stab",sh_strtab + shdr[i].sh_name)) {
912 stab = (struct nlist *)(oimage + shdr[i].sh_offset);
913 stab_sz = shdr[i].sh_size;
914 }
915 if (0 == VG_(strcmp)(".stabstr",sh_strtab + shdr[i].sh_name)) {
916 stabstr = (UChar*)(oimage + shdr[i].sh_offset);
917 stabstr_sz = shdr[i].sh_size;
918 }
919 }
920
921 if (stab == NULL || stabstr == NULL) {
922 vg_symerr(" object doesn't have any debug info");
923 VG_(munmap) ( (void*)oimage, n_oimage );
924 return;
925 }
926
927 if ( stab_sz + (UChar*)stab > n_oimage + (UChar*)oimage
928 || stabstr_sz + (UChar*)stabstr
929 > n_oimage + (UChar*)oimage ) {
930 vg_symerr(" ELF debug data is beyond image end?!");
931 VG_(munmap) ( (void*)oimage, n_oimage );
932 return;
933 }
934
935 /* Ok. It all looks plausible. Go on and read debug data.
936 stab kinds: 100 N_SO a source file name
937 68 N_SLINE a source line number
njn4f9c9342002-04-29 16:03:24 +0000938 36 N_FUN start of a function
sewardjde4a1d02002-03-22 01:27:54 +0000939
njn4f9c9342002-04-29 16:03:24 +0000940 In this loop, we maintain a current file name, updated as
941 N_SO/N_SOLs appear, and a current function base address,
942 updated as N_FUNs appear. Based on that, address ranges for
943 N_SLINEs are calculated, and stuffed into the line info table.
sewardjde4a1d02002-03-22 01:27:54 +0000944
njn4f9c9342002-04-29 16:03:24 +0000945 Finding the instruction address range covered by an N_SLINE is
946 complicated; see the N_SLINE case below.
sewardjde4a1d02002-03-22 01:27:54 +0000947 */
948 curr_filenmoff = addStr(si,"???");
949 curr_fnbaseaddr = (Addr)NULL;
njne0ee0712002-05-03 16:41:05 +0000950 curr_file_name = curr_fn_name = (Char*)NULL;
951 lineno = prev_lineno = 0;
952 lineno_overflows = 0;
953 same_file = True;
sewardjde4a1d02002-03-22 01:27:54 +0000954
njn4f9c9342002-04-29 16:03:24 +0000955 n_stab_entries = stab_sz/(int)sizeof(struct nlist);
956
957 for (i = 0; i < n_stab_entries; i++) {
sewardjde4a1d02002-03-22 01:27:54 +0000958# if 0
959 VG_(printf) ( " %2d ", i );
960 VG_(printf) ( "type=0x%x othr=%d desc=%d value=0x%x strx=%d %s",
961 stab[i].n_type, stab[i].n_other, stab[i].n_desc,
962 (int)stab[i].n_value,
963 (int)stab[i].n_un.n_strx,
964 stabstr + stab[i].n_un.n_strx );
965 VG_(printf)("\n");
966# endif
967
njne0ee0712002-05-03 16:41:05 +0000968 Char *no_fn_name = "???";
969
sewardjde4a1d02002-03-22 01:27:54 +0000970 switch (stab[i].n_type) {
njn4f9c9342002-04-29 16:03:24 +0000971 UInt next_addr;
sewardjde4a1d02002-03-22 01:27:54 +0000972
njne0ee0712002-05-03 16:41:05 +0000973 /* Two complicated things here:
974 * 1. the n_desc field in 'struct n_list' in a.out.h is only 16-bits,
975 * which gives a maximum of 65535 lines. We handle files bigger
976 * than this by detecting heuristically overflows -- if the line
977 * count goes from 65000-odd to 0-odd within the same file, we
978 * assume it's an overflow. Once we switch files, we zero the
979 * overflow count
980 *
981 * 2. To compute the instr address range covered by a single line,
982 * find the address of the next thing and compute the difference.
983 * The approach used depends on what kind of entry/entries
984 * follow...
985 */
njn4f9c9342002-04-29 16:03:24 +0000986 case N_SLINE: {
njn4f9c9342002-04-29 16:03:24 +0000987 Int this_addr = (UInt)stab[i].n_value;
988
njne0ee0712002-05-03 16:41:05 +0000989 /* Although stored as a short, neg values really are > 32768, hence
990 * the UShort cast. Then we use an Int to handle overflows. */
991 prev_lineno = lineno;
992 lineno = (Int)((UShort)stab[i].n_desc);
993
994 if (prev_lineno > lineno + OVERFLOW_DIFFERENCE && same_file) {
995 VG_(message)(Vg_DebugMsg,
996 "Line number overflow detected (%d --> %d) in %s",
997 prev_lineno, lineno, curr_file_name);
998 lineno_overflows++;
999 }
1000 same_file = True;
1001
njn4f9c9342002-04-29 16:03:24 +00001002 LOOP:
njn9aae6742002-04-30 13:44:01 +00001003 if (i+1 >= n_stab_entries) {
1004 /* If it's the last entry, just guess the range is four; can't
1005 * do any better */
njne0ee0712002-05-03 16:41:05 +00001006 next_addr = this_addr + 4;
njn9aae6742002-04-30 13:44:01 +00001007 } else {
1008 switch (stab[i+1].n_type) {
1009 /* Easy, common case: use address of next entry */
1010 case N_SLINE: case N_SO:
njn4f9c9342002-04-29 16:03:24 +00001011 next_addr = (UInt)stab[i+1].n_value;
njn4f9c9342002-04-29 16:03:24 +00001012 break;
njn4f9c9342002-04-29 16:03:24 +00001013
njn9aae6742002-04-30 13:44:01 +00001014 /* Boring one: skip, look for something more useful. */
1015 case N_RSYM: case N_LSYM: case N_LBRAC: case N_RBRAC:
njn9885df02002-05-01 08:25:03 +00001016 case N_STSYM: case N_LCSYM: case N_GSYM:
njn9aae6742002-04-30 13:44:01 +00001017 i++;
1018 goto LOOP;
1019
1020 /* Should be an end of fun entry, use its address */
1021 case N_FUN:
1022 if ('\0' == * (stabstr + stab[i+1].n_un.n_strx) ) {
1023 next_addr = (UInt)stab[i+1].n_value;
1024 } else {
njne0ee0712002-05-03 16:41:05 +00001025 VG_(message)(Vg_DebugMsg,
1026 "warning: function %s missing closing "
1027 "N_FUN stab at entry %d",
1028 curr_fn_name, i );
1029 next_addr = this_addr; /* assume zero-size loc */
njn9aae6742002-04-30 13:44:01 +00001030 }
1031 break;
1032
1033 /* N_SOL should be followed by an N_SLINE which can be used */
1034 case N_SOL:
1035 if (i+2 < n_stab_entries && N_SLINE == stab[i+2].n_type) {
1036 next_addr = (UInt)stab[i+2].n_value;
1037 break;
1038 } else {
1039 VG_(printf)("unhandled N_SOL stabs case: %d %d %d",
1040 stab[i+1].n_type, i, n_stab_entries);
sewardj177d3232002-05-01 09:25:56 +00001041 VG_(panic)("unhandled N_SOL stabs case");
njn9aae6742002-04-30 13:44:01 +00001042 }
1043
1044 default:
1045 VG_(printf)("unhandled stabs case: %d %d",
1046 stab[i+1].n_type,i);
sewardj177d3232002-05-01 09:25:56 +00001047 VG_(panic)("unhandled (other) stabs case");
njn9aae6742002-04-30 13:44:01 +00001048 }
sewardjde4a1d02002-03-22 01:27:54 +00001049 }
njn4f9c9342002-04-29 16:03:24 +00001050
njn4f9c9342002-04-29 16:03:24 +00001051 addLineInfo ( si, curr_filenmoff, curr_fnbaseaddr + this_addr,
njne0ee0712002-05-03 16:41:05 +00001052 curr_fnbaseaddr + next_addr,
1053 lineno + lineno_overflows * LINENO_OVERFLOW, i);
sewardjde4a1d02002-03-22 01:27:54 +00001054 break;
1055 }
1056
njn4f9c9342002-04-29 16:03:24 +00001057 case N_FUN: {
1058 if ('\0' != (stabstr + stab[i].n_un.n_strx)[0] ) {
sewardjde4a1d02002-03-22 01:27:54 +00001059 /* N_FUN with a name -- indicates the start of a fn. */
njn4f9c9342002-04-29 16:03:24 +00001060 curr_fnbaseaddr = si->offset + (Addr)stab[i].n_value;
njne0ee0712002-05-03 16:41:05 +00001061 curr_fn_name = stabstr + stab[i].n_un.n_strx;
1062 } else {
1063 curr_fn_name = no_fn_name;
sewardjde4a1d02002-03-22 01:27:54 +00001064 }
1065 break;
1066 }
1067
njne0ee0712002-05-03 16:41:05 +00001068 case N_SOL:
1069 if (lineno_overflows != 0) {
njn7efaa112002-05-07 10:26:57 +00001070 VG_(message)(Vg_UserMsg,
1071 "Warning: file %s is very big (> 65535 lines) "
1072 "Line numbers and annotation for this file might "
1073 "be wrong. Sorry",
1074 curr_file_name);
njne0ee0712002-05-03 16:41:05 +00001075 }
1076 /* fall through! */
1077 case N_SO:
1078 lineno_overflows = 0;
1079
sewardjde4a1d02002-03-22 01:27:54 +00001080 /* seems to give lots of locations in header files */
1081 /* case 130: */ /* BINCL */
1082 {
1083 UChar* nm = stabstr + stab[i].n_un.n_strx;
1084 UInt len = VG_(strlen)(nm);
njn4f9c9342002-04-29 16:03:24 +00001085
1086 if (len > 0 && nm[len-1] != '/') {
sewardjde4a1d02002-03-22 01:27:54 +00001087 curr_filenmoff = addStr ( si, nm );
njn4f9c9342002-04-29 16:03:24 +00001088 curr_file_name = stabstr + stab[i].n_un.n_strx;
1089 }
sewardjde4a1d02002-03-22 01:27:54 +00001090 else
1091 if (len == 0)
1092 curr_filenmoff = addStr ( si, "?1\0" );
njn4f9c9342002-04-29 16:03:24 +00001093
sewardjde4a1d02002-03-22 01:27:54 +00001094 break;
1095 }
1096
1097# if 0
1098 case 162: /* EINCL */
1099 curr_filenmoff = addStr ( si, "?2\0" );
1100 break;
1101# endif
1102
1103 default:
1104 break;
1105 }
1106 } /* for (i = 0; i < stab_sz/(int)sizeof(struct nlist); i++) */
1107
1108 /* Last, but not least, heave the oimage back overboard. */
1109 VG_(munmap) ( (void*)oimage, n_oimage );
1110}
1111
1112
1113/*------------------------------------------------------------*/
1114/*--- Main entry point for symbols table reading. ---*/
1115/*------------------------------------------------------------*/
1116
1117/* The root structure for the entire symbol table system. It is a
1118 linked list of SegInfos. Note that this entire mechanism assumes
1119 that what we read from /proc/self/maps doesn't contain overlapping
1120 address ranges, and as a result the SegInfos in this list describe
1121 disjoint address ranges.
1122*/
1123static SegInfo* segInfo = NULL;
1124
1125
1126static
1127void read_symtab_callback (
1128 Addr start, UInt size,
1129 Char rr, Char ww, Char xx,
1130 UInt foffset, UChar* filename )
1131{
1132 SegInfo* si;
1133
1134 /* Stay sane ... */
1135 if (size == 0)
1136 return;
1137
1138 /* We're only interested in collecting symbols in executable
1139 segments which are associated with a real file. Hence: */
1140 if (filename == NULL || xx != 'x')
1141 return;
1142 if (0 == VG_(strcmp)(filename, "/dev/zero"))
1143 return;
1144
1145 /* Perhaps we already have this one? If so, skip. */
1146 for (si = segInfo; si != NULL; si = si->next) {
1147 /*
1148 if (0==VG_(strcmp)(si->filename, filename))
1149 VG_(printf)("same fnames: %c%c%c (%p, %d) (%p, %d) %s\n",
1150 rr,ww,xx,si->start,si->size,start,size,filename);
1151 */
1152 /* For some reason the observed size of a mapping can change, so
1153 we don't use that to determine uniqueness. */
1154 if (si->start == start
1155 /* && si->size == size */
1156 && 0==VG_(strcmp)(si->filename, filename)) {
1157 return;
1158 }
1159 }
1160
1161 /* Get the record initialised right. */
1162 si = VG_(malloc)(VG_AR_SYMTAB, sizeof(SegInfo));
1163 si->next = segInfo;
1164 segInfo = si;
1165
1166 si->start = start;
1167 si->size = size;
1168 si->foffset = foffset;
1169 si->filename = VG_(malloc)(VG_AR_SYMTAB, 1 + VG_(strlen)(filename));
1170 VG_(strcpy)(si->filename, filename);
1171
1172 si->symtab = NULL;
1173 si->symtab_size = si->symtab_used = 0;
1174 si->loctab = NULL;
1175 si->loctab_size = si->loctab_used = 0;
1176 si->strtab = NULL;
1177 si->strtab_size = si->strtab_used = 0;
1178
1179 /* Kludge ... */
1180 si->offset
1181 = si->start==VG_ASSUMED_EXE_BASE ? 0 : si->start;
1182
1183 /* And actually fill it up. */
1184 vg_read_lib_symbols ( si );
1185 canonicaliseSymtab ( si );
1186 canonicaliseLoctab ( si );
1187}
1188
1189
1190/* This one really is the Head Honcho. Update the symbol tables to
1191 reflect the current state of /proc/self/maps. Rather than re-read
1192 everything, just read the entries which are not already in segInfo.
1193 So we can call here repeatedly, after every mmap of a non-anonymous
1194 segment with execute permissions, for example, to pick up new
1195 libraries as they are dlopen'd. Conversely, when the client does
1196 munmap(), vg_symtab_notify_munmap() throws away any symbol tables
1197 which happen to correspond to the munmap()d area. */
1198void VG_(read_symbols) ( void )
1199{
njn4f9c9342002-04-29 16:03:24 +00001200 if (! VG_(clo_instrument) && ! VG_(clo_cachesim))
1201 return;
sewardjde4a1d02002-03-22 01:27:54 +00001202
1203 VG_(read_procselfmaps) ( read_symtab_callback );
1204
1205 /* Do a sanity check on the symbol tables: ensure that the address
1206 space pieces they cover do not overlap (otherwise we are severely
1207 hosed). This is a quadratic algorithm, but there shouldn't be
1208 many of them.
1209 */
1210 { SegInfo *si, *si2;
1211 for (si = segInfo; si != NULL; si = si->next) {
1212 /* Check no overlap between *si and those in the rest of the
1213 list. */
1214 for (si2 = si->next; si2 != NULL; si2 = si2->next) {
1215 Addr lo = si->start;
1216 Addr hi = si->start + si->size - 1;
1217 Addr lo2 = si2->start;
1218 Addr hi2 = si2->start + si2->size - 1;
1219 Bool overlap;
1220 vg_assert(lo < hi);
1221 vg_assert(lo2 < hi2);
1222 /* the main assertion */
1223 overlap = (lo <= lo2 && lo2 <= hi)
1224 || (lo <= hi2 && hi2 <= hi);
1225 //vg_assert(!overlap);
1226 if (overlap) {
1227 VG_(printf)("\n\nOVERLAPPING SEGMENTS\n" );
1228 ppSegInfo ( si );
1229 ppSegInfo ( si2 );
1230 VG_(printf)("\n\n");
1231 vg_assert(! overlap);
1232 }
1233 }
1234 }
1235 }
1236}
1237
1238
1239/* When an munmap() call happens, check to see whether it corresponds
1240 to a segment for a .so, and if so discard the relevant SegInfo.
1241 This might not be a very clever idea from the point of view of
1242 accuracy of error messages, but we need to do it in order to
1243 maintain the no-overlapping invariant.
1244*/
1245void VG_(symtab_notify_munmap) ( Addr start, UInt length )
1246{
1247 SegInfo *prev, *curr;
1248
1249 if (! VG_(clo_instrument))
1250 return;
1251
1252 prev = NULL;
1253 curr = segInfo;
1254 while (True) {
1255 if (curr == NULL) break;
1256 if (start == curr->start) break;
1257 prev = curr;
1258 curr = curr->next;
1259 }
1260 if (curr == NULL) return;
1261
1262 VG_(message)(Vg_UserMsg,
1263 "discard syms in %s due to munmap()",
1264 curr->filename ? curr->filename : (UChar*)"???");
1265
1266 vg_assert(prev == NULL || prev->next == curr);
1267
1268 if (prev == NULL) {
1269 segInfo = curr->next;
1270 } else {
1271 prev->next = curr->next;
1272 }
1273
1274 freeSegInfo(curr);
1275}
1276
1277
1278/*------------------------------------------------------------*/
1279/*--- Use of symbol table & location info to create ---*/
1280/*--- plausible-looking stack dumps. ---*/
1281/*------------------------------------------------------------*/
1282
1283/* Find a symbol-table index containing the specified pointer, or -1
1284 if not found. Binary search. */
1285
1286static Int search_one_symtab ( SegInfo* si, Addr ptr )
1287{
1288 Addr a_mid_lo, a_mid_hi;
1289 Int mid,
1290 lo = 0,
1291 hi = si->symtab_used-1;
1292 while (True) {
1293 /* current unsearched space is from lo to hi, inclusive. */
1294 if (lo > hi) return -1; /* not found */
1295 mid = (lo + hi) / 2;
1296 a_mid_lo = si->symtab[mid].addr;
1297 a_mid_hi = ((Addr)si->symtab[mid].addr) + si->symtab[mid].size - 1;
1298
1299 if (ptr < a_mid_lo) { hi = mid-1; continue; }
1300 if (ptr > a_mid_hi) { lo = mid+1; continue; }
1301 vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
1302 return mid;
1303 }
1304}
1305
1306
1307/* Search all symtabs that we know about to locate ptr. If found, set
1308 *psi to the relevant SegInfo, and *symno to the symtab entry number
1309 within that. If not found, *psi is set to NULL. */
1310
1311static void search_all_symtabs ( Addr ptr, SegInfo** psi, Int* symno )
1312{
1313 Int sno;
1314 SegInfo* si;
1315 for (si = segInfo; si != NULL; si = si->next) {
1316 if (si->start <= ptr && ptr < si->start+si->size) {
1317 sno = search_one_symtab ( si, ptr );
1318 if (sno == -1) goto not_found;
1319 *symno = sno;
1320 *psi = si;
1321 return;
1322 }
1323 }
1324 not_found:
1325 *psi = NULL;
1326}
1327
1328
1329/* Find a location-table index containing the specified pointer, or -1
1330 if not found. Binary search. */
1331
1332static Int search_one_loctab ( SegInfo* si, Addr ptr )
1333{
1334 Addr a_mid_lo, a_mid_hi;
1335 Int mid,
1336 lo = 0,
1337 hi = si->loctab_used-1;
1338 while (True) {
1339 /* current unsearched space is from lo to hi, inclusive. */
1340 if (lo > hi) return -1; /* not found */
1341 mid = (lo + hi) / 2;
1342 a_mid_lo = si->loctab[mid].addr;
1343 a_mid_hi = ((Addr)si->loctab[mid].addr) + si->loctab[mid].size - 1;
1344
1345 if (ptr < a_mid_lo) { hi = mid-1; continue; }
1346 if (ptr > a_mid_hi) { lo = mid+1; continue; }
1347 vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
1348 return mid;
1349 }
1350}
1351
1352
1353/* Search all loctabs that we know about to locate ptr. If found, set
1354 *psi to the relevant SegInfo, and *locno to the loctab entry number
1355 within that. If not found, *psi is set to NULL.
1356*/
1357static void search_all_loctabs ( Addr ptr, SegInfo** psi, Int* locno )
1358{
1359 Int lno;
1360 SegInfo* si;
1361 for (si = segInfo; si != NULL; si = si->next) {
1362 if (si->start <= ptr && ptr < si->start+si->size) {
1363 lno = search_one_loctab ( si, ptr );
1364 if (lno == -1) goto not_found;
1365 *locno = lno;
1366 *psi = si;
1367 return;
1368 }
1369 }
1370 not_found:
1371 *psi = NULL;
1372}
1373
1374
1375/* The whole point of this whole big deal: map a code address to a
1376 plausible symbol name. Returns False if no idea; otherwise True.
1377 Caller supplies buf and nbuf. If no_demangle is True, don't do
1378 demangling, regardless of vg_clo_demangle -- probably because the
1379 call has come from vg_what_fn_or_object_is_this. */
njn4f9c9342002-04-29 16:03:24 +00001380Bool VG_(what_fn_is_this) ( Bool no_demangle, Addr a,
1381 Char* buf, Int nbuf )
sewardjde4a1d02002-03-22 01:27:54 +00001382{
1383 SegInfo* si;
1384 Int sno;
1385 search_all_symtabs ( a, &si, &sno );
1386 if (si == NULL)
1387 return False;
1388 if (no_demangle) {
1389 VG_(strncpy_safely)
1390 ( buf, & si->strtab[si->symtab[sno].nmoff], nbuf );
1391 } else {
1392 VG_(demangle) ( & si->strtab[si->symtab[sno].nmoff], buf, nbuf );
1393 }
1394 return True;
1395}
1396
1397
1398/* Map a code address to the name of a shared object file. Returns
1399 False if no idea; otherwise False. Caller supplies buf and
1400 nbuf. */
1401static
1402Bool vg_what_object_is_this ( Addr a, Char* buf, Int nbuf )
1403{
1404 SegInfo* si;
1405 for (si = segInfo; si != NULL; si = si->next) {
1406 if (si->start <= a && a < si->start+si->size) {
1407 VG_(strncpy_safely)(buf, si->filename, nbuf);
1408 return True;
1409 }
1410 }
1411 return False;
1412}
1413
1414/* Return the name of an erring fn in a way which is useful
1415 for comparing against the contents of a suppressions file.
1416 Always writes something to buf. Also, doesn't demangle the
1417 name, because we want to refer to mangled names in the
1418 suppressions file.
1419*/
1420void VG_(what_obj_and_fun_is_this) ( Addr a,
1421 Char* obj_buf, Int n_obj_buf,
1422 Char* fun_buf, Int n_fun_buf )
1423{
1424 (void)vg_what_object_is_this ( a, obj_buf, n_obj_buf );
njn4f9c9342002-04-29 16:03:24 +00001425 (void)VG_(what_fn_is_this) ( True, a, fun_buf, n_fun_buf );
sewardjde4a1d02002-03-22 01:27:54 +00001426}
1427
1428
1429/* Map a code address to a (filename, line number) pair.
1430 Returns True if successful.
1431*/
njn4f9c9342002-04-29 16:03:24 +00001432Bool VG_(what_line_is_this)( Addr a,
1433 UChar* filename, Int n_filename,
1434 UInt* lineno )
sewardjde4a1d02002-03-22 01:27:54 +00001435{
1436 SegInfo* si;
1437 Int locno;
1438 search_all_loctabs ( a, &si, &locno );
1439 if (si == NULL)
1440 return False;
1441 VG_(strncpy_safely)(filename, & si->strtab[si->loctab[locno].fnmoff],
1442 n_filename);
1443 *lineno = si->loctab[locno].lineno;
njn4f9c9342002-04-29 16:03:24 +00001444
sewardjde4a1d02002-03-22 01:27:54 +00001445 return True;
1446}
1447
1448
1449/* Print a mini stack dump, showing the current location. */
1450void VG_(mini_stack_dump) ( ExeContext* ec )
1451{
1452
1453#define APPEND(str) \
1454 { UChar* sss; \
1455 for (sss = str; n < M_VG_ERRTXT-1 && *sss != 0; n++,sss++) \
1456 buf[n] = *sss; \
1457 buf[n] = 0; \
1458 }
1459
1460 Bool know_fnname;
1461 Bool know_objname;
1462 Bool know_srcloc;
1463 UInt lineno;
1464 UChar ibuf[20];
1465 UInt i, n, clueless;
1466
1467 UChar buf[M_VG_ERRTXT];
1468 UChar buf_fn[M_VG_ERRTXT];
1469 UChar buf_obj[M_VG_ERRTXT];
1470 UChar buf_srcloc[M_VG_ERRTXT];
1471
1472 Int stop_at = VG_(clo_backtrace_size);
1473
1474 n = 0;
1475
njn4f9c9342002-04-29 16:03:24 +00001476 know_fnname = VG_(what_fn_is_this)(False,ec->eips[0], buf_fn, M_VG_ERRTXT);
sewardjde4a1d02002-03-22 01:27:54 +00001477 know_objname = vg_what_object_is_this(ec->eips[0], buf_obj, M_VG_ERRTXT);
njn4f9c9342002-04-29 16:03:24 +00001478 know_srcloc = VG_(what_line_is_this)(ec->eips[0],
1479 buf_srcloc, M_VG_ERRTXT,
1480 &lineno);
sewardjde4a1d02002-03-22 01:27:54 +00001481
1482 APPEND(" at ");
1483 VG_(sprintf)(ibuf,"0x%x: ", ec->eips[0]);
1484 APPEND(ibuf);
1485 if (know_fnname) {
1486 APPEND(buf_fn);
1487 if (!know_srcloc && know_objname) {
1488 APPEND(" (in ");
1489 APPEND(buf_obj);
1490 APPEND(")");
1491 }
1492 } else if (know_objname && !know_srcloc) {
1493 APPEND("(within ");
1494 APPEND(buf_obj);
1495 APPEND(")");
1496 } else {
1497 APPEND("???");
1498 }
1499 if (know_srcloc) {
1500 APPEND(" (");
1501 APPEND(buf_srcloc);
1502 APPEND(":");
1503 VG_(sprintf)(ibuf,"%d",lineno);
1504 APPEND(ibuf);
1505 APPEND(")");
1506 }
1507 VG_(message)(Vg_UserMsg, "%s", buf);
1508
1509 clueless = 0;
1510 for (i = 1; i < stop_at; i++) {
njn4f9c9342002-04-29 16:03:24 +00001511 know_fnname = VG_(what_fn_is_this)(False,ec->eips[i], buf_fn, M_VG_ERRTXT);
sewardjde4a1d02002-03-22 01:27:54 +00001512 know_objname = vg_what_object_is_this(ec->eips[i],buf_obj, M_VG_ERRTXT);
njn4f9c9342002-04-29 16:03:24 +00001513 know_srcloc = VG_(what_line_is_this)(ec->eips[i],
sewardjde4a1d02002-03-22 01:27:54 +00001514 buf_srcloc, M_VG_ERRTXT,
1515 &lineno);
1516 n = 0;
1517 APPEND(" by ");
1518 if (ec->eips[i] == 0) {
1519 APPEND("<bogus frame pointer> ");
1520 } else {
1521 VG_(sprintf)(ibuf,"0x%x: ",ec->eips[i]);
1522 APPEND(ibuf);
1523 }
1524 if (know_fnname) {
1525 APPEND(buf_fn)
1526 if (!know_srcloc && know_objname) {
1527 APPEND(" (in ");
1528 APPEND(buf_obj);
1529 APPEND(")");
1530 }
1531 } else {
1532 if (know_objname && !know_srcloc) {
1533 APPEND("(within ");
1534 APPEND(buf_obj);
1535 APPEND(")");
1536 } else {
1537 APPEND("???");
1538 }
1539 if (!know_srcloc) clueless++;
1540 if (clueless == 2)
1541 i = stop_at; /* force exit after this iteration */
1542 };
1543 if (know_srcloc) {
1544 APPEND(" (");
1545 APPEND(buf_srcloc);
1546 APPEND(":");
1547 VG_(sprintf)(ibuf,"%d",lineno);
1548 APPEND(ibuf);
1549 APPEND(")");
1550 }
1551 VG_(message)(Vg_UserMsg, "%s", buf);
1552 }
1553}
1554
1555#undef APPEND
1556
1557/*--------------------------------------------------------------------*/
1558/*--- end vg_symtab2.c ---*/
1559/*--------------------------------------------------------------------*/