blob: c7817519850ab70e79d106eaa7fea009982d5281 [file] [log] [blame]
sewardjde4a1d02002-03-22 01:27:54 +00001/*--------------------------------------------------------------------*/
2/*--- Management of symbols and debugging information. ---*/
3/*--- vg_symtab2.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7 This file is part of Valgrind, an x86 protected-mode emulator
8 designed for debugging and profiling binaries on x86-Unixes.
9
10 Copyright (C) 2000-2002 Julian Seward
11 jseward@acm.org
sewardjde4a1d02002-03-22 01:27:54 +000012
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file LICENSE.
29*/
30
31#include "vg_include.h"
sewardjde4a1d02002-03-22 01:27:54 +000032
33#include <elf.h> /* ELF defns */
34#include <a.out.h> /* stabs defns */
35
njn9aae6742002-04-30 13:44:01 +000036
sewardjde4a1d02002-03-22 01:27:54 +000037/* Majorly rewritten Sun 3 Feb 02 to enable loading symbols from
38 dlopen()ed libraries, which is something that KDE3 does a lot.
39 Still kludgey, though less than before:
40
41 * we don't check whether we should throw away some symbol tables
42 when munmap() happens
43
44 * symbol table reading code for ELF binaries is a shambles.
45 Use GHC's fptools/ghc/rts/Linker.c as the basis for something better.
46*/
47
48/*------------------------------------------------------------*/
49/*--- Structs n stuff ---*/
50/*------------------------------------------------------------*/
51
/* Stabs entry type codes used by the reader in this file.  Values are
 * taken from:
 *   The "stabs" debug format
 *   Menapace, Kingdon and MacKenzie
 *   Cygnus Support
 * Each code is written in hex; the decimal value is given in the comment.
 */
typedef enum {
   N_GSYM  = 0x20,   /*  32: Global symbol                      */
   N_FUN   = 0x24,   /*  36: Function start or end              */
   N_STSYM = 0x26,   /*  38: Data segment file-scope variable   */
   N_LCSYM = 0x28,   /*  40: BSS segment file-scope variable    */
   N_RSYM  = 0x40,   /*  64: Register variable                  */
   N_SLINE = 0x44,   /*  68: Source line number                 */
   N_SO    = 0x64,   /* 100: Source file path and name          */
   N_LSYM  = 0x80,   /* 128: Stack variable or type             */
   N_SOL   = 0x84,   /* 132: Include file name                  */
   N_LBRAC = 0xc0,   /* 192: Start of lexical block             */
   N_RBRAC = 0xe0    /* 224: End of lexical block               */
} stab_types;
69
/* A structure to hold an ELF symbol (very crudely).  One of these is
   stored per recorded symbol in a SegInfo's symtab array; the symbol's
   name is not stored inline but as an offset into that SegInfo's
   string table. */
typedef
   struct {
      Addr addr;   /* lowest address of entity */
      UInt size;   /* size in bytes */
      Int  nmoff;  /* offset of name in this SegInfo's str tab */
   }
   RiSym;
78
/* Line count at which overflow happens, due to line numbers being stored as
 * shorts in `struct nlist' in a.out.h.  Evaluates to 2 raised to the number
 * of bits in a short (65536 where short is 16 bits wide). */
#define LINENO_OVERFLOW (1 << (sizeof(short) * 8))

/* Bit widths of the two bit-fields in RiLoc: LINENO_BITS for the source
 * line number, and the remainder of 32 bits for the byte size. */
#define LINENO_BITS     20
#define LOC_SIZE_BITS  (32 - LINENO_BITS)
/* Largest line number representable in LINENO_BITS bits. */
#define MAX_LINENO     ((1 << LINENO_BITS) - 1)

/* Unlikely to have any lines with instruction ranges > 4096 bytes */
/* Largest byte size representable in LOC_SIZE_BITS bits. */
#define MAX_LOC_SIZE   ((1 << LOC_SIZE_BITS) - 1)

/* Number used to detect line number overflows; if one line is 60000-odd
 * smaller than the previous, it was probably an overflow.
 */
#define OVERFLOW_DIFFERENCE   (LINENO_OVERFLOW - 5000)
94
/* A structure to hold addr-to-source info for a single line.  There can be a
 * lot of these, hence the dense packing.  The size and lineno bit-fields are
 * LOC_SIZE_BITS and LINENO_BITS wide respectively, which together add up to
 * 32 bits; the "Word N" markers below describe the intended layout
 * (NOTE(review): actual bit-field packing is compiler-defined -- confirm). */
typedef
   struct {
      /* Word 1 */
      Addr   addr;                  /* lowest address for this line */
      /* Word 2 */
      UShort size:LOC_SIZE_BITS;    /* byte size; we catch overflows of this */
      UInt   lineno:LINENO_BITS;    /* source line number, or zero */
      /* Word 3 */
      UInt   fnmoff;                /* source filename; offset in this
                                       SegInfo's str tab */
   }
   RiLoc;
109
110
/* A structure which contains information pertaining to one mapped
   text segment.  It owns three growable arrays -- symbols, line
   locations and a string table -- each described by a pointer, a
   count of entries in use (_used) and the allocated capacity (_size).
   freeSegInfo releases the filename and all three arrays. */
typedef
   struct _SegInfo {
      /* Link to another SegInfo (presumably the next one in a list;
         the list head is not visible in this part of the file). */
      struct _SegInfo* next;
      /* Description of the mapped segment. */
      Addr   start;
      UInt   size;
      UChar* filename; /* in mallocville */
      UInt   foffset;
      /* An expandable array of symbols. */
      RiSym* symtab;
      UInt   symtab_used;
      UInt   symtab_size;
      /* An expandable array of locations. */
      RiLoc* loctab;
      UInt   loctab_used;
      UInt   loctab_size;
      /* An expandable array of characters -- the string table. */
      Char*  strtab;
      UInt   strtab_used;
      UInt   strtab_size;
      /* offset is what we need to add to symbol table entries
         to get the real location of that symbol in memory.
         For executables, offset is zero.
         For .so's, offset == base_addr.
         This seems like a giant kludge to me.
      */
      UInt   offset;
   }
   SegInfo;
142
143
144/* -- debug helper -- */
145static void ppSegInfo ( SegInfo* si )
146{
147 VG_(printf)("name: %s\n"
148 "start %p, size %d, foffset %d\n",
149 si->filename?si->filename : (UChar*)"NULL",
150 si->start, si->size, si->foffset );
151}
152
153static void freeSegInfo ( SegInfo* si )
154{
155 vg_assert(si != NULL);
156 if (si->filename) VG_(free)(VG_AR_SYMTAB, si->filename);
157 if (si->symtab) VG_(free)(VG_AR_SYMTAB, si->symtab);
158 if (si->loctab) VG_(free)(VG_AR_SYMTAB, si->loctab);
159 if (si->strtab) VG_(free)(VG_AR_SYMTAB, si->strtab);
160 VG_(free)(VG_AR_SYMTAB, si);
161}
162
163
164/*------------------------------------------------------------*/
165/*--- Adding stuff ---*/
166/*------------------------------------------------------------*/
167
168/* Add a str to the string table, including terminating zero, and
169 return offset of the string in vg_strtab. */
170
171static __inline__
172Int addStr ( SegInfo* si, Char* str )
173{
174 Char* new_tab;
175 Int new_sz, i, space_needed;
176
177 space_needed = 1 + VG_(strlen)(str);
178 if (si->strtab_used + space_needed > si->strtab_size) {
179 new_sz = 2 * si->strtab_size;
180 if (new_sz == 0) new_sz = 5000;
181 new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz);
182 if (si->strtab != NULL) {
183 for (i = 0; i < si->strtab_used; i++)
184 new_tab[i] = si->strtab[i];
185 VG_(free)(VG_AR_SYMTAB, si->strtab);
186 }
187 si->strtab = new_tab;
188 si->strtab_size = new_sz;
189 }
190
191 for (i = 0; i < space_needed; i++)
192 si->strtab[si->strtab_used+i] = str[i];
193
194 si->strtab_used += space_needed;
195 vg_assert(si->strtab_used <= si->strtab_size);
196 return si->strtab_used - space_needed;
197}
198
199/* Add a symbol to the symbol table. */
200
201static __inline__
202void addSym ( SegInfo* si, RiSym* sym )
203{
204 Int new_sz, i;
205 RiSym* new_tab;
206
207 /* Ignore zero-sized syms. */
208 if (sym->size == 0) return;
209
210 if (si->symtab_used == si->symtab_size) {
211 new_sz = 2 * si->symtab_size;
212 if (new_sz == 0) new_sz = 500;
213 new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiSym) );
214 if (si->symtab != NULL) {
215 for (i = 0; i < si->symtab_used; i++)
216 new_tab[i] = si->symtab[i];
217 VG_(free)(VG_AR_SYMTAB, si->symtab);
218 }
219 si->symtab = new_tab;
220 si->symtab_size = new_sz;
221 }
222
223 si->symtab[si->symtab_used] = *sym;
224 si->symtab_used++;
225 vg_assert(si->symtab_used <= si->symtab_size);
226}
227
228/* Add a location to the location table. */
229
230static __inline__
231void addLoc ( SegInfo* si, RiLoc* loc )
232{
233 Int new_sz, i;
234 RiLoc* new_tab;
235
njne0ee0712002-05-03 16:41:05 +0000236 /* Zero-sized locs should have been ignored earlier */
237 vg_assert(loc->size > 0);
sewardjde4a1d02002-03-22 01:27:54 +0000238
239 if (si->loctab_used == si->loctab_size) {
240 new_sz = 2 * si->loctab_size;
241 if (new_sz == 0) new_sz = 500;
242 new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiLoc) );
243 if (si->loctab != NULL) {
244 for (i = 0; i < si->loctab_used; i++)
245 new_tab[i] = si->loctab[i];
246 VG_(free)(VG_AR_SYMTAB, si->loctab);
247 }
248 si->loctab = new_tab;
249 si->loctab_size = new_sz;
250 }
251
252 si->loctab[si->loctab_used] = *loc;
253 si->loctab_used++;
254 vg_assert(si->loctab_used <= si->loctab_size);
255}
256
257
258
259/*------------------------------------------------------------*/
260/*--- Helpers ---*/
261/*------------------------------------------------------------*/
262
263/* Non-fatal -- use vg_panic if terminal. */
264static
265void vg_symerr ( Char* msg )
266{
267 if (VG_(clo_verbosity) > 1)
268 VG_(message)(Vg_UserMsg,"%s", msg );
269}
270
271
272/* Print a symbol. */
273static
274void printSym ( SegInfo* si, Int i )
275{
276 VG_(printf)( "%5d: %8p .. %8p (%d) %s\n",
277 i,
278 si->symtab[i].addr,
279 si->symtab[i].addr + si->symtab[i].size - 1, si->symtab[i].size,
280 &si->strtab[si->symtab[i].nmoff] );
281}
282
283
#if 0
/* Print the entire sym tab, bracketed by BEGIN and END banner lines.
   NOTE(review): this code is compiled out.  It refers to
   vg_symtab_used and calls printSym with a single argument, whereas
   the printSym in this file takes a SegInfo* and an index; it would
   need porting to the SegInfo-based tables before being re-enabled. */
static __attribute__ ((unused))
void printSymtab ( void )
{
   Int i;
   VG_(printf)("\n------ BEGIN vg_symtab ------\n");
   for (i = 0; i < vg_symtab_used; i++)
      printSym(i);
   /* Closing banner: says END so the dump's extent is unambiguous. */
   VG_(printf)("------ END vg_symtab ------\n");
}
#endif
296
#if 0
/* Paranoid strcat.  Appends src to the zero-terminated string in dst
   without ever touching dst[maxlen] or beyond.  If no terminating
   zero is found in dst[0 .. maxlen-1], nothing is written.  If space
   runs out while copying, the copy stops and no terminating zero is
   written. */
static
void safeCopy ( UChar* dst, UInt maxlen, UChar* src )
{
   UInt d, s;

   /* Locate the end of the existing string in dst. */
   for (d = 0; ; d++) {
      if (d >= maxlen) return;
      if (dst[d] == 0) break;
   }

   /* Copy src, its terminating zero included, for as long as it fits. */
   for (s = 0; ; d++, s++) {
      if (d >= maxlen) return;
      dst[d] = src[s];
      if (src[s] == 0) return;
   }
}
#endif
316
317/*------------------------------------------------------------*/
318/*--- Canonicalisers ---*/
319/*------------------------------------------------------------*/
320
/* Sort the symtab by starting address, and emit warnings if any
   symbols have overlapping address ranges.  We use that old chestnut,
   shellsort.  Mash the table around so as to establish the property
   that addresses are in order and the ranges do not overlap.  This
   facilitates using binary search to map addresses to symbols when we
   come to query the table.

   Does nothing if the table holds fewer than two symbols.  The
   overlap warnings are only printed when VG_(clo_trace_symtab) is
   set.  The work is done in three phases -- sort, merge identical
   ranges, truncate overlapping ranges -- and the last two are
   repeated until a truncation pass changes nothing.
*/
static
void canonicaliseSymtab ( SegInfo* si )
{
   /* Magic numbers due to Janet Incerpi and Robert Sedgewick. */
   Int   incs[16] = { 1, 3, 7, 21, 48, 112, 336, 861, 1968,
                      4592, 13776, 33936, 86961, 198768,
                      463792, 1391376 };
   Int   lo = 0;
   Int   hi = si->symtab_used-1;
   Int   i, j, h, bigN, hp, n_merged, n_truncated;
   RiSym v;
   Addr  s1, s2, e1, e2;

   /* Exchange two lvalues of type ty. */
#  define SWAP(ty,aa,bb) \
      do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0)

   /* Phase 1: shellsort by start address.  hp ends up as the index of
      the largest increment that is strictly smaller than bigN. */
   bigN = hi - lo + 1; if (bigN < 2) return;
   hp = 0; while (hp < 16 && incs[hp] < bigN) hp++; hp--;
   vg_assert(0 <= hp && hp < 16);

   for (; hp >= 0; hp--) {
      h = incs[hp];
      i = lo + h;
      while (1) {
         if (i > hi) break;
         v = si->symtab[i];
         j = i;
         /* Shift entries that are h apart upwards until v's slot is
            found; the break stops before j-h would drop below lo. */
         while (si->symtab[j-h].addr > v.addr) {
            si->symtab[j] = si->symtab[j-h];
            j = j - h;
            if (j <= (lo + h - 1)) break;
         }
         si->symtab[j] = v;
         i++;
      }
   }

  cleanup_more:

   /* If two symbols have identical address ranges, favour the
      one with the longer name.
   */
   /* Phase 2: the table is compacted in place; symtab_used is reset
      and counts the surviving entries.  On equal name lengths the
      second of the pair is the one kept.  Repeated until a pass
      merges nothing, so runs of three or more duplicates collapse. */
   do {
      n_merged = 0;
      j = si->symtab_used;
      si->symtab_used = 0;
      for (i = 0; i < j; i++) {
         if (i < j-1
             && si->symtab[i].addr   == si->symtab[i+1].addr
             && si->symtab[i].size   == si->symtab[i+1].size) {
            n_merged++;
            /* merge the two into one */
            if (VG_(strlen)(&si->strtab[si->symtab[i].nmoff])
                > VG_(strlen)(&si->strtab[si->symtab[i+1].nmoff])) {
               si->symtab[si->symtab_used++] = si->symtab[i];
            } else {
               si->symtab[si->symtab_used++] = si->symtab[i+1];
            }
            /* Skip the partner that was just consumed. */
            i++;
         } else {
            si->symtab[si->symtab_used++] = si->symtab[i];
         }
      }
      if (VG_(clo_trace_symtab))
         VG_(printf)( "%d merged\n", n_merged);
   }
   while (n_merged > 0);

   /* Detect and "fix" overlapping address ranges. */
   /* Phase 3. */
   n_truncated = 0;

   for (i = 0; i < si->symtab_used-1; i++) {

      vg_assert(si->symtab[i].addr <= si->symtab[i+1].addr);

      /* Check for common (no overlap) case. */
      if (si->symtab[i].addr + si->symtab[i].size
          <= si->symtab[i+1].addr)
         continue;

      /* There's an overlap.  Truncate one or the other. */
      if (VG_(clo_trace_symtab)) {
         VG_(printf)("overlapping address ranges in symbol table\n\t");
         printSym(si,i);
         VG_(printf)("\t");
         printSym(si,i+1);
         VG_(printf)("\n");
      }

      /* Truncate one or the other. */
      /* s = first address, e = last (inclusive) address of each entry. */
      s1 = si->symtab[i].addr;
      s2 = si->symtab[i+1].addr;
      e1 = s1 + si->symtab[i].size - 1;
      e2 = s2 + si->symtab[i+1].size - 1;
      if (s1 < s2) {
         /* First entry starts earlier: cut it off just before the
            second one begins. */
         e1 = s2-1;
      } else {
         vg_assert(s1 == s2);
         if (e1 > e2) {
            /* Same start, first is longer: make the longer one begin
               right after the shorter one ends, then exchange the two
               so the shorter comes first. */
            s1 = e2+1; SWAP(Addr,s1,s2); SWAP(Addr,e1,e2);
         } else
         if (e1 < e2) {
            /* Same start, second is longer: make it begin right after
               the first one ends. */
            s2 = e1+1;
         } else {
            /* e1 == e2.  Identical addr ranges.  We'll eventually wind
               up back at cleanup_more, which will take care of it. */
         }
      }
      /* Write the adjusted ranges back.  Only addr and size are
         rewritten here; nmoff of each slot is left as it was. */
      si->symtab[i].addr   = s1;
      si->symtab[i+1].addr = s2;
      si->symtab[i].size   = e1 - s1 + 1;
      si->symtab[i+1].size = e2 - s2 + 1;
      vg_assert(s1 <= s2);
      vg_assert(si->symtab[i].size > 0);
      vg_assert(si->symtab[i+1].size > 0);
      /* It may be that the i+1 entry now needs to be moved further
         along to maintain the address order requirement. */
      j = i+1;
      while (j < si->symtab_used-1
             && si->symtab[j].addr > si->symtab[j+1].addr) {
         SWAP(RiSym,si->symtab[j],si->symtab[j+1]);
         j++;
      }
      n_truncated++;
   }

   /* Any truncation may have produced new identical or overlapping
      ranges, so go round again until a pass is clean. */
   if (n_truncated > 0) goto cleanup_more;

   /* Ensure relevant postconditions hold. */
   for (i = 0; i < si->symtab_used-1; i++) {
      /* No zero-sized symbols. */
      vg_assert(si->symtab[i].size > 0);
      /* In order. */
      vg_assert(si->symtab[i].addr < si->symtab[i+1].addr);
      /* No overlaps. */
      vg_assert(si->symtab[i].addr + si->symtab[i].size - 1
                < si->symtab[i+1].addr);
   }
#  undef SWAP
}
468
469
470
471/* Sort the location table by starting address. Mash the table around
472 so as to establish the property that addresses are in order and the
473 ranges do not overlap. This facilitates using binary search to map
474 addresses to locations when we come to query the table. */
475static
476void canonicaliseLoctab ( SegInfo* si )
477{
478 /* Magic numbers due to Janet Incerpi and Robert Sedgewick. */
479 Int incs[16] = { 1, 3, 7, 21, 48, 112, 336, 861, 1968,
480 4592, 13776, 33936, 86961, 198768,
481 463792, 1391376 };
482 Int lo = 0;
483 Int hi = si->loctab_used-1;
484 Int i, j, h, bigN, hp;
485 RiLoc v;
486
487# define SWAP(ty,aa,bb) \
488 do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0);
489
490 /* Sort by start address. */
491
492 bigN = hi - lo + 1; if (bigN < 2) return;
493 hp = 0; while (hp < 16 && incs[hp] < bigN) hp++; hp--;
494 vg_assert(0 <= hp && hp < 16);
495
496 for (; hp >= 0; hp--) {
497 h = incs[hp];
498 i = lo + h;
499 while (1) {
500 if (i > hi) break;
501 v = si->loctab[i];
502 j = i;
503 while (si->loctab[j-h].addr > v.addr) {
504 si->loctab[j] = si->loctab[j-h];
505 j = j - h;
506 if (j <= (lo + h - 1)) break;
507 }
508 si->loctab[j] = v;
509 i++;
510 }
511 }
512
513 /* If two adjacent entries overlap, truncate the first. */
514 for (i = 0; i < si->loctab_used-1; i++) {
515 vg_assert(si->loctab[i].size < 10000);
516 if (si->loctab[i].addr + si->loctab[i].size > si->loctab[i+1].addr) {
517 /* Do this in signed int32 because the actual .size fields
518 are unsigned 16s. */
519 Int new_size = si->loctab[i+1].addr - si->loctab[i].addr;
520 if (new_size < 0) {
521 si->loctab[i].size = 0;
522 } else
523 if (new_size >= 65536) {
524 si->loctab[i].size = 65535;
525 } else {
526 si->loctab[i].size = (UShort)new_size;
527 }
528 }
529 }
530
531 /* Zap any zero-sized entries resulting from the truncation
532 process. */
533 j = 0;
534 for (i = 0; i < si->loctab_used; i++) {
535 if (si->loctab[i].size > 0) {
536 si->loctab[j] = si->loctab[i];
537 j++;
538 }
539 }
540 si->loctab_used = j;
541
542 /* Ensure relevant postconditions hold. */
543 for (i = 0; i < si->loctab_used-1; i++) {
544 /*
545 VG_(printf)("%d (%d) %d 0x%x\n",
546 i, si->loctab[i+1].confident,
547 si->loctab[i+1].size, si->loctab[i+1].addr );
548 */
549 /* No zero-sized symbols. */
550 vg_assert(si->loctab[i].size > 0);
551 /* In order. */
552 vg_assert(si->loctab[i].addr < si->loctab[i+1].addr);
553 /* No overlaps. */
554 vg_assert(si->loctab[i].addr + si->loctab[i].size - 1
555 < si->loctab[i+1].addr);
556 }
557# undef SWAP
558}
559
560
561/*------------------------------------------------------------*/
562/*--- Read info from a .so/exe file. ---*/
563/*------------------------------------------------------------*/
564
565static __inline__
566void addLineInfo ( SegInfo* si,
567 Int fnmoff,
njne0ee0712002-05-03 16:41:05 +0000568 Addr this,
569 Addr next,
570 Int lineno,
571 Int entry )
sewardjde4a1d02002-03-22 01:27:54 +0000572{
573 RiLoc loc;
njne0ee0712002-05-03 16:41:05 +0000574 Int size = next - this;
njn4f9c9342002-04-29 16:03:24 +0000575
njne0ee0712002-05-03 16:41:05 +0000576 /* Ignore zero-sized locs */
577 if (this == next) return;
sewardjde4a1d02002-03-22 01:27:54 +0000578
njne0ee0712002-05-03 16:41:05 +0000579 /* Maximum sanity checking. Some versions of GNU as do a shabby job with
580 * stabs entries; if anything looks suspicious, revert to a size of 1.
581 * This should catch the instruction of interest (since if using asm-level
582 * debug info, one instruction will correspond to one line, unlike with
583 * C-level debug info where multiple instructions can map to the one line),
584 * but avoid catching any other instructions bogusly. */
585 if (this > next) {
586 VG_(message)(Vg_DebugMsg,
587 "warning: stabs addresses out of order "
588 "at entry %d: 0x%x 0x%x", entry, this, next);
589 size = 1;
590 }
sewardjde4a1d02002-03-22 01:27:54 +0000591
njne0ee0712002-05-03 16:41:05 +0000592 if (size > MAX_LOC_SIZE) {
593 VG_(message)(Vg_DebugMsg,
594 "warning: stabs line address range too large "
595 "at entry %d: %d", entry, size);
596 size = 1;
597 }
598
sewardj573a1e62002-05-09 11:03:57 +0000599 vg_assert(this < si->start + si->size && next-1 >= si->start);
600 vg_assert(lineno >= 0 && lineno <= MAX_LINENO);
njne0ee0712002-05-03 16:41:05 +0000601
602 loc.addr = this;
sewardjde4a1d02002-03-22 01:27:54 +0000603 loc.size = (UShort)size;
604 loc.lineno = lineno;
605 loc.fnmoff = fnmoff;
606 addLoc ( si, &loc );
607}
608
609
610/* Read the symbols from the object/exe specified by the SegInfo into
611 the tables within the supplied SegInfo. */
612static
613void vg_read_lib_symbols ( SegInfo* si )
614{
615 Elf32_Ehdr* ehdr; /* The ELF header */
616 Elf32_Shdr* shdr; /* The section table */
617 UChar* sh_strtab; /* The section table's string table */
618 struct nlist* stab; /* The .stab table */
619 UChar* stabstr; /* The .stab string table */
620 Int stab_sz; /* Size in bytes of the .stab table */
621 Int stabstr_sz; /* Size in bytes of the .stab string table */
622 Int fd;
623 Int i;
624 Bool ok;
625 Addr oimage;
626 Int n_oimage;
sewardjb3586202002-05-09 17:38:13 +0000627 struct vki_stat stat_buf;
sewardjde4a1d02002-03-22 01:27:54 +0000628
629 /* for the .stabs reader */
630 Int curr_filenmoff;
631 Addr curr_fnbaseaddr;
njne0ee0712002-05-03 16:41:05 +0000632 Char *curr_file_name, *curr_fn_name;
njn4f9c9342002-04-29 16:03:24 +0000633 Int n_stab_entries;
njne0ee0712002-05-03 16:41:05 +0000634 Int prev_lineno, lineno;
635 Int lineno_overflows;
636 Bool same_file;
sewardjde4a1d02002-03-22 01:27:54 +0000637
638 oimage = (Addr)NULL;
639 if (VG_(clo_verbosity) > 1)
njne0ee0712002-05-03 16:41:05 +0000640 VG_(message)(Vg_UserMsg, "Reading syms from %s", si->filename );
sewardjde4a1d02002-03-22 01:27:54 +0000641
642 /* mmap the object image aboard, so that we can read symbols and
643 line number info out of it. It will be munmapped immediately
644 thereafter; it is only aboard transiently. */
645
sewardjb3586202002-05-09 17:38:13 +0000646 i = VG_(stat)(si->filename, &stat_buf);
sewardjde4a1d02002-03-22 01:27:54 +0000647 if (i != 0) {
648 vg_symerr("Can't stat .so/.exe (to determine its size)?!");
649 return;
650 }
651 n_oimage = stat_buf.st_size;
652
653 fd = VG_(open_read)(si->filename);
654 if (fd == -1) {
655 vg_symerr("Can't open .so/.exe to read symbols?!");
656 return;
657 }
658
sewardjb3586202002-05-09 17:38:13 +0000659 oimage = (Addr)VG_(mmap)( NULL, n_oimage,
660 VKI_PROT_READ, VKI_MAP_PRIVATE, fd, 0 );
sewardjde4a1d02002-03-22 01:27:54 +0000661 if (oimage == ((Addr)(-1))) {
662 VG_(message)(Vg_UserMsg,
663 "mmap failed on %s", si->filename );
664 VG_(close)(fd);
665 return;
666 }
667
668 VG_(close)(fd);
669
670 /* Ok, the object image is safely in oimage[0 .. n_oimage-1].
671 Now verify that it is a valid ELF .so or executable image.
672 */
673 ok = (n_oimage >= sizeof(Elf32_Ehdr));
674 ehdr = (Elf32_Ehdr*)oimage;
675
676 if (ok) {
677 ok &= (ehdr->e_ident[EI_MAG0] == 0x7F
678 && ehdr->e_ident[EI_MAG1] == 'E'
679 && ehdr->e_ident[EI_MAG2] == 'L'
680 && ehdr->e_ident[EI_MAG3] == 'F');
681 ok &= (ehdr->e_ident[EI_CLASS] == ELFCLASS32
682 && ehdr->e_ident[EI_DATA] == ELFDATA2LSB
683 && ehdr->e_ident[EI_VERSION] == EV_CURRENT);
684 ok &= (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_DYN);
685 ok &= (ehdr->e_machine == EM_386);
686 ok &= (ehdr->e_version == EV_CURRENT);
687 ok &= (ehdr->e_shstrndx != SHN_UNDEF);
688 ok &= (ehdr->e_shoff != 0 && ehdr->e_shnum != 0);
689 }
690
691 if (!ok) {
692 vg_symerr("Invalid ELF header, or missing stringtab/sectiontab.");
693 VG_(munmap) ( (void*)oimage, n_oimage );
694 return;
695 }
696
697 if (VG_(clo_trace_symtab))
698 VG_(printf)(
699 "shoff = %d, shnum = %d, size = %d, n_vg_oimage = %d\n",
700 ehdr->e_shoff, ehdr->e_shnum, sizeof(Elf32_Shdr), n_oimage );
701
702 if (ehdr->e_shoff + ehdr->e_shnum*sizeof(Elf32_Shdr) > n_oimage) {
703 vg_symerr("ELF section header is beyond image end?!");
704 VG_(munmap) ( (void*)oimage, n_oimage );
705 return;
706 }
707
708 shdr = (Elf32_Shdr*)(oimage + ehdr->e_shoff);
709 sh_strtab = (UChar*)(oimage + shdr[ehdr->e_shstrndx].sh_offset);
710
711 /* try and read the object's symbol table */
712 {
713 UChar* o_strtab = NULL;
714 Elf32_Sym* o_symtab = NULL;
715 UInt o_strtab_sz = 0;
716 UInt o_symtab_sz = 0;
717
718 UChar* o_got = NULL;
719 UChar* o_plt = NULL;
720 UInt o_got_sz = 0;
721 UInt o_plt_sz = 0;
722
723 Bool snaffle_it;
724 Addr sym_addr;
725
726 /* find the .stabstr and .stab sections */
727 for (i = 0; i < ehdr->e_shnum; i++) {
728 if (0 == VG_(strcmp)(".symtab",sh_strtab + shdr[i].sh_name)) {
729 o_symtab = (Elf32_Sym*)(oimage + shdr[i].sh_offset);
730 o_symtab_sz = shdr[i].sh_size;
731 vg_assert((o_symtab_sz % sizeof(Elf32_Sym)) == 0);
732 /* check image overrun here */
733 }
734 if (0 == VG_(strcmp)(".strtab",sh_strtab + shdr[i].sh_name)) {
735 o_strtab = (UChar*)(oimage + shdr[i].sh_offset);
736 o_strtab_sz = shdr[i].sh_size;
737 /* check image overrun here */
738 }
739
740 /* find out where the .got and .plt sections will be in the
741 executable image, not in the object image transiently loaded.
742 */
743 if (0 == VG_(strcmp)(".got",sh_strtab + shdr[i].sh_name)) {
744 o_got = (UChar*)(si->offset
745 + shdr[i].sh_offset);
746 o_got_sz = shdr[i].sh_size;
747 /* check image overrun here */
748 }
749 if (0 == VG_(strcmp)(".plt",sh_strtab + shdr[i].sh_name)) {
750 o_plt = (UChar*)(si->offset
751 + shdr[i].sh_offset);
752 o_plt_sz = shdr[i].sh_size;
753 /* check image overrun here */
754 }
755
756 }
757
758 if (VG_(clo_trace_symtab)) {
759 if (o_plt) VG_(printf)( "PLT: %p .. %p\n",
760 o_plt, o_plt + o_plt_sz - 1 );
761 if (o_got) VG_(printf)( "GOT: %p .. %p\n",
762 o_got, o_got + o_got_sz - 1 );
763 }
764
765 if (o_strtab == NULL || o_symtab == NULL) {
766 vg_symerr(" object doesn't have a symbol table");
767 } else {
768 /* Perhaps should start at i = 1; ELF docs suggest that entry
769 0 always denotes `unknown symbol'. */
770 for (i = 1; i < o_symtab_sz/sizeof(Elf32_Sym); i++){
771# if 0
772 VG_(printf)("raw symbol: ");
773 switch (ELF32_ST_BIND(o_symtab[i].st_info)) {
774 case STB_LOCAL: VG_(printf)("LOC "); break;
775 case STB_GLOBAL: VG_(printf)("GLO "); break;
776 case STB_WEAK: VG_(printf)("WEA "); break;
777 case STB_LOPROC: VG_(printf)("lop "); break;
778 case STB_HIPROC: VG_(printf)("hip "); break;
779 default: VG_(printf)("??? "); break;
780 }
781 switch (ELF32_ST_TYPE(o_symtab[i].st_info)) {
782 case STT_NOTYPE: VG_(printf)("NOT "); break;
783 case STT_OBJECT: VG_(printf)("OBJ "); break;
784 case STT_FUNC: VG_(printf)("FUN "); break;
785 case STT_SECTION: VG_(printf)("SEC "); break;
786 case STT_FILE: VG_(printf)("FIL "); break;
787 case STT_LOPROC: VG_(printf)("lop "); break;
788 case STT_HIPROC: VG_(printf)("hip "); break;
789 default: VG_(printf)("??? "); break;
790 }
791 VG_(printf)(
792 ": value %p, size %d, name %s\n",
793 si->offset+(UChar*)o_symtab[i].st_value,
794 o_symtab[i].st_size,
795 o_symtab[i].st_name
796 ? ((Char*)o_strtab+o_symtab[i].st_name)
797 : (Char*)"NONAME");
798# endif
799
800 /* Figure out if we're interested in the symbol.
801 Firstly, is it of the right flavour?
802 */
803 snaffle_it
804 = ( (ELF32_ST_BIND(o_symtab[i].st_info) == STB_GLOBAL ||
805 ELF32_ST_BIND(o_symtab[i].st_info) == STB_LOCAL /* ||
806 ELF32_ST_BIND(o_symtab[i].st_info) == STB_WEAK */)
807 &&
808 (ELF32_ST_TYPE(o_symtab[i].st_info) == STT_FUNC /*||
809 ELF32_ST_TYPE(o_symtab[i].st_info) == STT_OBJECT*/)
810 );
811
812 /* Secondly, if it's apparently in a GOT or PLT, it's really
813 a reference to a symbol defined elsewhere, so ignore it.
814 */
815 sym_addr = si->offset
816 + (UInt)o_symtab[i].st_value;
817 if (o_got != NULL
818 && sym_addr >= (Addr)o_got
819 && sym_addr < (Addr)(o_got+o_got_sz)) {
820 snaffle_it = False;
821 if (VG_(clo_trace_symtab)) {
822 VG_(printf)( "in GOT: %s\n",
823 o_strtab+o_symtab[i].st_name);
824 }
825 }
826 if (o_plt != NULL
827 && sym_addr >= (Addr)o_plt
828 && sym_addr < (Addr)(o_plt+o_plt_sz)) {
829 snaffle_it = False;
830 if (VG_(clo_trace_symtab)) {
831 VG_(printf)( "in PLT: %s\n",
832 o_strtab+o_symtab[i].st_name);
833 }
834 }
835
836 /* Don't bother if nameless, or zero-sized. */
837 if (snaffle_it
838 && (o_symtab[i].st_name == (Elf32_Word)NULL
839 || /* VG_(strlen)(o_strtab+o_symtab[i].st_name) == 0 */
840 /* equivalent but cheaper ... */
841 * ((UChar*)(o_strtab+o_symtab[i].st_name)) == 0
842 || o_symtab[i].st_size == 0)) {
843 snaffle_it = False;
844 if (VG_(clo_trace_symtab)) {
845 VG_(printf)( "size=0: %s\n",
846 o_strtab+o_symtab[i].st_name);
847 }
848 }
849
850# if 0
851 /* Avoid _dl_ junk. (Why?) */
852 /* 01-02-24: disabled until I find out if it really helps. */
853 if (snaffle_it
854 && (VG_(strncmp)("_dl_", o_strtab+o_symtab[i].st_name, 4) == 0
855 || VG_(strncmp)("_r_debug",
856 o_strtab+o_symtab[i].st_name, 8) == 0)) {
857 snaffle_it = False;
858 if (VG_(clo_trace_symtab)) {
859 VG_(printf)( "_dl_ junk: %s\n",
860 o_strtab+o_symtab[i].st_name);
861 }
862 }
863# endif
864
865 /* This seems to significantly reduce the number of junk
866 symbols, and particularly reduces the number of
867 overlapping address ranges. Don't ask me why ... */
868 if (snaffle_it && (Int)o_symtab[i].st_value == 0) {
869 snaffle_it = False;
870 if (VG_(clo_trace_symtab)) {
871 VG_(printf)( "valu=0: %s\n",
872 o_strtab+o_symtab[i].st_name);
873 }
874 }
875
876 /* If no part of the symbol falls within the mapped range,
877 ignore it. */
878 if (sym_addr+o_symtab[i].st_size <= si->start
879 || sym_addr >= si->start+si->size) {
880 snaffle_it = False;
881 }
882
883 if (snaffle_it) {
884 /* it's an interesting symbol; record ("snaffle") it. */
885 RiSym sym;
886 Char* t0 = o_symtab[i].st_name
887 ? (Char*)(o_strtab+o_symtab[i].st_name)
888 : (Char*)"NONAME";
889 Int nmoff = addStr ( si, t0 );
890 vg_assert(nmoff >= 0
891 /* && 0==VG_(strcmp)(t0,&vg_strtab[nmoff]) */ );
892 vg_assert( (Int)o_symtab[i].st_value >= 0);
893 /* VG_(printf)("%p + %d: %s\n", si->addr,
894 (Int)o_symtab[i].st_value, t0 ); */
895 sym.addr = sym_addr;
896 sym.size = o_symtab[i].st_size;
897 sym.nmoff = nmoff;
898 addSym ( si, &sym );
899 }
900 }
901 }
902 }
903
904 /* Reading of the "stabs" debug format information, if any. */
905 stabstr = NULL;
906 stab = NULL;
907 stabstr_sz = 0;
908 stab_sz = 0;
909 /* find the .stabstr and .stab sections */
910 for (i = 0; i < ehdr->e_shnum; i++) {
911 if (0 == VG_(strcmp)(".stab",sh_strtab + shdr[i].sh_name)) {
912 stab = (struct nlist *)(oimage + shdr[i].sh_offset);
913 stab_sz = shdr[i].sh_size;
914 }
915 if (0 == VG_(strcmp)(".stabstr",sh_strtab + shdr[i].sh_name)) {
916 stabstr = (UChar*)(oimage + shdr[i].sh_offset);
917 stabstr_sz = shdr[i].sh_size;
918 }
919 }
920
921 if (stab == NULL || stabstr == NULL) {
922 vg_symerr(" object doesn't have any debug info");
923 VG_(munmap) ( (void*)oimage, n_oimage );
924 return;
925 }
926
927 if ( stab_sz + (UChar*)stab > n_oimage + (UChar*)oimage
928 || stabstr_sz + (UChar*)stabstr
929 > n_oimage + (UChar*)oimage ) {
930 vg_symerr(" ELF debug data is beyond image end?!");
931 VG_(munmap) ( (void*)oimage, n_oimage );
932 return;
933 }
934
935 /* Ok. It all looks plausible. Go on and read debug data.
936 stab kinds: 100 N_SO a source file name
937 68 N_SLINE a source line number
njn4f9c9342002-04-29 16:03:24 +0000938 36 N_FUN start of a function
sewardjde4a1d02002-03-22 01:27:54 +0000939
njn4f9c9342002-04-29 16:03:24 +0000940 In this loop, we maintain a current file name, updated as
941 N_SO/N_SOLs appear, and a current function base address,
942 updated as N_FUNs appear. Based on that, address ranges for
943 N_SLINEs are calculated, and stuffed into the line info table.
sewardjde4a1d02002-03-22 01:27:54 +0000944
njn4f9c9342002-04-29 16:03:24 +0000945 Finding the instruction address range covered by an N_SLINE is
946 complicated; see the N_SLINE case below.
sewardjde4a1d02002-03-22 01:27:54 +0000947 */
948 curr_filenmoff = addStr(si,"???");
949 curr_fnbaseaddr = (Addr)NULL;
njne0ee0712002-05-03 16:41:05 +0000950 curr_file_name = curr_fn_name = (Char*)NULL;
951 lineno = prev_lineno = 0;
952 lineno_overflows = 0;
953 same_file = True;
sewardjde4a1d02002-03-22 01:27:54 +0000954
njn4f9c9342002-04-29 16:03:24 +0000955 n_stab_entries = stab_sz/(int)sizeof(struct nlist);
956
957 for (i = 0; i < n_stab_entries; i++) {
sewardjde4a1d02002-03-22 01:27:54 +0000958# if 0
959 VG_(printf) ( " %2d ", i );
960 VG_(printf) ( "type=0x%x othr=%d desc=%d value=0x%x strx=%d %s",
961 stab[i].n_type, stab[i].n_other, stab[i].n_desc,
962 (int)stab[i].n_value,
963 (int)stab[i].n_un.n_strx,
964 stabstr + stab[i].n_un.n_strx );
965 VG_(printf)("\n");
966# endif
967
njne0ee0712002-05-03 16:41:05 +0000968 Char *no_fn_name = "???";
969
sewardjde4a1d02002-03-22 01:27:54 +0000970 switch (stab[i].n_type) {
njn4f9c9342002-04-29 16:03:24 +0000971 UInt next_addr;
sewardjde4a1d02002-03-22 01:27:54 +0000972
njne0ee0712002-05-03 16:41:05 +0000973 /* Two complicated things here:
974 * 1. the n_desc field in 'struct n_list' in a.out.h is only 16-bits,
975 * which gives a maximum of 65535 lines. We handle files bigger
976 * than this by detecting heuristically overflows -- if the line
977 * count goes from 65000-odd to 0-odd within the same file, we
978 * assume it's an overflow. Once we switch files, we zero the
979 * overflow count
980 *
981 * 2. To compute the instr address range covered by a single line,
982 * find the address of the next thing and compute the difference.
983 * The approach used depends on what kind of entry/entries
984 * follow...
985 */
njn4f9c9342002-04-29 16:03:24 +0000986 case N_SLINE: {
njn4f9c9342002-04-29 16:03:24 +0000987 Int this_addr = (UInt)stab[i].n_value;
988
njne0ee0712002-05-03 16:41:05 +0000989 /* Although stored as a short, neg values really are > 32768, hence
990 * the UShort cast. Then we use an Int to handle overflows. */
991 prev_lineno = lineno;
992 lineno = (Int)((UShort)stab[i].n_desc);
993
994 if (prev_lineno > lineno + OVERFLOW_DIFFERENCE && same_file) {
995 VG_(message)(Vg_DebugMsg,
996 "Line number overflow detected (%d --> %d) in %s",
997 prev_lineno, lineno, curr_file_name);
998 lineno_overflows++;
999 }
1000 same_file = True;
1001
njn4f9c9342002-04-29 16:03:24 +00001002 LOOP:
njn9aae6742002-04-30 13:44:01 +00001003 if (i+1 >= n_stab_entries) {
1004 /* If it's the last entry, just guess the range is four; can't
1005 * do any better */
njne0ee0712002-05-03 16:41:05 +00001006 next_addr = this_addr + 4;
njn9aae6742002-04-30 13:44:01 +00001007 } else {
1008 switch (stab[i+1].n_type) {
1009 /* Easy, common case: use address of next entry */
1010 case N_SLINE: case N_SO:
njn4f9c9342002-04-29 16:03:24 +00001011 next_addr = (UInt)stab[i+1].n_value;
njn4f9c9342002-04-29 16:03:24 +00001012 break;
njn4f9c9342002-04-29 16:03:24 +00001013
njn9aae6742002-04-30 13:44:01 +00001014 /* Boring one: skip, look for something more useful. */
1015 case N_RSYM: case N_LSYM: case N_LBRAC: case N_RBRAC:
njn9885df02002-05-01 08:25:03 +00001016 case N_STSYM: case N_LCSYM: case N_GSYM:
njn9aae6742002-04-30 13:44:01 +00001017 i++;
1018 goto LOOP;
1019
1020 /* Should be an end of fun entry, use its address */
1021 case N_FUN:
1022 if ('\0' == * (stabstr + stab[i+1].n_un.n_strx) ) {
1023 next_addr = (UInt)stab[i+1].n_value;
1024 } else {
njne0ee0712002-05-03 16:41:05 +00001025 VG_(message)(Vg_DebugMsg,
1026 "warning: function %s missing closing "
1027 "N_FUN stab at entry %d",
1028 curr_fn_name, i );
1029 next_addr = this_addr; /* assume zero-size loc */
njn9aae6742002-04-30 13:44:01 +00001030 }
1031 break;
1032
1033 /* N_SOL should be followed by an N_SLINE which can be used */
1034 case N_SOL:
1035 if (i+2 < n_stab_entries && N_SLINE == stab[i+2].n_type) {
1036 next_addr = (UInt)stab[i+2].n_value;
1037 break;
1038 } else {
1039 VG_(printf)("unhandled N_SOL stabs case: %d %d %d",
1040 stab[i+1].n_type, i, n_stab_entries);
sewardj177d3232002-05-01 09:25:56 +00001041 VG_(panic)("unhandled N_SOL stabs case");
njn9aae6742002-04-30 13:44:01 +00001042 }
1043
1044 default:
1045 VG_(printf)("unhandled stabs case: %d %d",
1046 stab[i+1].n_type,i);
sewardj177d3232002-05-01 09:25:56 +00001047 VG_(panic)("unhandled (other) stabs case");
njn9aae6742002-04-30 13:44:01 +00001048 }
sewardjde4a1d02002-03-22 01:27:54 +00001049 }
njn4f9c9342002-04-29 16:03:24 +00001050
njn4f9c9342002-04-29 16:03:24 +00001051 addLineInfo ( si, curr_filenmoff, curr_fnbaseaddr + this_addr,
njne0ee0712002-05-03 16:41:05 +00001052 curr_fnbaseaddr + next_addr,
1053 lineno + lineno_overflows * LINENO_OVERFLOW, i);
sewardjde4a1d02002-03-22 01:27:54 +00001054 break;
1055 }
1056
njn4f9c9342002-04-29 16:03:24 +00001057 case N_FUN: {
1058 if ('\0' != (stabstr + stab[i].n_un.n_strx)[0] ) {
sewardjde4a1d02002-03-22 01:27:54 +00001059 /* N_FUN with a name -- indicates the start of a fn. */
njn4f9c9342002-04-29 16:03:24 +00001060 curr_fnbaseaddr = si->offset + (Addr)stab[i].n_value;
njne0ee0712002-05-03 16:41:05 +00001061 curr_fn_name = stabstr + stab[i].n_un.n_strx;
1062 } else {
1063 curr_fn_name = no_fn_name;
sewardjde4a1d02002-03-22 01:27:54 +00001064 }
1065 break;
1066 }
1067
njne0ee0712002-05-03 16:41:05 +00001068 case N_SOL:
1069 if (lineno_overflows != 0) {
njn7efaa112002-05-07 10:26:57 +00001070 VG_(message)(Vg_UserMsg,
1071 "Warning: file %s is very big (> 65535 lines) "
1072 "Line numbers and annotation for this file might "
1073 "be wrong. Sorry",
1074 curr_file_name);
njne0ee0712002-05-03 16:41:05 +00001075 }
1076 /* fall through! */
1077 case N_SO:
1078 lineno_overflows = 0;
1079
sewardjde4a1d02002-03-22 01:27:54 +00001080 /* seems to give lots of locations in header files */
1081 /* case 130: */ /* BINCL */
1082 {
1083 UChar* nm = stabstr + stab[i].n_un.n_strx;
1084 UInt len = VG_(strlen)(nm);
njn4f9c9342002-04-29 16:03:24 +00001085
1086 if (len > 0 && nm[len-1] != '/') {
sewardjde4a1d02002-03-22 01:27:54 +00001087 curr_filenmoff = addStr ( si, nm );
njn4f9c9342002-04-29 16:03:24 +00001088 curr_file_name = stabstr + stab[i].n_un.n_strx;
1089 }
sewardjde4a1d02002-03-22 01:27:54 +00001090 else
1091 if (len == 0)
1092 curr_filenmoff = addStr ( si, "?1\0" );
njn4f9c9342002-04-29 16:03:24 +00001093
sewardjde4a1d02002-03-22 01:27:54 +00001094 break;
1095 }
1096
1097# if 0
1098 case 162: /* EINCL */
1099 curr_filenmoff = addStr ( si, "?2\0" );
1100 break;
1101# endif
1102
1103 default:
1104 break;
1105 }
1106 } /* for (i = 0; i < stab_sz/(int)sizeof(struct nlist); i++) */
1107
1108 /* Last, but not least, heave the oimage back overboard. */
1109 VG_(munmap) ( (void*)oimage, n_oimage );
1110}
1111
1112
1113/*------------------------------------------------------------*/
1114/*--- Main entry point for symbols table reading. ---*/
1115/*------------------------------------------------------------*/
1116
1117/* The root structure for the entire symbol table system. It is a
1118 linked list of SegInfos. Note that this entire mechanism assumes
1119 that what we read from /proc/self/maps doesn't contain overlapping
1120 address ranges, and as a result the SegInfos in this list describe
1121 disjoint address ranges.
1122*/
1123static SegInfo* segInfo = NULL;
1124
1125
1126static
1127void read_symtab_callback (
1128 Addr start, UInt size,
1129 Char rr, Char ww, Char xx,
1130 UInt foffset, UChar* filename )
1131{
1132 SegInfo* si;
1133
1134 /* Stay sane ... */
1135 if (size == 0)
1136 return;
1137
1138 /* We're only interested in collecting symbols in executable
1139 segments which are associated with a real file. Hence: */
1140 if (filename == NULL || xx != 'x')
1141 return;
1142 if (0 == VG_(strcmp)(filename, "/dev/zero"))
1143 return;
1144
1145 /* Perhaps we already have this one? If so, skip. */
1146 for (si = segInfo; si != NULL; si = si->next) {
1147 /*
1148 if (0==VG_(strcmp)(si->filename, filename))
1149 VG_(printf)("same fnames: %c%c%c (%p, %d) (%p, %d) %s\n",
1150 rr,ww,xx,si->start,si->size,start,size,filename);
1151 */
1152 /* For some reason the observed size of a mapping can change, so
1153 we don't use that to determine uniqueness. */
1154 if (si->start == start
1155 /* && si->size == size */
1156 && 0==VG_(strcmp)(si->filename, filename)) {
1157 return;
1158 }
1159 }
1160
1161 /* Get the record initialised right. */
1162 si = VG_(malloc)(VG_AR_SYMTAB, sizeof(SegInfo));
1163 si->next = segInfo;
1164 segInfo = si;
1165
1166 si->start = start;
1167 si->size = size;
1168 si->foffset = foffset;
1169 si->filename = VG_(malloc)(VG_AR_SYMTAB, 1 + VG_(strlen)(filename));
1170 VG_(strcpy)(si->filename, filename);
1171
1172 si->symtab = NULL;
1173 si->symtab_size = si->symtab_used = 0;
1174 si->loctab = NULL;
1175 si->loctab_size = si->loctab_used = 0;
1176 si->strtab = NULL;
1177 si->strtab_size = si->strtab_used = 0;
1178
1179 /* Kludge ... */
1180 si->offset
1181 = si->start==VG_ASSUMED_EXE_BASE ? 0 : si->start;
1182
1183 /* And actually fill it up. */
1184 vg_read_lib_symbols ( si );
1185 canonicaliseSymtab ( si );
1186 canonicaliseLoctab ( si );
1187}
1188
1189
1190/* This one really is the Head Honcho. Update the symbol tables to
1191 reflect the current state of /proc/self/maps. Rather than re-read
1192 everything, just read the entries which are not already in segInfo.
1193 So we can call here repeatedly, after every mmap of a non-anonymous
1194 segment with execute permissions, for example, to pick up new
1195 libraries as they are dlopen'd. Conversely, when the client does
1196 munmap(), vg_symtab_notify_munmap() throws away any symbol tables
1197 which happen to correspond to the munmap()d area. */
1198void VG_(read_symbols) ( void )
1199{
njn4f9c9342002-04-29 16:03:24 +00001200 if (! VG_(clo_instrument) && ! VG_(clo_cachesim))
1201 return;
sewardjde4a1d02002-03-22 01:27:54 +00001202
1203 VG_(read_procselfmaps) ( read_symtab_callback );
1204
1205 /* Do a sanity check on the symbol tables: ensure that the address
1206 space pieces they cover do not overlap (otherwise we are severely
1207 hosed). This is a quadratic algorithm, but there shouldn't be
1208 many of them.
1209 */
1210 { SegInfo *si, *si2;
1211 for (si = segInfo; si != NULL; si = si->next) {
1212 /* Check no overlap between *si and those in the rest of the
1213 list. */
1214 for (si2 = si->next; si2 != NULL; si2 = si2->next) {
1215 Addr lo = si->start;
1216 Addr hi = si->start + si->size - 1;
1217 Addr lo2 = si2->start;
1218 Addr hi2 = si2->start + si2->size - 1;
1219 Bool overlap;
1220 vg_assert(lo < hi);
1221 vg_assert(lo2 < hi2);
1222 /* the main assertion */
1223 overlap = (lo <= lo2 && lo2 <= hi)
1224 || (lo <= hi2 && hi2 <= hi);
1225 //vg_assert(!overlap);
1226 if (overlap) {
1227 VG_(printf)("\n\nOVERLAPPING SEGMENTS\n" );
1228 ppSegInfo ( si );
1229 ppSegInfo ( si2 );
1230 VG_(printf)("\n\n");
1231 vg_assert(! overlap);
1232 }
1233 }
1234 }
1235 }
1236}
1237
1238
1239/* When an munmap() call happens, check to see whether it corresponds
1240 to a segment for a .so, and if so discard the relevant SegInfo.
1241 This might not be a very clever idea from the point of view of
1242 accuracy of error messages, but we need to do it in order to
1243 maintain the no-overlapping invariant.
1244*/
1245void VG_(symtab_notify_munmap) ( Addr start, UInt length )
1246{
1247 SegInfo *prev, *curr;
1248
1249 if (! VG_(clo_instrument))
1250 return;
1251
1252 prev = NULL;
1253 curr = segInfo;
1254 while (True) {
1255 if (curr == NULL) break;
1256 if (start == curr->start) break;
1257 prev = curr;
1258 curr = curr->next;
1259 }
1260 if (curr == NULL) return;
1261
1262 VG_(message)(Vg_UserMsg,
1263 "discard syms in %s due to munmap()",
1264 curr->filename ? curr->filename : (UChar*)"???");
1265
1266 vg_assert(prev == NULL || prev->next == curr);
1267
1268 if (prev == NULL) {
1269 segInfo = curr->next;
1270 } else {
1271 prev->next = curr->next;
1272 }
1273
1274 freeSegInfo(curr);
1275}
1276
1277
1278/*------------------------------------------------------------*/
1279/*--- Use of symbol table & location info to create ---*/
1280/*--- plausible-looking stack dumps. ---*/
1281/*------------------------------------------------------------*/
1282
1283/* Find a symbol-table index containing the specified pointer, or -1
1284 if not found. Binary search. */
1285
1286static Int search_one_symtab ( SegInfo* si, Addr ptr )
1287{
1288 Addr a_mid_lo, a_mid_hi;
1289 Int mid,
1290 lo = 0,
1291 hi = si->symtab_used-1;
1292 while (True) {
1293 /* current unsearched space is from lo to hi, inclusive. */
1294 if (lo > hi) return -1; /* not found */
1295 mid = (lo + hi) / 2;
1296 a_mid_lo = si->symtab[mid].addr;
1297 a_mid_hi = ((Addr)si->symtab[mid].addr) + si->symtab[mid].size - 1;
1298
1299 if (ptr < a_mid_lo) { hi = mid-1; continue; }
1300 if (ptr > a_mid_hi) { lo = mid+1; continue; }
1301 vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
1302 return mid;
1303 }
1304}
1305
1306
1307/* Search all symtabs that we know about to locate ptr. If found, set
1308 *psi to the relevant SegInfo, and *symno to the symtab entry number
1309 within that. If not found, *psi is set to NULL. */
1310
1311static void search_all_symtabs ( Addr ptr, SegInfo** psi, Int* symno )
1312{
1313 Int sno;
1314 SegInfo* si;
1315 for (si = segInfo; si != NULL; si = si->next) {
1316 if (si->start <= ptr && ptr < si->start+si->size) {
1317 sno = search_one_symtab ( si, ptr );
1318 if (sno == -1) goto not_found;
1319 *symno = sno;
1320 *psi = si;
1321 return;
1322 }
1323 }
1324 not_found:
1325 *psi = NULL;
1326}
1327
1328
1329/* Find a location-table index containing the specified pointer, or -1
1330 if not found. Binary search. */
1331
1332static Int search_one_loctab ( SegInfo* si, Addr ptr )
1333{
1334 Addr a_mid_lo, a_mid_hi;
1335 Int mid,
1336 lo = 0,
1337 hi = si->loctab_used-1;
1338 while (True) {
1339 /* current unsearched space is from lo to hi, inclusive. */
1340 if (lo > hi) return -1; /* not found */
1341 mid = (lo + hi) / 2;
1342 a_mid_lo = si->loctab[mid].addr;
1343 a_mid_hi = ((Addr)si->loctab[mid].addr) + si->loctab[mid].size - 1;
1344
1345 if (ptr < a_mid_lo) { hi = mid-1; continue; }
1346 if (ptr > a_mid_hi) { lo = mid+1; continue; }
1347 vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
1348 return mid;
1349 }
1350}
1351
1352
1353/* Search all loctabs that we know about to locate ptr. If found, set
1354 *psi to the relevant SegInfo, and *locno to the loctab entry number
1355 within that. If not found, *psi is set to NULL.
1356*/
1357static void search_all_loctabs ( Addr ptr, SegInfo** psi, Int* locno )
1358{
1359 Int lno;
1360 SegInfo* si;
1361 for (si = segInfo; si != NULL; si = si->next) {
1362 if (si->start <= ptr && ptr < si->start+si->size) {
1363 lno = search_one_loctab ( si, ptr );
1364 if (lno == -1) goto not_found;
1365 *locno = lno;
1366 *psi = si;
1367 return;
1368 }
1369 }
1370 not_found:
1371 *psi = NULL;
1372}
1373
1374
1375/* The whole point of this whole big deal: map a code address to a
1376 plausible symbol name. Returns False if no idea; otherwise True.
1377 Caller supplies buf and nbuf. If no_demangle is True, don't do
1378 demangling, regardless of vg_clo_demangle -- probably because the
1379 call has come from vg_what_fn_or_object_is_this. */
njn4f9c9342002-04-29 16:03:24 +00001380Bool VG_(what_fn_is_this) ( Bool no_demangle, Addr a,
1381 Char* buf, Int nbuf )
sewardjde4a1d02002-03-22 01:27:54 +00001382{
1383 SegInfo* si;
1384 Int sno;
1385 search_all_symtabs ( a, &si, &sno );
1386 if (si == NULL)
1387 return False;
1388 if (no_demangle) {
1389 VG_(strncpy_safely)
1390 ( buf, & si->strtab[si->symtab[sno].nmoff], nbuf );
1391 } else {
1392 VG_(demangle) ( & si->strtab[si->symtab[sno].nmoff], buf, nbuf );
1393 }
1394 return True;
1395}
1396
1397
1398/* Map a code address to the name of a shared object file. Returns
1399 False if no idea; otherwise False. Caller supplies buf and
1400 nbuf. */
1401static
1402Bool vg_what_object_is_this ( Addr a, Char* buf, Int nbuf )
1403{
1404 SegInfo* si;
1405 for (si = segInfo; si != NULL; si = si->next) {
1406 if (si->start <= a && a < si->start+si->size) {
1407 VG_(strncpy_safely)(buf, si->filename, nbuf);
1408 return True;
1409 }
1410 }
1411 return False;
1412}
1413
1414/* Return the name of an erring fn in a way which is useful
1415 for comparing against the contents of a suppressions file.
1416 Always writes something to buf. Also, doesn't demangle the
1417 name, because we want to refer to mangled names in the
1418 suppressions file.
1419*/
1420void VG_(what_obj_and_fun_is_this) ( Addr a,
1421 Char* obj_buf, Int n_obj_buf,
1422 Char* fun_buf, Int n_fun_buf )
1423{
1424 (void)vg_what_object_is_this ( a, obj_buf, n_obj_buf );
njn4f9c9342002-04-29 16:03:24 +00001425 (void)VG_(what_fn_is_this) ( True, a, fun_buf, n_fun_buf );
sewardjde4a1d02002-03-22 01:27:54 +00001426}
1427
1428
1429/* Map a code address to a (filename, line number) pair.
1430 Returns True if successful.
1431*/
njn4f9c9342002-04-29 16:03:24 +00001432Bool VG_(what_line_is_this)( Addr a,
1433 UChar* filename, Int n_filename,
1434 UInt* lineno )
sewardjde4a1d02002-03-22 01:27:54 +00001435{
1436 SegInfo* si;
1437 Int locno;
1438 search_all_loctabs ( a, &si, &locno );
1439 if (si == NULL)
1440 return False;
1441 VG_(strncpy_safely)(filename, & si->strtab[si->loctab[locno].fnmoff],
1442 n_filename);
1443 *lineno = si->loctab[locno].lineno;
njn4f9c9342002-04-29 16:03:24 +00001444
sewardjde4a1d02002-03-22 01:27:54 +00001445 return True;
1446}
1447
1448
1449/* Print a mini stack dump, showing the current location. */
1450void VG_(mini_stack_dump) ( ExeContext* ec )
1451{
1452
1453#define APPEND(str) \
1454 { UChar* sss; \
1455 for (sss = str; n < M_VG_ERRTXT-1 && *sss != 0; n++,sss++) \
1456 buf[n] = *sss; \
1457 buf[n] = 0; \
1458 }
1459
1460 Bool know_fnname;
1461 Bool know_objname;
1462 Bool know_srcloc;
1463 UInt lineno;
1464 UChar ibuf[20];
1465 UInt i, n, clueless;
1466
1467 UChar buf[M_VG_ERRTXT];
1468 UChar buf_fn[M_VG_ERRTXT];
1469 UChar buf_obj[M_VG_ERRTXT];
1470 UChar buf_srcloc[M_VG_ERRTXT];
1471
1472 Int stop_at = VG_(clo_backtrace_size);
1473
1474 n = 0;
1475
njn4f9c9342002-04-29 16:03:24 +00001476 know_fnname = VG_(what_fn_is_this)(False,ec->eips[0], buf_fn, M_VG_ERRTXT);
sewardjde4a1d02002-03-22 01:27:54 +00001477 know_objname = vg_what_object_is_this(ec->eips[0], buf_obj, M_VG_ERRTXT);
njn4f9c9342002-04-29 16:03:24 +00001478 know_srcloc = VG_(what_line_is_this)(ec->eips[0],
1479 buf_srcloc, M_VG_ERRTXT,
1480 &lineno);
sewardjde4a1d02002-03-22 01:27:54 +00001481
1482 APPEND(" at ");
1483 VG_(sprintf)(ibuf,"0x%x: ", ec->eips[0]);
1484 APPEND(ibuf);
1485 if (know_fnname) {
1486 APPEND(buf_fn);
1487 if (!know_srcloc && know_objname) {
1488 APPEND(" (in ");
1489 APPEND(buf_obj);
1490 APPEND(")");
1491 }
1492 } else if (know_objname && !know_srcloc) {
1493 APPEND("(within ");
1494 APPEND(buf_obj);
1495 APPEND(")");
1496 } else {
1497 APPEND("???");
1498 }
1499 if (know_srcloc) {
1500 APPEND(" (");
1501 APPEND(buf_srcloc);
1502 APPEND(":");
1503 VG_(sprintf)(ibuf,"%d",lineno);
1504 APPEND(ibuf);
1505 APPEND(")");
1506 }
1507 VG_(message)(Vg_UserMsg, "%s", buf);
1508
1509 clueless = 0;
1510 for (i = 1; i < stop_at; i++) {
njn4f9c9342002-04-29 16:03:24 +00001511 know_fnname = VG_(what_fn_is_this)(False,ec->eips[i], buf_fn, M_VG_ERRTXT);
sewardjde4a1d02002-03-22 01:27:54 +00001512 know_objname = vg_what_object_is_this(ec->eips[i],buf_obj, M_VG_ERRTXT);
njn4f9c9342002-04-29 16:03:24 +00001513 know_srcloc = VG_(what_line_is_this)(ec->eips[i],
sewardjde4a1d02002-03-22 01:27:54 +00001514 buf_srcloc, M_VG_ERRTXT,
1515 &lineno);
1516 n = 0;
1517 APPEND(" by ");
1518 if (ec->eips[i] == 0) {
1519 APPEND("<bogus frame pointer> ");
1520 } else {
1521 VG_(sprintf)(ibuf,"0x%x: ",ec->eips[i]);
1522 APPEND(ibuf);
1523 }
1524 if (know_fnname) {
1525 APPEND(buf_fn)
1526 if (!know_srcloc && know_objname) {
1527 APPEND(" (in ");
1528 APPEND(buf_obj);
1529 APPEND(")");
1530 }
1531 } else {
1532 if (know_objname && !know_srcloc) {
1533 APPEND("(within ");
1534 APPEND(buf_obj);
1535 APPEND(")");
1536 } else {
1537 APPEND("???");
1538 }
1539 if (!know_srcloc) clueless++;
1540 if (clueless == 2)
1541 i = stop_at; /* force exit after this iteration */
1542 };
1543 if (know_srcloc) {
1544 APPEND(" (");
1545 APPEND(buf_srcloc);
1546 APPEND(":");
1547 VG_(sprintf)(ibuf,"%d",lineno);
1548 APPEND(ibuf);
1549 APPEND(")");
1550 }
1551 VG_(message)(Vg_UserMsg, "%s", buf);
1552 }
1553}
1554
1555#undef APPEND
1556
1557/*--------------------------------------------------------------------*/
1558/*--- end vg_symtab2.c ---*/
1559/*--------------------------------------------------------------------*/