blob: 8330794ee3187dc75af100038ecf2f7a643c0da2 [file] [log] [blame]
sewardjde4a1d02002-03-22 01:27:54 +00001/*--------------------------------------------------------------------*/
2/*--- Management of symbols and debugging information. ---*/
3/*--- vg_symtab2.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7 This file is part of Valgrind, an x86 protected-mode emulator
8 designed for debugging and profiling binaries on x86-Unixes.
9
10 Copyright (C) 2000-2002 Julian Seward
11 jseward@acm.org
sewardjde4a1d02002-03-22 01:27:54 +000012
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file LICENSE.
29*/
30
31#include "vg_include.h"
sewardjde4a1d02002-03-22 01:27:54 +000032
33#include <elf.h> /* ELF defns */
34#include <a.out.h> /* stabs defns */
35
njn9aae6742002-04-30 13:44:01 +000036
sewardjde4a1d02002-03-22 01:27:54 +000037/* Majorly rewritten Sun 3 Feb 02 to enable loading symbols from
38 dlopen()ed libraries, which is something that KDE3 does a lot.
sewardjde4a1d02002-03-22 01:27:54 +000039
sewardj18d75132002-05-16 11:06:21 +000040 Stabs reader greatly improved by Nick Nethercote, Apr 02.
sewardjde4a1d02002-03-22 01:27:54 +000041
sewardj18d75132002-05-16 11:06:21 +000042 16 May 02: when notified about munmap, return a Bool indicating
43 whether or not the area being munmapped had executable permissions.
44 This is then used to determine whether or not
45 VG_(invalid_translations) should be called for that area. In order
46 that this work even if --instrument=no, in this case we still keep
47 track of the mapped executable segments, but do not load any debug
48 info or symbols.
sewardjde4a1d02002-03-22 01:27:54 +000049*/
50
51/*------------------------------------------------------------*/
52/*--- Structs n stuff ---*/
53/*------------------------------------------------------------*/
54
55/* A structure to hold an ELF symbol (very crudely). */
56typedef
57 struct {
58 Addr addr; /* lowest address of entity */
59 UInt size; /* size in bytes */
60 Int nmoff; /* offset of name in this SegInfo's str tab */
61 }
62 RiSym;
63
/* Line numbers are stored as shorts in `struct nlist' in a.out.h, so
 * the count wraps round once this many lines have been seen. */
#define LINENO_OVERFLOW (1 << (sizeof(short) * 8))

/* A RiLoc packs the line number and the byte size into 32 bits:
 * LINENO_BITS for the line number, the remainder for the size. */
#define LINENO_BITS     20
#define LOC_SIZE_BITS  (32 - LINENO_BITS)

/* Largest line number that fits in LINENO_BITS. */
#define MAX_LINENO     ((1 << LINENO_BITS) - 1)

/* Largest size that fits in LOC_SIZE_BITS.  Unlikely to have any lines
 * with instruction ranges > 4096 bytes. */
#define MAX_LOC_SIZE   ((1 << LOC_SIZE_BITS) - 1)

/* Number used to detect line number overflows; if one line is
 * 60000-odd smaller than the previous, it was probably an overflow. */
#define OVERFLOW_DIFFERENCE     (LINENO_OVERFLOW - 5000)
79
80/* A structure to hold addr-to-source info for a single line. There can be a
81 * lot of these, hence the dense packing. */
sewardjde4a1d02002-03-22 01:27:54 +000082typedef
83 struct {
njne0ee0712002-05-03 16:41:05 +000084 /* Word 1 */
85 Addr addr; /* lowest address for this line */
86 /* Word 2 */
87 UShort size:LOC_SIZE_BITS; /* byte size; we catch overflows of this */
88 UInt lineno:LINENO_BITS; /* source line number, or zero */
89 /* Word 3 */
90 UInt fnmoff; /* source filename; offset in this
91 SegInfo's str tab */
sewardjde4a1d02002-03-22 01:27:54 +000092 }
93 RiLoc;
94
95
96/* A structure which contains information pertaining to one mapped
97 text segment. */
98typedef
99 struct _SegInfo {
100 struct _SegInfo* next;
101 /* Description of the mapped segment. */
102 Addr start;
103 UInt size;
104 UChar* filename; /* in mallocville */
105 UInt foffset;
106 /* An expandable array of symbols. */
107 RiSym* symtab;
108 UInt symtab_used;
109 UInt symtab_size;
110 /* An expandable array of locations. */
111 RiLoc* loctab;
112 UInt loctab_used;
113 UInt loctab_size;
114 /* An expandable array of characters -- the string table. */
115 Char* strtab;
116 UInt strtab_used;
117 UInt strtab_size;
118 /* offset is what we need to add to symbol table entries
119 to get the real location of that symbol in memory.
120 For executables, offset is zero.
121 For .so's, offset == base_addr.
122 This seems like a giant kludge to me.
123 */
124 UInt offset;
125 }
126 SegInfo;
127
128
129/* -- debug helper -- */
130static void ppSegInfo ( SegInfo* si )
131{
132 VG_(printf)("name: %s\n"
133 "start %p, size %d, foffset %d\n",
134 si->filename?si->filename : (UChar*)"NULL",
135 si->start, si->size, si->foffset );
136}
137
138static void freeSegInfo ( SegInfo* si )
139{
140 vg_assert(si != NULL);
141 if (si->filename) VG_(free)(VG_AR_SYMTAB, si->filename);
142 if (si->symtab) VG_(free)(VG_AR_SYMTAB, si->symtab);
143 if (si->loctab) VG_(free)(VG_AR_SYMTAB, si->loctab);
144 if (si->strtab) VG_(free)(VG_AR_SYMTAB, si->strtab);
145 VG_(free)(VG_AR_SYMTAB, si);
146}
147
148
149/*------------------------------------------------------------*/
150/*--- Adding stuff ---*/
151/*------------------------------------------------------------*/
152
153/* Add a str to the string table, including terminating zero, and
154 return offset of the string in vg_strtab. */
155
156static __inline__
157Int addStr ( SegInfo* si, Char* str )
158{
159 Char* new_tab;
160 Int new_sz, i, space_needed;
161
162 space_needed = 1 + VG_(strlen)(str);
163 if (si->strtab_used + space_needed > si->strtab_size) {
164 new_sz = 2 * si->strtab_size;
165 if (new_sz == 0) new_sz = 5000;
166 new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz);
167 if (si->strtab != NULL) {
168 for (i = 0; i < si->strtab_used; i++)
169 new_tab[i] = si->strtab[i];
170 VG_(free)(VG_AR_SYMTAB, si->strtab);
171 }
172 si->strtab = new_tab;
173 si->strtab_size = new_sz;
174 }
175
176 for (i = 0; i < space_needed; i++)
177 si->strtab[si->strtab_used+i] = str[i];
178
179 si->strtab_used += space_needed;
180 vg_assert(si->strtab_used <= si->strtab_size);
181 return si->strtab_used - space_needed;
182}
183
184/* Add a symbol to the symbol table. */
185
186static __inline__
187void addSym ( SegInfo* si, RiSym* sym )
188{
189 Int new_sz, i;
190 RiSym* new_tab;
191
192 /* Ignore zero-sized syms. */
193 if (sym->size == 0) return;
194
195 if (si->symtab_used == si->symtab_size) {
196 new_sz = 2 * si->symtab_size;
197 if (new_sz == 0) new_sz = 500;
198 new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiSym) );
199 if (si->symtab != NULL) {
200 for (i = 0; i < si->symtab_used; i++)
201 new_tab[i] = si->symtab[i];
202 VG_(free)(VG_AR_SYMTAB, si->symtab);
203 }
204 si->symtab = new_tab;
205 si->symtab_size = new_sz;
206 }
207
208 si->symtab[si->symtab_used] = *sym;
209 si->symtab_used++;
210 vg_assert(si->symtab_used <= si->symtab_size);
211}
212
213/* Add a location to the location table. */
214
215static __inline__
216void addLoc ( SegInfo* si, RiLoc* loc )
217{
218 Int new_sz, i;
219 RiLoc* new_tab;
220
njne0ee0712002-05-03 16:41:05 +0000221 /* Zero-sized locs should have been ignored earlier */
222 vg_assert(loc->size > 0);
sewardjde4a1d02002-03-22 01:27:54 +0000223
224 if (si->loctab_used == si->loctab_size) {
225 new_sz = 2 * si->loctab_size;
226 if (new_sz == 0) new_sz = 500;
227 new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiLoc) );
228 if (si->loctab != NULL) {
229 for (i = 0; i < si->loctab_used; i++)
230 new_tab[i] = si->loctab[i];
231 VG_(free)(VG_AR_SYMTAB, si->loctab);
232 }
233 si->loctab = new_tab;
234 si->loctab_size = new_sz;
235 }
236
237 si->loctab[si->loctab_used] = *loc;
238 si->loctab_used++;
239 vg_assert(si->loctab_used <= si->loctab_size);
240}
241
242
sewardjb51f2e62002-06-01 23:11:19 +0000243/* Top-level place to call to add a source-location mapping entry. */
244
245static __inline__
246void addLineInfo ( SegInfo* si,
247 Int fnmoff,
248 Addr this,
249 Addr next,
250 Int lineno,
sewardj08a50f62002-06-17 02:21:20 +0000251 Int entry /* only needed for debug printing */
252 )
sewardjb51f2e62002-06-01 23:11:19 +0000253{
254 RiLoc loc;
255 Int size = next - this;
256
257 /* Ignore zero-sized locs */
258 if (this == next) return;
259
260 /* Maximum sanity checking. Some versions of GNU as do a shabby
261 * job with stabs entries; if anything looks suspicious, revert to
262 * a size of 1. This should catch the instruction of interest
263 * (since if using asm-level debug info, one instruction will
264 * correspond to one line, unlike with C-level debug info where
265 * multiple instructions can map to the one line), but avoid
266 * catching any other instructions bogusly. */
267 if (this > next) {
268 VG_(message)(Vg_DebugMsg,
sewardj08a50f62002-06-17 02:21:20 +0000269 "warning: line info addresses out of order "
sewardjb51f2e62002-06-01 23:11:19 +0000270 "at entry %d: 0x%x 0x%x", entry, this, next);
271 size = 1;
272 }
273
274 if (size > MAX_LOC_SIZE) {
sewardjd84606d2002-06-18 01:04:57 +0000275 if (0)
sewardjb51f2e62002-06-01 23:11:19 +0000276 VG_(message)(Vg_DebugMsg,
sewardj08a50f62002-06-17 02:21:20 +0000277 "warning: line info address range too large "
sewardjb51f2e62002-06-01 23:11:19 +0000278 "at entry %d: %d", entry, size);
279 size = 1;
280 }
281
sewardj08a50f62002-06-17 02:21:20 +0000282 /* vg_assert(this < si->start + si->size && next-1 >= si->start); */
njne306ffe2002-06-08 13:34:17 +0000283 if (this >= si->start + si->size || next-1 < si->start) {
sewardjd84606d2002-06-18 01:04:57 +0000284 if (0)
sewardj08a50f62002-06-17 02:21:20 +0000285 VG_(message)(Vg_DebugMsg,
286 "warning: ignoring line info entry falling "
287 "outside current SegInfo: %p %p %p %p",
288 si->start, si->start + si->size,
289 this, next-1);
njne306ffe2002-06-08 13:34:17 +0000290 return;
291 }
292
293 vg_assert(lineno >= 0);
294 if (lineno > MAX_LINENO) {
295 VG_(message)(Vg_UserMsg,
sewardj08a50f62002-06-17 02:21:20 +0000296 "warning: ignoring line info entry with "
297 "huge line number (%d)", lineno);
njne306ffe2002-06-08 13:34:17 +0000298 VG_(message)(Vg_UserMsg,
299 " Can't handle line numbers "
sewardj08a50f62002-06-17 02:21:20 +0000300 "greater than %d, sorry", MAX_LINENO);
njne306ffe2002-06-08 13:34:17 +0000301 return;
302 }
sewardjb51f2e62002-06-01 23:11:19 +0000303
304 loc.addr = this;
305 loc.size = (UShort)size;
306 loc.lineno = lineno;
307 loc.fnmoff = fnmoff;
308 addLoc ( si, &loc );
309}
310
sewardjde4a1d02002-03-22 01:27:54 +0000311
312/*------------------------------------------------------------*/
313/*--- Helpers ---*/
314/*------------------------------------------------------------*/
315
316/* Non-fatal -- use vg_panic if terminal. */
317static
318void vg_symerr ( Char* msg )
319{
320 if (VG_(clo_verbosity) > 1)
321 VG_(message)(Vg_UserMsg,"%s", msg );
322}
323
324
325/* Print a symbol. */
326static
327void printSym ( SegInfo* si, Int i )
328{
329 VG_(printf)( "%5d: %8p .. %8p (%d) %s\n",
330 i,
331 si->symtab[i].addr,
332 si->symtab[i].addr + si->symtab[i].size - 1, si->symtab[i].size,
333 &si->strtab[si->symtab[i].nmoff] );
334}
335
336
#if 0
/* Print the entire sym tab.
   NOTE(review): this disabled code is stale.  It calls printSym with
   a single argument although printSym takes (SegInfo*, Int), and it
   reads vg_symtab_used, which is not defined in the part of the file
   reviewed here.  It needs porting to a SegInfo before it can be
   re-enabled. */
static __attribute__ ((unused))
void printSymtab ( void )
{
   Int i;
   VG_(printf)("\n------ BEGIN vg_symtab ------\n");
   for (i = 0; i < vg_symtab_used; i++)
      printSym(i);
   VG_(printf)("------ END vg_symtab ------\n");
}
#endif
349
#if 0
/* Paranoid strcat: append src to the string already in dst, never
   touching dst[maxlen] or beyond.  If dst has no terminating zero
   within its first maxlen bytes, nothing is written.  The result is
   not zero-terminated if src had to be cut short. */
static
void safeCopy ( UChar* dst, UInt maxlen, UChar* src )
{
   UInt d = 0, s = 0;

   /* Find the end of what is already in dst. */
   while (d < maxlen && dst[d] != 0)
      d++;
   if (d >= maxlen)
      return;

   /* Copy until src's terminator has been written or dst is full. */
   for (; d < maxlen; d++, s++) {
      dst[d] = src[s];
      if (src[s] == 0)
         return;
   }
}
#endif
369
sewardjb51f2e62002-06-01 23:11:19 +0000370
sewardjde4a1d02002-03-22 01:27:54 +0000371/*------------------------------------------------------------*/
372/*--- Canonicalisers ---*/
373/*------------------------------------------------------------*/
374
375/* Sort the symtab by starting address, and emit warnings if any
376 symbols have overlapping address ranges. We use that old chestnut,
377 shellsort. Mash the table around so as to establish the property
378 that addresses are in order and the ranges to not overlap. This
379 facilitates using binary search to map addresses to symbols when we
380 come to query the table.
381*/
382static
383void canonicaliseSymtab ( SegInfo* si )
384{
385 /* Magic numbers due to Janet Incerpi and Robert Sedgewick. */
386 Int incs[16] = { 1, 3, 7, 21, 48, 112, 336, 861, 1968,
387 4592, 13776, 33936, 86961, 198768,
388 463792, 1391376 };
389 Int lo = 0;
390 Int hi = si->symtab_used-1;
391 Int i, j, h, bigN, hp, n_merged, n_truncated;
392 RiSym v;
393 Addr s1, s2, e1, e2;
394
395# define SWAP(ty,aa,bb) \
396 do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0)
397
398 bigN = hi - lo + 1; if (bigN < 2) return;
399 hp = 0; while (hp < 16 && incs[hp] < bigN) hp++; hp--;
400 vg_assert(0 <= hp && hp < 16);
401
402 for (; hp >= 0; hp--) {
403 h = incs[hp];
404 i = lo + h;
405 while (1) {
406 if (i > hi) break;
407 v = si->symtab[i];
408 j = i;
409 while (si->symtab[j-h].addr > v.addr) {
410 si->symtab[j] = si->symtab[j-h];
411 j = j - h;
412 if (j <= (lo + h - 1)) break;
413 }
414 si->symtab[j] = v;
415 i++;
416 }
417 }
418
419 cleanup_more:
420
421 /* If two symbols have identical address ranges, favour the
422 one with the longer name.
423 */
424 do {
425 n_merged = 0;
426 j = si->symtab_used;
427 si->symtab_used = 0;
428 for (i = 0; i < j; i++) {
429 if (i < j-1
430 && si->symtab[i].addr == si->symtab[i+1].addr
431 && si->symtab[i].size == si->symtab[i+1].size) {
432 n_merged++;
433 /* merge the two into one */
434 if (VG_(strlen)(&si->strtab[si->symtab[i].nmoff])
435 > VG_(strlen)(&si->strtab[si->symtab[i+1].nmoff])) {
436 si->symtab[si->symtab_used++] = si->symtab[i];
437 } else {
438 si->symtab[si->symtab_used++] = si->symtab[i+1];
439 }
440 i++;
441 } else {
442 si->symtab[si->symtab_used++] = si->symtab[i];
443 }
444 }
445 if (VG_(clo_trace_symtab))
446 VG_(printf)( "%d merged\n", n_merged);
447 }
448 while (n_merged > 0);
449
450 /* Detect and "fix" overlapping address ranges. */
451 n_truncated = 0;
452
453 for (i = 0; i < si->symtab_used-1; i++) {
454
455 vg_assert(si->symtab[i].addr <= si->symtab[i+1].addr);
456
457 /* Check for common (no overlap) case. */
458 if (si->symtab[i].addr + si->symtab[i].size
459 <= si->symtab[i+1].addr)
460 continue;
461
462 /* There's an overlap. Truncate one or the other. */
463 if (VG_(clo_trace_symtab)) {
464 VG_(printf)("overlapping address ranges in symbol table\n\t");
465 printSym(si,i);
466 VG_(printf)("\t");
467 printSym(si,i+1);
468 VG_(printf)("\n");
469 }
470
471 /* Truncate one or the other. */
472 s1 = si->symtab[i].addr;
473 s2 = si->symtab[i+1].addr;
474 e1 = s1 + si->symtab[i].size - 1;
475 e2 = s2 + si->symtab[i+1].size - 1;
476 if (s1 < s2) {
477 e1 = s2-1;
478 } else {
479 vg_assert(s1 == s2);
480 if (e1 > e2) {
481 s1 = e2+1; SWAP(Addr,s1,s2); SWAP(Addr,e1,e2);
482 } else
483 if (e1 < e2) {
484 s2 = e1+1;
485 } else {
486 /* e1 == e2. Identical addr ranges. We'll eventually wind
487 up back at cleanup_more, which will take care of it. */
488 }
489 }
490 si->symtab[i].addr = s1;
491 si->symtab[i+1].addr = s2;
492 si->symtab[i].size = e1 - s1 + 1;
493 si->symtab[i+1].size = e2 - s2 + 1;
494 vg_assert(s1 <= s2);
495 vg_assert(si->symtab[i].size > 0);
496 vg_assert(si->symtab[i+1].size > 0);
497 /* It may be that the i+1 entry now needs to be moved further
498 along to maintain the address order requirement. */
499 j = i+1;
500 while (j < si->symtab_used-1
501 && si->symtab[j].addr > si->symtab[j+1].addr) {
502 SWAP(RiSym,si->symtab[j],si->symtab[j+1]);
503 j++;
504 }
505 n_truncated++;
506 }
507
508 if (n_truncated > 0) goto cleanup_more;
509
510 /* Ensure relevant postconditions hold. */
511 for (i = 0; i < si->symtab_used-1; i++) {
512 /* No zero-sized symbols. */
513 vg_assert(si->symtab[i].size > 0);
514 /* In order. */
515 vg_assert(si->symtab[i].addr < si->symtab[i+1].addr);
516 /* No overlaps. */
517 vg_assert(si->symtab[i].addr + si->symtab[i].size - 1
518 < si->symtab[i+1].addr);
519 }
520# undef SWAP
521}
522
523
524
525/* Sort the location table by starting address. Mash the table around
526 so as to establish the property that addresses are in order and the
527 ranges do not overlap. This facilitates using binary search to map
sewardjb51f2e62002-06-01 23:11:19 +0000528 addresses to locations when we come to query the table.
529*/
sewardjde4a1d02002-03-22 01:27:54 +0000530static
531void canonicaliseLoctab ( SegInfo* si )
532{
533 /* Magic numbers due to Janet Incerpi and Robert Sedgewick. */
534 Int incs[16] = { 1, 3, 7, 21, 48, 112, 336, 861, 1968,
535 4592, 13776, 33936, 86961, 198768,
536 463792, 1391376 };
537 Int lo = 0;
538 Int hi = si->loctab_used-1;
539 Int i, j, h, bigN, hp;
540 RiLoc v;
541
542# define SWAP(ty,aa,bb) \
543 do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0);
544
545 /* Sort by start address. */
546
547 bigN = hi - lo + 1; if (bigN < 2) return;
548 hp = 0; while (hp < 16 && incs[hp] < bigN) hp++; hp--;
549 vg_assert(0 <= hp && hp < 16);
550
551 for (; hp >= 0; hp--) {
552 h = incs[hp];
553 i = lo + h;
554 while (1) {
555 if (i > hi) break;
556 v = si->loctab[i];
557 j = i;
558 while (si->loctab[j-h].addr > v.addr) {
559 si->loctab[j] = si->loctab[j-h];
560 j = j - h;
561 if (j <= (lo + h - 1)) break;
562 }
563 si->loctab[j] = v;
564 i++;
565 }
566 }
567
568 /* If two adjacent entries overlap, truncate the first. */
569 for (i = 0; i < si->loctab_used-1; i++) {
570 vg_assert(si->loctab[i].size < 10000);
571 if (si->loctab[i].addr + si->loctab[i].size > si->loctab[i+1].addr) {
572 /* Do this in signed int32 because the actual .size fields
573 are unsigned 16s. */
574 Int new_size = si->loctab[i+1].addr - si->loctab[i].addr;
575 if (new_size < 0) {
576 si->loctab[i].size = 0;
577 } else
578 if (new_size >= 65536) {
579 si->loctab[i].size = 65535;
580 } else {
581 si->loctab[i].size = (UShort)new_size;
582 }
583 }
584 }
585
586 /* Zap any zero-sized entries resulting from the truncation
587 process. */
588 j = 0;
589 for (i = 0; i < si->loctab_used; i++) {
590 if (si->loctab[i].size > 0) {
591 si->loctab[j] = si->loctab[i];
592 j++;
593 }
594 }
595 si->loctab_used = j;
596
597 /* Ensure relevant postconditions hold. */
598 for (i = 0; i < si->loctab_used-1; i++) {
599 /*
600 VG_(printf)("%d (%d) %d 0x%x\n",
601 i, si->loctab[i+1].confident,
602 si->loctab[i+1].size, si->loctab[i+1].addr );
603 */
604 /* No zero-sized symbols. */
605 vg_assert(si->loctab[i].size > 0);
606 /* In order. */
607 vg_assert(si->loctab[i].addr < si->loctab[i+1].addr);
608 /* No overlaps. */
609 vg_assert(si->loctab[i].addr + si->loctab[i].size - 1
610 < si->loctab[i+1].addr);
611 }
612# undef SWAP
613}
614
615
616/*------------------------------------------------------------*/
sewardjb51f2e62002-06-01 23:11:19 +0000617/*--- Read STABS format debug info. ---*/
sewardjde4a1d02002-03-22 01:27:54 +0000618/*------------------------------------------------------------*/
619
/* The stabs entry types this reader knows by name.  Values are from:
 *   The "stabs" debug format
 *   Menapace, Kingdon and MacKenzie
 *   Cygnus Support
 */
typedef enum {
   N_GSYM  = 32,     /* Global symbol                    */
   N_FUN   = 36,     /* Function start or end            */
   N_STSYM = 38,     /* Data segment file-scope variable */
   N_LCSYM = 40,     /* BSS segment file-scope variable  */
   N_RSYM  = 64,     /* Register variable                */
   N_SLINE = 68,     /* Source line number               */
   N_SO    = 100,    /* Source file path and name        */
   N_LSYM  = 128,    /* Stack variable or type           */
   N_SOL   = 132,    /* Include file name                */
   N_LBRAC = 192,    /* Start of lexical block           */
   N_RBRAC = 224     /* End of lexical block             */
} stab_types;
637
638
639/* Read stabs-format debug info. This is all rather horrible because
640 stabs is a underspecified, kludgy hack.
641*/
642static
643void read_debuginfo_stabs ( SegInfo* si,
644 UChar* stabC, Int stab_sz,
645 UChar* stabstr, Int stabstr_sz )
sewardjde4a1d02002-03-22 01:27:54 +0000646{
sewardjb51f2e62002-06-01 23:11:19 +0000647 Int i;
648 Int curr_filenmoff;
njnb79ad342002-06-05 15:30:30 +0000649 Addr curr_fn_stabs_addr = (Addr)NULL;
650 Addr curr_fnbaseaddr = (Addr)NULL;
sewardjb51f2e62002-06-01 23:11:19 +0000651 Char *curr_file_name, *curr_fn_name;
652 Int n_stab_entries;
njnb79ad342002-06-05 15:30:30 +0000653 Int prev_lineno = 0, lineno = 0;
654 Int lineno_overflows = 0;
655 Bool same_file = True;
sewardjb51f2e62002-06-01 23:11:19 +0000656 struct nlist* stab = (struct nlist*)stabC;
njnb79ad342002-06-05 15:30:30 +0000657
sewardjb51f2e62002-06-01 23:11:19 +0000658 /* Ok. It all looks plausible. Go on and read debug data.
659 stab kinds: 100 N_SO a source file name
660 68 N_SLINE a source line number
661 36 N_FUN start of a function
njn4f9c9342002-04-29 16:03:24 +0000662
sewardjb51f2e62002-06-01 23:11:19 +0000663 In this loop, we maintain a current file name, updated as
664 N_SO/N_SOLs appear, and a current function base address,
665 updated as N_FUNs appear. Based on that, address ranges for
666 N_SLINEs are calculated, and stuffed into the line info table.
sewardjde4a1d02002-03-22 01:27:54 +0000667
sewardjb51f2e62002-06-01 23:11:19 +0000668 Finding the instruction address range covered by an N_SLINE is
669 complicated; see the N_SLINE case below.
670 */
njnb79ad342002-06-05 15:30:30 +0000671 curr_filenmoff = addStr(si,"???");
672 curr_file_name = curr_fn_name = (Char*)NULL;
sewardjde4a1d02002-03-22 01:27:54 +0000673
sewardjb51f2e62002-06-01 23:11:19 +0000674 n_stab_entries = stab_sz/(int)sizeof(struct nlist);
njne0ee0712002-05-03 16:41:05 +0000675
sewardjb51f2e62002-06-01 23:11:19 +0000676 for (i = 0; i < n_stab_entries; i++) {
677# if 0
678 VG_(printf) ( " %2d ", i );
679 VG_(printf) ( "type=0x%x othr=%d desc=%d value=0x%x strx=%d %s",
680 stab[i].n_type, stab[i].n_other, stab[i].n_desc,
681 (int)stab[i].n_value,
682 (int)stab[i].n_un.n_strx,
683 stabstr + stab[i].n_un.n_strx );
684 VG_(printf)("\n");
685# endif
njne0ee0712002-05-03 16:41:05 +0000686
sewardjb51f2e62002-06-01 23:11:19 +0000687 Char *no_fn_name = "???";
688
689 switch (stab[i].n_type) {
690 UInt next_addr;
691
692 /* Two complicated things here:
693 *
694 * 1. the n_desc field in 'struct n_list' in a.out.h is only
695 * 16-bits, which gives a maximum of 65535 lines. We handle
696 * files bigger than this by detecting heuristically
697 * overflows -- if the line count goes from 65000-odd to
698 * 0-odd within the same file, we assume it's an overflow.
699 * Once we switch files, we zero the overflow count.
700 *
701 * 2. To compute the instr address range covered by a single
702 * line, find the address of the next thing and compute the
703 * difference. The approach used depends on what kind of
704 * entry/entries follow...
705 */
706 case N_SLINE: {
707 Int this_addr = (UInt)stab[i].n_value;
708
709 /* Although stored as a short, neg values really are >
710 * 32768, hence the UShort cast. Then we use an Int to
711 * handle overflows. */
712 prev_lineno = lineno;
713 lineno = (Int)((UShort)stab[i].n_desc);
714
715 if (prev_lineno > lineno + OVERFLOW_DIFFERENCE && same_file) {
716 VG_(message)(Vg_DebugMsg,
717 "Line number overflow detected (%d --> %d) in %s",
718 prev_lineno, lineno, curr_file_name);
719 lineno_overflows++;
720 }
721 same_file = True;
722
723 LOOP:
724 if (i+1 >= n_stab_entries) {
725 /* If it's the last entry, just guess the range is
726 * four; can't do any better */
727 next_addr = this_addr + 4;
728 } else {
729 switch (stab[i+1].n_type) {
730 /* Easy, common case: use address of next entry */
731 case N_SLINE: case N_SO:
732 next_addr = (UInt)stab[i+1].n_value;
733 break;
734
735 /* Boring one: skip, look for something more
736 useful. */
737 case N_RSYM: case N_LSYM: case N_LBRAC: case N_RBRAC:
738 case N_STSYM: case N_LCSYM: case N_GSYM:
739 i++;
740 goto LOOP;
741
njnb79ad342002-06-05 15:30:30 +0000742 /* If end-of-this-fun entry, use its address.
743 * If start-of-next-fun entry, find difference between start
744 * of current function and start of next function to work
745 * it out.
746 */
sewardjb51f2e62002-06-01 23:11:19 +0000747 case N_FUN:
748 if ('\0' == * (stabstr + stab[i+1].n_un.n_strx) ) {
749 next_addr = (UInt)stab[i+1].n_value;
750 } else {
njnb79ad342002-06-05 15:30:30 +0000751 next_addr =
752 (UInt)stab[i+1].n_value - curr_fn_stabs_addr;
sewardjb51f2e62002-06-01 23:11:19 +0000753 }
754 break;
755
756 /* N_SOL should be followed by an N_SLINE which can
757 be used */
758 case N_SOL:
759 if (i+2 < n_stab_entries && N_SLINE == stab[i+2].n_type) {
760 next_addr = (UInt)stab[i+2].n_value;
761 break;
762 } else {
763 VG_(printf)("unhandled N_SOL stabs case: %d %d %d",
764 stab[i+1].n_type, i, n_stab_entries);
765 VG_(panic)("unhandled N_SOL stabs case");
766 }
767
768 default:
769 VG_(printf)("unhandled (other) stabs case: %d %d",
770 stab[i+1].n_type,i);
771 /* VG_(panic)("unhandled (other) stabs case"); */
772 next_addr = this_addr + 4;
773 break;
774 }
775 }
776
777 addLineInfo ( si, curr_filenmoff, curr_fnbaseaddr + this_addr,
778 curr_fnbaseaddr + next_addr,
779 lineno + lineno_overflows * LINENO_OVERFLOW, i);
780 break;
781 }
782
783 case N_FUN: {
784 if ('\0' != (stabstr + stab[i].n_un.n_strx)[0] ) {
785 /* N_FUN with a name -- indicates the start of a fn. */
njnb79ad342002-06-05 15:30:30 +0000786 curr_fn_stabs_addr = (Addr)stab[i].n_value;
787 curr_fnbaseaddr = si->offset + curr_fn_stabs_addr;
sewardjb51f2e62002-06-01 23:11:19 +0000788 curr_fn_name = stabstr + stab[i].n_un.n_strx;
789 } else {
790 curr_fn_name = no_fn_name;
791 }
792 break;
793 }
794
795 case N_SOL:
796 if (lineno_overflows != 0) {
797 VG_(message)(Vg_UserMsg,
798 "Warning: file %s is very big (> 65535 lines) "
799 "Line numbers and annotation for this file might "
800 "be wrong. Sorry",
801 curr_file_name);
802 }
803 /* fall through! */
804 case N_SO:
805 lineno_overflows = 0;
806
807 /* seems to give lots of locations in header files */
808 /* case 130: */ /* BINCL */
809 {
810 UChar* nm = stabstr + stab[i].n_un.n_strx;
811 UInt len = VG_(strlen)(nm);
812
813 if (len > 0 && nm[len-1] != '/') {
814 curr_filenmoff = addStr ( si, nm );
815 curr_file_name = stabstr + stab[i].n_un.n_strx;
816 }
817 else
818 if (len == 0)
819 curr_filenmoff = addStr ( si, "?1\0" );
820
821 break;
822 }
823
824# if 0
825 case 162: /* EINCL */
826 curr_filenmoff = addStr ( si, "?2\0" );
827 break;
828# endif
829
830 default:
831 break;
832 }
833 } /* for (i = 0; i < stab_sz/(int)sizeof(struct nlist); i++) */
sewardjde4a1d02002-03-22 01:27:54 +0000834}
835
836
sewardjb51f2e62002-06-01 23:11:19 +0000837/*------------------------------------------------------------*/
838/*--- Read DWARF2 format debug info. ---*/
839/*------------------------------------------------------------*/
sewardjc134dd92002-06-01 14:21:36 +0000840
841/* Structure found in the .debug_line section. */
842typedef struct
843{
844 UChar li_length [4];
845 UChar li_version [2];
846 UChar li_prologue_length [4];
847 UChar li_min_insn_length [1];
848 UChar li_default_is_stmt [1];
849 UChar li_line_base [1];
850 UChar li_line_range [1];
851 UChar li_opcode_base [1];
852}
853DWARF2_External_LineInfo;
sewardjd84606d2002-06-18 01:04:57 +0000854
sewardjc134dd92002-06-01 14:21:36 +0000855typedef struct
856{
sewardj08a50f62002-06-17 02:21:20 +0000857 UInt li_length;
sewardjc134dd92002-06-01 14:21:36 +0000858 UShort li_version;
859 UInt li_prologue_length;
860 UChar li_min_insn_length;
861 UChar li_default_is_stmt;
sewardj08a50f62002-06-17 02:21:20 +0000862 Int li_line_base;
sewardjc134dd92002-06-01 14:21:36 +0000863 UChar li_line_range;
864 UChar li_opcode_base;
865}
866DWARF2_Internal_LineInfo;
sewardjd84606d2002-06-18 01:04:57 +0000867
/* Line number opcodes. */
enum dwarf_line_number_ops {
   DW_LNS_extended_op        = 0,
   DW_LNS_copy               = 1,
   DW_LNS_advance_pc         = 2,
   DW_LNS_advance_line       = 3,
   DW_LNS_set_file           = 4,
   DW_LNS_set_column         = 5,
   DW_LNS_negate_stmt        = 6,
   DW_LNS_set_basic_block    = 7,
   DW_LNS_const_add_pc       = 8,
   DW_LNS_fixed_advance_pc   = 9,
   /* The remaining three are DWARF 3. */
   DW_LNS_set_prologue_end   = 10,
   DW_LNS_set_epilogue_begin = 11,
   DW_LNS_set_isa            = 12
};
886
/* Line number extended opcodes. */
enum dwarf_line_number_x_ops {
   DW_LNE_end_sequence = 1,
   DW_LNE_set_address  = 2,
   DW_LNE_define_file  = 3
};
894
895typedef struct State_Machine_Registers
896{
sewardj08a50f62002-06-17 02:21:20 +0000897 Addr address;
sewardjc134dd92002-06-01 14:21:36 +0000898 UInt file;
899 UInt line;
900 UInt column;
901 Int is_stmt;
902 Int basic_block;
sewardj08a50f62002-06-17 02:21:20 +0000903 Int end_sequence;
904 /* This variable hold the number of the last entry seen
905 in the File Table. */
sewardjc134dd92002-06-01 14:21:36 +0000906 UInt last_file_entry;
907} SMR;
908
sewardjb51f2e62002-06-01 23:11:19 +0000909
910static
911UInt read_leb128 ( UChar* data, Int* length_return, Int sign )
912{
sewardj08a50f62002-06-17 02:21:20 +0000913 UInt result = 0;
914 UInt num_read = 0;
915 Int shift = 0;
916 UChar byte;
sewardjb51f2e62002-06-01 23:11:19 +0000917
918 do
919 {
920 byte = * data ++;
921 num_read ++;
922
923 result |= (byte & 0x7f) << shift;
924
925 shift += 7;
926
927 }
928 while (byte & 0x80);
929
930 if (length_return != NULL)
931 * length_return = num_read;
932
933 if (sign && (shift < 32) && (byte & 0x40))
934 result |= -1 << shift;
935
936 return result;
937}
938
939
sewardjc134dd92002-06-01 14:21:36 +0000940static SMR state_machine_regs;
941
sewardj08a50f62002-06-17 02:21:20 +0000942static
943void reset_state_machine ( Int is_stmt )
sewardjc134dd92002-06-01 14:21:36 +0000944{
sewardj08a50f62002-06-17 02:21:20 +0000945 if (0) VG_(printf)("smr.a := %p (reset)\n", 0 );
sewardjc134dd92002-06-01 14:21:36 +0000946 state_machine_regs.address = 0;
947 state_machine_regs.file = 1;
948 state_machine_regs.line = 1;
949 state_machine_regs.column = 0;
950 state_machine_regs.is_stmt = is_stmt;
951 state_machine_regs.basic_block = 0;
952 state_machine_regs.end_sequence = 0;
953 state_machine_regs.last_file_entry = 0;
954}
955
956/* Handled an extend line op. Returns true if this is the end
957 of sequence. */
sewardj08a50f62002-06-17 02:21:20 +0000958static
959int process_extended_line_op( SegInfo *si, UInt** fnames,
960 UChar* data, Int is_stmt, Int pointer_size)
sewardjc134dd92002-06-01 14:21:36 +0000961{
962 UChar op_code;
sewardj08a50f62002-06-17 02:21:20 +0000963 Int bytes_read;
sewardjc134dd92002-06-01 14:21:36 +0000964 UInt len;
965 UChar * name;
sewardj08a50f62002-06-17 02:21:20 +0000966 Addr adr;
sewardjc134dd92002-06-01 14:21:36 +0000967
968 len = read_leb128 (data, & bytes_read, 0);
969 data += bytes_read;
970
971 if (len == 0)
972 {
sewardj08a50f62002-06-17 02:21:20 +0000973 VG_(message)(Vg_UserMsg,
974 "badly formed extended line op encountered!\n");
sewardjc134dd92002-06-01 14:21:36 +0000975 return bytes_read;
976 }
977
978 len += bytes_read;
979 op_code = * data ++;
980
981
982 switch (op_code)
983 {
984 case DW_LNE_end_sequence:
sewardj08a50f62002-06-17 02:21:20 +0000985 if (0) VG_(printf)("1001: si->o %p, smr.a %p\n",
986 si->offset, state_machine_regs.address );
sewardjd84606d2002-06-18 01:04:57 +0000987 state_machine_regs.end_sequence = 1; /* JRS: added for compliance
988 with spec; is pointless due to reset_state_machine below
989 */
sewardj08a50f62002-06-17 02:21:20 +0000990 addLineInfo (si, (*fnames)[state_machine_regs.file],
991 si->offset + (state_machine_regs.address - 1),
992 si->offset + (state_machine_regs.address),
993 0, 0);
sewardjc134dd92002-06-01 14:21:36 +0000994 reset_state_machine (is_stmt);
995 break;
996
997 case DW_LNE_set_address:
998 /* XXX: Pointer size could be 8 */
sewardj08a50f62002-06-17 02:21:20 +0000999 vg_assert(pointer_size == 4);
sewardjc134dd92002-06-01 14:21:36 +00001000 adr = *((Addr *)data);
sewardj08a50f62002-06-17 02:21:20 +00001001 if (0) VG_(printf)("smr.a := %p\n", adr );
sewardjc134dd92002-06-01 14:21:36 +00001002 state_machine_regs.address = adr;
1003 break;
1004
1005 case DW_LNE_define_file:
sewardjc134dd92002-06-01 14:21:36 +00001006 ++ state_machine_regs.last_file_entry;
1007 name = data;
1008 if (*fnames == NULL)
1009 *fnames = VG_(malloc)(VG_AR_SYMTAB, sizeof (UInt) * 2);
1010 else
sewardj08a50f62002-06-17 02:21:20 +00001011 *fnames = VG_(realloc)(
1012 VG_AR_SYMTAB, *fnames,
1013 sizeof(UInt)
1014 * (state_machine_regs.last_file_entry + 1));
sewardjc134dd92002-06-01 14:21:36 +00001015 (*fnames)[state_machine_regs.last_file_entry] = addStr (si,name);
1016 data += VG_(strlen) ((char *) data) + 1;
1017 read_leb128 (data, & bytes_read, 0);
1018 data += bytes_read;
1019 read_leb128 (data, & bytes_read, 0);
1020 data += bytes_read;
sewardj08a50f62002-06-17 02:21:20 +00001021 read_leb128 (data, & bytes_read, 0);
sewardjc134dd92002-06-01 14:21:36 +00001022 break;
1023
1024 default:
1025 break;
1026 }
1027
1028 return len;
1029}
1030
1031
sewardjb51f2e62002-06-01 23:11:19 +00001032static
1033void read_debuginfo_dwarf2 ( SegInfo* si, UChar* dwarf2, Int dwarf2_sz )
sewardjc134dd92002-06-01 14:21:36 +00001034{
1035 DWARF2_External_LineInfo * external;
1036 DWARF2_Internal_LineInfo info;
1037 UChar * standard_opcodes;
sewardjb51f2e62002-06-01 23:11:19 +00001038 UChar * data = dwarf2;
1039 UChar * end = dwarf2 + dwarf2_sz;
sewardjc134dd92002-06-01 14:21:36 +00001040 UChar * end_of_sequence;
sewardj08a50f62002-06-17 02:21:20 +00001041 UInt * fnames = NULL;
sewardjc134dd92002-06-01 14:21:36 +00001042
sewardjd84606d2002-06-18 01:04:57 +00001043 /* Fails due to gcc padding ...
1044 vg_assert(sizeof(DWARF2_External_LineInfo)
1045 == sizeof(DWARF2_Internal_LineInfo));
1046 */
sewardjc134dd92002-06-01 14:21:36 +00001047
1048 while (data < end)
1049 {
1050 external = (DWARF2_External_LineInfo *) data;
1051
1052 /* Check the length of the block. */
sewardj08a50f62002-06-17 02:21:20 +00001053 info.li_length = * ((UInt *)(external->li_length));
sewardjc134dd92002-06-01 14:21:36 +00001054
1055 if (info.li_length == 0xffffffff)
1056 {
sewardjb51f2e62002-06-01 23:11:19 +00001057 vg_symerr("64-bit DWARF line info is not supported yet.");
sewardjc134dd92002-06-01 14:21:36 +00001058 break;
1059 }
1060
sewardjb51f2e62002-06-01 23:11:19 +00001061 if (info.li_length + sizeof (external->li_length) > dwarf2_sz)
sewardjc134dd92002-06-01 14:21:36 +00001062 {
sewardj08a50f62002-06-17 02:21:20 +00001063 vg_symerr("DWARF line info appears to be corrupt "
1064 "- the section is too small");
sewardjb51f2e62002-06-01 23:11:19 +00001065 return;
sewardjc134dd92002-06-01 14:21:36 +00001066 }
1067
1068 /* Check its version number. */
sewardj08a50f62002-06-17 02:21:20 +00001069 info.li_version = * ((UShort *) (external->li_version));
sewardjc134dd92002-06-01 14:21:36 +00001070 if (info.li_version != 2)
1071 {
sewardj08a50f62002-06-17 02:21:20 +00001072 vg_symerr("Only DWARF version 2 line info "
1073 "is currently supported.");
sewardjb51f2e62002-06-01 23:11:19 +00001074 return;
sewardjc134dd92002-06-01 14:21:36 +00001075 }
1076
sewardjd84606d2002-06-18 01:04:57 +00001077 info.li_prologue_length = * ((UInt *) (external->li_prologue_length));
1078 info.li_min_insn_length = * ((UChar *)(external->li_min_insn_length));
1079 info.li_default_is_stmt = * ((UChar *)(external->li_default_is_stmt));
1080
1081 /* JRS: changed (UInt*) to (UChar*) */
1082 info.li_line_base = * ((UChar *)(external->li_line_base));
1083
1084 info.li_line_range = * ((UChar *)(external->li_line_range));
1085 info.li_opcode_base = * ((UChar *)(external->li_opcode_base));
sewardjc134dd92002-06-01 14:21:36 +00001086
1087 /* Sign extend the line base field. */
1088 info.li_line_base <<= 24;
1089 info.li_line_base >>= 24;
1090
sewardj08a50f62002-06-17 02:21:20 +00001091 end_of_sequence = data + info.li_length
1092 + sizeof (external->li_length);
sewardjc134dd92002-06-01 14:21:36 +00001093
1094 reset_state_machine (info.li_default_is_stmt);
1095
1096 /* Read the contents of the Opcodes table. */
1097 standard_opcodes = data + sizeof (* external);
1098
sewardjc134dd92002-06-01 14:21:36 +00001099 /* Read the contents of the Directory table. */
1100 data = standard_opcodes + info.li_opcode_base - 1;
1101
sewardj08a50f62002-06-17 02:21:20 +00001102 if (* data == 0)
1103 {
1104 }
sewardjc134dd92002-06-01 14:21:36 +00001105 else
1106 {
sewardj08a50f62002-06-17 02:21:20 +00001107 /* We ignore the directory table, since gcc gives the entire
1108 path as part of the filename */
sewardjc134dd92002-06-01 14:21:36 +00001109 while (* data != 0)
1110 {
1111 data += VG_(strlen) ((char *) data) + 1;
1112 }
1113 }
1114
1115 /* Skip the NUL at the end of the table. */
sewardjd84606d2002-06-18 01:04:57 +00001116 if (*data != 0) {
1117 vg_symerr("can't find NUL at end of DWARF2 directory table");
1118 return;
1119 }
sewardjc134dd92002-06-01 14:21:36 +00001120 data ++;
1121
1122 /* Read the contents of the File Name table. */
sewardj08a50f62002-06-17 02:21:20 +00001123 if (* data == 0)
1124 {
1125 }
sewardjc134dd92002-06-01 14:21:36 +00001126 else
1127 {
sewardjc134dd92002-06-01 14:21:36 +00001128 while (* data != 0)
1129 {
1130 UChar * name;
1131 Int bytes_read;
1132
sewardj08a50f62002-06-17 02:21:20 +00001133 ++ state_machine_regs.last_file_entry;
sewardjc134dd92002-06-01 14:21:36 +00001134 name = data;
sewardj08a50f62002-06-17 02:21:20 +00001135 /* Since we don't have realloc (0, ....) == malloc (...)
1136 semantics, we need to malloc the first time. */
sewardjc134dd92002-06-01 14:21:36 +00001137
1138 if (fnames == NULL)
1139 fnames = VG_(malloc)(VG_AR_SYMTAB, sizeof (UInt) * 2);
1140 else
sewardj08a50f62002-06-17 02:21:20 +00001141 fnames = VG_(realloc)(VG_AR_SYMTAB, fnames,
1142 sizeof(UInt)
1143 * (state_machine_regs.last_file_entry + 1));
1144 data += VG_(strlen) ((Char *) data) + 1;
sewardjc134dd92002-06-01 14:21:36 +00001145 fnames[state_machine_regs.last_file_entry] = addStr (si,name);
1146
1147 read_leb128 (data, & bytes_read, 0);
1148 data += bytes_read;
1149 read_leb128 (data, & bytes_read, 0);
1150 data += bytes_read;
1151 read_leb128 (data, & bytes_read, 0);
1152 data += bytes_read;
1153 }
1154 }
1155
1156 /* Skip the NUL at the end of the table. */
sewardjd84606d2002-06-18 01:04:57 +00001157 if (*data != 0) {
1158 vg_symerr("can't find NUL at end of DWARF2 file name table");
1159 return;
1160 }
sewardjc134dd92002-06-01 14:21:36 +00001161 data ++;
1162
1163 /* Now display the statements. */
1164
1165 while (data < end_of_sequence)
1166 {
1167 UChar op_code;
1168 Int adv;
1169 Int bytes_read;
1170
1171 op_code = * data ++;
1172
1173 if (op_code >= info.li_opcode_base)
1174 {
1175 Int advAddr;
1176 op_code -= info.li_opcode_base;
sewardj08a50f62002-06-17 02:21:20 +00001177 adv = (op_code / info.li_line_range)
1178 * info.li_min_insn_length;
sewardjc134dd92002-06-01 14:21:36 +00001179 advAddr = adv;
1180 state_machine_regs.address += adv;
sewardj08a50f62002-06-17 02:21:20 +00001181 if (0) VG_(printf)("smr.a += %p\n", adv );
sewardjc134dd92002-06-01 14:21:36 +00001182 adv = (op_code % info.li_line_range) + info.li_line_base;
sewardj08a50f62002-06-17 02:21:20 +00001183 if (0) VG_(printf)("1002: si->o %p, smr.a %p\n",
1184 si->offset, state_machine_regs.address );
1185 addLineInfo (si, fnames[state_machine_regs.file],
1186 si->offset + (state_machine_regs.address
1187 - advAddr),
1188 si->offset + (state_machine_regs.address),
1189 state_machine_regs.line, 0);
sewardjc134dd92002-06-01 14:21:36 +00001190 state_machine_regs.line += adv;
1191 }
1192 else switch (op_code)
1193 {
1194 case DW_LNS_extended_op:
sewardj08a50f62002-06-17 02:21:20 +00001195 data += process_extended_line_op (
1196 si, &fnames, data,
1197 info.li_default_is_stmt, sizeof (Addr));
sewardjc134dd92002-06-01 14:21:36 +00001198 break;
1199
1200 case DW_LNS_copy:
sewardj08a50f62002-06-17 02:21:20 +00001201 if (0) VG_(printf)("1002: si->o %p, smr.a %p\n",
1202 si->offset, state_machine_regs.address );
1203 addLineInfo (si, fnames[state_machine_regs.file],
1204 si->offset + state_machine_regs.address,
1205 si->offset + (state_machine_regs.address + 1),
1206 state_machine_regs.line , 0);
sewardjd84606d2002-06-18 01:04:57 +00001207 state_machine_regs.basic_block = 0; /* JRS added */
sewardjc134dd92002-06-01 14:21:36 +00001208 break;
1209
1210 case DW_LNS_advance_pc:
sewardj08a50f62002-06-17 02:21:20 +00001211 adv = info.li_min_insn_length
1212 * read_leb128 (data, & bytes_read, 0);
sewardjc134dd92002-06-01 14:21:36 +00001213 data += bytes_read;
1214 state_machine_regs.address += adv;
sewardj08a50f62002-06-17 02:21:20 +00001215 if (0) VG_(printf)("smr.a += %p\n", adv );
sewardjc134dd92002-06-01 14:21:36 +00001216 break;
1217
1218 case DW_LNS_advance_line:
1219 adv = read_leb128 (data, & bytes_read, 1);
1220 data += bytes_read;
1221 state_machine_regs.line += adv;
1222 break;
1223
1224 case DW_LNS_set_file:
1225 adv = read_leb128 (data, & bytes_read, 0);
1226 data += bytes_read;
1227 state_machine_regs.file = adv;
1228 break;
1229
1230 case DW_LNS_set_column:
1231 adv = read_leb128 (data, & bytes_read, 0);
1232 data += bytes_read;
1233 state_machine_regs.column = adv;
1234 break;
1235
1236 case DW_LNS_negate_stmt:
1237 adv = state_machine_regs.is_stmt;
1238 adv = ! adv;
1239 state_machine_regs.is_stmt = adv;
1240 break;
1241
1242 case DW_LNS_set_basic_block:
1243 state_machine_regs.basic_block = 1;
1244 break;
1245
1246 case DW_LNS_const_add_pc:
1247 adv = (((255 - info.li_opcode_base) / info.li_line_range)
1248 * info.li_min_insn_length);
1249 state_machine_regs.address += adv;
sewardj08a50f62002-06-17 02:21:20 +00001250 if (0) VG_(printf)("smr.a += %p\n", adv );
sewardjc134dd92002-06-01 14:21:36 +00001251 break;
1252
1253 case DW_LNS_fixed_advance_pc:
1254 /* XXX: Need something to get 2 bytes */
1255 adv = *((UShort *)data);
1256 data += 2;
1257 state_machine_regs.address += adv;
sewardj08a50f62002-06-17 02:21:20 +00001258 if (0) VG_(printf)("smr.a += %p\n", adv );
sewardjc134dd92002-06-01 14:21:36 +00001259 break;
1260
1261 case DW_LNS_set_prologue_end:
1262 break;
1263
1264 case DW_LNS_set_epilogue_begin:
1265 break;
1266
1267 case DW_LNS_set_isa:
1268 adv = read_leb128 (data, & bytes_read, 0);
1269 data += bytes_read;
1270 break;
1271
1272 default:
1273 {
1274 int j;
1275 for (j = standard_opcodes[op_code - 1]; j > 0 ; --j)
1276 {
1277 read_leb128 (data, &bytes_read, 0);
1278 data += bytes_read;
1279 }
1280 }
1281 break;
1282 }
1283 }
1284 VG_(free)(VG_AR_SYMTAB, fnames);
1285 fnames = NULL;
1286 }
sewardjc134dd92002-06-01 14:21:36 +00001287}
1288
sewardjb51f2e62002-06-01 23:11:19 +00001289
1290/*------------------------------------------------------------*/
1291/*--- Read info from a .so/exe file. ---*/
1292/*------------------------------------------------------------*/
1293
sewardjde4a1d02002-03-22 01:27:54 +00001294/* Read the symbols from the object/exe specified by the SegInfo into
1295 the tables within the supplied SegInfo. */
1296static
1297void vg_read_lib_symbols ( SegInfo* si )
1298{
1299 Elf32_Ehdr* ehdr; /* The ELF header */
1300 Elf32_Shdr* shdr; /* The section table */
1301 UChar* sh_strtab; /* The section table's string table */
sewardjb51f2e62002-06-01 23:11:19 +00001302 UChar* stab; /* The .stab table */
sewardjde4a1d02002-03-22 01:27:54 +00001303 UChar* stabstr; /* The .stab string table */
sewardjb51f2e62002-06-01 23:11:19 +00001304 UChar* dwarf2; /* The DWARF2 location info table */
sewardjde4a1d02002-03-22 01:27:54 +00001305 Int stab_sz; /* Size in bytes of the .stab table */
1306 Int stabstr_sz; /* Size in bytes of the .stab string table */
sewardjb51f2e62002-06-01 23:11:19 +00001307 Int dwarf2_sz; /* Size in bytes of the DWARF2 srcloc table*/
sewardjde4a1d02002-03-22 01:27:54 +00001308 Int fd;
1309 Int i;
1310 Bool ok;
1311 Addr oimage;
1312 Int n_oimage;
sewardjb3586202002-05-09 17:38:13 +00001313 struct vki_stat stat_buf;
sewardjde4a1d02002-03-22 01:27:54 +00001314
sewardjde4a1d02002-03-22 01:27:54 +00001315 oimage = (Addr)NULL;
1316 if (VG_(clo_verbosity) > 1)
njne0ee0712002-05-03 16:41:05 +00001317 VG_(message)(Vg_UserMsg, "Reading syms from %s", si->filename );
sewardjde4a1d02002-03-22 01:27:54 +00001318
1319 /* mmap the object image aboard, so that we can read symbols and
1320 line number info out of it. It will be munmapped immediately
1321 thereafter; it is only aboard transiently. */
1322
sewardjb3586202002-05-09 17:38:13 +00001323 i = VG_(stat)(si->filename, &stat_buf);
sewardjde4a1d02002-03-22 01:27:54 +00001324 if (i != 0) {
1325 vg_symerr("Can't stat .so/.exe (to determine its size)?!");
1326 return;
1327 }
1328 n_oimage = stat_buf.st_size;
1329
1330 fd = VG_(open_read)(si->filename);
1331 if (fd == -1) {
1332 vg_symerr("Can't open .so/.exe to read symbols?!");
1333 return;
1334 }
1335
sewardjb3586202002-05-09 17:38:13 +00001336 oimage = (Addr)VG_(mmap)( NULL, n_oimage,
1337 VKI_PROT_READ, VKI_MAP_PRIVATE, fd, 0 );
sewardjde4a1d02002-03-22 01:27:54 +00001338 if (oimage == ((Addr)(-1))) {
1339 VG_(message)(Vg_UserMsg,
1340 "mmap failed on %s", si->filename );
1341 VG_(close)(fd);
1342 return;
1343 }
1344
1345 VG_(close)(fd);
1346
1347 /* Ok, the object image is safely in oimage[0 .. n_oimage-1].
1348 Now verify that it is a valid ELF .so or executable image.
1349 */
1350 ok = (n_oimage >= sizeof(Elf32_Ehdr));
1351 ehdr = (Elf32_Ehdr*)oimage;
1352
1353 if (ok) {
1354 ok &= (ehdr->e_ident[EI_MAG0] == 0x7F
1355 && ehdr->e_ident[EI_MAG1] == 'E'
1356 && ehdr->e_ident[EI_MAG2] == 'L'
1357 && ehdr->e_ident[EI_MAG3] == 'F');
1358 ok &= (ehdr->e_ident[EI_CLASS] == ELFCLASS32
1359 && ehdr->e_ident[EI_DATA] == ELFDATA2LSB
1360 && ehdr->e_ident[EI_VERSION] == EV_CURRENT);
1361 ok &= (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_DYN);
1362 ok &= (ehdr->e_machine == EM_386);
1363 ok &= (ehdr->e_version == EV_CURRENT);
1364 ok &= (ehdr->e_shstrndx != SHN_UNDEF);
1365 ok &= (ehdr->e_shoff != 0 && ehdr->e_shnum != 0);
1366 }
1367
1368 if (!ok) {
1369 vg_symerr("Invalid ELF header, or missing stringtab/sectiontab.");
1370 VG_(munmap) ( (void*)oimage, n_oimage );
1371 return;
1372 }
1373
1374 if (VG_(clo_trace_symtab))
1375 VG_(printf)(
1376 "shoff = %d, shnum = %d, size = %d, n_vg_oimage = %d\n",
1377 ehdr->e_shoff, ehdr->e_shnum, sizeof(Elf32_Shdr), n_oimage );
1378
1379 if (ehdr->e_shoff + ehdr->e_shnum*sizeof(Elf32_Shdr) > n_oimage) {
1380 vg_symerr("ELF section header is beyond image end?!");
1381 VG_(munmap) ( (void*)oimage, n_oimage );
1382 return;
1383 }
1384
1385 shdr = (Elf32_Shdr*)(oimage + ehdr->e_shoff);
1386 sh_strtab = (UChar*)(oimage + shdr[ehdr->e_shstrndx].sh_offset);
1387
1388 /* try and read the object's symbol table */
1389 {
1390 UChar* o_strtab = NULL;
1391 Elf32_Sym* o_symtab = NULL;
1392 UInt o_strtab_sz = 0;
1393 UInt o_symtab_sz = 0;
1394
1395 UChar* o_got = NULL;
1396 UChar* o_plt = NULL;
1397 UInt o_got_sz = 0;
1398 UInt o_plt_sz = 0;
1399
1400 Bool snaffle_it;
1401 Addr sym_addr;
1402
1403 /* find the .stabstr and .stab sections */
1404 for (i = 0; i < ehdr->e_shnum; i++) {
1405 if (0 == VG_(strcmp)(".symtab",sh_strtab + shdr[i].sh_name)) {
1406 o_symtab = (Elf32_Sym*)(oimage + shdr[i].sh_offset);
1407 o_symtab_sz = shdr[i].sh_size;
1408 vg_assert((o_symtab_sz % sizeof(Elf32_Sym)) == 0);
1409 /* check image overrun here */
1410 }
1411 if (0 == VG_(strcmp)(".strtab",sh_strtab + shdr[i].sh_name)) {
1412 o_strtab = (UChar*)(oimage + shdr[i].sh_offset);
1413 o_strtab_sz = shdr[i].sh_size;
1414 /* check image overrun here */
1415 }
1416
1417 /* find out where the .got and .plt sections will be in the
1418 executable image, not in the object image transiently loaded.
1419 */
1420 if (0 == VG_(strcmp)(".got",sh_strtab + shdr[i].sh_name)) {
1421 o_got = (UChar*)(si->offset
1422 + shdr[i].sh_offset);
1423 o_got_sz = shdr[i].sh_size;
1424 /* check image overrun here */
1425 }
1426 if (0 == VG_(strcmp)(".plt",sh_strtab + shdr[i].sh_name)) {
1427 o_plt = (UChar*)(si->offset
1428 + shdr[i].sh_offset);
1429 o_plt_sz = shdr[i].sh_size;
1430 /* check image overrun here */
1431 }
1432
1433 }
1434
1435 if (VG_(clo_trace_symtab)) {
1436 if (o_plt) VG_(printf)( "PLT: %p .. %p\n",
1437 o_plt, o_plt + o_plt_sz - 1 );
1438 if (o_got) VG_(printf)( "GOT: %p .. %p\n",
1439 o_got, o_got + o_got_sz - 1 );
1440 }
1441
1442 if (o_strtab == NULL || o_symtab == NULL) {
1443 vg_symerr(" object doesn't have a symbol table");
1444 } else {
1445 /* Perhaps should start at i = 1; ELF docs suggest that entry
1446 0 always denotes `unknown symbol'. */
1447 for (i = 1; i < o_symtab_sz/sizeof(Elf32_Sym); i++){
1448# if 0
1449 VG_(printf)("raw symbol: ");
1450 switch (ELF32_ST_BIND(o_symtab[i].st_info)) {
1451 case STB_LOCAL: VG_(printf)("LOC "); break;
1452 case STB_GLOBAL: VG_(printf)("GLO "); break;
1453 case STB_WEAK: VG_(printf)("WEA "); break;
1454 case STB_LOPROC: VG_(printf)("lop "); break;
1455 case STB_HIPROC: VG_(printf)("hip "); break;
1456 default: VG_(printf)("??? "); break;
1457 }
1458 switch (ELF32_ST_TYPE(o_symtab[i].st_info)) {
1459 case STT_NOTYPE: VG_(printf)("NOT "); break;
1460 case STT_OBJECT: VG_(printf)("OBJ "); break;
1461 case STT_FUNC: VG_(printf)("FUN "); break;
1462 case STT_SECTION: VG_(printf)("SEC "); break;
1463 case STT_FILE: VG_(printf)("FIL "); break;
1464 case STT_LOPROC: VG_(printf)("lop "); break;
1465 case STT_HIPROC: VG_(printf)("hip "); break;
1466 default: VG_(printf)("??? "); break;
1467 }
1468 VG_(printf)(
1469 ": value %p, size %d, name %s\n",
1470 si->offset+(UChar*)o_symtab[i].st_value,
1471 o_symtab[i].st_size,
1472 o_symtab[i].st_name
1473 ? ((Char*)o_strtab+o_symtab[i].st_name)
1474 : (Char*)"NONAME");
1475# endif
1476
1477 /* Figure out if we're interested in the symbol.
1478 Firstly, is it of the right flavour?
1479 */
1480 snaffle_it
1481 = ( (ELF32_ST_BIND(o_symtab[i].st_info) == STB_GLOBAL ||
1482 ELF32_ST_BIND(o_symtab[i].st_info) == STB_LOCAL /* ||
1483 ELF32_ST_BIND(o_symtab[i].st_info) == STB_WEAK */)
1484 &&
1485 (ELF32_ST_TYPE(o_symtab[i].st_info) == STT_FUNC /*||
1486 ELF32_ST_TYPE(o_symtab[i].st_info) == STT_OBJECT*/)
1487 );
1488
1489 /* Secondly, if it's apparently in a GOT or PLT, it's really
1490 a reference to a symbol defined elsewhere, so ignore it.
1491 */
1492 sym_addr = si->offset
1493 + (UInt)o_symtab[i].st_value;
1494 if (o_got != NULL
1495 && sym_addr >= (Addr)o_got
1496 && sym_addr < (Addr)(o_got+o_got_sz)) {
1497 snaffle_it = False;
1498 if (VG_(clo_trace_symtab)) {
1499 VG_(printf)( "in GOT: %s\n",
1500 o_strtab+o_symtab[i].st_name);
1501 }
1502 }
1503 if (o_plt != NULL
1504 && sym_addr >= (Addr)o_plt
1505 && sym_addr < (Addr)(o_plt+o_plt_sz)) {
1506 snaffle_it = False;
1507 if (VG_(clo_trace_symtab)) {
1508 VG_(printf)( "in PLT: %s\n",
1509 o_strtab+o_symtab[i].st_name);
1510 }
1511 }
1512
1513 /* Don't bother if nameless, or zero-sized. */
1514 if (snaffle_it
1515 && (o_symtab[i].st_name == (Elf32_Word)NULL
1516 || /* VG_(strlen)(o_strtab+o_symtab[i].st_name) == 0 */
1517 /* equivalent but cheaper ... */
1518 * ((UChar*)(o_strtab+o_symtab[i].st_name)) == 0
1519 || o_symtab[i].st_size == 0)) {
1520 snaffle_it = False;
1521 if (VG_(clo_trace_symtab)) {
1522 VG_(printf)( "size=0: %s\n",
1523 o_strtab+o_symtab[i].st_name);
1524 }
1525 }
1526
1527# if 0
1528 /* Avoid _dl_ junk. (Why?) */
1529 /* 01-02-24: disabled until I find out if it really helps. */
1530 if (snaffle_it
1531 && (VG_(strncmp)("_dl_", o_strtab+o_symtab[i].st_name, 4) == 0
1532 || VG_(strncmp)("_r_debug",
1533 o_strtab+o_symtab[i].st_name, 8) == 0)) {
1534 snaffle_it = False;
1535 if (VG_(clo_trace_symtab)) {
1536 VG_(printf)( "_dl_ junk: %s\n",
1537 o_strtab+o_symtab[i].st_name);
1538 }
1539 }
1540# endif
1541
1542 /* This seems to significantly reduce the number of junk
1543 symbols, and particularly reduces the number of
1544 overlapping address ranges. Don't ask me why ... */
1545 if (snaffle_it && (Int)o_symtab[i].st_value == 0) {
1546 snaffle_it = False;
1547 if (VG_(clo_trace_symtab)) {
1548 VG_(printf)( "valu=0: %s\n",
1549 o_strtab+o_symtab[i].st_name);
1550 }
1551 }
1552
1553 /* If no part of the symbol falls within the mapped range,
1554 ignore it. */
1555 if (sym_addr+o_symtab[i].st_size <= si->start
1556 || sym_addr >= si->start+si->size) {
1557 snaffle_it = False;
1558 }
1559
1560 if (snaffle_it) {
1561 /* it's an interesting symbol; record ("snaffle") it. */
1562 RiSym sym;
1563 Char* t0 = o_symtab[i].st_name
1564 ? (Char*)(o_strtab+o_symtab[i].st_name)
1565 : (Char*)"NONAME";
1566 Int nmoff = addStr ( si, t0 );
1567 vg_assert(nmoff >= 0
1568 /* && 0==VG_(strcmp)(t0,&vg_strtab[nmoff]) */ );
1569 vg_assert( (Int)o_symtab[i].st_value >= 0);
1570 /* VG_(printf)("%p + %d: %s\n", si->addr,
1571 (Int)o_symtab[i].st_value, t0 ); */
1572 sym.addr = sym_addr;
1573 sym.size = o_symtab[i].st_size;
1574 sym.nmoff = nmoff;
1575 addSym ( si, &sym );
1576 }
1577 }
1578 }
1579 }
1580
sewardjb51f2e62002-06-01 23:11:19 +00001581 /* Reading of the stabs and/or dwarf2 debug format information, if
1582 any. */
sewardjde4a1d02002-03-22 01:27:54 +00001583 stabstr = NULL;
1584 stab = NULL;
sewardjb51f2e62002-06-01 23:11:19 +00001585 dwarf2 = NULL;
sewardjde4a1d02002-03-22 01:27:54 +00001586 stabstr_sz = 0;
1587 stab_sz = 0;
sewardjb51f2e62002-06-01 23:11:19 +00001588 dwarf2_sz = 0;
1589
1590 /* find the .stabstr / .stab / .debug_line sections */
sewardjde4a1d02002-03-22 01:27:54 +00001591 for (i = 0; i < ehdr->e_shnum; i++) {
1592 if (0 == VG_(strcmp)(".stab",sh_strtab + shdr[i].sh_name)) {
sewardjb51f2e62002-06-01 23:11:19 +00001593 stab = (UChar*)(oimage + shdr[i].sh_offset);
sewardjde4a1d02002-03-22 01:27:54 +00001594 stab_sz = shdr[i].sh_size;
1595 }
1596 if (0 == VG_(strcmp)(".stabstr",sh_strtab + shdr[i].sh_name)) {
1597 stabstr = (UChar*)(oimage + shdr[i].sh_offset);
1598 stabstr_sz = shdr[i].sh_size;
1599 }
sewardjc134dd92002-06-01 14:21:36 +00001600 if (0 == VG_(strcmp)(".debug_line",sh_strtab + shdr[i].sh_name)) {
sewardjb51f2e62002-06-01 23:11:19 +00001601 dwarf2 = (UChar *)(oimage + shdr[i].sh_offset);
1602 dwarf2_sz = shdr[i].sh_size;
sewardjc134dd92002-06-01 14:21:36 +00001603 }
sewardjde4a1d02002-03-22 01:27:54 +00001604 }
1605
sewardjb51f2e62002-06-01 23:11:19 +00001606 if ((stab == NULL || stabstr == NULL) && dwarf2 == NULL) {
sewardjde4a1d02002-03-22 01:27:54 +00001607 vg_symerr(" object doesn't have any debug info");
1608 VG_(munmap) ( (void*)oimage, n_oimage );
1609 return;
1610 }
1611
1612 if ( stab_sz + (UChar*)stab > n_oimage + (UChar*)oimage
1613 || stabstr_sz + (UChar*)stabstr
1614 > n_oimage + (UChar*)oimage ) {
sewardjb51f2e62002-06-01 23:11:19 +00001615 vg_symerr(" ELF (stabs) debug data is beyond image end?!");
sewardjde4a1d02002-03-22 01:27:54 +00001616 VG_(munmap) ( (void*)oimage, n_oimage );
1617 return;
1618 }
1619
sewardjb51f2e62002-06-01 23:11:19 +00001620 if ( dwarf2_sz + (UChar*)dwarf2 > n_oimage + (UChar*)oimage ) {
1621 vg_symerr(" ELF (dwarf2) debug data is beyond image end?!");
1622 VG_(munmap) ( (void*)oimage, n_oimage );
1623 return;
1624 }
sewardjde4a1d02002-03-22 01:27:54 +00001625
sewardjb51f2e62002-06-01 23:11:19 +00001626 /* Looks plausible. Go on and read debug data. */
1627 if (stab != NULL && stabstr != NULL) {
1628 read_debuginfo_stabs ( si, stab, stab_sz, stabstr, stabstr_sz );
1629 }
sewardjde4a1d02002-03-22 01:27:54 +00001630
sewardjb51f2e62002-06-01 23:11:19 +00001631 if (dwarf2 != NULL) {
1632 read_debuginfo_dwarf2 ( si, dwarf2, dwarf2_sz );
1633 }
sewardjde4a1d02002-03-22 01:27:54 +00001634
1635 /* Last, but not least, heave the oimage back overboard. */
1636 VG_(munmap) ( (void*)oimage, n_oimage );
1637}
1638
1639
1640/*------------------------------------------------------------*/
1641/*--- Main entry point for symbols table reading. ---*/
1642/*------------------------------------------------------------*/
1643
1644/* The root structure for the entire symbol table system. It is a
1645 linked list of SegInfos. Note that this entire mechanism assumes
1646 that what we read from /proc/self/maps doesn't contain overlapping
1647 address ranges, and as a result the SegInfos in this list describe
1648 disjoint address ranges.
1649*/
1650static SegInfo* segInfo = NULL;
1651
1652
1653static
1654void read_symtab_callback (
1655 Addr start, UInt size,
1656 Char rr, Char ww, Char xx,
1657 UInt foffset, UChar* filename )
1658{
1659 SegInfo* si;
1660
1661 /* Stay sane ... */
1662 if (size == 0)
1663 return;
1664
1665 /* We're only interested in collecting symbols in executable
1666 segments which are associated with a real file. Hence: */
1667 if (filename == NULL || xx != 'x')
1668 return;
1669 if (0 == VG_(strcmp)(filename, "/dev/zero"))
1670 return;
1671
1672 /* Perhaps we already have this one? If so, skip. */
1673 for (si = segInfo; si != NULL; si = si->next) {
1674 /*
1675 if (0==VG_(strcmp)(si->filename, filename))
1676 VG_(printf)("same fnames: %c%c%c (%p, %d) (%p, %d) %s\n",
1677 rr,ww,xx,si->start,si->size,start,size,filename);
1678 */
1679 /* For some reason the observed size of a mapping can change, so
1680 we don't use that to determine uniqueness. */
1681 if (si->start == start
1682 /* && si->size == size */
1683 && 0==VG_(strcmp)(si->filename, filename)) {
1684 return;
1685 }
1686 }
1687
1688 /* Get the record initialised right. */
1689 si = VG_(malloc)(VG_AR_SYMTAB, sizeof(SegInfo));
1690 si->next = segInfo;
1691 segInfo = si;
1692
1693 si->start = start;
1694 si->size = size;
1695 si->foffset = foffset;
1696 si->filename = VG_(malloc)(VG_AR_SYMTAB, 1 + VG_(strlen)(filename));
1697 VG_(strcpy)(si->filename, filename);
1698
1699 si->symtab = NULL;
1700 si->symtab_size = si->symtab_used = 0;
1701 si->loctab = NULL;
1702 si->loctab_size = si->loctab_used = 0;
1703 si->strtab = NULL;
1704 si->strtab_size = si->strtab_used = 0;
1705
1706 /* Kludge ... */
1707 si->offset
1708 = si->start==VG_ASSUMED_EXE_BASE ? 0 : si->start;
1709
1710 /* And actually fill it up. */
sewardj18d75132002-05-16 11:06:21 +00001711 if (VG_(clo_instrument) || VG_(clo_cachesim)) {
1712 vg_read_lib_symbols ( si );
1713 canonicaliseSymtab ( si );
1714 canonicaliseLoctab ( si );
1715 }
sewardjde4a1d02002-03-22 01:27:54 +00001716}
1717
1718
1719/* This one really is the Head Honcho. Update the symbol tables to
1720 reflect the current state of /proc/self/maps. Rather than re-read
1721 everything, just read the entries which are not already in segInfo.
1722 So we can call here repeatedly, after every mmap of a non-anonymous
1723 segment with execute permissions, for example, to pick up new
1724 libraries as they are dlopen'd. Conversely, when the client does
1725 munmap(), vg_symtab_notify_munmap() throws away any symbol tables
1726 which happen to correspond to the munmap()d area. */
1727void VG_(read_symbols) ( void )
1728{
sewardjde4a1d02002-03-22 01:27:54 +00001729 VG_(read_procselfmaps) ( read_symtab_callback );
1730
1731 /* Do a sanity check on the symbol tables: ensure that the address
1732 space pieces they cover do not overlap (otherwise we are severely
1733 hosed). This is a quadratic algorithm, but there shouldn't be
1734 many of them.
1735 */
1736 { SegInfo *si, *si2;
1737 for (si = segInfo; si != NULL; si = si->next) {
1738 /* Check no overlap between *si and those in the rest of the
1739 list. */
1740 for (si2 = si->next; si2 != NULL; si2 = si2->next) {
1741 Addr lo = si->start;
1742 Addr hi = si->start + si->size - 1;
1743 Addr lo2 = si2->start;
1744 Addr hi2 = si2->start + si2->size - 1;
1745 Bool overlap;
1746 vg_assert(lo < hi);
1747 vg_assert(lo2 < hi2);
1748 /* the main assertion */
1749 overlap = (lo <= lo2 && lo2 <= hi)
1750 || (lo <= hi2 && hi2 <= hi);
sewardjde4a1d02002-03-22 01:27:54 +00001751 if (overlap) {
1752 VG_(printf)("\n\nOVERLAPPING SEGMENTS\n" );
1753 ppSegInfo ( si );
1754 ppSegInfo ( si2 );
1755 VG_(printf)("\n\n");
1756 vg_assert(! overlap);
1757 }
1758 }
1759 }
1760 }
1761}
1762
1763
1764/* When an munmap() call happens, check to see whether it corresponds
1765 to a segment for a .so, and if so discard the relevant SegInfo.
1766 This might not be a very clever idea from the point of view of
1767 accuracy of error messages, but we need to do it in order to
sewardj18d75132002-05-16 11:06:21 +00001768 maintain the no-overlapping invariant.
1769
1770 16 May 02: Returns a Bool indicating whether or not the discarded
1771 range falls inside a known executable segment. See comment at top
1772 of file for why.
sewardjde4a1d02002-03-22 01:27:54 +00001773*/
sewardj18d75132002-05-16 11:06:21 +00001774Bool VG_(symtab_notify_munmap) ( Addr start, UInt length )
sewardjde4a1d02002-03-22 01:27:54 +00001775{
1776 SegInfo *prev, *curr;
1777
sewardjde4a1d02002-03-22 01:27:54 +00001778 prev = NULL;
1779 curr = segInfo;
1780 while (True) {
1781 if (curr == NULL) break;
1782 if (start == curr->start) break;
1783 prev = curr;
1784 curr = curr->next;
1785 }
sewardj18d75132002-05-16 11:06:21 +00001786 if (curr == NULL)
1787 return False;
sewardjde4a1d02002-03-22 01:27:54 +00001788
1789 VG_(message)(Vg_UserMsg,
1790 "discard syms in %s due to munmap()",
1791 curr->filename ? curr->filename : (UChar*)"???");
1792
1793 vg_assert(prev == NULL || prev->next == curr);
1794
1795 if (prev == NULL) {
1796 segInfo = curr->next;
1797 } else {
1798 prev->next = curr->next;
1799 }
1800
1801 freeSegInfo(curr);
sewardj18d75132002-05-16 11:06:21 +00001802 return True;
sewardjde4a1d02002-03-22 01:27:54 +00001803}
1804
1805
1806/*------------------------------------------------------------*/
1807/*--- Use of symbol table & location info to create ---*/
1808/*--- plausible-looking stack dumps. ---*/
1809/*------------------------------------------------------------*/
1810
1811/* Find a symbol-table index containing the specified pointer, or -1
1812 if not found. Binary search. */
1813
1814static Int search_one_symtab ( SegInfo* si, Addr ptr )
1815{
1816 Addr a_mid_lo, a_mid_hi;
1817 Int mid,
1818 lo = 0,
1819 hi = si->symtab_used-1;
1820 while (True) {
1821 /* current unsearched space is from lo to hi, inclusive. */
1822 if (lo > hi) return -1; /* not found */
1823 mid = (lo + hi) / 2;
1824 a_mid_lo = si->symtab[mid].addr;
1825 a_mid_hi = ((Addr)si->symtab[mid].addr) + si->symtab[mid].size - 1;
1826
1827 if (ptr < a_mid_lo) { hi = mid-1; continue; }
1828 if (ptr > a_mid_hi) { lo = mid+1; continue; }
1829 vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
1830 return mid;
1831 }
1832}
1833
1834
1835/* Search all symtabs that we know about to locate ptr. If found, set
1836 *psi to the relevant SegInfo, and *symno to the symtab entry number
1837 within that. If not found, *psi is set to NULL. */
1838
1839static void search_all_symtabs ( Addr ptr, SegInfo** psi, Int* symno )
1840{
1841 Int sno;
1842 SegInfo* si;
1843 for (si = segInfo; si != NULL; si = si->next) {
1844 if (si->start <= ptr && ptr < si->start+si->size) {
1845 sno = search_one_symtab ( si, ptr );
1846 if (sno == -1) goto not_found;
1847 *symno = sno;
1848 *psi = si;
1849 return;
1850 }
1851 }
1852 not_found:
1853 *psi = NULL;
1854}
1855
1856
1857/* Find a location-table index containing the specified pointer, or -1
1858 if not found. Binary search. */
1859
1860static Int search_one_loctab ( SegInfo* si, Addr ptr )
1861{
1862 Addr a_mid_lo, a_mid_hi;
1863 Int mid,
1864 lo = 0,
1865 hi = si->loctab_used-1;
1866 while (True) {
1867 /* current unsearched space is from lo to hi, inclusive. */
1868 if (lo > hi) return -1; /* not found */
1869 mid = (lo + hi) / 2;
1870 a_mid_lo = si->loctab[mid].addr;
1871 a_mid_hi = ((Addr)si->loctab[mid].addr) + si->loctab[mid].size - 1;
1872
1873 if (ptr < a_mid_lo) { hi = mid-1; continue; }
1874 if (ptr > a_mid_hi) { lo = mid+1; continue; }
1875 vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
1876 return mid;
1877 }
1878}
1879
1880
1881/* Search all loctabs that we know about to locate ptr. If found, set
1882 *psi to the relevant SegInfo, and *locno to the loctab entry number
1883 within that. If not found, *psi is set to NULL.
1884*/
1885static void search_all_loctabs ( Addr ptr, SegInfo** psi, Int* locno )
1886{
1887 Int lno;
1888 SegInfo* si;
1889 for (si = segInfo; si != NULL; si = si->next) {
1890 if (si->start <= ptr && ptr < si->start+si->size) {
1891 lno = search_one_loctab ( si, ptr );
1892 if (lno == -1) goto not_found;
1893 *locno = lno;
1894 *psi = si;
1895 return;
1896 }
1897 }
1898 not_found:
1899 *psi = NULL;
1900}
1901
1902
1903/* The whole point of this whole big deal: map a code address to a
1904 plausible symbol name. Returns False if no idea; otherwise True.
1905 Caller supplies buf and nbuf. If no_demangle is True, don't do
1906 demangling, regardless of vg_clo_demangle -- probably because the
1907 call has come from vg_what_fn_or_object_is_this. */
njn4f9c9342002-04-29 16:03:24 +00001908Bool VG_(what_fn_is_this) ( Bool no_demangle, Addr a,
1909 Char* buf, Int nbuf )
sewardjde4a1d02002-03-22 01:27:54 +00001910{
1911 SegInfo* si;
1912 Int sno;
1913 search_all_symtabs ( a, &si, &sno );
1914 if (si == NULL)
1915 return False;
1916 if (no_demangle) {
1917 VG_(strncpy_safely)
1918 ( buf, & si->strtab[si->symtab[sno].nmoff], nbuf );
1919 } else {
1920 VG_(demangle) ( & si->strtab[si->symtab[sno].nmoff], buf, nbuf );
1921 }
1922 return True;
1923}
1924
1925
1926/* Map a code address to the name of a shared object file. Returns
1927 False if no idea; otherwise False. Caller supplies buf and
1928 nbuf. */
1929static
1930Bool vg_what_object_is_this ( Addr a, Char* buf, Int nbuf )
1931{
1932 SegInfo* si;
1933 for (si = segInfo; si != NULL; si = si->next) {
1934 if (si->start <= a && a < si->start+si->size) {
1935 VG_(strncpy_safely)(buf, si->filename, nbuf);
1936 return True;
1937 }
1938 }
1939 return False;
1940}
1941
1942/* Return the name of an erring fn in a way which is useful
1943 for comparing against the contents of a suppressions file.
1944 Always writes something to buf. Also, doesn't demangle the
1945 name, because we want to refer to mangled names in the
1946 suppressions file.
1947*/
1948void VG_(what_obj_and_fun_is_this) ( Addr a,
1949 Char* obj_buf, Int n_obj_buf,
1950 Char* fun_buf, Int n_fun_buf )
1951{
1952 (void)vg_what_object_is_this ( a, obj_buf, n_obj_buf );
njn4f9c9342002-04-29 16:03:24 +00001953 (void)VG_(what_fn_is_this) ( True, a, fun_buf, n_fun_buf );
sewardjde4a1d02002-03-22 01:27:54 +00001954}
1955
1956
1957/* Map a code address to a (filename, line number) pair.
1958 Returns True if successful.
1959*/
njn4f9c9342002-04-29 16:03:24 +00001960Bool VG_(what_line_is_this)( Addr a,
1961 UChar* filename, Int n_filename,
1962 UInt* lineno )
sewardjde4a1d02002-03-22 01:27:54 +00001963{
1964 SegInfo* si;
1965 Int locno;
1966 search_all_loctabs ( a, &si, &locno );
1967 if (si == NULL)
1968 return False;
1969 VG_(strncpy_safely)(filename, & si->strtab[si->loctab[locno].fnmoff],
1970 n_filename);
1971 *lineno = si->loctab[locno].lineno;
njn4f9c9342002-04-29 16:03:24 +00001972
sewardjde4a1d02002-03-22 01:27:54 +00001973 return True;
1974}
1975
1976
1977/* Print a mini stack dump, showing the current location. */
1978void VG_(mini_stack_dump) ( ExeContext* ec )
1979{
1980
1981#define APPEND(str) \
1982 { UChar* sss; \
1983 for (sss = str; n < M_VG_ERRTXT-1 && *sss != 0; n++,sss++) \
1984 buf[n] = *sss; \
1985 buf[n] = 0; \
1986 }
1987
1988 Bool know_fnname;
1989 Bool know_objname;
1990 Bool know_srcloc;
1991 UInt lineno;
1992 UChar ibuf[20];
sewardj04b91062002-06-05 21:22:04 +00001993 UInt i, n;
sewardjde4a1d02002-03-22 01:27:54 +00001994
1995 UChar buf[M_VG_ERRTXT];
1996 UChar buf_fn[M_VG_ERRTXT];
1997 UChar buf_obj[M_VG_ERRTXT];
1998 UChar buf_srcloc[M_VG_ERRTXT];
1999
2000 Int stop_at = VG_(clo_backtrace_size);
2001
2002 n = 0;
2003
njn4f9c9342002-04-29 16:03:24 +00002004 know_fnname = VG_(what_fn_is_this)(False,ec->eips[0], buf_fn, M_VG_ERRTXT);
sewardjde4a1d02002-03-22 01:27:54 +00002005 know_objname = vg_what_object_is_this(ec->eips[0], buf_obj, M_VG_ERRTXT);
njn4f9c9342002-04-29 16:03:24 +00002006 know_srcloc = VG_(what_line_is_this)(ec->eips[0],
2007 buf_srcloc, M_VG_ERRTXT,
2008 &lineno);
sewardjde4a1d02002-03-22 01:27:54 +00002009
2010 APPEND(" at ");
2011 VG_(sprintf)(ibuf,"0x%x: ", ec->eips[0]);
2012 APPEND(ibuf);
2013 if (know_fnname) {
2014 APPEND(buf_fn);
2015 if (!know_srcloc && know_objname) {
2016 APPEND(" (in ");
2017 APPEND(buf_obj);
2018 APPEND(")");
2019 }
2020 } else if (know_objname && !know_srcloc) {
2021 APPEND("(within ");
2022 APPEND(buf_obj);
2023 APPEND(")");
2024 } else {
2025 APPEND("???");
2026 }
2027 if (know_srcloc) {
2028 APPEND(" (");
2029 APPEND(buf_srcloc);
2030 APPEND(":");
2031 VG_(sprintf)(ibuf,"%d",lineno);
2032 APPEND(ibuf);
2033 APPEND(")");
2034 }
2035 VG_(message)(Vg_UserMsg, "%s", buf);
2036
sewardj04b91062002-06-05 21:22:04 +00002037 for (i = 1; i < stop_at && ec->eips[i] != 0; i++) {
njn4f9c9342002-04-29 16:03:24 +00002038 know_fnname = VG_(what_fn_is_this)(False,ec->eips[i], buf_fn, M_VG_ERRTXT);
sewardjde4a1d02002-03-22 01:27:54 +00002039 know_objname = vg_what_object_is_this(ec->eips[i],buf_obj, M_VG_ERRTXT);
njn4f9c9342002-04-29 16:03:24 +00002040 know_srcloc = VG_(what_line_is_this)(ec->eips[i],
sewardjde4a1d02002-03-22 01:27:54 +00002041 buf_srcloc, M_VG_ERRTXT,
2042 &lineno);
2043 n = 0;
2044 APPEND(" by ");
sewardj04b91062002-06-05 21:22:04 +00002045 VG_(sprintf)(ibuf,"0x%x: ",ec->eips[i]);
2046 APPEND(ibuf);
sewardjde4a1d02002-03-22 01:27:54 +00002047 if (know_fnname) {
2048 APPEND(buf_fn)
2049 if (!know_srcloc && know_objname) {
2050 APPEND(" (in ");
2051 APPEND(buf_obj);
2052 APPEND(")");
2053 }
2054 } else {
2055 if (know_objname && !know_srcloc) {
2056 APPEND("(within ");
2057 APPEND(buf_obj);
2058 APPEND(")");
2059 } else {
2060 APPEND("???");
2061 }
sewardjde4a1d02002-03-22 01:27:54 +00002062 };
2063 if (know_srcloc) {
2064 APPEND(" (");
2065 APPEND(buf_srcloc);
2066 APPEND(":");
2067 VG_(sprintf)(ibuf,"%d",lineno);
2068 APPEND(ibuf);
2069 APPEND(")");
2070 }
2071 VG_(message)(Vg_UserMsg, "%s", buf);
2072 }
2073}
2074
2075#undef APPEND
2076
2077/*--------------------------------------------------------------------*/
2078/*--- end vg_symtab2.c ---*/
2079/*--------------------------------------------------------------------*/