blob: 7ae017064ebb3742979c366d34d01c03eee9040a [file] [log] [blame]
sewardjde4a1d02002-03-22 01:27:54 +00001/*--------------------------------------------------------------------*/
2/*--- Management of symbols and debugging information. ---*/
3/*--- vg_symtab2.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7 This file is part of Valgrind, an x86 protected-mode emulator
8 designed for debugging and profiling binaries on x86-Unixes.
9
10 Copyright (C) 2000-2002 Julian Seward
11 jseward@acm.org
sewardjde4a1d02002-03-22 01:27:54 +000012
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
njn25e49d8e72002-09-23 09:36:25 +000028 The GNU General Public License is contained in the file COPYING.
sewardjde4a1d02002-03-22 01:27:54 +000029*/
30
31#include "vg_include.h"
sewardjde4a1d02002-03-22 01:27:54 +000032
33#include <elf.h> /* ELF defns */
34#include <a.out.h> /* stabs defns */
35
njn9aae6742002-04-30 13:44:01 +000036
sewardjde4a1d02002-03-22 01:27:54 +000037/* Majorly rewritten Sun 3 Feb 02 to enable loading symbols from
38 dlopen()ed libraries, which is something that KDE3 does a lot.
sewardjde4a1d02002-03-22 01:27:54 +000039
njn25e49d8e72002-09-23 09:36:25 +000040 Stabs reader greatly improved by Nick Nethercote, Apr 02.
sewardjde4a1d02002-03-22 01:27:54 +000041*/
42
njn25e49d8e72002-09-23 09:36:25 +000043/* Set to True when first debug info search is performed */
44Bool VG_(using_debug_info) = False;
45
sewardjde4a1d02002-03-22 01:27:54 +000046/*------------------------------------------------------------*/
47/*--- Structs n stuff ---*/
48/*------------------------------------------------------------*/
49
50/* A structure to hold an ELF symbol (very crudely). */
51typedef
52 struct {
53 Addr addr; /* lowest address of entity */
54 UInt size; /* size in bytes */
55 Int nmoff; /* offset of name in this SegInfo's str tab */
56 }
57 RiSym;
58
/* Line count at which overflow happens: line numbers are stored as
 * shorts in `struct nlist' in a.out.h, so they wrap at this value. */
#define LINENO_OVERFLOW      (1 << (8 * sizeof(short)))

/* A RiLoc packs the line number and the byte size of the range into a
 * single 32-bit word; these give the split between the two fields. */
#define LINENO_BITS          20
#define LOC_SIZE_BITS        (32 - LINENO_BITS)

/* Largest line number representable in LINENO_BITS bits. */
#define MAX_LINENO           ((1 << LINENO_BITS) - 1)

/* Largest range size representable in LOC_SIZE_BITS bits.  Unlikely to
 * have any lines with instruction ranges > 4096 bytes. */
#define MAX_LOC_SIZE         ((1 << LOC_SIZE_BITS) - 1)

/* Number used to detect line number overflows; if one line is 60000-odd
 * smaller than the previous, it was probably an overflow.
 */
#define OVERFLOW_DIFFERENCE  (LINENO_OVERFLOW - 5000)
74
75/* A structure to hold addr-to-source info for a single line. There can be a
76 * lot of these, hence the dense packing. */
sewardjde4a1d02002-03-22 01:27:54 +000077typedef
78 struct {
njne0ee0712002-05-03 16:41:05 +000079 /* Word 1 */
80 Addr addr; /* lowest address for this line */
81 /* Word 2 */
82 UShort size:LOC_SIZE_BITS; /* byte size; we catch overflows of this */
83 UInt lineno:LINENO_BITS; /* source line number, or zero */
84 /* Word 3 */
85 UInt fnmoff; /* source filename; offset in this
86 SegInfo's str tab */
sewardjde4a1d02002-03-22 01:27:54 +000087 }
88 RiLoc;
89
90
91/* A structure which contains information pertaining to one mapped
92 text segment. */
93typedef
94 struct _SegInfo {
95 struct _SegInfo* next;
96 /* Description of the mapped segment. */
97 Addr start;
98 UInt size;
99 UChar* filename; /* in mallocville */
100 UInt foffset;
101 /* An expandable array of symbols. */
102 RiSym* symtab;
103 UInt symtab_used;
104 UInt symtab_size;
105 /* An expandable array of locations. */
106 RiLoc* loctab;
107 UInt loctab_used;
108 UInt loctab_size;
109 /* An expandable array of characters -- the string table. */
110 Char* strtab;
111 UInt strtab_used;
112 UInt strtab_size;
113 /* offset is what we need to add to symbol table entries
114 to get the real location of that symbol in memory.
115 For executables, offset is zero.
116 For .so's, offset == base_addr.
117 This seems like a giant kludge to me.
118 */
119 UInt offset;
120 }
121 SegInfo;
122
123
sewardjde4a1d02002-03-22 01:27:54 +0000124static void freeSegInfo ( SegInfo* si )
125{
126 vg_assert(si != NULL);
njn25e49d8e72002-09-23 09:36:25 +0000127 if (si->filename) VG_(arena_free)(VG_AR_SYMTAB, si->filename);
128 if (si->symtab) VG_(arena_free)(VG_AR_SYMTAB, si->symtab);
129 if (si->loctab) VG_(arena_free)(VG_AR_SYMTAB, si->loctab);
130 if (si->strtab) VG_(arena_free)(VG_AR_SYMTAB, si->strtab);
131 VG_(arena_free)(VG_AR_SYMTAB, si);
sewardjde4a1d02002-03-22 01:27:54 +0000132}
133
134
135/*------------------------------------------------------------*/
136/*--- Adding stuff ---*/
137/*------------------------------------------------------------*/
138
139/* Add a str to the string table, including terminating zero, and
njn25e49d8e72002-09-23 09:36:25 +0000140 return offset of the string in vg_strtab. Unless it's been seen
141 recently, in which case we find the old index and return that.
142 This avoids the most egregious duplications. */
sewardjde4a1d02002-03-22 01:27:54 +0000143
144static __inline__
145Int addStr ( SegInfo* si, Char* str )
146{
njn25e49d8e72002-09-23 09:36:25 +0000147# define EMPTY 0xffffffff
148# define NN 5
149
150 /* prevN[0] has the most recent, prevN[NN-1] the least recent */
151 static UInt prevN[] = { EMPTY, EMPTY, EMPTY, EMPTY, EMPTY };
152 static SegInfo* curr_si = NULL;
153
sewardjde4a1d02002-03-22 01:27:54 +0000154 Char* new_tab;
155 Int new_sz, i, space_needed;
njn25e49d8e72002-09-23 09:36:25 +0000156
157 /* Avoid gratuitous duplication: if we saw `str' within the last NN,
158 * within this segment, return that index. Saves about 200KB in glibc,
159 * extra time taken is too small to measure. --NJN 2002-Aug-30 */
160 if (curr_si == si) {
161 for (i = NN-1; i >= 0; i--) {
sewardjcda419b2002-10-01 08:59:36 +0000162 if (EMPTY != prevN[i]
163 && NULL != si->strtab
164 && 0 == VG_(strcmp)(str, &si->strtab[prevN[i]])) {
njn25e49d8e72002-09-23 09:36:25 +0000165 return prevN[i];
166 }
167 }
168 } else {
169 /* New segment */
170 curr_si = si;
sewardjcda419b2002-10-01 08:59:36 +0000171 for (i = 0; i < NN; i++) prevN[i] = EMPTY;
njn25e49d8e72002-09-23 09:36:25 +0000172 }
173 /* Shuffle prevous ones along, put new one in. */
174 for (i = NN-1; i > 0; i--) prevN[i] = prevN[i-1];
175 prevN[0] = si->strtab_used;
176
177# undef EMPTY
178
sewardjde4a1d02002-03-22 01:27:54 +0000179 space_needed = 1 + VG_(strlen)(str);
njn25e49d8e72002-09-23 09:36:25 +0000180
sewardjde4a1d02002-03-22 01:27:54 +0000181 if (si->strtab_used + space_needed > si->strtab_size) {
182 new_sz = 2 * si->strtab_size;
183 if (new_sz == 0) new_sz = 5000;
njn25e49d8e72002-09-23 09:36:25 +0000184 new_tab = VG_(arena_malloc)(VG_AR_SYMTAB, new_sz);
sewardjde4a1d02002-03-22 01:27:54 +0000185 if (si->strtab != NULL) {
186 for (i = 0; i < si->strtab_used; i++)
187 new_tab[i] = si->strtab[i];
njn25e49d8e72002-09-23 09:36:25 +0000188 VG_(arena_free)(VG_AR_SYMTAB, si->strtab);
sewardjde4a1d02002-03-22 01:27:54 +0000189 }
190 si->strtab = new_tab;
191 si->strtab_size = new_sz;
192 }
193
194 for (i = 0; i < space_needed; i++)
195 si->strtab[si->strtab_used+i] = str[i];
196
197 si->strtab_used += space_needed;
198 vg_assert(si->strtab_used <= si->strtab_size);
njn25e49d8e72002-09-23 09:36:25 +0000199
sewardjde4a1d02002-03-22 01:27:54 +0000200 return si->strtab_used - space_needed;
201}
202
203/* Add a symbol to the symbol table. */
204
205static __inline__
206void addSym ( SegInfo* si, RiSym* sym )
207{
208 Int new_sz, i;
209 RiSym* new_tab;
210
211 /* Ignore zero-sized syms. */
212 if (sym->size == 0) return;
213
214 if (si->symtab_used == si->symtab_size) {
215 new_sz = 2 * si->symtab_size;
216 if (new_sz == 0) new_sz = 500;
njn25e49d8e72002-09-23 09:36:25 +0000217 new_tab = VG_(arena_malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiSym) );
sewardjde4a1d02002-03-22 01:27:54 +0000218 if (si->symtab != NULL) {
219 for (i = 0; i < si->symtab_used; i++)
220 new_tab[i] = si->symtab[i];
njn25e49d8e72002-09-23 09:36:25 +0000221 VG_(arena_free)(VG_AR_SYMTAB, si->symtab);
sewardjde4a1d02002-03-22 01:27:54 +0000222 }
223 si->symtab = new_tab;
224 si->symtab_size = new_sz;
225 }
226
227 si->symtab[si->symtab_used] = *sym;
228 si->symtab_used++;
229 vg_assert(si->symtab_used <= si->symtab_size);
230}
231
232/* Add a location to the location table. */
233
234static __inline__
235void addLoc ( SegInfo* si, RiLoc* loc )
236{
237 Int new_sz, i;
238 RiLoc* new_tab;
239
njne0ee0712002-05-03 16:41:05 +0000240 /* Zero-sized locs should have been ignored earlier */
241 vg_assert(loc->size > 0);
sewardjde4a1d02002-03-22 01:27:54 +0000242
243 if (si->loctab_used == si->loctab_size) {
244 new_sz = 2 * si->loctab_size;
245 if (new_sz == 0) new_sz = 500;
njn25e49d8e72002-09-23 09:36:25 +0000246 new_tab = VG_(arena_malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiLoc) );
sewardjde4a1d02002-03-22 01:27:54 +0000247 if (si->loctab != NULL) {
248 for (i = 0; i < si->loctab_used; i++)
249 new_tab[i] = si->loctab[i];
njn25e49d8e72002-09-23 09:36:25 +0000250 VG_(arena_free)(VG_AR_SYMTAB, si->loctab);
sewardjde4a1d02002-03-22 01:27:54 +0000251 }
252 si->loctab = new_tab;
253 si->loctab_size = new_sz;
254 }
255
256 si->loctab[si->loctab_used] = *loc;
257 si->loctab_used++;
258 vg_assert(si->loctab_used <= si->loctab_size);
259}
260
261
sewardjb51f2e62002-06-01 23:11:19 +0000262/* Top-level place to call to add a source-location mapping entry. */
263
264static __inline__
265void addLineInfo ( SegInfo* si,
266 Int fnmoff,
267 Addr this,
268 Addr next,
269 Int lineno,
sewardj08a50f62002-06-17 02:21:20 +0000270 Int entry /* only needed for debug printing */
271 )
sewardjb51f2e62002-06-01 23:11:19 +0000272{
273 RiLoc loc;
274 Int size = next - this;
275
276 /* Ignore zero-sized locs */
277 if (this == next) return;
278
279 /* Maximum sanity checking. Some versions of GNU as do a shabby
280 * job with stabs entries; if anything looks suspicious, revert to
281 * a size of 1. This should catch the instruction of interest
282 * (since if using asm-level debug info, one instruction will
283 * correspond to one line, unlike with C-level debug info where
284 * multiple instructions can map to the one line), but avoid
285 * catching any other instructions bogusly. */
286 if (this > next) {
287 VG_(message)(Vg_DebugMsg,
sewardj08a50f62002-06-17 02:21:20 +0000288 "warning: line info addresses out of order "
sewardjb51f2e62002-06-01 23:11:19 +0000289 "at entry %d: 0x%x 0x%x", entry, this, next);
290 size = 1;
291 }
292
293 if (size > MAX_LOC_SIZE) {
sewardjd84606d2002-06-18 01:04:57 +0000294 if (0)
sewardjb51f2e62002-06-01 23:11:19 +0000295 VG_(message)(Vg_DebugMsg,
sewardj08a50f62002-06-17 02:21:20 +0000296 "warning: line info address range too large "
sewardjb51f2e62002-06-01 23:11:19 +0000297 "at entry %d: %d", entry, size);
298 size = 1;
299 }
300
sewardj08a50f62002-06-17 02:21:20 +0000301 /* vg_assert(this < si->start + si->size && next-1 >= si->start); */
njne306ffe2002-06-08 13:34:17 +0000302 if (this >= si->start + si->size || next-1 < si->start) {
sewardjd84606d2002-06-18 01:04:57 +0000303 if (0)
sewardj08a50f62002-06-17 02:21:20 +0000304 VG_(message)(Vg_DebugMsg,
305 "warning: ignoring line info entry falling "
306 "outside current SegInfo: %p %p %p %p",
307 si->start, si->start + si->size,
308 this, next-1);
njne306ffe2002-06-08 13:34:17 +0000309 return;
310 }
311
312 vg_assert(lineno >= 0);
313 if (lineno > MAX_LINENO) {
314 VG_(message)(Vg_UserMsg,
sewardj08a50f62002-06-17 02:21:20 +0000315 "warning: ignoring line info entry with "
316 "huge line number (%d)", lineno);
njne306ffe2002-06-08 13:34:17 +0000317 VG_(message)(Vg_UserMsg,
318 " Can't handle line numbers "
sewardj08a50f62002-06-17 02:21:20 +0000319 "greater than %d, sorry", MAX_LINENO);
njne306ffe2002-06-08 13:34:17 +0000320 return;
321 }
sewardjb51f2e62002-06-01 23:11:19 +0000322
323 loc.addr = this;
324 loc.size = (UShort)size;
325 loc.lineno = lineno;
326 loc.fnmoff = fnmoff;
327 addLoc ( si, &loc );
328}
329
sewardjde4a1d02002-03-22 01:27:54 +0000330
331/*------------------------------------------------------------*/
332/*--- Helpers ---*/
333/*------------------------------------------------------------*/
334
335/* Non-fatal -- use vg_panic if terminal. */
336static
337void vg_symerr ( Char* msg )
338{
339 if (VG_(clo_verbosity) > 1)
340 VG_(message)(Vg_UserMsg,"%s", msg );
341}
342
343
344/* Print a symbol. */
345static
346void printSym ( SegInfo* si, Int i )
347{
348 VG_(printf)( "%5d: %8p .. %8p (%d) %s\n",
349 i,
350 si->symtab[i].addr,
351 si->symtab[i].addr + si->symtab[i].size - 1, si->symtab[i].size,
352 &si->strtab[si->symtab[i].nmoff] );
353}
354
355
#if 0
/* Print the entire sym tab of one segment.

   Disabled debugging aid.  Brought up to date with the per-SegInfo
   tables: it used to refer to a global vg_symtab_used and call
   printSym with a single argument, neither of which matches the rest
   of this file any more.  It therefore now takes the SegInfo to dump. */
static __attribute__ ((unused))
void printSymtab ( SegInfo* si )
{
   Int i;
   VG_(printf)("\n------ BEGIN vg_symtab ------\n");
   for (i = 0; i < (Int)si->symtab_used; i++)
      printSym(si, i);
   VG_(printf)("------ END vg_symtab ------\n");
}
#endif
368
#if 0
/* Paranoid strcat: append src to the zero-terminated string in dst,
   never touching dst[maxlen] or beyond.  Gives up without writing
   anything if no terminator is found within the first maxlen bytes of
   dst.  Note that if the space runs out part-way through the copy,
   dst is left without a terminating zero. */
static
void safeCopy ( UChar* dst, UInt maxlen, UChar* src )
{
   UInt d, s;

   /* Find the end of the existing string in dst. */
   for (d = 0; ; d++) {
      if (d >= maxlen) return;
      if (dst[d] == 0) break;
   }

   /* Copy bytes across until src's terminator has been copied or
      dst is full. */
   for (s = 0; d < maxlen; d++, s++) {
      dst[d] = src[s];
      if (src[s] == 0) return;
   }
}
#endif
388
sewardjb51f2e62002-06-01 23:11:19 +0000389
sewardjde4a1d02002-03-22 01:27:54 +0000390/*------------------------------------------------------------*/
391/*--- Canonicalisers ---*/
392/*------------------------------------------------------------*/
393
394/* Sort the symtab by starting address, and emit warnings if any
395 symbols have overlapping address ranges. We use that old chestnut,
396 shellsort. Mash the table around so as to establish the property
397 that addresses are in order and the ranges to not overlap. This
398 facilitates using binary search to map addresses to symbols when we
399 come to query the table.
400*/
401static
402void canonicaliseSymtab ( SegInfo* si )
403{
404 /* Magic numbers due to Janet Incerpi and Robert Sedgewick. */
405 Int incs[16] = { 1, 3, 7, 21, 48, 112, 336, 861, 1968,
406 4592, 13776, 33936, 86961, 198768,
407 463792, 1391376 };
408 Int lo = 0;
409 Int hi = si->symtab_used-1;
410 Int i, j, h, bigN, hp, n_merged, n_truncated;
411 RiSym v;
412 Addr s1, s2, e1, e2;
413
414# define SWAP(ty,aa,bb) \
415 do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0)
416
417 bigN = hi - lo + 1; if (bigN < 2) return;
418 hp = 0; while (hp < 16 && incs[hp] < bigN) hp++; hp--;
419 vg_assert(0 <= hp && hp < 16);
420
421 for (; hp >= 0; hp--) {
422 h = incs[hp];
423 i = lo + h;
424 while (1) {
425 if (i > hi) break;
426 v = si->symtab[i];
427 j = i;
428 while (si->symtab[j-h].addr > v.addr) {
429 si->symtab[j] = si->symtab[j-h];
430 j = j - h;
431 if (j <= (lo + h - 1)) break;
432 }
433 si->symtab[j] = v;
434 i++;
435 }
436 }
437
438 cleanup_more:
439
440 /* If two symbols have identical address ranges, favour the
441 one with the longer name.
442 */
443 do {
444 n_merged = 0;
445 j = si->symtab_used;
446 si->symtab_used = 0;
447 for (i = 0; i < j; i++) {
448 if (i < j-1
449 && si->symtab[i].addr == si->symtab[i+1].addr
450 && si->symtab[i].size == si->symtab[i+1].size) {
451 n_merged++;
452 /* merge the two into one */
453 if (VG_(strlen)(&si->strtab[si->symtab[i].nmoff])
454 > VG_(strlen)(&si->strtab[si->symtab[i+1].nmoff])) {
455 si->symtab[si->symtab_used++] = si->symtab[i];
456 } else {
457 si->symtab[si->symtab_used++] = si->symtab[i+1];
458 }
459 i++;
460 } else {
461 si->symtab[si->symtab_used++] = si->symtab[i];
462 }
463 }
464 if (VG_(clo_trace_symtab))
465 VG_(printf)( "%d merged\n", n_merged);
466 }
467 while (n_merged > 0);
468
469 /* Detect and "fix" overlapping address ranges. */
470 n_truncated = 0;
471
472 for (i = 0; i < si->symtab_used-1; i++) {
473
474 vg_assert(si->symtab[i].addr <= si->symtab[i+1].addr);
475
476 /* Check for common (no overlap) case. */
477 if (si->symtab[i].addr + si->symtab[i].size
478 <= si->symtab[i+1].addr)
479 continue;
480
481 /* There's an overlap. Truncate one or the other. */
482 if (VG_(clo_trace_symtab)) {
483 VG_(printf)("overlapping address ranges in symbol table\n\t");
484 printSym(si,i);
485 VG_(printf)("\t");
486 printSym(si,i+1);
487 VG_(printf)("\n");
488 }
489
490 /* Truncate one or the other. */
491 s1 = si->symtab[i].addr;
492 s2 = si->symtab[i+1].addr;
493 e1 = s1 + si->symtab[i].size - 1;
494 e2 = s2 + si->symtab[i+1].size - 1;
495 if (s1 < s2) {
496 e1 = s2-1;
497 } else {
498 vg_assert(s1 == s2);
499 if (e1 > e2) {
500 s1 = e2+1; SWAP(Addr,s1,s2); SWAP(Addr,e1,e2);
501 } else
502 if (e1 < e2) {
503 s2 = e1+1;
504 } else {
505 /* e1 == e2. Identical addr ranges. We'll eventually wind
506 up back at cleanup_more, which will take care of it. */
507 }
508 }
509 si->symtab[i].addr = s1;
510 si->symtab[i+1].addr = s2;
511 si->symtab[i].size = e1 - s1 + 1;
512 si->symtab[i+1].size = e2 - s2 + 1;
513 vg_assert(s1 <= s2);
514 vg_assert(si->symtab[i].size > 0);
515 vg_assert(si->symtab[i+1].size > 0);
516 /* It may be that the i+1 entry now needs to be moved further
517 along to maintain the address order requirement. */
518 j = i+1;
519 while (j < si->symtab_used-1
520 && si->symtab[j].addr > si->symtab[j+1].addr) {
521 SWAP(RiSym,si->symtab[j],si->symtab[j+1]);
522 j++;
523 }
524 n_truncated++;
525 }
526
527 if (n_truncated > 0) goto cleanup_more;
528
529 /* Ensure relevant postconditions hold. */
530 for (i = 0; i < si->symtab_used-1; i++) {
531 /* No zero-sized symbols. */
532 vg_assert(si->symtab[i].size > 0);
533 /* In order. */
534 vg_assert(si->symtab[i].addr < si->symtab[i+1].addr);
535 /* No overlaps. */
536 vg_assert(si->symtab[i].addr + si->symtab[i].size - 1
537 < si->symtab[i+1].addr);
538 }
539# undef SWAP
540}
541
542
543
544/* Sort the location table by starting address. Mash the table around
545 so as to establish the property that addresses are in order and the
546 ranges do not overlap. This facilitates using binary search to map
sewardjb51f2e62002-06-01 23:11:19 +0000547 addresses to locations when we come to query the table.
548*/
sewardjde4a1d02002-03-22 01:27:54 +0000549static
550void canonicaliseLoctab ( SegInfo* si )
551{
552 /* Magic numbers due to Janet Incerpi and Robert Sedgewick. */
553 Int incs[16] = { 1, 3, 7, 21, 48, 112, 336, 861, 1968,
554 4592, 13776, 33936, 86961, 198768,
555 463792, 1391376 };
556 Int lo = 0;
557 Int hi = si->loctab_used-1;
558 Int i, j, h, bigN, hp;
559 RiLoc v;
560
561# define SWAP(ty,aa,bb) \
562 do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0);
563
564 /* Sort by start address. */
565
566 bigN = hi - lo + 1; if (bigN < 2) return;
567 hp = 0; while (hp < 16 && incs[hp] < bigN) hp++; hp--;
568 vg_assert(0 <= hp && hp < 16);
569
570 for (; hp >= 0; hp--) {
571 h = incs[hp];
572 i = lo + h;
573 while (1) {
574 if (i > hi) break;
575 v = si->loctab[i];
576 j = i;
577 while (si->loctab[j-h].addr > v.addr) {
578 si->loctab[j] = si->loctab[j-h];
579 j = j - h;
580 if (j <= (lo + h - 1)) break;
581 }
582 si->loctab[j] = v;
583 i++;
584 }
585 }
586
587 /* If two adjacent entries overlap, truncate the first. */
588 for (i = 0; i < si->loctab_used-1; i++) {
589 vg_assert(si->loctab[i].size < 10000);
590 if (si->loctab[i].addr + si->loctab[i].size > si->loctab[i+1].addr) {
591 /* Do this in signed int32 because the actual .size fields
592 are unsigned 16s. */
593 Int new_size = si->loctab[i+1].addr - si->loctab[i].addr;
594 if (new_size < 0) {
595 si->loctab[i].size = 0;
596 } else
597 if (new_size >= 65536) {
598 si->loctab[i].size = 65535;
599 } else {
600 si->loctab[i].size = (UShort)new_size;
601 }
602 }
603 }
604
605 /* Zap any zero-sized entries resulting from the truncation
606 process. */
607 j = 0;
608 for (i = 0; i < si->loctab_used; i++) {
609 if (si->loctab[i].size > 0) {
610 si->loctab[j] = si->loctab[i];
611 j++;
612 }
613 }
614 si->loctab_used = j;
615
616 /* Ensure relevant postconditions hold. */
617 for (i = 0; i < si->loctab_used-1; i++) {
618 /*
619 VG_(printf)("%d (%d) %d 0x%x\n",
620 i, si->loctab[i+1].confident,
621 si->loctab[i+1].size, si->loctab[i+1].addr );
622 */
623 /* No zero-sized symbols. */
624 vg_assert(si->loctab[i].size > 0);
625 /* In order. */
626 vg_assert(si->loctab[i].addr < si->loctab[i+1].addr);
627 /* No overlaps. */
628 vg_assert(si->loctab[i].addr + si->loctab[i].size - 1
629 < si->loctab[i+1].addr);
630 }
631# undef SWAP
632}
633
634
635/*------------------------------------------------------------*/
sewardjb51f2e62002-06-01 23:11:19 +0000636/*--- Read STABS format debug info. ---*/
sewardjde4a1d02002-03-22 01:27:54 +0000637/*------------------------------------------------------------*/
638
/* Stabs entry types, from:
 *   The "stabs" debug format
 *   Menapace, Kingdon and MacKenzie
 *   Cygnus Support
 *
 * These are the values found in the n_type field of a stabs entry.
 */
typedef enum {
   N_GSYM  = 0x20,   /* Global symbol                    */
   N_FUN   = 0x24,   /* Function start or end            */
   N_STSYM = 0x26,   /* Data segment file-scope variable */
   N_LCSYM = 0x28,   /* BSS segment file-scope variable  */
   N_RSYM  = 0x40,   /* Register variable                */
   N_SLINE = 0x44,   /* Source line number               */
   N_SO    = 0x64,   /* Source file path and name        */
   N_LSYM  = 0x80,   /* Stack variable or type           */
   N_SOL   = 0x84,   /* Include file name                */
   N_LBRAC = 0xc0,   /* Start of lexical block           */
   N_RBRAC = 0xe0    /* End of lexical block             */
} stab_types;
656
657
658/* Read stabs-format debug info. This is all rather horrible because
659 stabs is a underspecified, kludgy hack.
660*/
661static
662void read_debuginfo_stabs ( SegInfo* si,
663 UChar* stabC, Int stab_sz,
664 UChar* stabstr, Int stabstr_sz )
sewardjde4a1d02002-03-22 01:27:54 +0000665{
sewardjb51f2e62002-06-01 23:11:19 +0000666 Int i;
667 Int curr_filenmoff;
njnb79ad342002-06-05 15:30:30 +0000668 Addr curr_fn_stabs_addr = (Addr)NULL;
669 Addr curr_fnbaseaddr = (Addr)NULL;
sewardjb51f2e62002-06-01 23:11:19 +0000670 Char *curr_file_name, *curr_fn_name;
671 Int n_stab_entries;
njnb79ad342002-06-05 15:30:30 +0000672 Int prev_lineno = 0, lineno = 0;
673 Int lineno_overflows = 0;
674 Bool same_file = True;
sewardjb51f2e62002-06-01 23:11:19 +0000675 struct nlist* stab = (struct nlist*)stabC;
njnb79ad342002-06-05 15:30:30 +0000676
sewardjb51f2e62002-06-01 23:11:19 +0000677 /* Ok. It all looks plausible. Go on and read debug data.
678 stab kinds: 100 N_SO a source file name
679 68 N_SLINE a source line number
680 36 N_FUN start of a function
njn4f9c9342002-04-29 16:03:24 +0000681
sewardjb51f2e62002-06-01 23:11:19 +0000682 In this loop, we maintain a current file name, updated as
683 N_SO/N_SOLs appear, and a current function base address,
684 updated as N_FUNs appear. Based on that, address ranges for
685 N_SLINEs are calculated, and stuffed into the line info table.
sewardjde4a1d02002-03-22 01:27:54 +0000686
sewardjb51f2e62002-06-01 23:11:19 +0000687 Finding the instruction address range covered by an N_SLINE is
688 complicated; see the N_SLINE case below.
689 */
njnb79ad342002-06-05 15:30:30 +0000690 curr_filenmoff = addStr(si,"???");
691 curr_file_name = curr_fn_name = (Char*)NULL;
sewardjde4a1d02002-03-22 01:27:54 +0000692
sewardjb51f2e62002-06-01 23:11:19 +0000693 n_stab_entries = stab_sz/(int)sizeof(struct nlist);
njne0ee0712002-05-03 16:41:05 +0000694
sewardjb51f2e62002-06-01 23:11:19 +0000695 for (i = 0; i < n_stab_entries; i++) {
696# if 0
697 VG_(printf) ( " %2d ", i );
698 VG_(printf) ( "type=0x%x othr=%d desc=%d value=0x%x strx=%d %s",
699 stab[i].n_type, stab[i].n_other, stab[i].n_desc,
700 (int)stab[i].n_value,
701 (int)stab[i].n_un.n_strx,
702 stabstr + stab[i].n_un.n_strx );
703 VG_(printf)("\n");
704# endif
njne0ee0712002-05-03 16:41:05 +0000705
sewardjb51f2e62002-06-01 23:11:19 +0000706 Char *no_fn_name = "???";
707
708 switch (stab[i].n_type) {
709 UInt next_addr;
710
711 /* Two complicated things here:
712 *
713 * 1. the n_desc field in 'struct n_list' in a.out.h is only
714 * 16-bits, which gives a maximum of 65535 lines. We handle
715 * files bigger than this by detecting heuristically
716 * overflows -- if the line count goes from 65000-odd to
717 * 0-odd within the same file, we assume it's an overflow.
718 * Once we switch files, we zero the overflow count.
719 *
720 * 2. To compute the instr address range covered by a single
721 * line, find the address of the next thing and compute the
722 * difference. The approach used depends on what kind of
723 * entry/entries follow...
724 */
725 case N_SLINE: {
726 Int this_addr = (UInt)stab[i].n_value;
727
728 /* Although stored as a short, neg values really are >
729 * 32768, hence the UShort cast. Then we use an Int to
730 * handle overflows. */
731 prev_lineno = lineno;
732 lineno = (Int)((UShort)stab[i].n_desc);
733
734 if (prev_lineno > lineno + OVERFLOW_DIFFERENCE && same_file) {
735 VG_(message)(Vg_DebugMsg,
736 "Line number overflow detected (%d --> %d) in %s",
737 prev_lineno, lineno, curr_file_name);
738 lineno_overflows++;
739 }
740 same_file = True;
741
742 LOOP:
743 if (i+1 >= n_stab_entries) {
744 /* If it's the last entry, just guess the range is
745 * four; can't do any better */
746 next_addr = this_addr + 4;
747 } else {
748 switch (stab[i+1].n_type) {
749 /* Easy, common case: use address of next entry */
750 case N_SLINE: case N_SO:
751 next_addr = (UInt)stab[i+1].n_value;
752 break;
753
njn25e49d8e72002-09-23 09:36:25 +0000754 /* Boring one: skip, look for something more useful. */
sewardjb51f2e62002-06-01 23:11:19 +0000755 case N_RSYM: case N_LSYM: case N_LBRAC: case N_RBRAC:
756 case N_STSYM: case N_LCSYM: case N_GSYM:
757 i++;
758 goto LOOP;
759
njnb79ad342002-06-05 15:30:30 +0000760 /* If end-of-this-fun entry, use its address.
761 * If start-of-next-fun entry, find difference between start
762 * of current function and start of next function to work
763 * it out.
764 */
sewardjb51f2e62002-06-01 23:11:19 +0000765 case N_FUN:
766 if ('\0' == * (stabstr + stab[i+1].n_un.n_strx) ) {
767 next_addr = (UInt)stab[i+1].n_value;
768 } else {
njnb79ad342002-06-05 15:30:30 +0000769 next_addr =
770 (UInt)stab[i+1].n_value - curr_fn_stabs_addr;
sewardjb51f2e62002-06-01 23:11:19 +0000771 }
772 break;
773
774 /* N_SOL should be followed by an N_SLINE which can
775 be used */
776 case N_SOL:
777 if (i+2 < n_stab_entries && N_SLINE == stab[i+2].n_type) {
778 next_addr = (UInt)stab[i+2].n_value;
779 break;
780 } else {
781 VG_(printf)("unhandled N_SOL stabs case: %d %d %d",
782 stab[i+1].n_type, i, n_stab_entries);
783 VG_(panic)("unhandled N_SOL stabs case");
784 }
785
786 default:
787 VG_(printf)("unhandled (other) stabs case: %d %d",
788 stab[i+1].n_type,i);
789 /* VG_(panic)("unhandled (other) stabs case"); */
790 next_addr = this_addr + 4;
791 break;
792 }
793 }
794
795 addLineInfo ( si, curr_filenmoff, curr_fnbaseaddr + this_addr,
796 curr_fnbaseaddr + next_addr,
797 lineno + lineno_overflows * LINENO_OVERFLOW, i);
798 break;
799 }
800
801 case N_FUN: {
802 if ('\0' != (stabstr + stab[i].n_un.n_strx)[0] ) {
803 /* N_FUN with a name -- indicates the start of a fn. */
njnb79ad342002-06-05 15:30:30 +0000804 curr_fn_stabs_addr = (Addr)stab[i].n_value;
805 curr_fnbaseaddr = si->offset + curr_fn_stabs_addr;
sewardjb51f2e62002-06-01 23:11:19 +0000806 curr_fn_name = stabstr + stab[i].n_un.n_strx;
807 } else {
808 curr_fn_name = no_fn_name;
809 }
810 break;
811 }
812
813 case N_SOL:
814 if (lineno_overflows != 0) {
815 VG_(message)(Vg_UserMsg,
816 "Warning: file %s is very big (> 65535 lines) "
817 "Line numbers and annotation for this file might "
818 "be wrong. Sorry",
819 curr_file_name);
820 }
821 /* fall through! */
822 case N_SO:
823 lineno_overflows = 0;
824
825 /* seems to give lots of locations in header files */
826 /* case 130: */ /* BINCL */
827 {
828 UChar* nm = stabstr + stab[i].n_un.n_strx;
829 UInt len = VG_(strlen)(nm);
830
831 if (len > 0 && nm[len-1] != '/') {
832 curr_filenmoff = addStr ( si, nm );
833 curr_file_name = stabstr + stab[i].n_un.n_strx;
834 }
835 else
836 if (len == 0)
837 curr_filenmoff = addStr ( si, "?1\0" );
838
839 break;
840 }
841
842# if 0
843 case 162: /* EINCL */
844 curr_filenmoff = addStr ( si, "?2\0" );
845 break;
846# endif
847
848 default:
849 break;
850 }
851 } /* for (i = 0; i < stab_sz/(int)sizeof(struct nlist); i++) */
sewardjde4a1d02002-03-22 01:27:54 +0000852}
853
854
sewardjb51f2e62002-06-01 23:11:19 +0000855/*------------------------------------------------------------*/
856/*--- Read DWARF2 format debug info. ---*/
857/*------------------------------------------------------------*/
sewardjc134dd92002-06-01 14:21:36 +0000858
859/* Structure found in the .debug_line section. */
860typedef struct
861{
862 UChar li_length [4];
863 UChar li_version [2];
864 UChar li_prologue_length [4];
865 UChar li_min_insn_length [1];
866 UChar li_default_is_stmt [1];
867 UChar li_line_base [1];
868 UChar li_line_range [1];
869 UChar li_opcode_base [1];
870}
871DWARF2_External_LineInfo;
sewardjd84606d2002-06-18 01:04:57 +0000872
sewardjc134dd92002-06-01 14:21:36 +0000873typedef struct
874{
sewardj08a50f62002-06-17 02:21:20 +0000875 UInt li_length;
sewardjc134dd92002-06-01 14:21:36 +0000876 UShort li_version;
877 UInt li_prologue_length;
878 UChar li_min_insn_length;
879 UChar li_default_is_stmt;
sewardj08a50f62002-06-17 02:21:20 +0000880 Int li_line_base;
sewardjc134dd92002-06-01 14:21:36 +0000881 UChar li_line_range;
882 UChar li_opcode_base;
883}
884DWARF2_Internal_LineInfo;
sewardjd84606d2002-06-18 01:04:57 +0000885
/* Line number opcodes. */
enum dwarf_line_number_ops {
   DW_LNS_extended_op        = 0,
   DW_LNS_copy               = 1,
   DW_LNS_advance_pc         = 2,
   DW_LNS_advance_line       = 3,
   DW_LNS_set_file           = 4,
   DW_LNS_set_column         = 5,
   DW_LNS_negate_stmt        = 6,
   DW_LNS_set_basic_block    = 7,
   DW_LNS_const_add_pc       = 8,
   DW_LNS_fixed_advance_pc   = 9,

   /* DWARF 3. */
   DW_LNS_set_prologue_end   = 10,
   DW_LNS_set_epilogue_begin = 11,
   DW_LNS_set_isa            = 12
};
904
/* Line number extended opcodes. */
enum dwarf_line_number_x_ops {
   DW_LNE_end_sequence = 1,
   DW_LNE_set_address  = 2,
   DW_LNE_define_file  = 3
};
912
913typedef struct State_Machine_Registers
914{
sewardj08a50f62002-06-17 02:21:20 +0000915 Addr address;
sewardjc134dd92002-06-01 14:21:36 +0000916 UInt file;
917 UInt line;
918 UInt column;
919 Int is_stmt;
920 Int basic_block;
sewardj08a50f62002-06-17 02:21:20 +0000921 Int end_sequence;
922 /* This variable hold the number of the last entry seen
923 in the File Table. */
sewardjc134dd92002-06-01 14:21:36 +0000924 UInt last_file_entry;
925} SMR;
926
sewardjb51f2e62002-06-01 23:11:19 +0000927
928static
929UInt read_leb128 ( UChar* data, Int* length_return, Int sign )
930{
sewardj08a50f62002-06-17 02:21:20 +0000931 UInt result = 0;
932 UInt num_read = 0;
933 Int shift = 0;
934 UChar byte;
sewardjb51f2e62002-06-01 23:11:19 +0000935
936 do
937 {
938 byte = * data ++;
939 num_read ++;
940
941 result |= (byte & 0x7f) << shift;
942
943 shift += 7;
944
945 }
946 while (byte & 0x80);
947
948 if (length_return != NULL)
949 * length_return = num_read;
950
951 if (sign && (shift < 32) && (byte & 0x40))
952 result |= -1 << shift;
953
954 return result;
955}
956
957
sewardjc134dd92002-06-01 14:21:36 +0000958static SMR state_machine_regs;
959
sewardj08a50f62002-06-17 02:21:20 +0000960static
961void reset_state_machine ( Int is_stmt )
sewardjc134dd92002-06-01 14:21:36 +0000962{
sewardj08a50f62002-06-17 02:21:20 +0000963 if (0) VG_(printf)("smr.a := %p (reset)\n", 0 );
sewardjc134dd92002-06-01 14:21:36 +0000964 state_machine_regs.address = 0;
965 state_machine_regs.file = 1;
966 state_machine_regs.line = 1;
967 state_machine_regs.column = 0;
968 state_machine_regs.is_stmt = is_stmt;
969 state_machine_regs.basic_block = 0;
970 state_machine_regs.end_sequence = 0;
971 state_machine_regs.last_file_entry = 0;
972}
973
974/* Handled an extend line op. Returns true if this is the end
975 of sequence. */
sewardj08a50f62002-06-17 02:21:20 +0000976static
977int process_extended_line_op( SegInfo *si, UInt** fnames,
978 UChar* data, Int is_stmt, Int pointer_size)
sewardjc134dd92002-06-01 14:21:36 +0000979{
980 UChar op_code;
sewardj08a50f62002-06-17 02:21:20 +0000981 Int bytes_read;
sewardjc134dd92002-06-01 14:21:36 +0000982 UInt len;
983 UChar * name;
sewardj08a50f62002-06-17 02:21:20 +0000984 Addr adr;
sewardjc134dd92002-06-01 14:21:36 +0000985
986 len = read_leb128 (data, & bytes_read, 0);
987 data += bytes_read;
988
989 if (len == 0)
990 {
sewardj08a50f62002-06-17 02:21:20 +0000991 VG_(message)(Vg_UserMsg,
992 "badly formed extended line op encountered!\n");
sewardjc134dd92002-06-01 14:21:36 +0000993 return bytes_read;
994 }
995
996 len += bytes_read;
997 op_code = * data ++;
998
999
1000 switch (op_code)
1001 {
1002 case DW_LNE_end_sequence:
sewardj08a50f62002-06-17 02:21:20 +00001003 if (0) VG_(printf)("1001: si->o %p, smr.a %p\n",
1004 si->offset, state_machine_regs.address );
sewardjd84606d2002-06-18 01:04:57 +00001005 state_machine_regs.end_sequence = 1; /* JRS: added for compliance
1006 with spec; is pointless due to reset_state_machine below
1007 */
sewardj08a50f62002-06-17 02:21:20 +00001008 addLineInfo (si, (*fnames)[state_machine_regs.file],
1009 si->offset + (state_machine_regs.address - 1),
1010 si->offset + (state_machine_regs.address),
1011 0, 0);
sewardjc134dd92002-06-01 14:21:36 +00001012 reset_state_machine (is_stmt);
1013 break;
1014
1015 case DW_LNE_set_address:
1016 /* XXX: Pointer size could be 8 */
sewardj08a50f62002-06-17 02:21:20 +00001017 vg_assert(pointer_size == 4);
sewardjc134dd92002-06-01 14:21:36 +00001018 adr = *((Addr *)data);
sewardj08a50f62002-06-17 02:21:20 +00001019 if (0) VG_(printf)("smr.a := %p\n", adr );
sewardjc134dd92002-06-01 14:21:36 +00001020 state_machine_regs.address = adr;
1021 break;
1022
1023 case DW_LNE_define_file:
sewardjc134dd92002-06-01 14:21:36 +00001024 ++ state_machine_regs.last_file_entry;
1025 name = data;
1026 if (*fnames == NULL)
njn25e49d8e72002-09-23 09:36:25 +00001027 *fnames = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof (UInt) * 2);
sewardjc134dd92002-06-01 14:21:36 +00001028 else
njn25e49d8e72002-09-23 09:36:25 +00001029 *fnames = VG_(arena_realloc)(
1030 VG_AR_SYMTAB, *fnames, /*alignment*/4,
sewardj08a50f62002-06-17 02:21:20 +00001031 sizeof(UInt)
1032 * (state_machine_regs.last_file_entry + 1));
sewardjc134dd92002-06-01 14:21:36 +00001033 (*fnames)[state_machine_regs.last_file_entry] = addStr (si,name);
1034 data += VG_(strlen) ((char *) data) + 1;
1035 read_leb128 (data, & bytes_read, 0);
1036 data += bytes_read;
1037 read_leb128 (data, & bytes_read, 0);
1038 data += bytes_read;
sewardj08a50f62002-06-17 02:21:20 +00001039 read_leb128 (data, & bytes_read, 0);
sewardjc134dd92002-06-01 14:21:36 +00001040 break;
1041
1042 default:
1043 break;
1044 }
1045
1046 return len;
1047}
1048
1049
sewardjb51f2e62002-06-01 23:11:19 +00001050static
1051void read_debuginfo_dwarf2 ( SegInfo* si, UChar* dwarf2, Int dwarf2_sz )
sewardjc134dd92002-06-01 14:21:36 +00001052{
1053 DWARF2_External_LineInfo * external;
1054 DWARF2_Internal_LineInfo info;
1055 UChar * standard_opcodes;
sewardjb51f2e62002-06-01 23:11:19 +00001056 UChar * data = dwarf2;
1057 UChar * end = dwarf2 + dwarf2_sz;
sewardjc134dd92002-06-01 14:21:36 +00001058 UChar * end_of_sequence;
sewardj08a50f62002-06-17 02:21:20 +00001059 UInt * fnames = NULL;
sewardjc134dd92002-06-01 14:21:36 +00001060
sewardjd84606d2002-06-18 01:04:57 +00001061 /* Fails due to gcc padding ...
1062 vg_assert(sizeof(DWARF2_External_LineInfo)
1063 == sizeof(DWARF2_Internal_LineInfo));
1064 */
sewardjc134dd92002-06-01 14:21:36 +00001065
1066 while (data < end)
1067 {
1068 external = (DWARF2_External_LineInfo *) data;
1069
1070 /* Check the length of the block. */
sewardj08a50f62002-06-17 02:21:20 +00001071 info.li_length = * ((UInt *)(external->li_length));
sewardjc134dd92002-06-01 14:21:36 +00001072
1073 if (info.li_length == 0xffffffff)
1074 {
sewardjb51f2e62002-06-01 23:11:19 +00001075 vg_symerr("64-bit DWARF line info is not supported yet.");
sewardjc134dd92002-06-01 14:21:36 +00001076 break;
1077 }
1078
sewardjb51f2e62002-06-01 23:11:19 +00001079 if (info.li_length + sizeof (external->li_length) > dwarf2_sz)
sewardjc134dd92002-06-01 14:21:36 +00001080 {
sewardj08a50f62002-06-17 02:21:20 +00001081 vg_symerr("DWARF line info appears to be corrupt "
1082 "- the section is too small");
sewardjb51f2e62002-06-01 23:11:19 +00001083 return;
sewardjc134dd92002-06-01 14:21:36 +00001084 }
1085
1086 /* Check its version number. */
sewardj08a50f62002-06-17 02:21:20 +00001087 info.li_version = * ((UShort *) (external->li_version));
sewardjc134dd92002-06-01 14:21:36 +00001088 if (info.li_version != 2)
1089 {
sewardj08a50f62002-06-17 02:21:20 +00001090 vg_symerr("Only DWARF version 2 line info "
1091 "is currently supported.");
sewardjb51f2e62002-06-01 23:11:19 +00001092 return;
sewardjc134dd92002-06-01 14:21:36 +00001093 }
1094
sewardjd84606d2002-06-18 01:04:57 +00001095 info.li_prologue_length = * ((UInt *) (external->li_prologue_length));
1096 info.li_min_insn_length = * ((UChar *)(external->li_min_insn_length));
1097 info.li_default_is_stmt = * ((UChar *)(external->li_default_is_stmt));
1098
1099 /* JRS: changed (UInt*) to (UChar*) */
1100 info.li_line_base = * ((UChar *)(external->li_line_base));
1101
1102 info.li_line_range = * ((UChar *)(external->li_line_range));
1103 info.li_opcode_base = * ((UChar *)(external->li_opcode_base));
sewardjc134dd92002-06-01 14:21:36 +00001104
1105 /* Sign extend the line base field. */
1106 info.li_line_base <<= 24;
1107 info.li_line_base >>= 24;
1108
sewardj08a50f62002-06-17 02:21:20 +00001109 end_of_sequence = data + info.li_length
1110 + sizeof (external->li_length);
sewardjc134dd92002-06-01 14:21:36 +00001111
1112 reset_state_machine (info.li_default_is_stmt);
1113
1114 /* Read the contents of the Opcodes table. */
1115 standard_opcodes = data + sizeof (* external);
1116
sewardjc134dd92002-06-01 14:21:36 +00001117 /* Read the contents of the Directory table. */
1118 data = standard_opcodes + info.li_opcode_base - 1;
1119
sewardj08a50f62002-06-17 02:21:20 +00001120 if (* data == 0)
1121 {
1122 }
sewardjc134dd92002-06-01 14:21:36 +00001123 else
1124 {
sewardj08a50f62002-06-17 02:21:20 +00001125 /* We ignore the directory table, since gcc gives the entire
1126 path as part of the filename */
sewardjc134dd92002-06-01 14:21:36 +00001127 while (* data != 0)
1128 {
1129 data += VG_(strlen) ((char *) data) + 1;
1130 }
1131 }
1132
1133 /* Skip the NUL at the end of the table. */
sewardjd84606d2002-06-18 01:04:57 +00001134 if (*data != 0) {
1135 vg_symerr("can't find NUL at end of DWARF2 directory table");
1136 return;
1137 }
sewardjc134dd92002-06-01 14:21:36 +00001138 data ++;
1139
1140 /* Read the contents of the File Name table. */
sewardj08a50f62002-06-17 02:21:20 +00001141 if (* data == 0)
1142 {
1143 }
sewardjc134dd92002-06-01 14:21:36 +00001144 else
1145 {
sewardjc134dd92002-06-01 14:21:36 +00001146 while (* data != 0)
1147 {
1148 UChar * name;
1149 Int bytes_read;
1150
sewardj08a50f62002-06-17 02:21:20 +00001151 ++ state_machine_regs.last_file_entry;
sewardjc134dd92002-06-01 14:21:36 +00001152 name = data;
sewardj08a50f62002-06-17 02:21:20 +00001153 /* Since we don't have realloc (0, ....) == malloc (...)
1154 semantics, we need to malloc the first time. */
sewardjc134dd92002-06-01 14:21:36 +00001155
1156 if (fnames == NULL)
njn25e49d8e72002-09-23 09:36:25 +00001157 fnames = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof (UInt) * 2);
sewardjc134dd92002-06-01 14:21:36 +00001158 else
njn25e49d8e72002-09-23 09:36:25 +00001159 fnames = VG_(arena_realloc)(VG_AR_SYMTAB, fnames, /*alignment*/4,
sewardj08a50f62002-06-17 02:21:20 +00001160 sizeof(UInt)
1161 * (state_machine_regs.last_file_entry + 1));
1162 data += VG_(strlen) ((Char *) data) + 1;
sewardjc134dd92002-06-01 14:21:36 +00001163 fnames[state_machine_regs.last_file_entry] = addStr (si,name);
1164
1165 read_leb128 (data, & bytes_read, 0);
1166 data += bytes_read;
1167 read_leb128 (data, & bytes_read, 0);
1168 data += bytes_read;
1169 read_leb128 (data, & bytes_read, 0);
1170 data += bytes_read;
1171 }
1172 }
1173
1174 /* Skip the NUL at the end of the table. */
sewardjd84606d2002-06-18 01:04:57 +00001175 if (*data != 0) {
1176 vg_symerr("can't find NUL at end of DWARF2 file name table");
1177 return;
1178 }
sewardjc134dd92002-06-01 14:21:36 +00001179 data ++;
1180
1181 /* Now display the statements. */
1182
1183 while (data < end_of_sequence)
1184 {
1185 UChar op_code;
1186 Int adv;
1187 Int bytes_read;
1188
1189 op_code = * data ++;
1190
1191 if (op_code >= info.li_opcode_base)
1192 {
1193 Int advAddr;
1194 op_code -= info.li_opcode_base;
sewardj08a50f62002-06-17 02:21:20 +00001195 adv = (op_code / info.li_line_range)
1196 * info.li_min_insn_length;
sewardjc134dd92002-06-01 14:21:36 +00001197 advAddr = adv;
1198 state_machine_regs.address += adv;
sewardj08a50f62002-06-17 02:21:20 +00001199 if (0) VG_(printf)("smr.a += %p\n", adv );
sewardjc134dd92002-06-01 14:21:36 +00001200 adv = (op_code % info.li_line_range) + info.li_line_base;
sewardj08a50f62002-06-17 02:21:20 +00001201 if (0) VG_(printf)("1002: si->o %p, smr.a %p\n",
1202 si->offset, state_machine_regs.address );
1203 addLineInfo (si, fnames[state_machine_regs.file],
1204 si->offset + (state_machine_regs.address
1205 - advAddr),
1206 si->offset + (state_machine_regs.address),
1207 state_machine_regs.line, 0);
sewardjc134dd92002-06-01 14:21:36 +00001208 state_machine_regs.line += adv;
1209 }
1210 else switch (op_code)
1211 {
1212 case DW_LNS_extended_op:
sewardj08a50f62002-06-17 02:21:20 +00001213 data += process_extended_line_op (
1214 si, &fnames, data,
1215 info.li_default_is_stmt, sizeof (Addr));
sewardjc134dd92002-06-01 14:21:36 +00001216 break;
1217
1218 case DW_LNS_copy:
sewardj08a50f62002-06-17 02:21:20 +00001219 if (0) VG_(printf)("1002: si->o %p, smr.a %p\n",
1220 si->offset, state_machine_regs.address );
1221 addLineInfo (si, fnames[state_machine_regs.file],
1222 si->offset + state_machine_regs.address,
1223 si->offset + (state_machine_regs.address + 1),
1224 state_machine_regs.line , 0);
sewardjd84606d2002-06-18 01:04:57 +00001225 state_machine_regs.basic_block = 0; /* JRS added */
sewardjc134dd92002-06-01 14:21:36 +00001226 break;
1227
1228 case DW_LNS_advance_pc:
sewardj08a50f62002-06-17 02:21:20 +00001229 adv = info.li_min_insn_length
1230 * read_leb128 (data, & bytes_read, 0);
sewardjc134dd92002-06-01 14:21:36 +00001231 data += bytes_read;
1232 state_machine_regs.address += adv;
sewardj08a50f62002-06-17 02:21:20 +00001233 if (0) VG_(printf)("smr.a += %p\n", adv );
sewardjc134dd92002-06-01 14:21:36 +00001234 break;
1235
1236 case DW_LNS_advance_line:
1237 adv = read_leb128 (data, & bytes_read, 1);
1238 data += bytes_read;
1239 state_machine_regs.line += adv;
1240 break;
1241
1242 case DW_LNS_set_file:
1243 adv = read_leb128 (data, & bytes_read, 0);
1244 data += bytes_read;
1245 state_machine_regs.file = adv;
1246 break;
1247
1248 case DW_LNS_set_column:
1249 adv = read_leb128 (data, & bytes_read, 0);
1250 data += bytes_read;
1251 state_machine_regs.column = adv;
1252 break;
1253
1254 case DW_LNS_negate_stmt:
1255 adv = state_machine_regs.is_stmt;
1256 adv = ! adv;
1257 state_machine_regs.is_stmt = adv;
1258 break;
1259
1260 case DW_LNS_set_basic_block:
1261 state_machine_regs.basic_block = 1;
1262 break;
1263
1264 case DW_LNS_const_add_pc:
1265 adv = (((255 - info.li_opcode_base) / info.li_line_range)
1266 * info.li_min_insn_length);
1267 state_machine_regs.address += adv;
sewardj08a50f62002-06-17 02:21:20 +00001268 if (0) VG_(printf)("smr.a += %p\n", adv );
sewardjc134dd92002-06-01 14:21:36 +00001269 break;
1270
1271 case DW_LNS_fixed_advance_pc:
1272 /* XXX: Need something to get 2 bytes */
1273 adv = *((UShort *)data);
1274 data += 2;
1275 state_machine_regs.address += adv;
sewardj08a50f62002-06-17 02:21:20 +00001276 if (0) VG_(printf)("smr.a += %p\n", adv );
sewardjc134dd92002-06-01 14:21:36 +00001277 break;
1278
1279 case DW_LNS_set_prologue_end:
1280 break;
1281
1282 case DW_LNS_set_epilogue_begin:
1283 break;
1284
1285 case DW_LNS_set_isa:
1286 adv = read_leb128 (data, & bytes_read, 0);
1287 data += bytes_read;
1288 break;
1289
1290 default:
1291 {
1292 int j;
1293 for (j = standard_opcodes[op_code - 1]; j > 0 ; --j)
1294 {
1295 read_leb128 (data, &bytes_read, 0);
1296 data += bytes_read;
1297 }
1298 }
1299 break;
1300 }
1301 }
njn25e49d8e72002-09-23 09:36:25 +00001302 VG_(arena_free)(VG_AR_SYMTAB, fnames);
sewardjc134dd92002-06-01 14:21:36 +00001303 fnames = NULL;
1304 }
sewardjc134dd92002-06-01 14:21:36 +00001305}
1306
sewardjb51f2e62002-06-01 23:11:19 +00001307
1308/*------------------------------------------------------------*/
1309/*--- Read info from a .so/exe file. ---*/
1310/*------------------------------------------------------------*/
1311
sewardjde4a1d02002-03-22 01:27:54 +00001312/* Read the symbols from the object/exe specified by the SegInfo into
1313 the tables within the supplied SegInfo. */
1314static
1315void vg_read_lib_symbols ( SegInfo* si )
1316{
1317 Elf32_Ehdr* ehdr; /* The ELF header */
1318 Elf32_Shdr* shdr; /* The section table */
1319 UChar* sh_strtab; /* The section table's string table */
sewardjb51f2e62002-06-01 23:11:19 +00001320 UChar* stab; /* The .stab table */
sewardjde4a1d02002-03-22 01:27:54 +00001321 UChar* stabstr; /* The .stab string table */
sewardjb51f2e62002-06-01 23:11:19 +00001322 UChar* dwarf2; /* The DWARF2 location info table */
sewardjde4a1d02002-03-22 01:27:54 +00001323 Int stab_sz; /* Size in bytes of the .stab table */
1324 Int stabstr_sz; /* Size in bytes of the .stab string table */
sewardjb51f2e62002-06-01 23:11:19 +00001325 Int dwarf2_sz; /* Size in bytes of the DWARF2 srcloc table*/
sewardjde4a1d02002-03-22 01:27:54 +00001326 Int fd;
1327 Int i;
1328 Bool ok;
1329 Addr oimage;
1330 Int n_oimage;
sewardjb3586202002-05-09 17:38:13 +00001331 struct vki_stat stat_buf;
sewardjde4a1d02002-03-22 01:27:54 +00001332
sewardjde4a1d02002-03-22 01:27:54 +00001333 oimage = (Addr)NULL;
1334 if (VG_(clo_verbosity) > 1)
njne0ee0712002-05-03 16:41:05 +00001335 VG_(message)(Vg_UserMsg, "Reading syms from %s", si->filename );
sewardjde4a1d02002-03-22 01:27:54 +00001336
1337 /* mmap the object image aboard, so that we can read symbols and
1338 line number info out of it. It will be munmapped immediately
1339 thereafter; it is only aboard transiently. */
1340
sewardjb3586202002-05-09 17:38:13 +00001341 i = VG_(stat)(si->filename, &stat_buf);
sewardjde4a1d02002-03-22 01:27:54 +00001342 if (i != 0) {
1343 vg_symerr("Can't stat .so/.exe (to determine its size)?!");
1344 return;
1345 }
1346 n_oimage = stat_buf.st_size;
1347
njn25e49d8e72002-09-23 09:36:25 +00001348 fd = VG_(open)(si->filename, VKI_O_RDONLY, 0);
sewardjde4a1d02002-03-22 01:27:54 +00001349 if (fd == -1) {
1350 vg_symerr("Can't open .so/.exe to read symbols?!");
1351 return;
1352 }
1353
sewardjb3586202002-05-09 17:38:13 +00001354 oimage = (Addr)VG_(mmap)( NULL, n_oimage,
1355 VKI_PROT_READ, VKI_MAP_PRIVATE, fd, 0 );
sewardjde4a1d02002-03-22 01:27:54 +00001356 if (oimage == ((Addr)(-1))) {
1357 VG_(message)(Vg_UserMsg,
1358 "mmap failed on %s", si->filename );
1359 VG_(close)(fd);
1360 return;
1361 }
1362
1363 VG_(close)(fd);
1364
1365 /* Ok, the object image is safely in oimage[0 .. n_oimage-1].
1366 Now verify that it is a valid ELF .so or executable image.
1367 */
1368 ok = (n_oimage >= sizeof(Elf32_Ehdr));
1369 ehdr = (Elf32_Ehdr*)oimage;
1370
1371 if (ok) {
1372 ok &= (ehdr->e_ident[EI_MAG0] == 0x7F
1373 && ehdr->e_ident[EI_MAG1] == 'E'
1374 && ehdr->e_ident[EI_MAG2] == 'L'
1375 && ehdr->e_ident[EI_MAG3] == 'F');
1376 ok &= (ehdr->e_ident[EI_CLASS] == ELFCLASS32
1377 && ehdr->e_ident[EI_DATA] == ELFDATA2LSB
1378 && ehdr->e_ident[EI_VERSION] == EV_CURRENT);
1379 ok &= (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_DYN);
1380 ok &= (ehdr->e_machine == EM_386);
1381 ok &= (ehdr->e_version == EV_CURRENT);
1382 ok &= (ehdr->e_shstrndx != SHN_UNDEF);
1383 ok &= (ehdr->e_shoff != 0 && ehdr->e_shnum != 0);
1384 }
1385
1386 if (!ok) {
1387 vg_symerr("Invalid ELF header, or missing stringtab/sectiontab.");
1388 VG_(munmap) ( (void*)oimage, n_oimage );
1389 return;
1390 }
1391
1392 if (VG_(clo_trace_symtab))
1393 VG_(printf)(
1394 "shoff = %d, shnum = %d, size = %d, n_vg_oimage = %d\n",
1395 ehdr->e_shoff, ehdr->e_shnum, sizeof(Elf32_Shdr), n_oimage );
1396
1397 if (ehdr->e_shoff + ehdr->e_shnum*sizeof(Elf32_Shdr) > n_oimage) {
1398 vg_symerr("ELF section header is beyond image end?!");
1399 VG_(munmap) ( (void*)oimage, n_oimage );
1400 return;
1401 }
1402
1403 shdr = (Elf32_Shdr*)(oimage + ehdr->e_shoff);
1404 sh_strtab = (UChar*)(oimage + shdr[ehdr->e_shstrndx].sh_offset);
1405
1406 /* try and read the object's symbol table */
1407 {
1408 UChar* o_strtab = NULL;
1409 Elf32_Sym* o_symtab = NULL;
1410 UInt o_strtab_sz = 0;
1411 UInt o_symtab_sz = 0;
1412
1413 UChar* o_got = NULL;
1414 UChar* o_plt = NULL;
1415 UInt o_got_sz = 0;
1416 UInt o_plt_sz = 0;
1417
1418 Bool snaffle_it;
1419 Addr sym_addr;
1420
1421 /* find the .stabstr and .stab sections */
1422 for (i = 0; i < ehdr->e_shnum; i++) {
1423 if (0 == VG_(strcmp)(".symtab",sh_strtab + shdr[i].sh_name)) {
1424 o_symtab = (Elf32_Sym*)(oimage + shdr[i].sh_offset);
1425 o_symtab_sz = shdr[i].sh_size;
1426 vg_assert((o_symtab_sz % sizeof(Elf32_Sym)) == 0);
1427 /* check image overrun here */
1428 }
1429 if (0 == VG_(strcmp)(".strtab",sh_strtab + shdr[i].sh_name)) {
1430 o_strtab = (UChar*)(oimage + shdr[i].sh_offset);
1431 o_strtab_sz = shdr[i].sh_size;
1432 /* check image overrun here */
1433 }
1434
1435 /* find out where the .got and .plt sections will be in the
1436 executable image, not in the object image transiently loaded.
1437 */
1438 if (0 == VG_(strcmp)(".got",sh_strtab + shdr[i].sh_name)) {
1439 o_got = (UChar*)(si->offset
1440 + shdr[i].sh_offset);
1441 o_got_sz = shdr[i].sh_size;
1442 /* check image overrun here */
1443 }
1444 if (0 == VG_(strcmp)(".plt",sh_strtab + shdr[i].sh_name)) {
1445 o_plt = (UChar*)(si->offset
1446 + shdr[i].sh_offset);
1447 o_plt_sz = shdr[i].sh_size;
1448 /* check image overrun here */
1449 }
1450
1451 }
1452
1453 if (VG_(clo_trace_symtab)) {
1454 if (o_plt) VG_(printf)( "PLT: %p .. %p\n",
1455 o_plt, o_plt + o_plt_sz - 1 );
1456 if (o_got) VG_(printf)( "GOT: %p .. %p\n",
1457 o_got, o_got + o_got_sz - 1 );
1458 }
1459
1460 if (o_strtab == NULL || o_symtab == NULL) {
1461 vg_symerr(" object doesn't have a symbol table");
1462 } else {
1463 /* Perhaps should start at i = 1; ELF docs suggest that entry
1464 0 always denotes `unknown symbol'. */
1465 for (i = 1; i < o_symtab_sz/sizeof(Elf32_Sym); i++){
1466# if 0
1467 VG_(printf)("raw symbol: ");
1468 switch (ELF32_ST_BIND(o_symtab[i].st_info)) {
1469 case STB_LOCAL: VG_(printf)("LOC "); break;
1470 case STB_GLOBAL: VG_(printf)("GLO "); break;
1471 case STB_WEAK: VG_(printf)("WEA "); break;
1472 case STB_LOPROC: VG_(printf)("lop "); break;
1473 case STB_HIPROC: VG_(printf)("hip "); break;
1474 default: VG_(printf)("??? "); break;
1475 }
1476 switch (ELF32_ST_TYPE(o_symtab[i].st_info)) {
1477 case STT_NOTYPE: VG_(printf)("NOT "); break;
1478 case STT_OBJECT: VG_(printf)("OBJ "); break;
1479 case STT_FUNC: VG_(printf)("FUN "); break;
1480 case STT_SECTION: VG_(printf)("SEC "); break;
1481 case STT_FILE: VG_(printf)("FIL "); break;
1482 case STT_LOPROC: VG_(printf)("lop "); break;
1483 case STT_HIPROC: VG_(printf)("hip "); break;
1484 default: VG_(printf)("??? "); break;
1485 }
1486 VG_(printf)(
1487 ": value %p, size %d, name %s\n",
1488 si->offset+(UChar*)o_symtab[i].st_value,
1489 o_symtab[i].st_size,
1490 o_symtab[i].st_name
1491 ? ((Char*)o_strtab+o_symtab[i].st_name)
1492 : (Char*)"NONAME");
1493# endif
1494
1495 /* Figure out if we're interested in the symbol.
1496 Firstly, is it of the right flavour?
1497 */
1498 snaffle_it
1499 = ( (ELF32_ST_BIND(o_symtab[i].st_info) == STB_GLOBAL ||
1500 ELF32_ST_BIND(o_symtab[i].st_info) == STB_LOCAL /* ||
1501 ELF32_ST_BIND(o_symtab[i].st_info) == STB_WEAK */)
1502 &&
1503 (ELF32_ST_TYPE(o_symtab[i].st_info) == STT_FUNC /*||
1504 ELF32_ST_TYPE(o_symtab[i].st_info) == STT_OBJECT*/)
1505 );
1506
1507 /* Secondly, if it's apparently in a GOT or PLT, it's really
1508 a reference to a symbol defined elsewhere, so ignore it.
1509 */
1510 sym_addr = si->offset
1511 + (UInt)o_symtab[i].st_value;
1512 if (o_got != NULL
1513 && sym_addr >= (Addr)o_got
1514 && sym_addr < (Addr)(o_got+o_got_sz)) {
1515 snaffle_it = False;
1516 if (VG_(clo_trace_symtab)) {
1517 VG_(printf)( "in GOT: %s\n",
1518 o_strtab+o_symtab[i].st_name);
1519 }
1520 }
1521 if (o_plt != NULL
1522 && sym_addr >= (Addr)o_plt
1523 && sym_addr < (Addr)(o_plt+o_plt_sz)) {
1524 snaffle_it = False;
1525 if (VG_(clo_trace_symtab)) {
1526 VG_(printf)( "in PLT: %s\n",
1527 o_strtab+o_symtab[i].st_name);
1528 }
1529 }
1530
1531 /* Don't bother if nameless, or zero-sized. */
1532 if (snaffle_it
1533 && (o_symtab[i].st_name == (Elf32_Word)NULL
1534 || /* VG_(strlen)(o_strtab+o_symtab[i].st_name) == 0 */
1535 /* equivalent but cheaper ... */
1536 * ((UChar*)(o_strtab+o_symtab[i].st_name)) == 0
1537 || o_symtab[i].st_size == 0)) {
1538 snaffle_it = False;
1539 if (VG_(clo_trace_symtab)) {
1540 VG_(printf)( "size=0: %s\n",
1541 o_strtab+o_symtab[i].st_name);
1542 }
1543 }
1544
1545# if 0
1546 /* Avoid _dl_ junk. (Why?) */
1547 /* 01-02-24: disabled until I find out if it really helps. */
1548 if (snaffle_it
1549 && (VG_(strncmp)("_dl_", o_strtab+o_symtab[i].st_name, 4) == 0
1550 || VG_(strncmp)("_r_debug",
1551 o_strtab+o_symtab[i].st_name, 8) == 0)) {
1552 snaffle_it = False;
1553 if (VG_(clo_trace_symtab)) {
1554 VG_(printf)( "_dl_ junk: %s\n",
1555 o_strtab+o_symtab[i].st_name);
1556 }
1557 }
1558# endif
1559
1560 /* This seems to significantly reduce the number of junk
1561 symbols, and particularly reduces the number of
1562 overlapping address ranges. Don't ask me why ... */
1563 if (snaffle_it && (Int)o_symtab[i].st_value == 0) {
1564 snaffle_it = False;
1565 if (VG_(clo_trace_symtab)) {
1566 VG_(printf)( "valu=0: %s\n",
1567 o_strtab+o_symtab[i].st_name);
1568 }
1569 }
1570
1571 /* If no part of the symbol falls within the mapped range,
1572 ignore it. */
1573 if (sym_addr+o_symtab[i].st_size <= si->start
1574 || sym_addr >= si->start+si->size) {
1575 snaffle_it = False;
1576 }
1577
1578 if (snaffle_it) {
1579 /* it's an interesting symbol; record ("snaffle") it. */
1580 RiSym sym;
1581 Char* t0 = o_symtab[i].st_name
1582 ? (Char*)(o_strtab+o_symtab[i].st_name)
1583 : (Char*)"NONAME";
1584 Int nmoff = addStr ( si, t0 );
1585 vg_assert(nmoff >= 0
1586 /* && 0==VG_(strcmp)(t0,&vg_strtab[nmoff]) */ );
1587 vg_assert( (Int)o_symtab[i].st_value >= 0);
1588 /* VG_(printf)("%p + %d: %s\n", si->addr,
1589 (Int)o_symtab[i].st_value, t0 ); */
1590 sym.addr = sym_addr;
1591 sym.size = o_symtab[i].st_size;
1592 sym.nmoff = nmoff;
1593 addSym ( si, &sym );
1594 }
1595 }
1596 }
1597 }
1598
sewardjb51f2e62002-06-01 23:11:19 +00001599 /* Reading of the stabs and/or dwarf2 debug format information, if
1600 any. */
sewardjde4a1d02002-03-22 01:27:54 +00001601 stabstr = NULL;
1602 stab = NULL;
sewardjb51f2e62002-06-01 23:11:19 +00001603 dwarf2 = NULL;
sewardjde4a1d02002-03-22 01:27:54 +00001604 stabstr_sz = 0;
1605 stab_sz = 0;
sewardjb51f2e62002-06-01 23:11:19 +00001606 dwarf2_sz = 0;
1607
1608 /* find the .stabstr / .stab / .debug_line sections */
sewardjde4a1d02002-03-22 01:27:54 +00001609 for (i = 0; i < ehdr->e_shnum; i++) {
1610 if (0 == VG_(strcmp)(".stab",sh_strtab + shdr[i].sh_name)) {
sewardjb51f2e62002-06-01 23:11:19 +00001611 stab = (UChar*)(oimage + shdr[i].sh_offset);
sewardjde4a1d02002-03-22 01:27:54 +00001612 stab_sz = shdr[i].sh_size;
1613 }
1614 if (0 == VG_(strcmp)(".stabstr",sh_strtab + shdr[i].sh_name)) {
1615 stabstr = (UChar*)(oimage + shdr[i].sh_offset);
1616 stabstr_sz = shdr[i].sh_size;
1617 }
sewardjc134dd92002-06-01 14:21:36 +00001618 if (0 == VG_(strcmp)(".debug_line",sh_strtab + shdr[i].sh_name)) {
sewardjb51f2e62002-06-01 23:11:19 +00001619 dwarf2 = (UChar *)(oimage + shdr[i].sh_offset);
1620 dwarf2_sz = shdr[i].sh_size;
sewardjc134dd92002-06-01 14:21:36 +00001621 }
sewardjde4a1d02002-03-22 01:27:54 +00001622 }
1623
sewardjb51f2e62002-06-01 23:11:19 +00001624 if ((stab == NULL || stabstr == NULL) && dwarf2 == NULL) {
sewardjde4a1d02002-03-22 01:27:54 +00001625 vg_symerr(" object doesn't have any debug info");
1626 VG_(munmap) ( (void*)oimage, n_oimage );
1627 return;
1628 }
1629
1630 if ( stab_sz + (UChar*)stab > n_oimage + (UChar*)oimage
1631 || stabstr_sz + (UChar*)stabstr
1632 > n_oimage + (UChar*)oimage ) {
sewardjb51f2e62002-06-01 23:11:19 +00001633 vg_symerr(" ELF (stabs) debug data is beyond image end?!");
sewardjde4a1d02002-03-22 01:27:54 +00001634 VG_(munmap) ( (void*)oimage, n_oimage );
1635 return;
1636 }
1637
sewardjb51f2e62002-06-01 23:11:19 +00001638 if ( dwarf2_sz + (UChar*)dwarf2 > n_oimage + (UChar*)oimage ) {
1639 vg_symerr(" ELF (dwarf2) debug data is beyond image end?!");
1640 VG_(munmap) ( (void*)oimage, n_oimage );
1641 return;
1642 }
sewardjde4a1d02002-03-22 01:27:54 +00001643
sewardjb51f2e62002-06-01 23:11:19 +00001644 /* Looks plausible. Go on and read debug data. */
1645 if (stab != NULL && stabstr != NULL) {
1646 read_debuginfo_stabs ( si, stab, stab_sz, stabstr, stabstr_sz );
1647 }
sewardjde4a1d02002-03-22 01:27:54 +00001648
sewardjb51f2e62002-06-01 23:11:19 +00001649 if (dwarf2 != NULL) {
1650 read_debuginfo_dwarf2 ( si, dwarf2, dwarf2_sz );
1651 }
sewardjde4a1d02002-03-22 01:27:54 +00001652
1653 /* Last, but not least, heave the oimage back overboard. */
1654 VG_(munmap) ( (void*)oimage, n_oimage );
1655}
1656
1657
1658/*------------------------------------------------------------*/
1659/*--- Main entry point for symbols table reading. ---*/
1660/*------------------------------------------------------------*/
1661
1662/* The root structure for the entire symbol table system. It is a
1663 linked list of SegInfos. Note that this entire mechanism assumes
1664 that what we read from /proc/self/maps doesn't contain overlapping
1665 address ranges, and as a result the SegInfos in this list describe
1666 disjoint address ranges.
1667*/
1668static SegInfo* segInfo = NULL;
1669
1670
njn25e49d8e72002-09-23 09:36:25 +00001671void VG_(read_symtab_callback) (
sewardjde4a1d02002-03-22 01:27:54 +00001672 Addr start, UInt size,
1673 Char rr, Char ww, Char xx,
1674 UInt foffset, UChar* filename )
1675{
1676 SegInfo* si;
1677
1678 /* Stay sane ... */
1679 if (size == 0)
1680 return;
1681
1682 /* We're only interested in collecting symbols in executable
1683 segments which are associated with a real file. Hence: */
1684 if (filename == NULL || xx != 'x')
1685 return;
1686 if (0 == VG_(strcmp)(filename, "/dev/zero"))
1687 return;
1688
1689 /* Perhaps we already have this one? If so, skip. */
1690 for (si = segInfo; si != NULL; si = si->next) {
1691 /*
1692 if (0==VG_(strcmp)(si->filename, filename))
1693 VG_(printf)("same fnames: %c%c%c (%p, %d) (%p, %d) %s\n",
1694 rr,ww,xx,si->start,si->size,start,size,filename);
1695 */
1696 /* For some reason the observed size of a mapping can change, so
1697 we don't use that to determine uniqueness. */
1698 if (si->start == start
1699 /* && si->size == size */
1700 && 0==VG_(strcmp)(si->filename, filename)) {
1701 return;
1702 }
1703 }
1704
1705 /* Get the record initialised right. */
njn25e49d8e72002-09-23 09:36:25 +00001706 si = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(SegInfo));
sewardjde4a1d02002-03-22 01:27:54 +00001707 si->next = segInfo;
1708 segInfo = si;
1709
1710 si->start = start;
1711 si->size = size;
1712 si->foffset = foffset;
njn25e49d8e72002-09-23 09:36:25 +00001713 si->filename = VG_(arena_malloc)(VG_AR_SYMTAB, 1 + VG_(strlen)(filename));
sewardjde4a1d02002-03-22 01:27:54 +00001714 VG_(strcpy)(si->filename, filename);
1715
1716 si->symtab = NULL;
1717 si->symtab_size = si->symtab_used = 0;
1718 si->loctab = NULL;
1719 si->loctab_size = si->loctab_used = 0;
1720 si->strtab = NULL;
1721 si->strtab_size = si->strtab_used = 0;
1722
1723 /* Kludge ... */
njn25e49d8e72002-09-23 09:36:25 +00001724 si->offset = si->start==VG_ASSUMED_EXE_BASE ? 0 : si->start;
sewardjde4a1d02002-03-22 01:27:54 +00001725
1726 /* And actually fill it up. */
njn25e49d8e72002-09-23 09:36:25 +00001727 vg_read_lib_symbols ( si );
1728 canonicaliseSymtab ( si );
1729 canonicaliseLoctab ( si );
sewardjde4a1d02002-03-22 01:27:54 +00001730}
1731
1732
1733/* This one really is the Head Honcho. Update the symbol tables to
1734 reflect the current state of /proc/self/maps. Rather than re-read
1735 everything, just read the entries which are not already in segInfo.
1736 So we can call here repeatedly, after every mmap of a non-anonymous
1737 segment with execute permissions, for example, to pick up new
1738 libraries as they are dlopen'd. Conversely, when the client does
1739 munmap(), vg_symtab_notify_munmap() throws away any symbol tables
1740 which happen to correspond to the munmap()d area. */
njn25e49d8e72002-09-23 09:36:25 +00001741void VG_(maybe_read_symbols) ( void )
sewardjde4a1d02002-03-22 01:27:54 +00001742{
njn25e49d8e72002-09-23 09:36:25 +00001743 if (!VG_(using_debug_info))
1744 return;
sewardjde4a1d02002-03-22 01:27:54 +00001745
njn25e49d8e72002-09-23 09:36:25 +00001746 VGP_PUSHCC(VgpReadSyms);
1747 VG_(read_procselfmaps) ( VG_(read_symtab_callback) );
1748 VGP_POPCC(VgpReadSyms);
sewardjde4a1d02002-03-22 01:27:54 +00001749}
1750
sewardjde4a1d02002-03-22 01:27:54 +00001751/* When an munmap() call happens, check to see whether it corresponds
1752 to a segment for a .so, and if so discard the relevant SegInfo.
1753 This might not be a very clever idea from the point of view of
1754 accuracy of error messages, but we need to do it in order to
sewardj18d75132002-05-16 11:06:21 +00001755 maintain the no-overlapping invariant.
sewardjde4a1d02002-03-22 01:27:54 +00001756*/
njn25e49d8e72002-09-23 09:36:25 +00001757void VG_(maybe_unload_symbols) ( Addr start, UInt length )
sewardjde4a1d02002-03-22 01:27:54 +00001758{
1759 SegInfo *prev, *curr;
1760
njn25e49d8e72002-09-23 09:36:25 +00001761 if (!VG_(using_debug_info))
1762 return;
1763
sewardjde4a1d02002-03-22 01:27:54 +00001764 prev = NULL;
1765 curr = segInfo;
1766 while (True) {
1767 if (curr == NULL) break;
1768 if (start == curr->start) break;
1769 prev = curr;
1770 curr = curr->next;
1771 }
sewardj18d75132002-05-16 11:06:21 +00001772 if (curr == NULL)
njn25e49d8e72002-09-23 09:36:25 +00001773 return;
sewardjde4a1d02002-03-22 01:27:54 +00001774
1775 VG_(message)(Vg_UserMsg,
1776 "discard syms in %s due to munmap()",
1777 curr->filename ? curr->filename : (UChar*)"???");
1778
1779 vg_assert(prev == NULL || prev->next == curr);
1780
1781 if (prev == NULL) {
1782 segInfo = curr->next;
1783 } else {
1784 prev->next = curr->next;
1785 }
1786
1787 freeSegInfo(curr);
njn25e49d8e72002-09-23 09:36:25 +00001788 return;
sewardjde4a1d02002-03-22 01:27:54 +00001789}
1790
1791
1792/*------------------------------------------------------------*/
1793/*--- Use of symbol table & location info to create ---*/
1794/*--- plausible-looking stack dumps. ---*/
1795/*------------------------------------------------------------*/
1796
njn25e49d8e72002-09-23 09:36:25 +00001797static __inline__ void ensure_debug_info_inited ( void )
1798{
1799 if (!VG_(using_debug_info)) {
1800 VG_(using_debug_info) = True;
1801 VG_(maybe_read_symbols)();
1802 }
1803}
1804
sewardjde4a1d02002-03-22 01:27:54 +00001805/* Find a symbol-table index containing the specified pointer, or -1
1806 if not found. Binary search. */
1807
njn25e49d8e72002-09-23 09:36:25 +00001808static Int search_one_symtab ( SegInfo* si, Addr ptr,
1809 Bool match_anywhere_in_fun )
sewardjde4a1d02002-03-22 01:27:54 +00001810{
1811 Addr a_mid_lo, a_mid_hi;
njn25e49d8e72002-09-23 09:36:25 +00001812 Int mid, size,
sewardjde4a1d02002-03-22 01:27:54 +00001813 lo = 0,
1814 hi = si->symtab_used-1;
1815 while (True) {
1816 /* current unsearched space is from lo to hi, inclusive. */
1817 if (lo > hi) return -1; /* not found */
1818 mid = (lo + hi) / 2;
1819 a_mid_lo = si->symtab[mid].addr;
njn25e49d8e72002-09-23 09:36:25 +00001820 size = ( match_anywhere_in_fun
1821 ? si->symtab[mid].size
1822 : 1);
1823 a_mid_hi = ((Addr)si->symtab[mid].addr) + size - 1;
sewardjde4a1d02002-03-22 01:27:54 +00001824
1825 if (ptr < a_mid_lo) { hi = mid-1; continue; }
1826 if (ptr > a_mid_hi) { lo = mid+1; continue; }
1827 vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
1828 return mid;
1829 }
1830}
1831
1832
1833/* Search all symtabs that we know about to locate ptr. If found, set
1834 *psi to the relevant SegInfo, and *symno to the symtab entry number
1835 within that. If not found, *psi is set to NULL. */
1836
njn25e49d8e72002-09-23 09:36:25 +00001837static void search_all_symtabs ( Addr ptr, /*OUT*/SegInfo** psi,
1838 /*OUT*/Int* symno,
1839 Bool match_anywhere_in_fun )
sewardjde4a1d02002-03-22 01:27:54 +00001840{
1841 Int sno;
1842 SegInfo* si;
njn25e49d8e72002-09-23 09:36:25 +00001843
1844 ensure_debug_info_inited();
1845 VGP_PUSHCC(VgpSearchSyms);
1846
sewardjde4a1d02002-03-22 01:27:54 +00001847 for (si = segInfo; si != NULL; si = si->next) {
1848 if (si->start <= ptr && ptr < si->start+si->size) {
njn25e49d8e72002-09-23 09:36:25 +00001849 sno = search_one_symtab ( si, ptr, match_anywhere_in_fun );
sewardjde4a1d02002-03-22 01:27:54 +00001850 if (sno == -1) goto not_found;
1851 *symno = sno;
1852 *psi = si;
njn25e49d8e72002-09-23 09:36:25 +00001853 VGP_POPCC(VgpSearchSyms);
sewardjde4a1d02002-03-22 01:27:54 +00001854 return;
1855 }
1856 }
1857 not_found:
1858 *psi = NULL;
njn25e49d8e72002-09-23 09:36:25 +00001859 VGP_POPCC(VgpSearchSyms);
sewardjde4a1d02002-03-22 01:27:54 +00001860}
1861
1862
1863/* Find a location-table index containing the specified pointer, or -1
1864 if not found. Binary search. */
1865
1866static Int search_one_loctab ( SegInfo* si, Addr ptr )
1867{
1868 Addr a_mid_lo, a_mid_hi;
1869 Int mid,
1870 lo = 0,
1871 hi = si->loctab_used-1;
1872 while (True) {
1873 /* current unsearched space is from lo to hi, inclusive. */
1874 if (lo > hi) return -1; /* not found */
1875 mid = (lo + hi) / 2;
1876 a_mid_lo = si->loctab[mid].addr;
1877 a_mid_hi = ((Addr)si->loctab[mid].addr) + si->loctab[mid].size - 1;
1878
1879 if (ptr < a_mid_lo) { hi = mid-1; continue; }
1880 if (ptr > a_mid_hi) { lo = mid+1; continue; }
1881 vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
1882 return mid;
1883 }
1884}
1885
1886
1887/* Search all loctabs that we know about to locate ptr. If found, set
1888 *psi to the relevant SegInfo, and *locno to the loctab entry number
1889 within that. If not found, *psi is set to NULL.
1890*/
njn25e49d8e72002-09-23 09:36:25 +00001891static void search_all_loctabs ( Addr ptr, /*OUT*/SegInfo** psi,
1892 /*OUT*/Int* locno )
sewardjde4a1d02002-03-22 01:27:54 +00001893{
1894 Int lno;
1895 SegInfo* si;
njn25e49d8e72002-09-23 09:36:25 +00001896
1897 VGP_PUSHCC(VgpSearchSyms);
1898
1899 ensure_debug_info_inited();
sewardjde4a1d02002-03-22 01:27:54 +00001900 for (si = segInfo; si != NULL; si = si->next) {
1901 if (si->start <= ptr && ptr < si->start+si->size) {
1902 lno = search_one_loctab ( si, ptr );
1903 if (lno == -1) goto not_found;
1904 *locno = lno;
1905 *psi = si;
njn25e49d8e72002-09-23 09:36:25 +00001906 VGP_POPCC(VgpSearchSyms);
sewardjde4a1d02002-03-22 01:27:54 +00001907 return;
1908 }
1909 }
1910 not_found:
1911 *psi = NULL;
njn25e49d8e72002-09-23 09:36:25 +00001912 VGP_POPCC(VgpSearchSyms);
sewardjde4a1d02002-03-22 01:27:54 +00001913}
1914
1915
1916/* The whole point of this whole big deal: map a code address to a
1917 plausible symbol name. Returns False if no idea; otherwise True.
njn25e49d8e72002-09-23 09:36:25 +00001918 Caller supplies buf and nbuf. If demangle is False, don't do
sewardjde4a1d02002-03-22 01:27:54 +00001919 demangling, regardless of vg_clo_demangle -- probably because the
1920 call has come from vg_what_fn_or_object_is_this. */
njn25e49d8e72002-09-23 09:36:25 +00001921static
1922Bool get_fnname ( Bool demangle, Addr a, Char* buf, Int nbuf,
1923 Bool match_anywhere_in_fun )
sewardjde4a1d02002-03-22 01:27:54 +00001924{
1925 SegInfo* si;
1926 Int sno;
njn25e49d8e72002-09-23 09:36:25 +00001927 search_all_symtabs ( a, &si, &sno, match_anywhere_in_fun );
sewardjde4a1d02002-03-22 01:27:54 +00001928 if (si == NULL)
1929 return False;
njn25e49d8e72002-09-23 09:36:25 +00001930 if (demangle) {
1931 VG_(demangle) ( & si->strtab[si->symtab[sno].nmoff], buf, nbuf );
1932 } else {
sewardjde4a1d02002-03-22 01:27:54 +00001933 VG_(strncpy_safely)
1934 ( buf, & si->strtab[si->symtab[sno].nmoff], nbuf );
sewardjde4a1d02002-03-22 01:27:54 +00001935 }
1936 return True;
1937}
1938
njn25e49d8e72002-09-23 09:36:25 +00001939/* This is available to skins... always demangle C++ names */
1940Bool VG_(get_fnname) ( Addr a, Char* buf, Int nbuf )
1941{
1942 return get_fnname ( /*demangle*/True, a, buf, nbuf,
1943 /*match_anywhere_in_fun*/True );
1944}
sewardjde4a1d02002-03-22 01:27:54 +00001945
njn25e49d8e72002-09-23 09:36:25 +00001946/* This is available to skins... always demangle C++ names,
1947 only succeed if 'a' matches first instruction of function. */
1948Bool VG_(get_fnname_if_entry) ( Addr a, Char* buf, Int nbuf )
1949{
1950 return get_fnname ( /*demangle*/True, a, buf, nbuf,
1951 /*match_anywhere_in_fun*/False );
1952}
1953
1954/* This is only available to core... don't demangle C++ names */
1955Bool VG_(get_fnname_nodemangle) ( Addr a, Char* buf, Int nbuf )
1956{
1957 return get_fnname ( /*demangle*/False, a, buf, nbuf,
1958 /*match_anywhere_in_fun*/True );
1959}
1960
1961/* Map a code address to the name of a shared object file or the executable.
1962 Returns False if no idea; otherwise True. Doesn't require debug info.
1963 Caller supplies buf and nbuf. */
1964Bool VG_(get_objname) ( Addr a, Char* buf, Int nbuf )
sewardjde4a1d02002-03-22 01:27:54 +00001965{
1966 SegInfo* si;
njn25e49d8e72002-09-23 09:36:25 +00001967
1968 ensure_debug_info_inited();
sewardjde4a1d02002-03-22 01:27:54 +00001969 for (si = segInfo; si != NULL; si = si->next) {
1970 if (si->start <= a && a < si->start+si->size) {
1971 VG_(strncpy_safely)(buf, si->filename, nbuf);
1972 return True;
1973 }
1974 }
1975 return False;
1976}
1977
njn25e49d8e72002-09-23 09:36:25 +00001978
1979/* Map a code address to a filename. Returns True if successful. */
1980Bool VG_(get_filename)( Addr a, Char* filename, Int n_filename )
sewardjde4a1d02002-03-22 01:27:54 +00001981{
njn25e49d8e72002-09-23 09:36:25 +00001982 SegInfo* si;
1983 Int locno;
1984 search_all_loctabs ( a, &si, &locno );
1985 if (si == NULL)
1986 return False;
1987 VG_(strncpy_safely)(filename, & si->strtab[si->loctab[locno].fnmoff],
1988 n_filename);
1989 return True;
sewardjde4a1d02002-03-22 01:27:54 +00001990}
1991
njn25e49d8e72002-09-23 09:36:25 +00001992/* Map a code address to a line number. Returns True if successful. */
1993Bool VG_(get_linenum)( Addr a, UInt* lineno )
1994{
1995 SegInfo* si;
1996 Int locno;
1997 search_all_loctabs ( a, &si, &locno );
1998 if (si == NULL)
1999 return False;
2000 *lineno = si->loctab[locno].lineno;
2001
2002 return True;
2003}
sewardjde4a1d02002-03-22 01:27:54 +00002004
2005/* Map a code address to a (filename, line number) pair.
2006 Returns True if successful.
2007*/
njn25e49d8e72002-09-23 09:36:25 +00002008Bool VG_(get_filename_linenum)( Addr a,
2009 Char* filename, Int n_filename,
2010 UInt* lineno )
sewardjde4a1d02002-03-22 01:27:54 +00002011{
2012 SegInfo* si;
2013 Int locno;
2014 search_all_loctabs ( a, &si, &locno );
2015 if (si == NULL)
2016 return False;
2017 VG_(strncpy_safely)(filename, & si->strtab[si->loctab[locno].fnmoff],
2018 n_filename);
2019 *lineno = si->loctab[locno].lineno;
njn4f9c9342002-04-29 16:03:24 +00002020
sewardjde4a1d02002-03-22 01:27:54 +00002021 return True;
2022}
2023
2024
2025/* Print a mini stack dump, showing the current location. */
2026void VG_(mini_stack_dump) ( ExeContext* ec )
2027{
2028
2029#define APPEND(str) \
2030 { UChar* sss; \
2031 for (sss = str; n < M_VG_ERRTXT-1 && *sss != 0; n++,sss++) \
2032 buf[n] = *sss; \
2033 buf[n] = 0; \
2034 }
2035
2036 Bool know_fnname;
2037 Bool know_objname;
2038 Bool know_srcloc;
2039 UInt lineno;
2040 UChar ibuf[20];
sewardj04b91062002-06-05 21:22:04 +00002041 UInt i, n;
sewardjde4a1d02002-03-22 01:27:54 +00002042
2043 UChar buf[M_VG_ERRTXT];
2044 UChar buf_fn[M_VG_ERRTXT];
2045 UChar buf_obj[M_VG_ERRTXT];
2046 UChar buf_srcloc[M_VG_ERRTXT];
2047
2048 Int stop_at = VG_(clo_backtrace_size);
2049
2050 n = 0;
2051
njn25e49d8e72002-09-23 09:36:25 +00002052 // SSS: factor this repeated code out!
2053
2054 know_fnname = VG_(get_fnname) (ec->eips[0], buf_fn, M_VG_ERRTXT);
2055 know_objname = VG_(get_objname)(ec->eips[0], buf_obj, M_VG_ERRTXT);
2056 know_srcloc = VG_(get_filename_linenum)(ec->eips[0],
2057 buf_srcloc, M_VG_ERRTXT,
2058 &lineno);
sewardjde4a1d02002-03-22 01:27:54 +00002059
2060 APPEND(" at ");
2061 VG_(sprintf)(ibuf,"0x%x: ", ec->eips[0]);
2062 APPEND(ibuf);
2063 if (know_fnname) {
2064 APPEND(buf_fn);
2065 if (!know_srcloc && know_objname) {
2066 APPEND(" (in ");
2067 APPEND(buf_obj);
2068 APPEND(")");
2069 }
2070 } else if (know_objname && !know_srcloc) {
2071 APPEND("(within ");
2072 APPEND(buf_obj);
2073 APPEND(")");
2074 } else {
2075 APPEND("???");
2076 }
2077 if (know_srcloc) {
2078 APPEND(" (");
2079 APPEND(buf_srcloc);
2080 APPEND(":");
2081 VG_(sprintf)(ibuf,"%d",lineno);
2082 APPEND(ibuf);
2083 APPEND(")");
2084 }
2085 VG_(message)(Vg_UserMsg, "%s", buf);
2086
sewardj04b91062002-06-05 21:22:04 +00002087 for (i = 1; i < stop_at && ec->eips[i] != 0; i++) {
njn25e49d8e72002-09-23 09:36:25 +00002088 know_fnname = VG_(get_fnname) (ec->eips[i], buf_fn, M_VG_ERRTXT);
2089 know_objname = VG_(get_objname)(ec->eips[i], buf_obj, M_VG_ERRTXT);
2090 know_srcloc = VG_(get_filename_linenum)(ec->eips[i],
2091 buf_srcloc, M_VG_ERRTXT,
2092 &lineno);
sewardjde4a1d02002-03-22 01:27:54 +00002093 n = 0;
2094 APPEND(" by ");
sewardj04b91062002-06-05 21:22:04 +00002095 VG_(sprintf)(ibuf,"0x%x: ",ec->eips[i]);
2096 APPEND(ibuf);
sewardjde4a1d02002-03-22 01:27:54 +00002097 if (know_fnname) {
2098 APPEND(buf_fn)
2099 if (!know_srcloc && know_objname) {
2100 APPEND(" (in ");
2101 APPEND(buf_obj);
2102 APPEND(")");
2103 }
2104 } else {
2105 if (know_objname && !know_srcloc) {
2106 APPEND("(within ");
2107 APPEND(buf_obj);
2108 APPEND(")");
2109 } else {
2110 APPEND("???");
2111 }
sewardjde4a1d02002-03-22 01:27:54 +00002112 };
2113 if (know_srcloc) {
2114 APPEND(" (");
2115 APPEND(buf_srcloc);
2116 APPEND(":");
2117 VG_(sprintf)(ibuf,"%d",lineno);
2118 APPEND(ibuf);
2119 APPEND(")");
2120 }
2121 VG_(message)(Vg_UserMsg, "%s", buf);
2122 }
2123}
2124
2125#undef APPEND
2126
2127/*--------------------------------------------------------------------*/
2128/*--- end vg_symtab2.c ---*/
2129/*--------------------------------------------------------------------*/