blob: 728f2285c5f7e60cbd191e482a35f489f9e87e9b [file] [log] [blame]
sewardjde4a1d02002-03-22 01:27:54 +00001/*--------------------------------------------------------------------*/
2/*--- Management of symbols and debugging information. ---*/
3/*--- vg_symtab2.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7 This file is part of Valgrind, an x86 protected-mode emulator
8 designed for debugging and profiling binaries on x86-Unixes.
9
10 Copyright (C) 2000-2002 Julian Seward
11 jseward@acm.org
sewardjde4a1d02002-03-22 01:27:54 +000012
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
njn25e49d8e72002-09-23 09:36:25 +000028 The GNU General Public License is contained in the file COPYING.
sewardjde4a1d02002-03-22 01:27:54 +000029*/
30
31#include "vg_include.h"
sewardjde4a1d02002-03-22 01:27:54 +000032
33#include <elf.h> /* ELF defns */
34#include <a.out.h> /* stabs defns */
35
njn9aae6742002-04-30 13:44:01 +000036
sewardjde4a1d02002-03-22 01:27:54 +000037/* Majorly rewritten Sun 3 Feb 02 to enable loading symbols from
38 dlopen()ed libraries, which is something that KDE3 does a lot.
sewardjde4a1d02002-03-22 01:27:54 +000039
njn25e49d8e72002-09-23 09:36:25 +000040 Stabs reader greatly improved by Nick Nethercote, Apr 02.
sewardjde4a1d02002-03-22 01:27:54 +000041*/
42
njn25e49d8e72002-09-23 09:36:25 +000043/* Set to True when first debug info search is performed */
44Bool VG_(using_debug_info) = False;
45
sewardjde4a1d02002-03-22 01:27:54 +000046/*------------------------------------------------------------*/
47/*--- Structs n stuff ---*/
48/*------------------------------------------------------------*/
49
50/* A structure to hold an ELF symbol (very crudely). */
51typedef
52 struct {
53 Addr addr; /* lowest address of entity */
54 UInt size; /* size in bytes */
55 Int nmoff; /* offset of name in this SegInfo's str tab */
56 }
57 RiSym;
58
njne0ee0712002-05-03 16:41:05 +000059/* Line count at which overflow happens, due to line numbers being stored as
60 * shorts in `struct nlist' in a.out.h. */
61#define LINENO_OVERFLOW (1 << (sizeof(short) * 8))
sewardjde4a1d02002-03-22 01:27:54 +000062
njne0ee0712002-05-03 16:41:05 +000063#define LINENO_BITS 20
64#define LOC_SIZE_BITS (32 - LINENO_BITS)
sewardj97ff05f2002-05-09 01:32:57 +000065#define MAX_LINENO ((1 << LINENO_BITS) - 1)
njne0ee0712002-05-03 16:41:05 +000066
67/* Unlikely to have any lines with instruction ranges > 4096 bytes */
sewardj97ff05f2002-05-09 01:32:57 +000068#define MAX_LOC_SIZE ((1 << LOC_SIZE_BITS) - 1)
njne0ee0712002-05-03 16:41:05 +000069
/* Number used to detect line number overflows; if one line is 60000-odd
 * smaller than the previous, it was probably an overflow.
 */
73#define OVERFLOW_DIFFERENCE (LINENO_OVERFLOW - 5000)
74
75/* A structure to hold addr-to-source info for a single line. There can be a
76 * lot of these, hence the dense packing. */
sewardjde4a1d02002-03-22 01:27:54 +000077typedef
78 struct {
njne0ee0712002-05-03 16:41:05 +000079 /* Word 1 */
80 Addr addr; /* lowest address for this line */
81 /* Word 2 */
82 UShort size:LOC_SIZE_BITS; /* byte size; we catch overflows of this */
83 UInt lineno:LINENO_BITS; /* source line number, or zero */
84 /* Word 3 */
85 UInt fnmoff; /* source filename; offset in this
86 SegInfo's str tab */
sewardjde4a1d02002-03-22 01:27:54 +000087 }
88 RiLoc;
89
90
91/* A structure which contains information pertaining to one mapped
92 text segment. */
93typedef
94 struct _SegInfo {
95 struct _SegInfo* next;
96 /* Description of the mapped segment. */
97 Addr start;
98 UInt size;
99 UChar* filename; /* in mallocville */
100 UInt foffset;
101 /* An expandable array of symbols. */
102 RiSym* symtab;
103 UInt symtab_used;
104 UInt symtab_size;
105 /* An expandable array of locations. */
106 RiLoc* loctab;
107 UInt loctab_used;
108 UInt loctab_size;
109 /* An expandable array of characters -- the string table. */
110 Char* strtab;
111 UInt strtab_used;
112 UInt strtab_size;
113 /* offset is what we need to add to symbol table entries
114 to get the real location of that symbol in memory.
115 For executables, offset is zero.
116 For .so's, offset == base_addr.
117 This seems like a giant kludge to me.
118 */
119 UInt offset;
120 }
121 SegInfo;
122
123
sewardjde4a1d02002-03-22 01:27:54 +0000124static void freeSegInfo ( SegInfo* si )
125{
126 vg_assert(si != NULL);
njn25e49d8e72002-09-23 09:36:25 +0000127 if (si->filename) VG_(arena_free)(VG_AR_SYMTAB, si->filename);
128 if (si->symtab) VG_(arena_free)(VG_AR_SYMTAB, si->symtab);
129 if (si->loctab) VG_(arena_free)(VG_AR_SYMTAB, si->loctab);
130 if (si->strtab) VG_(arena_free)(VG_AR_SYMTAB, si->strtab);
131 VG_(arena_free)(VG_AR_SYMTAB, si);
sewardjde4a1d02002-03-22 01:27:54 +0000132}
133
134
135/*------------------------------------------------------------*/
136/*--- Adding stuff ---*/
137/*------------------------------------------------------------*/
138
139/* Add a str to the string table, including terminating zero, and
njn25e49d8e72002-09-23 09:36:25 +0000140 return offset of the string in vg_strtab. Unless it's been seen
141 recently, in which case we find the old index and return that.
142 This avoids the most egregious duplications. */
sewardjde4a1d02002-03-22 01:27:54 +0000143
144static __inline__
145Int addStr ( SegInfo* si, Char* str )
146{
njn25e49d8e72002-09-23 09:36:25 +0000147# define EMPTY 0xffffffff
148# define NN 5
149
150 /* prevN[0] has the most recent, prevN[NN-1] the least recent */
151 static UInt prevN[] = { EMPTY, EMPTY, EMPTY, EMPTY, EMPTY };
152 static SegInfo* curr_si = NULL;
153
sewardjde4a1d02002-03-22 01:27:54 +0000154 Char* new_tab;
155 Int new_sz, i, space_needed;
njn25e49d8e72002-09-23 09:36:25 +0000156
157 /* Avoid gratuitous duplication: if we saw `str' within the last NN,
158 * within this segment, return that index. Saves about 200KB in glibc,
159 * extra time taken is too small to measure. --NJN 2002-Aug-30 */
160 if (curr_si == si) {
161 for (i = NN-1; i >= 0; i--) {
162 if (EMPTY != prevN[i] &&
163 (0 == VG_(strcmp)(str, &si->strtab[prevN[i]]))) {
164 return prevN[i];
165 }
166 }
167 } else {
168 /* New segment */
169 curr_si = si;
170 for (i = 0; i < 5; i++) prevN[i] = EMPTY;
171 }
172 /* Shuffle prevous ones along, put new one in. */
173 for (i = NN-1; i > 0; i--) prevN[i] = prevN[i-1];
174 prevN[0] = si->strtab_used;
175
176# undef EMPTY
177
sewardjde4a1d02002-03-22 01:27:54 +0000178 space_needed = 1 + VG_(strlen)(str);
njn25e49d8e72002-09-23 09:36:25 +0000179
sewardjde4a1d02002-03-22 01:27:54 +0000180 if (si->strtab_used + space_needed > si->strtab_size) {
181 new_sz = 2 * si->strtab_size;
182 if (new_sz == 0) new_sz = 5000;
njn25e49d8e72002-09-23 09:36:25 +0000183 new_tab = VG_(arena_malloc)(VG_AR_SYMTAB, new_sz);
sewardjde4a1d02002-03-22 01:27:54 +0000184 if (si->strtab != NULL) {
185 for (i = 0; i < si->strtab_used; i++)
186 new_tab[i] = si->strtab[i];
njn25e49d8e72002-09-23 09:36:25 +0000187 VG_(arena_free)(VG_AR_SYMTAB, si->strtab);
sewardjde4a1d02002-03-22 01:27:54 +0000188 }
189 si->strtab = new_tab;
190 si->strtab_size = new_sz;
191 }
192
193 for (i = 0; i < space_needed; i++)
194 si->strtab[si->strtab_used+i] = str[i];
195
196 si->strtab_used += space_needed;
197 vg_assert(si->strtab_used <= si->strtab_size);
njn25e49d8e72002-09-23 09:36:25 +0000198
sewardjde4a1d02002-03-22 01:27:54 +0000199 return si->strtab_used - space_needed;
200}
201
202/* Add a symbol to the symbol table. */
203
204static __inline__
205void addSym ( SegInfo* si, RiSym* sym )
206{
207 Int new_sz, i;
208 RiSym* new_tab;
209
210 /* Ignore zero-sized syms. */
211 if (sym->size == 0) return;
212
213 if (si->symtab_used == si->symtab_size) {
214 new_sz = 2 * si->symtab_size;
215 if (new_sz == 0) new_sz = 500;
njn25e49d8e72002-09-23 09:36:25 +0000216 new_tab = VG_(arena_malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiSym) );
sewardjde4a1d02002-03-22 01:27:54 +0000217 if (si->symtab != NULL) {
218 for (i = 0; i < si->symtab_used; i++)
219 new_tab[i] = si->symtab[i];
njn25e49d8e72002-09-23 09:36:25 +0000220 VG_(arena_free)(VG_AR_SYMTAB, si->symtab);
sewardjde4a1d02002-03-22 01:27:54 +0000221 }
222 si->symtab = new_tab;
223 si->symtab_size = new_sz;
224 }
225
226 si->symtab[si->symtab_used] = *sym;
227 si->symtab_used++;
228 vg_assert(si->symtab_used <= si->symtab_size);
229}
230
231/* Add a location to the location table. */
232
233static __inline__
234void addLoc ( SegInfo* si, RiLoc* loc )
235{
236 Int new_sz, i;
237 RiLoc* new_tab;
238
njne0ee0712002-05-03 16:41:05 +0000239 /* Zero-sized locs should have been ignored earlier */
240 vg_assert(loc->size > 0);
sewardjde4a1d02002-03-22 01:27:54 +0000241
242 if (si->loctab_used == si->loctab_size) {
243 new_sz = 2 * si->loctab_size;
244 if (new_sz == 0) new_sz = 500;
njn25e49d8e72002-09-23 09:36:25 +0000245 new_tab = VG_(arena_malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiLoc) );
sewardjde4a1d02002-03-22 01:27:54 +0000246 if (si->loctab != NULL) {
247 for (i = 0; i < si->loctab_used; i++)
248 new_tab[i] = si->loctab[i];
njn25e49d8e72002-09-23 09:36:25 +0000249 VG_(arena_free)(VG_AR_SYMTAB, si->loctab);
sewardjde4a1d02002-03-22 01:27:54 +0000250 }
251 si->loctab = new_tab;
252 si->loctab_size = new_sz;
253 }
254
255 si->loctab[si->loctab_used] = *loc;
256 si->loctab_used++;
257 vg_assert(si->loctab_used <= si->loctab_size);
258}
259
260
sewardjb51f2e62002-06-01 23:11:19 +0000261/* Top-level place to call to add a source-location mapping entry. */
262
263static __inline__
264void addLineInfo ( SegInfo* si,
265 Int fnmoff,
266 Addr this,
267 Addr next,
268 Int lineno,
sewardj08a50f62002-06-17 02:21:20 +0000269 Int entry /* only needed for debug printing */
270 )
sewardjb51f2e62002-06-01 23:11:19 +0000271{
272 RiLoc loc;
273 Int size = next - this;
274
275 /* Ignore zero-sized locs */
276 if (this == next) return;
277
278 /* Maximum sanity checking. Some versions of GNU as do a shabby
279 * job with stabs entries; if anything looks suspicious, revert to
280 * a size of 1. This should catch the instruction of interest
281 * (since if using asm-level debug info, one instruction will
282 * correspond to one line, unlike with C-level debug info where
283 * multiple instructions can map to the one line), but avoid
284 * catching any other instructions bogusly. */
285 if (this > next) {
286 VG_(message)(Vg_DebugMsg,
sewardj08a50f62002-06-17 02:21:20 +0000287 "warning: line info addresses out of order "
sewardjb51f2e62002-06-01 23:11:19 +0000288 "at entry %d: 0x%x 0x%x", entry, this, next);
289 size = 1;
290 }
291
292 if (size > MAX_LOC_SIZE) {
sewardjd84606d2002-06-18 01:04:57 +0000293 if (0)
sewardjb51f2e62002-06-01 23:11:19 +0000294 VG_(message)(Vg_DebugMsg,
sewardj08a50f62002-06-17 02:21:20 +0000295 "warning: line info address range too large "
sewardjb51f2e62002-06-01 23:11:19 +0000296 "at entry %d: %d", entry, size);
297 size = 1;
298 }
299
sewardj08a50f62002-06-17 02:21:20 +0000300 /* vg_assert(this < si->start + si->size && next-1 >= si->start); */
njne306ffe2002-06-08 13:34:17 +0000301 if (this >= si->start + si->size || next-1 < si->start) {
sewardjd84606d2002-06-18 01:04:57 +0000302 if (0)
sewardj08a50f62002-06-17 02:21:20 +0000303 VG_(message)(Vg_DebugMsg,
304 "warning: ignoring line info entry falling "
305 "outside current SegInfo: %p %p %p %p",
306 si->start, si->start + si->size,
307 this, next-1);
njne306ffe2002-06-08 13:34:17 +0000308 return;
309 }
310
311 vg_assert(lineno >= 0);
312 if (lineno > MAX_LINENO) {
313 VG_(message)(Vg_UserMsg,
sewardj08a50f62002-06-17 02:21:20 +0000314 "warning: ignoring line info entry with "
315 "huge line number (%d)", lineno);
njne306ffe2002-06-08 13:34:17 +0000316 VG_(message)(Vg_UserMsg,
317 " Can't handle line numbers "
sewardj08a50f62002-06-17 02:21:20 +0000318 "greater than %d, sorry", MAX_LINENO);
njne306ffe2002-06-08 13:34:17 +0000319 return;
320 }
sewardjb51f2e62002-06-01 23:11:19 +0000321
322 loc.addr = this;
323 loc.size = (UShort)size;
324 loc.lineno = lineno;
325 loc.fnmoff = fnmoff;
326 addLoc ( si, &loc );
327}
328
sewardjde4a1d02002-03-22 01:27:54 +0000329
330/*------------------------------------------------------------*/
331/*--- Helpers ---*/
332/*------------------------------------------------------------*/
333
334/* Non-fatal -- use vg_panic if terminal. */
335static
336void vg_symerr ( Char* msg )
337{
338 if (VG_(clo_verbosity) > 1)
339 VG_(message)(Vg_UserMsg,"%s", msg );
340}
341
342
343/* Print a symbol. */
344static
345void printSym ( SegInfo* si, Int i )
346{
347 VG_(printf)( "%5d: %8p .. %8p (%d) %s\n",
348 i,
349 si->symtab[i].addr,
350 si->symtab[i].addr + si->symtab[i].size - 1, si->symtab[i].size,
351 &si->strtab[si->symtab[i].nmoff] );
352}
353
354
355#if 0
356/* Print the entire sym tab. */
357static __attribute__ ((unused))
358void printSymtab ( void )
359{
360 Int i;
361 VG_(printf)("\n------ BEGIN vg_symtab ------\n");
362 for (i = 0; i < vg_symtab_used; i++)
363 printSym(i);
364 VG_(printf)("------ BEGIN vg_symtab ------\n");
365}
366#endif
367
368#if 0
369/* Paranoid strcat. */
370static
371void safeCopy ( UChar* dst, UInt maxlen, UChar* src )
372{
373 UInt i = 0, j = 0;
374 while (True) {
375 if (i >= maxlen) return;
376 if (dst[i] == 0) break;
377 i++;
378 }
379 while (True) {
380 if (i >= maxlen) return;
381 dst[i] = src[j];
382 if (src[j] == 0) return;
383 i++; j++;
384 }
385}
386#endif
387
sewardjb51f2e62002-06-01 23:11:19 +0000388
sewardjde4a1d02002-03-22 01:27:54 +0000389/*------------------------------------------------------------*/
390/*--- Canonicalisers ---*/
391/*------------------------------------------------------------*/
392
393/* Sort the symtab by starting address, and emit warnings if any
394 symbols have overlapping address ranges. We use that old chestnut,
395 shellsort. Mash the table around so as to establish the property
396 that addresses are in order and the ranges to not overlap. This
397 facilitates using binary search to map addresses to symbols when we
398 come to query the table.
399*/
400static
401void canonicaliseSymtab ( SegInfo* si )
402{
403 /* Magic numbers due to Janet Incerpi and Robert Sedgewick. */
404 Int incs[16] = { 1, 3, 7, 21, 48, 112, 336, 861, 1968,
405 4592, 13776, 33936, 86961, 198768,
406 463792, 1391376 };
407 Int lo = 0;
408 Int hi = si->symtab_used-1;
409 Int i, j, h, bigN, hp, n_merged, n_truncated;
410 RiSym v;
411 Addr s1, s2, e1, e2;
412
413# define SWAP(ty,aa,bb) \
414 do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0)
415
416 bigN = hi - lo + 1; if (bigN < 2) return;
417 hp = 0; while (hp < 16 && incs[hp] < bigN) hp++; hp--;
418 vg_assert(0 <= hp && hp < 16);
419
420 for (; hp >= 0; hp--) {
421 h = incs[hp];
422 i = lo + h;
423 while (1) {
424 if (i > hi) break;
425 v = si->symtab[i];
426 j = i;
427 while (si->symtab[j-h].addr > v.addr) {
428 si->symtab[j] = si->symtab[j-h];
429 j = j - h;
430 if (j <= (lo + h - 1)) break;
431 }
432 si->symtab[j] = v;
433 i++;
434 }
435 }
436
437 cleanup_more:
438
439 /* If two symbols have identical address ranges, favour the
440 one with the longer name.
441 */
442 do {
443 n_merged = 0;
444 j = si->symtab_used;
445 si->symtab_used = 0;
446 for (i = 0; i < j; i++) {
447 if (i < j-1
448 && si->symtab[i].addr == si->symtab[i+1].addr
449 && si->symtab[i].size == si->symtab[i+1].size) {
450 n_merged++;
451 /* merge the two into one */
452 if (VG_(strlen)(&si->strtab[si->symtab[i].nmoff])
453 > VG_(strlen)(&si->strtab[si->symtab[i+1].nmoff])) {
454 si->symtab[si->symtab_used++] = si->symtab[i];
455 } else {
456 si->symtab[si->symtab_used++] = si->symtab[i+1];
457 }
458 i++;
459 } else {
460 si->symtab[si->symtab_used++] = si->symtab[i];
461 }
462 }
463 if (VG_(clo_trace_symtab))
464 VG_(printf)( "%d merged\n", n_merged);
465 }
466 while (n_merged > 0);
467
468 /* Detect and "fix" overlapping address ranges. */
469 n_truncated = 0;
470
471 for (i = 0; i < si->symtab_used-1; i++) {
472
473 vg_assert(si->symtab[i].addr <= si->symtab[i+1].addr);
474
475 /* Check for common (no overlap) case. */
476 if (si->symtab[i].addr + si->symtab[i].size
477 <= si->symtab[i+1].addr)
478 continue;
479
480 /* There's an overlap. Truncate one or the other. */
481 if (VG_(clo_trace_symtab)) {
482 VG_(printf)("overlapping address ranges in symbol table\n\t");
483 printSym(si,i);
484 VG_(printf)("\t");
485 printSym(si,i+1);
486 VG_(printf)("\n");
487 }
488
489 /* Truncate one or the other. */
490 s1 = si->symtab[i].addr;
491 s2 = si->symtab[i+1].addr;
492 e1 = s1 + si->symtab[i].size - 1;
493 e2 = s2 + si->symtab[i+1].size - 1;
494 if (s1 < s2) {
495 e1 = s2-1;
496 } else {
497 vg_assert(s1 == s2);
498 if (e1 > e2) {
499 s1 = e2+1; SWAP(Addr,s1,s2); SWAP(Addr,e1,e2);
500 } else
501 if (e1 < e2) {
502 s2 = e1+1;
503 } else {
504 /* e1 == e2. Identical addr ranges. We'll eventually wind
505 up back at cleanup_more, which will take care of it. */
506 }
507 }
508 si->symtab[i].addr = s1;
509 si->symtab[i+1].addr = s2;
510 si->symtab[i].size = e1 - s1 + 1;
511 si->symtab[i+1].size = e2 - s2 + 1;
512 vg_assert(s1 <= s2);
513 vg_assert(si->symtab[i].size > 0);
514 vg_assert(si->symtab[i+1].size > 0);
515 /* It may be that the i+1 entry now needs to be moved further
516 along to maintain the address order requirement. */
517 j = i+1;
518 while (j < si->symtab_used-1
519 && si->symtab[j].addr > si->symtab[j+1].addr) {
520 SWAP(RiSym,si->symtab[j],si->symtab[j+1]);
521 j++;
522 }
523 n_truncated++;
524 }
525
526 if (n_truncated > 0) goto cleanup_more;
527
528 /* Ensure relevant postconditions hold. */
529 for (i = 0; i < si->symtab_used-1; i++) {
530 /* No zero-sized symbols. */
531 vg_assert(si->symtab[i].size > 0);
532 /* In order. */
533 vg_assert(si->symtab[i].addr < si->symtab[i+1].addr);
534 /* No overlaps. */
535 vg_assert(si->symtab[i].addr + si->symtab[i].size - 1
536 < si->symtab[i+1].addr);
537 }
538# undef SWAP
539}
540
541
542
543/* Sort the location table by starting address. Mash the table around
544 so as to establish the property that addresses are in order and the
545 ranges do not overlap. This facilitates using binary search to map
sewardjb51f2e62002-06-01 23:11:19 +0000546 addresses to locations when we come to query the table.
547*/
sewardjde4a1d02002-03-22 01:27:54 +0000548static
549void canonicaliseLoctab ( SegInfo* si )
550{
551 /* Magic numbers due to Janet Incerpi and Robert Sedgewick. */
552 Int incs[16] = { 1, 3, 7, 21, 48, 112, 336, 861, 1968,
553 4592, 13776, 33936, 86961, 198768,
554 463792, 1391376 };
555 Int lo = 0;
556 Int hi = si->loctab_used-1;
557 Int i, j, h, bigN, hp;
558 RiLoc v;
559
560# define SWAP(ty,aa,bb) \
561 do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0);
562
563 /* Sort by start address. */
564
565 bigN = hi - lo + 1; if (bigN < 2) return;
566 hp = 0; while (hp < 16 && incs[hp] < bigN) hp++; hp--;
567 vg_assert(0 <= hp && hp < 16);
568
569 for (; hp >= 0; hp--) {
570 h = incs[hp];
571 i = lo + h;
572 while (1) {
573 if (i > hi) break;
574 v = si->loctab[i];
575 j = i;
576 while (si->loctab[j-h].addr > v.addr) {
577 si->loctab[j] = si->loctab[j-h];
578 j = j - h;
579 if (j <= (lo + h - 1)) break;
580 }
581 si->loctab[j] = v;
582 i++;
583 }
584 }
585
586 /* If two adjacent entries overlap, truncate the first. */
587 for (i = 0; i < si->loctab_used-1; i++) {
588 vg_assert(si->loctab[i].size < 10000);
589 if (si->loctab[i].addr + si->loctab[i].size > si->loctab[i+1].addr) {
590 /* Do this in signed int32 because the actual .size fields
591 are unsigned 16s. */
592 Int new_size = si->loctab[i+1].addr - si->loctab[i].addr;
593 if (new_size < 0) {
594 si->loctab[i].size = 0;
595 } else
596 if (new_size >= 65536) {
597 si->loctab[i].size = 65535;
598 } else {
599 si->loctab[i].size = (UShort)new_size;
600 }
601 }
602 }
603
604 /* Zap any zero-sized entries resulting from the truncation
605 process. */
606 j = 0;
607 for (i = 0; i < si->loctab_used; i++) {
608 if (si->loctab[i].size > 0) {
609 si->loctab[j] = si->loctab[i];
610 j++;
611 }
612 }
613 si->loctab_used = j;
614
615 /* Ensure relevant postconditions hold. */
616 for (i = 0; i < si->loctab_used-1; i++) {
617 /*
618 VG_(printf)("%d (%d) %d 0x%x\n",
619 i, si->loctab[i+1].confident,
620 si->loctab[i+1].size, si->loctab[i+1].addr );
621 */
622 /* No zero-sized symbols. */
623 vg_assert(si->loctab[i].size > 0);
624 /* In order. */
625 vg_assert(si->loctab[i].addr < si->loctab[i+1].addr);
626 /* No overlaps. */
627 vg_assert(si->loctab[i].addr + si->loctab[i].size - 1
628 < si->loctab[i+1].addr);
629 }
630# undef SWAP
631}
632
633
634/*------------------------------------------------------------*/
sewardjb51f2e62002-06-01 23:11:19 +0000635/*--- Read STABS format debug info. ---*/
sewardjde4a1d02002-03-22 01:27:54 +0000636/*------------------------------------------------------------*/
637
/* The stabs entry types this reader knows about.  Values are from:
 *   The "stabs" debug format
 *   Menapace, Kingdon and MacKenzie
 *   Cygnus Support
 */
typedef
   enum {
      N_GSYM  = 32,    /* Global symbol                    */
      N_FUN   = 36,    /* Function start or end            */
      N_STSYM = 38,    /* Data segment file-scope variable */
      N_LCSYM = 40,    /* BSS segment file-scope variable  */
      N_RSYM  = 64,    /* Register variable                */
      N_SLINE = 68,    /* Source line number               */
      N_SO    = 100,   /* Source file path and name        */
      N_LSYM  = 128,   /* Stack variable or type           */
      N_SOL   = 132,   /* Include file name                */
      N_LBRAC = 192,   /* Start of lexical block           */
      N_RBRAC = 224    /* End of lexical block             */
   }
   stab_types;
655
656
657/* Read stabs-format debug info. This is all rather horrible because
658 stabs is a underspecified, kludgy hack.
659*/
660static
661void read_debuginfo_stabs ( SegInfo* si,
662 UChar* stabC, Int stab_sz,
663 UChar* stabstr, Int stabstr_sz )
sewardjde4a1d02002-03-22 01:27:54 +0000664{
sewardjb51f2e62002-06-01 23:11:19 +0000665 Int i;
666 Int curr_filenmoff;
njnb79ad342002-06-05 15:30:30 +0000667 Addr curr_fn_stabs_addr = (Addr)NULL;
668 Addr curr_fnbaseaddr = (Addr)NULL;
sewardjb51f2e62002-06-01 23:11:19 +0000669 Char *curr_file_name, *curr_fn_name;
670 Int n_stab_entries;
njnb79ad342002-06-05 15:30:30 +0000671 Int prev_lineno = 0, lineno = 0;
672 Int lineno_overflows = 0;
673 Bool same_file = True;
sewardjb51f2e62002-06-01 23:11:19 +0000674 struct nlist* stab = (struct nlist*)stabC;
njnb79ad342002-06-05 15:30:30 +0000675
sewardjb51f2e62002-06-01 23:11:19 +0000676 /* Ok. It all looks plausible. Go on and read debug data.
677 stab kinds: 100 N_SO a source file name
678 68 N_SLINE a source line number
679 36 N_FUN start of a function
njn4f9c9342002-04-29 16:03:24 +0000680
sewardjb51f2e62002-06-01 23:11:19 +0000681 In this loop, we maintain a current file name, updated as
682 N_SO/N_SOLs appear, and a current function base address,
683 updated as N_FUNs appear. Based on that, address ranges for
684 N_SLINEs are calculated, and stuffed into the line info table.
sewardjde4a1d02002-03-22 01:27:54 +0000685
sewardjb51f2e62002-06-01 23:11:19 +0000686 Finding the instruction address range covered by an N_SLINE is
687 complicated; see the N_SLINE case below.
688 */
njnb79ad342002-06-05 15:30:30 +0000689 curr_filenmoff = addStr(si,"???");
690 curr_file_name = curr_fn_name = (Char*)NULL;
sewardjde4a1d02002-03-22 01:27:54 +0000691
sewardjb51f2e62002-06-01 23:11:19 +0000692 n_stab_entries = stab_sz/(int)sizeof(struct nlist);
njne0ee0712002-05-03 16:41:05 +0000693
sewardjb51f2e62002-06-01 23:11:19 +0000694 for (i = 0; i < n_stab_entries; i++) {
695# if 0
696 VG_(printf) ( " %2d ", i );
697 VG_(printf) ( "type=0x%x othr=%d desc=%d value=0x%x strx=%d %s",
698 stab[i].n_type, stab[i].n_other, stab[i].n_desc,
699 (int)stab[i].n_value,
700 (int)stab[i].n_un.n_strx,
701 stabstr + stab[i].n_un.n_strx );
702 VG_(printf)("\n");
703# endif
njne0ee0712002-05-03 16:41:05 +0000704
sewardjb51f2e62002-06-01 23:11:19 +0000705 Char *no_fn_name = "???";
706
707 switch (stab[i].n_type) {
708 UInt next_addr;
709
710 /* Two complicated things here:
711 *
712 * 1. the n_desc field in 'struct n_list' in a.out.h is only
713 * 16-bits, which gives a maximum of 65535 lines. We handle
714 * files bigger than this by detecting heuristically
715 * overflows -- if the line count goes from 65000-odd to
716 * 0-odd within the same file, we assume it's an overflow.
717 * Once we switch files, we zero the overflow count.
718 *
719 * 2. To compute the instr address range covered by a single
720 * line, find the address of the next thing and compute the
721 * difference. The approach used depends on what kind of
722 * entry/entries follow...
723 */
724 case N_SLINE: {
725 Int this_addr = (UInt)stab[i].n_value;
726
727 /* Although stored as a short, neg values really are >
728 * 32768, hence the UShort cast. Then we use an Int to
729 * handle overflows. */
730 prev_lineno = lineno;
731 lineno = (Int)((UShort)stab[i].n_desc);
732
733 if (prev_lineno > lineno + OVERFLOW_DIFFERENCE && same_file) {
734 VG_(message)(Vg_DebugMsg,
735 "Line number overflow detected (%d --> %d) in %s",
736 prev_lineno, lineno, curr_file_name);
737 lineno_overflows++;
738 }
739 same_file = True;
740
741 LOOP:
742 if (i+1 >= n_stab_entries) {
743 /* If it's the last entry, just guess the range is
744 * four; can't do any better */
745 next_addr = this_addr + 4;
746 } else {
747 switch (stab[i+1].n_type) {
748 /* Easy, common case: use address of next entry */
749 case N_SLINE: case N_SO:
750 next_addr = (UInt)stab[i+1].n_value;
751 break;
752
njn25e49d8e72002-09-23 09:36:25 +0000753 /* Boring one: skip, look for something more useful. */
sewardjb51f2e62002-06-01 23:11:19 +0000754 case N_RSYM: case N_LSYM: case N_LBRAC: case N_RBRAC:
755 case N_STSYM: case N_LCSYM: case N_GSYM:
756 i++;
757 goto LOOP;
758
njnb79ad342002-06-05 15:30:30 +0000759 /* If end-of-this-fun entry, use its address.
760 * If start-of-next-fun entry, find difference between start
761 * of current function and start of next function to work
762 * it out.
763 */
sewardjb51f2e62002-06-01 23:11:19 +0000764 case N_FUN:
765 if ('\0' == * (stabstr + stab[i+1].n_un.n_strx) ) {
766 next_addr = (UInt)stab[i+1].n_value;
767 } else {
njnb79ad342002-06-05 15:30:30 +0000768 next_addr =
769 (UInt)stab[i+1].n_value - curr_fn_stabs_addr;
sewardjb51f2e62002-06-01 23:11:19 +0000770 }
771 break;
772
773 /* N_SOL should be followed by an N_SLINE which can
774 be used */
775 case N_SOL:
776 if (i+2 < n_stab_entries && N_SLINE == stab[i+2].n_type) {
777 next_addr = (UInt)stab[i+2].n_value;
778 break;
779 } else {
780 VG_(printf)("unhandled N_SOL stabs case: %d %d %d",
781 stab[i+1].n_type, i, n_stab_entries);
782 VG_(panic)("unhandled N_SOL stabs case");
783 }
784
785 default:
786 VG_(printf)("unhandled (other) stabs case: %d %d",
787 stab[i+1].n_type,i);
788 /* VG_(panic)("unhandled (other) stabs case"); */
789 next_addr = this_addr + 4;
790 break;
791 }
792 }
793
794 addLineInfo ( si, curr_filenmoff, curr_fnbaseaddr + this_addr,
795 curr_fnbaseaddr + next_addr,
796 lineno + lineno_overflows * LINENO_OVERFLOW, i);
797 break;
798 }
799
800 case N_FUN: {
801 if ('\0' != (stabstr + stab[i].n_un.n_strx)[0] ) {
802 /* N_FUN with a name -- indicates the start of a fn. */
njnb79ad342002-06-05 15:30:30 +0000803 curr_fn_stabs_addr = (Addr)stab[i].n_value;
804 curr_fnbaseaddr = si->offset + curr_fn_stabs_addr;
sewardjb51f2e62002-06-01 23:11:19 +0000805 curr_fn_name = stabstr + stab[i].n_un.n_strx;
806 } else {
807 curr_fn_name = no_fn_name;
808 }
809 break;
810 }
811
812 case N_SOL:
813 if (lineno_overflows != 0) {
814 VG_(message)(Vg_UserMsg,
815 "Warning: file %s is very big (> 65535 lines) "
816 "Line numbers and annotation for this file might "
817 "be wrong. Sorry",
818 curr_file_name);
819 }
820 /* fall through! */
821 case N_SO:
822 lineno_overflows = 0;
823
824 /* seems to give lots of locations in header files */
825 /* case 130: */ /* BINCL */
826 {
827 UChar* nm = stabstr + stab[i].n_un.n_strx;
828 UInt len = VG_(strlen)(nm);
829
830 if (len > 0 && nm[len-1] != '/') {
831 curr_filenmoff = addStr ( si, nm );
832 curr_file_name = stabstr + stab[i].n_un.n_strx;
833 }
834 else
835 if (len == 0)
836 curr_filenmoff = addStr ( si, "?1\0" );
837
838 break;
839 }
840
841# if 0
842 case 162: /* EINCL */
843 curr_filenmoff = addStr ( si, "?2\0" );
844 break;
845# endif
846
847 default:
848 break;
849 }
850 } /* for (i = 0; i < stab_sz/(int)sizeof(struct nlist); i++) */
sewardjde4a1d02002-03-22 01:27:54 +0000851}
852
853
sewardjb51f2e62002-06-01 23:11:19 +0000854/*------------------------------------------------------------*/
855/*--- Read DWARF2 format debug info. ---*/
856/*------------------------------------------------------------*/
sewardjc134dd92002-06-01 14:21:36 +0000857
858/* Structure found in the .debug_line section. */
859typedef struct
860{
861 UChar li_length [4];
862 UChar li_version [2];
863 UChar li_prologue_length [4];
864 UChar li_min_insn_length [1];
865 UChar li_default_is_stmt [1];
866 UChar li_line_base [1];
867 UChar li_line_range [1];
868 UChar li_opcode_base [1];
869}
870DWARF2_External_LineInfo;
sewardjd84606d2002-06-18 01:04:57 +0000871
sewardjc134dd92002-06-01 14:21:36 +0000872typedef struct
873{
sewardj08a50f62002-06-17 02:21:20 +0000874 UInt li_length;
sewardjc134dd92002-06-01 14:21:36 +0000875 UShort li_version;
876 UInt li_prologue_length;
877 UChar li_min_insn_length;
878 UChar li_default_is_stmt;
sewardj08a50f62002-06-17 02:21:20 +0000879 Int li_line_base;
sewardjc134dd92002-06-01 14:21:36 +0000880 UChar li_line_range;
881 UChar li_opcode_base;
882}
883DWARF2_Internal_LineInfo;
sewardjd84606d2002-06-18 01:04:57 +0000884
/* Standard opcodes of the line number program. */
enum dwarf_line_number_ops
  {
    DW_LNS_extended_op        = 0,
    DW_LNS_copy               = 1,
    DW_LNS_advance_pc         = 2,
    DW_LNS_advance_line       = 3,
    DW_LNS_set_file           = 4,
    DW_LNS_set_column         = 5,
    DW_LNS_negate_stmt        = 6,
    DW_LNS_set_basic_block    = 7,
    DW_LNS_const_add_pc       = 8,
    DW_LNS_fixed_advance_pc   = 9,
    /* Added in DWARF 3. */
    DW_LNS_set_prologue_end   = 10,
    DW_LNS_set_epilogue_begin = 11,
    DW_LNS_set_isa            = 12
  };
903
/* Extended opcodes of the line number program. */
enum dwarf_line_number_x_ops
  {
    DW_LNE_end_sequence = 1,
    DW_LNE_set_address  = 2,
    DW_LNE_define_file  = 3
  };
911
912typedef struct State_Machine_Registers
913{
sewardj08a50f62002-06-17 02:21:20 +0000914 Addr address;
sewardjc134dd92002-06-01 14:21:36 +0000915 UInt file;
916 UInt line;
917 UInt column;
918 Int is_stmt;
919 Int basic_block;
sewardj08a50f62002-06-17 02:21:20 +0000920 Int end_sequence;
921 /* This variable hold the number of the last entry seen
922 in the File Table. */
sewardjc134dd92002-06-01 14:21:36 +0000923 UInt last_file_entry;
924} SMR;
925
sewardjb51f2e62002-06-01 23:11:19 +0000926
927static
928UInt read_leb128 ( UChar* data, Int* length_return, Int sign )
929{
sewardj08a50f62002-06-17 02:21:20 +0000930 UInt result = 0;
931 UInt num_read = 0;
932 Int shift = 0;
933 UChar byte;
sewardjb51f2e62002-06-01 23:11:19 +0000934
935 do
936 {
937 byte = * data ++;
938 num_read ++;
939
940 result |= (byte & 0x7f) << shift;
941
942 shift += 7;
943
944 }
945 while (byte & 0x80);
946
947 if (length_return != NULL)
948 * length_return = num_read;
949
950 if (sign && (shift < 32) && (byte & 0x40))
951 result |= -1 << shift;
952
953 return result;
954}
955
956
sewardjc134dd92002-06-01 14:21:36 +0000957static SMR state_machine_regs;
958
sewardj08a50f62002-06-17 02:21:20 +0000959static
960void reset_state_machine ( Int is_stmt )
sewardjc134dd92002-06-01 14:21:36 +0000961{
sewardj08a50f62002-06-17 02:21:20 +0000962 if (0) VG_(printf)("smr.a := %p (reset)\n", 0 );
sewardjc134dd92002-06-01 14:21:36 +0000963 state_machine_regs.address = 0;
964 state_machine_regs.file = 1;
965 state_machine_regs.line = 1;
966 state_machine_regs.column = 0;
967 state_machine_regs.is_stmt = is_stmt;
968 state_machine_regs.basic_block = 0;
969 state_machine_regs.end_sequence = 0;
970 state_machine_regs.last_file_entry = 0;
971}
972
973/* Handled an extend line op. Returns true if this is the end
974 of sequence. */
sewardj08a50f62002-06-17 02:21:20 +0000975static
976int process_extended_line_op( SegInfo *si, UInt** fnames,
977 UChar* data, Int is_stmt, Int pointer_size)
sewardjc134dd92002-06-01 14:21:36 +0000978{
979 UChar op_code;
sewardj08a50f62002-06-17 02:21:20 +0000980 Int bytes_read;
sewardjc134dd92002-06-01 14:21:36 +0000981 UInt len;
982 UChar * name;
sewardj08a50f62002-06-17 02:21:20 +0000983 Addr adr;
sewardjc134dd92002-06-01 14:21:36 +0000984
985 len = read_leb128 (data, & bytes_read, 0);
986 data += bytes_read;
987
988 if (len == 0)
989 {
sewardj08a50f62002-06-17 02:21:20 +0000990 VG_(message)(Vg_UserMsg,
991 "badly formed extended line op encountered!\n");
sewardjc134dd92002-06-01 14:21:36 +0000992 return bytes_read;
993 }
994
995 len += bytes_read;
996 op_code = * data ++;
997
998
999 switch (op_code)
1000 {
1001 case DW_LNE_end_sequence:
sewardj08a50f62002-06-17 02:21:20 +00001002 if (0) VG_(printf)("1001: si->o %p, smr.a %p\n",
1003 si->offset, state_machine_regs.address );
sewardjd84606d2002-06-18 01:04:57 +00001004 state_machine_regs.end_sequence = 1; /* JRS: added for compliance
1005 with spec; is pointless due to reset_state_machine below
1006 */
sewardj08a50f62002-06-17 02:21:20 +00001007 addLineInfo (si, (*fnames)[state_machine_regs.file],
1008 si->offset + (state_machine_regs.address - 1),
1009 si->offset + (state_machine_regs.address),
1010 0, 0);
sewardjc134dd92002-06-01 14:21:36 +00001011 reset_state_machine (is_stmt);
1012 break;
1013
1014 case DW_LNE_set_address:
1015 /* XXX: Pointer size could be 8 */
sewardj08a50f62002-06-17 02:21:20 +00001016 vg_assert(pointer_size == 4);
sewardjc134dd92002-06-01 14:21:36 +00001017 adr = *((Addr *)data);
sewardj08a50f62002-06-17 02:21:20 +00001018 if (0) VG_(printf)("smr.a := %p\n", adr );
sewardjc134dd92002-06-01 14:21:36 +00001019 state_machine_regs.address = adr;
1020 break;
1021
1022 case DW_LNE_define_file:
sewardjc134dd92002-06-01 14:21:36 +00001023 ++ state_machine_regs.last_file_entry;
1024 name = data;
1025 if (*fnames == NULL)
njn25e49d8e72002-09-23 09:36:25 +00001026 *fnames = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof (UInt) * 2);
sewardjc134dd92002-06-01 14:21:36 +00001027 else
njn25e49d8e72002-09-23 09:36:25 +00001028 *fnames = VG_(arena_realloc)(
1029 VG_AR_SYMTAB, *fnames, /*alignment*/4,
sewardj08a50f62002-06-17 02:21:20 +00001030 sizeof(UInt)
1031 * (state_machine_regs.last_file_entry + 1));
sewardjc134dd92002-06-01 14:21:36 +00001032 (*fnames)[state_machine_regs.last_file_entry] = addStr (si,name);
1033 data += VG_(strlen) ((char *) data) + 1;
1034 read_leb128 (data, & bytes_read, 0);
1035 data += bytes_read;
1036 read_leb128 (data, & bytes_read, 0);
1037 data += bytes_read;
sewardj08a50f62002-06-17 02:21:20 +00001038 read_leb128 (data, & bytes_read, 0);
sewardjc134dd92002-06-01 14:21:36 +00001039 break;
1040
1041 default:
1042 break;
1043 }
1044
1045 return len;
1046}
1047
1048
sewardjb51f2e62002-06-01 23:11:19 +00001049static
1050void read_debuginfo_dwarf2 ( SegInfo* si, UChar* dwarf2, Int dwarf2_sz )
sewardjc134dd92002-06-01 14:21:36 +00001051{
1052 DWARF2_External_LineInfo * external;
1053 DWARF2_Internal_LineInfo info;
1054 UChar * standard_opcodes;
sewardjb51f2e62002-06-01 23:11:19 +00001055 UChar * data = dwarf2;
1056 UChar * end = dwarf2 + dwarf2_sz;
sewardjc134dd92002-06-01 14:21:36 +00001057 UChar * end_of_sequence;
sewardj08a50f62002-06-17 02:21:20 +00001058 UInt * fnames = NULL;
sewardjc134dd92002-06-01 14:21:36 +00001059
sewardjd84606d2002-06-18 01:04:57 +00001060 /* Fails due to gcc padding ...
1061 vg_assert(sizeof(DWARF2_External_LineInfo)
1062 == sizeof(DWARF2_Internal_LineInfo));
1063 */
sewardjc134dd92002-06-01 14:21:36 +00001064
1065 while (data < end)
1066 {
1067 external = (DWARF2_External_LineInfo *) data;
1068
1069 /* Check the length of the block. */
sewardj08a50f62002-06-17 02:21:20 +00001070 info.li_length = * ((UInt *)(external->li_length));
sewardjc134dd92002-06-01 14:21:36 +00001071
1072 if (info.li_length == 0xffffffff)
1073 {
sewardjb51f2e62002-06-01 23:11:19 +00001074 vg_symerr("64-bit DWARF line info is not supported yet.");
sewardjc134dd92002-06-01 14:21:36 +00001075 break;
1076 }
1077
sewardjb51f2e62002-06-01 23:11:19 +00001078 if (info.li_length + sizeof (external->li_length) > dwarf2_sz)
sewardjc134dd92002-06-01 14:21:36 +00001079 {
sewardj08a50f62002-06-17 02:21:20 +00001080 vg_symerr("DWARF line info appears to be corrupt "
1081 "- the section is too small");
sewardjb51f2e62002-06-01 23:11:19 +00001082 return;
sewardjc134dd92002-06-01 14:21:36 +00001083 }
1084
1085 /* Check its version number. */
sewardj08a50f62002-06-17 02:21:20 +00001086 info.li_version = * ((UShort *) (external->li_version));
sewardjc134dd92002-06-01 14:21:36 +00001087 if (info.li_version != 2)
1088 {
sewardj08a50f62002-06-17 02:21:20 +00001089 vg_symerr("Only DWARF version 2 line info "
1090 "is currently supported.");
sewardjb51f2e62002-06-01 23:11:19 +00001091 return;
sewardjc134dd92002-06-01 14:21:36 +00001092 }
1093
sewardjd84606d2002-06-18 01:04:57 +00001094 info.li_prologue_length = * ((UInt *) (external->li_prologue_length));
1095 info.li_min_insn_length = * ((UChar *)(external->li_min_insn_length));
1096 info.li_default_is_stmt = * ((UChar *)(external->li_default_is_stmt));
1097
1098 /* JRS: changed (UInt*) to (UChar*) */
1099 info.li_line_base = * ((UChar *)(external->li_line_base));
1100
1101 info.li_line_range = * ((UChar *)(external->li_line_range));
1102 info.li_opcode_base = * ((UChar *)(external->li_opcode_base));
sewardjc134dd92002-06-01 14:21:36 +00001103
1104 /* Sign extend the line base field. */
1105 info.li_line_base <<= 24;
1106 info.li_line_base >>= 24;
1107
sewardj08a50f62002-06-17 02:21:20 +00001108 end_of_sequence = data + info.li_length
1109 + sizeof (external->li_length);
sewardjc134dd92002-06-01 14:21:36 +00001110
1111 reset_state_machine (info.li_default_is_stmt);
1112
1113 /* Read the contents of the Opcodes table. */
1114 standard_opcodes = data + sizeof (* external);
1115
sewardjc134dd92002-06-01 14:21:36 +00001116 /* Read the contents of the Directory table. */
1117 data = standard_opcodes + info.li_opcode_base - 1;
1118
sewardj08a50f62002-06-17 02:21:20 +00001119 if (* data == 0)
1120 {
1121 }
sewardjc134dd92002-06-01 14:21:36 +00001122 else
1123 {
sewardj08a50f62002-06-17 02:21:20 +00001124 /* We ignore the directory table, since gcc gives the entire
1125 path as part of the filename */
sewardjc134dd92002-06-01 14:21:36 +00001126 while (* data != 0)
1127 {
1128 data += VG_(strlen) ((char *) data) + 1;
1129 }
1130 }
1131
1132 /* Skip the NUL at the end of the table. */
sewardjd84606d2002-06-18 01:04:57 +00001133 if (*data != 0) {
1134 vg_symerr("can't find NUL at end of DWARF2 directory table");
1135 return;
1136 }
sewardjc134dd92002-06-01 14:21:36 +00001137 data ++;
1138
1139 /* Read the contents of the File Name table. */
sewardj08a50f62002-06-17 02:21:20 +00001140 if (* data == 0)
1141 {
1142 }
sewardjc134dd92002-06-01 14:21:36 +00001143 else
1144 {
sewardjc134dd92002-06-01 14:21:36 +00001145 while (* data != 0)
1146 {
1147 UChar * name;
1148 Int bytes_read;
1149
sewardj08a50f62002-06-17 02:21:20 +00001150 ++ state_machine_regs.last_file_entry;
sewardjc134dd92002-06-01 14:21:36 +00001151 name = data;
sewardj08a50f62002-06-17 02:21:20 +00001152 /* Since we don't have realloc (0, ....) == malloc (...)
1153 semantics, we need to malloc the first time. */
sewardjc134dd92002-06-01 14:21:36 +00001154
1155 if (fnames == NULL)
njn25e49d8e72002-09-23 09:36:25 +00001156 fnames = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof (UInt) * 2);
sewardjc134dd92002-06-01 14:21:36 +00001157 else
njn25e49d8e72002-09-23 09:36:25 +00001158 fnames = VG_(arena_realloc)(VG_AR_SYMTAB, fnames, /*alignment*/4,
sewardj08a50f62002-06-17 02:21:20 +00001159 sizeof(UInt)
1160 * (state_machine_regs.last_file_entry + 1));
1161 data += VG_(strlen) ((Char *) data) + 1;
sewardjc134dd92002-06-01 14:21:36 +00001162 fnames[state_machine_regs.last_file_entry] = addStr (si,name);
1163
1164 read_leb128 (data, & bytes_read, 0);
1165 data += bytes_read;
1166 read_leb128 (data, & bytes_read, 0);
1167 data += bytes_read;
1168 read_leb128 (data, & bytes_read, 0);
1169 data += bytes_read;
1170 }
1171 }
1172
1173 /* Skip the NUL at the end of the table. */
sewardjd84606d2002-06-18 01:04:57 +00001174 if (*data != 0) {
1175 vg_symerr("can't find NUL at end of DWARF2 file name table");
1176 return;
1177 }
sewardjc134dd92002-06-01 14:21:36 +00001178 data ++;
1179
1180 /* Now display the statements. */
1181
1182 while (data < end_of_sequence)
1183 {
1184 UChar op_code;
1185 Int adv;
1186 Int bytes_read;
1187
1188 op_code = * data ++;
1189
1190 if (op_code >= info.li_opcode_base)
1191 {
1192 Int advAddr;
1193 op_code -= info.li_opcode_base;
sewardj08a50f62002-06-17 02:21:20 +00001194 adv = (op_code / info.li_line_range)
1195 * info.li_min_insn_length;
sewardjc134dd92002-06-01 14:21:36 +00001196 advAddr = adv;
1197 state_machine_regs.address += adv;
sewardj08a50f62002-06-17 02:21:20 +00001198 if (0) VG_(printf)("smr.a += %p\n", adv );
sewardjc134dd92002-06-01 14:21:36 +00001199 adv = (op_code % info.li_line_range) + info.li_line_base;
sewardj08a50f62002-06-17 02:21:20 +00001200 if (0) VG_(printf)("1002: si->o %p, smr.a %p\n",
1201 si->offset, state_machine_regs.address );
1202 addLineInfo (si, fnames[state_machine_regs.file],
1203 si->offset + (state_machine_regs.address
1204 - advAddr),
1205 si->offset + (state_machine_regs.address),
1206 state_machine_regs.line, 0);
sewardjc134dd92002-06-01 14:21:36 +00001207 state_machine_regs.line += adv;
1208 }
1209 else switch (op_code)
1210 {
1211 case DW_LNS_extended_op:
sewardj08a50f62002-06-17 02:21:20 +00001212 data += process_extended_line_op (
1213 si, &fnames, data,
1214 info.li_default_is_stmt, sizeof (Addr));
sewardjc134dd92002-06-01 14:21:36 +00001215 break;
1216
1217 case DW_LNS_copy:
sewardj08a50f62002-06-17 02:21:20 +00001218 if (0) VG_(printf)("1002: si->o %p, smr.a %p\n",
1219 si->offset, state_machine_regs.address );
1220 addLineInfo (si, fnames[state_machine_regs.file],
1221 si->offset + state_machine_regs.address,
1222 si->offset + (state_machine_regs.address + 1),
1223 state_machine_regs.line , 0);
sewardjd84606d2002-06-18 01:04:57 +00001224 state_machine_regs.basic_block = 0; /* JRS added */
sewardjc134dd92002-06-01 14:21:36 +00001225 break;
1226
1227 case DW_LNS_advance_pc:
sewardj08a50f62002-06-17 02:21:20 +00001228 adv = info.li_min_insn_length
1229 * read_leb128 (data, & bytes_read, 0);
sewardjc134dd92002-06-01 14:21:36 +00001230 data += bytes_read;
1231 state_machine_regs.address += adv;
sewardj08a50f62002-06-17 02:21:20 +00001232 if (0) VG_(printf)("smr.a += %p\n", adv );
sewardjc134dd92002-06-01 14:21:36 +00001233 break;
1234
1235 case DW_LNS_advance_line:
1236 adv = read_leb128 (data, & bytes_read, 1);
1237 data += bytes_read;
1238 state_machine_regs.line += adv;
1239 break;
1240
1241 case DW_LNS_set_file:
1242 adv = read_leb128 (data, & bytes_read, 0);
1243 data += bytes_read;
1244 state_machine_regs.file = adv;
1245 break;
1246
1247 case DW_LNS_set_column:
1248 adv = read_leb128 (data, & bytes_read, 0);
1249 data += bytes_read;
1250 state_machine_regs.column = adv;
1251 break;
1252
1253 case DW_LNS_negate_stmt:
1254 adv = state_machine_regs.is_stmt;
1255 adv = ! adv;
1256 state_machine_regs.is_stmt = adv;
1257 break;
1258
1259 case DW_LNS_set_basic_block:
1260 state_machine_regs.basic_block = 1;
1261 break;
1262
1263 case DW_LNS_const_add_pc:
1264 adv = (((255 - info.li_opcode_base) / info.li_line_range)
1265 * info.li_min_insn_length);
1266 state_machine_regs.address += adv;
sewardj08a50f62002-06-17 02:21:20 +00001267 if (0) VG_(printf)("smr.a += %p\n", adv );
sewardjc134dd92002-06-01 14:21:36 +00001268 break;
1269
1270 case DW_LNS_fixed_advance_pc:
1271 /* XXX: Need something to get 2 bytes */
1272 adv = *((UShort *)data);
1273 data += 2;
1274 state_machine_regs.address += adv;
sewardj08a50f62002-06-17 02:21:20 +00001275 if (0) VG_(printf)("smr.a += %p\n", adv );
sewardjc134dd92002-06-01 14:21:36 +00001276 break;
1277
1278 case DW_LNS_set_prologue_end:
1279 break;
1280
1281 case DW_LNS_set_epilogue_begin:
1282 break;
1283
1284 case DW_LNS_set_isa:
1285 adv = read_leb128 (data, & bytes_read, 0);
1286 data += bytes_read;
1287 break;
1288
1289 default:
1290 {
1291 int j;
1292 for (j = standard_opcodes[op_code - 1]; j > 0 ; --j)
1293 {
1294 read_leb128 (data, &bytes_read, 0);
1295 data += bytes_read;
1296 }
1297 }
1298 break;
1299 }
1300 }
njn25e49d8e72002-09-23 09:36:25 +00001301 VG_(arena_free)(VG_AR_SYMTAB, fnames);
sewardjc134dd92002-06-01 14:21:36 +00001302 fnames = NULL;
1303 }
sewardjc134dd92002-06-01 14:21:36 +00001304}
1305
sewardjb51f2e62002-06-01 23:11:19 +00001306
1307/*------------------------------------------------------------*/
1308/*--- Read info from a .so/exe file. ---*/
1309/*------------------------------------------------------------*/
1310
sewardjde4a1d02002-03-22 01:27:54 +00001311/* Read the symbols from the object/exe specified by the SegInfo into
1312 the tables within the supplied SegInfo. */
1313static
1314void vg_read_lib_symbols ( SegInfo* si )
1315{
1316 Elf32_Ehdr* ehdr; /* The ELF header */
1317 Elf32_Shdr* shdr; /* The section table */
1318 UChar* sh_strtab; /* The section table's string table */
sewardjb51f2e62002-06-01 23:11:19 +00001319 UChar* stab; /* The .stab table */
sewardjde4a1d02002-03-22 01:27:54 +00001320 UChar* stabstr; /* The .stab string table */
sewardjb51f2e62002-06-01 23:11:19 +00001321 UChar* dwarf2; /* The DWARF2 location info table */
sewardjde4a1d02002-03-22 01:27:54 +00001322 Int stab_sz; /* Size in bytes of the .stab table */
1323 Int stabstr_sz; /* Size in bytes of the .stab string table */
sewardjb51f2e62002-06-01 23:11:19 +00001324 Int dwarf2_sz; /* Size in bytes of the DWARF2 srcloc table*/
sewardjde4a1d02002-03-22 01:27:54 +00001325 Int fd;
1326 Int i;
1327 Bool ok;
1328 Addr oimage;
1329 Int n_oimage;
sewardjb3586202002-05-09 17:38:13 +00001330 struct vki_stat stat_buf;
sewardjde4a1d02002-03-22 01:27:54 +00001331
sewardjde4a1d02002-03-22 01:27:54 +00001332 oimage = (Addr)NULL;
1333 if (VG_(clo_verbosity) > 1)
njne0ee0712002-05-03 16:41:05 +00001334 VG_(message)(Vg_UserMsg, "Reading syms from %s", si->filename );
sewardjde4a1d02002-03-22 01:27:54 +00001335
1336 /* mmap the object image aboard, so that we can read symbols and
1337 line number info out of it. It will be munmapped immediately
1338 thereafter; it is only aboard transiently. */
1339
sewardjb3586202002-05-09 17:38:13 +00001340 i = VG_(stat)(si->filename, &stat_buf);
sewardjde4a1d02002-03-22 01:27:54 +00001341 if (i != 0) {
1342 vg_symerr("Can't stat .so/.exe (to determine its size)?!");
1343 return;
1344 }
1345 n_oimage = stat_buf.st_size;
1346
njn25e49d8e72002-09-23 09:36:25 +00001347 fd = VG_(open)(si->filename, VKI_O_RDONLY, 0);
sewardjde4a1d02002-03-22 01:27:54 +00001348 if (fd == -1) {
1349 vg_symerr("Can't open .so/.exe to read symbols?!");
1350 return;
1351 }
1352
sewardjb3586202002-05-09 17:38:13 +00001353 oimage = (Addr)VG_(mmap)( NULL, n_oimage,
1354 VKI_PROT_READ, VKI_MAP_PRIVATE, fd, 0 );
sewardjde4a1d02002-03-22 01:27:54 +00001355 if (oimage == ((Addr)(-1))) {
1356 VG_(message)(Vg_UserMsg,
1357 "mmap failed on %s", si->filename );
1358 VG_(close)(fd);
1359 return;
1360 }
1361
1362 VG_(close)(fd);
1363
1364 /* Ok, the object image is safely in oimage[0 .. n_oimage-1].
1365 Now verify that it is a valid ELF .so or executable image.
1366 */
1367 ok = (n_oimage >= sizeof(Elf32_Ehdr));
1368 ehdr = (Elf32_Ehdr*)oimage;
1369
1370 if (ok) {
1371 ok &= (ehdr->e_ident[EI_MAG0] == 0x7F
1372 && ehdr->e_ident[EI_MAG1] == 'E'
1373 && ehdr->e_ident[EI_MAG2] == 'L'
1374 && ehdr->e_ident[EI_MAG3] == 'F');
1375 ok &= (ehdr->e_ident[EI_CLASS] == ELFCLASS32
1376 && ehdr->e_ident[EI_DATA] == ELFDATA2LSB
1377 && ehdr->e_ident[EI_VERSION] == EV_CURRENT);
1378 ok &= (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_DYN);
1379 ok &= (ehdr->e_machine == EM_386);
1380 ok &= (ehdr->e_version == EV_CURRENT);
1381 ok &= (ehdr->e_shstrndx != SHN_UNDEF);
1382 ok &= (ehdr->e_shoff != 0 && ehdr->e_shnum != 0);
1383 }
1384
1385 if (!ok) {
1386 vg_symerr("Invalid ELF header, or missing stringtab/sectiontab.");
1387 VG_(munmap) ( (void*)oimage, n_oimage );
1388 return;
1389 }
1390
1391 if (VG_(clo_trace_symtab))
1392 VG_(printf)(
1393 "shoff = %d, shnum = %d, size = %d, n_vg_oimage = %d\n",
1394 ehdr->e_shoff, ehdr->e_shnum, sizeof(Elf32_Shdr), n_oimage );
1395
1396 if (ehdr->e_shoff + ehdr->e_shnum*sizeof(Elf32_Shdr) > n_oimage) {
1397 vg_symerr("ELF section header is beyond image end?!");
1398 VG_(munmap) ( (void*)oimage, n_oimage );
1399 return;
1400 }
1401
1402 shdr = (Elf32_Shdr*)(oimage + ehdr->e_shoff);
1403 sh_strtab = (UChar*)(oimage + shdr[ehdr->e_shstrndx].sh_offset);
1404
1405 /* try and read the object's symbol table */
1406 {
1407 UChar* o_strtab = NULL;
1408 Elf32_Sym* o_symtab = NULL;
1409 UInt o_strtab_sz = 0;
1410 UInt o_symtab_sz = 0;
1411
1412 UChar* o_got = NULL;
1413 UChar* o_plt = NULL;
1414 UInt o_got_sz = 0;
1415 UInt o_plt_sz = 0;
1416
1417 Bool snaffle_it;
1418 Addr sym_addr;
1419
1420 /* find the .stabstr and .stab sections */
1421 for (i = 0; i < ehdr->e_shnum; i++) {
1422 if (0 == VG_(strcmp)(".symtab",sh_strtab + shdr[i].sh_name)) {
1423 o_symtab = (Elf32_Sym*)(oimage + shdr[i].sh_offset);
1424 o_symtab_sz = shdr[i].sh_size;
1425 vg_assert((o_symtab_sz % sizeof(Elf32_Sym)) == 0);
1426 /* check image overrun here */
1427 }
1428 if (0 == VG_(strcmp)(".strtab",sh_strtab + shdr[i].sh_name)) {
1429 o_strtab = (UChar*)(oimage + shdr[i].sh_offset);
1430 o_strtab_sz = shdr[i].sh_size;
1431 /* check image overrun here */
1432 }
1433
1434 /* find out where the .got and .plt sections will be in the
1435 executable image, not in the object image transiently loaded.
1436 */
1437 if (0 == VG_(strcmp)(".got",sh_strtab + shdr[i].sh_name)) {
1438 o_got = (UChar*)(si->offset
1439 + shdr[i].sh_offset);
1440 o_got_sz = shdr[i].sh_size;
1441 /* check image overrun here */
1442 }
1443 if (0 == VG_(strcmp)(".plt",sh_strtab + shdr[i].sh_name)) {
1444 o_plt = (UChar*)(si->offset
1445 + shdr[i].sh_offset);
1446 o_plt_sz = shdr[i].sh_size;
1447 /* check image overrun here */
1448 }
1449
1450 }
1451
1452 if (VG_(clo_trace_symtab)) {
1453 if (o_plt) VG_(printf)( "PLT: %p .. %p\n",
1454 o_plt, o_plt + o_plt_sz - 1 );
1455 if (o_got) VG_(printf)( "GOT: %p .. %p\n",
1456 o_got, o_got + o_got_sz - 1 );
1457 }
1458
1459 if (o_strtab == NULL || o_symtab == NULL) {
1460 vg_symerr(" object doesn't have a symbol table");
1461 } else {
1462 /* Perhaps should start at i = 1; ELF docs suggest that entry
1463 0 always denotes `unknown symbol'. */
1464 for (i = 1; i < o_symtab_sz/sizeof(Elf32_Sym); i++){
1465# if 0
1466 VG_(printf)("raw symbol: ");
1467 switch (ELF32_ST_BIND(o_symtab[i].st_info)) {
1468 case STB_LOCAL: VG_(printf)("LOC "); break;
1469 case STB_GLOBAL: VG_(printf)("GLO "); break;
1470 case STB_WEAK: VG_(printf)("WEA "); break;
1471 case STB_LOPROC: VG_(printf)("lop "); break;
1472 case STB_HIPROC: VG_(printf)("hip "); break;
1473 default: VG_(printf)("??? "); break;
1474 }
1475 switch (ELF32_ST_TYPE(o_symtab[i].st_info)) {
1476 case STT_NOTYPE: VG_(printf)("NOT "); break;
1477 case STT_OBJECT: VG_(printf)("OBJ "); break;
1478 case STT_FUNC: VG_(printf)("FUN "); break;
1479 case STT_SECTION: VG_(printf)("SEC "); break;
1480 case STT_FILE: VG_(printf)("FIL "); break;
1481 case STT_LOPROC: VG_(printf)("lop "); break;
1482 case STT_HIPROC: VG_(printf)("hip "); break;
1483 default: VG_(printf)("??? "); break;
1484 }
1485 VG_(printf)(
1486 ": value %p, size %d, name %s\n",
1487 si->offset+(UChar*)o_symtab[i].st_value,
1488 o_symtab[i].st_size,
1489 o_symtab[i].st_name
1490 ? ((Char*)o_strtab+o_symtab[i].st_name)
1491 : (Char*)"NONAME");
1492# endif
1493
1494 /* Figure out if we're interested in the symbol.
1495 Firstly, is it of the right flavour?
1496 */
1497 snaffle_it
1498 = ( (ELF32_ST_BIND(o_symtab[i].st_info) == STB_GLOBAL ||
1499 ELF32_ST_BIND(o_symtab[i].st_info) == STB_LOCAL /* ||
1500 ELF32_ST_BIND(o_symtab[i].st_info) == STB_WEAK */)
1501 &&
1502 (ELF32_ST_TYPE(o_symtab[i].st_info) == STT_FUNC /*||
1503 ELF32_ST_TYPE(o_symtab[i].st_info) == STT_OBJECT*/)
1504 );
1505
1506 /* Secondly, if it's apparently in a GOT or PLT, it's really
1507 a reference to a symbol defined elsewhere, so ignore it.
1508 */
1509 sym_addr = si->offset
1510 + (UInt)o_symtab[i].st_value;
1511 if (o_got != NULL
1512 && sym_addr >= (Addr)o_got
1513 && sym_addr < (Addr)(o_got+o_got_sz)) {
1514 snaffle_it = False;
1515 if (VG_(clo_trace_symtab)) {
1516 VG_(printf)( "in GOT: %s\n",
1517 o_strtab+o_symtab[i].st_name);
1518 }
1519 }
1520 if (o_plt != NULL
1521 && sym_addr >= (Addr)o_plt
1522 && sym_addr < (Addr)(o_plt+o_plt_sz)) {
1523 snaffle_it = False;
1524 if (VG_(clo_trace_symtab)) {
1525 VG_(printf)( "in PLT: %s\n",
1526 o_strtab+o_symtab[i].st_name);
1527 }
1528 }
1529
1530 /* Don't bother if nameless, or zero-sized. */
1531 if (snaffle_it
1532 && (o_symtab[i].st_name == (Elf32_Word)NULL
1533 || /* VG_(strlen)(o_strtab+o_symtab[i].st_name) == 0 */
1534 /* equivalent but cheaper ... */
1535 * ((UChar*)(o_strtab+o_symtab[i].st_name)) == 0
1536 || o_symtab[i].st_size == 0)) {
1537 snaffle_it = False;
1538 if (VG_(clo_trace_symtab)) {
1539 VG_(printf)( "size=0: %s\n",
1540 o_strtab+o_symtab[i].st_name);
1541 }
1542 }
1543
1544# if 0
1545 /* Avoid _dl_ junk. (Why?) */
1546 /* 01-02-24: disabled until I find out if it really helps. */
1547 if (snaffle_it
1548 && (VG_(strncmp)("_dl_", o_strtab+o_symtab[i].st_name, 4) == 0
1549 || VG_(strncmp)("_r_debug",
1550 o_strtab+o_symtab[i].st_name, 8) == 0)) {
1551 snaffle_it = False;
1552 if (VG_(clo_trace_symtab)) {
1553 VG_(printf)( "_dl_ junk: %s\n",
1554 o_strtab+o_symtab[i].st_name);
1555 }
1556 }
1557# endif
1558
1559 /* This seems to significantly reduce the number of junk
1560 symbols, and particularly reduces the number of
1561 overlapping address ranges. Don't ask me why ... */
1562 if (snaffle_it && (Int)o_symtab[i].st_value == 0) {
1563 snaffle_it = False;
1564 if (VG_(clo_trace_symtab)) {
1565 VG_(printf)( "valu=0: %s\n",
1566 o_strtab+o_symtab[i].st_name);
1567 }
1568 }
1569
1570 /* If no part of the symbol falls within the mapped range,
1571 ignore it. */
1572 if (sym_addr+o_symtab[i].st_size <= si->start
1573 || sym_addr >= si->start+si->size) {
1574 snaffle_it = False;
1575 }
1576
1577 if (snaffle_it) {
1578 /* it's an interesting symbol; record ("snaffle") it. */
1579 RiSym sym;
1580 Char* t0 = o_symtab[i].st_name
1581 ? (Char*)(o_strtab+o_symtab[i].st_name)
1582 : (Char*)"NONAME";
1583 Int nmoff = addStr ( si, t0 );
1584 vg_assert(nmoff >= 0
1585 /* && 0==VG_(strcmp)(t0,&vg_strtab[nmoff]) */ );
1586 vg_assert( (Int)o_symtab[i].st_value >= 0);
1587 /* VG_(printf)("%p + %d: %s\n", si->addr,
1588 (Int)o_symtab[i].st_value, t0 ); */
1589 sym.addr = sym_addr;
1590 sym.size = o_symtab[i].st_size;
1591 sym.nmoff = nmoff;
1592 addSym ( si, &sym );
1593 }
1594 }
1595 }
1596 }
1597
sewardjb51f2e62002-06-01 23:11:19 +00001598 /* Reading of the stabs and/or dwarf2 debug format information, if
1599 any. */
sewardjde4a1d02002-03-22 01:27:54 +00001600 stabstr = NULL;
1601 stab = NULL;
sewardjb51f2e62002-06-01 23:11:19 +00001602 dwarf2 = NULL;
sewardjde4a1d02002-03-22 01:27:54 +00001603 stabstr_sz = 0;
1604 stab_sz = 0;
sewardjb51f2e62002-06-01 23:11:19 +00001605 dwarf2_sz = 0;
1606
1607 /* find the .stabstr / .stab / .debug_line sections */
sewardjde4a1d02002-03-22 01:27:54 +00001608 for (i = 0; i < ehdr->e_shnum; i++) {
1609 if (0 == VG_(strcmp)(".stab",sh_strtab + shdr[i].sh_name)) {
sewardjb51f2e62002-06-01 23:11:19 +00001610 stab = (UChar*)(oimage + shdr[i].sh_offset);
sewardjde4a1d02002-03-22 01:27:54 +00001611 stab_sz = shdr[i].sh_size;
1612 }
1613 if (0 == VG_(strcmp)(".stabstr",sh_strtab + shdr[i].sh_name)) {
1614 stabstr = (UChar*)(oimage + shdr[i].sh_offset);
1615 stabstr_sz = shdr[i].sh_size;
1616 }
sewardjc134dd92002-06-01 14:21:36 +00001617 if (0 == VG_(strcmp)(".debug_line",sh_strtab + shdr[i].sh_name)) {
sewardjb51f2e62002-06-01 23:11:19 +00001618 dwarf2 = (UChar *)(oimage + shdr[i].sh_offset);
1619 dwarf2_sz = shdr[i].sh_size;
sewardjc134dd92002-06-01 14:21:36 +00001620 }
sewardjde4a1d02002-03-22 01:27:54 +00001621 }
1622
sewardjb51f2e62002-06-01 23:11:19 +00001623 if ((stab == NULL || stabstr == NULL) && dwarf2 == NULL) {
sewardjde4a1d02002-03-22 01:27:54 +00001624 vg_symerr(" object doesn't have any debug info");
1625 VG_(munmap) ( (void*)oimage, n_oimage );
1626 return;
1627 }
1628
1629 if ( stab_sz + (UChar*)stab > n_oimage + (UChar*)oimage
1630 || stabstr_sz + (UChar*)stabstr
1631 > n_oimage + (UChar*)oimage ) {
sewardjb51f2e62002-06-01 23:11:19 +00001632 vg_symerr(" ELF (stabs) debug data is beyond image end?!");
sewardjde4a1d02002-03-22 01:27:54 +00001633 VG_(munmap) ( (void*)oimage, n_oimage );
1634 return;
1635 }
1636
sewardjb51f2e62002-06-01 23:11:19 +00001637 if ( dwarf2_sz + (UChar*)dwarf2 > n_oimage + (UChar*)oimage ) {
1638 vg_symerr(" ELF (dwarf2) debug data is beyond image end?!");
1639 VG_(munmap) ( (void*)oimage, n_oimage );
1640 return;
1641 }
sewardjde4a1d02002-03-22 01:27:54 +00001642
sewardjb51f2e62002-06-01 23:11:19 +00001643 /* Looks plausible. Go on and read debug data. */
1644 if (stab != NULL && stabstr != NULL) {
1645 read_debuginfo_stabs ( si, stab, stab_sz, stabstr, stabstr_sz );
1646 }
sewardjde4a1d02002-03-22 01:27:54 +00001647
sewardjb51f2e62002-06-01 23:11:19 +00001648 if (dwarf2 != NULL) {
1649 read_debuginfo_dwarf2 ( si, dwarf2, dwarf2_sz );
1650 }
sewardjde4a1d02002-03-22 01:27:54 +00001651
1652 /* Last, but not least, heave the oimage back overboard. */
1653 VG_(munmap) ( (void*)oimage, n_oimage );
1654}
1655
1656
1657/*------------------------------------------------------------*/
1658/*--- Main entry point for symbols table reading. ---*/
1659/*------------------------------------------------------------*/
1660
1661/* The root structure for the entire symbol table system. It is a
1662 linked list of SegInfos. Note that this entire mechanism assumes
1663 that what we read from /proc/self/maps doesn't contain overlapping
1664 address ranges, and as a result the SegInfos in this list describe
1665 disjoint address ranges.
1666*/
1667static SegInfo* segInfo = NULL;
1668
1669
njn25e49d8e72002-09-23 09:36:25 +00001670void VG_(read_symtab_callback) (
sewardjde4a1d02002-03-22 01:27:54 +00001671 Addr start, UInt size,
1672 Char rr, Char ww, Char xx,
1673 UInt foffset, UChar* filename )
1674{
1675 SegInfo* si;
1676
1677 /* Stay sane ... */
1678 if (size == 0)
1679 return;
1680
1681 /* We're only interested in collecting symbols in executable
1682 segments which are associated with a real file. Hence: */
1683 if (filename == NULL || xx != 'x')
1684 return;
1685 if (0 == VG_(strcmp)(filename, "/dev/zero"))
1686 return;
1687
1688 /* Perhaps we already have this one? If so, skip. */
1689 for (si = segInfo; si != NULL; si = si->next) {
1690 /*
1691 if (0==VG_(strcmp)(si->filename, filename))
1692 VG_(printf)("same fnames: %c%c%c (%p, %d) (%p, %d) %s\n",
1693 rr,ww,xx,si->start,si->size,start,size,filename);
1694 */
1695 /* For some reason the observed size of a mapping can change, so
1696 we don't use that to determine uniqueness. */
1697 if (si->start == start
1698 /* && si->size == size */
1699 && 0==VG_(strcmp)(si->filename, filename)) {
1700 return;
1701 }
1702 }
1703
1704 /* Get the record initialised right. */
njn25e49d8e72002-09-23 09:36:25 +00001705 si = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(SegInfo));
sewardjde4a1d02002-03-22 01:27:54 +00001706 si->next = segInfo;
1707 segInfo = si;
1708
1709 si->start = start;
1710 si->size = size;
1711 si->foffset = foffset;
njn25e49d8e72002-09-23 09:36:25 +00001712 si->filename = VG_(arena_malloc)(VG_AR_SYMTAB, 1 + VG_(strlen)(filename));
sewardjde4a1d02002-03-22 01:27:54 +00001713 VG_(strcpy)(si->filename, filename);
1714
1715 si->symtab = NULL;
1716 si->symtab_size = si->symtab_used = 0;
1717 si->loctab = NULL;
1718 si->loctab_size = si->loctab_used = 0;
1719 si->strtab = NULL;
1720 si->strtab_size = si->strtab_used = 0;
1721
1722 /* Kludge ... */
njn25e49d8e72002-09-23 09:36:25 +00001723 si->offset = si->start==VG_ASSUMED_EXE_BASE ? 0 : si->start;
sewardjde4a1d02002-03-22 01:27:54 +00001724
1725 /* And actually fill it up. */
njn25e49d8e72002-09-23 09:36:25 +00001726 vg_read_lib_symbols ( si );
1727 canonicaliseSymtab ( si );
1728 canonicaliseLoctab ( si );
sewardjde4a1d02002-03-22 01:27:54 +00001729}
1730
1731
1732/* This one really is the Head Honcho. Update the symbol tables to
1733 reflect the current state of /proc/self/maps. Rather than re-read
1734 everything, just read the entries which are not already in segInfo.
1735 So we can call here repeatedly, after every mmap of a non-anonymous
1736 segment with execute permissions, for example, to pick up new
1737 libraries as they are dlopen'd. Conversely, when the client does
1738 munmap(), vg_symtab_notify_munmap() throws away any symbol tables
1739 which happen to correspond to the munmap()d area. */
njn25e49d8e72002-09-23 09:36:25 +00001740void VG_(maybe_read_symbols) ( void )
sewardjde4a1d02002-03-22 01:27:54 +00001741{
njn25e49d8e72002-09-23 09:36:25 +00001742 if (!VG_(using_debug_info))
1743 return;
sewardjde4a1d02002-03-22 01:27:54 +00001744
njn25e49d8e72002-09-23 09:36:25 +00001745 VGP_PUSHCC(VgpReadSyms);
1746 VG_(read_procselfmaps) ( VG_(read_symtab_callback) );
1747 VGP_POPCC(VgpReadSyms);
sewardjde4a1d02002-03-22 01:27:54 +00001748}
1749
sewardjde4a1d02002-03-22 01:27:54 +00001750/* When an munmap() call happens, check to see whether it corresponds
1751 to a segment for a .so, and if so discard the relevant SegInfo.
1752 This might not be a very clever idea from the point of view of
1753 accuracy of error messages, but we need to do it in order to
sewardj18d75132002-05-16 11:06:21 +00001754 maintain the no-overlapping invariant.
sewardjde4a1d02002-03-22 01:27:54 +00001755*/
njn25e49d8e72002-09-23 09:36:25 +00001756void VG_(maybe_unload_symbols) ( Addr start, UInt length )
sewardjde4a1d02002-03-22 01:27:54 +00001757{
1758 SegInfo *prev, *curr;
1759
njn25e49d8e72002-09-23 09:36:25 +00001760 if (!VG_(using_debug_info))
1761 return;
1762
sewardjde4a1d02002-03-22 01:27:54 +00001763 prev = NULL;
1764 curr = segInfo;
1765 while (True) {
1766 if (curr == NULL) break;
1767 if (start == curr->start) break;
1768 prev = curr;
1769 curr = curr->next;
1770 }
sewardj18d75132002-05-16 11:06:21 +00001771 if (curr == NULL)
njn25e49d8e72002-09-23 09:36:25 +00001772 return;
sewardjde4a1d02002-03-22 01:27:54 +00001773
1774 VG_(message)(Vg_UserMsg,
1775 "discard syms in %s due to munmap()",
1776 curr->filename ? curr->filename : (UChar*)"???");
1777
1778 vg_assert(prev == NULL || prev->next == curr);
1779
1780 if (prev == NULL) {
1781 segInfo = curr->next;
1782 } else {
1783 prev->next = curr->next;
1784 }
1785
1786 freeSegInfo(curr);
njn25e49d8e72002-09-23 09:36:25 +00001787 return;
sewardjde4a1d02002-03-22 01:27:54 +00001788}
1789
1790
1791/*------------------------------------------------------------*/
1792/*--- Use of symbol table & location info to create ---*/
1793/*--- plausible-looking stack dumps. ---*/
1794/*------------------------------------------------------------*/
1795
njn25e49d8e72002-09-23 09:36:25 +00001796static __inline__ void ensure_debug_info_inited ( void )
1797{
1798 if (!VG_(using_debug_info)) {
1799 VG_(using_debug_info) = True;
1800 VG_(maybe_read_symbols)();
1801 }
1802}
1803
sewardjde4a1d02002-03-22 01:27:54 +00001804/* Find a symbol-table index containing the specified pointer, or -1
1805 if not found. Binary search. */
1806
njn25e49d8e72002-09-23 09:36:25 +00001807static Int search_one_symtab ( SegInfo* si, Addr ptr,
1808 Bool match_anywhere_in_fun )
sewardjde4a1d02002-03-22 01:27:54 +00001809{
1810 Addr a_mid_lo, a_mid_hi;
njn25e49d8e72002-09-23 09:36:25 +00001811 Int mid, size,
sewardjde4a1d02002-03-22 01:27:54 +00001812 lo = 0,
1813 hi = si->symtab_used-1;
1814 while (True) {
1815 /* current unsearched space is from lo to hi, inclusive. */
1816 if (lo > hi) return -1; /* not found */
1817 mid = (lo + hi) / 2;
1818 a_mid_lo = si->symtab[mid].addr;
njn25e49d8e72002-09-23 09:36:25 +00001819 size = ( match_anywhere_in_fun
1820 ? si->symtab[mid].size
1821 : 1);
1822 a_mid_hi = ((Addr)si->symtab[mid].addr) + size - 1;
sewardjde4a1d02002-03-22 01:27:54 +00001823
1824 if (ptr < a_mid_lo) { hi = mid-1; continue; }
1825 if (ptr > a_mid_hi) { lo = mid+1; continue; }
1826 vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
1827 return mid;
1828 }
1829}
1830
1831
1832/* Search all symtabs that we know about to locate ptr. If found, set
1833 *psi to the relevant SegInfo, and *symno to the symtab entry number
1834 within that. If not found, *psi is set to NULL. */
1835
njn25e49d8e72002-09-23 09:36:25 +00001836static void search_all_symtabs ( Addr ptr, /*OUT*/SegInfo** psi,
1837 /*OUT*/Int* symno,
1838 Bool match_anywhere_in_fun )
sewardjde4a1d02002-03-22 01:27:54 +00001839{
1840 Int sno;
1841 SegInfo* si;
njn25e49d8e72002-09-23 09:36:25 +00001842
1843 ensure_debug_info_inited();
1844 VGP_PUSHCC(VgpSearchSyms);
1845
sewardjde4a1d02002-03-22 01:27:54 +00001846 for (si = segInfo; si != NULL; si = si->next) {
1847 if (si->start <= ptr && ptr < si->start+si->size) {
njn25e49d8e72002-09-23 09:36:25 +00001848 sno = search_one_symtab ( si, ptr, match_anywhere_in_fun );
sewardjde4a1d02002-03-22 01:27:54 +00001849 if (sno == -1) goto not_found;
1850 *symno = sno;
1851 *psi = si;
njn25e49d8e72002-09-23 09:36:25 +00001852 VGP_POPCC(VgpSearchSyms);
sewardjde4a1d02002-03-22 01:27:54 +00001853 return;
1854 }
1855 }
1856 not_found:
1857 *psi = NULL;
njn25e49d8e72002-09-23 09:36:25 +00001858 VGP_POPCC(VgpSearchSyms);
sewardjde4a1d02002-03-22 01:27:54 +00001859}
1860
1861
1862/* Find a location-table index containing the specified pointer, or -1
1863 if not found. Binary search. */
1864
1865static Int search_one_loctab ( SegInfo* si, Addr ptr )
1866{
1867 Addr a_mid_lo, a_mid_hi;
1868 Int mid,
1869 lo = 0,
1870 hi = si->loctab_used-1;
1871 while (True) {
1872 /* current unsearched space is from lo to hi, inclusive. */
1873 if (lo > hi) return -1; /* not found */
1874 mid = (lo + hi) / 2;
1875 a_mid_lo = si->loctab[mid].addr;
1876 a_mid_hi = ((Addr)si->loctab[mid].addr) + si->loctab[mid].size - 1;
1877
1878 if (ptr < a_mid_lo) { hi = mid-1; continue; }
1879 if (ptr > a_mid_hi) { lo = mid+1; continue; }
1880 vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
1881 return mid;
1882 }
1883}
1884
1885
1886/* Search all loctabs that we know about to locate ptr. If found, set
1887 *psi to the relevant SegInfo, and *locno to the loctab entry number
1888 within that. If not found, *psi is set to NULL.
1889*/
njn25e49d8e72002-09-23 09:36:25 +00001890static void search_all_loctabs ( Addr ptr, /*OUT*/SegInfo** psi,
1891 /*OUT*/Int* locno )
sewardjde4a1d02002-03-22 01:27:54 +00001892{
1893 Int lno;
1894 SegInfo* si;
njn25e49d8e72002-09-23 09:36:25 +00001895
1896 VGP_PUSHCC(VgpSearchSyms);
1897
1898 ensure_debug_info_inited();
sewardjde4a1d02002-03-22 01:27:54 +00001899 for (si = segInfo; si != NULL; si = si->next) {
1900 if (si->start <= ptr && ptr < si->start+si->size) {
1901 lno = search_one_loctab ( si, ptr );
1902 if (lno == -1) goto not_found;
1903 *locno = lno;
1904 *psi = si;
njn25e49d8e72002-09-23 09:36:25 +00001905 VGP_POPCC(VgpSearchSyms);
sewardjde4a1d02002-03-22 01:27:54 +00001906 return;
1907 }
1908 }
1909 not_found:
1910 *psi = NULL;
njn25e49d8e72002-09-23 09:36:25 +00001911 VGP_POPCC(VgpSearchSyms);
sewardjde4a1d02002-03-22 01:27:54 +00001912}
1913
1914
1915/* The whole point of this whole big deal: map a code address to a
1916 plausible symbol name. Returns False if no idea; otherwise True.
njn25e49d8e72002-09-23 09:36:25 +00001917 Caller supplies buf and nbuf. If demangle is False, don't do
sewardjde4a1d02002-03-22 01:27:54 +00001918 demangling, regardless of vg_clo_demangle -- probably because the
1919 call has come from vg_what_fn_or_object_is_this. */
njn25e49d8e72002-09-23 09:36:25 +00001920static
1921Bool get_fnname ( Bool demangle, Addr a, Char* buf, Int nbuf,
1922 Bool match_anywhere_in_fun )
sewardjde4a1d02002-03-22 01:27:54 +00001923{
1924 SegInfo* si;
1925 Int sno;
njn25e49d8e72002-09-23 09:36:25 +00001926 search_all_symtabs ( a, &si, &sno, match_anywhere_in_fun );
sewardjde4a1d02002-03-22 01:27:54 +00001927 if (si == NULL)
1928 return False;
njn25e49d8e72002-09-23 09:36:25 +00001929 if (demangle) {
1930 VG_(demangle) ( & si->strtab[si->symtab[sno].nmoff], buf, nbuf );
1931 } else {
sewardjde4a1d02002-03-22 01:27:54 +00001932 VG_(strncpy_safely)
1933 ( buf, & si->strtab[si->symtab[sno].nmoff], nbuf );
sewardjde4a1d02002-03-22 01:27:54 +00001934 }
1935 return True;
1936}
1937
njn25e49d8e72002-09-23 09:36:25 +00001938/* This is available to skins... always demangle C++ names */
1939Bool VG_(get_fnname) ( Addr a, Char* buf, Int nbuf )
1940{
1941 return get_fnname ( /*demangle*/True, a, buf, nbuf,
1942 /*match_anywhere_in_fun*/True );
1943}
sewardjde4a1d02002-03-22 01:27:54 +00001944
njn25e49d8e72002-09-23 09:36:25 +00001945/* This is available to skins... always demangle C++ names,
1946 only succeed if 'a' matches first instruction of function. */
1947Bool VG_(get_fnname_if_entry) ( Addr a, Char* buf, Int nbuf )
1948{
1949 return get_fnname ( /*demangle*/True, a, buf, nbuf,
1950 /*match_anywhere_in_fun*/False );
1951}
1952
1953/* This is only available to core... don't demangle C++ names */
1954Bool VG_(get_fnname_nodemangle) ( Addr a, Char* buf, Int nbuf )
1955{
1956 return get_fnname ( /*demangle*/False, a, buf, nbuf,
1957 /*match_anywhere_in_fun*/True );
1958}
1959
1960/* Map a code address to the name of a shared object file or the executable.
1961 Returns False if no idea; otherwise True. Doesn't require debug info.
1962 Caller supplies buf and nbuf. */
1963Bool VG_(get_objname) ( Addr a, Char* buf, Int nbuf )
sewardjde4a1d02002-03-22 01:27:54 +00001964{
1965 SegInfo* si;
njn25e49d8e72002-09-23 09:36:25 +00001966
1967 ensure_debug_info_inited();
sewardjde4a1d02002-03-22 01:27:54 +00001968 for (si = segInfo; si != NULL; si = si->next) {
1969 if (si->start <= a && a < si->start+si->size) {
1970 VG_(strncpy_safely)(buf, si->filename, nbuf);
1971 return True;
1972 }
1973 }
1974 return False;
1975}
1976
njn25e49d8e72002-09-23 09:36:25 +00001977
1978/* Map a code address to a filename. Returns True if successful. */
1979Bool VG_(get_filename)( Addr a, Char* filename, Int n_filename )
sewardjde4a1d02002-03-22 01:27:54 +00001980{
njn25e49d8e72002-09-23 09:36:25 +00001981 SegInfo* si;
1982 Int locno;
1983 search_all_loctabs ( a, &si, &locno );
1984 if (si == NULL)
1985 return False;
1986 VG_(strncpy_safely)(filename, & si->strtab[si->loctab[locno].fnmoff],
1987 n_filename);
1988 return True;
sewardjde4a1d02002-03-22 01:27:54 +00001989}
1990
njn25e49d8e72002-09-23 09:36:25 +00001991/* Map a code address to a line number. Returns True if successful. */
1992Bool VG_(get_linenum)( Addr a, UInt* lineno )
1993{
1994 SegInfo* si;
1995 Int locno;
1996 search_all_loctabs ( a, &si, &locno );
1997 if (si == NULL)
1998 return False;
1999 *lineno = si->loctab[locno].lineno;
2000
2001 return True;
2002}
sewardjde4a1d02002-03-22 01:27:54 +00002003
2004/* Map a code address to a (filename, line number) pair.
2005 Returns True if successful.
2006*/
njn25e49d8e72002-09-23 09:36:25 +00002007Bool VG_(get_filename_linenum)( Addr a,
2008 Char* filename, Int n_filename,
2009 UInt* lineno )
sewardjde4a1d02002-03-22 01:27:54 +00002010{
2011 SegInfo* si;
2012 Int locno;
2013 search_all_loctabs ( a, &si, &locno );
2014 if (si == NULL)
2015 return False;
2016 VG_(strncpy_safely)(filename, & si->strtab[si->loctab[locno].fnmoff],
2017 n_filename);
2018 *lineno = si->loctab[locno].lineno;
njn4f9c9342002-04-29 16:03:24 +00002019
sewardjde4a1d02002-03-22 01:27:54 +00002020 return True;
2021}
2022
2023
2024/* Print a mini stack dump, showing the current location. */
2025void VG_(mini_stack_dump) ( ExeContext* ec )
2026{
2027
2028#define APPEND(str) \
2029 { UChar* sss; \
2030 for (sss = str; n < M_VG_ERRTXT-1 && *sss != 0; n++,sss++) \
2031 buf[n] = *sss; \
2032 buf[n] = 0; \
2033 }
2034
2035 Bool know_fnname;
2036 Bool know_objname;
2037 Bool know_srcloc;
2038 UInt lineno;
2039 UChar ibuf[20];
sewardj04b91062002-06-05 21:22:04 +00002040 UInt i, n;
sewardjde4a1d02002-03-22 01:27:54 +00002041
2042 UChar buf[M_VG_ERRTXT];
2043 UChar buf_fn[M_VG_ERRTXT];
2044 UChar buf_obj[M_VG_ERRTXT];
2045 UChar buf_srcloc[M_VG_ERRTXT];
2046
2047 Int stop_at = VG_(clo_backtrace_size);
2048
2049 n = 0;
2050
njn25e49d8e72002-09-23 09:36:25 +00002051 // SSS: factor this repeated code out!
2052
2053 know_fnname = VG_(get_fnname) (ec->eips[0], buf_fn, M_VG_ERRTXT);
2054 know_objname = VG_(get_objname)(ec->eips[0], buf_obj, M_VG_ERRTXT);
2055 know_srcloc = VG_(get_filename_linenum)(ec->eips[0],
2056 buf_srcloc, M_VG_ERRTXT,
2057 &lineno);
sewardjde4a1d02002-03-22 01:27:54 +00002058
2059 APPEND(" at ");
2060 VG_(sprintf)(ibuf,"0x%x: ", ec->eips[0]);
2061 APPEND(ibuf);
2062 if (know_fnname) {
2063 APPEND(buf_fn);
2064 if (!know_srcloc && know_objname) {
2065 APPEND(" (in ");
2066 APPEND(buf_obj);
2067 APPEND(")");
2068 }
2069 } else if (know_objname && !know_srcloc) {
2070 APPEND("(within ");
2071 APPEND(buf_obj);
2072 APPEND(")");
2073 } else {
2074 APPEND("???");
2075 }
2076 if (know_srcloc) {
2077 APPEND(" (");
2078 APPEND(buf_srcloc);
2079 APPEND(":");
2080 VG_(sprintf)(ibuf,"%d",lineno);
2081 APPEND(ibuf);
2082 APPEND(")");
2083 }
2084 VG_(message)(Vg_UserMsg, "%s", buf);
2085
sewardj04b91062002-06-05 21:22:04 +00002086 for (i = 1; i < stop_at && ec->eips[i] != 0; i++) {
njn25e49d8e72002-09-23 09:36:25 +00002087 know_fnname = VG_(get_fnname) (ec->eips[i], buf_fn, M_VG_ERRTXT);
2088 know_objname = VG_(get_objname)(ec->eips[i], buf_obj, M_VG_ERRTXT);
2089 know_srcloc = VG_(get_filename_linenum)(ec->eips[i],
2090 buf_srcloc, M_VG_ERRTXT,
2091 &lineno);
sewardjde4a1d02002-03-22 01:27:54 +00002092 n = 0;
2093 APPEND(" by ");
sewardj04b91062002-06-05 21:22:04 +00002094 VG_(sprintf)(ibuf,"0x%x: ",ec->eips[i]);
2095 APPEND(ibuf);
sewardjde4a1d02002-03-22 01:27:54 +00002096 if (know_fnname) {
2097 APPEND(buf_fn)
2098 if (!know_srcloc && know_objname) {
2099 APPEND(" (in ");
2100 APPEND(buf_obj);
2101 APPEND(")");
2102 }
2103 } else {
2104 if (know_objname && !know_srcloc) {
2105 APPEND("(within ");
2106 APPEND(buf_obj);
2107 APPEND(")");
2108 } else {
2109 APPEND("???");
2110 }
sewardjde4a1d02002-03-22 01:27:54 +00002111 };
2112 if (know_srcloc) {
2113 APPEND(" (");
2114 APPEND(buf_srcloc);
2115 APPEND(":");
2116 VG_(sprintf)(ibuf,"%d",lineno);
2117 APPEND(ibuf);
2118 APPEND(")");
2119 }
2120 VG_(message)(Vg_UserMsg, "%s", buf);
2121 }
2122}
2123
2124#undef APPEND
2125
2126/*--------------------------------------------------------------------*/
2127/*--- end vg_symtab2.c ---*/
2128/*--------------------------------------------------------------------*/