blob: c36d32b258cb1c015638577a822ddb82d5da3937 [file] [log] [blame]
sewardjde4a1d02002-03-22 01:27:54 +00001/*--------------------------------------------------------------------*/
2/*--- Management of symbols and debugging information. ---*/
3/*--- vg_symtab2.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
njnc9539842002-10-02 13:26:35 +00007 This file is part of Valgrind, an extensible x86 protected-mode
8 emulator for monitoring program execution on x86-Unixes.
sewardjde4a1d02002-03-22 01:27:54 +00009
10 Copyright (C) 2000-2002 Julian Seward
11 jseward@acm.org
sewardjde4a1d02002-03-22 01:27:54 +000012
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
njn25e49d8e72002-09-23 09:36:25 +000028 The GNU General Public License is contained in the file COPYING.
sewardjde4a1d02002-03-22 01:27:54 +000029*/
30
31#include "vg_include.h"
sewardjde4a1d02002-03-22 01:27:54 +000032
33#include <elf.h> /* ELF defns */
34#include <a.out.h> /* stabs defns */
35
njn9aae6742002-04-30 13:44:01 +000036
sewardjde4a1d02002-03-22 01:27:54 +000037/* Majorly rewritten Sun 3 Feb 02 to enable loading symbols from
38 dlopen()ed libraries, which is something that KDE3 does a lot.
sewardjde4a1d02002-03-22 01:27:54 +000039
njn25e49d8e72002-09-23 09:36:25 +000040 Stabs reader greatly improved by Nick Nethercote, Apr 02.
sewardjde4a1d02002-03-22 01:27:54 +000041*/
42
njn25e49d8e72002-09-23 09:36:25 +000043/* Set to True when the first debug info search is performed; it starts out False. */
44Bool VG_(using_debug_info) = False;
45
sewardjde4a1d02002-03-22 01:27:54 +000046/*------------------------------------------------------------*/
47/*--- Structs n stuff ---*/
48/*------------------------------------------------------------*/
49
50/* A structure to hold an ELF symbol (very crudely). */
51typedef
52 struct {
53 Addr addr; /* lowest address of entity */
54 UInt size; /* size in bytes */
55 Int nmoff; /* offset of name in this SegInfo's str tab */
56 }
57 RiSym;
58
/* `struct nlist' in a.out.h keeps line numbers in a short, so the stored
 * value wraps around once a file reaches this many lines. */
#define LINENO_OVERFLOW (1 << (sizeof(short) * 8))

/* How the second word of an RiLoc is divided: LINENO_BITS bits of line
 * number, and whatever is left of the 32 for the byte size. */
#define LINENO_BITS     20
#define LOC_SIZE_BITS  (32 - LINENO_BITS)
#define MAX_LINENO     ((1 << LINENO_BITS) - 1)

/* Largest byte size an RiLoc can record.  A single source line is
 * unlikely to cover an instruction range of more than 4096 bytes. */
#define MAX_LOC_SIZE   ((1 << LOC_SIZE_BITS) - 1)

/* Threshold used to detect line number overflows: if one line number is
 * 60000-odd smaller than the previous one, it was probably an overflow.
 */
#define OVERFLOW_DIFFERENCE     (LINENO_OVERFLOW - 5000)
74
75/* A structure to hold addr-to-source info for a single line. There can be a
76 * lot of these, hence the dense packing. */
sewardjde4a1d02002-03-22 01:27:54 +000077typedef
78 struct {
njne0ee0712002-05-03 16:41:05 +000079 /* Word 1 */
80 Addr addr; /* lowest address for this line */
81 /* Word 2 */
82 UShort size:LOC_SIZE_BITS; /* byte size; we catch overflows of this */
83 UInt lineno:LINENO_BITS; /* source line number, or zero */
84 /* Word 3 */
85 UInt fnmoff; /* source filename; offset in this
86 SegInfo's str tab */
sewardjde4a1d02002-03-22 01:27:54 +000087 }
88 RiLoc;
89
90
91/* A structure which contains information pertaining to one mapped
sewardj47104382002-10-20 18:35:48 +000092 text segment. (typedef in vg_skin.h) */
93struct _SegInfo {
94 struct _SegInfo* next;
95 /* Description of the mapped segment. */
96 Addr start;
97 UInt size;
98 UChar* filename; /* in mallocville */
99 UInt foffset;
100 /* An expandable array of symbols. */
101 RiSym* symtab;
102 UInt symtab_used;
103 UInt symtab_size;
104 /* An expandable array of locations. */
105 RiLoc* loctab;
106 UInt loctab_used;
107 UInt loctab_size;
108 /* An expandable array of characters -- the string table. */
109 Char* strtab;
110 UInt strtab_used;
111 UInt strtab_size;
112 /* offset is what we need to add to symbol table entries
113 to get the real location of that symbol in memory.
114 For executables, offset is zero.
115 For .so's, offset == base_addr.
116 This seems like a giant kludge to me.
117 */
118 UInt offset;
119
sewardj8fe15a32002-10-20 19:29:21 +0000120 /* Bounds of data, BSS, PLT and GOT, so that skins can see what
121 section an address is in */
sewardj47104382002-10-20 18:35:48 +0000122 Addr plt_start;
123 UInt plt_size;
124 Addr got_start;
125 UInt got_size;
sewardj8fe15a32002-10-20 19:29:21 +0000126 Addr data_start;
127 UInt data_size;
128 Addr bss_start;
129 UInt bss_size;
sewardj47104382002-10-20 18:35:48 +0000130};
sewardjde4a1d02002-03-22 01:27:54 +0000131
132
sewardjde4a1d02002-03-22 01:27:54 +0000133static void freeSegInfo ( SegInfo* si )
134{
135 vg_assert(si != NULL);
njn25e49d8e72002-09-23 09:36:25 +0000136 if (si->filename) VG_(arena_free)(VG_AR_SYMTAB, si->filename);
137 if (si->symtab) VG_(arena_free)(VG_AR_SYMTAB, si->symtab);
138 if (si->loctab) VG_(arena_free)(VG_AR_SYMTAB, si->loctab);
139 if (si->strtab) VG_(arena_free)(VG_AR_SYMTAB, si->strtab);
140 VG_(arena_free)(VG_AR_SYMTAB, si);
sewardjde4a1d02002-03-22 01:27:54 +0000141}
142
143
144/*------------------------------------------------------------*/
145/*--- Adding stuff ---*/
146/*------------------------------------------------------------*/
147
148/* Add a str to the string table, including terminating zero, and
njn25e49d8e72002-09-23 09:36:25 +0000149 return offset of the string in vg_strtab. Unless it's been seen
150 recently, in which case we find the old index and return that.
151 This avoids the most egregious duplications. */
sewardjde4a1d02002-03-22 01:27:54 +0000152
153static __inline__
154Int addStr ( SegInfo* si, Char* str )
155{
njn25e49d8e72002-09-23 09:36:25 +0000156# define EMPTY 0xffffffff
157# define NN 5
158
159 /* prevN[0] has the most recent, prevN[NN-1] the least recent */
sewardj8fe15a32002-10-20 19:29:21 +0000160 static UInt prevN[NN] = { EMPTY, EMPTY, EMPTY, EMPTY, EMPTY };
njn25e49d8e72002-09-23 09:36:25 +0000161 static SegInfo* curr_si = NULL;
162
sewardjde4a1d02002-03-22 01:27:54 +0000163 Char* new_tab;
164 Int new_sz, i, space_needed;
njn25e49d8e72002-09-23 09:36:25 +0000165
166 /* Avoid gratuitous duplication: if we saw `str' within the last NN,
167 * within this segment, return that index. Saves about 200KB in glibc,
168 * extra time taken is too small to measure. --NJN 2002-Aug-30 */
169 if (curr_si == si) {
170 for (i = NN-1; i >= 0; i--) {
sewardjcda419b2002-10-01 08:59:36 +0000171 if (EMPTY != prevN[i]
172 && NULL != si->strtab
173 && 0 == VG_(strcmp)(str, &si->strtab[prevN[i]])) {
njn25e49d8e72002-09-23 09:36:25 +0000174 return prevN[i];
175 }
176 }
177 } else {
178 /* New segment */
179 curr_si = si;
sewardjcda419b2002-10-01 08:59:36 +0000180 for (i = 0; i < NN; i++) prevN[i] = EMPTY;
njn25e49d8e72002-09-23 09:36:25 +0000181 }
182 /* Shuffle prevous ones along, put new one in. */
183 for (i = NN-1; i > 0; i--) prevN[i] = prevN[i-1];
184 prevN[0] = si->strtab_used;
185
186# undef EMPTY
187
sewardjde4a1d02002-03-22 01:27:54 +0000188 space_needed = 1 + VG_(strlen)(str);
njn25e49d8e72002-09-23 09:36:25 +0000189
sewardjde4a1d02002-03-22 01:27:54 +0000190 if (si->strtab_used + space_needed > si->strtab_size) {
191 new_sz = 2 * si->strtab_size;
192 if (new_sz == 0) new_sz = 5000;
njn25e49d8e72002-09-23 09:36:25 +0000193 new_tab = VG_(arena_malloc)(VG_AR_SYMTAB, new_sz);
sewardjde4a1d02002-03-22 01:27:54 +0000194 if (si->strtab != NULL) {
195 for (i = 0; i < si->strtab_used; i++)
196 new_tab[i] = si->strtab[i];
njn25e49d8e72002-09-23 09:36:25 +0000197 VG_(arena_free)(VG_AR_SYMTAB, si->strtab);
sewardjde4a1d02002-03-22 01:27:54 +0000198 }
199 si->strtab = new_tab;
200 si->strtab_size = new_sz;
201 }
202
203 for (i = 0; i < space_needed; i++)
204 si->strtab[si->strtab_used+i] = str[i];
205
206 si->strtab_used += space_needed;
207 vg_assert(si->strtab_used <= si->strtab_size);
njn25e49d8e72002-09-23 09:36:25 +0000208
sewardjde4a1d02002-03-22 01:27:54 +0000209 return si->strtab_used - space_needed;
210}
211
212/* Add a symbol to the symbol table. */
213
214static __inline__
215void addSym ( SegInfo* si, RiSym* sym )
216{
217 Int new_sz, i;
218 RiSym* new_tab;
219
220 /* Ignore zero-sized syms. */
221 if (sym->size == 0) return;
222
223 if (si->symtab_used == si->symtab_size) {
224 new_sz = 2 * si->symtab_size;
225 if (new_sz == 0) new_sz = 500;
njn25e49d8e72002-09-23 09:36:25 +0000226 new_tab = VG_(arena_malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiSym) );
sewardjde4a1d02002-03-22 01:27:54 +0000227 if (si->symtab != NULL) {
228 for (i = 0; i < si->symtab_used; i++)
229 new_tab[i] = si->symtab[i];
njn25e49d8e72002-09-23 09:36:25 +0000230 VG_(arena_free)(VG_AR_SYMTAB, si->symtab);
sewardjde4a1d02002-03-22 01:27:54 +0000231 }
232 si->symtab = new_tab;
233 si->symtab_size = new_sz;
234 }
235
236 si->symtab[si->symtab_used] = *sym;
237 si->symtab_used++;
238 vg_assert(si->symtab_used <= si->symtab_size);
239}
240
241/* Add a location to the location table. */
242
243static __inline__
244void addLoc ( SegInfo* si, RiLoc* loc )
245{
246 Int new_sz, i;
247 RiLoc* new_tab;
248
njne0ee0712002-05-03 16:41:05 +0000249 /* Zero-sized locs should have been ignored earlier */
250 vg_assert(loc->size > 0);
sewardjde4a1d02002-03-22 01:27:54 +0000251
252 if (si->loctab_used == si->loctab_size) {
253 new_sz = 2 * si->loctab_size;
254 if (new_sz == 0) new_sz = 500;
njn25e49d8e72002-09-23 09:36:25 +0000255 new_tab = VG_(arena_malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiLoc) );
sewardjde4a1d02002-03-22 01:27:54 +0000256 if (si->loctab != NULL) {
257 for (i = 0; i < si->loctab_used; i++)
258 new_tab[i] = si->loctab[i];
njn25e49d8e72002-09-23 09:36:25 +0000259 VG_(arena_free)(VG_AR_SYMTAB, si->loctab);
sewardjde4a1d02002-03-22 01:27:54 +0000260 }
261 si->loctab = new_tab;
262 si->loctab_size = new_sz;
263 }
264
265 si->loctab[si->loctab_used] = *loc;
266 si->loctab_used++;
267 vg_assert(si->loctab_used <= si->loctab_size);
268}
269
270
sewardjb51f2e62002-06-01 23:11:19 +0000271/* Top-level place to call to add a source-location mapping entry. */
272
273static __inline__
274void addLineInfo ( SegInfo* si,
275 Int fnmoff,
276 Addr this,
277 Addr next,
278 Int lineno,
sewardj08a50f62002-06-17 02:21:20 +0000279 Int entry /* only needed for debug printing */
280 )
sewardjb51f2e62002-06-01 23:11:19 +0000281{
282 RiLoc loc;
283 Int size = next - this;
284
285 /* Ignore zero-sized locs */
286 if (this == next) return;
287
288 /* Maximum sanity checking. Some versions of GNU as do a shabby
289 * job with stabs entries; if anything looks suspicious, revert to
290 * a size of 1. This should catch the instruction of interest
291 * (since if using asm-level debug info, one instruction will
292 * correspond to one line, unlike with C-level debug info where
293 * multiple instructions can map to the one line), but avoid
294 * catching any other instructions bogusly. */
295 if (this > next) {
296 VG_(message)(Vg_DebugMsg,
sewardj08a50f62002-06-17 02:21:20 +0000297 "warning: line info addresses out of order "
sewardjb51f2e62002-06-01 23:11:19 +0000298 "at entry %d: 0x%x 0x%x", entry, this, next);
299 size = 1;
300 }
301
302 if (size > MAX_LOC_SIZE) {
sewardjd84606d2002-06-18 01:04:57 +0000303 if (0)
sewardjb51f2e62002-06-01 23:11:19 +0000304 VG_(message)(Vg_DebugMsg,
sewardj08a50f62002-06-17 02:21:20 +0000305 "warning: line info address range too large "
sewardjb51f2e62002-06-01 23:11:19 +0000306 "at entry %d: %d", entry, size);
307 size = 1;
308 }
309
sewardj08a50f62002-06-17 02:21:20 +0000310 /* vg_assert(this < si->start + si->size && next-1 >= si->start); */
njne306ffe2002-06-08 13:34:17 +0000311 if (this >= si->start + si->size || next-1 < si->start) {
sewardjd84606d2002-06-18 01:04:57 +0000312 if (0)
sewardj08a50f62002-06-17 02:21:20 +0000313 VG_(message)(Vg_DebugMsg,
314 "warning: ignoring line info entry falling "
315 "outside current SegInfo: %p %p %p %p",
316 si->start, si->start + si->size,
317 this, next-1);
njne306ffe2002-06-08 13:34:17 +0000318 return;
319 }
320
321 vg_assert(lineno >= 0);
322 if (lineno > MAX_LINENO) {
323 VG_(message)(Vg_UserMsg,
sewardj08a50f62002-06-17 02:21:20 +0000324 "warning: ignoring line info entry with "
325 "huge line number (%d)", lineno);
njne306ffe2002-06-08 13:34:17 +0000326 VG_(message)(Vg_UserMsg,
327 " Can't handle line numbers "
sewardj08a50f62002-06-17 02:21:20 +0000328 "greater than %d, sorry", MAX_LINENO);
njne306ffe2002-06-08 13:34:17 +0000329 return;
330 }
sewardjb51f2e62002-06-01 23:11:19 +0000331
332 loc.addr = this;
333 loc.size = (UShort)size;
334 loc.lineno = lineno;
335 loc.fnmoff = fnmoff;
sewardjb642dc22002-10-12 17:27:16 +0000336
337 if (0) VG_(message)(Vg_DebugMsg,
338 "addLoc: addr %p, size %d, line %d, file %s",
339 this,size,lineno,&si->strtab[fnmoff]);
340
sewardjb51f2e62002-06-01 23:11:19 +0000341 addLoc ( si, &loc );
342}
343
sewardjde4a1d02002-03-22 01:27:54 +0000344/*------------------------------------------------------------*/
345/*--- Helpers ---*/
346/*------------------------------------------------------------*/
347
348/* Non-fatal -- use vg_panic if terminal. */
349static
350void vg_symerr ( Char* msg )
351{
352 if (VG_(clo_verbosity) > 1)
353 VG_(message)(Vg_UserMsg,"%s", msg );
354}
355
356
357/* Print a symbol. */
358static
359void printSym ( SegInfo* si, Int i )
360{
361 VG_(printf)( "%5d: %8p .. %8p (%d) %s\n",
362 i,
363 si->symtab[i].addr,
364 si->symtab[i].addr + si->symtab[i].size - 1, si->symtab[i].size,
365 &si->strtab[si->symtab[i].nmoff] );
366}
367
368
#if 0
/* Print the entire sym tab, bracketed by BEGIN/END banners.
   NOTE(review): disabled and stale -- it uses vg_symtab_used and a
   one-argument printSym, whereas printSym in this file takes
   (SegInfo*, Int).  It needs porting before it can be re-enabled. */
static __attribute__ ((unused))
void printSymtab ( void )
{
   Int i;
   VG_(printf)("\n------ BEGIN vg_symtab ------\n");
   for (i = 0; i < vg_symtab_used; i++)
      printSym(i);
   VG_(printf)("------ END vg_symtab ------\n");
}
#endif
381
#if 0
/* Paranoid strcat: append src to the string already in dst, never
   touching dst[maxlen] or beyond.  If dst has no terminator within its
   first maxlen bytes nothing is written; if space runs out part-way
   through, the copy simply stops (without writing a terminator). */
static
void safeCopy ( UChar* dst, UInt maxlen, UChar* src )
{
   UInt d, s;

   /* Find the end of the existing string. */
   for (d = 0; ; d++) {
      if (d >= maxlen) return;
      if (dst[d] == 0) break;
   }

   /* Append, terminator included, for as long as there is room. */
   for (s = 0; d < maxlen; d++, s++) {
      dst[d] = src[s];
      if (src[s] == 0) return;
   }
}
#endif
401
sewardjb51f2e62002-06-01 23:11:19 +0000402
sewardjde4a1d02002-03-22 01:27:54 +0000403/*------------------------------------------------------------*/
404/*--- Canonicalisers ---*/
405/*------------------------------------------------------------*/
406
407/* Sort the symtab by starting address, and emit warnings if any
408 symbols have overlapping address ranges. We use that old chestnut,
409 shellsort. Mash the table around so as to establish the property
410 that addresses are in order and the ranges to not overlap. This
411 facilitates using binary search to map addresses to symbols when we
412 come to query the table.
413*/
414static
415void canonicaliseSymtab ( SegInfo* si )
416{
417 /* Magic numbers due to Janet Incerpi and Robert Sedgewick. */
418 Int incs[16] = { 1, 3, 7, 21, 48, 112, 336, 861, 1968,
419 4592, 13776, 33936, 86961, 198768,
420 463792, 1391376 };
421 Int lo = 0;
422 Int hi = si->symtab_used-1;
423 Int i, j, h, bigN, hp, n_merged, n_truncated;
424 RiSym v;
425 Addr s1, s2, e1, e2;
426
427# define SWAP(ty,aa,bb) \
428 do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0)
429
430 bigN = hi - lo + 1; if (bigN < 2) return;
431 hp = 0; while (hp < 16 && incs[hp] < bigN) hp++; hp--;
432 vg_assert(0 <= hp && hp < 16);
433
434 for (; hp >= 0; hp--) {
435 h = incs[hp];
436 i = lo + h;
437 while (1) {
438 if (i > hi) break;
439 v = si->symtab[i];
440 j = i;
441 while (si->symtab[j-h].addr > v.addr) {
442 si->symtab[j] = si->symtab[j-h];
443 j = j - h;
444 if (j <= (lo + h - 1)) break;
445 }
446 si->symtab[j] = v;
447 i++;
448 }
449 }
450
451 cleanup_more:
452
453 /* If two symbols have identical address ranges, favour the
454 one with the longer name.
455 */
456 do {
457 n_merged = 0;
458 j = si->symtab_used;
459 si->symtab_used = 0;
460 for (i = 0; i < j; i++) {
461 if (i < j-1
462 && si->symtab[i].addr == si->symtab[i+1].addr
463 && si->symtab[i].size == si->symtab[i+1].size) {
464 n_merged++;
465 /* merge the two into one */
466 if (VG_(strlen)(&si->strtab[si->symtab[i].nmoff])
467 > VG_(strlen)(&si->strtab[si->symtab[i+1].nmoff])) {
468 si->symtab[si->symtab_used++] = si->symtab[i];
469 } else {
470 si->symtab[si->symtab_used++] = si->symtab[i+1];
471 }
472 i++;
473 } else {
474 si->symtab[si->symtab_used++] = si->symtab[i];
475 }
476 }
477 if (VG_(clo_trace_symtab))
478 VG_(printf)( "%d merged\n", n_merged);
479 }
480 while (n_merged > 0);
481
482 /* Detect and "fix" overlapping address ranges. */
483 n_truncated = 0;
484
485 for (i = 0; i < si->symtab_used-1; i++) {
486
487 vg_assert(si->symtab[i].addr <= si->symtab[i+1].addr);
488
489 /* Check for common (no overlap) case. */
490 if (si->symtab[i].addr + si->symtab[i].size
491 <= si->symtab[i+1].addr)
492 continue;
493
494 /* There's an overlap. Truncate one or the other. */
495 if (VG_(clo_trace_symtab)) {
496 VG_(printf)("overlapping address ranges in symbol table\n\t");
497 printSym(si,i);
498 VG_(printf)("\t");
499 printSym(si,i+1);
500 VG_(printf)("\n");
501 }
502
503 /* Truncate one or the other. */
504 s1 = si->symtab[i].addr;
505 s2 = si->symtab[i+1].addr;
506 e1 = s1 + si->symtab[i].size - 1;
507 e2 = s2 + si->symtab[i+1].size - 1;
508 if (s1 < s2) {
509 e1 = s2-1;
510 } else {
511 vg_assert(s1 == s2);
512 if (e1 > e2) {
513 s1 = e2+1; SWAP(Addr,s1,s2); SWAP(Addr,e1,e2);
514 } else
515 if (e1 < e2) {
516 s2 = e1+1;
517 } else {
518 /* e1 == e2. Identical addr ranges. We'll eventually wind
519 up back at cleanup_more, which will take care of it. */
520 }
521 }
522 si->symtab[i].addr = s1;
523 si->symtab[i+1].addr = s2;
524 si->symtab[i].size = e1 - s1 + 1;
525 si->symtab[i+1].size = e2 - s2 + 1;
526 vg_assert(s1 <= s2);
527 vg_assert(si->symtab[i].size > 0);
528 vg_assert(si->symtab[i+1].size > 0);
529 /* It may be that the i+1 entry now needs to be moved further
530 along to maintain the address order requirement. */
531 j = i+1;
532 while (j < si->symtab_used-1
533 && si->symtab[j].addr > si->symtab[j+1].addr) {
534 SWAP(RiSym,si->symtab[j],si->symtab[j+1]);
535 j++;
536 }
537 n_truncated++;
538 }
539
540 if (n_truncated > 0) goto cleanup_more;
541
542 /* Ensure relevant postconditions hold. */
543 for (i = 0; i < si->symtab_used-1; i++) {
544 /* No zero-sized symbols. */
545 vg_assert(si->symtab[i].size > 0);
546 /* In order. */
547 vg_assert(si->symtab[i].addr < si->symtab[i+1].addr);
548 /* No overlaps. */
549 vg_assert(si->symtab[i].addr + si->symtab[i].size - 1
550 < si->symtab[i+1].addr);
551 }
552# undef SWAP
553}
554
555
556
557/* Sort the location table by starting address. Mash the table around
558 so as to establish the property that addresses are in order and the
559 ranges do not overlap. This facilitates using binary search to map
sewardjb51f2e62002-06-01 23:11:19 +0000560 addresses to locations when we come to query the table.
561*/
sewardjde4a1d02002-03-22 01:27:54 +0000562static
563void canonicaliseLoctab ( SegInfo* si )
564{
565 /* Magic numbers due to Janet Incerpi and Robert Sedgewick. */
566 Int incs[16] = { 1, 3, 7, 21, 48, 112, 336, 861, 1968,
567 4592, 13776, 33936, 86961, 198768,
568 463792, 1391376 };
569 Int lo = 0;
570 Int hi = si->loctab_used-1;
571 Int i, j, h, bigN, hp;
572 RiLoc v;
573
574# define SWAP(ty,aa,bb) \
575 do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0);
576
577 /* Sort by start address. */
578
579 bigN = hi - lo + 1; if (bigN < 2) return;
580 hp = 0; while (hp < 16 && incs[hp] < bigN) hp++; hp--;
581 vg_assert(0 <= hp && hp < 16);
582
583 for (; hp >= 0; hp--) {
584 h = incs[hp];
585 i = lo + h;
586 while (1) {
587 if (i > hi) break;
588 v = si->loctab[i];
589 j = i;
590 while (si->loctab[j-h].addr > v.addr) {
591 si->loctab[j] = si->loctab[j-h];
592 j = j - h;
593 if (j <= (lo + h - 1)) break;
594 }
595 si->loctab[j] = v;
596 i++;
597 }
598 }
599
600 /* If two adjacent entries overlap, truncate the first. */
601 for (i = 0; i < si->loctab_used-1; i++) {
602 vg_assert(si->loctab[i].size < 10000);
603 if (si->loctab[i].addr + si->loctab[i].size > si->loctab[i+1].addr) {
604 /* Do this in signed int32 because the actual .size fields
605 are unsigned 16s. */
606 Int new_size = si->loctab[i+1].addr - si->loctab[i].addr;
607 if (new_size < 0) {
608 si->loctab[i].size = 0;
609 } else
610 if (new_size >= 65536) {
611 si->loctab[i].size = 65535;
612 } else {
613 si->loctab[i].size = (UShort)new_size;
614 }
615 }
616 }
617
618 /* Zap any zero-sized entries resulting from the truncation
619 process. */
620 j = 0;
621 for (i = 0; i < si->loctab_used; i++) {
622 if (si->loctab[i].size > 0) {
623 si->loctab[j] = si->loctab[i];
624 j++;
625 }
626 }
627 si->loctab_used = j;
628
629 /* Ensure relevant postconditions hold. */
630 for (i = 0; i < si->loctab_used-1; i++) {
631 /*
632 VG_(printf)("%d (%d) %d 0x%x\n",
633 i, si->loctab[i+1].confident,
634 si->loctab[i+1].size, si->loctab[i+1].addr );
635 */
636 /* No zero-sized symbols. */
637 vg_assert(si->loctab[i].size > 0);
638 /* In order. */
639 vg_assert(si->loctab[i].addr < si->loctab[i+1].addr);
640 /* No overlaps. */
641 vg_assert(si->loctab[i].addr + si->loctab[i].size - 1
642 < si->loctab[i+1].addr);
643 }
644# undef SWAP
645}
646
647
648/*------------------------------------------------------------*/
sewardjb51f2e62002-06-01 23:11:19 +0000649/*--- Read STABS format debug info. ---*/
sewardjde4a1d02002-03-22 01:27:54 +0000650/*------------------------------------------------------------*/
651
/* Stabs entry types, from:
 *   The "stabs" debug format
 *   Menapace, Kingdon and MacKenzie
 *   Cygnus Support
 */
typedef enum {
   N_GSYM  = 32,    /* Global symbol                    */
   N_FUN   = 36,    /* Function start or end            */
   N_STSYM = 38,    /* Data segment file-scope variable */
   N_LCSYM = 40,    /* BSS segment file-scope variable  */
   N_RSYM  = 64,    /* Register variable                */
   N_SLINE = 68,    /* Source line number               */
   N_SO    = 100,   /* Source file path and name        */
   N_LSYM  = 128,   /* Stack variable or type           */
   N_SOL   = 132,   /* Include file name                */
   N_LBRAC = 192,   /* Start of lexical block           */
   N_RBRAC = 224    /* End of lexical block             */
} stab_types;
669
670
671/* Read stabs-format debug info. This is all rather horrible because
672 stabs is a underspecified, kludgy hack.
673*/
674static
675void read_debuginfo_stabs ( SegInfo* si,
676 UChar* stabC, Int stab_sz,
677 UChar* stabstr, Int stabstr_sz )
sewardjde4a1d02002-03-22 01:27:54 +0000678{
sewardjb51f2e62002-06-01 23:11:19 +0000679 Int i;
680 Int curr_filenmoff;
njnb79ad342002-06-05 15:30:30 +0000681 Addr curr_fn_stabs_addr = (Addr)NULL;
682 Addr curr_fnbaseaddr = (Addr)NULL;
sewardjb51f2e62002-06-01 23:11:19 +0000683 Char *curr_file_name, *curr_fn_name;
684 Int n_stab_entries;
njnb79ad342002-06-05 15:30:30 +0000685 Int prev_lineno = 0, lineno = 0;
686 Int lineno_overflows = 0;
687 Bool same_file = True;
sewardjb51f2e62002-06-01 23:11:19 +0000688 struct nlist* stab = (struct nlist*)stabC;
njnb79ad342002-06-05 15:30:30 +0000689
sewardjb51f2e62002-06-01 23:11:19 +0000690 /* Ok. It all looks plausible. Go on and read debug data.
691 stab kinds: 100 N_SO a source file name
692 68 N_SLINE a source line number
693 36 N_FUN start of a function
njn4f9c9342002-04-29 16:03:24 +0000694
sewardjb51f2e62002-06-01 23:11:19 +0000695 In this loop, we maintain a current file name, updated as
696 N_SO/N_SOLs appear, and a current function base address,
697 updated as N_FUNs appear. Based on that, address ranges for
698 N_SLINEs are calculated, and stuffed into the line info table.
sewardjde4a1d02002-03-22 01:27:54 +0000699
sewardjb51f2e62002-06-01 23:11:19 +0000700 Finding the instruction address range covered by an N_SLINE is
701 complicated; see the N_SLINE case below.
702 */
njnb79ad342002-06-05 15:30:30 +0000703 curr_filenmoff = addStr(si,"???");
704 curr_file_name = curr_fn_name = (Char*)NULL;
sewardjde4a1d02002-03-22 01:27:54 +0000705
sewardjb51f2e62002-06-01 23:11:19 +0000706 n_stab_entries = stab_sz/(int)sizeof(struct nlist);
njne0ee0712002-05-03 16:41:05 +0000707
sewardjb51f2e62002-06-01 23:11:19 +0000708 for (i = 0; i < n_stab_entries; i++) {
709# if 0
710 VG_(printf) ( " %2d ", i );
711 VG_(printf) ( "type=0x%x othr=%d desc=%d value=0x%x strx=%d %s",
712 stab[i].n_type, stab[i].n_other, stab[i].n_desc,
713 (int)stab[i].n_value,
714 (int)stab[i].n_un.n_strx,
715 stabstr + stab[i].n_un.n_strx );
716 VG_(printf)("\n");
717# endif
njne0ee0712002-05-03 16:41:05 +0000718
sewardjb51f2e62002-06-01 23:11:19 +0000719 Char *no_fn_name = "???";
720
721 switch (stab[i].n_type) {
722 UInt next_addr;
723
724 /* Two complicated things here:
725 *
726 * 1. the n_desc field in 'struct n_list' in a.out.h is only
727 * 16-bits, which gives a maximum of 65535 lines. We handle
728 * files bigger than this by detecting heuristically
729 * overflows -- if the line count goes from 65000-odd to
730 * 0-odd within the same file, we assume it's an overflow.
731 * Once we switch files, we zero the overflow count.
732 *
733 * 2. To compute the instr address range covered by a single
734 * line, find the address of the next thing and compute the
735 * difference. The approach used depends on what kind of
736 * entry/entries follow...
737 */
738 case N_SLINE: {
739 Int this_addr = (UInt)stab[i].n_value;
740
741 /* Although stored as a short, neg values really are >
742 * 32768, hence the UShort cast. Then we use an Int to
743 * handle overflows. */
744 prev_lineno = lineno;
745 lineno = (Int)((UShort)stab[i].n_desc);
746
747 if (prev_lineno > lineno + OVERFLOW_DIFFERENCE && same_file) {
748 VG_(message)(Vg_DebugMsg,
749 "Line number overflow detected (%d --> %d) in %s",
750 prev_lineno, lineno, curr_file_name);
751 lineno_overflows++;
752 }
753 same_file = True;
754
755 LOOP:
756 if (i+1 >= n_stab_entries) {
757 /* If it's the last entry, just guess the range is
758 * four; can't do any better */
759 next_addr = this_addr + 4;
760 } else {
761 switch (stab[i+1].n_type) {
762 /* Easy, common case: use address of next entry */
763 case N_SLINE: case N_SO:
764 next_addr = (UInt)stab[i+1].n_value;
765 break;
766
njn25e49d8e72002-09-23 09:36:25 +0000767 /* Boring one: skip, look for something more useful. */
sewardjb51f2e62002-06-01 23:11:19 +0000768 case N_RSYM: case N_LSYM: case N_LBRAC: case N_RBRAC:
769 case N_STSYM: case N_LCSYM: case N_GSYM:
770 i++;
771 goto LOOP;
772
njnb79ad342002-06-05 15:30:30 +0000773 /* If end-of-this-fun entry, use its address.
774 * If start-of-next-fun entry, find difference between start
775 * of current function and start of next function to work
776 * it out.
777 */
sewardjb51f2e62002-06-01 23:11:19 +0000778 case N_FUN:
779 if ('\0' == * (stabstr + stab[i+1].n_un.n_strx) ) {
780 next_addr = (UInt)stab[i+1].n_value;
781 } else {
njnb79ad342002-06-05 15:30:30 +0000782 next_addr =
783 (UInt)stab[i+1].n_value - curr_fn_stabs_addr;
sewardjb51f2e62002-06-01 23:11:19 +0000784 }
785 break;
786
787 /* N_SOL should be followed by an N_SLINE which can
788 be used */
789 case N_SOL:
790 if (i+2 < n_stab_entries && N_SLINE == stab[i+2].n_type) {
791 next_addr = (UInt)stab[i+2].n_value;
792 break;
793 } else {
794 VG_(printf)("unhandled N_SOL stabs case: %d %d %d",
795 stab[i+1].n_type, i, n_stab_entries);
njne427a662002-10-02 11:08:25 +0000796 VG_(core_panic)("unhandled N_SOL stabs case");
sewardjb51f2e62002-06-01 23:11:19 +0000797 }
798
799 default:
800 VG_(printf)("unhandled (other) stabs case: %d %d",
801 stab[i+1].n_type,i);
njne427a662002-10-02 11:08:25 +0000802 /* VG_(core_panic)("unhandled (other) stabs case"); */
sewardjb51f2e62002-06-01 23:11:19 +0000803 next_addr = this_addr + 4;
804 break;
805 }
806 }
807
808 addLineInfo ( si, curr_filenmoff, curr_fnbaseaddr + this_addr,
809 curr_fnbaseaddr + next_addr,
810 lineno + lineno_overflows * LINENO_OVERFLOW, i);
811 break;
812 }
813
814 case N_FUN: {
815 if ('\0' != (stabstr + stab[i].n_un.n_strx)[0] ) {
816 /* N_FUN with a name -- indicates the start of a fn. */
njnb79ad342002-06-05 15:30:30 +0000817 curr_fn_stabs_addr = (Addr)stab[i].n_value;
818 curr_fnbaseaddr = si->offset + curr_fn_stabs_addr;
sewardjb51f2e62002-06-01 23:11:19 +0000819 curr_fn_name = stabstr + stab[i].n_un.n_strx;
820 } else {
821 curr_fn_name = no_fn_name;
822 }
823 break;
824 }
825
826 case N_SOL:
827 if (lineno_overflows != 0) {
828 VG_(message)(Vg_UserMsg,
829 "Warning: file %s is very big (> 65535 lines) "
830 "Line numbers and annotation for this file might "
831 "be wrong. Sorry",
832 curr_file_name);
833 }
834 /* fall through! */
835 case N_SO:
836 lineno_overflows = 0;
837
838 /* seems to give lots of locations in header files */
839 /* case 130: */ /* BINCL */
840 {
841 UChar* nm = stabstr + stab[i].n_un.n_strx;
842 UInt len = VG_(strlen)(nm);
843
844 if (len > 0 && nm[len-1] != '/') {
845 curr_filenmoff = addStr ( si, nm );
846 curr_file_name = stabstr + stab[i].n_un.n_strx;
847 }
848 else
849 if (len == 0)
850 curr_filenmoff = addStr ( si, "?1\0" );
851
852 break;
853 }
854
855# if 0
856 case 162: /* EINCL */
857 curr_filenmoff = addStr ( si, "?2\0" );
858 break;
859# endif
860
861 default:
862 break;
863 }
864 } /* for (i = 0; i < stab_sz/(int)sizeof(struct nlist); i++) */
sewardjde4a1d02002-03-22 01:27:54 +0000865}
866
867
sewardjb51f2e62002-06-01 23:11:19 +0000868/*------------------------------------------------------------*/
869/*--- Read DWARF2 format debug info. ---*/
870/*------------------------------------------------------------*/
sewardjc134dd92002-06-01 14:21:36 +0000871
872/* Structure found in the .debug_line section. */
873typedef struct
874{
875 UChar li_length [4];
876 UChar li_version [2];
877 UChar li_prologue_length [4];
878 UChar li_min_insn_length [1];
879 UChar li_default_is_stmt [1];
880 UChar li_line_base [1];
881 UChar li_line_range [1];
882 UChar li_opcode_base [1];
883}
884DWARF2_External_LineInfo;
sewardjd84606d2002-06-18 01:04:57 +0000885
sewardjc134dd92002-06-01 14:21:36 +0000886typedef struct
887{
sewardj08a50f62002-06-17 02:21:20 +0000888 UInt li_length;
sewardjc134dd92002-06-01 14:21:36 +0000889 UShort li_version;
890 UInt li_prologue_length;
891 UChar li_min_insn_length;
892 UChar li_default_is_stmt;
sewardj08a50f62002-06-17 02:21:20 +0000893 Int li_line_base;
sewardjc134dd92002-06-01 14:21:36 +0000894 UChar li_line_range;
895 UChar li_opcode_base;
896}
897DWARF2_Internal_LineInfo;
sewardjd84606d2002-06-18 01:04:57 +0000898
/* Standard opcodes of the line number program. */
enum dwarf_line_number_ops
{
   DW_LNS_extended_op        = 0,
   DW_LNS_copy               = 1,
   DW_LNS_advance_pc         = 2,
   DW_LNS_advance_line       = 3,
   DW_LNS_set_file           = 4,
   DW_LNS_set_column         = 5,
   DW_LNS_negate_stmt        = 6,
   DW_LNS_set_basic_block    = 7,
   DW_LNS_const_add_pc       = 8,
   DW_LNS_fixed_advance_pc   = 9,
   /* Added by DWARF 3. */
   DW_LNS_set_prologue_end   = 10,
   DW_LNS_set_epilogue_begin = 11,
   DW_LNS_set_isa            = 12
};
917
/* Extended opcodes of the line number program. */
enum dwarf_line_number_x_ops
{
   DW_LNE_end_sequence = 1,
   DW_LNE_set_address  = 2,
   DW_LNE_define_file  = 3
};
925
/* Registers of the DWARF2 line-number state machine. */
typedef struct State_Machine_Registers {
   /* Address the state machine has currently reached. */
   Addr address;

   /* Address of the most recent statement boundary.  Keeping it here
      lets us compute the length of a statement when the next boundary
      is reached, instead of searching backwards for where the
      statement began each time addLineInfo is about to be called.
      Zero means that no boundary has been seen yet. */
   Addr last_address;

   UInt file;           /* index into the file-name table */
   UInt line;           /* current source line            */
   UInt column;         /* current source column          */
   Int  is_stmt;        /* nonzero: rows are statement boundaries */
   Int  basic_block;    /* nonzero: row starts a basic block      */
   Int  end_sequence;   /* nonzero: DW_LNE_end_sequence was seen  */

   /* Number of the last entry seen in the File Table. */
   UInt last_file_entry;
} SMR;
944
sewardjb51f2e62002-06-01 23:11:19 +0000945
/* Decode one LEB128-encoded number.

   data           first byte of the encoding; bytes are consumed up to
                  and including the first one whose top bit is clear
   length_return  if not NULL, receives the number of bytes consumed
   sign           nonzero to decode a signed (SLEB128) value, zero for
                  an unsigned (ULEB128) one

   Returns the low 32 bits of the value; a signed value is returned as
   its two's-complement bit pattern.  Payload bits that would fall at
   or beyond bit 32 are discarded, but the bytes carrying them are
   still consumed and counted, so the caller stays in step with the
   input even for over-long encodings. */
static
UInt read_leb128 ( UChar* data, Int* length_return, Int sign )
{
   UInt  result   = 0;
   Int   num_read = 0;
   Int   shift    = 0;
   UChar byte;

   do {
      byte = *data++;
      num_read++;

      /* Shifting a 32-bit quantity by 32 or more is undefined, so once
         the result is full, further groups are skipped, not merged.
         shift is frozen at that point so that it cannot overflow. */
      if (shift < 32) {
         result |= ((UInt)(byte & 0x7f)) << shift;
         shift  += 7;
      }
   } while (byte & 0x80);

   if (length_return != NULL)
      *length_return = num_read;

   /* Sign-extend from the top payload bit of the final byte.  The mask
      is built from an unsigned all-ones value because left-shifting
      the negative constant -1 is undefined. */
   if (sign && shift < 32 && (byte & 0x40))
      result |= (~(UInt)0) << shift;

   return result;
}
974
975
sewardjc134dd92002-06-01 14:21:36 +0000976static SMR state_machine_regs;
977
sewardj08a50f62002-06-17 02:21:20 +0000978static
979void reset_state_machine ( Int is_stmt )
sewardjc134dd92002-06-01 14:21:36 +0000980{
sewardj08a50f62002-06-17 02:21:20 +0000981 if (0) VG_(printf)("smr.a := %p (reset)\n", 0 );
sewardjc134dd92002-06-01 14:21:36 +0000982 state_machine_regs.address = 0;
sewardjb642dc22002-10-12 17:27:16 +0000983 state_machine_regs.last_address = 0;
sewardjc134dd92002-06-01 14:21:36 +0000984 state_machine_regs.file = 1;
985 state_machine_regs.line = 1;
986 state_machine_regs.column = 0;
987 state_machine_regs.is_stmt = is_stmt;
988 state_machine_regs.basic_block = 0;
989 state_machine_regs.end_sequence = 0;
990 state_machine_regs.last_file_entry = 0;
991}
992
993/* Handled an extend line op. Returns true if this is the end
994 of sequence. */
sewardj08a50f62002-06-17 02:21:20 +0000995static
996int process_extended_line_op( SegInfo *si, UInt** fnames,
997 UChar* data, Int is_stmt, Int pointer_size)
sewardjc134dd92002-06-01 14:21:36 +0000998{
999 UChar op_code;
sewardj08a50f62002-06-17 02:21:20 +00001000 Int bytes_read;
sewardjc134dd92002-06-01 14:21:36 +00001001 UInt len;
1002 UChar * name;
sewardj08a50f62002-06-17 02:21:20 +00001003 Addr adr;
sewardjc134dd92002-06-01 14:21:36 +00001004
1005 len = read_leb128 (data, & bytes_read, 0);
1006 data += bytes_read;
1007
1008 if (len == 0)
1009 {
sewardj08a50f62002-06-17 02:21:20 +00001010 VG_(message)(Vg_UserMsg,
1011 "badly formed extended line op encountered!\n");
sewardjc134dd92002-06-01 14:21:36 +00001012 return bytes_read;
1013 }
1014
1015 len += bytes_read;
1016 op_code = * data ++;
1017
sewardjb642dc22002-10-12 17:27:16 +00001018 if (0) VG_(printf)("dwarf2: ext OPC: %d\n", op_code);
sewardjc134dd92002-06-01 14:21:36 +00001019
1020 switch (op_code)
1021 {
1022 case DW_LNE_end_sequence:
sewardj08a50f62002-06-17 02:21:20 +00001023 if (0) VG_(printf)("1001: si->o %p, smr.a %p\n",
1024 si->offset, state_machine_regs.address );
sewardjd84606d2002-06-18 01:04:57 +00001025 state_machine_regs.end_sequence = 1; /* JRS: added for compliance
1026 with spec; is pointless due to reset_state_machine below
1027 */
sewardjb642dc22002-10-12 17:27:16 +00001028 if (state_machine_regs.is_stmt) {
1029 if (state_machine_regs.last_address)
1030 addLineInfo (si, (*fnames)[state_machine_regs.file],
1031 si->offset + state_machine_regs.last_address,
1032 si->offset + state_machine_regs.address,
1033 state_machine_regs.line, 0);
1034 }
sewardjc134dd92002-06-01 14:21:36 +00001035 reset_state_machine (is_stmt);
1036 break;
1037
1038 case DW_LNE_set_address:
1039 /* XXX: Pointer size could be 8 */
sewardj08a50f62002-06-17 02:21:20 +00001040 vg_assert(pointer_size == 4);
sewardjc134dd92002-06-01 14:21:36 +00001041 adr = *((Addr *)data);
sewardj08a50f62002-06-17 02:21:20 +00001042 if (0) VG_(printf)("smr.a := %p\n", adr );
sewardjc134dd92002-06-01 14:21:36 +00001043 state_machine_regs.address = adr;
1044 break;
1045
1046 case DW_LNE_define_file:
sewardjc134dd92002-06-01 14:21:36 +00001047 ++ state_machine_regs.last_file_entry;
1048 name = data;
1049 if (*fnames == NULL)
njn25e49d8e72002-09-23 09:36:25 +00001050 *fnames = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof (UInt) * 2);
sewardjc134dd92002-06-01 14:21:36 +00001051 else
njn25e49d8e72002-09-23 09:36:25 +00001052 *fnames = VG_(arena_realloc)(
1053 VG_AR_SYMTAB, *fnames, /*alignment*/4,
sewardj08a50f62002-06-17 02:21:20 +00001054 sizeof(UInt)
1055 * (state_machine_regs.last_file_entry + 1));
sewardjc134dd92002-06-01 14:21:36 +00001056 (*fnames)[state_machine_regs.last_file_entry] = addStr (si,name);
1057 data += VG_(strlen) ((char *) data) + 1;
1058 read_leb128 (data, & bytes_read, 0);
1059 data += bytes_read;
1060 read_leb128 (data, & bytes_read, 0);
1061 data += bytes_read;
sewardj08a50f62002-06-17 02:21:20 +00001062 read_leb128 (data, & bytes_read, 0);
sewardjc134dd92002-06-01 14:21:36 +00001063 break;
1064
1065 default:
1066 break;
1067 }
1068
1069 return len;
1070}
1071
1072
sewardjb51f2e62002-06-01 23:11:19 +00001073static
1074void read_debuginfo_dwarf2 ( SegInfo* si, UChar* dwarf2, Int dwarf2_sz )
sewardjc134dd92002-06-01 14:21:36 +00001075{
1076 DWARF2_External_LineInfo * external;
1077 DWARF2_Internal_LineInfo info;
1078 UChar * standard_opcodes;
sewardjb51f2e62002-06-01 23:11:19 +00001079 UChar * data = dwarf2;
1080 UChar * end = dwarf2 + dwarf2_sz;
sewardjc134dd92002-06-01 14:21:36 +00001081 UChar * end_of_sequence;
sewardj08a50f62002-06-17 02:21:20 +00001082 UInt * fnames = NULL;
sewardjc134dd92002-06-01 14:21:36 +00001083
sewardjd84606d2002-06-18 01:04:57 +00001084 /* Fails due to gcc padding ...
1085 vg_assert(sizeof(DWARF2_External_LineInfo)
1086 == sizeof(DWARF2_Internal_LineInfo));
1087 */
sewardjc134dd92002-06-01 14:21:36 +00001088
1089 while (data < end)
1090 {
1091 external = (DWARF2_External_LineInfo *) data;
1092
1093 /* Check the length of the block. */
sewardj08a50f62002-06-17 02:21:20 +00001094 info.li_length = * ((UInt *)(external->li_length));
sewardjc134dd92002-06-01 14:21:36 +00001095
1096 if (info.li_length == 0xffffffff)
1097 {
sewardjb51f2e62002-06-01 23:11:19 +00001098 vg_symerr("64-bit DWARF line info is not supported yet.");
sewardjc134dd92002-06-01 14:21:36 +00001099 break;
1100 }
1101
sewardjb51f2e62002-06-01 23:11:19 +00001102 if (info.li_length + sizeof (external->li_length) > dwarf2_sz)
sewardjc134dd92002-06-01 14:21:36 +00001103 {
sewardj08a50f62002-06-17 02:21:20 +00001104 vg_symerr("DWARF line info appears to be corrupt "
1105 "- the section is too small");
sewardjb51f2e62002-06-01 23:11:19 +00001106 return;
sewardjc134dd92002-06-01 14:21:36 +00001107 }
1108
1109 /* Check its version number. */
sewardj08a50f62002-06-17 02:21:20 +00001110 info.li_version = * ((UShort *) (external->li_version));
sewardjc134dd92002-06-01 14:21:36 +00001111 if (info.li_version != 2)
1112 {
sewardj08a50f62002-06-17 02:21:20 +00001113 vg_symerr("Only DWARF version 2 line info "
1114 "is currently supported.");
sewardjb51f2e62002-06-01 23:11:19 +00001115 return;
sewardjc134dd92002-06-01 14:21:36 +00001116 }
1117
sewardjd84606d2002-06-18 01:04:57 +00001118 info.li_prologue_length = * ((UInt *) (external->li_prologue_length));
1119 info.li_min_insn_length = * ((UChar *)(external->li_min_insn_length));
sewardj8fe2c1f2002-10-22 05:41:14 +00001120
1121 info.li_default_is_stmt = True;
1122 /* WAS: = * ((UChar *)(external->li_default_is_stmt)); */
1123 /* Josef Weidendorfer (20021021) writes:
1124
1125 It seems to me that the Intel Fortran compiler generates
1126 bad DWARF2 line info code: It sets "is_stmt" of the state
1127 machine in the the line info reader to be always
1128 false. Thus, there is never a statement boundary generated
1129 and therefore never a instruction range/line number
1130 mapping generated for valgrind.
1131
1132 Please have a look at the DWARF2 specification, Ch. 6.2
1133 (x86.ddj.com/ftp/manuals/tools/dwarf.pdf). Perhaps I
1134 understand this wrong, but I don't think so.
1135
1136 I just had a look at the GDB DWARF2 reader... They
1137 completly ignore "is_stmt" when recording line info ;-)
1138 That's the reason "objdump -S" works on files from the the
1139 intel fortran compiler.
1140 */
1141
sewardjd84606d2002-06-18 01:04:57 +00001142
1143 /* JRS: changed (UInt*) to (UChar*) */
1144 info.li_line_base = * ((UChar *)(external->li_line_base));
1145
1146 info.li_line_range = * ((UChar *)(external->li_line_range));
1147 info.li_opcode_base = * ((UChar *)(external->li_opcode_base));
sewardjc134dd92002-06-01 14:21:36 +00001148
sewardjb642dc22002-10-12 17:27:16 +00001149 if (0) VG_(printf)("dwarf2: line base: %d, range %d, opc base: %d\n",
1150 info.li_line_base, info.li_line_range, info.li_opcode_base);
1151
sewardjc134dd92002-06-01 14:21:36 +00001152 /* Sign extend the line base field. */
1153 info.li_line_base <<= 24;
1154 info.li_line_base >>= 24;
1155
sewardj08a50f62002-06-17 02:21:20 +00001156 end_of_sequence = data + info.li_length
1157 + sizeof (external->li_length);
sewardjc134dd92002-06-01 14:21:36 +00001158
1159 reset_state_machine (info.li_default_is_stmt);
1160
1161 /* Read the contents of the Opcodes table. */
1162 standard_opcodes = data + sizeof (* external);
1163
sewardjc134dd92002-06-01 14:21:36 +00001164 /* Read the contents of the Directory table. */
1165 data = standard_opcodes + info.li_opcode_base - 1;
1166
sewardj08a50f62002-06-17 02:21:20 +00001167 if (* data == 0)
1168 {
1169 }
sewardjc134dd92002-06-01 14:21:36 +00001170 else
1171 {
sewardj08a50f62002-06-17 02:21:20 +00001172 /* We ignore the directory table, since gcc gives the entire
1173 path as part of the filename */
sewardjc134dd92002-06-01 14:21:36 +00001174 while (* data != 0)
1175 {
1176 data += VG_(strlen) ((char *) data) + 1;
1177 }
1178 }
1179
1180 /* Skip the NUL at the end of the table. */
sewardjd84606d2002-06-18 01:04:57 +00001181 if (*data != 0) {
1182 vg_symerr("can't find NUL at end of DWARF2 directory table");
1183 return;
1184 }
sewardjc134dd92002-06-01 14:21:36 +00001185 data ++;
1186
1187 /* Read the contents of the File Name table. */
sewardj08a50f62002-06-17 02:21:20 +00001188 if (* data == 0)
1189 {
1190 }
sewardjc134dd92002-06-01 14:21:36 +00001191 else
1192 {
sewardjc134dd92002-06-01 14:21:36 +00001193 while (* data != 0)
1194 {
1195 UChar * name;
1196 Int bytes_read;
1197
sewardj08a50f62002-06-17 02:21:20 +00001198 ++ state_machine_regs.last_file_entry;
sewardjc134dd92002-06-01 14:21:36 +00001199 name = data;
sewardj08a50f62002-06-17 02:21:20 +00001200 /* Since we don't have realloc (0, ....) == malloc (...)
1201 semantics, we need to malloc the first time. */
sewardjc134dd92002-06-01 14:21:36 +00001202
1203 if (fnames == NULL)
njn25e49d8e72002-09-23 09:36:25 +00001204 fnames = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof (UInt) * 2);
sewardjc134dd92002-06-01 14:21:36 +00001205 else
njn25e49d8e72002-09-23 09:36:25 +00001206 fnames = VG_(arena_realloc)(VG_AR_SYMTAB, fnames, /*alignment*/4,
sewardj08a50f62002-06-17 02:21:20 +00001207 sizeof(UInt)
1208 * (state_machine_regs.last_file_entry + 1));
1209 data += VG_(strlen) ((Char *) data) + 1;
sewardjc134dd92002-06-01 14:21:36 +00001210 fnames[state_machine_regs.last_file_entry] = addStr (si,name);
1211
1212 read_leb128 (data, & bytes_read, 0);
1213 data += bytes_read;
1214 read_leb128 (data, & bytes_read, 0);
1215 data += bytes_read;
1216 read_leb128 (data, & bytes_read, 0);
1217 data += bytes_read;
1218 }
1219 }
1220
1221 /* Skip the NUL at the end of the table. */
sewardjd84606d2002-06-18 01:04:57 +00001222 if (*data != 0) {
1223 vg_symerr("can't find NUL at end of DWARF2 file name table");
1224 return;
1225 }
sewardjc134dd92002-06-01 14:21:36 +00001226 data ++;
1227
1228 /* Now display the statements. */
1229
1230 while (data < end_of_sequence)
1231 {
1232 UChar op_code;
1233 Int adv;
1234 Int bytes_read;
1235
1236 op_code = * data ++;
1237
sewardjb642dc22002-10-12 17:27:16 +00001238 if (0) VG_(printf)("dwarf2: OPC: %d\n", op_code);
1239
sewardjc134dd92002-06-01 14:21:36 +00001240 if (op_code >= info.li_opcode_base)
1241 {
1242 Int advAddr;
1243 op_code -= info.li_opcode_base;
sewardj08a50f62002-06-17 02:21:20 +00001244 adv = (op_code / info.li_line_range)
1245 * info.li_min_insn_length;
sewardjc134dd92002-06-01 14:21:36 +00001246 advAddr = adv;
1247 state_machine_regs.address += adv;
sewardj08a50f62002-06-17 02:21:20 +00001248 if (0) VG_(printf)("smr.a += %p\n", adv );
sewardjc134dd92002-06-01 14:21:36 +00001249 adv = (op_code % info.li_line_range) + info.li_line_base;
sewardj08a50f62002-06-17 02:21:20 +00001250 if (0) VG_(printf)("1002: si->o %p, smr.a %p\n",
1251 si->offset, state_machine_regs.address );
sewardjb642dc22002-10-12 17:27:16 +00001252 if (state_machine_regs.is_stmt) {
1253 /* only add a statement if there was a previous boundary */
1254 if (state_machine_regs.last_address)
1255 addLineInfo (si, fnames[state_machine_regs.file],
1256 si->offset + state_machine_regs.last_address,
1257 si->offset + state_machine_regs.address,
sewardj08a50f62002-06-17 02:21:20 +00001258 state_machine_regs.line, 0);
sewardjb642dc22002-10-12 17:27:16 +00001259 state_machine_regs.last_address = state_machine_regs.address;
1260 }
sewardjc134dd92002-06-01 14:21:36 +00001261 state_machine_regs.line += adv;
1262 }
1263 else switch (op_code)
1264 {
1265 case DW_LNS_extended_op:
sewardj08a50f62002-06-17 02:21:20 +00001266 data += process_extended_line_op (
1267 si, &fnames, data,
1268 info.li_default_is_stmt, sizeof (Addr));
sewardjc134dd92002-06-01 14:21:36 +00001269 break;
1270
1271 case DW_LNS_copy:
sewardj08a50f62002-06-17 02:21:20 +00001272 if (0) VG_(printf)("1002: si->o %p, smr.a %p\n",
1273 si->offset, state_machine_regs.address );
sewardjb642dc22002-10-12 17:27:16 +00001274 if (state_machine_regs.is_stmt) {
1275 /* only add a statement if there was a previous boundary */
1276 if (state_machine_regs.last_address)
1277 addLineInfo (si, fnames[state_machine_regs.file],
1278 si->offset + state_machine_regs.last_address,
1279 si->offset + state_machine_regs.address,
sewardj08a50f62002-06-17 02:21:20 +00001280 state_machine_regs.line , 0);
sewardjb642dc22002-10-12 17:27:16 +00001281 state_machine_regs.last_address = state_machine_regs.address;
1282 }
sewardjd84606d2002-06-18 01:04:57 +00001283 state_machine_regs.basic_block = 0; /* JRS added */
sewardjc134dd92002-06-01 14:21:36 +00001284 break;
1285
1286 case DW_LNS_advance_pc:
sewardj08a50f62002-06-17 02:21:20 +00001287 adv = info.li_min_insn_length
1288 * read_leb128 (data, & bytes_read, 0);
sewardjc134dd92002-06-01 14:21:36 +00001289 data += bytes_read;
1290 state_machine_regs.address += adv;
sewardj08a50f62002-06-17 02:21:20 +00001291 if (0) VG_(printf)("smr.a += %p\n", adv );
sewardjc134dd92002-06-01 14:21:36 +00001292 break;
1293
1294 case DW_LNS_advance_line:
1295 adv = read_leb128 (data, & bytes_read, 1);
1296 data += bytes_read;
1297 state_machine_regs.line += adv;
1298 break;
1299
1300 case DW_LNS_set_file:
1301 adv = read_leb128 (data, & bytes_read, 0);
1302 data += bytes_read;
1303 state_machine_regs.file = adv;
1304 break;
1305
1306 case DW_LNS_set_column:
1307 adv = read_leb128 (data, & bytes_read, 0);
1308 data += bytes_read;
1309 state_machine_regs.column = adv;
1310 break;
1311
1312 case DW_LNS_negate_stmt:
1313 adv = state_machine_regs.is_stmt;
1314 adv = ! adv;
1315 state_machine_regs.is_stmt = adv;
1316 break;
1317
1318 case DW_LNS_set_basic_block:
1319 state_machine_regs.basic_block = 1;
1320 break;
1321
1322 case DW_LNS_const_add_pc:
1323 adv = (((255 - info.li_opcode_base) / info.li_line_range)
1324 * info.li_min_insn_length);
1325 state_machine_regs.address += adv;
sewardj08a50f62002-06-17 02:21:20 +00001326 if (0) VG_(printf)("smr.a += %p\n", adv );
sewardjc134dd92002-06-01 14:21:36 +00001327 break;
1328
1329 case DW_LNS_fixed_advance_pc:
1330 /* XXX: Need something to get 2 bytes */
1331 adv = *((UShort *)data);
1332 data += 2;
1333 state_machine_regs.address += adv;
sewardj08a50f62002-06-17 02:21:20 +00001334 if (0) VG_(printf)("smr.a += %p\n", adv );
sewardjc134dd92002-06-01 14:21:36 +00001335 break;
1336
1337 case DW_LNS_set_prologue_end:
1338 break;
1339
1340 case DW_LNS_set_epilogue_begin:
1341 break;
1342
1343 case DW_LNS_set_isa:
1344 adv = read_leb128 (data, & bytes_read, 0);
1345 data += bytes_read;
1346 break;
1347
1348 default:
1349 {
1350 int j;
1351 for (j = standard_opcodes[op_code - 1]; j > 0 ; --j)
1352 {
1353 read_leb128 (data, &bytes_read, 0);
1354 data += bytes_read;
1355 }
1356 }
1357 break;
1358 }
1359 }
njn25e49d8e72002-09-23 09:36:25 +00001360 VG_(arena_free)(VG_AR_SYMTAB, fnames);
sewardjc134dd92002-06-01 14:21:36 +00001361 fnames = NULL;
1362 }
sewardjc134dd92002-06-01 14:21:36 +00001363}
1364
sewardjb51f2e62002-06-01 23:11:19 +00001365
1366/*------------------------------------------------------------*/
1367/*--- Read info from a .so/exe file. ---*/
1368/*------------------------------------------------------------*/
1369
sewardjde4a1d02002-03-22 01:27:54 +00001370/* Read the symbols from the object/exe specified by the SegInfo into
1371 the tables within the supplied SegInfo. */
1372static
sewardj8fe15a32002-10-20 19:29:21 +00001373Bool vg_read_lib_symbols ( SegInfo* si )
sewardjde4a1d02002-03-22 01:27:54 +00001374{
1375 Elf32_Ehdr* ehdr; /* The ELF header */
1376 Elf32_Shdr* shdr; /* The section table */
1377 UChar* sh_strtab; /* The section table's string table */
sewardjb51f2e62002-06-01 23:11:19 +00001378 UChar* stab; /* The .stab table */
sewardjde4a1d02002-03-22 01:27:54 +00001379 UChar* stabstr; /* The .stab string table */
sewardjb51f2e62002-06-01 23:11:19 +00001380 UChar* dwarf2; /* The DWARF2 location info table */
sewardjde4a1d02002-03-22 01:27:54 +00001381 Int stab_sz; /* Size in bytes of the .stab table */
1382 Int stabstr_sz; /* Size in bytes of the .stab string table */
sewardjb51f2e62002-06-01 23:11:19 +00001383 Int dwarf2_sz; /* Size in bytes of the DWARF2 srcloc table*/
sewardjde4a1d02002-03-22 01:27:54 +00001384 Int fd;
1385 Int i;
1386 Bool ok;
1387 Addr oimage;
1388 Int n_oimage;
sewardjb3586202002-05-09 17:38:13 +00001389 struct vki_stat stat_buf;
sewardjde4a1d02002-03-22 01:27:54 +00001390
sewardjde4a1d02002-03-22 01:27:54 +00001391 oimage = (Addr)NULL;
1392 if (VG_(clo_verbosity) > 1)
njne0ee0712002-05-03 16:41:05 +00001393 VG_(message)(Vg_UserMsg, "Reading syms from %s", si->filename );
sewardjde4a1d02002-03-22 01:27:54 +00001394
1395 /* mmap the object image aboard, so that we can read symbols and
1396 line number info out of it. It will be munmapped immediately
1397 thereafter; it is only aboard transiently. */
1398
sewardjb3586202002-05-09 17:38:13 +00001399 i = VG_(stat)(si->filename, &stat_buf);
sewardjde4a1d02002-03-22 01:27:54 +00001400 if (i != 0) {
1401 vg_symerr("Can't stat .so/.exe (to determine its size)?!");
sewardj8fe15a32002-10-20 19:29:21 +00001402 return False;
sewardjde4a1d02002-03-22 01:27:54 +00001403 }
1404 n_oimage = stat_buf.st_size;
1405
njn25e49d8e72002-09-23 09:36:25 +00001406 fd = VG_(open)(si->filename, VKI_O_RDONLY, 0);
sewardjde4a1d02002-03-22 01:27:54 +00001407 if (fd == -1) {
1408 vg_symerr("Can't open .so/.exe to read symbols?!");
sewardj8fe15a32002-10-20 19:29:21 +00001409 return False;
sewardjde4a1d02002-03-22 01:27:54 +00001410 }
1411
sewardjb3586202002-05-09 17:38:13 +00001412 oimage = (Addr)VG_(mmap)( NULL, n_oimage,
1413 VKI_PROT_READ, VKI_MAP_PRIVATE, fd, 0 );
sewardjde4a1d02002-03-22 01:27:54 +00001414 if (oimage == ((Addr)(-1))) {
1415 VG_(message)(Vg_UserMsg,
1416 "mmap failed on %s", si->filename );
1417 VG_(close)(fd);
sewardj8fe15a32002-10-20 19:29:21 +00001418 return False;
sewardjde4a1d02002-03-22 01:27:54 +00001419 }
1420
1421 VG_(close)(fd);
1422
1423 /* Ok, the object image is safely in oimage[0 .. n_oimage-1].
1424 Now verify that it is a valid ELF .so or executable image.
1425 */
1426 ok = (n_oimage >= sizeof(Elf32_Ehdr));
1427 ehdr = (Elf32_Ehdr*)oimage;
1428
1429 if (ok) {
1430 ok &= (ehdr->e_ident[EI_MAG0] == 0x7F
1431 && ehdr->e_ident[EI_MAG1] == 'E'
1432 && ehdr->e_ident[EI_MAG2] == 'L'
1433 && ehdr->e_ident[EI_MAG3] == 'F');
1434 ok &= (ehdr->e_ident[EI_CLASS] == ELFCLASS32
1435 && ehdr->e_ident[EI_DATA] == ELFDATA2LSB
1436 && ehdr->e_ident[EI_VERSION] == EV_CURRENT);
1437 ok &= (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_DYN);
1438 ok &= (ehdr->e_machine == EM_386);
1439 ok &= (ehdr->e_version == EV_CURRENT);
1440 ok &= (ehdr->e_shstrndx != SHN_UNDEF);
1441 ok &= (ehdr->e_shoff != 0 && ehdr->e_shnum != 0);
sewardj8fe15a32002-10-20 19:29:21 +00001442 ok &= (ehdr->e_phoff != 0 && ehdr->e_phnum != 0);
sewardjde4a1d02002-03-22 01:27:54 +00001443 }
1444
1445 if (!ok) {
1446 vg_symerr("Invalid ELF header, or missing stringtab/sectiontab.");
1447 VG_(munmap) ( (void*)oimage, n_oimage );
sewardj8fe15a32002-10-20 19:29:21 +00001448 return False;
1449 }
1450
1451 /* Walk the LOAD headers in the phdr and update the SegInfo to
1452 include them all, so that this segment also contains data and
1453 bss memory. Also computes correct symbol offset value for this
1454 ELF file. */
1455 if (ehdr->e_phoff + ehdr->e_phnum*sizeof(Elf32_Phdr) > n_oimage) {
1456 vg_symerr("ELF program header is beyond image end?!");
1457 VG_(munmap) ( (void*)oimage, n_oimage );
1458 return False;
1459 }
1460 {
1461 Bool offset_set = False;
1462 Elf32_Addr prev_addr = 0;
1463
1464 si->offset = 0;
1465
1466 for(i = 0; i < ehdr->e_phnum; i++) {
1467 Elf32_Phdr *o_phdr;
1468 Elf32_Addr mapped, mapped_end;
1469
1470 o_phdr = &((Elf32_Phdr *)(oimage + ehdr->e_phoff))[i];
1471
1472 if (o_phdr->p_type != PT_LOAD)
1473 continue;
1474
1475 if (!offset_set) {
1476 offset_set = True;
1477 si->offset = si->start - o_phdr->p_vaddr;
1478 }
1479
1480 if (o_phdr->p_vaddr < prev_addr) {
1481 vg_symerr("ELF Phdrs are out of order!?");
1482 VG_(munmap) ( (void*)oimage, n_oimage );
1483 return False;
1484 }
1485 prev_addr = o_phdr->p_vaddr;
1486
1487 mapped = o_phdr->p_vaddr + si->offset;
1488 mapped_end = mapped + o_phdr->p_memsz;
1489
1490 if (si->data_start == 0 &&
1491 (o_phdr->p_flags & (PF_R|PF_W|PF_X)) == (PF_R|PF_W)) {
1492 si->data_start = mapped;
1493 si->data_size = o_phdr->p_filesz;
1494 si->bss_start = mapped + o_phdr->p_filesz;
1495 if (o_phdr->p_memsz > o_phdr->p_filesz)
1496 si->bss_size = o_phdr->p_memsz - o_phdr->p_filesz;
1497 else
1498 si->bss_size = 0;
1499 }
1500
1501 mapped = mapped & ~(VKI_BYTES_PER_PAGE-1);
1502 mapped_end = (mapped_end + VKI_BYTES_PER_PAGE - 1) & ~(VKI_BYTES_PER_PAGE-1);
1503
1504 if (VG_(needs).data_syms &&
1505 (mapped >= si->start && mapped <= (si->start+si->size)) &&
1506 (mapped_end > (si->start+si->size))) {
1507 UInt newsz = mapped_end - si->start;
1508 if (newsz > si->size) {
1509 if (0)
1510 VG_(printf)("extending mapping %p..%p %d -> ..%p %d\n",
1511 si->start, si->start+si->size, si->size,
1512 si->start+newsz, newsz);
1513 si->size = newsz;
1514 }
1515 }
1516 }
sewardjde4a1d02002-03-22 01:27:54 +00001517 }
1518
1519 if (VG_(clo_trace_symtab))
1520 VG_(printf)(
1521 "shoff = %d, shnum = %d, size = %d, n_vg_oimage = %d\n",
1522 ehdr->e_shoff, ehdr->e_shnum, sizeof(Elf32_Shdr), n_oimage );
1523
1524 if (ehdr->e_shoff + ehdr->e_shnum*sizeof(Elf32_Shdr) > n_oimage) {
1525 vg_symerr("ELF section header is beyond image end?!");
1526 VG_(munmap) ( (void*)oimage, n_oimage );
sewardj8fe15a32002-10-20 19:29:21 +00001527 return False;
sewardjde4a1d02002-03-22 01:27:54 +00001528 }
1529
1530 shdr = (Elf32_Shdr*)(oimage + ehdr->e_shoff);
1531 sh_strtab = (UChar*)(oimage + shdr[ehdr->e_shstrndx].sh_offset);
1532
1533 /* try and read the object's symbol table */
1534 {
1535 UChar* o_strtab = NULL;
1536 Elf32_Sym* o_symtab = NULL;
1537 UInt o_strtab_sz = 0;
1538 UInt o_symtab_sz = 0;
1539
1540 UChar* o_got = NULL;
1541 UChar* o_plt = NULL;
1542 UInt o_got_sz = 0;
1543 UInt o_plt_sz = 0;
1544
1545 Bool snaffle_it;
1546 Addr sym_addr;
1547
1548 /* find the .stabstr and .stab sections */
1549 for (i = 0; i < ehdr->e_shnum; i++) {
sewardj48115152002-10-02 10:20:30 +00001550
1551 /* As a fallback position, we look first for the dynamic
1552 symbols of a library to increase the chances that we can
1553 say something helpful even if the standard and debug
1554 symbols are missing. */
1555
1556 if (0 == VG_(strcmp)(".dynsym",sh_strtab + shdr[i].sh_name)) {
1557 o_symtab = (Elf32_Sym*)(oimage + shdr[i].sh_offset);
1558 o_symtab_sz = shdr[i].sh_size;
1559 vg_assert((o_symtab_sz % sizeof(Elf32_Sym)) == 0);
1560 /* check image overrun here */
1561 }
1562 if (0 == VG_(strcmp)(".dynstr",sh_strtab + shdr[i].sh_name)) {
1563 o_strtab = (UChar*)(oimage + shdr[i].sh_offset);
1564 o_strtab_sz = shdr[i].sh_size;
1565 /* check image overrun here */
1566 }
1567
1568 /* now look for the main symbol and string tables. */
sewardjde4a1d02002-03-22 01:27:54 +00001569 if (0 == VG_(strcmp)(".symtab",sh_strtab + shdr[i].sh_name)) {
1570 o_symtab = (Elf32_Sym*)(oimage + shdr[i].sh_offset);
1571 o_symtab_sz = shdr[i].sh_size;
1572 vg_assert((o_symtab_sz % sizeof(Elf32_Sym)) == 0);
1573 /* check image overrun here */
1574 }
1575 if (0 == VG_(strcmp)(".strtab",sh_strtab + shdr[i].sh_name)) {
1576 o_strtab = (UChar*)(oimage + shdr[i].sh_offset);
1577 o_strtab_sz = shdr[i].sh_size;
1578 /* check image overrun here */
1579 }
1580
1581 /* find out where the .got and .plt sections will be in the
1582 executable image, not in the object image transiently loaded.
1583 */
1584 if (0 == VG_(strcmp)(".got",sh_strtab + shdr[i].sh_name)) {
1585 o_got = (UChar*)(si->offset
sewardj47104382002-10-20 18:35:48 +00001586 + shdr[i].sh_addr);
sewardjde4a1d02002-03-22 01:27:54 +00001587 o_got_sz = shdr[i].sh_size;
sewardj47104382002-10-20 18:35:48 +00001588 si->got_start= (Addr)o_got;
1589 si->got_size = o_got_sz;
sewardjde4a1d02002-03-22 01:27:54 +00001590 /* check image overrun here */
1591 }
1592 if (0 == VG_(strcmp)(".plt",sh_strtab + shdr[i].sh_name)) {
1593 o_plt = (UChar*)(si->offset
sewardj47104382002-10-20 18:35:48 +00001594 + shdr[i].sh_addr);
sewardjde4a1d02002-03-22 01:27:54 +00001595 o_plt_sz = shdr[i].sh_size;
sewardj47104382002-10-20 18:35:48 +00001596 si->plt_start= (Addr)o_plt;
1597 si->plt_size = o_plt_sz;
sewardjde4a1d02002-03-22 01:27:54 +00001598 /* check image overrun here */
1599 }
1600
1601 }
1602
1603 if (VG_(clo_trace_symtab)) {
1604 if (o_plt) VG_(printf)( "PLT: %p .. %p\n",
1605 o_plt, o_plt + o_plt_sz - 1 );
1606 if (o_got) VG_(printf)( "GOT: %p .. %p\n",
1607 o_got, o_got + o_got_sz - 1 );
1608 }
1609
1610 if (o_strtab == NULL || o_symtab == NULL) {
1611 vg_symerr(" object doesn't have a symbol table");
1612 } else {
1613 /* Perhaps should start at i = 1; ELF docs suggest that entry
1614 0 always denotes `unknown symbol'. */
1615 for (i = 1; i < o_symtab_sz/sizeof(Elf32_Sym); i++){
sewardj8fe15a32002-10-20 19:29:21 +00001616# if 1
1617 if (VG_(clo_trace_symtab)) {
1618 VG_(printf)("raw symbol: ");
1619 switch (ELF32_ST_BIND(o_symtab[i].st_info)) {
sewardjde4a1d02002-03-22 01:27:54 +00001620 case STB_LOCAL: VG_(printf)("LOC "); break;
1621 case STB_GLOBAL: VG_(printf)("GLO "); break;
1622 case STB_WEAK: VG_(printf)("WEA "); break;
1623 case STB_LOPROC: VG_(printf)("lop "); break;
1624 case STB_HIPROC: VG_(printf)("hip "); break;
1625 default: VG_(printf)("??? "); break;
sewardj8fe15a32002-10-20 19:29:21 +00001626 }
1627 switch (ELF32_ST_TYPE(o_symtab[i].st_info)) {
sewardjde4a1d02002-03-22 01:27:54 +00001628 case STT_NOTYPE: VG_(printf)("NOT "); break;
1629 case STT_OBJECT: VG_(printf)("OBJ "); break;
1630 case STT_FUNC: VG_(printf)("FUN "); break;
1631 case STT_SECTION: VG_(printf)("SEC "); break;
1632 case STT_FILE: VG_(printf)("FIL "); break;
1633 case STT_LOPROC: VG_(printf)("lop "); break;
1634 case STT_HIPROC: VG_(printf)("hip "); break;
1635 default: VG_(printf)("??? "); break;
sewardj8fe15a32002-10-20 19:29:21 +00001636 }
1637 VG_(printf)(
1638 ": value %p, size %d, name %s\n",
1639 si->offset+(UChar*)o_symtab[i].st_value,
1640 o_symtab[i].st_size,
1641 o_symtab[i].st_name
1642 ? ((Char*)o_strtab+o_symtab[i].st_name)
1643 : (Char*)"NONAME");
1644 }
sewardjde4a1d02002-03-22 01:27:54 +00001645# endif
1646
1647 /* Figure out if we're interested in the symbol.
1648 Firstly, is it of the right flavour?
1649 */
1650 snaffle_it
1651 = ( (ELF32_ST_BIND(o_symtab[i].st_info) == STB_GLOBAL ||
sewardjc75dd522002-11-13 22:49:54 +00001652 ELF32_ST_BIND(o_symtab[i].st_info) == STB_LOCAL ||
1653 ELF32_ST_BIND(o_symtab[i].st_info) == STB_WEAK)
sewardjde4a1d02002-03-22 01:27:54 +00001654 &&
sewardj8fe15a32002-10-20 19:29:21 +00001655 (ELF32_ST_TYPE(o_symtab[i].st_info) == STT_FUNC ||
sewardjc75dd522002-11-13 22:49:54 +00001656 (VG_(needs).data_syms
1657 && ELF32_ST_TYPE(o_symtab[i].st_info) == STT_OBJECT))
sewardjde4a1d02002-03-22 01:27:54 +00001658 );
1659
1660 /* Secondly, if it's apparently in a GOT or PLT, it's really
1661 a reference to a symbol defined elsewhere, so ignore it.
1662 */
1663 sym_addr = si->offset
1664 + (UInt)o_symtab[i].st_value;
1665 if (o_got != NULL
1666 && sym_addr >= (Addr)o_got
1667 && sym_addr < (Addr)(o_got+o_got_sz)) {
1668 snaffle_it = False;
1669 if (VG_(clo_trace_symtab)) {
1670 VG_(printf)( "in GOT: %s\n",
1671 o_strtab+o_symtab[i].st_name);
1672 }
1673 }
1674 if (o_plt != NULL
1675 && sym_addr >= (Addr)o_plt
1676 && sym_addr < (Addr)(o_plt+o_plt_sz)) {
1677 snaffle_it = False;
1678 if (VG_(clo_trace_symtab)) {
1679 VG_(printf)( "in PLT: %s\n",
1680 o_strtab+o_symtab[i].st_name);
1681 }
1682 }
1683
1684 /* Don't bother if nameless, or zero-sized. */
1685 if (snaffle_it
1686 && (o_symtab[i].st_name == (Elf32_Word)NULL
1687 || /* VG_(strlen)(o_strtab+o_symtab[i].st_name) == 0 */
1688 /* equivalent but cheaper ... */
1689 * ((UChar*)(o_strtab+o_symtab[i].st_name)) == 0
1690 || o_symtab[i].st_size == 0)) {
1691 snaffle_it = False;
1692 if (VG_(clo_trace_symtab)) {
1693 VG_(printf)( "size=0: %s\n",
1694 o_strtab+o_symtab[i].st_name);
1695 }
1696 }
1697
1698# if 0
1699 /* Avoid _dl_ junk. (Why?) */
1700 /* 01-02-24: disabled until I find out if it really helps. */
1701 if (snaffle_it
1702 && (VG_(strncmp)("_dl_", o_strtab+o_symtab[i].st_name, 4) == 0
1703 || VG_(strncmp)("_r_debug",
1704 o_strtab+o_symtab[i].st_name, 8) == 0)) {
1705 snaffle_it = False;
1706 if (VG_(clo_trace_symtab)) {
1707 VG_(printf)( "_dl_ junk: %s\n",
1708 o_strtab+o_symtab[i].st_name);
1709 }
1710 }
1711# endif
1712
1713 /* This seems to significantly reduce the number of junk
1714 symbols, and particularly reduces the number of
1715 overlapping address ranges. Don't ask me why ... */
1716 if (snaffle_it && (Int)o_symtab[i].st_value == 0) {
1717 snaffle_it = False;
1718 if (VG_(clo_trace_symtab)) {
1719 VG_(printf)( "valu=0: %s\n",
1720 o_strtab+o_symtab[i].st_name);
1721 }
1722 }
1723
1724 /* If no part of the symbol falls within the mapped range,
1725 ignore it. */
1726 if (sym_addr+o_symtab[i].st_size <= si->start
1727 || sym_addr >= si->start+si->size) {
1728 snaffle_it = False;
1729 }
1730
1731 if (snaffle_it) {
1732 /* it's an interesting symbol; record ("snaffle") it. */
1733 RiSym sym;
1734 Char* t0 = o_symtab[i].st_name
1735 ? (Char*)(o_strtab+o_symtab[i].st_name)
1736 : (Char*)"NONAME";
1737 Int nmoff = addStr ( si, t0 );
1738 vg_assert(nmoff >= 0
1739 /* && 0==VG_(strcmp)(t0,&vg_strtab[nmoff]) */ );
1740 vg_assert( (Int)o_symtab[i].st_value >= 0);
sewardj8fe15a32002-10-20 19:29:21 +00001741 /* VG_(printf)("%p + %d: %p %s\n", si->start,
1742 (Int)o_symtab[i].st_value, sym_addr, t0 ); */
sewardjde4a1d02002-03-22 01:27:54 +00001743 sym.addr = sym_addr;
1744 sym.size = o_symtab[i].st_size;
1745 sym.nmoff = nmoff;
1746 addSym ( si, &sym );
1747 }
1748 }
1749 }
1750 }
1751
sewardjb51f2e62002-06-01 23:11:19 +00001752 /* Reading of the stabs and/or dwarf2 debug format information, if
1753 any. */
sewardjde4a1d02002-03-22 01:27:54 +00001754 stabstr = NULL;
1755 stab = NULL;
sewardjb51f2e62002-06-01 23:11:19 +00001756 dwarf2 = NULL;
sewardjde4a1d02002-03-22 01:27:54 +00001757 stabstr_sz = 0;
1758 stab_sz = 0;
sewardjb51f2e62002-06-01 23:11:19 +00001759 dwarf2_sz = 0;
1760
1761 /* find the .stabstr / .stab / .debug_line sections */
sewardjde4a1d02002-03-22 01:27:54 +00001762 for (i = 0; i < ehdr->e_shnum; i++) {
1763 if (0 == VG_(strcmp)(".stab",sh_strtab + shdr[i].sh_name)) {
sewardjb51f2e62002-06-01 23:11:19 +00001764 stab = (UChar*)(oimage + shdr[i].sh_offset);
sewardjde4a1d02002-03-22 01:27:54 +00001765 stab_sz = shdr[i].sh_size;
1766 }
1767 if (0 == VG_(strcmp)(".stabstr",sh_strtab + shdr[i].sh_name)) {
1768 stabstr = (UChar*)(oimage + shdr[i].sh_offset);
1769 stabstr_sz = shdr[i].sh_size;
1770 }
sewardjc134dd92002-06-01 14:21:36 +00001771 if (0 == VG_(strcmp)(".debug_line",sh_strtab + shdr[i].sh_name)) {
sewardjb51f2e62002-06-01 23:11:19 +00001772 dwarf2 = (UChar *)(oimage + shdr[i].sh_offset);
1773 dwarf2_sz = shdr[i].sh_size;
sewardjc134dd92002-06-01 14:21:36 +00001774 }
sewardjde4a1d02002-03-22 01:27:54 +00001775 }
1776
sewardjb51f2e62002-06-01 23:11:19 +00001777 if ((stab == NULL || stabstr == NULL) && dwarf2 == NULL) {
sewardjde4a1d02002-03-22 01:27:54 +00001778 vg_symerr(" object doesn't have any debug info");
1779 VG_(munmap) ( (void*)oimage, n_oimage );
sewardj8fe15a32002-10-20 19:29:21 +00001780 return False;
sewardjde4a1d02002-03-22 01:27:54 +00001781 }
1782
1783 if ( stab_sz + (UChar*)stab > n_oimage + (UChar*)oimage
1784 || stabstr_sz + (UChar*)stabstr
1785 > n_oimage + (UChar*)oimage ) {
sewardjb51f2e62002-06-01 23:11:19 +00001786 vg_symerr(" ELF (stabs) debug data is beyond image end?!");
sewardjde4a1d02002-03-22 01:27:54 +00001787 VG_(munmap) ( (void*)oimage, n_oimage );
sewardj8fe15a32002-10-20 19:29:21 +00001788 return False;
sewardjde4a1d02002-03-22 01:27:54 +00001789 }
1790
sewardjb51f2e62002-06-01 23:11:19 +00001791 if ( dwarf2_sz + (UChar*)dwarf2 > n_oimage + (UChar*)oimage ) {
1792 vg_symerr(" ELF (dwarf2) debug data is beyond image end?!");
1793 VG_(munmap) ( (void*)oimage, n_oimage );
sewardj8fe15a32002-10-20 19:29:21 +00001794 return False;
sewardjb51f2e62002-06-01 23:11:19 +00001795 }
sewardjde4a1d02002-03-22 01:27:54 +00001796
sewardjb51f2e62002-06-01 23:11:19 +00001797 /* Looks plausible. Go on and read debug data. */
1798 if (stab != NULL && stabstr != NULL) {
1799 read_debuginfo_stabs ( si, stab, stab_sz, stabstr, stabstr_sz );
1800 }
sewardjde4a1d02002-03-22 01:27:54 +00001801
sewardjb51f2e62002-06-01 23:11:19 +00001802 if (dwarf2 != NULL) {
1803 read_debuginfo_dwarf2 ( si, dwarf2, dwarf2_sz );
1804 }
sewardjde4a1d02002-03-22 01:27:54 +00001805
1806 /* Last, but not least, heave the oimage back overboard. */
1807 VG_(munmap) ( (void*)oimage, n_oimage );
sewardj8fe15a32002-10-20 19:29:21 +00001808
1809 return True;
sewardjde4a1d02002-03-22 01:27:54 +00001810}
1811
1812
1813/*------------------------------------------------------------*/
1814/*--- Main entry point for symbols table reading. ---*/
1815/*------------------------------------------------------------*/
1816
1817/* The root structure for the entire symbol table system. It is a
1818 linked list of SegInfos. Note that this entire mechanism assumes
1819 that what we read from /proc/self/maps doesn't contain overlapping
1820 address ranges, and as a result the SegInfos in this list describe
1821 disjoint address ranges.
1822*/
1823static SegInfo* segInfo = NULL;
1824
1825
njn25e49d8e72002-09-23 09:36:25 +00001826void VG_(read_symtab_callback) (
sewardjde4a1d02002-03-22 01:27:54 +00001827 Addr start, UInt size,
1828 Char rr, Char ww, Char xx,
1829 UInt foffset, UChar* filename )
1830{
1831 SegInfo* si;
1832
1833 /* Stay sane ... */
1834 if (size == 0)
1835 return;
1836
1837 /* We're only interested in collecting symbols in executable
1838 segments which are associated with a real file. Hence: */
1839 if (filename == NULL || xx != 'x')
1840 return;
1841 if (0 == VG_(strcmp)(filename, "/dev/zero"))
1842 return;
sewardj8fe15a32002-10-20 19:29:21 +00001843 if (foffset != 0)
1844 return;
sewardjde4a1d02002-03-22 01:27:54 +00001845
1846 /* Perhaps we already have this one? If so, skip. */
1847 for (si = segInfo; si != NULL; si = si->next) {
1848 /*
1849 if (0==VG_(strcmp)(si->filename, filename))
1850 VG_(printf)("same fnames: %c%c%c (%p, %d) (%p, %d) %s\n",
1851 rr,ww,xx,si->start,si->size,start,size,filename);
1852 */
1853 /* For some reason the observed size of a mapping can change, so
1854 we don't use that to determine uniqueness. */
1855 if (si->start == start
1856 /* && si->size == size */
1857 && 0==VG_(strcmp)(si->filename, filename)) {
1858 return;
1859 }
1860 }
1861
1862 /* Get the record initialised right. */
njn25e49d8e72002-09-23 09:36:25 +00001863 si = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(SegInfo));
sewardjde4a1d02002-03-22 01:27:54 +00001864
sewardj8fe15a32002-10-20 19:29:21 +00001865 VG_(memset)(si, 0, sizeof(*si));
sewardjde4a1d02002-03-22 01:27:54 +00001866 si->start = start;
1867 si->size = size;
1868 si->foffset = foffset;
njn25e49d8e72002-09-23 09:36:25 +00001869 si->filename = VG_(arena_malloc)(VG_AR_SYMTAB, 1 + VG_(strlen)(filename));
sewardjde4a1d02002-03-22 01:27:54 +00001870 VG_(strcpy)(si->filename, filename);
1871
1872 si->symtab = NULL;
1873 si->symtab_size = si->symtab_used = 0;
1874 si->loctab = NULL;
1875 si->loctab_size = si->loctab_used = 0;
1876 si->strtab = NULL;
1877 si->strtab_size = si->strtab_used = 0;
1878
sewardjde4a1d02002-03-22 01:27:54 +00001879 /* And actually fill it up. */
sewardj8fe15a32002-10-20 19:29:21 +00001880 if (!vg_read_lib_symbols ( si ) && 0) {
1881 /* XXX this interacts badly with the prevN optimization in
1882 addStr(). Since this frees the si, the si pointer value can
1883 be recycled, which confuses the curr_si == si test. For now,
1884 this code is disabled, and everything is included in the
1885 segment list, even if it is a bad ELF file. Ironically,
1886 running this under valgrind itself hides the problem, because
1887 it doesn't recycle pointers... */
1888 freeSegInfo( si );
1889 } else {
1890 si->next = segInfo;
1891 segInfo = si;
1892
1893 canonicaliseSymtab ( si );
1894 canonicaliseLoctab ( si );
1895 }
sewardjde4a1d02002-03-22 01:27:54 +00001896}
1897
1898
1899/* This one really is the Head Honcho. Update the symbol tables to
1900 reflect the current state of /proc/self/maps. Rather than re-read
1901 everything, just read the entries which are not already in segInfo.
1902 So we can call here repeatedly, after every mmap of a non-anonymous
1903 segment with execute permissions, for example, to pick up new
1904 libraries as they are dlopen'd. Conversely, when the client does
1905 munmap(), vg_symtab_notify_munmap() throws away any symbol tables
1906 which happen to correspond to the munmap()d area. */
njn25e49d8e72002-09-23 09:36:25 +00001907void VG_(maybe_read_symbols) ( void )
sewardjde4a1d02002-03-22 01:27:54 +00001908{
njn25e49d8e72002-09-23 09:36:25 +00001909 if (!VG_(using_debug_info))
1910 return;
sewardjde4a1d02002-03-22 01:27:54 +00001911
njn25e49d8e72002-09-23 09:36:25 +00001912 VGP_PUSHCC(VgpReadSyms);
1913 VG_(read_procselfmaps) ( VG_(read_symtab_callback) );
1914 VGP_POPCC(VgpReadSyms);
sewardjde4a1d02002-03-22 01:27:54 +00001915}
1916
sewardjde4a1d02002-03-22 01:27:54 +00001917/* When an munmap() call happens, check to see whether it corresponds
1918 to a segment for a .so, and if so discard the relevant SegInfo.
1919 This might not be a very clever idea from the point of view of
1920 accuracy of error messages, but we need to do it in order to
sewardj18d75132002-05-16 11:06:21 +00001921 maintain the no-overlapping invariant.
sewardjde4a1d02002-03-22 01:27:54 +00001922*/
njn25e49d8e72002-09-23 09:36:25 +00001923void VG_(maybe_unload_symbols) ( Addr start, UInt length )
sewardjde4a1d02002-03-22 01:27:54 +00001924{
1925 SegInfo *prev, *curr;
1926
njn25e49d8e72002-09-23 09:36:25 +00001927 if (!VG_(using_debug_info))
1928 return;
1929
sewardjde4a1d02002-03-22 01:27:54 +00001930 prev = NULL;
1931 curr = segInfo;
1932 while (True) {
1933 if (curr == NULL) break;
1934 if (start == curr->start) break;
1935 prev = curr;
1936 curr = curr->next;
1937 }
sewardj18d75132002-05-16 11:06:21 +00001938 if (curr == NULL)
njn25e49d8e72002-09-23 09:36:25 +00001939 return;
sewardjde4a1d02002-03-22 01:27:54 +00001940
1941 VG_(message)(Vg_UserMsg,
1942 "discard syms in %s due to munmap()",
1943 curr->filename ? curr->filename : (UChar*)"???");
1944
1945 vg_assert(prev == NULL || prev->next == curr);
1946
1947 if (prev == NULL) {
1948 segInfo = curr->next;
1949 } else {
1950 prev->next = curr->next;
1951 }
1952
1953 freeSegInfo(curr);
njn25e49d8e72002-09-23 09:36:25 +00001954 return;
sewardjde4a1d02002-03-22 01:27:54 +00001955}
1956
1957
1958/*------------------------------------------------------------*/
1959/*--- Use of symbol table & location info to create ---*/
1960/*--- plausible-looking stack dumps. ---*/
1961/*------------------------------------------------------------*/
1962
njn25e49d8e72002-09-23 09:36:25 +00001963static __inline__ void ensure_debug_info_inited ( void )
1964{
1965 if (!VG_(using_debug_info)) {
1966 VG_(using_debug_info) = True;
1967 VG_(maybe_read_symbols)();
1968 }
1969}
1970
sewardjde4a1d02002-03-22 01:27:54 +00001971/* Find a symbol-table index containing the specified pointer, or -1
1972 if not found. Binary search. */
1973
njn25e49d8e72002-09-23 09:36:25 +00001974static Int search_one_symtab ( SegInfo* si, Addr ptr,
1975 Bool match_anywhere_in_fun )
sewardjde4a1d02002-03-22 01:27:54 +00001976{
1977 Addr a_mid_lo, a_mid_hi;
njn25e49d8e72002-09-23 09:36:25 +00001978 Int mid, size,
sewardjde4a1d02002-03-22 01:27:54 +00001979 lo = 0,
1980 hi = si->symtab_used-1;
1981 while (True) {
1982 /* current unsearched space is from lo to hi, inclusive. */
1983 if (lo > hi) return -1; /* not found */
1984 mid = (lo + hi) / 2;
1985 a_mid_lo = si->symtab[mid].addr;
njn25e49d8e72002-09-23 09:36:25 +00001986 size = ( match_anywhere_in_fun
1987 ? si->symtab[mid].size
1988 : 1);
1989 a_mid_hi = ((Addr)si->symtab[mid].addr) + size - 1;
sewardjde4a1d02002-03-22 01:27:54 +00001990
1991 if (ptr < a_mid_lo) { hi = mid-1; continue; }
1992 if (ptr > a_mid_hi) { lo = mid+1; continue; }
1993 vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
1994 return mid;
1995 }
1996}
1997
1998
1999/* Search all symtabs that we know about to locate ptr. If found, set
2000 *psi to the relevant SegInfo, and *symno to the symtab entry number
2001 within that. If not found, *psi is set to NULL. */
2002
njn25e49d8e72002-09-23 09:36:25 +00002003static void search_all_symtabs ( Addr ptr, /*OUT*/SegInfo** psi,
2004 /*OUT*/Int* symno,
2005 Bool match_anywhere_in_fun )
sewardjde4a1d02002-03-22 01:27:54 +00002006{
2007 Int sno;
2008 SegInfo* si;
njn25e49d8e72002-09-23 09:36:25 +00002009
2010 ensure_debug_info_inited();
2011 VGP_PUSHCC(VgpSearchSyms);
2012
sewardjde4a1d02002-03-22 01:27:54 +00002013 for (si = segInfo; si != NULL; si = si->next) {
2014 if (si->start <= ptr && ptr < si->start+si->size) {
njn25e49d8e72002-09-23 09:36:25 +00002015 sno = search_one_symtab ( si, ptr, match_anywhere_in_fun );
sewardjde4a1d02002-03-22 01:27:54 +00002016 if (sno == -1) goto not_found;
2017 *symno = sno;
2018 *psi = si;
njn25e49d8e72002-09-23 09:36:25 +00002019 VGP_POPCC(VgpSearchSyms);
sewardjde4a1d02002-03-22 01:27:54 +00002020 return;
2021 }
2022 }
2023 not_found:
2024 *psi = NULL;
njn25e49d8e72002-09-23 09:36:25 +00002025 VGP_POPCC(VgpSearchSyms);
sewardjde4a1d02002-03-22 01:27:54 +00002026}
2027
2028
2029/* Find a location-table index containing the specified pointer, or -1
2030 if not found. Binary search. */
2031
2032static Int search_one_loctab ( SegInfo* si, Addr ptr )
2033{
2034 Addr a_mid_lo, a_mid_hi;
2035 Int mid,
2036 lo = 0,
2037 hi = si->loctab_used-1;
2038 while (True) {
2039 /* current unsearched space is from lo to hi, inclusive. */
2040 if (lo > hi) return -1; /* not found */
2041 mid = (lo + hi) / 2;
2042 a_mid_lo = si->loctab[mid].addr;
2043 a_mid_hi = ((Addr)si->loctab[mid].addr) + si->loctab[mid].size - 1;
2044
2045 if (ptr < a_mid_lo) { hi = mid-1; continue; }
2046 if (ptr > a_mid_hi) { lo = mid+1; continue; }
2047 vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
2048 return mid;
2049 }
2050}
2051
2052
2053/* Search all loctabs that we know about to locate ptr. If found, set
2054 *psi to the relevant SegInfo, and *locno to the loctab entry number
2055 within that. If not found, *psi is set to NULL.
2056*/
njn25e49d8e72002-09-23 09:36:25 +00002057static void search_all_loctabs ( Addr ptr, /*OUT*/SegInfo** psi,
2058 /*OUT*/Int* locno )
sewardjde4a1d02002-03-22 01:27:54 +00002059{
2060 Int lno;
2061 SegInfo* si;
njn25e49d8e72002-09-23 09:36:25 +00002062
2063 VGP_PUSHCC(VgpSearchSyms);
2064
2065 ensure_debug_info_inited();
sewardjde4a1d02002-03-22 01:27:54 +00002066 for (si = segInfo; si != NULL; si = si->next) {
2067 if (si->start <= ptr && ptr < si->start+si->size) {
2068 lno = search_one_loctab ( si, ptr );
2069 if (lno == -1) goto not_found;
2070 *locno = lno;
2071 *psi = si;
njn25e49d8e72002-09-23 09:36:25 +00002072 VGP_POPCC(VgpSearchSyms);
sewardjde4a1d02002-03-22 01:27:54 +00002073 return;
2074 }
2075 }
2076 not_found:
2077 *psi = NULL;
njn25e49d8e72002-09-23 09:36:25 +00002078 VGP_POPCC(VgpSearchSyms);
sewardjde4a1d02002-03-22 01:27:54 +00002079}
2080
2081
2082/* The whole point of this whole big deal: map a code address to a
2083 plausible symbol name. Returns False if no idea; otherwise True.
njn25e49d8e72002-09-23 09:36:25 +00002084 Caller supplies buf and nbuf. If demangle is False, don't do
sewardjde4a1d02002-03-22 01:27:54 +00002085 demangling, regardless of vg_clo_demangle -- probably because the
2086 call has come from vg_what_fn_or_object_is_this. */
njn25e49d8e72002-09-23 09:36:25 +00002087static
2088Bool get_fnname ( Bool demangle, Addr a, Char* buf, Int nbuf,
sewardj1771e172002-11-13 22:06:35 +00002089 Bool match_anywhere_in_fun, Bool show_offset)
sewardjde4a1d02002-03-22 01:27:54 +00002090{
2091 SegInfo* si;
2092 Int sno;
sewardj1771e172002-11-13 22:06:35 +00002093 Int offset;
2094
njn25e49d8e72002-09-23 09:36:25 +00002095 search_all_symtabs ( a, &si, &sno, match_anywhere_in_fun );
sewardjde4a1d02002-03-22 01:27:54 +00002096 if (si == NULL)
2097 return False;
njn25e49d8e72002-09-23 09:36:25 +00002098 if (demangle) {
2099 VG_(demangle) ( & si->strtab[si->symtab[sno].nmoff], buf, nbuf );
2100 } else {
sewardjde4a1d02002-03-22 01:27:54 +00002101 VG_(strncpy_safely)
2102 ( buf, & si->strtab[si->symtab[sno].nmoff], nbuf );
sewardjde4a1d02002-03-22 01:27:54 +00002103 }
sewardj1771e172002-11-13 22:06:35 +00002104
2105 offset = a - si->symtab[sno].addr;
2106 if (show_offset && offset != 0) {
2107 Char buf2[12];
2108 Char* symend = buf + VG_(strlen)(buf);
2109 Char* end = buf + nbuf;
2110 Int len;
2111
2112 len = VG_(sprintf)(buf2, "%c%d",
2113 offset < 0 ? '-' : '+',
2114 offset < 0 ? -offset : offset);
2115 vg_assert(len < sizeof(buf2));
2116
2117 if (len < (end - symend)) {
2118 Char *cp = buf2;
2119 VG_(memcpy)(symend, cp, len+1);
2120 }
2121 }
2122
sewardjde4a1d02002-03-22 01:27:54 +00002123 return True;
2124}
2125
sewardj6e008cb2002-12-15 13:11:39 +00002126/* This is available to skins... always demangle C++ names,
2127 match anywhere in function, but don't show offsets. */
njn25e49d8e72002-09-23 09:36:25 +00002128Bool VG_(get_fnname) ( Addr a, Char* buf, Int nbuf )
2129{
2130 return get_fnname ( /*demangle*/True, a, buf, nbuf,
sewardj6e008cb2002-12-15 13:11:39 +00002131 /*match_anywhere_in_fun*/True,
2132 /*show offset?*/False );
njn25e49d8e72002-09-23 09:36:25 +00002133}
sewardjde4a1d02002-03-22 01:27:54 +00002134
njn25e49d8e72002-09-23 09:36:25 +00002135/* This is available to skins... always demangle C++ names,
sewardj6e008cb2002-12-15 13:11:39 +00002136 match anywhere in function, and show offset if nonzero. */
2137Bool VG_(get_fnname_w_offset) ( Addr a, Char* buf, Int nbuf )
2138{
2139 return get_fnname ( /*demangle*/True, a, buf, nbuf,
2140 /*match_anywhere_in_fun*/True,
2141 /*show offset?*/True );
2142}
2143
2144/* This is available to skins... always demangle C++ names,
2145 only succeed if 'a' matches first instruction of function,
2146 and don't show offsets. */
njn25e49d8e72002-09-23 09:36:25 +00002147Bool VG_(get_fnname_if_entry) ( Addr a, Char* buf, Int nbuf )
2148{
2149 return get_fnname ( /*demangle*/True, a, buf, nbuf,
sewardj6e008cb2002-12-15 13:11:39 +00002150 /*match_anywhere_in_fun*/False,
2151 /*show offset?*/False );
njn25e49d8e72002-09-23 09:36:25 +00002152}
2153
sewardj6e008cb2002-12-15 13:11:39 +00002154/* This is only available to core... don't demangle C++ names,
2155 match anywhere in function, and don't show offsets. */
njn25e49d8e72002-09-23 09:36:25 +00002156Bool VG_(get_fnname_nodemangle) ( Addr a, Char* buf, Int nbuf )
2157{
2158 return get_fnname ( /*demangle*/False, a, buf, nbuf,
sewardj6e008cb2002-12-15 13:11:39 +00002159 /*match_anywhere_in_fun*/True,
2160 /*show offset?*/False );
njn25e49d8e72002-09-23 09:36:25 +00002161}
2162
2163/* Map a code address to the name of a shared object file or the executable.
2164 Returns False if no idea; otherwise True. Doesn't require debug info.
2165 Caller supplies buf and nbuf. */
2166Bool VG_(get_objname) ( Addr a, Char* buf, Int nbuf )
sewardjde4a1d02002-03-22 01:27:54 +00002167{
2168 SegInfo* si;
njn25e49d8e72002-09-23 09:36:25 +00002169
2170 ensure_debug_info_inited();
sewardjde4a1d02002-03-22 01:27:54 +00002171 for (si = segInfo; si != NULL; si = si->next) {
2172 if (si->start <= a && a < si->start+si->size) {
2173 VG_(strncpy_safely)(buf, si->filename, nbuf);
2174 return True;
2175 }
2176 }
2177 return False;
2178}
2179
njnb877d492003-01-28 20:40:57 +00002180/* Map a code address to its SegInfo. Returns NULL if not found. Doesn't
2181 require debug info. */
2182SegInfo* VG_(get_obj) ( Addr a )
2183{
2184 SegInfo* si;
2185
2186 ensure_debug_info_inited();
2187 for (si = segInfo; si != NULL; si = si->next) {
2188 if (si->start <= a && a < si->start+si->size) {
2189 return si;
2190 }
2191 }
2192 return False;
2193}
2194
njn25e49d8e72002-09-23 09:36:25 +00002195
2196/* Map a code address to a filename. Returns True if successful. */
2197Bool VG_(get_filename)( Addr a, Char* filename, Int n_filename )
sewardjde4a1d02002-03-22 01:27:54 +00002198{
njn25e49d8e72002-09-23 09:36:25 +00002199 SegInfo* si;
2200 Int locno;
2201 search_all_loctabs ( a, &si, &locno );
2202 if (si == NULL)
2203 return False;
2204 VG_(strncpy_safely)(filename, & si->strtab[si->loctab[locno].fnmoff],
2205 n_filename);
2206 return True;
sewardjde4a1d02002-03-22 01:27:54 +00002207}
2208
njn25e49d8e72002-09-23 09:36:25 +00002209/* Map a code address to a line number. Returns True if successful. */
2210Bool VG_(get_linenum)( Addr a, UInt* lineno )
2211{
2212 SegInfo* si;
2213 Int locno;
2214 search_all_loctabs ( a, &si, &locno );
2215 if (si == NULL)
2216 return False;
2217 *lineno = si->loctab[locno].lineno;
2218
2219 return True;
2220}
sewardjde4a1d02002-03-22 01:27:54 +00002221
2222/* Map a code address to a (filename, line number) pair.
2223 Returns True if successful.
2224*/
njn25e49d8e72002-09-23 09:36:25 +00002225Bool VG_(get_filename_linenum)( Addr a,
2226 Char* filename, Int n_filename,
2227 UInt* lineno )
sewardjde4a1d02002-03-22 01:27:54 +00002228{
2229 SegInfo* si;
2230 Int locno;
2231 search_all_loctabs ( a, &si, &locno );
2232 if (si == NULL)
2233 return False;
2234 VG_(strncpy_safely)(filename, & si->strtab[si->loctab[locno].fnmoff],
2235 n_filename);
2236 *lineno = si->loctab[locno].lineno;
njn4f9c9342002-04-29 16:03:24 +00002237
sewardjde4a1d02002-03-22 01:27:54 +00002238 return True;
2239}
2240
2241
2242/* Print a mini stack dump, showing the current location. */
2243void VG_(mini_stack_dump) ( ExeContext* ec )
2244{
2245
2246#define APPEND(str) \
2247 { UChar* sss; \
2248 for (sss = str; n < M_VG_ERRTXT-1 && *sss != 0; n++,sss++) \
2249 buf[n] = *sss; \
2250 buf[n] = 0; \
2251 }
2252
2253 Bool know_fnname;
2254 Bool know_objname;
2255 Bool know_srcloc;
2256 UInt lineno;
2257 UChar ibuf[20];
sewardj04b91062002-06-05 21:22:04 +00002258 UInt i, n;
sewardjde4a1d02002-03-22 01:27:54 +00002259
2260 UChar buf[M_VG_ERRTXT];
2261 UChar buf_fn[M_VG_ERRTXT];
2262 UChar buf_obj[M_VG_ERRTXT];
2263 UChar buf_srcloc[M_VG_ERRTXT];
2264
2265 Int stop_at = VG_(clo_backtrace_size);
2266
njn49ef4622002-10-04 15:15:08 +00002267 vg_assert(stop_at > 0);
sewardjde4a1d02002-03-22 01:27:54 +00002268
njn49ef4622002-10-04 15:15:08 +00002269 i = 0;
2270 do {
sewardjbaf6d422002-11-14 23:16:58 +00002271 Addr eip = ec->eips[i];
njn49ef4622002-10-04 15:15:08 +00002272 n = 0;
sewardjbaf6d422002-11-14 23:16:58 +00002273 if (i > 0)
2274 eip--; /* point to calling line */
2275 know_fnname = get_fnname (True, eip, buf_fn, M_VG_ERRTXT, True, False);
2276 know_objname = VG_(get_objname)(eip, buf_obj, M_VG_ERRTXT);
2277 know_srcloc = VG_(get_filename_linenum)(eip,
njn25e49d8e72002-09-23 09:36:25 +00002278 buf_srcloc, M_VG_ERRTXT,
2279 &lineno);
njn49ef4622002-10-04 15:15:08 +00002280 if (i == 0) APPEND(" at ") else APPEND(" by ");
2281
sewardjbaf6d422002-11-14 23:16:58 +00002282 VG_(sprintf)(ibuf,"0x%x: ", eip);
sewardj04b91062002-06-05 21:22:04 +00002283 APPEND(ibuf);
sewardjde4a1d02002-03-22 01:27:54 +00002284 if (know_fnname) {
njn49ef4622002-10-04 15:15:08 +00002285 APPEND(buf_fn);
sewardjde4a1d02002-03-22 01:27:54 +00002286 if (!know_srcloc && know_objname) {
2287 APPEND(" (in ");
2288 APPEND(buf_obj);
2289 APPEND(")");
2290 }
njn49ef4622002-10-04 15:15:08 +00002291 } else if (know_objname && !know_srcloc) {
2292 APPEND("(within ");
2293 APPEND(buf_obj);
2294 APPEND(")");
sewardjde4a1d02002-03-22 01:27:54 +00002295 } else {
njn49ef4622002-10-04 15:15:08 +00002296 APPEND("???");
2297 }
sewardjde4a1d02002-03-22 01:27:54 +00002298 if (know_srcloc) {
2299 APPEND(" (");
2300 APPEND(buf_srcloc);
2301 APPEND(":");
2302 VG_(sprintf)(ibuf,"%d",lineno);
2303 APPEND(ibuf);
2304 APPEND(")");
2305 }
2306 VG_(message)(Vg_UserMsg, "%s", buf);
njn49ef4622002-10-04 15:15:08 +00002307 i++;
2308
2309 } while (i < stop_at && ec->eips[i] != 0);
sewardjde4a1d02002-03-22 01:27:54 +00002310}
2311
2312#undef APPEND
2313
sewardj47104382002-10-20 18:35:48 +00002314/*------------------------------------------------------------*/
2315/*--- SegInfo accessor functions ---*/
2316/*------------------------------------------------------------*/
2317
2318const SegInfo* VG_(next_seginfo)(const SegInfo* seg)
2319{
2320 ensure_debug_info_inited();
2321
2322 if (seg == NULL)
2323 return segInfo;
2324 return seg->next;
2325}
2326
2327Addr VG_(seg_start)(const SegInfo* seg)
2328{
2329 return seg->start;
2330}
2331
2332UInt VG_(seg_size)(const SegInfo* seg)
2333{
2334 return seg->size;
2335}
2336
2337const UChar* VG_(seg_filename)(const SegInfo* seg)
2338{
2339 return seg->filename;
2340}
2341
2342UInt VG_(seg_sym_offset)(const SegInfo* seg)
2343{
2344 return seg->offset;
2345}
2346
2347VgSectKind VG_(seg_sect_kind)(Addr a)
2348{
2349 SegInfo* seg;
2350 VgSectKind ret = Vg_SectUnknown;
2351
2352 ensure_debug_info_inited();
2353
2354 for(seg = segInfo; seg != NULL; seg = seg->next) {
2355 if (a >= seg->start && a < (seg->start + seg->size)) {
2356 if (0)
sewardj8fe15a32002-10-20 19:29:21 +00002357 VG_(printf)("addr=%p seg=%p %s got=%p %d plt=%p %d data=%p %d bss=%p %d\n",
sewardj47104382002-10-20 18:35:48 +00002358 a, seg, seg->filename,
2359 seg->got_start, seg->got_size,
sewardj8fe15a32002-10-20 19:29:21 +00002360 seg->plt_start, seg->plt_size,
2361 seg->data_start, seg->data_size,
2362 seg->bss_start, seg->bss_size);
sewardj47104382002-10-20 18:35:48 +00002363 ret = Vg_SectText;
2364
sewardj8fe15a32002-10-20 19:29:21 +00002365 if (a >= seg->data_start && a < (seg->data_start + seg->data_size))
2366 ret = Vg_SectData;
2367 else if (a >= seg->bss_start && a < (seg->bss_start + seg->bss_size))
2368 ret = Vg_SectBSS;
2369 else if (a >= seg->plt_start && a < (seg->plt_start + seg->plt_size))
sewardj47104382002-10-20 18:35:48 +00002370 ret = Vg_SectPLT;
2371 else if (a >= seg->got_start && a < (seg->got_start + seg->got_size))
2372 ret = Vg_SectGOT;
2373 }
2374 }
2375
2376 return ret;
2377}
2378
sewardjde4a1d02002-03-22 01:27:54 +00002379/*--------------------------------------------------------------------*/
2380/*--- end vg_symtab2.c ---*/
2381/*--------------------------------------------------------------------*/