blob: 4efd4c3ce4fb213857cc517919eb7d1c926862dd [file] [log] [blame]
njnea27e462005-05-31 02:38:09 +00001
njn4bbdc972003-10-16 10:10:55 +00002/*--------------------------------------------------------------------*/
njnea27e462005-05-31 02:38:09 +00003/*--- Read stabs debug info. stabs.c ---*/
njn4bbdc972003-10-16 10:10:55 +00004/*--------------------------------------------------------------------*/
5
6/*
njnb9c427c2004-12-01 14:14:42 +00007 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
njn4bbdc972003-10-16 10:10:55 +00009
njn53612422005-03-12 16:22:54 +000010 Copyright (C) 2000-2005 Julian Seward
njn4bbdc972003-10-16 10:10:55 +000011 jseward@acm.org
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29*/
jsgfcb1d1c02003-10-14 21:55:10 +000030
njnc7561b92005-06-19 01:24:32 +000031#include "pub_core_basics.h"
njndab4e4b2005-06-26 14:48:23 +000032#include "pub_core_debuginfo.h"
njn97405b22005-06-02 03:39:33 +000033#include "pub_core_libcbase.h"
njn132bfcc2005-06-04 19:16:06 +000034#include "pub_core_libcassert.h"
njn36a20fa2005-06-03 03:08:39 +000035#include "pub_core_libcprint.h"
njnaf1d7df2005-06-11 01:31:52 +000036#include "pub_core_mallocfree.h"
njnf4c50162005-06-20 14:18:12 +000037
njndab4e4b2005-06-26 14:48:23 +000038#include "priv_symtypes.h"
njnea27e462005-05-31 02:38:09 +000039#include "priv_symtab.h"
jsgfcb1d1c02003-10-14 21:55:10 +000040
41#include <a.out.h> /* stabs defns */
42
43/*------------------------------------------------------------*/
44/*--- Read STABS format debug info. ---*/
45/*------------------------------------------------------------*/
46
/* Stabs entry types, from:
 *   The "stabs" debug format
 *   Menapace, Kingdon and MacKenzie
 *   Cygnus Support
 *
 * Each enumerator carries the numeric type code of the corresponding
 * stab entry kind.
 */
typedef enum {
   N_UNDEF =   0,   /* undefined symbol, new stringtab */
   N_GSYM  =  32,   /* Global symbol */
   N_FUN   =  36,   /* Function start or end */
   N_STSYM =  38,   /* Data segment file-scope variable */
   N_LCSYM =  40,   /* BSS segment file-scope variable */
   N_RSYM  =  64,   /* Register variable */
   N_SLINE =  68,   /* Source line number */
   N_SO    = 100,   /* Source file path and name */
   N_LSYM  = 128,   /* Stack variable or type */
   N_BINCL = 130,   /* Beginning of an include file */
   N_SOL   = 132,   /* Include file name */
   N_PSYM  = 160,   /* Function parameter */
   N_EINCL = 162,   /* End of an include file */
   N_LBRAC = 192,   /* Start of lexical block */
   N_EXCL  = 194,   /* Placeholder for an include file */
   N_RBRAC = 224    /* End of lexical block */
} stab_types;
69
70
njn4bbdc972003-10-16 10:10:55 +000071/* stabs use a two-dimensional numbering scheme for types: the type
jsgfcb1d1c02003-10-14 21:55:10 +000072 number is either of the form name:N or name:(M,N); name may be
73 empty. N is the type number within a file context; M is the file
74 number (an object may have multiple files by inclusion).
75*/
76
77typedef struct _StabType {
78 Char *str; /* string as it appears in file */
79 SymType *type; /* our type info */
80} StabType;
81
82typedef struct _StabFile {
83 StabType *types;
84 Int ntypes;
85 UInt fileidx; /* for reference, idx of creation */
86} StabFile;
87
88typedef struct _StabTypeTab {
89 StabFile **files;
90 Int nfiles;
91
92 /* List of structure tag names, used for mapping them to actual
93 definitions of the structures. There should really be one of
94 these per object and a global one to cope with cross-object
95 references. */
96 struct structlist {
97 Char *name;
98 Bool isstruct; /* struct (or union) */
99 SymType *type; /* reference */
100 struct structlist *next;
101 } *structlist;
102
103#define HEADER_HASHSZ 53
104 struct header {
105 Char *filename; /* header file name */
106 StabFile *types; /* types for that header */
107 UInt instance; /* instance */
108 struct header *next;
109 } *headerhash[HEADER_HASHSZ];
110} StabTypeTab;
111
thughes2988f492004-10-07 08:33:29 +0000112static const Bool stabs_debug = False;
jsgfcb1d1c02003-10-14 21:55:10 +0000113
114static UInt header_hash(Char *filename, UInt instance)
115{
116 Char *cp;
117 UInt hash = 0;
118
119 for(cp = filename; *cp; cp++) {
120 hash += *cp;
121 hash = (hash << 17) | (hash >> (32-17));
122 }
123 hash += instance;
124
125 return hash % HEADER_HASHSZ;
126}
127
128/* Look up a struct/union tag name in table, and return reference to
129 existing type, or create a new tag entry.
130 XXX make this a proper data structure
131*/
132static SymType *structRef(StabTypeTab *tab, SymType *def, Bool isstruct, Char *name)
133{
tomc2bf09c2005-07-29 18:33:19 +0000134 const Bool debug = False || stabs_debug;
jsgfcb1d1c02003-10-14 21:55:10 +0000135 struct structlist *sl;
136 SymType *ty;
137 static Int warnlen = 0;
138 Int len = 0;
139
140 for(sl = tab->structlist; sl != NULL; sl = sl->next) {
141 len++;
142
143 if (isstruct == sl->isstruct && VG_(strcmp)(name, sl->name) == 0) {
144 if (debug)
145 VG_(printf)("found %s ref for %s\n",
146 isstruct ? "struct" : "union", name);
147 return sl->type;
148 }
149 }
150
151 if (debug && (len > warnlen*2)) {
152 warnlen = len;
153 VG_(printf)("struct ref list reached %d entries\n", len);
154 }
155
156 sl = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(*sl));
157 if (isstruct)
sewardj7eb7c582005-06-23 01:02:53 +0000158 ty = ML_(st_mkstruct)(def, 0, 0);
jsgfcb1d1c02003-10-14 21:55:10 +0000159 else
sewardj7eb7c582005-06-23 01:02:53 +0000160 ty = ML_(st_mkunion)(def, 0, 0);
jsgfcb1d1c02003-10-14 21:55:10 +0000161
sewardj7eb7c582005-06-23 01:02:53 +0000162 ML_(st_setname)(ty, name);
jsgfcb1d1c02003-10-14 21:55:10 +0000163 sl->isstruct = isstruct;
164 sl->type = ty;
165 sl->name = name;
166 sl->next = tab->structlist;
167 tab->structlist = sl;
168
169 if (debug)
170 VG_(printf)("created %s ref for %s = %p\n",
171 isstruct ? "struct" : "union", name, ty);
172
173 return ty;
174}
175
176/* Add a structural defintion for a struct/union reference */
177static SymType *structDef(StabTypeTab *tab, SymType *def, Bool isstruct, Char *name)
178{
tomc2bf09c2005-07-29 18:33:19 +0000179 const Bool debug = False || stabs_debug;
jsgfcb1d1c02003-10-14 21:55:10 +0000180 SymType *ref = structRef(tab, NULL, isstruct, name);
181
thughes60d62a72004-10-07 08:33:08 +0000182 /* it seems that GNAT likes to declare names as both struct tags
183 and typedefs so check we aren't about to make a structure a
184 reference to itself as that will create a loop */
185 if (ref == def) {
186 if (debug)
187 VG_(printf)("ignoring %s self ref for %s %p -> %p\n",
188 isstruct ? "struct" : "union", name, ref, def);
189 }
190 else {
191 if (debug)
192 VG_(printf)("defining %s ref for %s %p -> %p\n",
193 isstruct ? "struct" : "union", name, ref, def);
jsgfcb1d1c02003-10-14 21:55:10 +0000194
sewardj7eb7c582005-06-23 01:02:53 +0000195 def = ML_(st_mktypedef)(ref, name, ML_(st_basetype)(def, False));
thughes60d62a72004-10-07 08:33:08 +0000196 }
sewardj7eb7c582005-06-23 01:02:53 +0000197 ML_(st_setname)(def, name);
jsgfcb1d1c02003-10-14 21:55:10 +0000198 return def;
199}
200
201static StabFile *getStabFile(StabTypeTab *tab, Int file, StabFile *set)
202{
203 StabFile *sf;
204 file++; /* file == -1 -> no file */
205
206 if (file < 0)
207 return NULL;
208
209 if (file >= tab->nfiles) {
210 UInt i;
211 StabFile **n = VG_(arena_malloc)(VG_AR_SYMTAB, (file+1) * sizeof(*n));
212
213 for(i = 0; i <= file; i++) {
214 if (i < tab->nfiles)
215 n[i] = tab->files[i];
216 else {
217 n[i] = NULL;
218 }
219 }
220
221 if (tab->files != NULL)
222 VG_(arena_free)(VG_AR_SYMTAB, tab->files);
223
224 tab->files = n;
225 tab->nfiles = file+1;
226 }
227
228 if (set != NULL)
229 tab->files[file] = set;
230
231 if (tab->files[file] == NULL) {
232 sf = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(*sf));
233 tab->files[file] = sf;
234 sf->types = NULL;
235 sf->ntypes = 0;
236 sf->fileidx = file - 1; /* compensate for file++ above */
237 }
238
239 sf = tab->files[file];
240
241 return sf;
242}
243
244/* add a new index for a file */
245static void addFileAlias(StabTypeTab *tab, Char *filename, UInt instance, Int idx)
246{
tomc2bf09c2005-07-29 18:33:19 +0000247 const Bool debug = False || stabs_debug;
jsgfcb1d1c02003-10-14 21:55:10 +0000248 struct header *hp;
249
250 for(hp = tab->headerhash[header_hash(filename, instance)]; hp != NULL; hp = hp->next) {
251 if (hp->instance == instance && VG_(strcmp)(filename, hp->filename) == 0) {
252 if (debug)
253 VG_(printf)("adding alias for \"%s\"/%d fileidx %d to fileidx %d\n",
254 filename, instance, idx, hp->types->fileidx);
255 getStabFile(tab, idx, hp->types);
256 return;
257 }
258 }
259
260 VG_(printf)("Couldn't find previous reference to \"%s\"/%d for fileidx %d\n",
261 filename, instance, idx);
262}
263
264static void addHeader(StabTypeTab *tab, Char *filename, UInt instance, Int idx)
265{
tomc2bf09c2005-07-29 18:33:19 +0000266 const Bool debug = False || stabs_debug;
jsgfcb1d1c02003-10-14 21:55:10 +0000267 struct header *hp, **bucket;
268
269 if (debug)
270 VG_(printf)("adding new header %s/%d fileidx %d\n", filename, instance, idx);
271
272 bucket = &tab->headerhash[header_hash(filename, instance)];
273
274 hp = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(*hp));
275 hp->filename = filename;
276 hp->instance = instance;
277 hp->types = getStabFile(tab, idx, NULL);
278 hp->next = *bucket;
279 *bucket = hp;
280}
281
282static void clearStabFiles(StabTypeTab *tab)
283{
284 VG_(arena_free)(VG_AR_SYMTAB, tab->files);
285
286 tab->files = NULL;
287 tab->nfiles = 0;
288}
289
290static StabType *getStabType(StabTypeTab *tab, Int file, Int sym)
291{
292 StabFile *sf;
293
294 sf = getStabFile(tab, file, NULL);
295
296 if (sf == NULL || sym < 0)
297 return NULL;
298
299 if (sym >= sf->ntypes) {
300 UInt i;
301 StabType *n = VG_(arena_malloc)(VG_AR_SYMTAB, (sym+1) * sizeof(*n));
302
303 for(i = 0; i <= sym; i++) {
304 if (i < sf->ntypes)
305 n[i] = sf->types[i];
306 else {
307 n[i].str = NULL;
308 n[i].type = NULL;
309 }
310 }
311
312 if (sf->types != NULL)
313 VG_(arena_free)(VG_AR_SYMTAB, sf->types);
314
315 sf->types = n;
316 sf->ntypes = sym+1;
317 }
318
319 return &sf->types[sym];
320}
321
njna3b16dc2005-03-13 18:49:00 +0000322static Bool isdigit_base(Char c, Int base, Int *vp)
jsgfcb1d1c02003-10-14 21:55:10 +0000323{
fitzhardinge3e2c6d52004-03-05 05:43:42 +0000324 Bool ret = False;
325 Int v = 0;
326
jsgfcb1d1c02003-10-14 21:55:10 +0000327 switch(base) {
328 case 10:
329 case 0:
fitzhardinge3e2c6d52004-03-05 05:43:42 +0000330 v = c - '0';
331 ret = (c >= '0' && c <= '9');
332 break;
jsgfcb1d1c02003-10-14 21:55:10 +0000333
334 case 8:
fitzhardinge3e2c6d52004-03-05 05:43:42 +0000335 v = c - '0';
336 ret = (c >= '0' && c <= '7');
337 break;
jsgfcb1d1c02003-10-14 21:55:10 +0000338
339 case 16:
340 if (c >= '0' && c <= '9') {
fitzhardinge3e2c6d52004-03-05 05:43:42 +0000341 v = c - '0';
342 ret = True;
343 } else if (c >= 'a' && c <= 'f') {
344 v = c - 'a';
345 ret = True;
346 } else if (c >= 'A' && c <= 'F') {
347 v = c - 'F';
348 ret = True;
jsgfcb1d1c02003-10-14 21:55:10 +0000349 }
fitzhardinge3e2c6d52004-03-05 05:43:42 +0000350 break;
jsgfcb1d1c02003-10-14 21:55:10 +0000351 }
352
fitzhardinge3e2c6d52004-03-05 05:43:42 +0000353 if (vp && ret)
354 *vp = v;
355
356 return ret;
jsgfcb1d1c02003-10-14 21:55:10 +0000357}
358
359static inline Int getbase(Char **pp)
360{
361 Char *p = *pp;
362 Int base = 10;
363
364 if (p[0] == '0') {
365 if (p[1] == 'x') {
366 base = 16;
367 p += 2;
368 } else {
369 base = 8;
370 p++;
371 }
372 }
373 *pp = p;
374
375 return base;
376}
377
378static Int atoi(Char **pp, Int base)
379{
380 Char *p = *pp;
381 Int ret = 0;
382 Int v;
383 Bool neg = False;
384
385 if (*p == '-') {
386 neg = True;
387 p++;
388 }
389
390 if (base == 0)
391 base = getbase(&p);
392
njna3b16dc2005-03-13 18:49:00 +0000393 while(isdigit_base(*p, base, &v)) {
jsgfcb1d1c02003-10-14 21:55:10 +0000394 ret *= base;
395 ret += v;
396 p++;
397 }
398
399 *pp = p;
400 if (neg)
401 ret = -ret;
402 return ret;
403}
404
405static UInt atou(Char **pp, Int base)
406{
407 Char *p = *pp;
408 UInt ret = 0;
409 Int v;
410
411 if (base == 0)
412 base = getbase(&p);
413
njna3b16dc2005-03-13 18:49:00 +0000414 while(isdigit_base(*p, base, &v)) {
jsgfcb1d1c02003-10-14 21:55:10 +0000415 ret *= base;
416 ret += v;
417 p++;
418 }
419
420 *pp = p;
421 return ret;
422}
423
fitzhardingee5f9d912004-03-09 00:43:08 +0000424/* Skip a ':'-delimited name which may have ::, 'char' or other things in
425 <> brackets */
426static Char *templ_name(Char *p)
427{
428 Int brac = 0;
429
430 for(;;) {
431 if (*p == '<')
432 brac++;
433 if (*p == '>')
434 brac--;
435 /* skip quoted character (note, it could be anything, even a
436 literal \0)
437
438 XXX This is a complete botch; we can't do anything sane here,
439 like support \-quoting, because gcc doesn't seem to generate
440 it, and even if it did, we wouldn't know what "'\'" means -
441 the begining of '\'' or a char in itself ('\\')?
442 */
443 if (brac && p[0] == '\'' && p[2] == '\'')
444 p += 3;
fitzhardingec52fd7f2004-04-14 07:19:20 +0000445
446 /* If we're within <>, then treat :: as part of the name (a single
447 : still terminates) */
fitzhardingee5f9d912004-03-09 00:43:08 +0000448 if (*p == ':') {
fitzhardingec52fd7f2004-04-14 07:19:20 +0000449 if (brac && p[1] == ':' && p[-1] != '<')
fitzhardingee5f9d912004-03-09 00:43:08 +0000450 p++;
451 else
452 break;
453 }
jsgfcb1d1c02003-10-14 21:55:10 +0000454 p++;
455 }
456
457 return p;
458}
459
460/* updates pp to point to after parsed typeref */
461static void parse_typeref(Char **pp, Int *filep, Int *symp)
462{
463 Char *p = *pp;
464 Int file, sym;
465
466 file = sym = *filep = *symp = -1;
467
468 if (*p == '(') {
469 p++;
470 file = atoi(&p, 10);
471 if (*p++ != ',')
472 return;
473 sym = atoi(&p, 10);
474 if (*p++ != ')')
475 return;
476 } else if (VG_(isdigit)(*p)) {
477 sym = atoi(&p, 10);
478 }
479
480 *pp = p;
481 *filep = file;
482 *symp = sym;
483}
484
485static void stab_resolve(SymType *st, void *data)
486{
tomc2bf09c2005-07-29 18:33:19 +0000487 const Bool debug = False || stabs_debug;
jsgfcb1d1c02003-10-14 21:55:10 +0000488 Char *str = (Char *)data;
sewardj7eb7c582005-06-23 01:02:53 +0000489 vg_assert(!ML_(st_isresolved)(st));
jsgfcb1d1c02003-10-14 21:55:10 +0000490
491 if (debug)
492 VG_(printf)("stab_resolve: failing to do anything useful with symtype %p=%s\n",
493 st, str);
494}
495
496/* Top level of recursive descent parser for stab type information.
497 This only extracts the information needed by vg_symtypes.c, which
498 is just structure shapes, pointers and arrays. It is still
499 necessary to parse everything else, because there's no way to skip
500 it to get to the interesting bits. Also, new types can be
501 introduced anywhere, so we need to scan it all to pick them up. */
502static SymType *stabtype_parser(SegInfo *si, SymType *def, Char **pp)
503{
tomc2bf09c2005-07-29 18:33:19 +0000504 const Bool debug = False || stabs_debug;
jsgfcb1d1c02003-10-14 21:55:10 +0000505 Char *p = *pp;
506 Char t;
507 SymType *type;
508 StabTypeTab *tab = si->stab_typetab;
509
510/* make sure *p == 'c' and skip over it */
511#define EXPECT(c, msg) \
512 do { \
513 if (p == NULL || *p++ != c) { \
514 VG_(printf)("\n @@ expected '%c' at %s (remains=\"%s\")\n", c, msg, p); \
515 return NULL; \
516 } \
517 } while(0)
518
519/* return a pointer to just after the next ch after (and including) ptr */
520#define SKIPPAST(ptr, ch, msg) \
521 ({ \
522 Char *__zz_charptr = VG_(strchr)((ptr), (ch)); \
523 if (__zz_charptr == NULL) { \
524 VG_(printf)("\n @@ expected '%c' at %s (ptr=\"%s\")\n", (ch), (msg), (ptr)); \
525 return NULL; \
526 } \
527 __zz_charptr+1; \
528 })
529
530 t = *p++;
531
532 if (0 && debug)
533 VG_(printf)("stabtype_parser: parsing '%c' remains=\"%s\"\n", t, p);
534
535 switch(t) {
536 case '(':
537 case '0' ... '9': { /* reference (and perhaps definition) */
538 SymType *symtype;
539 Int file, sym;
540 Char *prev;
541
542 p--;
543 prev = p;
544
545 parse_typeref(&p, &file, &sym);
546
547 {
548 /* keep stabtype reference local, because the stabtype table
549 can be rearranged by new insertions, invalidating this
550 pointer; so copy the bits we need and don't hold onto the
551 pointer. */
552 StabType *stabtype = getStabType(tab, file, sym);
553
554 if (stabtype == NULL) {
555 VG_(printf)(" @@ bad type ref: %s\n", prev);
556 return NULL;
557 }
558
559 if (stabtype->type == NULL) {
sewardj7eb7c582005-06-23 01:02:53 +0000560 stabtype->type = ML_(st_mkunresolved)(def, stab_resolve, NULL);
jsgfcb1d1c02003-10-14 21:55:10 +0000561 if (debug)
562 VG_(printf)("making (%d,%d) %p unresolved\n", file, sym, stabtype->type);
563 }
564
565 symtype = stabtype->type;
566 }
567
568 if (*p == '=') {
569 /* a type definition */
570 p++;
571
sewardj7eb7c582005-06-23 01:02:53 +0000572 if (ML_(st_isresolved)(symtype)) {
jsgfcb1d1c02003-10-14 21:55:10 +0000573 /* a redefinition; clear the old type out */
574 StabType *stabtype = getStabType(tab, file, sym);
575
sewardj7eb7c582005-06-23 01:02:53 +0000576 symtype = stabtype->type = ML_(st_mkunresolved)(NULL, stab_resolve, NULL);
jsgfcb1d1c02003-10-14 21:55:10 +0000577 if (debug)
578 VG_(printf)("creating new type %p for definition (%d,%d)\n",
579 symtype, file, sym);
580 } else
sewardj7eb7c582005-06-23 01:02:53 +0000581 ML_(st_unresolved_setdata)(symtype, stab_resolve, p);
jsgfcb1d1c02003-10-14 21:55:10 +0000582
583 if (debug)
584 VG_(printf)("defining type %p (%d,%d) = %s\n", symtype, file, sym, p);
585
fitzhardinge3e2c6d52004-03-05 05:43:42 +0000586 /* Skip type attributes
587 '@' could also be pointer-to-member, so we need to see if
588 the following character looks like a type reference or not.
589 */
590 while(*p == '@' && !(VG_(isdigit)(p[1]) || p[1] == '-' || p[1] == '(') )
jsgfcb1d1c02003-10-14 21:55:10 +0000591 p = SKIPPAST(p+1, ';', "type attrib");
592
593 prev = p;
594
595 type = stabtype_parser(si, symtype, &p);
596 if (debug)
597 VG_(printf)("parsed definition: type=%p symtype=%p\n", type, symtype);
598
599 if (type != symtype) {
600 StabType *stabtype = getStabType(tab, file, sym);
601
602 vg_assert(stabtype->type != NULL);
603 if (0) {
604 /* XXX bogus */
sewardj7eb7c582005-06-23 01:02:53 +0000605 vg_assert(!ML_(st_isresolved)(stabtype->type));
jsgfcb1d1c02003-10-14 21:55:10 +0000606 VG_(arena_free)(VG_AR_SYMTAB, stabtype->type); /* XXX proper free method? */
607 }
608 stabtype->type = type;
sewardj7eb7c582005-06-23 01:02:53 +0000609 } else if (!ML_(st_isresolved)(type)) {
jsgfcb1d1c02003-10-14 21:55:10 +0000610 /* If type is defined in terms of itself, and is
611 therefore not resolved, it is void */
612 if (debug)
613 VG_(printf)("type %p is defined in terms of self - making void\n", type);
sewardj7eb7c582005-06-23 01:02:53 +0000614 type = ML_(st_mkvoid)(type);
jsgfcb1d1c02003-10-14 21:55:10 +0000615 }
616 } else {
617 /* just a type reference */
618 type = symtype;
sewardj7eb7c582005-06-23 01:02:53 +0000619 if ((0 || debug) && !ML_(st_isresolved)(type))
jsgfcb1d1c02003-10-14 21:55:10 +0000620 VG_(printf)("type %p (%d,%d) is unresolved\n", type, file, sym);
sewardj7eb7c582005-06-23 01:02:53 +0000621 if ((0 || debug) && ML_(st_isresolved)(type))
jsgfcb1d1c02003-10-14 21:55:10 +0000622 VG_(printf)("reference (%d,%d) -> %p\n", file, sym, type);
623 }
624 break;
625 }
626
627 case '-': { /* -ve types for builtins? */
628 Int n;
629 p--;
630 n = atoi(&p, 0);
631 switch(n) {
sewardj7eb7c582005-06-23 01:02:53 +0000632 case -1: type = ML_(st_mkint)(def, 4, True); break;
633 case -2: type = ML_(st_mkint)(def, 1, True); break;
634 case -3: type = ML_(st_mkint)(def, 2, True); break;
635 case -4: type = ML_(st_mkint)(def, 4, True); break;
636 case -5: type = ML_(st_mkint)(def, 1, False); break;
637 case -6: type = ML_(st_mkint)(def, 1, True); break;
638 case -7: type = ML_(st_mkint)(def, 2, False); break;
639 case -8: type = ML_(st_mkint)(def, 4, False); break;
640 case -9: type = ML_(st_mkint)(def, 4, False); break;
641 case -10: type = ML_(st_mkint)(def, 4, False); break;
642 case -11: type = ML_(st_mkvoid)(def); break;
643 case -12: type = ML_(st_mkfloat)(def, 4); break;
644 case -13: type = ML_(st_mkfloat)(def, 8); break;
645 case -15: type = ML_(st_mkint)(def, 4, True); break;
646 case -16: type = ML_(st_mkbool)(def, 4); break;
647 case -17: type = ML_(st_mkfloat)(def, 4); break;
648 case -18: type = ML_(st_mkfloat)(def, 8); break;
649 case -20: type = ML_(st_mkint)(def, 1, False); break;
650 case -21: type = ML_(st_mkint)(def, 1, False); break;
651 case -22: type = ML_(st_mkint)(def, 2, False); break;
652 case -23: type = ML_(st_mkint)(def, 4, False); break;
653 case -24: type = ML_(st_mkint)(def, 4, False); break;
654 case -27: type = ML_(st_mkint)(def, 1, True); break;
655 case -28: type = ML_(st_mkint)(def, 2, True); break;
656 case -29: type = ML_(st_mkint)(def, 4, True); break;
tomd8d0ad22005-07-20 13:56:22 +0000657 case -30: type = ML_(st_mkint)(def, 2, False); break;
sewardj7eb7c582005-06-23 01:02:53 +0000658 case -31: type = ML_(st_mkint)(def, 8, True); break;
659 case -32: type = ML_(st_mkint)(def, 8, False); break;
660 case -33: type = ML_(st_mkint)(def, 8, False); break;
661 case -34: type = ML_(st_mkint)(def, 8, True); break;
jsgfcb1d1c02003-10-14 21:55:10 +0000662
663 default:
664 VG_(printf)(" @@ unrecognized negative type %d\n", n);
665 type = NULL;
666 break;
667 }
      /* Different versions of gcc seem to disagree about whether a
         negative type is followed by a semicolon or not, and the stabs
         spec (such as it is) is not clear either so we will skip a
         semicolon if there is one. */
672 if (*p == ';')
673 p++;
jsgfcb1d1c02003-10-14 21:55:10 +0000674 break;
675 }
676
677 case 't': { /* typedef: 't' TYPE */
678 SymType *td = stabtype_parser(si, NULL, &p);
sewardj7eb7c582005-06-23 01:02:53 +0000679 type = ML_(st_mktypedef)(def, NULL, td);
jsgfcb1d1c02003-10-14 21:55:10 +0000680 break;
681 }
682
683 case 'R': { /* FP type: 'R' FP-TYPE ';' BYTES ';' (extra) ';' */
684 Int fptype, bytes;
685
686 fptype = atoi(&p, 0);
687 EXPECT(';', "FP-TYPE");
688 bytes = atoi(&p, 0);
689 EXPECT(';', "FP-TYPE bytes");
jsgfcb1d1c02003-10-14 21:55:10 +0000690
sewardj7eb7c582005-06-23 01:02:53 +0000691 type = ML_(st_mkfloat)(def, bytes);
jsgfcb1d1c02003-10-14 21:55:10 +0000692 break;
693 }
694
695 case 'r': { /* range: 'r' TYPE ';' MIN ';' MAX ';' */
696 Int min, max;
697 SymType *rtype = stabtype_parser(si, NULL, &p);
698
699 EXPECT(';', "range TYPE");
700
701 /* MIN and MAX are: (INTEGER | 'A' OFFSET | 'T' OFFSET | 'a' REGNO | 't' REGNO | 'J')
702 only expect INTEGER for now (no way to represent the rest yet, and no need so far)
703 */
704 min = atoi(&p, 0);
705 EXPECT(';', "range MIN");
706 max = atoi(&p, 0);
707 EXPECT(';', "range MAX");
708
709 if (debug && 0)
710 VG_(printf)("range: rtype=%p def=%p min=%d max=%d remains = \"%s\"\n",
711 rtype, def, min, max, p);
712
713 if (rtype == def) {
714 if (debug)
715 VG_(printf)("type %p is subrange of self - making int\n", def);
sewardj7eb7c582005-06-23 01:02:53 +0000716 type = ML_(st_mkint)(def, sizeof(int), False);
jsgfcb1d1c02003-10-14 21:55:10 +0000717 } else if (min > max && max == 0) {
718 if (debug)
719 VG_(printf)("type %p has backwards range %d - %d: making float\n",
720 def, min, max);
sewardj7eb7c582005-06-23 01:02:53 +0000721 type = ML_(st_mkfloat)(def, min);
jsgfcb1d1c02003-10-14 21:55:10 +0000722 } else
sewardj7eb7c582005-06-23 01:02:53 +0000723 type = ML_(st_mkrange)(def, rtype, min, max);
jsgfcb1d1c02003-10-14 21:55:10 +0000724
sewardj7eb7c582005-06-23 01:02:53 +0000725 vg_assert(ML_(st_isresolved)(type));
jsgfcb1d1c02003-10-14 21:55:10 +0000726 break;
727 }
728
729 case '&': /* reference */
730 case '*': { /* pointer */
fitzhardinge3e2c6d52004-03-05 05:43:42 +0000731 /* ('*' | '&') TYPE */
jsgfcb1d1c02003-10-14 21:55:10 +0000732 type = stabtype_parser(si, NULL, &p);
sewardj7eb7c582005-06-23 01:02:53 +0000733 type = ML_(st_mkpointer)(def, type);
jsgfcb1d1c02003-10-14 21:55:10 +0000734 break;
735 }
736
jseward9fc153f2004-01-04 22:50:16 +0000737 case 'k': /* const */
tom07720f32005-07-20 17:48:18 +0000738 case 'B': /* volatile */
739 case 'd': { /* file (pascal only) */
740 /* ('k' | 'B' | 'd') TYPE */
jseward9fc153f2004-01-04 22:50:16 +0000741 type = stabtype_parser(si, NULL, &p);
742 break;
743 }
744
jsgfcb1d1c02003-10-14 21:55:10 +0000745 case 'x': { /* reference to undefined type */
746 /* 'x' ('s' | 'u' | 'e') NAME ':' */
jsgfcb1d1c02003-10-14 21:55:10 +0000747 Char kind = *p++; /* get kind */
748 Char *name = p;
749
fitzhardingee5f9d912004-03-09 00:43:08 +0000750 p = templ_name(name);
jsgfcb1d1c02003-10-14 21:55:10 +0000751 EXPECT(':', "struct/union/enum ref");
752
sewardj7eb7c582005-06-23 01:02:53 +0000753 name = ML_(addStr)(si, name, p-1-name);
jsgfcb1d1c02003-10-14 21:55:10 +0000754
755 switch (kind) {
756 case 's': /* struct */
757 case 'u': /* union */
758 type = structRef(tab, def, kind == 's', name);
759 break;
760
761 case 'e': /* enum */
sewardj7eb7c582005-06-23 01:02:53 +0000762 type = ML_(st_mkenum)(def, 0);
jsgfcb1d1c02003-10-14 21:55:10 +0000763 break;
764
765 default:
766 VG_(printf)(" @@ unexpected type ref %c\n", p[-1]);
767 return NULL;
768 };
769
770 break;
771 }
772
fitzhardinge7194a562004-03-09 01:20:47 +0000773 case 'S': { /* set/bitstring */
774 /* 'S' TYPE */
775 SymType *typeinfo;
776
777 typeinfo = stabtype_parser(si, NULL, &p);
778
sewardj7eb7c582005-06-23 01:02:53 +0000779 type = ML_(st_mkarray)(def, typeinfo, ML_(st_mkint)(NULL, 1, True));
fitzhardinge7194a562004-03-09 01:20:47 +0000780 break;
781 }
782
jsgfcb1d1c02003-10-14 21:55:10 +0000783 case 'P': /* packed array */
784 case 'a': { /* array */
fitzhardinge3e2c6d52004-03-05 05:43:42 +0000785 /* ( 'a' | 'P' ) IDX-TYPE TYPE */
jsgfcb1d1c02003-10-14 21:55:10 +0000786 SymType *idxtype;
787 SymType *artype;
788
789 idxtype = stabtype_parser(si, NULL, &p);
790 artype = stabtype_parser(si, NULL, &p);
791
sewardj7eb7c582005-06-23 01:02:53 +0000792 type = ML_(st_mkarray)(def, idxtype, artype);
jsgfcb1d1c02003-10-14 21:55:10 +0000793
794 break;
795 }
796
797 case 'e': { /* enum */
798 /* 'e' ( NAME ':' N ',' )* ';' */
799
sewardj7eb7c582005-06-23 01:02:53 +0000800 type = ML_(st_mkenum)(def, 0);
jsgfcb1d1c02003-10-14 21:55:10 +0000801
802 /* don't really care about tags; just skip them */
803 while(*p != ';') {
804 p = SKIPPAST(p, ':', "enum tag NAME");
805 p = SKIPPAST(p, ',', "enum tag N");
806 }
807 p++; /* skip ';' */
808
809 break;
810 }
811
812 case 'u': /* union */
813 case 's': { /* struct */
814 /* Gad. Here we go:
815
fitzhardinge3e2c6d52004-03-05 05:43:42 +0000816 ( 's' | 'u' ) SIZE
jsgfcb1d1c02003-10-14 21:55:10 +0000817 ( '!' NBASE ',' ( VIRT PUB OFF ',' BASE-TYPE ){NBASE} )?
818
819 ( NAME ( ':' ( '/' [0-9] )? TYPE ',' OFFSET ( ',' SIZE )?
820 | '::' ( METHOD-TYPE ':' MANGLE-ARGS ';'
821 PROT QUAL ( '.' | '*' VIRT | '?' ) )+
822 )
823 ';'
824 )*
825
826 ( '~%' FIRST-BASE-CLASS )?
827 ';'
828 */
829 UInt size;
830 Bool method = False;
831
832 size = atou(&p, 0);
sewardj7eb7c582005-06-23 01:02:53 +0000833 type = (t == 's' ? ML_(st_mkstruct) : ML_(st_mkunion))(def, size, 0);
jsgfcb1d1c02003-10-14 21:55:10 +0000834
835 if (*p == '!') {
836 /* base classes */
837 Int nbase;
838
839 p++;
840 nbase = atoi(&p, 0);
841 EXPECT(',', "class base class count");
842 while(nbase--) {
843 p++; /* VIRT flag */
844 p++; /* PUB flag */
845 atoi(&p, 0); /* offset */
846 EXPECT(',', "class base class ref");
847 stabtype_parser(si, NULL, &p);
848
849 if (*p == ';') /* who eats this? */
850 p++;
851 }
852 }
853
854 while(*p != ';') {
toma2c76032005-11-16 00:04:58 +0000855 Char *start = p;
jsgfcb1d1c02003-10-14 21:55:10 +0000856 Char *name;
857 UInt off, sz;
858 SymType *fieldty;
859
toma2c76032005-11-16 00:04:58 +0000860 if (VG_(strncmp)(p, "operator<::", 11) == 0 ||
861 VG_(strncmp)(p, "operator>::", 11) == 0 ||
862 VG_(strncmp)(p, "operator<=::", 12) == 0 ||
863 VG_(strncmp)(p, "operator>=::", 12) == 0 ||
864 VG_(strncmp)(p, "operator<<::", 12) == 0 ||
865 VG_(strncmp)(p, "operator>>::", 12) == 0 ||
866 VG_(strncmp)(p, "operator->::", 12) == 0) {
867 p = SKIPPAST(p, ':', "member name");
868 } else {
869 p = templ_name(p);
870 EXPECT(':', "member name");
871 }
fitzhardinged8685b82004-02-24 23:46:06 +0000872
toma2c76032005-11-16 00:04:58 +0000873 if (p[0] == ':') {
jsgfcb1d1c02003-10-14 21:55:10 +0000874 /* c++ method names end in :: */
875 method = True;
876
toma2c76032005-11-16 00:04:58 +0000877 if (VG_(strncmp)(start, "op$", 3) == 0) {
jsgfcb1d1c02003-10-14 21:55:10 +0000878 /* According to stabs.info, operators are named
879 ( "op$::" OP '.' ), where OP is +=, etc. Current
880 gcc doesn't seem to use this; operators just
881 appear as "operator==::" */
toma2c76032005-11-16 00:04:58 +0000882 p = SKIPPAST(p, '.', "op$ name");
jsgfcb1d1c02003-10-14 21:55:10 +0000883 }
toma2c76032005-11-16 00:04:58 +0000884 name = ML_(addStr)(si, start, p-start-1);
885 p = p+1;
jsgfcb1d1c02003-10-14 21:55:10 +0000886 } else {
toma2c76032005-11-16 00:04:58 +0000887 name = ML_(addStr)(si, start, p-start-1);
jsgfcb1d1c02003-10-14 21:55:10 +0000888 }
889
890 if (method) {
891 /* don't care about methods, but we still have to crunch
892 through this goo */
893 fieldty = NULL;
894 off = sz = 0;
895
896 do {
897 stabtype_parser(si, NULL, &p); /* METHOD-TYPE */
898
899 EXPECT(':', "struct method MANGLE-ARGS");
900 p = SKIPPAST(p, ';', "struct method MANGLE-ARGS");
901
902 p += 1; /* skip PROT */
903 if (*p >= 'A' && *p <= 'Z')
904 p++; /* skip QUAL (if present) */
905
906 switch(*p++) {
907 case '*': /* VIRT: VTAB-IDX ';' OVERRIDE-CLASS ';' */
908 atoi(&p, 0); /* skip VTAB-IDX */
909 EXPECT(';', "struct method vtab idx");
910 stabtype_parser(si, NULL, &p); /* skip OVERRIDE-CLASS */
911 EXPECT(';', "struct method vtab override");
912 break;
913
914 default:
915 VG_(printf)(" @@ struct method unexpected member-type '%c' \"%s\" remains\n",
916 p[-1], p);
917 /* FALLTHROUGH */
918 case '?':
919 case '.':
920 break;
921 }
922 } while (*p != ';');
923 } else {
924 if (*p == '/') {
925 /* c++ visibility spec: '/' PROT */
926 p += 2;
927 }
928
929 fieldty = stabtype_parser(si, NULL, &p);
930
931 if (*p == ':') {
932 /* static member; don't care (it will appear later) */
933 fieldty = NULL;
934 off = sz = 0;
935
936 p = SKIPPAST(p, ';', "struct static member");
937 p--; /* point at ';' */
938 } else {
939 EXPECT(',', "struct TYPE");
940
               /* logic dictates that the offset would always be
                  positive and that atou would work here but GNAT
                  has other ideas - see bug 90128 for more details */
944 off = atoi(&p, 0);
jsgfcb1d1c02003-10-14 21:55:10 +0000945
946 if (*p == ',') {
947 EXPECT(',', "struct OFFSET");
tom265eb5c2005-07-21 11:49:24 +0000948
949 /* as with the offset, it seems that GNAT likes to
950 generate negative sizes so we use atoi here in
951 order to allow them - see bug 109385 for details */
952 sz = atoi(&p, 0);
jsgfcb1d1c02003-10-14 21:55:10 +0000953 } else {
954 /* sometimes the size is missing and assumed to be a
955 pointer (in bits) */
956 sz = sizeof(void *) * 8;
957 }
958 }
959 }
960
961 if (fieldty != NULL)
sewardj7eb7c582005-06-23 01:02:53 +0000962 ML_(st_addfield)(type, name, fieldty, off, sz);
jsgfcb1d1c02003-10-14 21:55:10 +0000963
964 EXPECT(';', "struct field end");
965 }
966 p++; /* skip final ';' */
967
968 /* one final C++ surprise */
969 if (*p == '~') {
970 /* "~%" FIRST-BASE-CLASS ';' */
971 p++;
972 EXPECT('%', "struct first base");
973 stabtype_parser(si, NULL, &p); /* skip FIRST-BASE-CLASS */
974 EXPECT(';', "struct first base semi");
975 }
976
977 break;
978 }
979
980 case 'f': /* function */
981 /* 'f' TYPE */
sewardj7eb7c582005-06-23 01:02:53 +0000982 type = ML_(st_mkvoid)(def); /* approximate functions as void */
jsgfcb1d1c02003-10-14 21:55:10 +0000983 stabtype_parser(si, NULL, &p);
984 break;
985
986 case '#': /* method */
987 /* '#' ( '#' RET-TYPE |
988 CLASS-TYPE ',' RET-TYPE ',' ( ARG-TYPE ( ',' ARG-TYPE )* )? )
989 ';'
990 */
sewardj7eb7c582005-06-23 01:02:53 +0000991 type = ML_(st_mkvoid)(def); /* methods are really void */
jsgfcb1d1c02003-10-14 21:55:10 +0000992
993 if (*p == '#') {
994 p++; /* skip '#' */
995 stabtype_parser(si, NULL, &p); /* RET-TYPE */
996 } else {
997 stabtype_parser(si, NULL, &p); /* CLASS-TYPE */
998 EXPECT(',', "method CLASS-TYPE");
999
1000 stabtype_parser(si, NULL, &p); /* RET-TYPE */
1001 EXPECT(',', "method RET-TYPE");
1002
1003 while (*p != ';') {
1004 stabtype_parser(si, NULL, &p);
1005 if (*p == ',')
1006 p++;
1007 else if (*p != ';')
1008 VG_(printf)(" @@ method ARG-TYPE list unexpected '%c'\n", *p);
1009 }
1010 }
1011
1012 EXPECT(';', "method definition");
1013 break;
1014
fitzhardinge3e2c6d52004-03-05 05:43:42 +00001015 case '@': /* pointer to member */
1016 /* '@' CLASS-TYPE ',' MEMBER-TYPE */
sewardj7eb7c582005-06-23 01:02:53 +00001017 type = ML_(st_mkint)(def, sizeof(int), False); /* make it an int for our use */
fitzhardinge3e2c6d52004-03-05 05:43:42 +00001018
1019 stabtype_parser(si, NULL, &p); /* CLASS-TYPE */
1020 EXPECT(',', "member-pointer CLASS-TYPE");
1021 stabtype_parser(si, NULL, &p); /* MEMBER-TYPE */
1022 break;
1023
jsgfcb1d1c02003-10-14 21:55:10 +00001024 default:
1025 VG_(printf)(" @@ don't know what type '%c' is\n", t);
1026 type = NULL;
1027 break;
1028 }
1029#undef EXPECT
1030#undef SKIPPAST
1031
1032 if (type == NULL)
1033 VG_(printf)(" @@ parsing %s gave NULL type (%s remains)\n", *pp, p);
1034
1035 *pp = p;
1036
1037 return type;
1038}
1039
1040/* parse a symbol reference: NAME ':' DESC TYPE */
1041static Bool initSym(SegInfo *si, Sym *sym, stab_types kind, Char **namep, Int val)
1042{
tomc2bf09c2005-07-29 18:33:19 +00001043 const Bool debug = False || stabs_debug;
jsgfcb1d1c02003-10-14 21:55:10 +00001044 Char *name = *namep;
1045 Char *ty;
1046 Int len;
1047 Bool isTypedef = False;
1048 Bool isStruct = False;
1049 SymType *base;
1050
1051 if (debug && 0)
1052 VG_(printf)("initSym(si=%p, tab=%p, sym=%p, kind=%d, name=%p \"%s\", val=%d)\n",
1053 si, si->stab_typetab, sym, kind, name, name, val);
1054
njnda388f42005-03-26 16:10:49 +00001055 /* First first ':' */
thughes4aa03c42004-11-12 23:16:31 +00001056 ty = VG_(strchr)(name, ':');
njnda388f42005-03-26 16:10:49 +00001057
1058 /* Skip '::' */
1059 while (ty && ty[1] == ':')
1060 ty = VG_(strchr)(ty + 2, ':');
1061
1062 if (ty == NULL) {
1063 /* there was no ':' */
1064 *namep += VG_(strlen)(name);
1065 return True; /* skip */
1066 }
jsgfcb1d1c02003-10-14 21:55:10 +00001067
1068 len = ty - name;
1069
1070 if (debug) {
1071 Char buf[len+1];
1072 VG_(strncpy_safely)(buf, name, len+1);
1073 VG_(printf)("\ninitSym name=\"%s\" type=%s\n", buf, ty+1);
1074 }
1075
1076 if (*ty != ':') {
1077 /* no type info */
sewardj7eb7c582005-06-23 01:02:53 +00001078 sym->type = ML_(st_mkvoid)(NULL);
jsgfcb1d1c02003-10-14 21:55:10 +00001079 } else {
1080 ty++; /* skip ':' */
1081
1082 /* chew through an initial sequence of
1083 type descriptor type describers */
1084 for(;;) {
1085 switch(*ty) {
1086 case 'a': case 'b': case 'c': case 'C':
1087 case 'd': case 'D': case 'f': case 'F':
1088 case 'G': case 'i': case 'I': case 'J':
1089 case 'L': case 'm': case 'p': case 'P':
1090 case 'Q': case 'R': case 'r': case 'S':
1091 case 's': case 'v': case 'V': case 'x':
1092 case 'X':
1093 break;
1094
1095 case 'T': /* struct/union/enum */
1096 isStruct = True;
1097 break;
1098
1099 case 't': /* typedef handled within stabtype_parser */
1100 isTypedef = True;
1101 /* FALLTHROUGH */
1102 case '(': case '-': case '0' ... '9': /* type reference */
1103 default:
1104 goto out;
1105 }
1106 ty++;
1107 }
1108
1109 out:
1110 sym->type = stabtype_parser(si, NULL, &ty);
sewardj7eb7c582005-06-23 01:02:53 +00001111 base = ML_(st_basetype)(sym->type, False);
1112 if (isStruct && (ML_(st_isstruct)(base) || ML_(st_isunion)(base))) {
1113 Char *sname = ML_(addStr)(si, name, len);
1114 structDef(si->stab_typetab, base, ML_(st_isstruct)(base), sname);
jsgfcb1d1c02003-10-14 21:55:10 +00001115 }
1116
1117 if (isTypedef) {
sewardj7eb7c582005-06-23 01:02:53 +00001118 Char *tname = ML_(addStr)(si, name, len);
jsgfcb1d1c02003-10-14 21:55:10 +00001119 vg_assert(sym->type != base);
1120 if (debug)
1121 VG_(printf)(" typedef %p \"%s\"\n", sym->type, tname);
sewardj7eb7c582005-06-23 01:02:53 +00001122 ML_(st_setname)(sym->type, tname);
1123 ML_(st_setname)(base, tname);
jsgfcb1d1c02003-10-14 21:55:10 +00001124 }
1125 }
1126 *namep = ty;
1127
1128 switch(kind) {
1129 case N_STSYM:
1130 case N_LCSYM:
1131 sym->kind = SyStatic;
mueller5ed88f22004-01-06 16:02:29 +00001132 sym->u.addr = si->offset + (Addr)val;
jsgfcb1d1c02003-10-14 21:55:10 +00001133 break;
1134
1135 case N_PSYM:
1136 sym->kind = SyEBPrel; /* +ve offset off EBP (erk, or ESP if no frame pointer) */
mueller5ed88f22004-01-06 16:02:29 +00001137 sym->u.offset = val;
jsgfcb1d1c02003-10-14 21:55:10 +00001138 break;
1139
1140 case N_LSYM:
1141 if (val < 0)
1142 sym->kind = SyEBPrel; /* -ve off EBP when there's a frame pointer */
1143 else
1144 sym->kind = SyESPrel; /* +ve off ESP when there's no frame pointer */
mueller5ed88f22004-01-06 16:02:29 +00001145 sym->u.offset = val;
jsgfcb1d1c02003-10-14 21:55:10 +00001146 break;
1147
1148 case N_RSYM:
1149 sym->kind = SyReg;
mueller5ed88f22004-01-06 16:02:29 +00001150 sym->u.regno = val;
jsgfcb1d1c02003-10-14 21:55:10 +00001151 break;
1152
1153 case N_GSYM:
1154 sym->kind = SyGlobal;
mueller5ed88f22004-01-06 16:02:29 +00001155 sym->u.addr = 0; /* XXX should really look up global address */
jsgfcb1d1c02003-10-14 21:55:10 +00001156 break;
1157
1158 default:
1159 VG_(core_panic)("bad sym kind");
1160 }
1161
1162 if (debug)
1163 VG_(printf)(" %s = type=%p\n", (isStruct || isTypedef) ? "skipping" : "adding", sym->type);
1164
1165 if (isStruct || isTypedef) {
1166 return True; /* skip */
1167 } else {
sewardj7eb7c582005-06-23 01:02:53 +00001168 sym->name = ML_(addStr)(si, name, len);
jsgfcb1d1c02003-10-14 21:55:10 +00001169 return False; /* don't skip */
1170 }
1171}
1172
1173/* list of unbound symbols for next scope */
1174struct symlist {
1175 Sym sym;
1176 struct symlist *next;
1177};
1178
1179/* XXX TODO: make sure added syms are unique. A lot of syms added to
1180 the global scope are not. On the other hand, skipping type
1181 definitions helps a lot. */
1182static Scope *addSymsToScope(Scope *sc, struct symlist *list, Int nsyms, Scope *outer)
1183{
tomc2bf09c2005-07-29 18:33:19 +00001184 const Bool debug = False || stabs_debug;
jsgfcb1d1c02003-10-14 21:55:10 +00001185 Int j;
1186 struct symlist *n;
1187 Int base;
1188
1189 if (sc == NULL) {
1190 sc = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(*sc));
1191 sc->syms = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(*sc->syms) * nsyms);
1192 sc->nsyms = nsyms;
1193 base = 0;
1194 sc->outer = outer;
1195 if (outer == NULL)
1196 sc->depth = 0;
1197 else
1198 sc->depth = outer->depth+1;
1199 } else {
1200 Sym *s = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(*s) * (sc->nsyms + nsyms));
1201
1202 VG_(memcpy)(s, sc->syms, sc->nsyms * sizeof(*s));
1203 VG_(arena_free)(VG_AR_SYMTAB, sc->syms);
1204 sc->syms = s;
1205 base = sc->nsyms;
1206 sc->nsyms += nsyms;
1207 }
1208
1209 /* bind any unbound syms to new scope */
1210 for(j = 0; j < nsyms; j++, list = n) {
1211 if (debug)
1212 VG_(printf)(" adding (%p) %s to scope %p depth %d\n",
1213 list->sym.name, list->sym.name, sc, sc->depth);
1214 n = list->next;
1215 sc->syms[base+j] = list->sym;
1216 VG_(arena_free)(VG_AR_SYMTAB, list);
1217 }
1218 vg_assert(list == NULL);
1219
1220 return sc;
1221}
1222
1223/* Read stabs-format debug info. This is all rather horrible because
1224 stabs is a underspecified, kludgy hack.
1225*/
sewardj7eb7c582005-06-23 01:02:53 +00001226void ML_(read_debuginfo_stabs) ( SegInfo* si,
jsgfcb1d1c02003-10-14 21:55:10 +00001227 UChar* stabC, Int stab_sz,
1228 UChar* stabstr, Int stabstr_sz )
1229{
tomc2bf09c2005-07-29 18:33:19 +00001230 const Bool debug = False || stabs_debug;
jsgfcb1d1c02003-10-14 21:55:10 +00001231 Int i;
1232 Int n_stab_entries;
1233 struct nlist* stab = (struct nlist*)stabC;
1234 UChar *next_stabstr = NULL;
1235 /* state for various things */
1236 struct {
1237 Addr start; /* start address */
1238 Addr end; /* end address */
1239 Char *name; /* name */
1240 Char *filename; /* source file name */
1241 Int line; /* first line */
1242 } func = { 0, 0, NULL, NULL, -1 };
1243 struct {
1244 Char *name;
1245 Bool same;
1246 } file = { NULL, True };
1247 struct {
1248 Int prev; /* prev line */
1249 Int no; /* current line */
1250 Int ovf; /* line wrap */
1251 Addr addr; /* start of this line */
1252 Bool first; /* first line in function */
1253 Bool jump; /* was a jump from prev line (inline?) */
1254 } line = { 0, 0, 0, 0, False };
1255 struct {
1256 Scope *scope; /* current scope */
1257 struct symlist *symlist; /* unbound symbols */
1258 Int nsyms; /* number of unbound scopes */
1259 Addr addr; /* start of range */
1260 Int depth;
1261 } scope = { NULL, NULL, 0, 0 };
1262 Scope *global;
1263 Int fileidx = 0;
1264 StabTypeTab *tab;
1265
1266 if (si->stab_typetab == NULL) {
1267 si->stab_typetab = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(StabTypeTab));
1268 VG_(memset)(si->stab_typetab, 0, sizeof(StabTypeTab));
1269 }
1270 tab = si->stab_typetab;
1271
1272 /* Ok. It all looks plausible. Go on and read debug data.
1273 stab kinds: 100 N_SO a source file name
1274 68 N_SLINE a source line number
1275 36 N_FUN start of a function
1276
1277 In this loop, we maintain a current file name, updated as
1278 N_SO/N_SOLs appear, and a current function base address,
1279 updated as N_FUNs appear. Based on that, address ranges for
1280 N_SLINEs are calculated, and stuffed into the line info table.
1281
1282 Finding the instruction address range covered by an N_SLINE is
1283 complicated; see the N_SLINE case below.
1284 */
sewardj7eb7c582005-06-23 01:02:53 +00001285 file.name = ML_(addStr)(si,"???", -1);
jsgfcb1d1c02003-10-14 21:55:10 +00001286
1287 n_stab_entries = stab_sz/(int)sizeof(struct nlist);
1288
1289 /* empty initial file-wide scope */
1290 global = addSymsToScope(NULL, NULL, 0, NULL);
1291 scope.scope = global;
1292
1293 for (i = 0; i < n_stab_entries; i++) {
1294 const struct nlist *st = &stab[i];
1295 Char *no_fn_name = "???";
1296 Char *string;
1297
1298 if (debug && 1) {
1299 VG_(printf) ( "%2d type=%d othr=%d desc=%d value=0x%x strx=%d %s\n", i,
1300 st->n_type, st->n_other, st->n_desc,
1301 (int)st->n_value,
1302 (int)st->n_un.n_strx,
1303 stabstr + st->n_un.n_strx );
1304 }
1305
1306 /* handle continued string stabs */
1307 {
tomc2bf09c2005-07-29 18:33:19 +00001308 const Bool contdebug = False || stabs_debug;
jsgfcb1d1c02003-10-14 21:55:10 +00001309 Int buflen = 0;
1310 Int idx = 0;
1311 Char *buf = NULL;
1312 Int len;
1313 Bool continuing = False;
1314 UInt stringidx;
1315
1316 stringidx = st->n_un.n_strx;
1317 string = stabstr + stringidx;
1318 len = VG_(strlen)(string);
1319
1320 while(string && len > 0 && (continuing || string[len-1] == '\\')) {
1321 /* Gak, we have a continuation. Skip forward through
1322 subsequent stabs to gather all the parts of the
1323 continuation. Increment i, but keep st pointing at
1324 current stab. */
1325
1326 continuing = string[len-1] == '\\';
1327
1328 /* remove trailing \ */
1329 while(string[len-1] == '\\' && len > 0)
1330 len--;
1331
1332 if (contdebug)
1333 VG_(printf)("found extension string: \"%s\" len=%d(%c) idx=%d buflen=%d\n",
1334 string, len, string[len-1], idx, buflen);
1335
1336 /* XXX this is silly. The si->strtab should have a way of
1337 appending to the last added string... */
1338 if ((idx + len) >= buflen) {
1339 Char *n;
1340
1341 if (buflen == 0)
1342 buflen = 16;
1343 while((idx + len) >= buflen)
1344 buflen *= 2;
1345 n = VG_(arena_malloc)(VG_AR_SYMTAB, buflen);
1346 VG_(memcpy)(n, buf, idx);
1347
1348 if (buf != NULL)
1349 VG_(arena_free)(VG_AR_SYMTAB, buf);
1350 buf = n;
1351 }
1352
1353 VG_(memcpy)(&buf[idx], string, len);
1354 idx += len;
1355 if (contdebug) {
1356 buf[idx] = '\0';
1357 VG_(printf)("working buf=\"%s\"\n", buf);
1358 }
1359
1360 i++;
1361 if (i >= n_stab_entries)
1362 break;
1363
1364 if (stab[i].n_un.n_strx) {
1365 string = stabstr + stab[i].n_un.n_strx;
1366 len = VG_(strlen)(string);
1367 } else {
1368 string = NULL;
1369 len = 0;
1370 }
1371 }
1372
1373 if (buf != NULL) {
1374 i--; /* overstepped */
sewardj7eb7c582005-06-23 01:02:53 +00001375 string = ML_(addStr)(si, buf, idx);
jsgfcb1d1c02003-10-14 21:55:10 +00001376 VG_(arena_free)(VG_AR_SYMTAB, buf);
1377 if (contdebug)
1378 VG_(printf)("made composite: \"%s\"\n", string);
1379 }
1380 }
1381
1382 switch(st->n_type) {
1383 case N_UNDEF:
1384 /* new string table base */
1385 if (next_stabstr != NULL) {
1386 stabstr_sz -= next_stabstr - stabstr;
1387 stabstr = next_stabstr;
1388 if (stabstr_sz <= 0) {
1389 VG_(printf)(" @@ bad stabstr size %d\n", stabstr_sz);
1390 return;
1391 }
1392 }
1393 next_stabstr = stabstr + st->n_value;
1394 break;
1395
1396 case N_BINCL: {
1397 fileidx++;
1398 addHeader(tab, stabstr + st->n_un.n_strx, st->n_value, fileidx);
1399
1400 if (debug)
1401 VG_(printf)("BINCL: pushed %s fileidx=%d\n",
1402 stabstr + st->n_un.n_strx, fileidx);
1403 break;
1404 }
1405
1406 case N_EINCL:
1407 break;
1408
1409 case N_EXCL:
1410 ++fileidx;
1411
1412 addFileAlias(tab, stabstr + st->n_un.n_strx, st->n_value, fileidx);
1413
1414 if (debug) {
1415 VG_(printf)("reference to excluded include file %s; fileidx=%d\n",
1416 stabstr + st->n_un.n_strx, fileidx);
1417 }
1418 break;
1419
1420 case N_SOL: /* sub-source (include) file */
1421 if (line.ovf != 0)
1422 VG_(message)(Vg_UserMsg,
1423 "Warning: file %s is very big (> 65535 lines) "
1424 "Line numbers and annotation for this file might "
1425 "be wrong. Sorry",
1426 file.name);
1427 /* FALLTHROUGH */
1428
1429 case N_SO: { /* new source file */
1430 UChar *nm = string;
1431 UInt len = VG_(strlen)(nm);
1432 Addr addr = func.start + st->n_value;
1433
1434 if (line.addr != 0) {
1435 /* finish off previous line */
sewardj7eb7c582005-06-23 01:02:53 +00001436 ML_(addLineInfo)(si, file.name, NULL, line.addr,
jsgfcb1d1c02003-10-14 21:55:10 +00001437 addr, line.no + line.ovf * LINENO_OVERFLOW, i);
1438 }
1439
1440 /* reset line state */
1441 line.ovf = 0;
1442 line.addr = 0;
1443 line.prev = 0;
1444 line.no = 0;
1445 line.jump = True;
1446
1447 if (len > 0 && nm[len-1] != '/') {
sewardj7eb7c582005-06-23 01:02:53 +00001448 file.name = ML_(addStr)(si, nm, -1);
jsgfcb1d1c02003-10-14 21:55:10 +00001449 if (debug)
1450 VG_(printf)("new source: %s\n", file.name);
1451 if (st->n_type == N_SO) {
1452 fileidx = 0;
1453 clearStabFiles(tab);
1454 }
1455 } else if (len == 0)
sewardj7eb7c582005-06-23 01:02:53 +00001456 file.name = ML_(addStr)(si, "?1\0", -1);
jsgfcb1d1c02003-10-14 21:55:10 +00001457
1458 if (func.start != 0)
1459 line.jump = True;
1460 break;
1461 }
1462
1463 case N_SLINE: { /* line info */
1464 Addr addr = func.start + st->n_value;
1465
1466 if (line.addr != 0) {
1467 /* there was a previous */
sewardj7eb7c582005-06-23 01:02:53 +00001468 ML_(addLineInfo)(si, file.name, NULL, line.addr,
jsgfcb1d1c02003-10-14 21:55:10 +00001469 addr, line.no + line.ovf * LINENO_OVERFLOW, i);
1470 }
1471
1472 line.addr = addr;
1473 line.prev = line.no;
1474 line.no = (Int)((UShort)st->n_desc);
1475
1476 if (line.prev > line.no + OVERFLOW_DIFFERENCE && file.same) {
1477 VG_(message)(Vg_DebugMsg,
1478 "Line number overflow detected (%d --> %d) in %s",
1479 line.prev, line.no, file.name);
1480 line.ovf++;
1481 }
1482 file.same = True;
1483
1484 /* This is pretty horrible. If this is the first line of
1485 the function, then bind any unbound symbols to the arg
1486 scope, since they're probably arguments. */
1487 if (line.first) {
1488 line.first = False;
1489
1490 if (scope.nsyms != 0) {
1491 addSymsToScope(scope.scope, scope.symlist, scope.nsyms, NULL);
1492 scope.symlist = NULL;
1493 scope.nsyms = 0;
1494 }
1495
1496 /* remember first line of function */
1497 if (func.start != 0) {
1498 func.filename = file.name;
1499 func.line = line.no;
1500 }
1501 } else if (func.start != 0 && (line.no < func.line || func.filename != file.name)) {
1502 /* If we're suddenly in code before the function starts
1503 or in a different file, then it seems like its
1504 probably some inlined code. Should do something
1505 useful with this information. */
1506 //VG_(printf)("possible inline?\n");
1507 line.jump = True;
1508 }
1509 break;
1510 }
1511
1512 case N_FUN: { /* function start/end */
1513 Addr addr = 0; /* end address for prev line/scope */
1514 Bool newfunc = False;
1515
1516 if (scope.nsyms != 0) {
1517 /* clean up any unbound symbols */
1518 addSymsToScope(scope.scope, scope.symlist, scope.nsyms, NULL);
1519 scope.symlist = NULL;
1520 scope.nsyms = 0;
1521 }
1522
1523 /* if this the end of the function or we haven't
1524 previously finished the previous function... */
1525 if (*string == '\0' || func.start != 0) {
1526 /* end of function */
1527 newfunc = False;
1528 line.first = False;
1529
1530 /* end line at end of function */
1531 addr = func.start + st->n_value;
1532
1533 if (debug)
1534 VG_(printf)("ending func %s at %p\n", func.name, addr);
1535
1536 /* now between functions */
1537 func.name = no_fn_name;
1538 func.start = 0;
1539
1540 if (scope.addr != 0) {
1541 /* finish any previous scope range */
sewardj7eb7c582005-06-23 01:02:53 +00001542 ML_(addScopeInfo)(si, scope.addr, addr, scope.scope);
jsgfcb1d1c02003-10-14 21:55:10 +00001543 }
1544
1545 /* tidy up arg scope */
1546 /* XXX LEAK: free scope if it or any of its inner scopes was
1547 never added to a scope range */
1548
1549 if (scope.scope->depth == 0) {
1550 VG_(message)(Vg_UserMsg,
1551 "It seems there's more scopes closed than opened...\n");
1552 break;
1553 }
1554
1555 scope.scope = scope.scope->outer;
1556 scope.addr = addr;
1557 scope.addr = 0;
1558 }
1559
1560 if (*string != '\0') {
1561 /* new function */
1562 newfunc = True;
1563 line.first = True;
1564
1565 /* line ends at start of next function */
1566 addr = si->offset + st->n_value;
1567
1568 func.start = addr;
1569 func.name = string;
1570
1571 if (debug)
1572 VG_(printf)("\nnew func %s at %p\n", func.name, func.start);
1573
1574 }
1575
1576 if (line.addr) {
sewardj7eb7c582005-06-23 01:02:53 +00001577 ML_(addLineInfo)(si, file.name, NULL, line.addr,
jsgfcb1d1c02003-10-14 21:55:10 +00001578 addr, line.no + line.ovf * LINENO_OVERFLOW, i);
1579 line.addr = 0;
1580 }
1581
1582 if (scope.addr) {
1583 /* finish any previous scope range */
sewardj7eb7c582005-06-23 01:02:53 +00001584 ML_(addScopeInfo)(si, scope.addr, addr, scope.scope);
jsgfcb1d1c02003-10-14 21:55:10 +00001585 }
1586
1587 if (newfunc) {
1588 /* make little wrapper scope for args */
1589 Scope *sc;
1590 if (scope.addr) {
1591 /* finish any previous scope range */
sewardj7eb7c582005-06-23 01:02:53 +00001592 ML_(addScopeInfo)(si, scope.addr, addr, scope.scope);
jsgfcb1d1c02003-10-14 21:55:10 +00001593 }
1594
1595 sc = addSymsToScope(NULL, scope.symlist, scope.nsyms, scope.scope);
1596 scope.scope = sc;
1597 scope.nsyms = 0;
1598 scope.symlist = NULL;
1599 scope.addr = addr;
1600 }
1601 break;
1602 }
1603
1604 case N_LBRAC: {
1605 /* open new scope */
1606 Scope *sc;
1607 Addr addr = func.start + st->n_value;
1608
1609 if (scope.addr) {
1610 /* end previous range */
sewardj7eb7c582005-06-23 01:02:53 +00001611 ML_(addScopeInfo)(si, scope.addr, addr, scope.scope);
jsgfcb1d1c02003-10-14 21:55:10 +00001612 }
1613
1614 scope.addr = addr;
1615
1616 if (debug) {
1617 static const Char indent[]=
1618 " "
1619 " ";
1620 Int idx;
1621
1622 idx = sizeof(indent)-1 - (scope.depth * 2);
1623 scope.depth++;
1624 VG_(printf)("%s{\n", &indent[idx >= 0 ? idx : 0]);
1625 }
1626 /* add unbound syms to scope */
1627 sc = addSymsToScope(NULL, scope.symlist, scope.nsyms, scope.scope);
1628 scope.scope = sc;
1629 scope.nsyms = 0;
1630 scope.symlist = NULL;
1631
1632 break;
1633 }
1634
1635 case N_RBRAC: {
1636 /* close scope */
1637 Addr addr = func.start + st->n_value;
1638
1639 if (scope.nsyms != 0) {
1640 /* If there's any unbound symbols, tidy them up */
1641 addSymsToScope(scope.scope, scope.symlist, scope.nsyms, NULL);
1642 scope.symlist = NULL;
1643 scope.nsyms = 0;
1644 }
1645
1646 vg_assert(scope.addr != 0);
sewardj7eb7c582005-06-23 01:02:53 +00001647 ML_(addScopeInfo)(si, scope.addr, addr, scope.scope);
jsgfcb1d1c02003-10-14 21:55:10 +00001648
1649 /* XXX LEAK: free scope if it or any of its inner scopes was
1650 never added to a scope range */
1651
1652 if (scope.scope->depth == 0) {
1653 /* complain */
1654 VG_(message)(Vg_UserMsg, "It seems there's more scopes closed than opened...\n");
1655 break;
1656 }
1657
1658 scope.scope = scope.scope->outer;
1659 scope.addr = addr;
1660 if (debug) {
1661 static const Char indent[]=
1662 " "
1663 " ";
1664 Int idx;
1665
1666 scope.depth--;
1667 idx = sizeof(indent)-1 - (scope.depth * 2);
1668 VG_(printf)("%s}\n", &indent[idx >= 0 ? idx : 0]);
1669 }
1670
1671 break;
1672 }
1673
1674 case N_GSYM: /* global variable */
1675 case N_STSYM: /* static in data segment */
1676 case N_LCSYM: /* static in bss segment */
1677 case N_PSYM: /* function parameter */
1678 case N_LSYM: /* stack variable */
1679 case N_RSYM: { /* register variable */
1680 Char *cp = string;
1681 Int val = st->n_value;
1682
1683 /* a single string can have multiple definitions nested in it */
1684 while(*cp != '\0') {
1685 struct symlist *s = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(*s));
1686
1687 if (initSym(si, &s->sym, st->n_type, &cp, val)) {
1688 VG_(arena_free)(VG_AR_SYMTAB, s); /* not interesting */
1689 } else {
1690 s->next = scope.symlist;
1691 scope.symlist = s;
1692 scope.nsyms++;
1693 }
1694 switch(*cp) {
1695 case '\0': /* all done */
1696 break;
1697
1698 case '0' ... '9': /* symbol */
1699 case 'A' ... 'Z':
1700 case 'a' ... 'z':
1701 case '_':
1702 break;
1703
1704 case ' ': case ':': /* nameless type */
1705 break;
1706
1707 default:
1708 VG_(printf)(" @@ unlikely looking definition in unparsed remains \"%s\"\n", cp);
1709 break;
1710 }
1711 }
1712 break;
1713 }
1714 }
1715 }
1716
1717 if (scope.nsyms != 0)
1718 addSymsToScope(scope.scope, scope.symlist, scope.nsyms, NULL);
1719}
njn4bbdc972003-10-16 10:10:55 +00001720
1721/*--------------------------------------------------------------------*/
nethercote0febe082004-08-04 09:57:31 +00001722/*--- end ---*/
njn4bbdc972003-10-16 10:10:55 +00001723/*--------------------------------------------------------------------*/