blob: 90c3fc520c6b40ee03ffacca8a436e0ced656a7c [file] [log] [blame]
sewardjb8b79ad2008-03-03 01:35:41 +00001
2/*--------------------------------------------------------------------*/
3/*--- Read DWARF3 ".debug_info" sections (DIE trees). ---*/
4/*--- readdwarf3.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
10
11 Copyright (C) 2008-2008 OpenWorks LLP
12 info@open-works.co.uk
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30
31 Neither the names of the U.S. Department of Energy nor the
32 University of California nor the names of its contributors may be
33 used to endorse or promote products derived from this software
34 without prior written permission.
35*/
36
37/* REFERENCE (without which this code will not make much sense):
38
39 DWARF Debugging Information Format, Version 3,
40 dated 20 December 2005 (the "D3 spec").
41
42 Available at http://www.dwarfstd.org/Dwarf3.pdf. There's also a
43 .doc (MS Word) version, but for some reason the section numbers
44 between the Word and PDF versions differ by 1 in the first digit.
45 All section references in this code are to the PDF version.
46
47 CURRENT HACKS:
48
49 DW_TAG_{const,volatile}_type no DW_AT_type is allowed; it is
50 assumed to mean "const void" or "volatile void" respectively.
51 GDB appears to interpret them like this, anyway.
52
53 In many cases it is important to know the svma of a CU (the "base
54 address of the CU", as the D3 spec calls it). There are some
55 situations in which the spec implies this value is unknown, but the
   Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
57 merely zero when not explicitly stated. So we too have to make
58 that assumption.
59
60 TODO, 2008 Feb 17:
61
62 get rid of cu_svma_known and document the assumed-zero svma hack.
63
64 ML_(sizeOfType): differentiate between zero sized types and types
65 for which the size is unknown. Is this important? I don't know.
66
67 DW_AT_array_types: deal with explicit sizes (currently we compute
68 the size from the bounds and the element size, although that's
69 fragile, if the bounds incompletely specified, or completely
70 absent)
71
72 Document reason for difference (by 1) of stack preening depth in
73 parse_var_DIE vs parse_type_DIE.
74
75 Don't hand to ML_(addVars), vars whose locations are entirely in
76 registers (DW_OP_reg*). This is merely a space-saving
77 optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
78 expressions correctly, by failing to evaluate them and hence
79 effectively ignoring the variable with which they are associated.
80
81 Deal with DW_AT_array_types which have element size != stride
82
83 In some cases, the info for a variable is split between two
84 different DIEs (generally a declarer and a definer). We punt on
85 these. Could do better here.
86
87 The 'data_bias' argument passed to the expression evaluator
88 (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
89 MaybeUWord, to make it clear when we do vs don't know what it is
90 for the evaluation of an expression. At the moment zero is passed
91 for this parameter in the don't know case. That's a bit fragile
92 and obscure; using a MaybeUWord would be clearer.
93
94 POTENTIAL PERFORMANCE IMPROVEMENTS:
95
96 The number of type entities that end up in the list of TyAdmins
97 rapidly becomes huge (eg, for libQtGui.so.4.3.2 (amd64-linux, size
98 80729047 bytes), there are 786860 entries in the list). Mostly
99 this seems to be caused by g++ adding type DIEs for all the basic
100 types once for each source file contributing to the compilation
101 unit, and for a large library they add up quickly. That causes
102 both a lot of work for this reader module, and also wastes vast
103 amounts of memory storing this duplicated information. We could
104 surely do a lot better here.
105
106 Handle interaction between read_DIE and parse_{var,type}_DIE
107 better. Currently read_DIE reads the entire DIE just to find where
108 the end is (and for debug printing), so that it can later reliably
109 move the cursor to the end regardless of what parse_{var,type}_DIE
110 do. This means many DIEs (most, even?) are read twice. It would
111 be smarter to make parse_{var,type}_DIE return a Bool indicating
112 whether or not they advanced the DIE cursor, and only if they
113 didn't should read_DIE itself read through the DIE.
114
115 ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
116 zero variables in their .vars XArray. Rather than have an XArray
117 with zero elements (which uses 2 malloc'd blocks), allow the .vars
118 pointer to be NULL in this case.
119
120 More generally, reduce the amount of memory allocated and freed
121 while reading Dwarf3 type/variable information. Even modest (20MB)
122 objects cause this module to allocate and free hundreds of
123 thousands of small blocks, and ML_(arena_malloc) and its various
124 groupies always show up at the top of performance profiles. */
125
126#include "pub_core_basics.h"
127#include "pub_core_libcbase.h"
128#include "pub_core_libcassert.h"
129#include "pub_core_libcprint.h"
130#include "pub_core_options.h"
131#include "pub_core_xarray.h"
132#include "priv_misc.h" /* dinfo_zalloc/free */
133#include "priv_tytypes.h"
134#include "priv_d3basics.h"
135#include "priv_storage.h"
136#include "priv_readdwarf3.h" /* self */
137
138
139/*------------------------------------------------------------*/
140/*--- ---*/
141/*--- Basic machinery for parsing DIEs. ---*/
142/*--- ---*/
143/*------------------------------------------------------------*/
144
/* Print a trace line with VG_(printf) when the 'td3' flag visible at
   the point of use is true.  The expansion is wrapped in
   do { } while (0) so that it is exactly one statement: it can be used
   as the unbraced body of an 'if' without capturing a following
   'else'.  Every use must be terminated with ';'. */
#define TRACE_D3(format, args...) \
   do { if (td3) { VG_(printf)(format, ## args); } } while (0)
147
/* Two distinguished pointer-sized sentinel values: all bits set, and
   all bits set except the lowest.  (Their users are not visible in
   this part of the file; the names suggest "no valid CU offset" and
   "stand-in for void".) */
#define D3_INVALID_CUOFF  ((void*)(~0UL))
#define D3_FAKEVOID_CUOFF ((void*)(~1UL))
150
151typedef
152 struct {
153 UChar* region_start_img;
154 UWord region_szB;
155 UWord region_next;
bartaed2e4c2008-03-03 20:40:51 +0000156 void (*barf)( HChar* ) __attribute__((noreturn));
sewardjb8b79ad2008-03-03 01:35:41 +0000157 HChar* barfstr;
158 }
159 Cursor;
160
161static inline Bool is_sane_Cursor ( Cursor* c ) {
162 if (!c) return False;
163 if (!c->barf) return False;
164 if (!c->barfstr) return False;
165 return True;
166}
167
168static void init_Cursor ( Cursor* c,
169 UChar* region_start_img,
170 UWord region_szB,
171 UWord region_next,
172 __attribute__((noreturn)) void (*barf)( HChar* ),
173 HChar* barfstr )
174{
175 vg_assert(c);
176 VG_(memset)(c, 0, sizeof(*c));
177 c->region_start_img = region_start_img;
178 c->region_szB = region_szB;
179 c->region_next = region_next;
180 c->barf = barf;
181 c->barfstr = barfstr;
182 vg_assert(is_sane_Cursor(c));
183}
184
185static Bool is_at_end_Cursor ( Cursor* c ) {
186 vg_assert(is_sane_Cursor(c));
187 return c->region_next >= c->region_szB;
188}
189
190static inline UWord get_position_of_Cursor ( Cursor* c ) {
191 vg_assert(is_sane_Cursor(c));
192 return c->region_next;
193}
194static inline void set_position_of_Cursor ( Cursor* c, UWord pos ) {
195 c->region_next = pos;
196 vg_assert(is_sane_Cursor(c));
197}
198
199static /*signed*/Word get_remaining_length_Cursor ( Cursor* c ) {
200 vg_assert(is_sane_Cursor(c));
201 return c->region_szB - c->region_next;
202}
203
204static UChar* get_address_of_Cursor ( Cursor* c ) {
205 vg_assert(is_sane_Cursor(c));
206 return &c->region_start_img[ c->region_next ];
207}
208
209__attribute__((noreturn))
210static void failWith ( Cursor* c, HChar* str ) {
211 vg_assert(c);
212 vg_assert(c->barf);
213 c->barf(str);
214 /*NOTREACHED*/
215 vg_assert(0);
216}
217
218/* FIXME: document assumptions on endianness for
219 get_UShort/UInt/ULong. */
220static inline UChar get_UChar ( Cursor* c ) {
221 UChar r;
222 /* vg_assert(is_sane_Cursor(c)); */
223 if (c->region_next + sizeof(UChar) > c->region_szB) {
224 c->barf(c->barfstr);
225 /*NOTREACHED*/
226 vg_assert(0);
227 }
228 r = * (UChar*) &c->region_start_img[ c->region_next ];
229 c->region_next += sizeof(UChar);
230 return r;
231}
232static UShort get_UShort ( Cursor* c ) {
233 UShort r;
234 vg_assert(is_sane_Cursor(c));
235 if (c->region_next + sizeof(UShort) > c->region_szB) {
236 c->barf(c->barfstr);
237 /*NOTREACHED*/
238 vg_assert(0);
239 }
240 r = * (UShort*) &c->region_start_img[ c->region_next ];
241 c->region_next += sizeof(UShort);
242 return r;
243}
244static UInt get_UInt ( Cursor* c ) {
245 UInt r;
246 vg_assert(is_sane_Cursor(c));
247 if (c->region_next + sizeof(UInt) > c->region_szB) {
248 c->barf(c->barfstr);
249 /*NOTREACHED*/
250 vg_assert(0);
251 }
252 r = * (UInt*) &c->region_start_img[ c->region_next ];
253 c->region_next += sizeof(UInt);
254 return r;
255}
256static ULong get_ULong ( Cursor* c ) {
257 ULong r;
258 vg_assert(is_sane_Cursor(c));
259 if (c->region_next + sizeof(ULong) > c->region_szB) {
260 c->barf(c->barfstr);
261 /*NOTREACHED*/
262 vg_assert(0);
263 }
264 r = * (ULong*) &c->region_start_img[ c->region_next ];
265 c->region_next += sizeof(ULong);
266 return r;
267}
268static inline ULong get_ULEB128 ( Cursor* c ) {
269 ULong result;
270 Int shift;
271 UChar byte;
272 /* unroll first iteration */
273 byte = get_UChar( c );
274 result = (ULong)(byte & 0x7f);
275 if (LIKELY(!(byte & 0x80))) return result;
276 shift = 7;
277 /* end unroll first iteration */
278 do {
279 byte = get_UChar( c );
280 result |= ((ULong)(byte & 0x7f)) << shift;
281 shift += 7;
282 } while (byte & 0x80);
283 return result;
284}
285static Long get_SLEB128 ( Cursor* c ) {
286 ULong result = 0;
287 Int shift = 0;
288 UChar byte;
289 do {
290 byte = get_UChar(c);
291 result |= ((ULong)(byte & 0x7f)) << shift;
292 shift += 7;
293 } while (byte & 0x80);
294 if (shift < 64 && (byte & 0x40))
295 result |= -(1ULL << shift);
296 return result;
297}
298
299/* Assume 'c' points to the start of a string. Return the absolute
300 address of whatever it points at, and advance it past the
301 terminating zero. This makes it safe for the caller to then copy
302 the string with ML_(addStr), since (w.r.t. image overruns) the
303 process of advancing past the terminating zero will already have
304 "vetted" the string. */
305static UChar* get_AsciiZ ( Cursor* c ) {
306 UChar uc;
307 UChar* res = get_address_of_Cursor(c);
308 do { uc = get_UChar(c); } while (uc != 0);
309 return res;
310}
311
312static ULong peek_ULEB128 ( Cursor* c ) {
313 Word here = c->region_next;
314 ULong r = get_ULEB128( c );
315 c->region_next = here;
316 return r;
317}
318static UChar peek_UChar ( Cursor* c ) {
319 Word here = c->region_next;
320 UChar r = get_UChar( c );
321 c->region_next = here;
322 return r;
323}
324
325static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) {
326 return is_dw64 ? get_ULong(c) : (ULong) get_UInt(c);
327}
328
329static UWord get_UWord ( Cursor* c ) {
330 vg_assert(sizeof(UWord) == sizeof(void*));
331 if (sizeof(UWord) == 4) return get_UInt(c);
332 if (sizeof(UWord) == 8) return get_ULong(c);
333 vg_assert(0);
334}
335
336
337/* Read a DWARF3 'Initial Length' field */
338static ULong get_Initial_Length ( /*OUT*/Bool* is64,
339 Cursor* c,
340 HChar* barfMsg )
341{
342 ULong w64;
343 UInt w32;
344 *is64 = False;
345 w32 = get_UInt( c );
346 if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) {
347 c->barf( barfMsg );
348 }
349 else if (w32 == 0xFFFFFFFF) {
350 *is64 = True;
351 w64 = get_ULong( c );
352 } else {
353 *is64 = False;
354 w64 = (ULong)w32;
355 }
356 return w64;
357}
358
359
360/*------------------------------------------------------------*/
361/*--- ---*/
362/*--- "CUConst" structure ---*/
363/*--- ---*/
364/*------------------------------------------------------------*/
365
366#define N_ABBV_CACHE 32
367
368/* Holds information that is constant through the parsing of a
369 Compilation Unit. This is basically plumbed through to
370 everywhere. */
371typedef
372 struct {
373 /* Call here if anything goes wrong */
bartaed2e4c2008-03-03 20:40:51 +0000374 void (*barf)( HChar* ) __attribute__((noreturn));
sewardjb8b79ad2008-03-03 01:35:41 +0000375 /* Is this 64-bit DWARF ? */
376 Bool is_dw64;
377 /* Which DWARF version ? (2 or 3) */
378 UShort version;
379 /* Length of this Compilation Unit, excluding its Header */
380 ULong unit_length;
381 /* Offset of start of this unit in .debug_info */
382 UWord cu_start_offset;
383 /* SVMA for this CU. In the D3 spec, is known as the "base
384 address of the compilation unit (last para sec 3.1.1).
385 Needed for (amongst things) interpretation of location-list
386 values. */
387 Addr cu_svma;
388 Bool cu_svma_known;
389 /* The debug_abbreviations table to be used for this Unit */
390 UChar* debug_abbv;
391 /* Upper bound on size thereof (an overestimate, in general) */
392 UWord debug_abbv_maxszB;
393 /* Where is .debug_str ? */
394 UChar* debug_str_img;
395 UWord debug_str_sz;
396 /* Where is .debug_ranges ? */
397 UChar* debug_ranges_img;
398 UWord debug_ranges_sz;
399 /* Where is .debug_loc ? */
400 UChar* debug_loc_img;
401 UWord debug_loc_sz;
402 /* Where is .debug_line? */
403 UChar* debug_line_img;
404 UWord debug_line_sz;
405 /* --- Needed so we can add stuff to the string table. --- */
406 struct _DebugInfo* di;
407 /* --- a cache for set_abbv_Cursor --- */
408 /* abbv_code == (ULong)-1 for an unused entry. */
409 struct { ULong abbv_code; UWord posn; } saC_cache[N_ABBV_CACHE];
410 UWord saC_cache_queries;
411 UWord saC_cache_misses;
412 }
413 CUConst;
414
415
416/*------------------------------------------------------------*/
417/*--- ---*/
418/*--- Helper functions for Guarded Expressions ---*/
419/*--- ---*/
420/*------------------------------------------------------------*/
421
422/* Parse the location list starting at img-offset 'debug_loc_offset'
423 in .debug_loc. Results are biased with 'svma_of_referencing_CU'
424 and so I believe are correct SVMAs for the object as a whole. This
425 function allocates the UChar*, and the caller must deallocate it.
426 The resulting block is in so-called Guarded-Expression format.
427
428 Guarded-Expression format is similar but not identical to the DWARF3
429 location-list format. The format of each returned block is:
430
431 UChar biasMe;
432 UChar isEnd;
433 followed by zero or more of
434
435 (Addr aMin; Addr aMax; UShort nbytes; ..bytes..; UChar isEnd)
436
   '..bytes..' is a standard DWARF3 location expression which is
438 valid when aMin <= pc <= aMax (possibly after suitable biasing).
439
440 The number of bytes in '..bytes..' is nbytes.
441
442 The end of the sequence is marked by an isEnd == 1 value. All
443 previous isEnd values must be zero.
444
   biasMe is 1 if the aMin/aMax fields need this DebugInfo's
   text_bias added before use, and 0 if this is not necessary
   (the GX is ready to go).
448
449 Hence the block can be quickly parsed and is self-describing. Note
450 that aMax is 1 less than the corresponding value in a DWARF3
451 location list. Zero length ranges, with aMax == aMin-1, are not
452 allowed.
453*/
454void ML_(pp_GX) ( GExpr* gx ) {
455 Addr aMin, aMax;
456 UChar uc;
457 UShort nbytes;
458 UChar* p = &gx->payload[0];
459 uc = *p++;
460 VG_(printf)("GX(%s){", uc == 0 ? "final" : "Breqd" );
461 vg_assert(uc == 0 || uc == 1);
462 while (True) {
463 uc = *p++;
464 if (uc == 1)
465 break; /*isEnd*/
466 vg_assert(uc == 0);
467 aMin = * (Addr*)p; p += sizeof(Addr);
468 aMax = * (Addr*)p; p += sizeof(Addr);
469 nbytes = * (UShort*)p; p += sizeof(UShort);
470 VG_(printf)("[%p,%p]=", aMin, aMax);
471 while (nbytes > 0) {
472 VG_(printf)("%02x", (UInt)*p++);
473 nbytes--;
474 }
475 if (*p == 0)
476 VG_(printf)(",");
477 }
478 VG_(printf)("}");
479}
480
481static void bias_GX ( /*MOD*/GExpr* gx, Addr bias )
482{
483 UShort nbytes;
484 UChar* p = &gx->payload[0];
485 UChar uc;
486 uc = *p++; /*biasMe*/
487 if (uc == 0)
488 return;
489 vg_assert(uc == 1);
490 p[-1] = 0; /* mark it as done */
491 while (True) {
492 uc = *p++;
493 if (uc == 1)
494 break; /*isEnd*/
495 vg_assert(uc == 0);
496 * ((Addr*)p) += bias; /*aMin*/ p += sizeof(Addr);
497 * ((Addr*)p) += bias; /*aMax*/ p += sizeof(Addr);
498 nbytes = * (UShort*)p; p += sizeof(UShort);
499 p += nbytes;
500 }
501}
502
503__attribute__((noinline))
504static GExpr* make_singleton_GX ( UChar* block, UWord nbytes )
505{
506 SizeT bytesReqd;
507 GExpr* gx;
508 UChar *p, *pstart;
509
510 vg_assert(sizeof(UWord) == sizeof(Addr));
511 vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */
512 bytesReqd
513 = sizeof(UChar) /*biasMe*/ + sizeof(UChar) /*!isEnd*/
514 + sizeof(UWord) /*aMin*/ + sizeof(UWord) /*aMax*/
515 + sizeof(UShort) /*nbytes*/ + nbytes
516 + sizeof(UChar); /*isEnd*/
517
518 gx = ML_(dinfo_zalloc)( sizeof(GExpr) + bytesReqd );
519 vg_assert(gx);
520
521 p = pstart = &gx->payload[0];
522
523 * ((UChar*)p) = 0; /*biasMe*/ p += sizeof(UChar);
524 * ((UChar*)p) = 0; /*!isEnd*/ p += sizeof(UChar);
525 * ((Addr*)p) = 0; /*aMin*/ p += sizeof(Addr);
526 * ((Addr*)p) = ~((Addr)0); /*aMax */ p += sizeof(Addr);
527 * ((UShort*)p) = (UShort)nbytes; /*nbytes*/ p += sizeof(UShort);
528 VG_(memcpy)(p, block, nbytes); p += nbytes;
529 * ((UChar*)p) = 1; /*isEnd*/ p += sizeof(UChar);
530
531 vg_assert( (SizeT)(p - pstart) == bytesReqd);
532 vg_assert( &gx->payload[bytesReqd]
533 == ((UChar*)gx) + sizeof(GExpr) + bytesReqd );
534
535 gx->next = NULL;
536 return gx;
537}
538
539__attribute__((noinline))
540static GExpr* make_general_GX ( CUConst* cc,
541 Bool td3,
542 UWord debug_loc_offset,
543 Addr svma_of_referencing_CU )
544{
545 Addr base;
546 Cursor loc;
547 XArray* xa; /* XArray of UChar */
548 GExpr* gx;
549 Word nbytes;
550
551 vg_assert(sizeof(UWord) == sizeof(Addr));
552 if (cc->debug_loc_sz == 0)
553 cc->barf("make_general_GX: .debug_loc is empty/missing");
554
555 init_Cursor( &loc, cc->debug_loc_img,
556 cc->debug_loc_sz, 0, cc->barf,
557 "Overrun whilst reading .debug_loc section(2)" );
558 set_position_of_Cursor( &loc, debug_loc_offset );
559
560 TRACE_D3("make_general_GX (.debug_loc_offset = %lu, img = %p) {\n",
561 debug_loc_offset, get_address_of_Cursor( &loc ) );
562
563 /* Who frees this xa? It is freed before this fn exits. */
564 xa = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
565 sizeof(UChar) );
566
567 { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
568
569 base = 0;
570 while (True) {
571 Bool acquire;
572 UWord len;
573 /* Read a (host-)word pair. This is something of a hack since
574 the word size to read is really dictated by the ELF file;
575 however, we assume we're reading a file with the same
576 word-sizeness as the host. Reasonably enough. */
577 UWord w1 = get_UWord( &loc );
578 UWord w2 = get_UWord( &loc );
579
580 TRACE_D3(" %08lx %08lx\n", w1, w2);
581 if (w1 == 0 && w2 == 0)
582 break; /* end of list */
583
584 if (w1 == -1UL) {
585 /* new value for 'base' */
586 base = w2;
587 continue;
588 }
589
590 /* else a location expression follows */
591 /* else enumerate [w1+base, w2+base) */
592 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
593 (sec 2.17.2) */
594 if (w1 > w2) {
595 TRACE_D3("negative range is for .debug_loc expr at "
596 "file offset %lu\n",
597 debug_loc_offset);
598 cc->barf( "negative range in .debug_loc section" );
599 }
600
601 /* ignore zero length ranges */
602 acquire = w1 < w2;
603 len = (UWord)get_UShort( &loc );
604
605 if (acquire) {
606 UWord w;
607 UShort s;
608 UChar c;
609 c = 0; /* !isEnd*/
610 VG_(addBytesToXA)( xa, &c, sizeof(c) );
611 w = w1 + base + svma_of_referencing_CU;
612 VG_(addBytesToXA)( xa, &w, sizeof(w) );
613 w = w2 -1 + base + svma_of_referencing_CU;
614 VG_(addBytesToXA)( xa, &w, sizeof(w) );
615 s = (UShort)len;
616 VG_(addBytesToXA)( xa, &s, sizeof(s) );
617 }
618
619 while (len > 0) {
620 UChar byte = get_UChar( &loc );
621 TRACE_D3("%02x", (UInt)byte);
622 if (acquire)
623 VG_(addBytesToXA)( xa, &byte, 1 );
624 len--;
625 }
626 TRACE_D3("\n");
627 }
628
629 { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
630
631 nbytes = VG_(sizeXA)( xa );
632 vg_assert(nbytes >= 1);
633
634 gx = ML_(dinfo_zalloc)( sizeof(GExpr) + nbytes );
635 vg_assert(gx);
636 VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
637 vg_assert( &gx->payload[nbytes]
638 == ((UChar*)gx) + sizeof(GExpr) + nbytes );
639
640 VG_(deleteXA)( xa );
641
642 gx->next = NULL;
643
644 TRACE_D3("}\n");
645
646 return gx;
647}
648
649
650/*------------------------------------------------------------*/
651/*--- ---*/
652/*--- Helper functions for range lists and CU headers ---*/
653/*--- ---*/
654/*------------------------------------------------------------*/
655
656/* Denotes an address range. Both aMin and aMax are included in the
657 range; hence a complete range is (0, ~0) and an empty range is any
658 (X, X-1) for X > 0.*/
659typedef
660 struct { Addr aMin; Addr aMax; }
661 AddrRange;
662
663
664__attribute__((noinline))
665static XArray* /* of AddrRange */ empty_range_list ( void )
666{
667 XArray* xa; /* XArray of AddrRange */
668 /* Who frees this xa? varstack_preen() does. */
669 xa = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
670 sizeof(AddrRange) );
671 return xa;
672}
673
674
675static XArray* unitary_range_list ( Addr aMin, Addr aMax )
676{
677 XArray* xa;
678 AddrRange pair;
679 vg_assert(aMin <= aMax);
680 /* Who frees this xa? varstack_preen() does. */
681 xa = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
682 sizeof(AddrRange) );
683 pair.aMin = aMin;
684 pair.aMax = aMax;
685 VG_(addToXA)( xa, &pair );
686 return xa;
687}
688
689
690/* Enumerate the address ranges starting at img-offset
691 'debug_ranges_offset' in .debug_ranges. Results are biased with
692 'svma_of_referencing_CU' and so I believe are correct SVMAs for the
693 object as a whole. This function allocates the XArray, and the
694 caller must deallocate it. */
695__attribute__((noinline))
696static XArray* /* of AddrRange */
697 get_range_list ( CUConst* cc,
698 Bool td3,
699 UWord debug_ranges_offset,
700 Addr svma_of_referencing_CU )
701{
702 Addr base;
703 Cursor ranges;
704 XArray* xa; /* XArray of AddrRange */
705 AddrRange pair;
706
707 if (cc->debug_ranges_sz == 0)
708 cc->barf("get_range_list: .debug_ranges is empty/missing");
709
710 init_Cursor( &ranges, cc->debug_ranges_img,
711 cc->debug_ranges_sz, 0, cc->barf,
712 "Overrun whilst reading .debug_ranges section(2)" );
713 set_position_of_Cursor( &ranges, debug_ranges_offset );
714
715 /* Who frees this xa? varstack_preen() does. */
716 xa = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
717 sizeof(AddrRange) );
718 base = 0;
719 while (True) {
720 /* Read a (host-)word pair. This is something of a hack since
721 the word size to read is really dictated by the ELF file;
722 however, we assume we're reading a file with the same
723 word-sizeness as the host. Reasonably enough. */
724 UWord w1 = get_UWord( &ranges );
725 UWord w2 = get_UWord( &ranges );
726
727 if (w1 == 0 && w2 == 0)
728 break; /* end of list. */
729
730 if (w1 == -1UL) {
731 /* new value for 'base' */
732 base = w2;
733 continue;
734 }
735
736 /* else enumerate [w1+base, w2+base) */
737 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
738 (sec 2.17.2) */
739 if (w1 > w2)
740 cc->barf( "negative range in .debug_ranges section" );
741 if (w1 < w2) {
742 pair.aMin = w1 + base + svma_of_referencing_CU;
743 pair.aMax = w2 - 1 + base + svma_of_referencing_CU;
744 vg_assert(pair.aMin <= pair.aMax);
745 VG_(addToXA)( xa, &pair );
746 }
747 }
748 return xa;
749}
750
751
752/* Parse the Compilation Unit header indicated at 'c' and
753 initialise 'cc' accordingly. */
754static __attribute__((noinline))
755void parse_CU_Header ( /*OUT*/CUConst* cc,
756 Bool td3,
757 Cursor* c,
758 UChar* debug_abbv_img, UWord debug_abbv_sz )
759{
760 UChar address_size;
761 UWord debug_abbrev_offset;
762 Int i;
763
764 VG_(memset)(cc, 0, sizeof(*cc));
765 vg_assert(c && c->barf);
766 cc->barf = c->barf;
767
768 /* initial_length field */
769 cc->unit_length
770 = get_Initial_Length( &cc->is_dw64, c,
771 "parse_CU_Header: invalid initial-length field" );
772
773 TRACE_D3(" Length: %lld\n", cc->unit_length );
774
775 /* version */
776 cc->version = get_UShort( c );
777 if (cc->version != 2 && cc->version != 3)
778 cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3" );
779 TRACE_D3(" Version: %d\n", (Int)cc->version );
780
781 /* debug_abbrev_offset */
782 debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
783 if (debug_abbrev_offset >= debug_abbv_sz)
784 cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
785 TRACE_D3(" Abbrev Offset: %ld\n", debug_abbrev_offset );
786
787 /* address size. If this isn't equal to the host word size, just
788 give up. This makes it safe to assume elsewhere that
789 DW_FORM_addr can be treated as a host word. */
790 address_size = get_UChar( c );
791 if (address_size != sizeof(void*))
792 cc->barf( "parse_CU_Header: invalid address_size" );
793 TRACE_D3(" Pointer Size: %d\n", (Int)address_size );
794
795 /* Set up so that cc->debug_abbv points to the relevant table for
796 this CU. Set the szB so that at least we can't read off the end
797 of the debug_abbrev section -- potentially (and quite likely)
798 too big, if this isn't the last table in the section, but at
799 least it's safe. */
800 cc->debug_abbv = debug_abbv_img + debug_abbrev_offset;
801 cc->debug_abbv_maxszB = debug_abbv_sz - debug_abbrev_offset;
802 /* and empty out the set_abbv_Cursor cache */
803 if (0) VG_(printf)("XXXXXX initialise set_abbv_Cursor cache\n");
804 for (i = 0; i < N_ABBV_CACHE; i++) {
805 cc->saC_cache[i].abbv_code = (ULong)-1; /* unused */
806 cc->saC_cache[i].posn = 0;
807 }
808 cc->saC_cache_queries = 0;
809 cc->saC_cache_misses = 0;
810}
811
812
813/* Set up 'c' so it is ready to parse the abbv table entry code
814 'abbv_code' for this compilation unit. */
815static __attribute__((noinline))
816void set_abbv_Cursor ( /*OUT*/Cursor* c, Bool td3,
817 CUConst* cc, ULong abbv_code )
818{
819 Int i;
820 ULong acode;
821
822 if (abbv_code == 0)
823 cc->barf("set_abbv_Cursor: abbv_code == 0" );
824
825 /* (ULong)-1 is used to represent an empty cache slot. So we can't
826 allow it. In any case no valid DWARF3 should make a reference
827 to a negative abbreviation code. [at least, they always seem to
828 be numbered upwards from zero as far as I have seen] */
829 vg_assert(abbv_code != (ULong)-1);
830
831 /* First search the cache. */
832 if (0) VG_(printf)("XXXXXX search set_abbv_Cursor cache\n");
833 cc->saC_cache_queries++;
834 for (i = 0; i < N_ABBV_CACHE; i++) {
835 /* No need to test the cached abbv_codes for -1 (empty), since
836 we just asserted that abbv_code is not -1. */
837 if (cc->saC_cache[i].abbv_code == abbv_code) {
838 /* Found it. Cool. Set up the parser using the cached
839 position, and move this cache entry 1 step closer to the
840 front. */
841 if (0) VG_(printf)("XXXXXX found in set_abbv_Cursor cache\n");
842 init_Cursor( c, cc->debug_abbv,
843 cc->debug_abbv_maxszB, cc->saC_cache[i].posn,
844 cc->barf,
845 "Overrun whilst parsing .debug_abbrev section(1)" );
846 if (i > 0) {
847 ULong t_abbv_code = cc->saC_cache[i].abbv_code;
848 UWord t_posn = cc->saC_cache[i].posn;
849 while (i > 0) {
850 cc->saC_cache[i] = cc->saC_cache[i-1];
851 cc->saC_cache[0].abbv_code = t_abbv_code;
852 cc->saC_cache[0].posn = t_posn;
853 i--;
854 }
855 }
856 return;
857 }
858 }
859
860 /* No. It's not in the cache. We have to search through
861 .debug_abbrev, of course taking care to update the cache
862 when done. */
863
864 cc->saC_cache_misses++;
865 init_Cursor( c, cc->debug_abbv, cc->debug_abbv_maxszB, 0, cc->barf,
866 "Overrun whilst parsing .debug_abbrev section(2)" );
867
868 /* Now iterate though the table until we find the requested
869 entry. */
870 while (True) {
871 ULong atag;
872 UInt has_children;
873 acode = get_ULEB128( c );
874 if (acode == 0) break; /* end of the table */
875 if (acode == abbv_code) break; /* found it */
876 atag = get_ULEB128( c );
877 has_children = get_UChar( c );
878 //TRACE_D3(" %llu %s [%s]\n",
879 // acode, pp_DW_TAG(atag), pp_DW_children(has_children));
880 while (True) {
881 ULong at_name = get_ULEB128( c );
882 ULong at_form = get_ULEB128( c );
883 if (at_name == 0 && at_form == 0) break;
884 //TRACE_D3(" %18s %s\n",
885 // pp_DW_AT(at_name), pp_DW_FORM(at_form));
886 }
887 }
888
889 if (acode == 0) {
890 /* Not found. This is fatal. */
891 cc->barf("set_abbv_Cursor: abbv_code not found");
892 }
893
894 /* Otherwise, 'c' is now set correctly to parse the relevant entry,
895 starting from the abbreviation entry's tag. So just cache
896 the result, and return. */
897 for (i = N_ABBV_CACHE-1; i > N_ABBV_CACHE/2; i--) {
898 cc->saC_cache[i] = cc->saC_cache[i-1];
899 }
900 if (0) VG_(printf)("XXXXXX update set_abbv_Cursor cache\n");
901 cc->saC_cache[N_ABBV_CACHE/2].abbv_code = abbv_code;
902 cc->saC_cache[N_ABBV_CACHE/2].posn = get_position_of_Cursor(c);
903}
904
905
906/* From 'c', get the Form data into the lowest 1/2/4/8 bytes of *cts.
907
908 If *cts itself contains the entire result, then *ctsSzB is set to
909 1,2,4 or 8 accordingly and *ctsMemSzB is set to zero.
910
911 Alternatively, the result can be a block of data (in the
912 transiently mapped-in object, so-called "image" space). If so then
913 the lowest sizeof(void*)/8 bytes of *cts hold a pointer to said
914 image, *ctsSzB is zero, and *ctsMemSzB is the size of the block.
915
916 Unfortunately this means it is impossible to represent a zero-size
917 image block since that would have *ctsSzB == 0 and *ctsMemSzB == 0
918 and so is ambiguous (which case it is?)
919
920 Invariant on successful return:
921 (*ctsSzB > 0 && *ctsMemSzB == 0)
922 || (*ctsSzB == 0 && *ctsMemSzB > 0)
923*/
924static
925void get_Form_contents ( /*OUT*/ULong* cts,
926 /*OUT*/Int* ctsSzB,
927 /*OUT*/UWord* ctsMemSzB,
928 CUConst* cc, Cursor* c,
929 Bool td3, DW_FORM form )
930{
931 *cts = 0;
932 *ctsSzB = 0;
933 *ctsMemSzB = 0;
934 switch (form) {
935 case DW_FORM_data1:
936 *cts = (ULong)(UChar)get_UChar(c);
937 *ctsSzB = 1;
938 TRACE_D3("%u", (UInt)*cts);
939 break;
940 case DW_FORM_data2:
941 *cts = (ULong)(UShort)get_UShort(c);
942 *ctsSzB = 2;
943 TRACE_D3("%u", (UInt)*cts);
944 break;
945 case DW_FORM_data4:
946 *cts = (ULong)(UInt)get_UInt(c);
947 *ctsSzB = 4;
948 TRACE_D3("%u", (UInt)*cts);
949 break;
sewardj0b5bf912008-03-07 20:07:58 +0000950 case DW_FORM_data8:
951 *cts = get_ULong(c);
952 *ctsSzB = 8;
953 TRACE_D3("%llu", *cts);
954 break;
sewardjb8b79ad2008-03-03 01:35:41 +0000955 case DW_FORM_sdata:
956 *cts = (ULong)(Long)get_SLEB128(c);
957 *ctsSzB = 8;
958 TRACE_D3("%lld", (Long)*cts);
959 break;
960 case DW_FORM_addr:
961 /* note, this is a hack. DW_FORM_addr is defined as getting
962 a word the size of the target machine as defined by the
963 address_size field in the CU Header. However,
964 parse_CU_Header() rejects all inputs except those for
965 which address_size == sizeof(Word), hence we can just
966 treat it as a (host) Word. */
967 *cts = (ULong)(UWord)get_UWord(c);
968 *ctsSzB = sizeof(UWord);
969 TRACE_D3("0x%lx", (UWord)*cts);
970 break;
971 case DW_FORM_strp: {
972 /* this is an offset into .debug_str */
973 UChar* str;
974 UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
975 if (cc->debug_str_img == NULL || uw >= cc->debug_str_sz)
976 cc->barf("read_and_show_Form: DW_FORM_strp "
977 "points outside .debug_str");
978 /* FIXME: check the entire string lies inside debug_str,
979 not just the first byte of it. */
980 str = (UChar*)cc->debug_str_img + uw;
981 TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, str);
982 *cts = (ULong)(UWord)str;
983 *ctsMemSzB = 1 + (ULong)VG_(strlen)(str);
984 break;
985 }
986 case DW_FORM_string: {
987 UChar* str = get_AsciiZ(c);
988 TRACE_D3("%s", str);
989 *cts = (ULong)(UWord)str;
990 /* strlen is safe because get_AsciiZ already 'vetted' the
991 entire string */
992 *ctsMemSzB = 1 + (ULong)VG_(strlen)(str);
993 break;
994 }
995 case DW_FORM_ref4: {
996 UInt u32 = get_UInt(c);
997 UWord res = cc->cu_start_offset + (UWord)u32;
998 *cts = (ULong)res;
999 *ctsSzB = sizeof(UWord);
1000 TRACE_D3("<%lx>", res);
1001 break;
1002 }
1003 case DW_FORM_flag: {
1004 UChar u8 = get_UChar(c);
1005 TRACE_D3("%u", (UInt)u8);
1006 *cts = (ULong)u8;
1007 *ctsSzB = 1;
1008 break;
1009 }
1010 case DW_FORM_block1: {
1011 ULong u64b;
1012 ULong u64 = (ULong)get_UChar(c);
1013 UChar* block = get_address_of_Cursor(c);
1014 TRACE_D3("%llu byte block: ", u64);
1015 for (u64b = u64; u64b > 0; u64b--) {
1016 UChar u8 = get_UChar(c);
1017 TRACE_D3("%x ", (UInt)u8);
1018 }
1019 *cts = (ULong)(UWord)block;
1020 *ctsMemSzB = (UWord)u64;
1021 break;
1022 }
1023 default:
1024 VG_(printf)("get_Form_contents: unhandled %lld (%s)\n",
1025 form, ML_(pp_DW_FORM)(form));
1026 c->barf("get_Form_contents: unhandled DW_FORM");
1027 }
1028}
1029
1030
1031/*------------------------------------------------------------*/
1032/*--- ---*/
1033/*--- Parsing of variable-related DIEs ---*/
1034/*--- ---*/
1035/*------------------------------------------------------------*/
1036
1037typedef
1038 struct _TempVar {
1039 struct _TempVar* next;
1040 UChar* name; /* in DebugInfo's .strchunks */
1041 /* Represent ranges economically. nRanges is the number of
1042 ranges. Cases:
1043 0: .rngOneMin .rngOneMax .manyRanges are all zero
1044 1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
1045 2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
1046 This is merely an optimisation to avoid having to allocate
1047 and free the XArray in the common (98%) of cases where there
1048 is zero or one address ranges. */
1049 UWord nRanges;
1050 Addr rngOneMin;
1051 Addr rngOneMax;
1052 XArray* rngMany; /* of AddrRange. UNIQUE PTR in AR_DINFO. */
1053 /* --- */
1054 Int level;
1055 Type* typeR;
1056 GExpr* gexpr; /* for this variable */
1057 GExpr* fbGX; /* to find the frame base of the enclosing fn, if
1058 any */
1059 UChar* fName; /* declaring file name, or NULL */
1060 Int fLine; /* declaring file line number, or zero */
1061 /* offset in .debug_info, so that abstract instances can be
1062 found to satisfy references from concrete instances. */
1063 UWord dioff;
1064 UWord absOri; /* so the absOri fields refer to dioff fields
1065 in some other, related TempVar. */
1066 }
1067 TempVar;
1068
1069#define N_D3_VAR_STACK 24
1070
1071typedef
1072 struct {
1073 /* Contains the range stack: a stack of address ranges, one
1074 stack entry for each nested scope.
1075
1076 Some scope entries are created by function definitions
1077 (DW_AT_subprogram), and for those, we also note the GExpr
1078 derived from its DW_AT_frame_base attribute, if any.
1079 Consequently it should be possible to find, for any
1080 variable's DIE, the GExpr for the the containing function's
1081 DW_AT_frame_base by scanning back through the stack to find
1082 the nearest entry associated with a function. This somewhat
1083 elaborate scheme is provided so as to make it possible to
1084 obtain the correct DW_AT_frame_base expression even in the
1085 presence of nested functions (or to be more precise, in the
1086 presence of nested DW_AT_subprogram DIEs).
1087 */
1088 Int sp; /* [sp] is innermost active entry; sp==-1 for empty
1089 stack */
1090 XArray* ranges[N_D3_VAR_STACK]; /* XArray of AddrRange */
1091 Int level[N_D3_VAR_STACK]; /* D3 DIE levels */
1092 Bool isFunc[N_D3_VAR_STACK]; /* from DW_AT_subprogram? */
1093 GExpr* fbGX[N_D3_VAR_STACK]; /* if isFunc, contains the FB
1094 expr, else NULL */
1095 /* The file name table. Is a mapping from integer index to the
1096 (permanent) copy of the string, iow a non-img area. */
1097 XArray* /* of UChar* */ filenameTable;
1098 }
1099 D3VarParser;
1100
1101static void varstack_show ( D3VarParser* parser, HChar* str ) {
1102 Word i, j;
1103 VG_(printf)(" varstack (%s) {\n", str);
1104 for (i = 0; i <= parser->sp; i++) {
1105 XArray* xa = parser->ranges[i];
1106 vg_assert(xa);
1107 VG_(printf)(" [%ld] (level %d)", i, parser->level[i]);
1108 if (parser->isFunc[i]) {
1109 VG_(printf)(" (fbGX=%p)", parser->fbGX[i]);
1110 } else {
1111 vg_assert(parser->fbGX[i] == NULL);
1112 }
1113 VG_(printf)(": ");
1114 if (VG_(sizeXA)( xa ) == 0) {
1115 VG_(printf)("** empty PC range array **");
1116 } else {
1117 for (j = 0; j < VG_(sizeXA)( xa ); j++) {
1118 AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j );
1119 vg_assert(range);
1120 VG_(printf)("[%p,%p] ", range->aMin, range->aMax);
1121 }
1122 }
1123 VG_(printf)("\n");
1124 }
1125 VG_(printf)(" }\n");
1126}
1127
1128/* Remove from the stack, all entries with .level > 'level' */
1129static
1130void varstack_preen ( D3VarParser* parser, Bool td3, Int level )
1131{
1132 Bool changed = False;
1133 vg_assert(parser->sp < N_D3_VAR_STACK);
1134 while (True) {
1135 vg_assert(parser->sp >= -1);
1136 if (parser->sp == -1) break;
1137 if (parser->level[parser->sp] <= level) break;
1138 if (0)
1139 TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1);
1140 vg_assert(parser->ranges[parser->sp]);
1141 /* Who allocated this xa? get_range_list() or
1142 unitary_range_list(). */
1143 VG_(deleteXA)( parser->ranges[parser->sp] );
1144 parser->ranges[parser->sp] = NULL;
1145 parser->level[parser->sp] = 0;
1146 parser->isFunc[parser->sp] = False;
1147 parser->fbGX[parser->sp] = NULL;
1148 parser->sp--;
1149 changed = True;
1150 }
1151 if (changed && td3)
1152 varstack_show( parser, "after preen" );
1153}
1154
1155static void varstack_push ( CUConst* cc,
1156 D3VarParser* parser,
1157 Bool td3,
1158 XArray* ranges, Int level,
1159 Bool isFunc, GExpr* fbGX ) {
1160 if (0)
1161 TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d %p\n",
1162 parser->sp+1, level, ranges);
1163
1164 /* First we need to zap everything >= 'level', as we are about to
1165 replace any previous entry at 'level', so .. */
1166 varstack_preen(parser, /*td3*/False, level-1);
1167
1168 vg_assert(parser->sp >= -1);
1169 vg_assert(parser->sp < N_D3_VAR_STACK);
1170 if (parser->sp == N_D3_VAR_STACK-1)
1171 cc->barf("varstack_push: N_D3_VAR_STACK is too low; "
1172 "increase and recompile");
1173 if (parser->sp >= 0)
1174 vg_assert(parser->level[parser->sp] < level);
1175 parser->sp++;
1176 vg_assert(parser->ranges[parser->sp] == NULL);
1177 vg_assert(parser->level[parser->sp] == 0);
1178 vg_assert(parser->isFunc[parser->sp] == False);
1179 vg_assert(parser->fbGX[parser->sp] == NULL);
1180 vg_assert(ranges != NULL);
1181 if (!isFunc) vg_assert(fbGX == NULL);
1182 parser->ranges[parser->sp] = ranges;
1183 parser->level[parser->sp] = level;
1184 parser->isFunc[parser->sp] = isFunc;
1185 parser->fbGX[parser->sp] = fbGX;
1186 if (td3)
1187 varstack_show( parser, "after push" );
1188}
1189
1190
1191/* cts, ctsSzB, ctsMemSzB are derived from a DW_AT_location and so
1192 refer either to a location expression or to a location list.
1193 Figure out which, and in both cases bundle the expression or
1194 location list into a so-called GExpr (guarded expression). */
1195__attribute__((noinline))
1196static GExpr* get_GX ( CUConst* cc, Bool td3,
1197 ULong cts, Int ctsSzB, UWord ctsMemSzB )
1198{
1199 GExpr* gexpr = NULL;
1200 if (ctsMemSzB > 0 && ctsSzB == 0) {
1201 /* represents an in-line location expression, and cts points
1202 right at it */
1203 gexpr = make_singleton_GX( (UChar*)(UWord)cts, ctsMemSzB );
1204 }
1205 else
1206 if (ctsMemSzB == 0 && ctsSzB > 0) {
1207 /* represents location list. cts is the offset of it in
1208 .debug_loc. */
1209 if (!cc->cu_svma_known)
1210 cc->barf("get_GX: location list, but CU svma is unknown");
1211 gexpr = make_general_GX( cc, td3, (UWord)cts, cc->cu_svma );
1212 }
1213 else {
1214 vg_assert(0); /* else caller is bogus */
1215 }
1216 return gexpr;
1217}
1218
1219
1220static
1221void read_filename_table( /*MOD*/D3VarParser* parser,
1222 CUConst* cc, UWord debug_line_offset,
1223 Bool td3 )
1224{
1225 Bool is_dw64;
1226 Cursor c;
1227 Word i;
1228 ULong unit_length;
1229 UShort version;
1230 ULong header_length;
1231 UChar minimum_instruction_length;
1232 UChar default_is_stmt;
1233 Char line_base;
1234 UChar line_range;
1235 UChar opcode_base;
1236 UChar* str;
1237
1238 vg_assert(parser && cc && cc->barf);
1239 if ((!cc->debug_line_img)
1240 || cc->debug_line_sz <= debug_line_offset)
1241 cc->barf("read_filename_table: .debug_line is missing?");
1242
1243 init_Cursor( &c, cc->debug_line_img,
1244 cc->debug_line_sz, debug_line_offset, cc->barf,
1245 "Overrun whilst reading .debug_line section(1)" );
1246
1247 unit_length
1248 = get_Initial_Length( &is_dw64, &c,
1249 "read_filename_table: invalid initial-length field" );
1250 version = get_UShort( &c );
1251 if (version != 2)
1252 cc->barf("read_filename_table: Only DWARF version 2 line info "
1253 "is currently supported.");
1254 header_length = (ULong)get_Dwarfish_UWord( &c, is_dw64 );
1255 minimum_instruction_length = get_UChar( &c );
1256 default_is_stmt = get_UChar( &c );
1257 line_base = (Char)get_UChar( &c );
1258 line_range = get_UChar( &c );
1259 opcode_base = get_UChar( &c );
1260 /* skip over "standard_opcode_lengths" */
1261 for (i = 1; i < (Word)opcode_base; i++)
1262 (void)get_UChar( &c );
1263
1264 /* skip over the directory names table */
1265 while (peek_UChar(&c) != 0) {
1266 (void)get_AsciiZ(&c);
1267 }
1268 (void)get_UChar(&c); /* skip terminating zero */
1269
1270 /* Read and record the file names table */
1271 vg_assert(parser->filenameTable);
1272 vg_assert( VG_(sizeXA)( parser->filenameTable ) == 0 );
1273 /* Add a dummy index-zero entry. DWARF3 numbers its files
1274 from 1, for some reason. */
1275 str = ML_(addStr)( cc->di, "<unknown_file>", -1 );
1276 VG_(addToXA)( parser->filenameTable, &str );
1277 while (peek_UChar(&c) != 0) {
1278 str = get_AsciiZ(&c);
1279 TRACE_D3(" read_filename_table: %ld %s\n",
1280 VG_(sizeXA)(parser->filenameTable), str);
1281 str = ML_(addStr)( cc->di, str, -1 );
1282 VG_(addToXA)( parser->filenameTable, &str );
1283 (void)get_ULEB128( &c ); /* skip directory index # */
1284 (void)get_ULEB128( &c ); /* skip last mod time */
1285 (void)get_ULEB128( &c ); /* file size */
1286 }
1287 /* We're done! The rest of it is not interesting. */
1288}
1289
1290
/* Parse one DIE on behalf of the variable reader.

   First, the range stack in 'parser' is preened so that no entry
   deeper than 'level'-1 remains.  Then, depending on 'dtag':

   - DW_TAG_compile_unit: the low_pc/high_pc/ranges attributes are
     collected, the file name table is read if DW_AT_stmt_list is
     present, cc->cu_svma / cc->cu_svma_known are set (level 0
     only), and a non-function scope entry is pushed.

   - DW_TAG_lexical_block / DW_TAG_subprogram: the address range
     attributes (and, for subprograms, DW_AT_frame_base) are
     collected and, where ranges were supplied, a scope entry is
     pushed.

   - DW_TAG_variable / DW_TAG_formal_parameter: name, location,
     type and sundry other attributes are collected and, if the
     result looks useful, a new TempVar is prepended to *tempvars.

   - any other tag: nothing further is read.

   Every GExpr created along the way is prepended to *gexprs.

   'posn' is recorded as the TempVar's .dioff and is printed in
   diagnostics.  'c_die' is the cursor from which attribute values
   are read; 'c_abbv' is the cursor from which the (attribute,
   form) pairs describing them are read.  Both are advanced.

   If the DIE can't be made sense of, both cursors are rewound to
   where they were on entry, the DIE is printed, and cc->barf is
   called. */
__attribute__((noinline))
static void parse_var_DIE ( /*OUT*/TempVar** tempvars,
                            /*OUT*/GExpr** gexprs,
                            /*MOD*/D3VarParser* parser,
                            DW_TAG dtag,
                            UWord posn,
                            Int level,
                            Cursor* c_die,
                            Cursor* c_abbv,
                            CUConst* cc,
                            Bool td3 )
{
   ULong cts;
   Int   ctsSzB;
   UWord ctsMemSzB;

   /* Remember where we started, so that the bad_DIE path can rewind
      and print the whole DIE. */
   UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
   UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );

   /* Drop scopes which we have now left. */
   varstack_preen( parser, td3, level-1 );

   if (dtag == DW_TAG_compile_unit) {
      Bool have_lo    = False;
      Bool have_hi1   = False;
      Bool have_range = False;
      Addr ip_lo    = 0;
      Addr ip_hi1   = 0;
      Addr rangeoff = 0;
      /* Walk the (attr, form) pairs; a (0,0) pair ends the list. */
      while (True) {
         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
         if (attr == 0 && form == 0) break;
         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
                            cc, c_die, False/*td3*/, form );
         if (attr == DW_AT_low_pc && ctsSzB > 0) {
            ip_lo   = cts;
            have_lo = True;
         }
         if (attr == DW_AT_high_pc && ctsSzB > 0) {
            ip_hi1   = cts;
            have_hi1 = True;
         }
         if (attr == DW_AT_ranges && ctsSzB > 0) {
            rangeoff = cts;
            have_range = True;
         }
         if (attr == DW_AT_stmt_list && ctsSzB > 0) {
            /* cts is the offset of this CU's line info in
               .debug_line */
            read_filename_table( parser, cc, (UWord)cts, td3 );
         }
      }
      /* Now, does this give us an opportunity to find this
         CU's svma? */
#if 0
      if (level == 0 && have_lo) {
         vg_assert(!cc->cu_svma_known); /* if this fails, it must be
         because we've already seen a DW_TAG_compile_unit DIE at level
         0.  But that can't happen, because DWARF3 only allows exactly
         one top level DIE per CU. */
         cc->cu_svma_known = True;
         cc->cu_svma = ip_lo;
         if (1)
            TRACE_D3("BBBBAAAA acquire CU_SVMA of %p\n", cc->cu_svma);
         /* Now, it may be that this DIE doesn't tell us the CU's
            SVMA, by way of not having a DW_AT_low_pc.  That's OK --
            the CU doesn't *have* to have its SVMA specified.

            But as per last para D3 spec sec 3.1.1 ("Normal and
            Partial Compilation Unit Entries", "If the base address
            (viz, the SVMA) is undefined, then any DWARF entry of
            structure defined in terms of the base address of that
            compilation unit is not valid.".  So that means, if whilst
            processing the children of this top level DIE (or their
            children, etc) we see a DW_AT_range, and cu_svma_known is
            False, then the DIE that contains it is (per the spec)
            invalid, and we can legitimately stop and complain. */
      }
#else
      /* .. whereas The Reality is, simply assume the SVMA is zero
         if it isn't specified. */
      if (level == 0) {
         vg_assert(!cc->cu_svma_known);
         cc->cu_svma_known = True;
         if (have_lo)
            cc->cu_svma = ip_lo;
         else
            cc->cu_svma = 0;
      }
#endif
      /* Do we have something that looks sane? */
      if (have_lo && have_hi1 && (!have_range)) {
         /* A single range.  The entry pushed covers
            [ip_lo, ip_hi1-1]; nothing at all is pushed if that
            would be empty. */
         if (ip_lo < ip_hi1)
            varstack_push( cc, parser, td3, 
                           unitary_range_list(ip_lo, ip_hi1 - 1),
                           level,
                           False/*isFunc*/, NULL/*fbGX*/ );
      } else
      if ((!have_lo) && (!have_hi1) && have_range) {
         /* A range list, read relative to the CU's svma as
            established just above. */
         varstack_push( cc, parser, td3, 
                        get_range_list( cc, td3,
                                        rangeoff, cc->cu_svma ),
                        level,
                        False/*isFunc*/, NULL/*fbGX*/ );
      } else
      if ((!have_lo) && (!have_hi1) && (!have_range)) {
         /* CU has no code, presumably? */
         varstack_push( cc, parser, td3, 
                        empty_range_list(),
                        level,
                        False/*isFunc*/, NULL/*fbGX*/ );
      } else
         /* any other combination of low_pc/high_pc/ranges */
         goto bad_DIE;
   }

   if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
      Bool   have_lo    = False;
      Bool   have_hi1   = False;
      Bool   have_range = False;
      Addr   ip_lo      = 0;
      Addr   ip_hi1     = 0;
      Addr   rangeoff   = 0;
      Bool   isFunc     = dtag == DW_TAG_subprogram;
      GExpr* fbGX       = NULL;
      /* Walk the (attr, form) pairs; a (0,0) pair ends the list. */
      while (True) {
         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
         if (attr == 0 && form == 0) break;
         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
                            cc, c_die, False/*td3*/, form );
         if (attr == DW_AT_low_pc && ctsSzB > 0) {
            ip_lo   = cts;
            have_lo = True;
         }
         if (attr == DW_AT_high_pc && ctsSzB > 0) {
            ip_hi1   = cts;
            have_hi1 = True;
         }
         if (attr == DW_AT_ranges && ctsSzB > 0) {
            rangeoff = cts;
            have_range = True;
         }
         /* Only functions get a frame base.  The size test selects
            exactly the two shapes that get_GX accepts. */
         if (isFunc
             && attr == DW_AT_frame_base
             && ((ctsMemSzB > 0 && ctsSzB == 0)
                 || (ctsMemSzB == 0 && ctsSzB > 0))) {
            fbGX = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB );
            vg_assert(fbGX);
            vg_assert(!fbGX->next);
            /* prepend to the caller's list of all GExprs */
            fbGX->next = *gexprs;
            *gexprs = fbGX;
         }
      }
      /* Do we have something that looks sane? */
      if (dtag == DW_TAG_subprogram
          && (!have_lo) && (!have_hi1) && (!have_range)) {
         /* This is legit - ignore it.  Sec 3.3.3: "A subroutine entry
            representing a subroutine declaration that is not also a
            definition does not have code address or range
            attributes." */
      } else
      if (dtag == DW_TAG_lexical_block
          && (!have_lo) && (!have_hi1) && (!have_range)) {
         /* I believe this is legit, and means the lexical block
            contains no insns (whatever that might mean).  Ignore. */
      } else
      if (have_lo && have_hi1 && (!have_range)) {
         /* This scope supplies just a single address range. */
         if (ip_lo < ip_hi1)
            varstack_push( cc, parser, td3, 
                           unitary_range_list(ip_lo, ip_hi1 - 1),
                           level, isFunc, fbGX );
      } else
      if ((!have_lo) && (!have_hi1) && have_range) {
         /* This scope supplies multiple address ranges via the use of
            a range list. */
         varstack_push( cc, parser, td3, 
                        get_range_list( cc, td3,
                                        rangeoff, cc->cu_svma ),
                        level, isFunc, fbGX );
      } else
      if (have_lo && (!have_hi1) && (!have_range)) {
         /* This scope is bogus.  The D3 spec sec 3.4 (Lexical Block
            Entries) says fairly clearly that a scope must have either
            _range or (_low_pc and _high_pc). */
         /* The spec is a bit ambiguous though.  Perhaps a single byte
            range is intended?  See sec 2.17 (Code Addresses And Ranges) */
         /* This case is here because icc9 produced this:
         <2><13bd>: DW_TAG_lexical_block
            DW_AT_decl_line   : 5229
            DW_AT_decl_column : 37
            DW_AT_decl_file   : 1
            DW_AT_low_pc      : 0x401b03
         */
         /* Ignore (seems safer than pushing a single byte range) */
      } else
         goto bad_DIE;
   }

   if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
      UChar* name        = NULL;
      Type*  typeR       = D3_INVALID_CUOFF;
      Bool   external    = False;
      GExpr* gexpr       = NULL;
      /* NB: n_attrs and declaration are computed below but are not
         otherwise consulted in this function. */
      Int    n_attrs     = 0;
      UWord  abs_ori     = (UWord)D3_INVALID_CUOFF;
      Bool   declaration = False;
      Int    lineNo      = 0;
      UChar* fileName    = NULL;
      /* Walk the (attr, form) pairs; a (0,0) pair ends the list. */
      while (True) {
         DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
         DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
         if (attr == 0 && form == 0) break;
         get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
                            cc, c_die, False/*td3*/, form );
         n_attrs++;
         if (attr == DW_AT_name && ctsMemSzB > 0) {
            /* take a permanent copy of the name */
            name = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
         }
         /* The size test selects exactly the two shapes that get_GX
            accepts. */
         if (attr == DW_AT_location
             && ((ctsMemSzB > 0 && ctsSzB == 0)
                 || (ctsMemSzB == 0 && ctsSzB > 0))) {
            gexpr = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB );
            vg_assert(gexpr);
            vg_assert(!gexpr->next);
            /* prepend to the caller's list of all GExprs */
            gexpr->next = *gexprs;
            *gexprs = gexpr;
         }
         if (attr == DW_AT_type && ctsSzB > 0) {
            /* at this point typeR is merely the reference value cast
               to a pointer, not a pointer to a real Type */
            typeR = (Type*)(UWord)cts;
         }
         if (attr == DW_AT_external && ctsSzB > 0 && cts > 0) {
            external = True;
         }
         if (attr == DW_AT_abstract_origin && ctsSzB > 0) {
            abs_ori = (UWord)cts;
         }
         if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) {
            declaration = True;
         }
         if (attr == DW_AT_decl_line && ctsSzB > 0) {
            lineNo = (Int)cts;
         }
         if (attr == DW_AT_decl_file && ctsSzB > 0) {
            Int ftabIx = (Int)cts;
            /* Index 0 is the dummy entry, so only indices >= 1 that
               are actually in the table are looked up; otherwise
               fileName is left as it was. */
            if (ftabIx >= 1
                && ftabIx < VG_(sizeXA)( parser->filenameTable )) {
               fileName = *(UChar**)
                          VG_(indexXA)( parser->filenameTable, ftabIx );
               vg_assert(fileName);
            }
            if (0) VG_(printf)("XXX filename = %s\n", fileName);
         }
      }
      /* We'll collect it if one of the following three conditions
         holds:
         (1) has location and type    -> completed
         (2) has type only            -> is an abstract instance
         (3) has location and abs_ori -> is a concrete instance
         Name, filename and line number are all optional frills.
         (Note that (1) is implied by (2); it is spelled out only to
         mirror this list.)
      */
      if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF) 
           /* 2 */ || (typeR != D3_INVALID_CUOFF)
           /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {

         /* Add this variable to the list of interesting looking
            variables.  Crucially, note along with it the address
            range(s) associated with the variable, which for locals
            will be the address ranges at the top of the varparser's
            stack. */
         GExpr*   fbGX = NULL;
         Word     i, nRanges;
         XArray*  /* of AddrRange */ xa;
         TempVar* tv;
         /* Stack can't be empty; we put a dummy entry on it for the
            entire address range before starting with the DIEs for
            this CU. */
         vg_assert(parser->sp >= 0);

         /* If this is a local variable (non-external), try to find
            the GExpr for the DW_AT_frame_base of the containing
            function.  It should have been pushed on the stack at the
            time we encountered its DW_TAG_subprogram DIE, so the way
            to find it is to scan back down the stack looking for it.
            If there isn't an enclosing stack entry marked 'isFunc'
            then we must be seeing variable or formal param DIEs
            outside of a function, so we deem the Dwarf to be
            malformed if that happens.  Note that the fbGX may be NULL
            if the containing DW_TAG_subprogram didn't supply a
            DW_AT_frame_base -- that's OK, but there must actually be
            a containing DW_TAG_subprogram. */
         if (!external) {
            Bool found = False;
            for (i = parser->sp; i >= 0; i--) {
               if (parser->isFunc[i]) {
                  fbGX = parser->fbGX[i];
                  found = True;
                  break;
               }
            }
            if (!found) {
               if (0 && VG_(clo_verbosity) >= 0) {
                  VG_(message)(Vg_DebugMsg, 
                     "warning: parse_var_DIE: non-external variable "
                     "outside DW_TAG_subprogram");
               }
               /* goto bad_DIE; */
               /* This seems to happen a lot.  Just ignore it -- if,
                  when we come to evaluation of the location (guarded)
                  expression, it requires a frame base value, and
                  there's no expression for that, then evaluation as a
                  whole will fail.  Harmless - a bit of a waste of
                  cycles but nothing more. */
            }
         }

         /* re "external ? 0 : parser->sp" (twice), if the var is
            marked 'external' then we must put it at the global scope,
            as only the global scope (level 0) covers the entire PC
            address space.  It is asserted elsewhere that level 0 
            always covers the entire address space. */
         xa = parser->ranges[external ? 0 : parser->sp];
         nRanges = VG_(sizeXA)(xa);
         vg_assert(nRanges >= 0);

         tv = ML_(dinfo_zalloc)( sizeof(TempVar) );
         tv->name   = name;
         /* NB: this is a range stack index, not the DIE's 'level' */
         tv->level  = external ? 0 : parser->sp;
         tv->typeR  = typeR;
         tv->gexpr  = gexpr;
         tv->fbGX   = fbGX;
         tv->fName  = fileName;
         tv->fLine  = lineNo;
         tv->dioff  = posn;
         tv->absOri = abs_ori;

         /* See explanation on definition of type TempVar for the
            reason for this elaboration. */
         tv->nRanges = nRanges;
         tv->rngOneMin = 0;
         tv->rngOneMax = 0;
         tv->rngMany = NULL;
         if (nRanges == 1) {
            AddrRange* range = VG_(indexXA)(xa, 0);
            tv->rngOneMin = range->aMin;
            tv->rngOneMax = range->aMax;
         }
         else if (nRanges > 1) {
            /* a private copy is needed, since 'xa' belongs to the
               range stack and is freed when its scope is popped */
            tv->rngMany = VG_(cloneXA)( xa ); /* free when 'tv' freed */
         }

         /* prepend to the caller's list */
         tv->next  = *tempvars;
         *tempvars = tv;

         TRACE_D3(" Recording this variable, with %ld PC range(s)\n",
                  VG_(sizeXA)(xa) );
         /* collect stats on how effective the ->ranges special
            casing is */
         if (0) {
           static Int ntot=0, ngt=0;
           ntot++;
           if (tv->rngMany) ngt++;
           if (0 == (ntot % 100000))
              VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
         }

      }

      /* Here are some other weird cases seen in the wild:

            We have a variable with a name and a type, but no
            location.  I guess that's a sign that it has been
            optimised away.  Ignore it.  Here's an example:

            static Int lc_compar(void* n1, void* n2) {
               MC_Chunk* mc1 = *(MC_Chunk**)n1;
               MC_Chunk* mc2 = *(MC_Chunk**)n2;
               return (mc1->data < mc2->data ? -1 : 1);
            }

            Both mc1 and mc2 are like this
            <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
                DW_AT_name        : mc1
                DW_AT_decl_file   : 1
                DW_AT_decl_line   : 216
                DW_AT_type        : <5d3>

            whereas n1 and n2 do have locations specified.

            ---------------------------------------------

            We see a DW_TAG_formal_parameter with a type, but
            no name and no location.  It's probably part of a function type
            construction, thusly, hence ignore it:
         <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
             DW_AT_sibling     : <2c9>
             DW_AT_prototyped  : 1
             DW_AT_type        : <114>
         <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
             DW_AT_type        : <13e>
         <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
             DW_AT_type        : <133>

            ---------------------------------------------

            Is very minimal, like this:
            <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
                DW_AT_abstract_origin: <7ba>
            What that signifies I have no idea.  Ignore.

            ----------------------------------------------

            Is very minimal, like this:
            <200f>: DW_TAG_formal_parameter
                DW_AT_abstract_ori: <1f4c>
                DW_AT_location    : 13440
            What that signifies I have no idea.  Ignore. 
            It might be significant, though: the variable at least
            has a location and so might exist somewhere.
            Maybe we should handle this.

            ---------------------------------------------

            <22407>: DW_TAG_variable
              DW_AT_name        : (indirect string, offset: 0x6579):
                                  vgPlain_trampoline_stuff_start
              DW_AT_decl_file   : 29
              DW_AT_decl_line   : 56
              DW_AT_external    : 1
              DW_AT_declaration : 1

            Nameless and typeless variable that has a location?  Who
            knows.  Not me.
            <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
                 DW_AT_location    : 9 byte block: 3 c0 c7 13 38 0 0 0 0
                                     (DW_OP_addr: 3813c7c0)

            No, really.  Check it out.  gcc is quite simply borked.
            <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
            // followed by no attributes, and the next DIE is a sibling,
            // not a child
      */
   }
   return;

  bad_DIE:
   /* Rewind both cursors to the start of this DIE, then walk it
      again purely in order to print it, and finally give up. */
   set_position_of_Cursor( c_die,  saved_die_c_offset );
   set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
   VG_(printf)("\nparse_var_DIE: confused by:\n");
   VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) );
   while (True) {
      DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
      DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
      if (attr == 0 && form == 0) break;
      VG_(printf)(" %18s: ", ML_(pp_DW_AT)(attr));
      /* Get the form contents, so as to print them */
      get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
                         cc, c_die, True, form );
      VG_(printf)("\t\n");
   }
   VG_(printf)("\n");
   cc->barf("parse_var_DIE: confused by the above DIE");
   /*NOTREACHED*/
}
1753
1754
1755/*------------------------------------------------------------*/
1756/*--- ---*/
1757/*--- Parsing of type-related DIEs ---*/
1758/*--- ---*/
1759/*------------------------------------------------------------*/
1760
1761#define N_D3_TYPE_STACK 16
1762
1763typedef
1764 struct {
1765 /* What source language? 'C'=C/C++, 'F'=Fortran, '?'=other
1766 Established once per compilation unit. */
1767 UChar language;
1768 /* A stack of types which are currently under construction */
1769 Int sp; /* [sp] is innermost active entry; sp==-1 for empty
1770 stack */
1771 Type* qparent[N_D3_TYPE_STACK];
1772 Int qlevel[N_D3_TYPE_STACK];
1773
1774 }
1775 D3TypeParser;
1776
1777static void typestack_show ( D3TypeParser* parser, HChar* str ) {
1778 Word i;
1779 VG_(printf)(" typestack (%s) {\n", str);
1780 for (i = 0; i <= parser->sp; i++) {
1781 VG_(printf)(" [%ld] (level %d): ", i, parser->qlevel[i]);
1782 ML_(pp_Type)( parser->qparent[i] );
1783 VG_(printf)("\n");
1784 }
1785 VG_(printf)(" }\n");
1786}
1787
1788/* Remove from the stack, all entries with .level > 'level' */
1789static
1790void typestack_preen ( D3TypeParser* parser, Bool td3, Int level )
1791{
1792 Bool changed = False;
1793 vg_assert(parser->sp < N_D3_TYPE_STACK);
1794 while (True) {
1795 vg_assert(parser->sp >= -1);
1796 if (parser->sp == -1) break;
1797 if (parser->qlevel[parser->sp] <= level) break;
1798 if (0)
1799 TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1);
1800 vg_assert(parser->qparent[parser->sp]);
1801 parser->qparent[parser->sp] = NULL;
1802 parser->qlevel[parser->sp] = 0;
1803 parser->sp--;
1804 changed = True;
1805 }
1806 if (changed && td3)
1807 typestack_show( parser, "after preen" );
1808}
1809
1810static Bool typestack_is_empty ( D3TypeParser* parser ) {
1811 vg_assert(parser->sp >= -1 && parser->sp < N_D3_TYPE_STACK);
1812 return parser->sp == -1;
1813}
1814
1815static void typestack_push ( CUConst* cc,
1816 D3TypeParser* parser,
1817 Bool td3,
1818 Type* parent, Int level ) {
1819 if (0)
1820 TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d %p\n",
1821 parser->sp+1, level, parent);
1822
1823 /* First we need to zap everything >= 'level', as we are about to
1824 replace any previous entry at 'level', so .. */
1825 typestack_preen(parser, /*td3*/False, level-1);
1826
1827 vg_assert(parser->sp >= -1);
1828 vg_assert(parser->sp < N_D3_TYPE_STACK);
1829 if (parser->sp == N_D3_TYPE_STACK-1)
1830 cc->barf("typestack_push: N_D3_TYPE_STACK is too low; "
1831 "increase and recompile");
1832 if (parser->sp >= 0)
1833 vg_assert(parser->qlevel[parser->sp] < level);
1834 parser->sp++;
1835 vg_assert(parser->qparent[parser->sp] == NULL);
1836 vg_assert(parser->qlevel[parser->sp] == 0);
1837 vg_assert(parent != NULL);
1838 parser->qparent[parser->sp] = parent;
1839 parser->qlevel[parser->sp] = level;
1840 if (td3)
1841 typestack_show( parser, "after push" );
1842}
1843
1844
1845/* Parse a type-related DIE. 'parser' holds the current parser state.
1846 'admin' is where the completed types are dumped. 'dtag' is the tag
1847 for this DIE. 'c_die' points to the start of the data fields (FORM
1848 stuff) for the DIE. c_abbv points to the start of the (name,form)
1849 pairs which describe the DIE.
1850
1851 We may find the DIE uninteresting, in which case we should ignore
1852 it.
1853*/
1854__attribute__((noinline))
1855static void parse_type_DIE ( /*OUT*/TyAdmin** admin,
1856 /*MOD*/D3TypeParser* parser,
1857 DW_TAG dtag,
1858 UWord posn,
1859 Int level,
1860 Cursor* c_die,
1861 Cursor* c_abbv,
1862 CUConst* cc,
1863 Bool td3 )
1864{
1865 ULong cts;
1866 Int ctsSzB;
1867 UWord ctsMemSzB;
1868 Type* type = NULL;
1869 TyAtom* atom = NULL;
1870 TyField* field = NULL;
1871 D3Expr* expr = NULL;
1872 TyBounds* bounds = NULL;
1873
1874 UWord saved_die_c_offset = get_position_of_Cursor( c_die );
1875 UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
1876
1877 /* If we've returned to a level at or above any previously noted
1878 parent, un-note it, so we don't believe we're still collecting
1879 its children. */
1880 typestack_preen( parser, td3, level-1 );
1881
1882 if (dtag == DW_TAG_compile_unit) {
1883 /* See if we can find DW_AT_language, since it is important for
1884 establishing array bounds (see DW_TAG_subrange_type below in
1885 this fn) */
1886 while (True) {
1887 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
1888 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1889 if (attr == 0 && form == 0) break;
1890 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1891 cc, c_die, False/*td3*/, form );
1892 if (attr != DW_AT_language)
1893 continue;
1894 if (ctsSzB == 0)
1895 goto bad_DIE;
1896 switch (cts) {
1897 case DW_LANG_C89: case DW_LANG_C:
1898 case DW_LANG_C_plus_plus: case DW_LANG_ObjC:
1899 case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC:
1900 case DW_LANG_Upc:
1901 parser->language = 'C'; break;
1902 case DW_LANG_Fortran77: case DW_LANG_Fortran90:
1903 case DW_LANG_Fortran95:
1904 parser->language = 'F'; break;
1905 case DW_LANG_Ada83: case DW_LANG_Cobol74:
1906 case DW_LANG_Cobol85: case DW_LANG_Pascal83:
1907 case DW_LANG_Modula2: case DW_LANG_Java:
1908 case DW_LANG_C99: case DW_LANG_Ada95:
1909 case DW_LANG_PLI: case DW_LANG_D:
1910 case DW_LANG_Mips_Assembler:
1911 parser->language = '?'; break;
1912 default:
1913 goto bad_DIE;
1914 }
1915 }
1916 }
1917
1918 if (dtag == DW_TAG_base_type) {
1919 /* We can pick up a new base type any time. */
1920 type = ML_(new_Type)();
1921 type->tag = Ty_Base;
1922 while (True) {
1923 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
1924 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1925 if (attr == 0 && form == 0) break;
1926 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
1927 cc, c_die, False/*td3*/, form );
1928 if (attr == DW_AT_name && ctsMemSzB > 0) {
1929 type->Ty.Base.name
1930 = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
1931 }
1932 if (attr == DW_AT_byte_size && ctsSzB > 0) {
1933 type->Ty.Base.szB = cts;
1934 }
1935 if (attr == DW_AT_encoding && ctsSzB > 0) {
1936 switch (cts) {
1937 case DW_ATE_unsigned: case DW_ATE_unsigned_char:
1938 case DW_ATE_boolean:/* FIXME - is this correct? */
1939 type->Ty.Base.enc = 'U'; break;
1940 case DW_ATE_signed: case DW_ATE_signed_char:
1941 type->Ty.Base.enc = 'S'; break;
1942 case DW_ATE_float:
1943 type->Ty.Base.enc = 'F'; break;
1944 case DW_ATE_complex_float:
1945 type->Ty.Base.enc = 'C'; break;
1946 default:
1947 goto bad_DIE;
1948 }
1949 }
1950 }
1951
1952 /* Invent a name if it doesn't have one. gcc-4.3
1953 -ftree-vectorize is observed to emit nameless base types. */
1954 if (!type->Ty.Base.name)
1955 type->Ty.Base.name
1956 = ML_(addStr)( cc->di, "<anon_base_type>", -1 );
1957
1958 /* Do we have something that looks sane? */
1959 if (/* must have a name */
1960 type->Ty.Base.name == NULL
1961 /* and a plausible size. Yes, really 32: "complex long
1962 double" apparently has size=32 */
1963 || type->Ty.Base.szB < 0 || type->Ty.Base.szB > 32
1964 /* and a plausible encoding */
1965 || (type->Ty.Base.enc != 'U'
1966 && type->Ty.Base.enc != 'S'
1967 && type->Ty.Base.enc != 'F'
1968 && type->Ty.Base.enc != 'C'))
1969 goto bad_DIE;
1970 /* Last minute hack: if we see this
1971 <1><515>: DW_TAG_base_type
1972 DW_AT_byte_size : 0
1973 DW_AT_encoding : 5
1974 DW_AT_name : void
1975 convert it into a real Void type. */
1976 if (type->Ty.Base.szB == 0
1977 && 0 == VG_(strcmp)("void", type->Ty.Base.name)) {
1978 VG_(memset)(type, 0, sizeof(*type));
1979 type->tag = Ty_Void;
1980 type->Ty.Void.isFake = False; /* it's a real one! */
1981 }
1982 goto acquire_Type;
1983 }
1984
1985 if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type
1986 || dtag == DW_TAG_ptr_to_member_type) {
1987 /* This seems legit for _pointer_type and _reference_type. I
1988 don't know if rolling _ptr_to_member_type in here really is
1989 legit, but it's better than not handling it at all. */
1990 type = ML_(new_Type)();
1991 type->tag = Ty_PorR;
1992 /* target type defaults to void */
1993 type->Ty.PorR.typeR = D3_FAKEVOID_CUOFF;
1994 type->Ty.PorR.isPtr = dtag == DW_TAG_pointer_type
1995 || dtag == DW_TAG_ptr_to_member_type;
1996 /* Pointer types don't *have* to specify their size, in which
1997 case we assume it's a machine word. But if they do specify
1998 it, it must be a machine word :-) This probably assumes that
1999 the word size of the Dwarf3 we're reading is the same size as
2000 that on the machine. gcc appears to give a size whereas icc9
2001 doesn't. */
2002 if (type->Ty.PorR.isPtr)
2003 type->Ty.PorR.szB = sizeof(Word);
2004 while (True) {
2005 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2006 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2007 if (attr == 0 && form == 0) break;
2008 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2009 cc, c_die, False/*td3*/, form );
2010 if (attr == DW_AT_byte_size && ctsSzB > 0) {
2011 type->Ty.PorR.szB = cts;
2012 }
2013 if (attr == DW_AT_type && ctsSzB > 0) {
2014 type->Ty.PorR.typeR = (Type*)(UWord)cts;
2015 }
2016 }
2017 /* Do we have something that looks sane? */
2018 if (type->Ty.PorR.szB != sizeof(Word))
2019 goto bad_DIE;
2020 else
2021 goto acquire_Type;
2022 }
2023
2024 if (dtag == DW_TAG_enumeration_type) {
2025 /* Create a new Type to hold the results. */
2026 type = ML_(new_Type)();
2027 type->tag = Ty_Enum;
2028 type->Ty.Enum.name = NULL;
2029 type->Ty.Enum.atomRs
2030 = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
2031 sizeof(TyAtom*) );
2032 while (True) {
2033 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2034 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2035 if (attr == 0 && form == 0) break;
2036 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2037 cc, c_die, False/*td3*/, form );
2038 if (attr == DW_AT_name && ctsMemSzB > 0) {
2039 type->Ty.Enum.name
2040 = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
2041 }
2042 if (attr == DW_AT_byte_size && ctsSzB > 0) {
2043 type->Ty.Enum.szB = cts;
2044 }
2045 }
2046 /* Do we have something that looks sane? */
2047 if (type->Ty.Enum.szB == 0 /* we must know the size */
2048 /* But the name can be present, or not */)
2049 goto bad_DIE;
2050 /* On't stack! */
2051 typestack_push( cc, parser, td3, type, level );
2052 goto acquire_Type;
2053 }
2054
2055 if (dtag == DW_TAG_enumerator) {
2056 Bool have_value = False;
2057 atom = ML_(new_TyAtom)( NULL, 0 );
2058 while (True) {
2059 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2060 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2061 if (attr == 0 && form == 0) break;
2062 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2063 cc, c_die, False/*td3*/, form );
2064 if (attr == DW_AT_name && ctsMemSzB > 0) {
2065 atom->name = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
2066 }
2067 if (attr == DW_AT_const_value && ctsSzB > 0) {
2068 atom->value = cts;
2069 have_value = True;
2070 }
2071 }
2072 /* Do we have something that looks sane? */
2073 if ((!have_value) || atom->name == NULL)
2074 goto bad_DIE;
2075 /* Do we have a plausible parent? */
2076 if (typestack_is_empty(parser)) goto bad_DIE;
2077 vg_assert(parser->qparent[parser->sp]);
2078 if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
2079 if (parser->qparent[parser->sp]->tag != Ty_Enum) goto bad_DIE;
2080 /* Record this child in the parent */
2081 vg_assert(parser->qparent[parser->sp]->Ty.Enum.atomRs);
2082 VG_(addToXA)( parser->qparent[parser->sp]->Ty.Enum.atomRs, &atom );
2083 /* And record the child itself */
2084 goto acquire_Atom;
2085 }
2086
2087 if (dtag == DW_TAG_structure_type || dtag == DW_TAG_union_type) {
2088 Bool have_szB = False;
2089 Bool is_decl = False;
2090 Bool is_spec = False;
2091 /* Create a new Type to hold the results. */
2092 type = ML_(new_Type)();
2093 type->tag = Ty_StOrUn;
2094 type->Ty.StOrUn.name = NULL;
2095 type->Ty.StOrUn.fields
2096 = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
2097 sizeof(TyAtom*) );
2098 type->Ty.StOrUn.complete = True;
2099 type->Ty.StOrUn.isStruct = dtag == DW_TAG_structure_type;
2100 while (True) {
2101 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2102 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2103 if (attr == 0 && form == 0) break;
2104 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2105 cc, c_die, False/*td3*/, form );
2106 if (attr == DW_AT_name && ctsMemSzB > 0) {
2107 type->Ty.StOrUn.name
2108 = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
2109 }
2110 if (attr == DW_AT_byte_size && ctsSzB >= 0) {
2111 type->Ty.StOrUn.szB = cts;
2112 have_szB = True;
2113 }
2114 if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) {
2115 is_decl = True;
2116 }
2117 if (attr == DW_AT_specification && ctsSzB > 0 && cts > 0) {
2118 is_spec = True;
2119 }
2120 }
2121 /* Do we have something that looks sane? */
2122 if (is_decl && (!is_spec)) {
2123 /* It's a DW_AT_declaration. We require the name but
2124 nothing else. */
2125 if (type->Ty.StOrUn.name == NULL)
2126 goto bad_DIE;
2127 type->Ty.StOrUn.complete = False;
2128 goto acquire_Type;
2129 }
2130 if ((!is_decl) /* && (!is_spec) */) {
2131 /* this is the common, ordinary case */
2132 if ((!have_szB) /* we must know the size */
2133 /* But the name can be present, or not */)
2134 goto bad_DIE;
2135 /* On't stack! */
2136 typestack_push( cc, parser, td3, type, level );
2137 goto acquire_Type;
2138 }
2139 else {
2140 /* don't know how to handle any other variants just now */
2141 goto bad_DIE;
2142 }
2143 }
2144
2145 if (dtag == DW_TAG_member) {
2146 /* Acquire member entries for both DW_TAG_structure_type and
2147 DW_TAG_union_type. They differ minorly, in that struct
2148 members must have a DW_AT_data_member_location expression
2149 whereas union members must not. */
2150 Bool parent_is_struct;
2151 field = ML_(new_TyField)( NULL, NULL, NULL );
2152 field->typeR = D3_INVALID_CUOFF;
2153 expr = NULL;
2154 while (True) {
2155 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2156 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2157 if (attr == 0 && form == 0) break;
2158 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2159 cc, c_die, False/*td3*/, form );
2160 if (attr == DW_AT_name && ctsMemSzB > 0) {
2161 field->name = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
2162 }
2163 if (attr == DW_AT_type && ctsSzB > 0) {
2164 field->typeR = (Type*)(UWord)cts;
2165 }
2166 if (attr == DW_AT_data_member_location && ctsMemSzB > 0) {
2167 UChar* copy = ML_(addStr)( cc->di, (UChar*)(UWord)cts,
2168 (Int)ctsMemSzB );
2169 expr = ML_(new_D3Expr)( copy, (UWord)ctsMemSzB );
2170 }
2171 }
2172 /* Do we have a plausible parent? */
2173 if (typestack_is_empty(parser)) goto bad_DIE;
2174 vg_assert(parser->qparent[parser->sp]);
2175 if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
2176 if (parser->qparent[parser->sp]->tag != Ty_StOrUn) goto bad_DIE;
2177 /* Do we have something that looks sane? If this a member of a
2178 struct, we must have a location expression; but if a member
2179 of a union that is irrelevant (D3 spec sec 5.6.6). We ought
2180 to reject in the latter case, but some compilers have been
2181 observed to emit constant-zero expressions. So just ignore
2182 them. */
2183 parent_is_struct
2184 = parser->qparent[parser->sp]->Ty.StOrUn.isStruct;
2185 if (!field->name)
2186 field->name = ML_(addStr)(cc->di, "<anon_field>", -1);
2187 if ((!field->name) || (field->typeR == D3_INVALID_CUOFF))
2188 goto bad_DIE;
2189 if (parent_is_struct && (!expr))
2190 goto bad_DIE;
2191 if ((!parent_is_struct) && expr) {
2192 /* If this is a union type, pretend we haven't seen the data
2193 member location expression, as it is by definition
2194 redundant (it must be zero). */
2195 expr = NULL;
2196 }
2197 /* Record this child in the parent */
2198 field->isStruct = parent_is_struct;
2199 if (expr)
2200 field->loc = expr;
2201 vg_assert(parser->qparent[parser->sp]->Ty.StOrUn.fields);
2202 VG_(addToXA)( parser->qparent[parser->sp]->Ty.StOrUn.fields,
2203 &field );
2204 /* And record the child itself */
2205 goto acquire_Field_and_Expr;
2206 }
2207
2208 if (dtag == DW_TAG_array_type) {
2209 type = ML_(new_Type)();
2210 type->tag = Ty_Array;
2211 type->Ty.Array.typeR = D3_INVALID_CUOFF;
2212 type->Ty.Array.bounds
2213 = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
2214 sizeof(TyBounds*) );
2215 while (True) {
2216 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2217 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2218 if (attr == 0 && form == 0) break;
2219 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2220 cc, c_die, False/*td3*/, form );
2221 if (attr == DW_AT_type && ctsSzB > 0) {
2222 type->Ty.Array.typeR = (Type*)(UWord)cts;
2223 }
2224 }
2225 if (type->Ty.Array.typeR == D3_INVALID_CUOFF)
2226 goto bad_DIE;
2227 /* On't stack! */
2228 typestack_push( cc, parser, td3, type, level );
2229 goto acquire_Type;
2230 }
2231
2232 if (dtag == DW_TAG_subrange_type) {
2233 Bool have_lower = False;
2234 Bool have_upper = False;
2235 Bool have_count = False;
2236 Long lower = 0;
2237 Long upper = 0;
2238 Long count = 0;
2239
2240 switch (parser->language) {
2241 case 'C': have_lower = True; lower = 0; break;
2242 case 'F': have_lower = True; lower = 1; break;
2243 case '?': have_lower = False; break;
2244 default: vg_assert(0); /* assured us by handling of
2245 DW_TAG_compile_unit in this fn */
2246 }
2247 bounds = ML_(new_TyBounds)();
2248 while (True) {
2249 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2250 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2251 if (attr == 0 && form == 0) break;
2252 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2253 cc, c_die, False/*td3*/, form );
2254 if (attr == DW_AT_lower_bound && ctsSzB > 0) {
2255 lower = (Long)cts;
2256 have_lower = True;
2257 }
2258 if (attr == DW_AT_upper_bound && ctsSzB > 0) {
2259 upper = (Long)cts;
2260 have_upper = True;
2261 }
2262 if (attr == DW_AT_count && ctsSzB > 0) {
2263 count = cts;
2264 have_count = True;
2265 }
2266 }
2267 /* FIXME: potentially skip the rest if no parent present, since
2268 it could be the case that this subrange type is free-standing
2269 (not being used to describe the bounds of a containing array
2270 type) */
2271 /* Do we have a plausible parent? */
2272 if (typestack_is_empty(parser)) goto bad_DIE;
2273 vg_assert(parser->qparent[parser->sp]);
2274 if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
2275 if (parser->qparent[parser->sp]->tag != Ty_Array) goto bad_DIE;
2276
2277 /* Figure out if we have a definite range or not */
2278 if (have_lower && have_upper && (!have_count)) {
2279 bounds->knownL = True;
2280 bounds->knownU = True;
2281 bounds->boundL = lower;
2282 bounds->boundU = upper;
2283 }
2284 else if (have_lower && (!have_upper) && (!have_count)) {
2285 bounds->knownL = True;
2286 bounds->knownU = False;
2287 bounds->boundL = lower;
2288 bounds->boundU = 0;
2289 } else {
2290 /* FIXME: handle more cases */
2291 goto bad_DIE;
2292 }
2293
2294 /* Record this bound in the parent */
2295 vg_assert(parser->qparent[parser->sp]->Ty.Array.bounds);
2296 VG_(addToXA)( parser->qparent[parser->sp]->Ty.Array.bounds,
2297 &bounds );
2298 /* And record the child itself */
2299 goto acquire_Bounds;
2300 }
2301
2302 if (dtag == DW_TAG_typedef) {
2303 /* We can pick up a new base type any time. */
2304 type = ML_(new_Type)();
2305 type->tag = Ty_TyDef;
2306 type->Ty.TyDef.name = NULL;
2307 type->Ty.TyDef.typeR = D3_INVALID_CUOFF;
2308 while (True) {
2309 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2310 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2311 if (attr == 0 && form == 0) break;
2312 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2313 cc, c_die, False/*td3*/, form );
2314 if (attr == DW_AT_name && ctsMemSzB > 0) {
2315 type->Ty.TyDef.name
2316 = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
2317 }
2318 if (attr == DW_AT_type && ctsSzB > 0) {
2319 type->Ty.TyDef.typeR = (Type*)(UWord)cts;
2320 }
2321 }
2322 /* Do we have something that looks sane? */
2323 if (/* must have a name */
2324 type->Ty.TyDef.name == NULL
2325 /* but the referred-to type can be absent */)
2326 goto bad_DIE;
2327 else
2328 goto acquire_Type;
2329 }
2330
2331 if (dtag == DW_TAG_subroutine_type) {
2332 /* function type? just record that one fact and ask no
2333 further questions. */
2334 type = ML_(new_Type)();
2335 type->tag = Ty_Fn;
2336 goto acquire_Type;
2337 }
2338
2339 if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type) {
2340 Int have_ty = 0;
2341 type = ML_(new_Type)();
2342 type->tag = Ty_Qual;
2343 type->Ty.Qual.qual
2344 = dtag == DW_TAG_volatile_type ? 'V' : 'C';
2345 /* target type defaults to 'void' */
2346 type->Ty.Qual.typeR = D3_FAKEVOID_CUOFF;
2347 while (True) {
2348 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2349 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2350 if (attr == 0 && form == 0) break;
2351 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2352 cc, c_die, False/*td3*/, form );
2353 if (attr == DW_AT_type && ctsSzB > 0) {
2354 type->Ty.Qual.typeR = (Type*)(UWord)cts;
2355 have_ty++;
2356 }
2357 }
2358 /* gcc sometimes generates DW_TAG_const/volatile_type without
2359 DW_AT_type and GDB appears to interpret the type as 'const
2360 void' (resp. 'volatile void'). So just allow it .. */
2361 if (have_ty == 1 || have_ty == 0)
2362 goto acquire_Type;
2363 else
2364 goto bad_DIE;
2365 }
2366
2367 /* else ignore this DIE */
2368 return;
2369 /*NOTREACHED*/
2370
2371 acquire_Type:
2372 if (0) VG_(printf)("YYYY Acquire Type\n");
2373 vg_assert(type); vg_assert(!atom); vg_assert(!field);
2374 vg_assert(!expr); vg_assert(!bounds);
2375 *admin = ML_(new_TyAdmin)( posn, *admin );
2376 (*admin)->payload = type;
2377 (*admin)->tag = TyA_Type;
2378 return;
2379 /*NOTREACHED*/
2380
2381 acquire_Atom:
2382 if (0) VG_(printf)("YYYY Acquire Atom\n");
2383 vg_assert(!type); vg_assert(atom); vg_assert(!field);
2384 vg_assert(!expr); vg_assert(!bounds);
2385 *admin = ML_(new_TyAdmin)( posn, *admin );
2386 (*admin)->payload = atom;
2387 (*admin)->tag = TyA_Atom;
2388 return;
2389 /*NOTREACHED*/
2390
2391 acquire_Field_and_Expr:
2392 /* For union members, Expr should be absent */
2393 if (0) VG_(printf)("YYYY Acquire Field and Expr\n");
2394 vg_assert(!type); vg_assert(!atom); vg_assert(field);
2395 /*vg_assert(expr);*/ vg_assert(!bounds);
2396 if (expr) {
2397 *admin = ML_(new_TyAdmin)( (UWord)D3_INVALID_CUOFF,
2398 *admin );
2399 (*admin)->payload = expr;
2400 (*admin)->tag = TyA_Expr;
2401 }
2402 *admin = ML_(new_TyAdmin)( posn, *admin );
2403 (*admin)->payload = field;
2404 (*admin)->tag = TyA_Field;
2405 return;
2406 /*NOTREACHED*/
2407
2408 acquire_Bounds:
2409 if (0) VG_(printf)("YYYY Acquire Bounds\n");
2410 vg_assert(!type); vg_assert(!atom); vg_assert(!field);
2411 vg_assert(!expr); vg_assert(bounds);
2412 *admin = ML_(new_TyAdmin)( posn, *admin );
2413 (*admin)->payload = bounds;
2414 (*admin)->tag = TyA_Bounds;
2415 return;
2416 /*NOTREACHED*/
2417
2418 bad_DIE:
2419 set_position_of_Cursor( c_die, saved_die_c_offset );
2420 set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
2421 VG_(printf)("\nparse_type_DIE: confused by:\n");
2422 VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) );
2423 while (True) {
2424 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2425 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2426 if (attr == 0 && form == 0) break;
2427 VG_(printf)(" %18s: ", ML_(pp_DW_AT)(attr));
2428 /* Get the form contents, so as to print them */
2429 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2430 cc, c_die, True, form );
2431 VG_(printf)("\t\n");
2432 }
2433 VG_(printf)("\n");
2434 cc->barf("parse_type_DIE: confused by the above DIE");
2435 /*NOTREACHED*/
2436}
2437
2438
2439/*------------------------------------------------------------*/
2440/*--- ---*/
2441/*--- Resolution of references to type DIEs ---*/
2442/*--- ---*/
2443/*------------------------------------------------------------*/
2444
2445static Int cmp_D3TyAdmin_by_cuOff ( void* v1, void* v2 ) {
2446 TyAdmin* a1 = *(TyAdmin**)v1;
2447 TyAdmin* a2 = *(TyAdmin**)v2;
2448 if (a1->cuOff < a2->cuOff) return -1;
2449 if (a1->cuOff > a2->cuOff) return 1;
2450 return 0;
2451}
2452
2453/* Look up 'cuOff' in 'map', to find the associated D3TyAdmin*. Check
2454 that the found D3TyAdmin has tag 'adtag'. Sets *payload to be the
2455 resulting payload pointer and returns True on success.
2456
2457 Also, if 'allow_invalid' is True, then if cuOff is
2458 D3_INVALID_CUOFF, return NULL in *payload.
2459
2460 Otherwise (conceptually fails) and returns False. */
2461__attribute__((noinline))
2462static Bool resolve_binding ( /*OUT*/void** payload,
2463 XArray* map, void* cuOff,
2464 TyAdminTag tag,
2465 Bool allow_invalid ) {
2466 Bool found;
2467 Word ixLo, ixHi;
2468 TyAdmin dummy, *dummyP, *admin;
2469
2470 if (cuOff == D3_INVALID_CUOFF && allow_invalid) {
2471 *payload = NULL;
2472 return True;
2473 }
2474
2475 VG_(memset)(&dummy, 0, sizeof(dummy));
2476 dummy.cuOff = (UWord)cuOff;
2477 dummyP = &dummy;
2478 found = VG_(lookupXA)( map, &dummyP, &ixLo, &ixHi );
2479 if (!found)
2480 return False;
2481 /* If this doesn't hold, we must have seen more than one DIE with
2482 the same cuOff(set). Which isn't possible. */
2483 vg_assert(ixLo == ixHi);
2484 admin = *(TyAdmin**)VG_(indexXA)( map, ixLo );
2485 /* All payload pointers should be non-NULL. Ensured by assertion in
2486 loop in resolve_type_entities that creates 'map'. Hence it is
2487 safe to return NULL to indicate 'not found'. */
2488 vg_assert(admin->payload);
2489 vg_assert(admin->cuOff == (UWord)cuOff); /* stay sane */
2490
2491 if (admin->tag != tag)
2492 return False;
2493
2494 *payload = admin->payload;
2495 return True;
2496}
2497
2498__attribute__((noinline))
2499static void resolve_type_entities ( /*MOD*/TyAdmin* admin,
2500 /*MOD*/TempVar* vars )
2501{
2502 Bool ok;
2503 void* payload;
2504 TyAdmin* adp;
2505 XArray* /* of D3TyAdmin* */ map;
2506
2507 map = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
2508 sizeof(TyAdmin*) );
2509 for (adp = admin; adp; adp = adp->next) {
2510 vg_assert(adp);
2511 vg_assert(adp->payload != NULL);
2512 if (adp->cuOff != (UWord)D3_INVALID_CUOFF) {
2513 VG_(addToXA)( map, &adp );
2514 }
2515 }
2516
2517 VG_(setCmpFnXA)( map, cmp_D3TyAdmin_by_cuOff );
2518 if (0)
2519 VG_(printf)("XXXXXX sorting map with %d entries\n",
2520 (Int)VG_(sizeXA)(map));
2521 VG_(sortXA)( map );
2522
2523 for (adp = admin; adp; adp = adp->next) {
2524 vg_assert(adp->payload);
2525 switch (adp->tag) {
2526 case TyA_Bounds: {
2527 TyBounds* bounds = (TyBounds*)adp->payload;
2528 if (bounds->knownL && bounds->knownU
2529 && bounds->knownL > bounds->knownU) goto baaad;
2530 break;
2531 }
2532 case TyA_Atom: {
2533 TyAtom* atom = (TyAtom*)adp->payload;
2534 if (!atom->name) goto baaad;
2535 break;
2536 }
2537 case TyA_Expr: {
2538 D3Expr* expr = (D3Expr*)adp->payload;
2539 if (!expr->bytes) goto baaad;
2540 break;
2541 }
2542 case TyA_Field: {
2543 TyField* field = (TyField*)adp->payload;
2544 if (!field->name) goto baaad;
2545 if ( (field->isStruct && (!field->loc))
2546 || ((!field->isStruct) && field->loc))
2547 goto baaad;
2548 ok = resolve_binding( &payload, map, field->typeR,
2549 TyA_Type, False/*!allow_invalid*/ );
2550 if (!ok) goto baaad;
2551 field->typeR = payload;
2552 break;
2553 }
2554 case TyA_Type: {
2555 UChar enc;
2556 XArray* xa;
2557 Type* ty = (Type*)adp->payload;
2558 switch (ty->tag) {
2559 case Ty_Base:
2560 enc = ty->Ty.Base.enc;
2561 if ((!ty->Ty.Base.name)
2562 || ty->Ty.Base.szB < 1 || ty->Ty.Base.szB > 32
2563 || (enc != 'S' && enc != 'U' && enc != 'F' && enc != 'C'))
2564 goto baaad;
2565 break;
2566 case Ty_TyDef:
2567 if (!ty->Ty.TyDef.name) goto baaad;
2568 ok = resolve_binding( &payload, map,
2569 ty->Ty.TyDef.typeR,
2570 TyA_Type,
2571 True/*allow_invalid*/ );
2572 if (!ok) goto baaad;
2573 ty->Ty.TyDef.typeR = payload;
2574 break;
2575 case Ty_PorR:
2576 if (ty->Ty.PorR.szB != sizeof(Word)) goto baaad;
2577 ok = resolve_binding( &payload, map,
2578 ty->Ty.PorR.typeR,
2579 TyA_Type,
2580 False/*!allow_invalid*/ );
2581 if (!ok) goto baaad;
2582 ty->Ty.PorR.typeR = payload;
2583 break;
2584 case Ty_Array:
2585 if (!ty->Ty.Array.bounds) goto baaad;
2586 ok = resolve_binding( &payload, map,
2587 ty->Ty.Array.typeR,
2588 TyA_Type,
2589 False/*!allow_invalid*/ );
2590 if (!ok) goto baaad;
2591 ty->Ty.Array.typeR = payload;
2592 break;
2593 case Ty_Enum:
2594 if ((!ty->Ty.Enum.atomRs)
2595 || ty->Ty.Enum.szB < 1
2596 || ty->Ty.Enum.szB > 8) goto baaad;
2597 xa = ty->Ty.Enum.atomRs;
2598 break;
2599 case Ty_StOrUn:
2600 xa = ty->Ty.StOrUn.fields;
2601 if (!xa) goto baaad;
2602 break;
2603 case Ty_Fn:
2604 break;
2605 case Ty_Qual:
2606 if (ty->Ty.Qual.qual != 'C'
2607 && ty->Ty.Qual.qual != 'V') goto baaad;
2608 ok = resolve_binding( &payload, map,
2609 ty->Ty.Qual.typeR,
2610 TyA_Type,
2611 False/*!allow_invalid*/ );
2612 if (!ok) goto baaad;
2613 ty->Ty.Qual.typeR = payload;
2614 break;
2615 case Ty_Void:
2616 if (ty->Ty.Void.isFake != False
2617 && ty->Ty.Void.isFake != True) goto baaad;
2618 break;
2619 default:
2620 goto baaad;
2621 }
2622 break;
2623 }
2624 baaad:
2625 default:
2626 VG_(printf)("valgrind: bad D3TyAdmin: ");
2627 ML_(pp_TyAdmin)(adp);
2628 VG_(printf)("\n");
2629 }
2630 }
2631
2632 /* Now resolve the variables list */
2633 for (; vars; vars = vars->next) {
2634 payload = NULL;
2635 ok = resolve_binding( &payload, map, vars->typeR,
2636 TyA_Type, True/*allow_invalid*/ );
2637
2638 if (0 && !ok)
2639 VG_(printf)("Can't resolve type reference 0x%lx\n",
2640 (UWord)vars->typeR);
2641 //vg_assert(ok);
2642 vars->typeR = payload;
2643 }
2644
2645 VG_(deleteXA)( map );
2646}
2647
2648
2649/*------------------------------------------------------------*/
2650/*--- ---*/
2651/*--- Parsing of Compilation Units ---*/
2652/*--- ---*/
2653/*------------------------------------------------------------*/
2654
2655static Int cmp_TempVar_by_dioff ( void* v1, void* v2 ) {
2656 TempVar* t1 = *(TempVar**)v1;
2657 TempVar* t2 = *(TempVar**)v2;
2658 if (t1->dioff < t2->dioff) return -1;
2659 if (t1->dioff > t2->dioff) return 1;
2660 return 0;
2661}
2662
2663static void read_DIE ( /*OUT*/TyAdmin** admin,
2664 /*OUT*/TempVar** tempvars,
2665 /*OUT*/GExpr** gexprs,
2666 /*MOD*/D3TypeParser* typarser,
2667 /*MOD*/D3VarParser* varparser,
2668 Cursor* c, Bool td3, CUConst* cc, Int level )
2669{
2670 Cursor abbv;
2671 ULong atag, abbv_code;
2672 UWord posn;
2673 UInt has_children;
2674 UWord start_die_c_offset, start_abbv_c_offset;
2675 UWord after_die_c_offset, after_abbv_c_offset;
2676
2677 /* --- Deal with this DIE --- */
2678 posn = get_position_of_Cursor( c );
2679 abbv_code = get_ULEB128( c );
2680 set_abbv_Cursor( &abbv, td3, cc, abbv_code );
2681 atag = get_ULEB128( &abbv );
2682 TRACE_D3("\n");
2683 TRACE_D3(" <%d><%lx>: Abbrev Number: %llu (%s)\n",
2684 level, posn, abbv_code, ML_(pp_DW_TAG)( atag ) );
2685
2686 if (atag == 0)
2687 cc->barf("read_DIE: invalid zero tag on DIE");
2688
2689 has_children = get_UChar( &abbv );
2690 if (has_children != DW_children_no && has_children != DW_children_yes)
2691 cc->barf("read_DIE: invalid has_children value");
2692
2693 /* We're set up to look at the fields of this DIE. Hand it off to
2694 any parser(s) that want to see it. Since they will in general
2695 advance both the DIE and abbrev cursors, remember their current
2696 settings so that we can then back up and do one final pass over
2697 the DIE, to print out its contents. */
2698
2699 start_die_c_offset = get_position_of_Cursor( c );
2700 start_abbv_c_offset = get_position_of_Cursor( &abbv );
2701
2702 while (True) {
2703 ULong cts;
2704 Int ctsSzB;
2705 UWord ctsMemSzB;
2706 ULong at_name = get_ULEB128( &abbv );
2707 ULong at_form = get_ULEB128( &abbv );
2708 if (at_name == 0 && at_form == 0) break;
2709 TRACE_D3(" %18s: ", ML_(pp_DW_AT)(at_name));
2710 /* Get the form contents, but ignore them; the only purpose is
2711 to print them, if td3 is True */
2712 get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
2713 cc, c, td3, (DW_FORM)at_form );
2714 TRACE_D3("\t");
2715 TRACE_D3("\n");
2716 }
2717
2718 after_die_c_offset = get_position_of_Cursor( c );
2719 after_abbv_c_offset = get_position_of_Cursor( &abbv );
2720
2721 set_position_of_Cursor( c, start_die_c_offset );
2722 set_position_of_Cursor( &abbv, start_abbv_c_offset );
2723
2724 parse_type_DIE( admin,
2725 typarser,
2726 (DW_TAG)atag,
2727 posn,
2728 level,
2729 c, /* DIE cursor */
2730 &abbv, /* abbrev cursor */
2731 cc,
2732 td3 );
2733
2734 set_position_of_Cursor( c, start_die_c_offset );
2735 set_position_of_Cursor( &abbv, start_abbv_c_offset );
2736
2737 parse_var_DIE( tempvars,
2738 gexprs,
2739 varparser,
2740 (DW_TAG)atag,
2741 posn,
2742 level,
2743 c, /* DIE cursor */
2744 &abbv, /* abbrev cursor */
2745 cc,
2746 td3 );
2747
2748 set_position_of_Cursor( c, after_die_c_offset );
2749 set_position_of_Cursor( &abbv, after_abbv_c_offset );
2750
2751 /* --- Now recurse into its children, if any --- */
2752 if (has_children == DW_children_yes) {
2753 if (0) TRACE_D3("BEGIN children of level %d\n", level);
2754 while (True) {
2755 atag = peek_ULEB128( c );
2756 if (atag == 0) break;
2757 read_DIE( admin, tempvars, gexprs, typarser, varparser,
2758 c, td3, cc, level+1 );
2759 }
2760 /* Now we need to eat the terminating zero */
2761 atag = get_ULEB128( c );
2762 vg_assert(atag == 0);
2763 if (0) TRACE_D3("END children of level %d\n", level);
2764 }
2765
2766}
2767
2768
2769static
2770void new_dwarf3_reader_wrk (
2771 struct _DebugInfo* di,
2772 __attribute__((noreturn))
2773 void (*barf)( HChar* ),
2774 UChar* debug_info_img, SizeT debug_info_sz,
2775 UChar* debug_abbv_img, SizeT debug_abbv_sz,
2776 UChar* debug_line_img, SizeT debug_line_sz,
2777 UChar* debug_str_img, SizeT debug_str_sz,
2778 UChar* debug_ranges_img, SizeT debug_ranges_sz,
2779 UChar* debug_loc_img, SizeT debug_loc_sz
2780)
2781{
2782 TyAdmin *admin, *adminp;
2783 TempVar *tempvars, *varp, *varp2;
2784 GExpr *gexprs, *gexpr;
2785 Cursor abbv; /* for showing .debug_abbrev */
2786 Cursor info; /* primary cursor for parsing .debug_info */
2787 Cursor ranges; /* for showing .debug_ranges */
2788 D3TypeParser typarser;
2789 D3VarParser varparser;
2790 Addr dr_base;
2791 UWord dr_offset;
2792 Word i;
2793 Bool td3 = di->trace_symtab;
2794 XArray* /* of TempVar* */ dioff_lookup_tab;
2795
2796#if 0
2797 /* This doesn't work properly because it assumes all entries are
2798 packed end to end, with no holes. But that doesn't always
2799 appear to be the case, so it loses sync. And the D3 spec
2800 doesn't appear to require a no-hole situation either. */
2801 /* Display .debug_loc */
2802 Addr dl_base;
2803 UWord dl_offset;
2804 Cursor loc; /* for showing .debug_loc */
2805 TRACE_SYMTAB("\n");
2806 TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
2807 TRACE_SYMTAB(" Offset Begin End Expression\n");
2808 init_Cursor( &loc, debug_loc_img,
2809 debug_loc_sz, 0, barf,
2810 "Overrun whilst reading .debug_loc section(1)" );
2811 dl_base = 0;
2812 dl_offset = 0;
2813 while (True) {
2814 UWord w1, w2;
2815 UWord len;
2816 if (is_at_end_Cursor( &loc ))
2817 break;
2818
2819 /* Read a (host-)word pair. This is something of a hack since
2820 the word size to read is really dictated by the ELF file;
2821 however, we assume we're reading a file with the same
2822 word-sizeness as the host. Reasonably enough. */
2823 w1 = get_UWord( &loc );
2824 w2 = get_UWord( &loc );
2825
2826 if (w1 == 0 && w2 == 0) {
2827 /* end of list. reset 'base' */
2828 TRACE_D3(" %08lx <End of list>\n", dl_offset);
2829 dl_base = 0;
2830 dl_offset = get_position_of_Cursor( &loc );
2831 continue;
2832 }
2833
2834 if (w1 == -1UL) {
2835 /* new value for 'base' */
2836 TRACE_D3(" %08lx %16lx %08lx (base address)\n",
2837 dl_offset, w1, w2);
2838 dl_base = w2;
2839 continue;
2840 }
2841
2842 /* else a location expression follows */
2843 TRACE_D3(" %08lx %08lx %08lx ",
2844 dl_offset, w1 + dl_base, w2 + dl_base);
2845 len = (UWord)get_UShort( &loc );
2846 while (len > 0) {
2847 UChar byte = get_UChar( &loc );
2848 TRACE_D3("%02x", (UInt)byte);
2849 len--;
2850 }
2851 TRACE_SYMTAB("\n");
2852 }
2853#endif
2854
2855 /* Display .debug_ranges */
2856 TRACE_SYMTAB("\n");
2857 TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
2858 TRACE_SYMTAB(" Offset Begin End\n");
2859 init_Cursor( &ranges, debug_ranges_img,
2860 debug_ranges_sz, 0, barf,
2861 "Overrun whilst reading .debug_ranges section(1)" );
2862 dr_base = 0;
2863 dr_offset = 0;
2864 while (True) {
2865 UWord w1, w2;
2866
2867 if (is_at_end_Cursor( &ranges ))
2868 break;
2869
2870 /* Read a (host-)word pair. This is something of a hack since
2871 the word size to read is really dictated by the ELF file;
2872 however, we assume we're reading a file with the same
2873 word-sizeness as the host. Reasonably enough. */
2874 w1 = get_UWord( &ranges );
2875 w2 = get_UWord( &ranges );
2876
2877 if (w1 == 0 && w2 == 0) {
2878 /* end of list. reset 'base' */
2879 TRACE_D3(" %08lx <End of list>\n", dr_offset);
2880 dr_base = 0;
2881 dr_offset = get_position_of_Cursor( &ranges );
2882 continue;
2883 }
2884
2885 if (w1 == -1UL) {
2886 /* new value for 'base' */
2887 TRACE_D3(" %08lx %16lx %08lx (base address)\n",
2888 dr_offset, w1, w2);
2889 dr_base = w2;
2890 continue;
2891 }
2892
2893 /* else a range [w1+base, w2+base) is denoted */
2894 TRACE_D3(" %08lx %08lx %08lx\n",
2895 dr_offset, w1 + dr_base, w2 + dr_base);
2896 }
2897
2898
2899 /* Display .debug_abbrev */
2900 init_Cursor( &abbv, debug_abbv_img, debug_abbv_sz, 0, barf,
2901 "Overrun whilst reading .debug_abbrev section" );
2902 TRACE_SYMTAB("\n");
2903 TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
2904 while (True) {
2905 if (is_at_end_Cursor( &abbv ))
2906 break;
2907 /* Read one abbreviation table */
2908 TRACE_D3(" Number TAG\n");
2909 while (True) {
2910 ULong atag;
2911 UInt has_children;
2912 ULong acode = get_ULEB128( &abbv );
2913 if (acode == 0) break; /* end of the table */
2914 atag = get_ULEB128( &abbv );
2915 has_children = get_UChar( &abbv );
2916 TRACE_D3(" %llu %s [%s]\n",
2917 acode, ML_(pp_DW_TAG)(atag),
2918 ML_(pp_DW_children)(has_children));
2919 while (True) {
2920 ULong at_name = get_ULEB128( &abbv );
2921 ULong at_form = get_ULEB128( &abbv );
2922 if (at_name == 0 && at_form == 0) break;
2923 TRACE_D3(" %18s %s\n",
2924 ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form));
2925 }
2926 }
2927 }
2928 TRACE_SYMTAB("\n");
2929
2930 /* Now loop over the Compilation Units listed in the .debug_info
2931 section (see D3SPEC sec 7.5) paras 1 and 2. Each compilation
2932 unit contains a Compilation Unit Header followed by precisely
2933 one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
2934 init_Cursor( &info, debug_info_img, debug_info_sz, 0, barf,
2935 "Overrun whilst reading .debug_info section" );
2936
2937 /* We'll park the harvested type information in here. Also create
2938 a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
2939 have at least one type entry to refer to. D3_FAKEVOID_CUOFF is
2940 huge and presumably will not occur in any valid DWARF3 file --
2941 it would need to have a .debug_info section 4GB long for that to
2942 happen. These type entries end up in the DebugInfo. */
2943 admin = NULL;
2944 { Type* tVoid = ML_(new_Type)();
2945 tVoid->tag = Ty_Void;
2946 tVoid->Ty.Void.isFake = True;
2947 admin = ML_(new_TyAdmin)( (UWord)D3_FAKEVOID_CUOFF, admin );
2948 admin->payload = tVoid;
2949 admin->tag = TyA_Type;
2950 }
2951
2952 /* List of variables we're accumulating. These don't end up in the
2953 DebugInfo; instead their contents are handed to ML_(addVar) and
2954 the list elements are then deleted. */
2955 tempvars = NULL;
2956
2957 /* List of GExprs we're accumulating. These wind up in the
2958 DebugInfo. */
2959 gexprs = NULL;
2960
2961 /* We need a D3TypeParser to keep track of partially constructed
2962 types. It'll be discarded as soon as we've completed the CU,
2963 since the resulting information is tipped in to 'admin' as it is
2964 generated. */
2965 VG_(memset)( &typarser, 0, sizeof(typarser) );
2966 typarser.sp = -1;
2967 typarser.language = '?';
2968
2969 VG_(memset)( &varparser, 0, sizeof(varparser) );
2970 varparser.sp = -1;
2971
2972 TRACE_D3("\n------ Parsing .debug_info section ------\n");
2973 while (True) {
2974 UWord cu_start_offset, cu_offset_now;
2975 CUConst cc;
2976
2977 /* It seems icc9 finishes the DIE info before debug_info_sz
2978 bytes have been used up. So be flexible, and declare the
2979 sequence complete if there is not enough remaining bytes to
2980 hold even the smallest conceivable CU header. (11 bytes I
2981 reckon). */
2982 Word avail = get_remaining_length_Cursor( &info );
2983 if (avail < 11) {
2984 if (avail > 0)
2985 TRACE_D3("new_dwarf3_reader_wrk: warning: "
2986 "%ld unused bytes after end of DIEs\n", avail);
2987 break;
2988 }
2989
2990 /* Check the varparser's stack is in a sane state. */
2991 vg_assert(varparser.sp == -1);
2992 for (i = 0; i < N_D3_VAR_STACK; i++) {
2993 vg_assert(varparser.ranges[i] == NULL);
2994 vg_assert(varparser.level[i] == 0);
2995 }
2996 for (i = 0; i < N_D3_TYPE_STACK; i++) {
2997 vg_assert(typarser.qparent[i] == NULL);
2998 vg_assert(typarser.qlevel[i] == 0);
2999 }
3000
3001 cu_start_offset = get_position_of_Cursor( &info );
3002 TRACE_D3("\n");
3003 TRACE_D3(" Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
3004 /* parse_CU_header initialises the CU's set_abbv_Cursor cache
3005 (saC_cache) */
3006 parse_CU_Header( &cc, td3, &info,
3007 (UChar*)debug_abbv_img, debug_abbv_sz );
3008 cc.debug_str_img = debug_str_img;
3009 cc.debug_str_sz = debug_str_sz;
3010 cc.debug_ranges_img = debug_ranges_img;
3011 cc.debug_ranges_sz = debug_ranges_sz;
3012 cc.debug_loc_img = debug_loc_img;
3013 cc.debug_loc_sz = debug_loc_sz;
3014 cc.debug_line_img = debug_line_img;
3015 cc.debug_line_sz = debug_line_sz;
3016 cc.cu_start_offset = cu_start_offset;
3017 cc.di = di;
3018 /* The CU's svma can be deduced by looking at the AT_low_pc
3019 value in the top level TAG_compile_unit, which is the topmost
3020 DIE. We'll leave it for the 'varparser' to acquire that info
3021 and fill it in -- since it is the only party to want to know
3022 it. */
3023 cc.cu_svma_known = False;
3024 cc.cu_svma = 0;
3025
3026 /* Create a fake outermost-level range covering the entire
3027 address range. So we always have *something* to catch all
3028 variable declarations. */
3029 varstack_push( &cc, &varparser, td3,
3030 unitary_range_list(0UL, ~0UL),
3031 -1, False/*isFunc*/, NULL/*fbGX*/ );
3032
3033 /* And set up the file name table. When we come across the top
3034 level DIE for this CU (which is what the next call to
3035 read_DIE should process) we will copy all the file names out
3036 of the .debug_line img area and use this table to look up the
3037 copies when we later see filename numbers in DW_TAG_variables
3038 etc. */
3039 vg_assert(!varparser.filenameTable );
3040 varparser.filenameTable
3041 = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
3042 sizeof(UChar*) );
3043 vg_assert(varparser.filenameTable );
3044
3045 /* Now read the one-and-only top-level DIE for this CU. */
3046 vg_assert(varparser.sp == 0);
3047 read_DIE( &admin, &tempvars, &gexprs, &typarser, &varparser,
3048 &info, td3, &cc, 0 );
3049
3050 cu_offset_now = get_position_of_Cursor( &info );
3051 if (1) TRACE_D3("offset now %ld, d-i-size %ld\n",
3052 cu_offset_now, debug_info_sz);
3053 if (cu_offset_now > debug_info_sz)
3054 barf("toplevel DIEs beyond end of CU");
3055 if (cu_offset_now == debug_info_sz)
3056 break;
3057
3058 /* Preen to level -2. DIEs have level >= 0 so -2 cannot occur
3059 anywhere else at all. Our fake the-entire-address-space
3060 range is at level -1, so preening to -2 should completely
3061 empty the stack out. */
3062 TRACE_D3("\n");
3063 varstack_preen( &varparser, td3, -2 );
3064 /* Similarly, empty the type stack out. */
3065 typestack_preen( &typarser, td3, -2 );
3066 /* else keep going */
3067
3068 TRACE_D3("set_abbv_Cursor cache: %lu queries, %lu misses\n",
3069 cc.saC_cache_queries, cc.saC_cache_misses);
3070
3071 vg_assert(varparser.filenameTable );
3072 VG_(deleteXA)( varparser.filenameTable );
3073 varparser.filenameTable = NULL;
3074 }
3075
3076 /* Put the type entry list the right way round. Not strictly
3077 necessary, but makes it easier to read. */
3078 vg_assert(admin);
3079 if (admin) {
3080 TyAdmin *next, *prev = NULL;
3081 for (adminp = admin; adminp; adminp = next) {
3082 next = adminp->next;
3083 adminp->next = prev;
3084 prev = adminp;
3085 }
3086 admin = prev;
3087 }
3088
3089 /* Put the variable list the right way round. Not strictly
3090 necessary, but makes it easier to read. */
3091 if (tempvars) {
3092 TempVar *next, *prev = NULL;
3093 for (varp = tempvars; varp; varp = next) {
3094 next = varp->next;
3095 varp->next = prev;
3096 prev = varp;
3097 }
3098 tempvars = prev;
3099 }
3100
3101 TRACE_D3("\n");
3102 TRACE_D3("------ Acquired the following type entities: ------\n");
3103 for (adminp = admin; adminp; adminp = adminp->next) {
3104 TRACE_D3(" ");
3105 if (td3) ML_(pp_TyAdmin)( adminp );
3106 TRACE_D3("\n");
3107 }
3108 TRACE_D3("\n");
3109 TRACE_D3("------ Resolving type entries ------\n");
3110
3111 resolve_type_entities( admin, tempvars );
3112 for (gexpr = gexprs; gexpr; gexpr = gexpr->next) {
3113 bias_GX( gexpr, di->text_bias );
3114 }
3115
3116 TRACE_D3("\n");
3117 TRACE_D3("------ Acquired the following variables: ------\n\n");
3118
3119 /* Park (pointers to) all the vars in an XArray, so we can look up
3120 abstract origins quickly. The array is sorted (hence, looked-up
3121 by) the .dioff fields. Since the .dioffs should be in strictly
3122 ascending order, there is no need to sort the array after
3123 construction. The ascendingness is however asserted for. */
3124 dioff_lookup_tab
3125 = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
3126 sizeof(TempVar*) );
3127 vg_assert(dioff_lookup_tab);
3128 varp2 = NULL;
3129 for (varp = tempvars; varp; varp = varp->next) {
3130 if (varp2)
3131 vg_assert(varp2->dioff < varp->dioff);
3132 VG_(addToXA)( dioff_lookup_tab, &varp );
3133 varp2 = varp;
3134 }
3135 VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff );
3136 VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */
3137
3138 /* Now visit each var. Collect up as much info as possible for
3139 each var and hand it to ML_(addVar). */
3140 for (varp = tempvars; varp; varp = varp->next) {
3141
3142 /* Possibly show .. */
3143 if (td3) {
3144 VG_(printf)("<%lx> addVar: level %d: %s :: ",
3145 varp->dioff,
3146 varp->level,
3147 varp->name ? varp->name : (UChar*)"<anon_var>" );
3148 if (varp->typeR) {
3149 ML_(pp_Type_C_ishly)( varp->typeR );
3150 } else {
3151 VG_(printf)("NULL");
3152 }
3153 VG_(printf)("\n Loc=");
3154 if (varp->gexpr) {
3155 ML_(pp_GX)(varp->gexpr);
3156 } else {
3157 VG_(printf)("NULL");
3158 }
3159 VG_(printf)("\n");
3160 if (varp->fbGX) {
3161 VG_(printf)(" FrB=");
3162 ML_(pp_GX)( varp->fbGX );
3163 VG_(printf)("\n");
3164 } else {
3165 VG_(printf)(" FrB=none\n");
3166 }
3167 VG_(printf)(" declared at: %s:%d\n",
3168 varp->fName ? varp->fName : (UChar*)"NULL",
3169 varp->fLine );
3170 if (varp->absOri != (UWord)D3_INVALID_CUOFF)
3171 VG_(printf)(" abstract origin: <%lx>\n", varp->absOri);
3172 }
3173
3174 /* Skip variables which have no location. These must be
3175 abstract instances; they are useless as-is since with no
3176 location they have no specified memory location. They will
3177 presumably be referred to via the absOri fields of other
3178 variables. */
3179 if (!varp->gexpr) {
3180 TRACE_D3(" SKIP (no location)\n\n");
3181 continue;
3182 }
3183
3184 /* So it has a location, at least. If it refers to some other
3185 entry through its absOri field, pull in further info through
3186 that. */
3187 if (varp->absOri != (UWord)D3_INVALID_CUOFF) {
3188 Bool found;
3189 Word ixFirst, ixLast;
3190 TempVar key;
3191 TempVar* keyp = &key;
3192 TempVar *varAI;
3193 VG_(memset)(&key, 0, sizeof(key)); /* not necessary */
3194 key.dioff = varp->absOri; /* this is what we want to find */
3195 found = VG_(lookupXA)( dioff_lookup_tab, &keyp,
3196 &ixFirst, &ixLast );
3197 if (!found)
3198 barf("DW_AT_abstract_origin can't be resolved");
3199 /* If the following fails, there is more than one entry with
3200 the same dioff. Which can't happen. */
3201 vg_assert(ixFirst == ixLast);
3202 varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst );
3203 /* stay sane */
3204 vg_assert(varAI);
3205 vg_assert(varAI->dioff == varp->absOri);
3206
3207 /* Copy what useful info we can. */
3208 if (varAI->typeR && !varp->typeR)
3209 varp->typeR = varAI->typeR;
3210 if (varAI->name && !varp->name)
3211 varp->name = varAI->name;
3212 if (varAI->fName && !varp->fName)
3213 varp->fName = varAI->fName;
3214 if (varAI->fLine > 0 && varp->fLine == 0)
3215 varp->fLine = varAI->fLine;
3216 }
3217
3218 /* Give it a name if it doesn't have one. */
3219 if (!varp->name)
3220 varp->name = ML_(addStr)( di, "<anon_var>", -1 );
3221
3222 /* So now does it have enough info to be useful? */
3223 /* NOTE: re typeR: this is a hack. If typeR is NULL then the
3224 type didn't get resolved. Really, in that case something's
3225 broken earlier on, and should be fixed, rather than just
3226 skipping the variable. */
3227 if (!varp->typeR) continue;
3228 vg_assert(varp->gexpr);
3229 vg_assert(varp->name);
3230 vg_assert(varp->typeR);
3231 vg_assert(varp->level >= 0);
3232
3233 /* Ok. So we're going to keep it. Call ML_(addVar) once for
3234 each address range in which the variable exists. */
3235 TRACE_D3(" ACQUIRE for range(s) ");
3236 { AddrRange oneRange;
3237 AddrRange* varPcRanges;
3238 Word nVarPcRanges;
3239 /* Set up to iterate over address ranges, however
3240 represented. */
3241 if (varp->nRanges == 0 || varp->nRanges == 1) {
3242 vg_assert(!varp->rngMany);
3243 if (varp->nRanges == 0) {
3244 vg_assert(varp->rngOneMin == 0);
3245 vg_assert(varp->rngOneMax == 0);
3246 }
3247 nVarPcRanges = varp->nRanges;
3248 oneRange.aMin = varp->rngOneMin;
3249 oneRange.aMax = varp->rngOneMax;
3250 varPcRanges = &oneRange;
3251 } else {
3252 vg_assert(varp->rngMany);
3253 vg_assert(varp->rngOneMin == 0);
3254 vg_assert(varp->rngOneMax == 0);
3255 nVarPcRanges = VG_(sizeXA)(varp->rngMany);
3256 vg_assert(nVarPcRanges >= 2);
3257 vg_assert(nVarPcRanges == (Word)varp->nRanges);
3258 varPcRanges = VG_(indexXA)(varp->rngMany, 0);
3259 }
3260 if (varp->level == 0)
3261 vg_assert( nVarPcRanges == 1 );
3262 /* and iterate */
3263 for (i = 0; i < nVarPcRanges; i++) {
3264 Addr pcMin = varPcRanges[i].aMin;
3265 Addr pcMax = varPcRanges[i].aMax;
3266 vg_assert(pcMin <= pcMax);
3267 /* Level 0 is the global address range. So at level 0 we
3268 don't want to bias pcMin/pcMax; but at all other levels
3269 we do since those are derived from svmas in the Dwarf
3270 we're reading. Be paranoid ... */
3271 if (varp->level == 0) {
3272 vg_assert(pcMin == (Addr)0);
3273 vg_assert(pcMax == ~(Addr)0);
3274 } else {
3275 /* vg_assert(pcMin > (Addr)0);
3276 No .. we can legitimately expect to see ranges like
3277 0x0-0x11D (pre-biasing, of course). */
3278 vg_assert(pcMax < ~(Addr)0);
3279 }
3280
3281 if (i > 0 && (i%2) == 0) TRACE_D3("\n ");
3282 TRACE_D3("[%p,%p] ", pcMin, pcMax );
3283
3284 ML_(addVar)(
3285 di, varp->level,
3286 pcMin + (varp->level==0 ? 0 : di->text_bias),
sewardj0b5bf912008-03-07 20:07:58 +00003287 pcMax + (varp->level==0 ? 0 : di->text_bias),
sewardjb8b79ad2008-03-03 01:35:41 +00003288 varp->name, (void*)varp->typeR,
3289 varp->gexpr, varp->fbGX,
3290 varp->fName, varp->fLine, td3
3291 );
3292 }
3293 }
3294
3295 TRACE_D3("\n\n");
3296 /* and move on to the next var */
3297 }
3298
3299 /* Now free all the TempVars */
3300 for (varp = tempvars; varp; varp = varp2) {
3301 varp2 = varp->next;
3302 if (varp->rngMany)
3303 VG_(deleteXA)(varp->rngMany);
3304 ML_(dinfo_free)(varp);
3305 }
3306 tempvars = NULL;
3307
3308 /* And get rid of the temporary mapping table. */
3309 VG_(deleteXA)( dioff_lookup_tab );
3310
3311 /* record the TyAdmins and the GExprs in di so they can be freed
3312 later */
3313 vg_assert(!di->admin_tyadmins);
3314 di->admin_tyadmins = admin;
3315 vg_assert(!di->admin_gexprs);
3316 di->admin_gexprs = gexprs;
3317}
3318
3319
3320/*------------------------------------------------------------*/
3321/*--- ---*/
3322/*--- The "new" DWARF3 reader -- top level control logic ---*/
3323/*--- ---*/
3324/*------------------------------------------------------------*/
3325
3326/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
3327#include <setjmp.h> /* For jmp_buf */
3328/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
3329
3330static Bool d3rd_jmpbuf_valid = False;
3331static HChar* d3rd_jmpbuf_reason = NULL;
3332static jmp_buf d3rd_jmpbuf;
3333
3334static __attribute__((noreturn)) void barf ( HChar* reason ) {
3335 vg_assert(d3rd_jmpbuf_valid);
3336 d3rd_jmpbuf_reason = reason;
3337 __builtin_longjmp(&d3rd_jmpbuf, 1);
3338 /*NOTREACHED*/
3339 vg_assert(0);
3340}
3341
3342
3343void
3344ML_(new_dwarf3_reader) (
3345 struct _DebugInfo* di,
3346 UChar* debug_info_img, SizeT debug_info_sz,
3347 UChar* debug_abbv_img, SizeT debug_abbv_sz,
3348 UChar* debug_line_img, SizeT debug_line_sz,
3349 UChar* debug_str_img, SizeT debug_str_sz,
3350 UChar* debug_ranges_img, SizeT debug_ranges_sz,
3351 UChar* debug_loc_img, SizeT debug_loc_sz
3352)
3353{
3354 volatile Int jumped;
3355 volatile Bool td3 = di->trace_symtab;
3356
3357 /* Run the _wrk function to read the dwarf3. If it succeeds, it
3358 just returns normally. If there is any failure, it longjmp's
3359 back here, having first set d3rd_jmpbuf_reason to something
3360 useful. */
3361 vg_assert(d3rd_jmpbuf_valid == False);
3362 vg_assert(d3rd_jmpbuf_reason == NULL);
3363
3364 d3rd_jmpbuf_valid = True;
3365 jumped = __builtin_setjmp(&d3rd_jmpbuf);
3366 if (jumped == 0) {
3367 /* try this ... */
3368 new_dwarf3_reader_wrk( di, barf,
3369 debug_info_img, debug_info_sz,
3370 debug_abbv_img, debug_abbv_sz,
3371 debug_line_img, debug_line_sz,
3372 debug_str_img, debug_str_sz,
3373 debug_ranges_img, debug_ranges_sz,
3374 debug_loc_img, debug_loc_sz );
3375 d3rd_jmpbuf_valid = False;
3376 TRACE_D3("\n------ .debug_info reading was successful ------\n");
3377 } else {
3378 /* It longjmp'd. */
3379 d3rd_jmpbuf_valid = False;
3380 /* Can't longjump without giving some sort of reason. */
3381 vg_assert(d3rd_jmpbuf_reason != NULL);
3382
3383 TRACE_D3("\n------ .debug_info reading failed ------\n");
3384
3385 ML_(symerr)(di, True, d3rd_jmpbuf_reason);
3386 }
3387
3388 d3rd_jmpbuf_valid = False;
3389 d3rd_jmpbuf_reason = NULL;
3390}
3391
3392
3393
/* --- Unused code fragments which might be useful one day. --- */

#if 0
   /* Read the arange tables */
   /* This is a statement-level fragment, not a function: it refers
      to 'aranges', 'debug_aranges_img', 'debug_aranges_sz' and
      'barf', none of which are declared here, so presumably it is
      meant to be pasted into a reader routine that provides them. */
   TRACE_SYMTAB("\n");
   TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
   init_Cursor( &aranges, debug_aranges_img,
                debug_aranges_sz, 0, barf,
                "Overrun whilst reading .debug_aranges section" );
   while (True) {
      ULong  len, d_i_offset;
      Bool   is64;
      UShort version;
      UChar  asize, segsize;

      if (is_at_end_Cursor( &aranges ))
         break;

      /* Read one arange set: first its header ... */
      /* initial_length field */
      len = get_Initial_Length( &is64, &aranges,
               "in .debug_aranges: invalid initial-length field" );
      version    = get_UShort( &aranges );
      d_i_offset = get_Dwarfish_UWord( &aranges, is64 );
      asize      = get_UChar( &aranges );
      segsize    = get_UChar( &aranges );
      TRACE_D3(" Length: %llu\n", len);
      TRACE_D3(" Version: %d\n", (Int)version);
      TRACE_D3(" Offset into .debug_info: %llx\n", d_i_offset);
      TRACE_D3(" Pointer Size: %d\n", (Int)asize);
      TRACE_D3(" Segment Size: %d\n", (Int)segsize);
      TRACE_D3("\n");
      TRACE_D3(" Address Length\n");

      /* 'asize' comes straight from the file.  It is used as a
         divisor just below, so a zero value would be a modulo by
         zero; and the tuple reads only distinguish 8-byte words from
         everything else.  Refuse anything but 4 or 8 rather than
         crash or misparse. */
      if (asize != 4 && asize != 8)
         barf("in .debug_aranges: unsupported address size");

      /* ... then skip padding until the cursor position is a
         multiple of the tuple size (two addresses) ... */
      while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) {
         (void)get_UChar( & aranges );
      }

      /* ... then the (address, length) tuples.  The all-zero tuple
         ends the set; it is printed before the loop exits. */
      while (True) {
         ULong address = get_Dwarfish_UWord( &aranges, asize==8 );
         ULong length  = get_Dwarfish_UWord( &aranges, asize==8 );
         TRACE_D3(" 0x%016llx 0x%llx\n", address, length);
         if (address == 0 && length == 0) break;
      }
   }
   TRACE_SYMTAB("\n");
#endif
3439
3440/*--------------------------------------------------------------------*/
3441/*--- end readdwarf3.c ---*/
3442/*--------------------------------------------------------------------*/