2/*--------------------------------------------------------------------*/
3/*--- Ptrcheck: a pointer-use checker. ---*/
4/*--- This file checks stack and global array accesses. ---*/
5/*--- sg_main.c ---*/
6/*--------------------------------------------------------------------*/
7
8/*
9 This file is part of Ptrcheck, a Valgrind tool for checking pointer
10 use in programs.
11
12 Copyright (C) 2008-2008 OpenWorks Ltd
13 info@open-works.co.uk
14
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file COPYING.
31
32 Neither the names of the U.S. Department of Energy nor the
33 University of California nor the names of its contributors may be
34 used to endorse or promote products derived from this software
35 without prior written permission.
36*/
37
38#include "pub_tool_basics.h"
39#include "pub_tool_libcbase.h"
40#include "pub_tool_libcassert.h"
41#include "pub_tool_libcprint.h"
42#include "pub_tool_tooliface.h"
43#include "pub_tool_wordfm.h"
44#include "pub_tool_xarray.h"
45#include "pub_tool_threadstate.h"
46#include "pub_tool_mallocfree.h"
47#include "pub_tool_machine.h"
48#include "pub_tool_debuginfo.h"
49#include "pub_tool_options.h"
50
51#include "pc_common.h"
52
53#include "sg_main.h" // self
54
55
56static
57void preen_Invars ( Addr a, SizeT len, Bool isHeap ); /*fwds*/
58
59
60//////////////////////////////////////////////////////////////
61// //
62// Basic Stuff //
63// //
64//////////////////////////////////////////////////////////////
65
66static inline Bool is_sane_TId ( ThreadId tid )
67{
68 return tid >= 0 && tid < VG_N_THREADS
69 && tid != VG_INVALID_THREADID;
70}
71
72static void* sg_malloc ( HChar* cc, SizeT n ) {
73 void* p;
74 tl_assert(n > 0);
75 p = VG_(malloc)( cc, n );
76 tl_assert(p);
77 return p;
78}
79
80static void sg_free ( void* p ) {
81 tl_assert(p);
82 VG_(free)(p);
83}
84
85
86/* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
87 first interval is lower, 1 if the first interval is higher, and 0
 88   if there is any overlap.  The redundant casting paranoia is there
 89   because of what looked distinctly like a bug in gcc-4.1.2, in which
90 some of the comparisons were done signedly instead of
91 unsignedly. */
92inline
93static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
94 Addr a2, SizeT n2 ) {
95 UWord a1w = (UWord)a1;
96 UWord n1w = (UWord)n1;
97 UWord a2w = (UWord)a2;
98 UWord n2w = (UWord)n2;
99 tl_assert(n1w > 0 && n2w > 0);
100 if (a1w + n1w <= a2w) return -1L;
101 if (a2w + n2w <= a1w) return 1L;
102 return 0;
103}
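/* For example (illustrative values only):
   cmp_nonempty_intervals(0x100,8, 0x108,4) returns -1, since the first
   interval ends exactly where the second begins, whereas
   cmp_nonempty_intervals(0x100,8, 0x104,8) returns 0 because the two
   ranges overlap.  The any-overlap-gives-zero behaviour is what allows
   the interval-tree lookups below to find a containing block using a
   one-byte probe key. */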
104
105/* Return true iff [aSmall,aSmall+nSmall) is entirely contained
106 within [aBig,aBig+nBig). */
107inline
108static Bool is_subinterval_of ( Addr aBig, SizeT nBig,
109 Addr aSmall, SizeT nSmall ) {
110 tl_assert(nBig > 0 && nSmall > 0);
111 return aBig <= aSmall && aSmall + nSmall <= aBig + nBig;
112}
113
114inline
115static Addr Addr__min ( Addr a1, Addr a2 ) {
116 return a1 < a2 ? a1 : a2;
117}
118
119inline
120static Addr Addr__max ( Addr a1, Addr a2 ) {
121 return a1 < a2 ? a2 : a1;
122}
123
124
125//////////////////////////////////////////////////////////////
126// //
127// StackBlocks Persistent Cache //
128// //
129//////////////////////////////////////////////////////////////
130
131/* We maintain a set of XArray* of StackBlocks. These are never
132 freed. When a new StackBlock vector is acquired from
133 VG_(di_get_local_blocks_at_ip), we compare it to the existing set.
134 If not present, it is added. If present, the just-acquired one is
135 freed and the copy used.
136
137 This simplifies storage management elsewhere. It allows us to
138 assume that a pointer to an XArray* of StackBlock is valid forever.
139 It also means there are no duplicates anywhere, which could be
140 important from a space point of view for programs that generate a
141 lot of translations, or where translations are frequently discarded
142 and re-made.
143
144 Note that we normalise the arrays by sorting the elements according
145 to an arbitrary total order, so as to avoid the situation that two
146 vectors describe the same set of variables but are not structurally
147 identical. */
148
149static inline Bool StackBlock__sane ( StackBlock* fb )
150{
151 if (fb->name[ sizeof(fb->name)-1 ] != 0)
152 return False;
153 if (fb->spRel != False && fb->spRel != True)
154 return False;
155 if (fb->isVec != False && fb->isVec != True)
156 return False;
157 return True;
158}
159
160/* Generate an arbitrary total ordering on StackBlocks. */
161static Word StackBlock__cmp ( StackBlock* fb1, StackBlock* fb2 )
162{
163 Word r;
164 tl_assert(StackBlock__sane(fb1));
165 tl_assert(StackBlock__sane(fb2));
166 /* Hopefully the .base test hits most of the time. For the blocks
167 associated with any particular instruction, if the .base values
168 are the same then probably it doesn't make sense for the other
169 fields to be different. But this is supposed to be a completely
170 general structural total order, so we have to compare everything
171 anyway. */
172 if (fb1->base < fb2->base) return -1;
173 if (fb1->base > fb2->base) return 1;
174 /* compare sizes */
175 if (fb1->szB < fb2->szB) return -1;
176 if (fb1->szB > fb2->szB) return 1;
177 /* compare sp/fp flag */
178 if (fb1->spRel < fb2->spRel) return -1;
179 if (fb1->spRel > fb2->spRel) return 1;
180 /* compare is/is-not array-typed flag */
181 if (fb1->isVec < fb2->isVec) return -1;
182 if (fb1->isVec > fb2->isVec) return 1;
183 /* compare the name */
184 r = (Word)VG_(strcmp)(fb1->name, fb2->name);
185 return r;
186}
187
188/* Returns True if all fields except .szB are the same. szBs may or
189 may not be the same; they are simply not consulted. */
190static Bool StackBlock__all_fields_except_szB_are_equal (
191 StackBlock* fb1,
192 StackBlock* fb2
193 )
194{
195 tl_assert(StackBlock__sane(fb1));
196 tl_assert(StackBlock__sane(fb2));
197 return fb1->base == fb2->base
198 && fb1->spRel == fb2->spRel
199 && fb1->isVec == fb2->isVec
200 && 0 == VG_(strcmp)(fb1->name, fb2->name);
201}
202
203
204/* Generate an arbitrary total ordering on vectors of StackBlocks. */
205static Word StackBlocks__cmp ( XArray* fb1s, XArray* fb2s )
206{
207 Word i, r, n1, n2;
208 n1 = VG_(sizeXA)( fb1s );
209 n2 = VG_(sizeXA)( fb2s );
210 if (n1 < n2) return -1;
211 if (n1 > n2) return 1;
212 for (i = 0; i < n1; i++) {
213 StackBlock *fb1, *fb2;
214 fb1 = VG_(indexXA)( fb1s, i );
215 fb2 = VG_(indexXA)( fb2s, i );
216 r = StackBlock__cmp( fb1, fb2 );
217 if (r != 0) return r;
218 }
219 tl_assert(i == n1 && i == n2);
220 return 0;
221}
222
223static void pp_StackBlock ( StackBlock* sb )
224{
225 VG_(printf)("StackBlock{ off %ld szB %lu spRel:%c isVec:%c \"%s\" }",
226 sb->base, sb->szB, sb->spRel ? 'Y' : 'N',
227 sb->isVec ? 'Y' : 'N', &sb->name[0] );
228}
229
230static void pp_StackBlocks ( XArray* sbs )
231{
232 Word i, n = VG_(sizeXA)( sbs );
233 VG_(printf)("<<< STACKBLOCKS\n" );
234 for (i = 0; i < n; i++) {
235 VG_(printf)(" ");
236 pp_StackBlock( (StackBlock*)VG_(indexXA)( sbs, i ) );
237 VG_(printf)("\n");
238 }
239 VG_(printf)(">>> STACKBLOCKS\n" );
240}
241
242
243/* ---------- The StackBlock vector cache ---------- */
244
245static WordFM* /* XArray* of StackBlock -> nothing */
246 frameBlocks_set = NULL;
247
248static void init_StackBlocks_set ( void )
249{
250 tl_assert(!frameBlocks_set);
251 frameBlocks_set
252 = VG_(newFM)( sg_malloc, "di.sg_main.iSBs.1", sg_free,
253 (Word(*)(UWord,UWord))StackBlocks__cmp );
254 tl_assert(frameBlocks_set);
255}
256
257/* Find the given StackBlock-vector in our collection thereof. If
258 found, deallocate the supplied one, and return the address of the
259 copy. If not found, add the supplied one to our collection and
260 return its address. */
261static XArray* /* of StackBlock */
262 StackBlocks__find_and_dealloc__or_add
263 ( XArray* /* of StackBlock */ orig )
264{
265 UWord key, val;
266
267 /* First, normalise, as per comments above. */
268 VG_(setCmpFnXA)( orig, (Int(*)(void*,void*))StackBlock__cmp );
269 VG_(sortXA)( orig );
270
271 /* Now get rid of any exact duplicates. */
272 nuke_dups:
273 { Word r, w, nEQ, n = VG_(sizeXA)( orig );
274 if (n >= 2) {
275 w = 0;
276 nEQ = 0;
277 for (r = 0; r < n; r++) {
278 if (r+1 < n) {
279 StackBlock* pR0 = VG_(indexXA)( orig, r+0 );
280 StackBlock* pR1 = VG_(indexXA)( orig, r+1 );
281 Word c = StackBlock__cmp(pR0,pR1);
282 tl_assert(c == -1 || c == 0);
283 if (c == 0) { nEQ++; continue; }
284 }
285 if (w != r) {
286 StackBlock* pW = VG_(indexXA)( orig, w );
287 StackBlock* pR = VG_(indexXA)( orig, r );
288 *pW = *pR;
289 }
290 w++;
291 }
292 tl_assert(r == n);
293 tl_assert(w + nEQ == n);
294 if (w < n) {
295 VG_(dropTailXA)( orig, n-w );
296 }
297 if (0) VG_(printf)("delta %ld\n", n-w);
298 }
299 }
300
301 /* Deal with the following strangeness, where two otherwise
 302   identical blocks are claimed to have different sizes, in which
 303   case we use the larger size. */
304 /* StackBlock{ off 16 szB 66 spRel:Y isVec:Y "sz" }
305 StackBlock{ off 16 szB 130 spRel:Y isVec:Y "sz" }
306 StackBlock{ off 208 szB 16 spRel:Y isVec:Y "ar" }
307 */
308 { Word i, n = VG_(sizeXA)( orig );
309 if (n >= 2) {
310 for (i = 0; i < n-1; i++) {
311 StackBlock* sb0 = VG_(indexXA)( orig, i+0 );
312 StackBlock* sb1 = VG_(indexXA)( orig, i+1 );
313 if (StackBlock__all_fields_except_szB_are_equal(sb0, sb1)) {
314 /* They can't be identical because the previous tidying
315 pass would have removed the duplicates. And they
316 can't be > because the earlier sorting pass would
317 have ordered otherwise-identical descriptors
318 according to < on .szB fields. Hence: */
319 tl_assert(sb0->szB < sb1->szB);
320 sb0->szB = sb1->szB;
321 /* This makes the blocks identical, at the size of the
322 larger one. Rather than go to all the hassle of
323 sliding the rest down, simply go back to the
324 remove-duplicates stage. The assertion guarantees
325 that we eventually make progress, since the rm-dups
326 stage will get rid of one of the blocks. This is
327 expected to happen only exceedingly rarely. */
328 tl_assert(StackBlock__cmp(sb0,sb1) == 0);
329 goto nuke_dups;
330 }
331 }
332 }
333 }
334
335 /* A rather poor sanity check on the results. */
336 { Word i, n = VG_(sizeXA)( orig );
337 for (i = 0; i < n-1; i++) {
338 StackBlock* sb1 = (StackBlock*)VG_(indexXA)( orig, i );
339 StackBlock* sb2 = (StackBlock*)VG_(indexXA)( orig, i+1 );
340 if (sb1->base == sb2->base)
341 pp_StackBlocks(orig);
342 tl_assert(sb1->base != sb2->base);
343 }
344 }
345
346 /* Now, do we have it already? */
347 if (VG_(lookupFM)( frameBlocks_set, &key, &val, (UWord)orig )) {
348 /* yes */
349 XArray* res;
350 tl_assert(val == 0);
351 tl_assert(key != (UWord)orig);
352 VG_(deleteXA)(orig);
353 res = (XArray*)key;
354 return res;
355 } else {
356 /* no */
357 VG_(addToFM)( frameBlocks_set, (UWord)orig, 0 );
358 return orig;
359 }
360}
361
362/* Top level function for getting the StackBlock vector for a given
363 instruction. It is guaranteed that the returned pointer will be
364 valid for the entire rest of the run, and also that the addresses
365 of the individual elements of the array will not change. */
366
367static XArray* /* of StackBlock */ get_StackBlocks_for_IP ( Addr ip )
368{
369 XArray* blocks = VG_(di_get_stack_blocks_at_ip)( ip, True/*arrays only*/ );
370 tl_assert(blocks);
371 return StackBlocks__find_and_dealloc__or_add( blocks );
372}
373
374
375//////////////////////////////////////////////////////////////
376// //
377// GlobalBlocks Persistent Cache //
378// //
379//////////////////////////////////////////////////////////////
380
381/* Generate an arbitrary total ordering on GlobalBlocks. */
382static Word GlobalBlock__cmp ( GlobalBlock* gb1, GlobalBlock* gb2 )
383{
384 Word r;
385 /* compare addrs */
386 if (gb1->addr < gb2->addr) return -1;
387 if (gb1->addr > gb2->addr) return 1;
388 /* compare sizes */
389 if (gb1->szB < gb2->szB) return -1;
390 if (gb1->szB > gb2->szB) return 1;
391 /* compare is/is-not array-typed flag */
392 if (gb1->isVec < gb2->isVec) return -1;
393 if (gb1->isVec > gb2->isVec) return 1;
394 /* compare the name */
395 r = (Word)VG_(strcmp)(gb1->name, gb2->name);
396 if (r != 0) return r;
397 /* compare the soname */
398 r = (Word)VG_(strcmp)(gb1->soname, gb2->soname);
399 return r;
400}
401
402static WordFM* /* GlobalBlock* -> nothing */
403 globalBlock_set = NULL;
404
405static void init_GlobalBlock_set ( void )
406{
407 tl_assert(!globalBlock_set);
408 globalBlock_set
409 = VG_(newFM)( sg_malloc, "di.sg_main.iGBs.1", sg_free,
410 (Word(*)(UWord,UWord))GlobalBlock__cmp );
411 tl_assert(globalBlock_set);
412}
413
414
415/* Top level function for making GlobalBlocks persistent. Call here
416 with a non-persistent version, and the returned one is guaranteed
417 to be valid for the entire rest of the run. The supplied one is
418 copied, not stored, so can be freed after the call. */
419
420static GlobalBlock* get_persistent_GlobalBlock ( GlobalBlock* orig )
421{
422 UWord key, val;
423 /* Now, do we have it already? */
424 if (VG_(lookupFM)( globalBlock_set, &key, &val, (UWord)orig )) {
425 /* yes, return the copy */
426 GlobalBlock* res;
427 tl_assert(val == 0);
428 res = (GlobalBlock*)key;
429 tl_assert(res != orig);
430 return res;
431 } else {
432 /* no. clone it, store the clone and return the clone's
433 address. */
434 GlobalBlock* clone = sg_malloc( "di.sg_main.gpGB.1",
435 sizeof(GlobalBlock) );
436 tl_assert(clone);
437 *clone = *orig;
438 VG_(addToFM)( globalBlock_set, (UWord)clone, 0 );
439 return clone;
440 }
441}
442
443
444//////////////////////////////////////////////////////////////
445// //
446// Interval tree of StackTreeBlock //
447// //
448//////////////////////////////////////////////////////////////
449
450/* A node in a stack interval tree. Zero length intervals (.szB == 0)
451 are not allowed.
452
453 A stack interval tree is a (WordFM StackTreeNode* void). There is
454 one stack interval tree for each thread.
455*/
456typedef
457 struct {
458 Addr addr;
459 SizeT szB; /* copied from .descr->szB */
460 StackBlock* descr; /* it's an instance of this block */
461 UWord depth; /* depth of stack at time block was pushed */
462 }
463 StackTreeNode;
464
465static void pp_StackTree ( WordFM* sitree, HChar* who )
466{
467 UWord keyW, valW;
468 VG_(printf)("<<< BEGIN pp_StackTree %s\n", who );
469 VG_(initIterFM)( sitree );
470 while (VG_(nextIterFM)( sitree, &keyW, &valW )) {
471 StackTreeNode* nd = (StackTreeNode*)keyW;
472 VG_(printf)(" [%#lx,+%lu) descr=%p %s %lu\n", nd->addr, nd->szB,
473 nd->descr, nd->descr->name, nd->descr->szB);
474 }
475 VG_(printf)(">>> END pp_StackTree %s\n", who );
476}
477
478/* Interval comparison function for StackTreeNode */
479static Word cmp_intervals_StackTreeNode ( StackTreeNode* sn1,
480 StackTreeNode* sn2 )
481{
482 return cmp_nonempty_intervals(sn1->addr, sn1->szB,
483 sn2->addr, sn2->szB);
484}
485
486/* Find the node holding 'a', if any. */
487static StackTreeNode* find_StackTreeNode ( WordFM* sitree, Addr a )
488{
489 UWord keyW, valW;
490 StackTreeNode key;
491 tl_assert(sitree);
492 key.addr = a;
493 key.szB = 1;
494 if (VG_(lookupFM)( sitree, &keyW, &valW, (UWord)&key )) {
495 StackTreeNode* res = (StackTreeNode*)keyW;
496 tl_assert(valW == 0);
497 tl_assert(res != &key);
498 return res;
499 } else {
500 return NULL;
501 }
502}
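/* The lookup above relies on the overlap-means-equal convention of
   cmp_intervals_StackTreeNode: the probe key is the degenerate
   interval [a,a+1), so VG_(lookupFM) reports a match against
   whichever stored block contains 'a', if any. */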
503
504/* Note that the supplied XArray of FrameBlock must have been
505 made persistent already. */
506__attribute__((noinline))
507static void add_blocks_to_StackTree (
508 /*MOD*/WordFM* sitree,
509 XArray* /* FrameBlock */ descrs,
510 XArray* /* Addr */ bases,
511 UWord depth
512 )
513{
514 Bool debug = (Bool)0;
515 Word i, nDescrs, nBases;
516
517 nDescrs = VG_(sizeXA)( descrs ),
518 nBases = VG_(sizeXA)( bases );
519 tl_assert(nDescrs == nBases);
520
521 if (nDescrs == 0) return;
522
523 tl_assert(sitree);
524 if (debug) {
525 VG_(printf)("\n");
526 pp_StackTree( sitree, "add_blocks_to_StackTree-pre" );
527 }
528
529 for (i = 0; i < nDescrs; i++) {
530 Bool already_present;
531 StackTreeNode* nyu;
532 Addr addr = *(Addr*)VG_(indexXA)( bases, i );
533 StackBlock* descr = (StackBlock*)VG_(indexXA)( descrs, i );
534 tl_assert(descr->szB > 0);
535 nyu = sg_malloc( "di.sg_main.abtST.1", sizeof(StackTreeNode) );
536 nyu->addr = addr;
537 nyu->szB = descr->szB;
538 nyu->descr = descr;
539 nyu->depth = depth;
540 if (debug) VG_(printf)("ADD %#lx %lu\n", addr, descr->szB);
541 already_present = VG_(addToFM)( sitree, (UWord)nyu, 0 );
542 /* The interval can't already be there; else we have
543 overlapping stack blocks. */
544 tl_assert(!already_present);
545 if (debug) {
546 pp_StackTree( sitree, "add_blocks_to_StackTree-step" );
547 }
548 }
549 if (debug) {
550 pp_StackTree( sitree, "add_blocks_to_StackTree-post" );
551 VG_(printf)("\n");
552 }
553}
554
555static void del_blocks_from_StackTree ( /*MOD*/WordFM* sitree,
556 XArray* /* Addr */ bases )
557{
558 UWord oldK, oldV;
559 Word i, nBases = VG_(sizeXA)( bases );
560 for (i = 0; i < nBases; i++) {
561 Bool b;
562 Addr addr = *(Addr*)VG_(indexXA)( bases, i );
563 StackTreeNode* nd = find_StackTreeNode(sitree, addr);
564 /* The interval must be there; we added it earlier when
565 the associated frame was created. */
566 tl_assert(nd);
567 b = VG_(delFromFM)( sitree, &oldK, &oldV, (UWord)nd );
568 /* we just found the block! */
569 tl_assert(b);
570 tl_assert(oldV == 0);
571 tl_assert(nd == (StackTreeNode*)oldK);
572 sg_free(nd);
573 }
574}
575
576
577static void delete_StackTree__kFin ( UWord keyW ) {
578 StackTreeNode* nd = (StackTreeNode*)keyW;
579 tl_assert(nd);
580 sg_free(nd);
581}
582static void delete_StackTree__vFin ( UWord valW ) {
583 tl_assert(valW == 0);
584}
585static void delete_StackTree ( WordFM* sitree )
586{
587 VG_(deleteFM)( sitree,
588 delete_StackTree__kFin, delete_StackTree__vFin );
589}
590
591static WordFM* new_StackTree ( void ) {
592 return VG_(newFM)( sg_malloc, "di.sg_main.nST.1", sg_free,
593 (Word(*)(UWord,UWord))cmp_intervals_StackTreeNode );
594}
595
596
597//////////////////////////////////////////////////////////////
598// //
599// Interval tree of GlobalTreeBlock //
600// //
601//////////////////////////////////////////////////////////////
602
603/* A node in a global interval tree. Zero length intervals
604 (.szB == 0) are not allowed.
605
606 A global interval tree is a (WordFM GlobalTreeNode* void). There
607 is one global interval tree for the entire process.
608*/
609typedef
610 struct {
611 Addr addr; /* copied from .descr->addr */
612 SizeT szB; /* copied from .descr->szB */
613 GlobalBlock* descr; /* it's this block */
614 }
615 GlobalTreeNode;
616
617static void GlobalTreeNode__pp ( GlobalTreeNode* nd ) {
618 tl_assert(nd->descr);
619 VG_(printf)("GTNode [%#lx,+%ld) %s",
620 nd->addr, nd->szB, nd->descr->name);
621}
622
623static void GlobalTree__pp ( WordFM* /* of (GlobalTreeNode,void) */ gitree,
624 HChar* who )
625{
626 UWord keyW, valW;
627 GlobalTreeNode* nd;
628 VG_(printf)("<<< GlobalBlockTree (%s)\n", who);
629 VG_(initIterFM)( gitree );
630 while (VG_(nextIterFM)( gitree, &keyW, &valW )) {
631 tl_assert(valW == 0);
632 nd = (GlobalTreeNode*)keyW;
633 VG_(printf)(" ");
634 GlobalTreeNode__pp(nd);
635 VG_(printf)("\n");
636 }
637 VG_(doneIterFM)( gitree );
638 VG_(printf)(">>>\n");
639}
640
641/* Interval comparison function for GlobalTreeNode */
642static Word cmp_intervals_GlobalTreeNode ( GlobalTreeNode* gn1,
643 GlobalTreeNode* gn2 )
644{
645 return cmp_nonempty_intervals( gn1->addr, gn1->szB,
646 gn2->addr, gn2->szB );
647}
648
649/* Find the node holding 'a', if any. */
650static GlobalTreeNode* find_GlobalTreeNode ( WordFM* gitree, Addr a )
651{
652 UWord keyW, valW;
653 GlobalTreeNode key;
654 key.addr = a;
655 key.szB = 1;
656 if (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
657 GlobalTreeNode* res = (GlobalTreeNode*)keyW;
658 tl_assert(valW == 0);
659 tl_assert(res != &key);
660 return res;
661 } else {
662 return NULL;
663 }
664}
665
666/* Note that the supplied GlobalBlock must have been made persistent
667 already. */
668static void add_block_to_GlobalTree (
669 /*MOD*/WordFM* gitree,
670 GlobalBlock* descr
671 )
672{
673 Bool already_present;
674 GlobalTreeNode *nyu, *nd;
675 UWord keyW, valW;
676
677 tl_assert(descr->szB > 0);
678 nyu = sg_malloc( "di.sg_main.abtG.1", sizeof(GlobalTreeNode) );
679 nyu->addr = descr->addr;
680 nyu->szB = descr->szB;
681 nyu->descr = descr;
682
683 /* Basically it's an error to add a global block to the tree that
684 is already in the tree. However, detect and ignore attempts to
685 insert exact duplicates; they do appear for some reason
 686      (possibly a bug in m_debuginfo?) */
687 already_present = VG_(lookupFM)( gitree, &keyW, &valW, (UWord)nyu );
688 if (already_present) {
689 tl_assert(valW == 0);
690 nd = (GlobalTreeNode*)keyW;
691 tl_assert(nd);
692 tl_assert(nd != nyu);
693 tl_assert(nd->descr);
694 tl_assert(nyu->descr);
695 if (nd->addr == nyu->addr && nd->szB == nyu->szB
696 /* && 0 == VG_(strcmp)(nd->descr->name, nyu->descr->name) */
697 /* Although it seems reasonable to demand that duplicate
698 blocks have identical names, that is too strict. For
699 example, reading debuginfo from glibc produces two
700 otherwise identical blocks with names "tzname" and
701 "__tzname". A constraint of the form "must be identical,
702 or one must be a substring of the other" would fix that.
703 However, such trickery is scuppered by the fact that we
704 truncate all variable names to 15 characters to make
705 storage management simpler, hence giving pairs like
706 "__EI___pthread_" (truncated) vs "__pthread_keys". So
707 it's simplest just to skip the name comparison
708 completely. */
709 && 0 == VG_(strcmp)(nd->descr->soname, nyu->descr->soname)) {
710 /* exact duplicate; ignore it */
711 sg_free(nyu);
712 return;
713 }
714 /* else fall through; the assertion below will catch it */
715 }
716
717 already_present = VG_(addToFM)( gitree, (UWord)nyu, 0 );
718 /* The interval can't already be there; else we have
719 overlapping global blocks. */
720 if (already_present) {
721 GlobalTree__pp( gitree, "add_block_to_GlobalTree: non-exact duplicate" );
722 VG_(printf)("Overlapping block: ");
723 GlobalTreeNode__pp(nyu);
724 VG_(printf)("\n");
725 }
726 tl_assert(!already_present);
727}
728
729static Bool del_GlobalTree_range ( /*MOD*/WordFM* gitree,
730 Addr a, SizeT szB )
731{
732 /* One easy way to do this: look up [a,a+szB) in the tree. That
733 will either succeed, producing a block which intersects that
734 range, in which case we delete it and repeat; or it will fail,
735 in which case there are no blocks intersecting the range, and we
736 can bring the process to a halt. */
737 UWord keyW, valW, oldK, oldV;
738 GlobalTreeNode key, *nd;
739 Bool b, anyFound;
740
741 tl_assert(szB > 0);
742
743 anyFound = False;
744
745 key.addr = a;
746 key.szB = szB;
747
748 while (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
749 anyFound = True;
750 nd = (GlobalTreeNode*)keyW;
751 tl_assert(valW == 0);
752 tl_assert(nd != &key);
753 tl_assert(cmp_nonempty_intervals(a, szB, nd->addr, nd->szB) == 0);
754
755 b = VG_(delFromFM)( gitree, &oldK, &oldV, (UWord)&key );
756 tl_assert(b);
757 tl_assert(oldV == 0);
758 tl_assert(oldK == keyW); /* check we deleted the node we just found */
759 }
760
761 return anyFound;
762}
763
764
765//////////////////////////////////////////////////////////////
766// //
767// Invar //
768// //
769//////////////////////////////////////////////////////////////
770
771/* An invariant, as resulting from watching the destination of a
 772   memory referencing instruction.  It is initially Inv_Unset until the
773 instruction makes a first access. */
774
775typedef
776 enum {
777 Inv_Unset=1, /* not established yet */
778 Inv_Unknown, /* unknown location */
779 Inv_Stack0, /* array-typed stack block in innermost frame */
780 Inv_StackN, /* array-typed stack block in non-innermost frame */
781 Inv_Global, /* array-typed global block */
782 }
783 InvarTag;
784
785typedef
786 struct {
787 InvarTag tag;
788 union {
789 struct {
790 } Unset;
791 struct {
792 } Unknown;
793 struct {
794 Addr addr;
795 SizeT szB;
796 StackBlock* descr;
797 } Stack0; /* innermost stack frame */
798 struct {
799 /* Pointer to a node in the interval tree for
800 this thread. */
801 StackTreeNode* nd;
802 } StackN; /* non-innermost stack frame */
803 struct {
804 /* Pointer to a GlobalBlock in the interval tree of
805 global blocks. */
806 GlobalTreeNode* nd;
807 } Global;
808 }
809 Inv;
810 }
811 Invar;
812
813/* Partial debugging printing for an Invar. */
814static void pp_Invar ( Invar* i )
815{
816 switch (i->tag) {
817 case Inv_Unset:
818 VG_(printf)("Unset");
819 break;
820 case Inv_Unknown:
821 VG_(printf)("Unknown");
822 break;
823 case Inv_Stack0:
824 VG_(printf)("Stack0 [%#lx,+%lu)",
825 i->Inv.Stack0.addr, i->Inv.Stack0.szB);
826 break;
827 case Inv_StackN:
828 VG_(printf)("StackN [%#lx,+%lu)",
829 i->Inv.StackN.nd->addr, i->Inv.StackN.nd->szB);
830 break;
831 case Inv_Global:
832 VG_(printf)("Global [%#lx,+%lu)",
833 i->Inv.Global.nd->addr, i->Inv.Global.nd->szB);
834 break;
835 default:
836 tl_assert(0);
837 }
838}
839
840/* Compare two Invars for equality. */
841static Bool eq_Invar ( Invar* i1, Invar* i2 )
842{
843 tl_assert(i1->tag != Inv_Unset);
844 tl_assert(i2->tag != Inv_Unset);
845 if (i1->tag != i2->tag)
846 return False;
847 switch (i1->tag) {
848 case Inv_Unknown:
849 return True;
850 case Inv_Stack0:
851 return i1->Inv.Stack0.addr == i2->Inv.Stack0.addr
852 && i1->Inv.Stack0.szB == i2->Inv.Stack0.szB;
853 case Inv_StackN:
854 return i1->Inv.StackN.nd == i2->Inv.StackN.nd;
855 case Inv_Global:
856 return i1->Inv.Global.nd == i2->Inv.Global.nd;
857 default:
858 tl_assert(0);
859 }
860 /*NOTREACHED*/
861 tl_assert(0);
862}
863
864/* Print selected parts of an Invar, suitable for use in error
865 messages. */
866static void show_Invar( HChar* buf, Word nBuf, Invar* inv, Word depth )
867{
868 HChar* str;
869 tl_assert(nBuf >= 96);
870 buf[0] = 0;
871 switch (inv->tag) {
872 case Inv_Unknown:
873 VG_(sprintf)(buf, "%s", "unknown");
874 break;
875 case Inv_Stack0:
876 str = "array";
877 VG_(sprintf)(buf, "stack %s \"%s\" in this frame",
878 str, inv->Inv.Stack0.descr->name );
879 break;
880 case Inv_StackN:
881 str = "array";
882 VG_(sprintf)(buf, "stack %s \"%s\" in frame %lu back from here",
883 str, inv->Inv.StackN.nd->descr->name,
884 depth - inv->Inv.StackN.nd->depth );
885 break;
886 case Inv_Global:
887 str = "array";
888 VG_(sprintf)(buf, "global %s \"%s\" in object with soname \"%s\"",
889 str, inv->Inv.Global.nd->descr->name,
890 inv->Inv.Global.nd->descr->soname );
891 break;
892 case Inv_Unset:
893 VG_(sprintf)(buf, "%s", "Unset!");
894 break;
895 default:
896 tl_assert(0);
897 }
898}
899
900
901//////////////////////////////////////////////////////////////
902// //
903// our globals //
904// //
905//////////////////////////////////////////////////////////////
906
907//////////////////////////////////////////////////////////////
908///
909
910#define N_QCACHE 16
911
912/* Powers of two only, else the result will be chaos */
913#define QCACHE_ADVANCE_EVERY 16
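/* Rationale for the power-of-two restriction: classify_address tests
   (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0, i.e. it uses the value
   minus one as a bit mask.  That only means "once every
   QCACHE_ADVANCE_EVERY queries" when the value is a power of two;
   e.g. 16 gives the mask 0xF, whereas 12 would give the mask 0xB and
   fire at irregular intervals. */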
914
915/* Per-thread query cache. Note that the invar can only be Inv_StackN
916 (but not Inv_Stack0), Inv_Global or Inv_Unknown. */
917typedef
918 struct {
919 Addr addr;
920 SizeT szB;
921 Invar inv;
922 }
923 QCElem;
924
925typedef
926 struct {
927 Word nInUse;
928 QCElem elems[N_QCACHE];
929 }
930 QCache;
931
932static void QCache__invalidate ( QCache* qc ) {
933 tl_assert(qc->nInUse >= 0);
934 qc->nInUse = 0;
935}
936
937static void QCache__pp ( QCache* qc, HChar* who )
938{
939 Word i;
940 VG_(printf)("<<< QCache with %ld elements (%s)\n", qc->nInUse, who);
941 for (i = 0; i < qc->nInUse; i++) {
942 VG_(printf)(" [%#lx,+%#lx) ", qc->elems[i].addr, qc->elems[i].szB);
943 pp_Invar(&qc->elems[i].inv);
944 VG_(printf)("\n");
945 }
946 VG_(printf)(">>>\n");
947}
948
949static ULong stats__qcache_queries = 0;
950static ULong stats__qcache_misses = 0;
951static ULong stats__qcache_probes = 0;
952
953///
954//////////////////////////////////////////////////////////////
955
956/* Each thread has:
 957   * a shadow stack of StackFrames, which is a doubly-linked list
 958   * a stack block interval tree
959*/
960static struct _StackFrame* shadowStacks[VG_N_THREADS];
961
962static WordFM* /* StackTreeNode */ siTrees[VG_N_THREADS];
963
964static QCache qcaches[VG_N_THREADS];
965
966
967/* Additionally, there is one global variable interval tree
968 for the entire process.
969*/
970static WordFM* /* GlobalTreeNode */ giTree;
971
972
973static void invalidate_all_QCaches ( void )
974{
975 Word i;
976 for (i = 0; i < VG_N_THREADS; i++) {
977 QCache__invalidate( &qcaches[i] );
978 }
979}
980
981static void ourGlobals_init ( void )
982{
983 Word i;
984 for (i = 0; i < VG_N_THREADS; i++) {
985 shadowStacks[i] = NULL;
986 siTrees[i] = NULL;
987 }
988 invalidate_all_QCaches();
989 giTree = VG_(newFM)( sg_malloc, "di.sg_main.oGi.1", sg_free,
990 (Word(*)(UWord,UWord))cmp_intervals_GlobalTreeNode );
991}
992
993
994//////////////////////////////////////////////////////////////
995// //
996// Handle global variable load/unload events //
997// //
998//////////////////////////////////////////////////////////////
999
1000static void acquire_globals ( ULong di_handle )
1001{
1002 Word n, i;
1003 XArray* /* of GlobalBlock */ gbs;
1004 if (0) VG_(printf)("ACQUIRE GLOBALS %llu\n", di_handle );
1005 gbs = VG_(di_get_global_blocks_from_dihandle)
1006 (di_handle, True/*arrays only*/);
1007 if (0) VG_(printf)(" GOT %ld globals\n", VG_(sizeXA)( gbs ));
1008
1009 n = VG_(sizeXA)( gbs );
1010 for (i = 0; i < n; i++) {
1011 GlobalBlock* gbp;
1012 GlobalBlock* gb = VG_(indexXA)( gbs, i );
1013 if (0) VG_(printf)(" new Global size %2lu at %#lx: %s %s\n",
1014 gb->szB, gb->addr, gb->soname, gb->name );
1015 tl_assert(gb->szB > 0);
1016 /* Make a persistent copy of each GlobalBlock, and add it
1017 to the tree. */
1018 gbp = get_persistent_GlobalBlock( gb );
1019 add_block_to_GlobalTree( giTree, gbp );
1020 }
1021
1022 VG_(deleteXA)( gbs );
1023}
1024
1025
1026/* We only intercept these two because we need to see any di_handles
1027 that might arise from the mappings/allocations. */
1028void sg_new_mem_mmap( Addr a, SizeT len,
1029 Bool rr, Bool ww, Bool xx, ULong di_handle )
1030{
1031 if (di_handle > 0)
1032 acquire_globals(di_handle);
1033}
1034void sg_new_mem_startup( Addr a, SizeT len,
1035 Bool rr, Bool ww, Bool xx, ULong di_handle )
1036{
1037 if (di_handle > 0)
1038 acquire_globals(di_handle);
1039}
1040void sg_die_mem_munmap ( Addr a, SizeT len )
1041{
1042 Bool debug = (Bool)0;
1043 Bool overlap = False;
1044
1045 if (debug) VG_(printf)("MUNMAP %#lx %lu\n", a, len );
1046
1047 if (len == 0)
1048 return;
1049
1050 overlap = del_GlobalTree_range(giTree, a, len);
1051
1052 { /* redundant sanity check */
1053 UWord keyW, valW;
1054 VG_(initIterFM)( giTree );
1055 while (VG_(nextIterFM)( giTree, &keyW, &valW )) {
1056 GlobalTreeNode* nd = (GlobalTreeNode*)keyW;
1057 tl_assert(valW == 0);
1058 tl_assert(nd->szB > 0);
1059 tl_assert(nd->addr + nd->szB <= a
1060 || a + len <= nd->addr);
1061 }
1062 VG_(doneIterFM)( giTree );
1063 }
1064
1065 if (!overlap)
1066 return;
1067
1068 /* Ok, the range contained some blocks. Therefore we'll need to
1069 visit all the Invars in all the thread shadow stacks, and
1070 convert all Inv_Global{S,V} entries that intersect [a,a+len) to
1071 Inv_Unknown. */
1072 tl_assert(len > 0);
1073 preen_Invars( a, len, False/*!isHeap*/ );
1074 invalidate_all_QCaches();
1075}
1076
1077
1078//////////////////////////////////////////////////////////////
1079// //
1080// StackFrame //
1081// //
1082//////////////////////////////////////////////////////////////
1083
1084static ULong stats__total_accesses = 0;
1085static ULong stats__classify_Stack0 = 0;
1086static ULong stats__classify_StackN = 0;
1087static ULong stats__classify_Global = 0;
1088static ULong stats__classify_Unknown = 0;
1089static ULong stats__Invars_preened = 0;
1090static ULong stats__Invars_changed = 0;
1091static ULong stats__t_i_b_empty = 0;
1092static ULong stats__htab_fast = 0;
1093static ULong stats__htab_searches = 0;
1094static ULong stats__htab_probes = 0;
1095static ULong stats__htab_resizes = 0;
1096
1097
1098/* A dynamic instance of an instruction */
1099typedef
1100 struct {
1101 /* IMMUTABLE */
1102 Addr insn_addr; /* NB! zero means 'not in use' */
1103 XArray* blocks; /* XArray* of StackBlock, or NULL if none */
1104 /* MUTABLE */
1105 Invar invar;
1106 }
1107 IInstance;
1108
1109
1110#define N_HTAB_FIXED 64
1111
1112typedef
1113 struct _StackFrame {
1114 /* The sp when the frame was created, so we know when to get rid
1115 of it. */
1116 Addr creation_sp;
1117 /* The stack frames for a thread are arranged as a doubly linked
1118 list. Obviously the outermost frame in the stack has .outer
1119 as NULL and the innermost in theory has .inner as NULL.
1120 However, when a function returns, we don't delete the
1121 just-vacated StackFrame. Instead, it is retained in the list
1122 and will be re-used when the next call happens. This is so
1123 as to avoid constantly having to dynamically allocate and
1124 deallocate frames. */
1125 struct _StackFrame* inner;
1126 struct _StackFrame* outer;
1127 Word depth; /* 0 for outermost; increases inwards */
1128 /* Information for each memory referencing instruction, for this
1129 instantiation of the function. The iinstances array is
1130 operated as a simple linear-probe hash table, which is
 1131      dynamically expanded as necessary.  One critical thing is
1132 that an IInstance with a .insn_addr of zero is interpreted to
1133 mean that hash table slot is unused. This means we can't
1134 store an IInstance for address zero. */
1135 /* Note that htab initially points to htab_fixed. If htab_fixed
1136 turns out not to be big enough then htab is made to point to
1137 dynamically allocated memory. But it's often the case that
1138 htab_fixed is big enough, so this optimisation saves a huge
1139 number of sg_malloc/sg_free call pairs. */
1140 IInstance* htab;
1141 UWord htab_size; /* size of hash table, MAY ONLY BE A POWER OF 2 */
1142 UWord htab_used; /* number of hash table slots currently in use */
1143 /* If this frame is currently making a call, then the following
1144 are relevant. */
1145 Addr sp_at_call;
1146 Addr fp_at_call;
1147 XArray* /* of Addr */ blocks_added_by_call;
1148 /* See comment just above */
1149 IInstance htab_fixed[N_HTAB_FIXED];
1150 }
1151 StackFrame;
1152
1153
1154
1155
1156
1157/* Move this somewhere else? */
1158/* Visit all Invars in the entire system. If 'isHeap' is True, change
1159 all Inv_Heap Invars that intersect [a,a+len) to Inv_Unknown. If
1160 'isHeap' is False, do the same but to the Inv_Global{S,V} Invars
1161 instead. */
1162
1163__attribute__((noinline))
1164static void preen_Invar ( Invar* inv, Addr a, SizeT len, Bool isHeap )
1165{
1166 stats__Invars_preened++;
1167 tl_assert(len > 0);
1168 tl_assert(inv);
1169 switch (inv->tag) {
1170#if 0
1171 case Inv_Heap:
1172 tl_assert(inv->Inv.Heap.len > 0);
1173 if (isHeap && rangesOverlap(a, len, inv->Inv.Heap.start,
1174 inv->Inv.Heap.len)) {
1175 inv->tag = Inv_Unknown;
1176 stats__Invars_changed++;
1177 }
1178 break;
1179 case Inv_GlobalS:
1180 case Inv_GlobalV:
1181 tl_assert(inv->Inv.Global.len > 0);
1182 if ((!isHeap)
1183 && rangesOverlap(a, len, inv->Inv.Global.start,
1184 inv->Inv.Global.len)) {
1185 inv->tag = Inv_Unknown;
1186 stats__Invars_changed++;
1187 }
1188 break;
1189 case Inv_StackS:
1190 case Inv_StackV:
1191 case Inv_Unknown:
1192 break;
1193#endif
1194 default: tl_assert(0);
1195 }
1196}
1197
1198__attribute__((noinline))
1199static void preen_Invars ( Addr a, SizeT len, Bool isHeap )
1200{
1201 tl_assert(0);
1202#if 0
1203 Int i;
1204 Word ixFrames, nFrames;
1205 UWord u;
1206 XArray* stack; /* XArray* of StackFrame */
1207 StackFrame* frame;
1208 tl_assert(len > 0);
1209 tl_assert(0);
1210 for (i = 0; i < VG_N_THREADS; i++) {
1211tl_assert(0);
1212 stack = shadowStacks[i];
1213 if (!stack)
1214 continue;
1215 nFrames = VG_(sizeXA)( stack );
1216 for (ixFrames = 0; ixFrames < nFrames; ixFrames++) {
1217 UWord xx = 0; /* sanity check only; count of used htab entries */
1218 frame = VG_(indexXA)( stack, ixFrames );
1219 tl_assert(frame->htab);
1220 for (u = 0; u < frame->htab_size; u++) {
1221 IInstance* ii = &frame->htab[u];
1222 if (ii->insn_addr == 0)
1223 continue; /* not in use */
1224 preen_Invar( &ii->invar, a, len, isHeap );
1225 xx++;
1226 }
1227 tl_assert(xx == frame->htab_used);
1228 }
1229 }
1230#endif
1231}
1232
1233
1234/* XXX this should be >> 2 on ppc32/64 since the bottom two bits
1235 of the ip are guaranteed to be zero */
1236inline static UWord compute_II_hash ( Addr ip, UWord htab_size ) {
1237 return (ip >> 0) & (htab_size - 1);
1238}
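/* Since htab_size is always a power of two, this simply keeps the low
   bits of the insn address: e.g. with htab_size == 64, an ip of
   0x400a3c hashes to slot 0x3c.  (The ip value here is purely
   illustrative.) */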
1239
1240__attribute__((noinline))
1241static void initialise_II_hash_table ( StackFrame* sf )
1242{
1243 UWord i;
1244 sf->htab_size = N_HTAB_FIXED; /* initial hash table size */
1245 sf->htab = &sf->htab_fixed[0];
1246 tl_assert(sf->htab);
1247 sf->htab_used = 0;
1248 for (i = 0; i < sf->htab_size; i++)
1249 sf->htab[i].insn_addr = 0; /* NOT IN USE */
1250}
1251
1252
1253__attribute__((noinline))
1254static void resize_II_hash_table ( StackFrame* sf )
1255{
1256 UWord i, j, ix, old_size, new_size;
1257 IInstance *old_htab, *new_htab, *old;
1258
1259 tl_assert(sf && sf->htab);
1260 old_size = sf->htab_size;
1261 new_size = 2 * old_size;
1262 old_htab = sf->htab;
1263 new_htab = sg_malloc( "di.sg_main.rIht.1",
1264 new_size * sizeof(IInstance) );
1265 for (i = 0; i < new_size; i++) {
1266 new_htab[i].insn_addr = 0; /* NOT IN USE */
1267 }
1268 for (i = 0; i < old_size; i++) {
1269 old = &old_htab[i];
1270 if (old->insn_addr == 0 /* NOT IN USE */)
1271 continue;
1272 ix = compute_II_hash(old->insn_addr, new_size);
1273 /* find out where to put this, in the new table */
1274 j = new_size;
1275 while (1) {
1276 if (new_htab[ix].insn_addr == 0)
1277 break;
1278 /* This can't ever happen, because it would mean the new
1279 table is full; that isn't allowed -- even the old table is
1280 only allowed to become half full. */
1281 tl_assert(j > 0);
1282 j--;
1283 ix++; if (ix == new_size) ix = 0;
1284 }
1285 /* copy the old entry to this location */
1286 tl_assert(ix < new_size);
1287 tl_assert(new_htab[ix].insn_addr == 0);
1288 new_htab[ix] = *old;
1289 tl_assert(new_htab[ix].insn_addr != 0);
1290 }
1291 /* all entries copied; free old table. */
1292 if (old_htab != &sf->htab_fixed[0])
1293 sg_free(old_htab);
1294 sf->htab = new_htab;
1295 sf->htab_size = new_size;
1296 /* check sf->htab_used is correct. Optional and a bit expensive
1297 but anyway: */
1298 j = 0;
1299 for (i = 0; i < new_size; i++) {
1300 if (new_htab[i].insn_addr != 0) {
1301 j++;
1302 }
1303 }
1304 tl_assert(j == sf->htab_used);
1305 if (0) VG_(printf)("resized tab for SF %p to %lu\n", sf, new_size);
1306}
1307
1308
1309__attribute__((noinline))
1310static IInstance* find_or_create_IInstance_SLOW (
1311 StackFrame* sf,
1312 Addr ip,
1313 XArray* /* StackBlock */ ip_frameblocks
1314 )
1315{
1316 UWord i, ix;
1317
1318 stats__htab_searches++;
1319
1320 tl_assert(sf);
1321 tl_assert(sf->htab);
1322
 1323   /* Make sure the table's load factor doesn't get too high. */
1324 if (UNLIKELY(2 * sf->htab_used >= 1 * sf->htab_size)) {
1325 stats__htab_resizes++;
1326 resize_II_hash_table(sf);
1327 }
1328 tl_assert(2 * sf->htab_used <= sf->htab_size);
1329
1330 ix = compute_II_hash(ip, sf->htab_size);
1331 i = sf->htab_size;
1332 while (1) {
1333 stats__htab_probes++;
1334 /* Note that because of the way the fast-case handler works,
1335 these two tests are actually redundant in the first iteration
1336 of this loop. (Except they aren't redundant if the code just
1337 above resized the table first. :-) */
1338 if (sf->htab[ix].insn_addr == ip)
1339 return &sf->htab[ix];
1340 if (sf->htab[ix].insn_addr == 0)
1341 break;
1342 /* If i ever gets to zero and we have found neither what we're
1343 looking for nor an empty slot, the table must be full. Which
1344 isn't possible -- we monitor the load factor to ensure it
1345 doesn't get above say 50%; if that ever does happen the table
1346 is resized. */
1347 tl_assert(i > 0);
1348 i--;
1349 ix++;
1350 if (ix == sf->htab_size) ix = 0;
1351 }
1352
1353 /* So now we've found a free slot at ix, and we can use that. */
1354 tl_assert(sf->htab[ix].insn_addr == 0);
1355
1356 /* Add a new record in this slot. */
1357 tl_assert(ip != 0); /* CAN'T REPRESENT THIS */
1358 sf->htab[ix].insn_addr = ip;
1359 sf->htab[ix].blocks = ip_frameblocks;
1360 sf->htab[ix].invar.tag = Inv_Unset;
1361 sf->htab_used++;
1362 return &sf->htab[ix];
1363}
1364
1365
1366inline
1367static IInstance* find_or_create_IInstance (
1368 StackFrame* sf,
1369 Addr ip,
1370 XArray* /* StackBlock */ ip_frameblocks
1371 )
1372{
1373 UWord ix = compute_II_hash(ip, sf->htab_size);
1374 /* Is it in the first slot we come to? */
1375 if (LIKELY(sf->htab[ix].insn_addr == ip)) {
1376 stats__htab_fast++;
1377 return &sf->htab[ix];
1378 }
1379 /* If the first slot we come to is empty, bag it. */
1380 if (LIKELY(sf->htab[ix].insn_addr == 0)) {
1381 stats__htab_fast++;
1382 tl_assert(ip != 0);
1383 sf->htab[ix].insn_addr = ip;
1384 sf->htab[ix].blocks = ip_frameblocks;
1385 sf->htab[ix].invar.tag = Inv_Unset;
1386 sf->htab_used++;
1387 return &sf->htab[ix];
1388 }
1389 /* Otherwise we hand off to the slow case, which searches other
1390 slots, and optionally resizes the table if necessary. */
1391 return find_or_create_IInstance_SLOW( sf, ip, ip_frameblocks );
1392}
1393
1394
1395__attribute__((noinline))
1396static Addr calculate_StackBlock_EA ( StackBlock* descr,
1397 Addr sp, Addr fp ) {
1398 UWord w1 = (UWord)descr->base;
1399 UWord w2 = (UWord)(descr->spRel ? sp : fp);
1400 UWord ea = w1 + w2;
1401 return ea;
1402}
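/* In other words, the descriptor's base is treated as an offset from
   whichever of sp or fp it is declared relative to, with the addition
   done in unsigned arithmetic so that negative offsets wrap as
   expected. */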
1403
1404/* Given an array of StackBlocks, return an array of Addrs, holding
1405 their effective addresses. Caller deallocates result array. */
1406__attribute__((noinline))
1407static XArray* /* Addr */ calculate_StackBlock_EAs (
1408 XArray* /* StackBlock */ blocks,
1409 Addr sp, Addr fp
1410 )
1411{
1412 XArray* res;
1413 Word i, n = VG_(sizeXA)( blocks );
1414 tl_assert(n > 0);
1415 res = VG_(newXA)( sg_malloc, "di.sg_main.cSBE.1", sg_free, sizeof(Addr) );
1416 for (i = 0; i < n; i++) {
1417 StackBlock* blk = VG_(indexXA)( blocks, i );
1418 Addr ea = calculate_StackBlock_EA( blk, sp, fp );
1419 VG_(addToXA)( res, &ea );
1420 }
1421 return res;
1422}
1423
1424
1425/* Try to classify the block into which a memory access falls, and
1426 write the result in 'inv'. This writes all relevant fields of
1427 'inv'. */
1428__attribute__((noinline))
1429static void classify_address ( /*OUT*/Invar* inv,
1430 ThreadId tid,
1431 Addr ea, Addr sp, Addr fp,
1432 UWord szB,
1433 XArray* /* of StackBlock */ thisInstrBlocks )
1434{
1435 tl_assert(szB > 0);
1436 /* First, look in the stack blocks accessible in this instruction's
1437 frame. */
1438 {
1439 Word i, nBlocks = VG_(sizeXA)( thisInstrBlocks );
1440 if (nBlocks == 0) stats__t_i_b_empty++;
1441 for (i = 0; i < nBlocks; i++) {
1442 StackBlock* descr = VG_(indexXA)( thisInstrBlocks, i );
1443 Addr bea = calculate_StackBlock_EA( descr, sp, fp );
1444 if (bea <= ea && ea + szB <= bea + descr->szB) {
1445 /* found it */
1446 inv->tag = Inv_Stack0;
1447 inv->Inv.Stack0.addr = bea;
1448 inv->Inv.Stack0.szB = descr->szB;
1449 inv->Inv.Stack0.descr = descr;
1450 stats__classify_Stack0++;
1451 return;
1452 }
1453 }
1454 }
1455 /* Look in this thread's query cache */
1456 { Word i;
1457 QCache* cache = &qcaches[tid];
1458 static UWord ctr = 0;
1459 stats__qcache_queries++;
1460 for (i = 0; i < cache->nInUse; i++) {
1461 if (0) /* expensive in a loop like this */
1462 tl_assert(cache->elems[i].addr + cache->elems[i].szB != 0);
1463 stats__qcache_probes++;
1464 if (is_subinterval_of(cache->elems[i].addr,
1465 cache->elems[i].szB, ea, szB)) {
1466 if (i > 0
1467 && (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0) {
1468 QCElem tmp;
1469 tmp = cache->elems[i-1];
1470 cache->elems[i-1] = cache->elems[i];
1471 cache->elems[i] = tmp;
1472 i--;
1473 }
1474 *inv = cache->elems[i].inv;
1475 return;
1476 }
1477 }
1478 stats__qcache_misses++;
1479 }
1480 /* Ok, so it's not a block in the top frame. Perhaps it's a block
1481 in some calling frame? Consult this thread's stack-block
1482 interval tree to find out. */
1483 { StackTreeNode* nd = find_StackTreeNode( siTrees[tid], ea );
1484 /* We know that [ea,ea+1) is in the block, but we need to
1485 restrict to the case where the whole access falls within
1486 it. */
1487 if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
1488 nd = NULL;
1489 }
1490 if (nd) {
1491 /* found it */
1492 inv->tag = Inv_StackN;
1493 inv->Inv.StackN.nd = nd;
1494 stats__classify_StackN++;
1495 goto out;
1496 }
1497 }
1498 /* Not in a stack block. Try the global pool. */
1499 { GlobalTreeNode* nd = find_GlobalTreeNode(giTree, ea);
1500 /* We know that [ea,ea+1) is in the block, but we need to
1501 restrict to the case where the whole access falls within
1502 it. */
1503 if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
1504 nd = NULL;
1505 }
1506 if (nd) {
1507 /* found it */
1508 inv->tag = Inv_Global;
1509 inv->Inv.Global.nd = nd;
1510 stats__classify_Global++;
1511 goto out;
1512 }
1513 }
1514 /* No idea - give up. */
1515 inv->tag = Inv_Unknown;
1516 stats__classify_Unknown++;
1517
1518 /* Update the cache */
1519 out:
1520 { Addr toadd_addr = 0;
1521 SizeT toadd_szB = 0;
1522 QCache* cache = &qcaches[tid];
1523
1524 static UWord ctr = 0;
1525 Bool show = False;
1526 if (0 && 0 == (ctr++ & 0x1FFFFF)) show = True;
1527
1528 if (show) QCache__pp(cache, "before upd");
1529
1530 switch (inv->tag) {
1531 case Inv_Global:
1532 toadd_addr = inv->Inv.Global.nd->addr;
1533 toadd_szB = inv->Inv.Global.nd->szB;
1534 break;
1535 case Inv_StackN:
1536 toadd_addr = inv->Inv.StackN.nd->addr;
1537 toadd_szB = inv->Inv.StackN.nd->szB;
1538 break;
1539 case Inv_Unknown: {
1540 /* This is more complex. We need to figure out the
1541 intersection of the "holes" in the global and stack
1542 interval trees into which [ea,ea+szB) falls. This is
1543 further complicated by the fact that [ea,ea+szB) might
1544 not fall cleanly into a hole; it may instead fall across
1545 the boundary of a stack or global block. In that case
1546 we just ignore it and don't update the cache, since we
1547 have no way to represent this situation precisely. */
1548 StackTreeNode sNegInf, sPosInf, sKey, *sLB, *sUB;
1549 GlobalTreeNode gNegInf, gPosInf, gKey, *gLB, *gUB;
1550 Addr gMin, gMax, sMin, sMax, uMin, uMax;
1551 Bool sOK, gOK;
1552 sNegInf.addr = 0;
1553 sNegInf.szB = 1;
1554 sPosInf.addr = ~(UWord)0;
1555 sPosInf.szB = 1;
1556 gNegInf.addr = 0;
1557 gNegInf.szB = 1;
1558 gPosInf.addr = ~(UWord)0;
1559 gPosInf.szB = 1;
1560 sKey.addr = ea;
1561 sKey.szB = szB;
1562 gKey.addr = ea;
1563 gKey.szB = szB;
1564 if (0) VG_(printf)("Tree sizes %ld %ld\n",
1565 VG_(sizeFM)(siTrees[tid]), VG_(sizeFM)(giTree));
1566 sOK = VG_(findBoundsFM)( siTrees[tid],
 1567                                   (UWord*)&sLB,    NULL/*unused*/,
1568 (UWord*)&sUB, NULL/*unused*/,
1569 (UWord)&sNegInf, 0/*unused*/,
1570 (UWord)&sPosInf, 0/*unused*/,
 1571                                   (UWord)&sKey );
1572 gOK = VG_(findBoundsFM)( giTree,
 1573                                   (UWord*)&gLB,    NULL/*unused*/,
1574 (UWord*)&gUB, NULL/*unused*/,
1575 (UWord)&gNegInf, 0/*unused*/,
1576 (UWord)&gPosInf, 0/*unused*/,
 1577                                   (UWord)&gKey );
1578 if (!(sOK && gOK)) {
1579 /* If this happens, then [ea,ea+szB) partially overlaps
 1580               a global or stack block.  We can't represent that, so
1581 just forget it (should be very rare). However, do
1582 maximum sanity checks first. In such a
1583 partial overlap case, it can't be the case that both
1584 [ea] and [ea+szB-1] overlap the same block, since if
1585 that were indeed the case then it wouldn't be a
1586 partial overlap; rather it would simply fall inside
1587 that block entirely and we shouldn't be inside this
1588 conditional at all. */
1589 if (!sOK) {
1590 StackTreeNode *ndFirst, *ndLast;
1591 ndFirst = find_StackTreeNode( siTrees[tid], ea );
1592 ndLast = find_StackTreeNode( siTrees[tid], ea+szB-1 );
1593 /* if both ends of the range fall inside a block,
1594 they can't be in the same block. */
1595 if (ndFirst && ndLast)
1596 tl_assert(ndFirst != ndLast);
1597 /* for each end of the range, if it is in a block,
1598 the range as a whole can't be entirely within the
1599 block. */
1600 if (ndFirst)
1601 tl_assert(!is_subinterval_of(ndFirst->addr,
1602 ndFirst->szB, ea, szB));
1603 if (ndLast)
1604 tl_assert(!is_subinterval_of(ndLast->addr,
1605 ndLast->szB, ea, szB));
1606 }
1607 if (!gOK) {
1608 GlobalTreeNode *ndFirst, *ndLast;
1609 ndFirst = find_GlobalTreeNode( giTree, ea );
1610 ndLast = find_GlobalTreeNode( giTree, ea+szB-1 );
1611 /* if both ends of the range fall inside a block,
1612 they can't be in the same block. */
1613 if (ndFirst && ndLast)
1614 tl_assert(ndFirst != ndLast);
1615 /* for each end of the range, if it is in a block,
1616 the range as a whole can't be entirely within the
1617 block. */
1618 if (ndFirst)
1619 tl_assert(!is_subinterval_of(ndFirst->addr,
1620 ndFirst->szB, ea, szB));
1621 if (ndLast)
1622 tl_assert(!is_subinterval_of(ndLast->addr,
1623 ndLast->szB, ea, szB));
1624 }
1625 if (0) VG_(printf)("overlapping blocks in cache\n");
1626 return;
1627 }
1628 sMin = sLB == &sNegInf ? 0 : (sLB->addr + sLB->szB);
1629 sMax = sUB == &sPosInf ? ~(UWord)0 : (sUB->addr - 1);
1630 gMin = gLB == &gNegInf ? 0 : (gLB->addr + gLB->szB);
1631 gMax = gUB == &gPosInf ? ~(UWord)0 : (gUB->addr - 1);
1632 if (0) VG_(printf)("sMin %lx sMax %lx gMin %lx gMax %lx\n",
1633 sMin, sMax, gMin, gMax);
1634 /* [sMin,sMax] and [gMin,gMax] must both contain
 1635            [ea,ea+szB) (right?)  That implies they must overlap over
 1636            at least [ea,ea+szB). */
1637 tl_assert(sMin <= ea && ea+szB-1 <= sMax);
1638 tl_assert(gMin <= ea && ea+szB-1 <= gMax);
1639 /* So now compute their intersection. */
1640 uMin = Addr__max( sMin, gMin );
1641 uMax = Addr__min( sMax, gMax );
1642 if (0) VG_(printf)("uMin %lx uMax %lx\n", uMin, uMax);
1643 tl_assert(uMin <= uMax);
1644 tl_assert(uMin <= ea && ea+szB-1 <= uMax);
1645 /* Finally, we can park [uMin,uMax] in the cache. However,
1646 if uMax is ~0, we can't represent the difference; hence
1647 fudge uMax. */
1648 if (uMin < uMax && uMax == ~(UWord)0)
1649 uMax--;
1650 toadd_addr = uMin;
1651 toadd_szB = uMax - uMin + 1;
1652 break;
1653 }
1654 default:
1655 /* We should only be caching info for the above 3 cases */
1656 tl_assert(0);
1657 } /* switch (inv->tag) */
1658
1659 { /* and actually add this to the cache, finally */
1660 Word i;
1661 Word ip = cache->nInUse / 2; /* doesn't seem critical */
1662
1663 if (cache->nInUse < N_QCACHE)
1664 cache->nInUse++;
1665 for (i = cache->nInUse-1; i > ip; i--) {
1666 cache->elems[i] = cache->elems[i-1];
1667 }
1668
1669 tl_assert(toadd_szB > 0);
1670 cache->elems[ip].addr = toadd_addr;
1671 cache->elems[ip].szB = toadd_szB;
1672 cache->elems[ip].inv = *inv;
1673 }
1674
1675 if (show) QCache__pp(cache, "after upd");
1676
1677 }
1678}
1679
1680
1681/* CALLED FROM GENERATED CODE */
1682static
1683VG_REGPARM(3)
1684void helperc__mem_access ( /* Known only at run time: */
1685 Addr ea, Addr sp, Addr fp,
1686 /* Known at translation time: */
1687 Word sszB, Addr ip, XArray* ip_frameBlocks )
1688{
1689 UWord szB;
1690 IInstance* iinstance;
1691 Invar* inv;
1692 Invar new_inv;
1693 ThreadId tid = VG_(get_running_tid)();
1694 StackFrame* frame;
1695 HChar bufE[128], bufA[128];
1696
1697 stats__total_accesses++;
1698
1699 tl_assert(is_sane_TId(tid));
1700 frame = shadowStacks[tid];
1701 tl_assert(frame);
1702
1703 /* Find the instance info for this instruction. */
1704 tl_assert(ip_frameBlocks);
1705 iinstance = find_or_create_IInstance( frame, ip, ip_frameBlocks );
1706 tl_assert(iinstance);
1707 tl_assert(iinstance->blocks == ip_frameBlocks);
1708
1709 szB = (sszB < 0) ? (-sszB) : sszB;
1710 tl_assert(szB > 0);
1711
1712 inv = &iinstance->invar;
1713
1714 /* Deal with first uses of instruction instances. */
1715 if (inv->tag == Inv_Unset) {
1716 /* This is the first use of this instance of the instruction, so
1717 we can't make any check; we merely record what we saw, so we
1718 can compare it against what happens for 2nd and subsequent
1719 accesses. */
1720 classify_address( inv,
1721 tid, ea, sp, fp, szB, iinstance->blocks );
1722 tl_assert(inv->tag != Inv_Unset);
1723 return;
1724 }
1725
1726 /* So generate an Invar and see if it's different from what
1727 we had before. */
1728 classify_address( &new_inv,
1729 tid, ea, sp, fp, szB, iinstance->blocks );
1730 tl_assert(new_inv.tag != Inv_Unset);
1731
1732 /* Did we see something different from before? If no, then there's
1733 no error. */
1734 if (eq_Invar(&new_inv, inv))
1735 return;
1736
1737 tl_assert(inv->tag != Inv_Unset);
1738
1739 VG_(memset)(bufE, 0, sizeof(bufE));
1740 show_Invar( bufE, sizeof(bufE)-1, inv, frame->depth );
1741
1742 VG_(memset)(bufA, 0, sizeof(bufA));
1743 show_Invar( bufA, sizeof(bufA)-1, &new_inv, frame->depth );
1744
1745 sg_record_error_SorG( tid, ea, sszB, bufE, bufA );
1746
1747 /* And now install the new observation as "standard", so as to
1748 make future error messages make more sense. */
1749 *inv = new_inv;
1750}
1751
1752
1753////////////////////////////////////////
1754/* Primary push-a-new-frame routine. Called indirectly from
1755 generated code. */
1756
1757static UWord stats__max_sitree_size = 0;
1758static UWord stats__max_gitree_size = 0;
1759
static
void shadowStack_new_frame ( ThreadId tid,
                             Addr     sp_at_call_insn,
                             Addr     sp_post_call_insn,
                             Addr     fp_at_call_insn,
                             Addr     ip_post_call_insn,
                             XArray*  descrs_at_call_insn )
{
   StackFrame *callee, *caller;
   tl_assert(is_sane_TId(tid));

   caller = shadowStacks[tid];
   tl_assert(caller);

   if (caller->outer) { /* "this is not the outermost frame" */
      tl_assert(caller->outer->inner == caller);
      tl_assert(caller->outer->depth >= 0);
      tl_assert(1 + caller->outer->depth == caller->depth);
   } else {
      tl_assert(caller->depth == 0);
   }

   caller->sp_at_call = sp_at_call_insn;
   caller->fp_at_call = fp_at_call_insn;

   if (descrs_at_call_insn) {
      tl_assert( VG_(sizeXA)(descrs_at_call_insn) > 0 );
      caller->blocks_added_by_call
         = calculate_StackBlock_EAs( descrs_at_call_insn,
                                     sp_at_call_insn, fp_at_call_insn );
      if (caller->blocks_added_by_call)
         add_blocks_to_StackTree( siTrees[tid],
                                  descrs_at_call_insn,
                                  caller->blocks_added_by_call,
                                  caller->depth /* stack depth at which
                                                   these blocks are
                                                   considered to exist */ );
      if (1) {
         UWord s  = VG_(sizeFM)( siTrees[tid] );
         UWord g  = VG_(sizeFM)( giTree );
         Bool  sb = s > stats__max_sitree_size;
         Bool  gb = g > stats__max_gitree_size;
         if (sb) stats__max_sitree_size = s;
         if (gb) stats__max_gitree_size = g;
         if (0 && (sb || gb))
            VG_(message)(Vg_DebugMsg,
                         "exp-sgcheck: new max tree sizes: "
                         "StackTree %ld, GlobalTree %ld",
                         stats__max_sitree_size, stats__max_gitree_size );
      }
   } else {
      caller->blocks_added_by_call = NULL;
   }

   /* caller->blocks_added_by_call is used again (and then freed) when
      this frame is removed from the stack. */

   if (caller->inner) {
      callee = caller->inner;
   } else {
      callee = sg_malloc("di.sg_main.sSnf.1", sizeof(StackFrame));
      VG_(memset)(callee, 0, sizeof(StackFrame));
      callee->outer = caller;
      caller->inner = callee;
      callee->depth = 1 + caller->depth;
      tl_assert(callee->inner == NULL);
   }

   /* This sets up .htab, .htab_size and .htab_used */
   initialise_II_hash_table( callee );

   callee->creation_sp = sp_post_call_insn;
   callee->sp_at_call  = 0; // not actually required ..
   callee->fp_at_call  = 0; // .. these 3 initialisations are ..
   callee->blocks_added_by_call = NULL; // .. just for cleanness

   /* record the new running stack frame */
   shadowStacks[tid] = callee;

   /* and this thread's query cache is now invalid */
   QCache__invalidate( &qcaches[tid] );

   if (0)
   { Word d = callee->depth;
     HChar fnname[80];
     Bool ok;
     Addr ip = ip_post_call_insn;
     ok = VG_(get_fnname_w_offset)( ip, fnname, sizeof(fnname) );
     while (d > 0) {
        VG_(printf)(" ");
        d--;
     }
     VG_(printf)("> %s %#lx\n", ok ? fnname : "???", ip);
   }
}

/* CALLED FROM GENERATED CODE */
static
VG_REGPARM(3)
void helperc__new_frame ( Addr sp_post_call_insn,
                          Addr fp_at_call_insn,
                          Addr ip_post_call_insn,
                          XArray* blocks_at_call_insn,
                          Word sp_adjust )
{
   ThreadId tid = VG_(get_running_tid)();
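   /* Note (added for clarity): sp_adjust is the number of bytes by
      which the call instruction itself moved SP (the word-sized
      return address pushed on x86/amd64, as passed by
      sg_instrument_final_jump), so adding it back recovers SP as it
      was at the call instruction. */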
   Addr sp_at_call_insn = sp_post_call_insn + sp_adjust;
   shadowStack_new_frame( tid,
                          sp_at_call_insn,
                          sp_post_call_insn,
                          fp_at_call_insn,
                          ip_post_call_insn,
                          blocks_at_call_insn );
}


////////////////////////////////////////
/* Primary remove-frame(s) routine.  Called indirectly from
   generated code. */

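/* In outline (summary added for clarity): starting from the innermost
   frame, discard every frame whose creation_sp lies below the new
   stack pointer, freeing each frame's hash table (if it was
   dynamically allocated) and removing from the stack interval tree
   the blocks that were added when the corresponding call was made.
   The StackFrame nodes themselves are kept for later re-use by
   shadowStack_new_frame. */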
__attribute__((noinline))
static void shadowStack_unwind ( ThreadId tid, Addr sp_now )
{
   StackFrame *innermost, *innermostOrig;
   tl_assert(is_sane_TId(tid));
   innermost = shadowStacks[tid];
   tl_assert(innermost);
   innermostOrig = innermost;
   //VG_(printf)("UNWIND sp_new = %p\n", sp_now);
   while (1) {
      if (!innermost->outer)
         break;
      if (innermost->inner)
         tl_assert(innermost->inner->outer == innermost);
      tl_assert(innermost->outer->inner == innermost);
      tl_assert(innermost->blocks_added_by_call == NULL);
      if (sp_now <= innermost->creation_sp) break;
      //VG_(printf)("UNWIND dump %p\n", innermost->creation_sp);
      tl_assert(innermost->htab);
      if (innermost->htab != &innermost->htab_fixed[0])
         sg_free(innermost->htab);
      /* be on the safe side */
      innermost->creation_sp = 0;
      innermost->htab = NULL;
      innermost->htab_size = 0;
      innermost->htab_used = 0;
      innermost->sp_at_call = 0;
      innermost->fp_at_call = 0;
      innermost->blocks_added_by_call = NULL;
      innermost = innermost->outer;

      /* So now we're "back" in the calling frame.  Remove from this
         thread's stack-interval-tree, the blocks added at the time of
         the call. */

      if (innermost->outer) { /* not at the outermost frame */
         if (innermost->blocks_added_by_call != NULL) {
            del_blocks_from_StackTree( siTrees[tid],
                                       innermost->blocks_added_by_call );
            VG_(deleteXA)( innermost->blocks_added_by_call );
            innermost->blocks_added_by_call = NULL;
         }
      }
      /* That completes the required tidying of the interval tree
         associated with the frame we just removed. */

      if (0) {
         Word d = innermost->depth;
         while (d > 0) {
            VG_(printf)(" ");
            d--;
         }
         VG_(printf)("X\n");
      }

   }

   tl_assert(innermost);

   if (innermost != innermostOrig) {
      shadowStacks[tid] = innermost;
      /* this thread's query cache is now invalid */
      QCache__invalidate( &qcaches[tid] );
   }
}



//////////////////////////////////////////////////////////////
//                                                           //
//                      Instrumentation                      //
//                                                           //
//////////////////////////////////////////////////////////////

/* What does instrumentation need to do?

   - at each Call transfer, generate a call to shadowStack_new_frame
     do this by manually inspecting the IR

   - at each sp change, if the sp change is negative,
     call shadowStack_unwind
     do this by asking for SP-change analysis

   - for each memory referencing instruction,
     call helperc__mem_access
*/

/* A complication: sg_ instrumentation and h_ instrumentation need to
   be interleaved.  Since the latter is a lot more complex than the
   former, we split the sg_ instrumentation here into four functions
   and let the h_ instrumenter call the four functions as it goes.
   Hence the h_ instrumenter drives the sg_ instrumenter.

   To make this viable, the sg_ instrumenter carries what running
   state it needs in 'struct _SGEnv'.  This is exported only
   abstractly from this file.
*/

struct _SGEnv {
   /* the current insn's IP */
   Addr64 curr_IP;
   /* whether the above is actually known */
   Bool curr_IP_known;
   /* if we find a mem ref, is it the first for this insn?  Used for
      detecting insns which make more than one memory ref, a situation
      we cannot handle properly; so we ignore all but the first ref. */
   Bool firstRef;
};
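
/* Illustrative sketch (added; not part of the tool): how a driving
   instrumenter might interleave the four sg_ entry points defined
   below.  The function name and the reduced argument list are
   assumptions for illustration only; the real driver is the h_
   instrumenter, which passes through the standard tool-instrument
   parameters. */
#if 0
static IRSB* example_sg_driver ( IRSB* sbIn,
                                 VexGuestLayout* layout,
                                 IRType gWordTy, IRType hWordTy )
{
   Int   i;
   IRSB* sbOut = deepCopyIRSBExceptStmts( sbIn );
   struct _SGEnv* env = sg_instrument_init();
   for (i = 0; i < sbIn->stmts_used; i++) {
      IRStmt* st = sbIn->stmts[i];
      /* sg_ instrumentation must be added before 'st' itself. */
      sg_instrument_IRStmt( env, sbOut, st, layout, gWordTy, hWordTy );
      addStmtToIRSB( sbOut, st );
   }
   /* and instrument the block's final jump last of all */
   sg_instrument_final_jump( env, sbOut, sbIn->next, sbIn->jumpkind,
                             layout, gWordTy, hWordTy );
   sg_instrument_fini( env );
   return sbOut;
}
#endif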


/* --- Helper fns for instrumentation --- */

static IRTemp gen_Get_SP ( IRSB*           bbOut,
                           VexGuestLayout* layout,
                           Int             hWordTy_szB )
{
   IRExpr* sp_expr;
   IRTemp  sp_temp;
   IRType  sp_type;
   /* This in effect forces the host and guest word sizes to be the
      same. */
   tl_assert(hWordTy_szB == layout->sizeof_SP);
   sp_type = layout->sizeof_SP == 8 ? Ity_I64 : Ity_I32;
   sp_expr = IRExpr_Get( layout->offset_SP, sp_type );
   sp_temp = newIRTemp( bbOut->tyenv, sp_type );
   addStmtToIRSB( bbOut, IRStmt_WrTmp( sp_temp, sp_expr ) );
   return sp_temp;
}

static IRTemp gen_Get_FP ( IRSB*           bbOut,
                           VexGuestLayout* layout,
                           Int             hWordTy_szB )
{
   IRExpr* fp_expr;
   IRTemp  fp_temp;
   IRType  fp_type;
   /* This in effect forces the host and guest word sizes to be the
      same. */
   tl_assert(hWordTy_szB == layout->sizeof_SP);
   fp_type = layout->sizeof_FP == 8 ? Ity_I64 : Ity_I32;
   fp_expr = IRExpr_Get( layout->offset_FP, fp_type );
   fp_temp = newIRTemp( bbOut->tyenv, fp_type );
   addStmtToIRSB( bbOut, IRStmt_WrTmp( fp_temp, fp_expr ) );
   return fp_temp;
}
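
/* Note (added for clarity): each of the two helpers above emits a
   single flat statement of the form "tN = GET:I32/I64(<offset>)"
   into bbOut and returns tN, so callers can hand the current guest
   SP/FP values straight to a dirty-call argument list. */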

static void instrument_mem_access ( IRSB*   bbOut,
                                    IRExpr* addr,
                                    Int     szB,
                                    Bool    isStore,
                                    Int     hWordTy_szB,
                                    Addr    curr_IP,
                                    VexGuestLayout* layout )
{
   IRType  tyAddr      = Ity_INVALID;
   XArray* frameBlocks = NULL;

   tl_assert(isIRAtom(addr));
   tl_assert(hWordTy_szB == 4 || hWordTy_szB == 8);

   tyAddr = typeOfIRExpr( bbOut->tyenv, addr );
   tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);

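   /* Note (added for clarity): the x86-only filter below inspects the
      instruction bytes around curr_IP and bails out for the common
      prologue/epilogue patterns (the push of the old %ebp, and the
      RET immediately following "pop %ebp"), presumably so that the
      stack accesses those instructions make, which straddle a frame
      boundary, are not checked. */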
#if defined(VGA_x86)
   { UChar* p = (UChar*)curr_IP;
     // pop %ebp; RET
     if (p[-1] == 0x5d && p[0] == 0xc3) return;
     // pop %ebp; RET $imm16
     if (p[-1] == 0x5d && p[0] == 0xc2) return;
     // PUSH %EBP; mov %esp,%ebp
     if (p[0] == 0x55 && p[1] == 0x89 && p[2] == 0xe5) return;
   }
#endif

   /* First off, find or create the StackBlocks for this instruction. */
   frameBlocks = get_StackBlocks_for_IP( curr_IP );
   tl_assert(frameBlocks);
   //if (VG_(sizeXA)( frameBlocks ) == 0)
   //   frameBlocks = NULL;

   /* Generate a call to "helperc__mem_access", passing:
         addr current_SP current_FP szB curr_IP frameBlocks
   */
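   /* Note (added for clarity): the size argument is negated for
      stores, so a single word encodes both the access size and the
      load/store direction, and the helper can recover both at the
      other end. */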
   { IRTemp t_SP = gen_Get_SP( bbOut, layout, hWordTy_szB );
     IRTemp t_FP = gen_Get_FP( bbOut, layout, hWordTy_szB );
     IRExpr** args
        = mkIRExprVec_6( addr,
                         IRExpr_RdTmp(t_SP),
                         IRExpr_RdTmp(t_FP),
                         mkIRExpr_HWord( isStore ? (-szB) : szB ),
                         mkIRExpr_HWord( curr_IP ),
                         mkIRExpr_HWord( (HWord)frameBlocks ) );
     IRDirty* di
        = unsafeIRDirty_0_N( 3/*regparms*/,
                             "helperc__mem_access",
                             VG_(fnptr_to_fnentry)( &helperc__mem_access ),
                             args );

     addStmtToIRSB( bbOut, IRStmt_Dirty(di) );
   }
}


/* --- Instrumentation main (4 fns) --- */

struct _SGEnv * sg_instrument_init ( void )
{
   struct _SGEnv * env = sg_malloc("di.sg_main.sii.1",
                                   sizeof(struct _SGEnv));
   tl_assert(env);
   env->curr_IP       = 0;
   env->curr_IP_known = False;
   env->firstRef      = True;
   return env;
}

void sg_instrument_fini ( struct _SGEnv * env )
{
   sg_free(env);
}

/* Add instrumentation for 'st' to 'sbOut', and possibly modify 'env'
   as required.  This must be called before 'st' itself is added to
   'sbOut'. */
void sg_instrument_IRStmt ( /*MOD*/struct _SGEnv * env,
                            /*MOD*/IRSB* sbOut,
                            IRStmt* st,
                            VexGuestLayout* layout,
                            IRType gWordTy, IRType hWordTy )
{
   tl_assert(st);
   tl_assert(isFlatIRStmt(st));
   switch (st->tag) {
      case Ist_NoOp:
      case Ist_AbiHint:
      case Ist_Put:
      case Ist_PutI:
      case Ist_MBE:
         /* None of these can contain any memory references. */
         break;

      case Ist_Exit:
         tl_assert(st->Ist.Exit.jk != Ijk_Call);
         /* else we must deal with a conditional call */
         break;

      case Ist_IMark:
         env->curr_IP_known = True;
         env->curr_IP       = (Addr)st->Ist.IMark.addr;
         env->firstRef      = True;
         break;

      case Ist_Store:
         tl_assert(env->curr_IP_known);
         if (env->firstRef) {
            instrument_mem_access(
               sbOut,
               st->Ist.Store.addr,
               sizeofIRType(typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data)),
               True/*isStore*/,
               sizeofIRType(hWordTy),
               env->curr_IP, layout
            );
            env->firstRef = False;
         }
         break;

      case Ist_WrTmp: {
         IRExpr* data = st->Ist.WrTmp.data;
         if (data->tag == Iex_Load) {
            tl_assert(env->curr_IP_known);
            if (env->firstRef) {
               instrument_mem_access(
                  sbOut,
                  data->Iex.Load.addr,
                  sizeofIRType(data->Iex.Load.ty),
                  False/*!isStore*/,
                  sizeofIRType(hWordTy),
                  env->curr_IP, layout
               );
               env->firstRef = False;
            }
         }
         break;
      }

      case Ist_Dirty: {
         Int      dataSize;
         IRDirty* d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None) {
            /* This dirty helper accesses memory.  Collect the
               details. */
            tl_assert(env->curr_IP_known);
            if (env->firstRef) {
               tl_assert(d->mAddr != NULL);
               tl_assert(d->mSize != 0);
               dataSize = d->mSize;
               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
                  instrument_mem_access(
                     sbOut, d->mAddr, dataSize, False/*!isStore*/,
                     sizeofIRType(hWordTy), env->curr_IP, layout
                  );
               }
               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
                  instrument_mem_access(
                     sbOut, d->mAddr, dataSize, True/*isStore*/,
                     sizeofIRType(hWordTy), env->curr_IP, layout
                  );
               }
               env->firstRef = False;
            }
         } else {
            tl_assert(d->mAddr == NULL);
            tl_assert(d->mSize == 0);
         }
         break;
      }

      default:
         tl_assert(0);

   } /* switch (st->tag) */
}


/* Add instrumentation for the final jump of an IRSB 'sbOut', and
   possibly modify 'env' as required.  This must be the last
   instrumentation statement in the block. */
void sg_instrument_final_jump ( /*MOD*/struct _SGEnv * env,
                                /*MOD*/IRSB* sbOut,
                                IRExpr* next,
                                IRJumpKind jumpkind,
                                VexGuestLayout* layout,
                                IRType gWordTy, IRType hWordTy )
{
   if (jumpkind == Ijk_Call) {
      // Assumes x86 or amd64
      IRTemp   sp_post_call_insn, fp_post_call_insn;
      XArray*  frameBlocks;
      IRExpr** args;
      IRDirty* di;
      sp_post_call_insn
         = gen_Get_SP( sbOut, layout, sizeofIRType(hWordTy) );
      fp_post_call_insn
         = gen_Get_FP( sbOut, layout, sizeofIRType(hWordTy) );
      tl_assert(env->curr_IP_known);
      frameBlocks = get_StackBlocks_for_IP( env->curr_IP );
      tl_assert(frameBlocks);
      if (VG_(sizeXA)(frameBlocks) == 0)
         frameBlocks = NULL;
      args
         = mkIRExprVec_5(
              IRExpr_RdTmp(sp_post_call_insn),
              IRExpr_RdTmp(fp_post_call_insn),
              /* assume the call doesn't change FP */
              next,
              mkIRExpr_HWord( (HWord)frameBlocks ),
              mkIRExpr_HWord( sizeofIRType(gWordTy) )
           );
      di = unsafeIRDirty_0_N(
              3/*regparms*/,
              "helperc__new_frame",
              VG_(fnptr_to_fnentry)( &helperc__new_frame ),
              args );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }
}


//////////////////////////////////////////////////////////////
//                                                           //
//                    end Instrumentation                    //
//                                                           //
//////////////////////////////////////////////////////////////


//////////////////////////////////////////////////////////////
//                                                           //
//                            misc                           //
//                                                           //
//////////////////////////////////////////////////////////////

/* Make a new empty stack frame that is suitable for being the
   outermost frame in a stack.  It has a creation_sp of effectively
   infinity, so it can never be removed. */
static StackFrame* new_root_StackFrame ( void )
{
   StackFrame* sframe = sg_malloc("di.sg_main.nrS.1", sizeof(StackFrame));
   VG_(memset)( sframe, 0, sizeof(*sframe) );
   sframe->creation_sp = ~0UL;

   /* This sets up .htab, .htab_size and .htab_used */
   initialise_II_hash_table( sframe );

   /* ->depth, ->outer, ->inner are 0, NULL, NULL */

   return sframe;
}

/* Primary routine for setting up the shadow stack for a new thread.
   Note that this is used to create not only child thread stacks, but
   the root thread's stack too.  We create a new stack with
   .creation_sp set to infinity, so that the outermost frame can never
   be removed (by shadowStack_unwind).  The core calls this function
   as soon as a thread is created.  We cannot yet get its SP value,
   since that may not yet be set. */
static void shadowStack_thread_create ( ThreadId parent, ThreadId child )
{
   tl_assert(is_sane_TId(child));
   if (parent == VG_INVALID_THREADID) {
      /* creating the main thread's stack */
   } else {
      tl_assert(is_sane_TId(parent));
      tl_assert(parent != child);
      tl_assert(shadowStacks[parent] != NULL);
      tl_assert(siTrees[parent] != NULL);
   }

   /* Create the child's stack.  Bear in mind we may be re-using
      it. */
   if (shadowStacks[child] == NULL) {
      /* First use of this stack.  Just allocate an initial frame. */
      tl_assert(siTrees[child] == NULL);
   } else {
      StackFrame *frame, *frame2;
      /* re-using a stack. */
      /* get rid of the interval tree */
      tl_assert(siTrees[child] != NULL);
      delete_StackTree( siTrees[child] );
      siTrees[child] = NULL;
      /* Throw away all existing frames. */
      frame = shadowStacks[child];
      while (frame->outer)
         frame = frame->outer;
      tl_assert(frame->depth == 0);
      while (frame) {
         frame2 = frame->inner;
         if (frame2) tl_assert(1 + frame->depth == frame2->depth);
         sg_free(frame);
         frame = frame2;
      }
      shadowStacks[child] = NULL;
   }

   tl_assert(shadowStacks[child] == NULL);
   tl_assert(siTrees[child] == NULL);

   /* Set up the initial stack frame. */
   shadowStacks[child] = new_root_StackFrame();

   /* and set up the child's stack block interval tree. */
   siTrees[child] = new_StackTree();
}

/* Once a thread is ready to go, the core calls here.  We take the
   opportunity to push a second frame on its stack, with the
   presumably valid SP value that is going to be used for the thread's
   startup.  Hence we should always wind up with a valid outermost
   frame for the thread. */
static void shadowStack_set_initial_SP ( ThreadId tid )
{
   StackFrame* sf;
   tl_assert(is_sane_TId(tid));
   sf = shadowStacks[tid];
   tl_assert(sf != NULL);
   tl_assert(sf->outer == NULL);
   tl_assert(sf->inner == NULL);
   tl_assert(sf->creation_sp == ~0UL);
   shadowStack_new_frame( tid, 0, VG_(get_SP)(tid),
                               0, VG_(get_IP)(tid), NULL );
}


//////////////////////////////////////////////////////////////
//                                                           //
//                          main-ish                         //
//                                                           //
//////////////////////////////////////////////////////////////

/* CALLED indirectly FROM GENERATED CODE.  Calls here are created by
   sp-change analysis, as requested in pc_pre_clo_init(). */
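/* Note (added for clarity): old_SP is the stack pointer before the
   adjustment and len the number of bytes released, so old_SP+len is
   the new, higher SP; shadowStack_unwind then discards every shadow
   frame whose creation_sp lies below that value. */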
void sg_die_mem_stack ( Addr old_SP, SizeT len ) {
   ThreadId tid = VG_(get_running_tid)();
   shadowStack_unwind( tid, old_SP+len );
}

void sg_pre_clo_init ( void ) {
   ourGlobals_init();
   init_StackBlocks_set();
   init_GlobalBlock_set();
}

void sg_post_clo_init ( void ) {
}

void sg_pre_thread_ll_create ( ThreadId parent, ThreadId child ) {
   shadowStack_thread_create(parent, child);
}

void sg_pre_thread_first_insn ( ThreadId tid ) {
   shadowStack_set_initial_SP(tid);
}

void sg_fini(Int exitcode)
{
   if (VG_(clo_verbosity) >= 2) {
      VG_(message)(Vg_DebugMsg,
         " sg_: %'llu total accesses, of which:", stats__total_accesses);
      VG_(message)(Vg_DebugMsg,
         " sg_: stack0: %'12llu classify",
         stats__classify_Stack0);
      VG_(message)(Vg_DebugMsg,
         " sg_: stackN: %'12llu classify",
         stats__classify_StackN);
      VG_(message)(Vg_DebugMsg,
         " sg_: global: %'12llu classify",
         stats__classify_Global);
      VG_(message)(Vg_DebugMsg,
         " sg_: unknown: %'12llu classify",
         stats__classify_Unknown);
      VG_(message)(Vg_DebugMsg,
         " sg_: %'llu Invars preened, of which %'llu changed",
         stats__Invars_preened, stats__Invars_changed);
      VG_(message)(Vg_DebugMsg,
         " sg_: t_i_b_MT: %'12llu", stats__t_i_b_empty);
      VG_(message)(Vg_DebugMsg,
         " sg_: qcache: %'llu searches, %'llu probes, %'llu misses",
         stats__qcache_queries, stats__qcache_probes, stats__qcache_misses);
      VG_(message)(Vg_DebugMsg,
         " sg_: htab-fast: %'llu hits",
         stats__htab_fast);
      VG_(message)(Vg_DebugMsg,
         " sg_: htab-slow: %'llu searches, %'llu probes, %'llu resizes",
         stats__htab_searches, stats__htab_probes, stats__htab_resizes);
   }
}


/*--------------------------------------------------------------------*/
/*--- end                                                sg_main.c ---*/
/*--------------------------------------------------------------------*/