2/*--------------------------------------------------------------------*/
3/*--- Ptrcheck: a pointer-use checker. ---*/
4/*--- This file checks stack and global array accesses. ---*/
5/*--- sg_main.c ---*/
6/*--------------------------------------------------------------------*/
7
8/*
9 This file is part of Ptrcheck, a Valgrind tool for checking pointer
10 use in programs.
11
12 Copyright (C) 2008-2008 OpenWorks Ltd
13 info@open-works.co.uk
14
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file COPYING.
31
32 Neither the names of the U.S. Department of Energy nor the
33 University of California nor the names of its contributors may be
34 used to endorse or promote products derived from this software
35 without prior written permission.
36*/
37
38#include "pub_tool_basics.h"
39#include "pub_tool_libcbase.h"
40#include "pub_tool_libcassert.h"
41#include "pub_tool_libcprint.h"
42#include "pub_tool_tooliface.h"
43#include "pub_tool_wordfm.h"
44#include "pub_tool_xarray.h"
45#include "pub_tool_threadstate.h"
46#include "pub_tool_mallocfree.h"
47#include "pub_tool_machine.h"
48#include "pub_tool_debuginfo.h"
49#include "pub_tool_options.h"
50
51#include "pc_common.h"
52
53#include "sg_main.h" // self
54
55
56static
57void preen_Invars ( Addr a, SizeT len, Bool isHeap ); /*fwds*/
58
59
60//////////////////////////////////////////////////////////////
61// //
62// Basic Stuff //
63// //
64//////////////////////////////////////////////////////////////
65
66static inline Bool is_sane_TId ( ThreadId tid )
67{
68 return tid >= 0 && tid < VG_N_THREADS
69 && tid != VG_INVALID_THREADID;
70}
71
72static void* sg_malloc ( HChar* cc, SizeT n ) {
73 void* p;
74 tl_assert(n > 0);
75 p = VG_(malloc)( cc, n );
76 tl_assert(p);
77 return p;
78}
79
80static void sg_free ( void* p ) {
81 tl_assert(p);
82 VG_(free)(p);
83}
84
85
86/* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
87 first interval is lower, 1 if the first interval is higher, and 0
88 if there is any overlap. The redundant-looking casts are deliberate
89 paranoia, added after what looked distinctly like a bug in gcc-4.1.2,
90 in which some of the comparisons were done signedly instead of
91 unsignedly. */
92inline
93static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
94 Addr a2, SizeT n2 ) {
95 UWord a1w = (UWord)a1;
96 UWord n1w = (UWord)n1;
97 UWord a2w = (UWord)a2;
98 UWord n2w = (UWord)n2;
99 tl_assert(n1w > 0 && n2w > 0);
100 if (a1w + n1w <= a2w) return -1L;
101 if (a2w + n2w <= a1w) return 1L;
102 return 0;
103}
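/* A minimal illustration (hypothetical addresses, not compiled) of the
   three possible outcomes.  Note that "0" does not mean "equal" in the
   usual sense; it means the two intervals overlap somewhere. */
#if 0
static void example_cmp_nonempty_intervals ( void )
{
   /* disjoint, first interval entirely below the second: -1 */
   tl_assert(cmp_nonempty_intervals(0x1000, 0x10, 0x1010, 0x10) == -1);
   /* disjoint, first interval entirely above the second: 1 */
   tl_assert(cmp_nonempty_intervals(0x1010, 0x10, 0x1000, 0x10) == 1);
   /* any overlap at all: 0 */
   tl_assert(cmp_nonempty_intervals(0x1000, 0x20, 0x1010, 0x10) == 0);
}
#endif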
104
105/* Return true iff [aSmall,aSmall+nSmall) is entirely contained
106 within [aBig,aBig+nBig). */
107inline
108static Bool is_subinterval_of ( Addr aBig, SizeT nBig,
109 Addr aSmall, SizeT nSmall ) {
110 tl_assert(nBig > 0 && nSmall > 0);
111 return aBig <= aSmall && aSmall + nSmall <= aBig + nBig;
112}
113
114inline
115static Addr Addr__min ( Addr a1, Addr a2 ) {
116 return a1 < a2 ? a1 : a2;
117}
118
119inline
120static Addr Addr__max ( Addr a1, Addr a2 ) {
121 return a1 < a2 ? a2 : a1;
122}
123
124
125//////////////////////////////////////////////////////////////
126// //
127// StackBlocks Persistent Cache //
128// //
129//////////////////////////////////////////////////////////////
130
131/* We maintain a set of XArray* of StackBlocks. These are never
132 freed. When a new StackBlock vector is acquired from
133 VG_(di_get_stack_blocks_at_ip), we compare it to the existing set.
134 If not present, it is added. If present, the just-acquired one is
135 freed and the copy used.
136
137 This simplifies storage management elsewhere. It allows us to
138 assume that a pointer to an XArray* of StackBlock is valid forever.
139 It also means there are no duplicates anywhere, which could be
140 important from a space point of view for programs that generate a
141 lot of translations, or where translations are frequently discarded
142 and re-made.
143
144 Note that we normalise the arrays by sorting the elements according
145 to an arbitrary total order, so as to avoid the situation that two
146 vectors describe the same set of variables but are not structurally
147 identical. */
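/* For example (hypothetical variable names): the vectors {"buf","tmp"}
   and {"tmp","buf"} describe the same set of blocks.  Sorting both by
   StackBlock__cmp before interning them in the cache below guarantees
   that the second is recognised as a duplicate of the first, rather
   than being stored as a separate entry. */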
148
149static inline Bool StackBlock__sane ( StackBlock* fb )
150{
151 if (fb->name[ sizeof(fb->name)-1 ] != 0)
152 return False;
153 if (fb->spRel != False && fb->spRel != True)
154 return False;
155 if (fb->isVec != False && fb->isVec != True)
156 return False;
157 return True;
158}
159
160/* Generate an arbitrary total ordering on StackBlocks. */
161static Word StackBlock__cmp ( StackBlock* fb1, StackBlock* fb2 )
162{
163 Word r;
164 tl_assert(StackBlock__sane(fb1));
165 tl_assert(StackBlock__sane(fb2));
166 /* Hopefully the .base test hits most of the time. For the blocks
167 associated with any particular instruction, if the .base values
168 are the same then probably it doesn't make sense for the other
169 fields to be different. But this is supposed to be a completely
170 general structural total order, so we have to compare everything
171 anyway. */
172 if (fb1->base < fb2->base) return -1;
173 if (fb1->base > fb2->base) return 1;
174 /* compare sizes */
175 if (fb1->szB < fb2->szB) return -1;
176 if (fb1->szB > fb2->szB) return 1;
177 /* compare sp/fp flag */
178 if (fb1->spRel < fb2->spRel) return -1;
179 if (fb1->spRel > fb2->spRel) return 1;
180 /* compare is/is-not array-typed flag */
181 if (fb1->isVec < fb2->isVec) return -1;
182 if (fb1->isVec > fb2->isVec) return 1;
183 /* compare the name */
184 r = (Word)VG_(strcmp)(fb1->name, fb2->name);
185 return r;
186}
187
188/* Returns True if all fields except .szB are the same. szBs may or
189 may not be the same; they are simply not consulted. */
190static Bool StackBlock__all_fields_except_szB_are_equal (
191 StackBlock* fb1,
192 StackBlock* fb2
193 )
194{
195 tl_assert(StackBlock__sane(fb1));
196 tl_assert(StackBlock__sane(fb2));
197 return fb1->base == fb2->base
198 && fb1->spRel == fb2->spRel
199 && fb1->isVec == fb2->isVec
200 && 0 == VG_(strcmp)(fb1->name, fb2->name);
201}
202
203
204/* Generate an arbitrary total ordering on vectors of StackBlocks. */
205static Word StackBlocks__cmp ( XArray* fb1s, XArray* fb2s )
206{
207 Word i, r, n1, n2;
208 n1 = VG_(sizeXA)( fb1s );
209 n2 = VG_(sizeXA)( fb2s );
210 if (n1 < n2) return -1;
211 if (n1 > n2) return 1;
212 for (i = 0; i < n1; i++) {
213 StackBlock *fb1, *fb2;
214 fb1 = VG_(indexXA)( fb1s, i );
215 fb2 = VG_(indexXA)( fb2s, i );
216 r = StackBlock__cmp( fb1, fb2 );
217 if (r != 0) return r;
218 }
219 tl_assert(i == n1 && i == n2);
220 return 0;
221}
222
223static void pp_StackBlock ( StackBlock* sb )
224{
225 VG_(printf)("StackBlock{ off %ld szB %lu spRel:%c isVec:%c \"%s\" }",
226 sb->base, sb->szB, sb->spRel ? 'Y' : 'N',
227 sb->isVec ? 'Y' : 'N', &sb->name[0] );
228}
229
230static void pp_StackBlocks ( XArray* sbs )
231{
232 Word i, n = VG_(sizeXA)( sbs );
233 VG_(printf)("<<< STACKBLOCKS\n" );
234 for (i = 0; i < n; i++) {
235 VG_(printf)(" ");
236 pp_StackBlock( (StackBlock*)VG_(indexXA)( sbs, i ) );
237 VG_(printf)("\n");
238 }
239 VG_(printf)(">>> STACKBLOCKS\n" );
240}
241
242
243/* ---------- The StackBlock vector cache ---------- */
244
245static WordFM* /* XArray* of StackBlock -> nothing */
246 frameBlocks_set = NULL;
247
248static void init_StackBlocks_set ( void )
249{
250 tl_assert(!frameBlocks_set);
251 frameBlocks_set
252 = VG_(newFM)( sg_malloc, "di.sg_main.iSBs.1", sg_free,
253 (Word(*)(UWord,UWord))StackBlocks__cmp );
254 tl_assert(frameBlocks_set);
255}
256
257/* Find the given StackBlock-vector in our collection thereof. If
258 found, deallocate the supplied one, and return the address of the
259 copy. If not found, add the supplied one to our collection and
260 return its address. */
261static XArray* /* of StackBlock */
262 StackBlocks__find_and_dealloc__or_add
263 ( XArray* /* of StackBlock */ orig )
264{
265 UWord key, val;
266
267 /* First, normalise, as per comments above. */
268 VG_(setCmpFnXA)( orig, (Int(*)(void*,void*))StackBlock__cmp );
269 VG_(sortXA)( orig );
270
271 /* Now get rid of any exact duplicates. */
272 nuke_dups:
273 { Word r, w, nEQ, n = VG_(sizeXA)( orig );
274 if (n >= 2) {
275 w = 0;
276 nEQ = 0;
277 for (r = 0; r < n; r++) {
278 if (r+1 < n) {
279 StackBlock* pR0 = VG_(indexXA)( orig, r+0 );
280 StackBlock* pR1 = VG_(indexXA)( orig, r+1 );
281 Word c = StackBlock__cmp(pR0,pR1);
282 tl_assert(c == -1 || c == 0);
283 if (c == 0) { nEQ++; continue; }
284 }
285 if (w != r) {
286 StackBlock* pW = VG_(indexXA)( orig, w );
287 StackBlock* pR = VG_(indexXA)( orig, r );
288 *pW = *pR;
289 }
290 w++;
291 }
292 tl_assert(r == n);
293 tl_assert(w + nEQ == n);
294 if (w < n) {
295 VG_(dropTailXA)( orig, n-w );
296 }
297 if (0) VG_(printf)("delta %ld\n", n-w);
298 }
299 }
300
301 /* Deal with the following strangeness, where two otherwise
302 identical blocks are claimed to have different sizes; in that
303 case we use the larger size. */
304 /* StackBlock{ off 16 szB 66 spRel:Y isVec:Y "sz" }
305 StackBlock{ off 16 szB 130 spRel:Y isVec:Y "sz" }
306 StackBlock{ off 208 szB 16 spRel:Y isVec:Y "ar" }
307 */
308 { Word i, n = VG_(sizeXA)( orig );
309 if (n >= 2) {
310 for (i = 0; i < n-1; i++) {
311 StackBlock* sb0 = VG_(indexXA)( orig, i+0 );
312 StackBlock* sb1 = VG_(indexXA)( orig, i+1 );
313 if (StackBlock__all_fields_except_szB_are_equal(sb0, sb1)) {
314 /* They can't be identical because the previous tidying
315 pass would have removed the duplicates. And they
316 can't be > because the earlier sorting pass would
317 have ordered otherwise-identical descriptors
318 according to < on .szB fields. Hence: */
319 tl_assert(sb0->szB < sb1->szB);
320 sb0->szB = sb1->szB;
321 /* This makes the blocks identical, at the size of the
322 larger one. Rather than go to all the hassle of
323 sliding the rest down, simply go back to the
324 remove-duplicates stage. The assertion guarantees
325 that we eventually make progress, since the rm-dups
326 stage will get rid of one of the blocks. This is
327 expected to happen only exceedingly rarely. */
328 tl_assert(StackBlock__cmp(sb0,sb1) == 0);
329 goto nuke_dups;
330 }
331 }
332 }
333 }
334
335 /* A rather poor sanity check on the results. */
336 { Word i, n = VG_(sizeXA)( orig );
337 for (i = 0; i < n-1; i++) {
338 StackBlock* sb1 = (StackBlock*)VG_(indexXA)( orig, i );
339 StackBlock* sb2 = (StackBlock*)VG_(indexXA)( orig, i+1 );
340 if (sb1->base == sb2->base)
341 pp_StackBlocks(orig);
342 tl_assert(sb1->base != sb2->base);
343 }
344 }
345
346 /* Now, do we have it already? */
347 if (VG_(lookupFM)( frameBlocks_set, &key, &val, (UWord)orig )) {
348 /* yes */
349 XArray* res;
350 tl_assert(val == 0);
351 tl_assert(key != (UWord)orig);
352 VG_(deleteXA)(orig);
353 res = (XArray*)key;
354 return res;
355 } else {
356 /* no */
357 VG_(addToFM)( frameBlocks_set, (UWord)orig, 0 );
358 return orig;
359 }
360}
361
362/* Top level function for getting the StackBlock vector for a given
363 instruction. It is guaranteed that the returned pointer will be
364 valid for the entire rest of the run, and also that the addresses
365 of the individual elements of the array will not change. */
366
367static XArray* /* of StackBlock */ get_StackBlocks_for_IP ( Addr ip )
368{
369 XArray* blocks = VG_(di_get_stack_blocks_at_ip)( ip, True/*arrays only*/ );
370 tl_assert(blocks);
371 return StackBlocks__find_and_dealloc__or_add( blocks );
372}
373
374
375//////////////////////////////////////////////////////////////
376// //
377// GlobalBlocks Persistent Cache //
378// //
379//////////////////////////////////////////////////////////////
380
381/* Generate an arbitrary total ordering on GlobalBlocks. */
382static Word GlobalBlock__cmp ( GlobalBlock* gb1, GlobalBlock* gb2 )
383{
384 Word r;
385 /* compare addrs */
386 if (gb1->addr < gb2->addr) return -1;
387 if (gb1->addr > gb2->addr) return 1;
388 /* compare sizes */
389 if (gb1->szB < gb2->szB) return -1;
390 if (gb1->szB > gb2->szB) return 1;
391 /* compare is/is-not array-typed flag */
392 if (gb1->isVec < gb2->isVec) return -1;
393 if (gb1->isVec > gb2->isVec) return 1;
394 /* compare the name */
395 r = (Word)VG_(strcmp)(gb1->name, gb2->name);
396 if (r != 0) return r;
397 /* compare the soname */
398 r = (Word)VG_(strcmp)(gb1->soname, gb2->soname);
399 return r;
400}
401
402static WordFM* /* GlobalBlock* -> nothing */
403 globalBlock_set = NULL;
404
405static void init_GlobalBlock_set ( void )
406{
407 tl_assert(!globalBlock_set);
408 globalBlock_set
409 = VG_(newFM)( sg_malloc, "di.sg_main.iGBs.1", sg_free,
410 (Word(*)(UWord,UWord))GlobalBlock__cmp );
411 tl_assert(globalBlock_set);
412}
413
414
415/* Top level function for making GlobalBlocks persistent. Call here
416 with a non-persistent version, and the returned one is guaranteed
417 to be valid for the entire rest of the run. The supplied one is
418 copied, not stored, so can be freed after the call. */
419
420static GlobalBlock* get_persistent_GlobalBlock ( GlobalBlock* orig )
421{
422 UWord key, val;
423 /* Now, do we have it already? */
424 if (VG_(lookupFM)( globalBlock_set, &key, &val, (UWord)orig )) {
425 /* yes, return the copy */
426 GlobalBlock* res;
427 tl_assert(val == 0);
428 res = (GlobalBlock*)key;
429 tl_assert(res != orig);
430 return res;
431 } else {
432 /* no. clone it, store the clone and return the clone's
433 address. */
434 GlobalBlock* clone = sg_malloc( "di.sg_main.gpGB.1",
435 sizeof(GlobalBlock) );
436 tl_assert(clone);
437 *clone = *orig;
438 VG_(addToFM)( globalBlock_set, (UWord)clone, 0 );
439 return clone;
440 }
441}
442
443
444//////////////////////////////////////////////////////////////
445// //
446// Interval tree of StackTreeBlock //
447// //
448//////////////////////////////////////////////////////////////
449
450/* A node in a stack interval tree. Zero length intervals (.szB == 0)
451 are not allowed.
452
453 A stack interval tree is a (WordFM StackTreeNode* void). There is
454 one stack interval tree for each thread.
455*/
456typedef
457 struct {
458 Addr addr;
459 SizeT szB; /* copied from .descr->szB */
460 StackBlock* descr; /* it's an instance of this block */
461 UWord depth; /* depth of stack at time block was pushed */
462 }
463 StackTreeNode;
464
465static void pp_StackTree ( WordFM* sitree, HChar* who )
466{
467 UWord keyW, valW;
468 VG_(printf)("<<< BEGIN pp_StackTree %s\n", who );
469 VG_(initIterFM)( sitree );
470 while (VG_(nextIterFM)( sitree, &keyW, &valW )) {
471 StackTreeNode* nd = (StackTreeNode*)keyW;
472 VG_(printf)(" [%#lx,+%lu) descr=%p %s %lu\n", nd->addr, nd->szB,
473 nd->descr, nd->descr->name, nd->descr->szB);
474 }
475 VG_(printf)(">>> END pp_StackTree %s\n", who );
476}
477
478/* Interval comparison function for StackTreeNode */
479static Word cmp_intervals_StackTreeNode ( StackTreeNode* sn1,
480 StackTreeNode* sn2 )
481{
482 return cmp_nonempty_intervals(sn1->addr, sn1->szB,
483 sn2->addr, sn2->szB);
484}
485
486/* Find the node holding 'a', if any. */
487static StackTreeNode* find_StackTreeNode ( WordFM* sitree, Addr a )
488{
489 UWord keyW, valW;
490 StackTreeNode key;
491 tl_assert(sitree);
492 key.addr = a;
493 key.szB = 1;
494 if (VG_(lookupFM)( sitree, &keyW, &valW, (UWord)&key )) {
495 StackTreeNode* res = (StackTreeNode*)keyW;
496 tl_assert(valW == 0);
497 tl_assert(res != &key);
498 return res;
499 } else {
500 return NULL;
501 }
502}
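/* Not compiled: a sketch (hypothetical address) of the point-query
   trick used above.  Because the tree's comparator treats any overlap
   as equality, looking up the degenerate interval [a,a+1) returns
   whichever node's interval contains 'a', if there is one. */
#if 0
static void example_point_query ( WordFM* sitree )
{
   StackTreeNode* nd = find_StackTreeNode( sitree, 0x7FF000A0 );
   if (nd)
      tl_assert(nd->addr <= 0x7FF000A0
                && 0x7FF000A0 < nd->addr + nd->szB);
}
#endif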
503
504/* Note that the supplied XArray of StackBlock must have been
505 made persistent already. */
506__attribute__((noinline))
507static void add_blocks_to_StackTree (
508 /*MOD*/WordFM* sitree,
509 XArray* /* StackBlock */ descrs,
510 XArray* /* Addr */ bases,
511 UWord depth
512 )
513{
514 Bool debug = (Bool)0;
515 Word i, nDescrs, nBases;
516
517 nDescrs = VG_(sizeXA)( descrs );
518 nBases = VG_(sizeXA)( bases );
519 tl_assert(nDescrs == nBases);
520
521 if (nDescrs == 0) return;
522
523 tl_assert(sitree);
524 if (debug) {
525 VG_(printf)("\n");
526 pp_StackTree( sitree, "add_blocks_to_StackTree-pre" );
527 }
528
529 for (i = 0; i < nDescrs; i++) {
530 Bool already_present;
531 StackTreeNode* nyu;
532 Addr addr = *(Addr*)VG_(indexXA)( bases, i );
533 StackBlock* descr = (StackBlock*)VG_(indexXA)( descrs, i );
534 tl_assert(descr->szB > 0);
535 nyu = sg_malloc( "di.sg_main.abtST.1", sizeof(StackTreeNode) );
536 nyu->addr = addr;
537 nyu->szB = descr->szB;
538 nyu->descr = descr;
539 nyu->depth = depth;
540 if (debug) VG_(printf)("ADD %#lx %lu\n", addr, descr->szB);
541 already_present = VG_(addToFM)( sitree, (UWord)nyu, 0 );
542 /* The interval can't already be there; else we have
543 overlapping stack blocks. */
544 tl_assert(!already_present);
545 if (debug) {
546 pp_StackTree( sitree, "add_blocks_to_StackTree-step" );
547 }
548 }
549 if (debug) {
550 pp_StackTree( sitree, "add_blocks_to_StackTree-post" );
551 VG_(printf)("\n");
552 }
553}
554
555static void del_blocks_from_StackTree ( /*MOD*/WordFM* sitree,
556 XArray* /* Addr */ bases )
557{
558 UWord oldK, oldV;
559 Word i, nBases = VG_(sizeXA)( bases );
560 for (i = 0; i < nBases; i++) {
561 Bool b;
562 Addr addr = *(Addr*)VG_(indexXA)( bases, i );
563 StackTreeNode* nd = find_StackTreeNode(sitree, addr);
564 /* The interval must be there; we added it earlier when
565 the associated frame was created. */
566 tl_assert(nd);
567 b = VG_(delFromFM)( sitree, &oldK, &oldV, (UWord)nd );
568 /* we just found the block! */
569 tl_assert(b);
570 tl_assert(oldV == 0);
571 tl_assert(nd == (StackTreeNode*)oldK);
572 sg_free(nd);
573 }
574}
575
576
577static void delete_StackTree__kFin ( UWord keyW ) {
578 StackTreeNode* nd = (StackTreeNode*)keyW;
579 tl_assert(nd);
580 sg_free(nd);
581}
582static void delete_StackTree__vFin ( UWord valW ) {
583 tl_assert(valW == 0);
584}
585static void delete_StackTree ( WordFM* sitree )
586{
587 VG_(deleteFM)( sitree,
588 delete_StackTree__kFin, delete_StackTree__vFin );
589}
590
591static WordFM* new_StackTree ( void ) {
592 return VG_(newFM)( sg_malloc, "di.sg_main.nST.1", sg_free,
593 (Word(*)(UWord,UWord))cmp_intervals_StackTreeNode );
594}
595
596
597//////////////////////////////////////////////////////////////
598// //
599// Interval tree of GlobalTreeBlock //
600// //
601//////////////////////////////////////////////////////////////
602
603/* A node in a global interval tree. Zero length intervals
604 (.szB == 0) are not allowed.
605
606 A global interval tree is a (WordFM GlobalTreeNode* void). There
607 is one global interval tree for the entire process.
608*/
609typedef
610 struct {
611 Addr addr; /* copied from .descr->addr */
612 SizeT szB; /* copied from .descr->szB */
613 GlobalBlock* descr; /* it's this block */
614 }
615 GlobalTreeNode;
616
617static void GlobalTreeNode__pp ( GlobalTreeNode* nd ) {
618 tl_assert(nd->descr);
619 VG_(printf)("GTNode [%#lx,+%ld) %s",
620 nd->addr, nd->szB, nd->descr->name);
621}
622
623static void GlobalTree__pp ( WordFM* /* of (GlobalTreeNode,void) */ gitree,
624 HChar* who )
625{
626 UWord keyW, valW;
627 GlobalTreeNode* nd;
628 VG_(printf)("<<< GlobalBlockTree (%s)\n", who);
629 VG_(initIterFM)( gitree );
630 while (VG_(nextIterFM)( gitree, &keyW, &valW )) {
631 tl_assert(valW == 0);
632 nd = (GlobalTreeNode*)keyW;
633 VG_(printf)(" ");
634 GlobalTreeNode__pp(nd);
635 VG_(printf)("\n");
636 }
637 VG_(doneIterFM)( gitree );
638 VG_(printf)(">>>\n");
639}
640
641/* Interval comparison function for GlobalTreeNode */
642static Word cmp_intervals_GlobalTreeNode ( GlobalTreeNode* gn1,
643 GlobalTreeNode* gn2 )
644{
645 return cmp_nonempty_intervals( gn1->addr, gn1->szB,
646 gn2->addr, gn2->szB );
647}
648
649/* Find the node holding 'a', if any. */
650static GlobalTreeNode* find_GlobalTreeNode ( WordFM* gitree, Addr a )
651{
652 UWord keyW, valW;
653 GlobalTreeNode key;
654 key.addr = a;
655 key.szB = 1;
656 if (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
657 GlobalTreeNode* res = (GlobalTreeNode*)keyW;
658 tl_assert(valW == 0);
659 tl_assert(res != &key);
660 return res;
661 } else {
662 return NULL;
663 }
664}
665
666/* Note that the supplied GlobalBlock must have been made persistent
667 already. */
668static void add_block_to_GlobalTree (
669 /*MOD*/WordFM* gitree,
670 GlobalBlock* descr
671 )
672{
673 Bool already_present;
674 GlobalTreeNode *nyu, *nd;
675 UWord keyW, valW;
676
677 tl_assert(descr->szB > 0);
678 nyu = sg_malloc( "di.sg_main.abtG.1", sizeof(GlobalTreeNode) );
679 nyu->addr = descr->addr;
680 nyu->szB = descr->szB;
681 nyu->descr = descr;
682
683 /* Basically it's an error to add a global block to the tree that
684 is already in the tree. However, detect and ignore attempts to
685 insert exact duplicates; they do appear for some reason
686 (possibly a bug in m_debuginfo?) */
687 already_present = VG_(lookupFM)( gitree, &keyW, &valW, (UWord)nyu );
688 if (already_present) {
689 tl_assert(valW == 0);
690 nd = (GlobalTreeNode*)keyW;
691 tl_assert(nd);
692 tl_assert(nd != nyu);
693 tl_assert(nd->descr);
694 tl_assert(nyu->descr);
695 if (nd->addr == nyu->addr && nd->szB == nyu->szB
696 /* && 0 == VG_(strcmp)(nd->descr->name, nyu->descr->name) */
697 /* Although it seems reasonable to demand that duplicate
698 blocks have identical names, that is too strict. For
699 example, reading debuginfo from glibc produces two
700 otherwise identical blocks with names "tzname" and
701 "__tzname". A constraint of the form "must be identical,
702 or one must be a substring of the other" would fix that.
703 However, such trickery is scuppered by the fact that we
704 truncate all variable names to 15 characters to make
705 storage management simpler, hence giving pairs like
706 "__EI___pthread_" (truncated) vs "__pthread_keys". So
707 it's simplest just to skip the name comparison
708 completely. */
709 && 0 == VG_(strcmp)(nd->descr->soname, nyu->descr->soname)) {
710 /* exact duplicate; ignore it */
711 sg_free(nyu);
712 return;
713 }
714 /* else fall through; the assertion below will catch it */
715 }
716
717 already_present = VG_(addToFM)( gitree, (UWord)nyu, 0 );
718 /* The interval can't already be there; else we have
719 overlapping global blocks. */
720 if (already_present) {
721 GlobalTree__pp( gitree, "add_block_to_GlobalTree: non-exact duplicate" );
722 VG_(printf)("Overlapping block: ");
723 GlobalTreeNode__pp(nyu);
724 VG_(printf)("\n");
725 }
726 tl_assert(!already_present);
727}
728
729static Bool del_GlobalTree_range ( /*MOD*/WordFM* gitree,
730 Addr a, SizeT szB )
731{
732 /* One easy way to do this: look up [a,a+szB) in the tree. That
733 will either succeed, producing a block which intersects that
734 range, in which case we delete it and repeat; or it will fail,
735 in which case there are no blocks intersecting the range, and we
736 can bring the process to a halt. */
737 UWord keyW, valW, oldK, oldV;
738 GlobalTreeNode key, *nd;
739 Bool b, anyFound;
740
741 tl_assert(szB > 0);
742
743 anyFound = False;
744
745 key.addr = a;
746 key.szB = szB;
747
748 while (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
749 anyFound = True;
750 nd = (GlobalTreeNode*)keyW;
751 tl_assert(valW == 0);
752 tl_assert(nd != &key);
753 tl_assert(cmp_nonempty_intervals(a, szB, nd->addr, nd->szB) == 0);
754
755 b = VG_(delFromFM)( gitree, &oldK, &oldV, (UWord)&key );
756 tl_assert(b);
757 tl_assert(oldV == 0);
758 tl_assert(oldK == keyW); /* check we deleted the node we just found */
759 }
760
761 return anyFound;
762}
763
764
765//////////////////////////////////////////////////////////////
766// //
767// Invar //
768// //
769//////////////////////////////////////////////////////////////
770
771/* An invariant, as resulting from watching the destination of a
772 memory referencing instruction. It is initially Inv_Unset, until
773 the instruction makes its first access. */
774
775typedef
776 enum {
777 Inv_Unset=1, /* not established yet */
778 Inv_Unknown, /* unknown location */
779 Inv_Stack0, /* array-typed stack block in innermost frame */
780 Inv_StackN, /* array-typed stack block in non-innermost frame */
781 Inv_Global, /* array-typed global block */
782 }
783 InvarTag;
784
785typedef
786 struct {
787 InvarTag tag;
788 union {
789 struct {
790 } Unset;
791 struct {
792 } Unknown;
793 struct {
794 Addr addr;
795 SizeT szB;
796 StackBlock* descr;
797 } Stack0; /* innermost stack frame */
798 struct {
799 /* Pointer to a node in the interval tree for
800 this thread. */
801 StackTreeNode* nd;
802 } StackN; /* non-innermost stack frame */
803 struct {
804 /* Pointer to a GlobalBlock in the interval tree of
805 global blocks. */
806 GlobalTreeNode* nd;
807 } Global;
808 }
809 Inv;
810 }
811 Invar;
812
813/* Partial debugging printing for an Invar. */
814static void pp_Invar ( Invar* i )
815{
816 switch (i->tag) {
817 case Inv_Unset:
818 VG_(printf)("Unset");
819 break;
820 case Inv_Unknown:
821 VG_(printf)("Unknown");
822 break;
823 case Inv_Stack0:
824 VG_(printf)("Stack0 [%#lx,+%lu)",
825 i->Inv.Stack0.addr, i->Inv.Stack0.szB);
826 break;
827 case Inv_StackN:
828 VG_(printf)("StackN [%#lx,+%lu)",
829 i->Inv.StackN.nd->addr, i->Inv.StackN.nd->szB);
830 break;
831 case Inv_Global:
832 VG_(printf)("Global [%#lx,+%lu)",
833 i->Inv.Global.nd->addr, i->Inv.Global.nd->szB);
834 break;
835 default:
836 tl_assert(0);
837 }
838}
839
840/* Compare two Invars for equality. */
841static Bool eq_Invar ( Invar* i1, Invar* i2 )
842{
843 tl_assert(i1->tag != Inv_Unset);
844 tl_assert(i2->tag != Inv_Unset);
845 if (i1->tag != i2->tag)
846 return False;
847 switch (i1->tag) {
848 case Inv_Unknown:
849 return True;
850 case Inv_Stack0:
851 return i1->Inv.Stack0.addr == i2->Inv.Stack0.addr
852 && i1->Inv.Stack0.szB == i2->Inv.Stack0.szB;
853 case Inv_StackN:
854 return i1->Inv.StackN.nd == i2->Inv.StackN.nd;
855 case Inv_Global:
856 return i1->Inv.Global.nd == i2->Inv.Global.nd;
857 default:
858 tl_assert(0);
859 }
860 /*NOTREACHED*/
861 tl_assert(0);
862}
863
864/* Print selected parts of an Invar, suitable for use in error
865 messages. */
866static void show_Invar( HChar* buf, Word nBuf, Invar* inv, Word depth )
867{
868 HChar* str;
869 tl_assert(nBuf >= 96);
870 buf[0] = 0;
871 switch (inv->tag) {
872 case Inv_Unknown:
873 VG_(sprintf)(buf, "%s", "unknown");
874 break;
875 case Inv_Stack0:
876 str = "array";
877 VG_(sprintf)(buf, "stack %s \"%s\" in this frame",
878 str, inv->Inv.Stack0.descr->name );
879 break;
880 case Inv_StackN:
881 str = "array";
882 VG_(sprintf)(buf, "stack %s \"%s\" in frame %lu back from here",
883 str, inv->Inv.StackN.nd->descr->name,
884 depth - inv->Inv.StackN.nd->depth );
885 break;
886 case Inv_Global:
887 str = "array";
888 VG_(sprintf)(buf, "global %s \"%s\" in object with soname \"%s\"",
889 str, inv->Inv.Global.nd->descr->name,
890 inv->Inv.Global.nd->descr->soname );
891 break;
892 case Inv_Unset:
893 VG_(sprintf)(buf, "%s", "Unset!");
894 break;
895 default:
896 tl_assert(0);
897 }
898}
899
900
901//////////////////////////////////////////////////////////////
902// //
903// our globals //
904// //
905//////////////////////////////////////////////////////////////
906
907//////////////////////////////////////////////////////////////
908///
909
910#define N_QCACHE 16
911
912/* Powers of two only, else the result will be chaos */
913#define QCACHE_ADVANCE_EVERY 16
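/* The power-of-two requirement exists because of how this constant is
   used in classify_address below: the expression
   (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0 only fires once in every
   QCACHE_ADVANCE_EVERY evaluations when QCACHE_ADVANCE_EVERY-1 is an
   all-ones bit mask, e.g. 16-1 == 0xF. */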
914
915/* Per-thread query cache. Note that the invar can only be Inv_StackN
916 (but not Inv_Stack0), Inv_Global or Inv_Unknown. */
917typedef
918 struct {
919 Addr addr;
920 SizeT szB;
921 Invar inv;
922 }
923 QCElem;
924
925typedef
926 struct {
927 Word nInUse;
928 QCElem elems[N_QCACHE];
929 }
930 QCache;
931
932static void QCache__invalidate ( QCache* qc ) {
933 tl_assert(qc->nInUse >= 0);
934 qc->nInUse = 0;
935}
936
937static void QCache__pp ( QCache* qc, HChar* who )
938{
939 Word i;
940 VG_(printf)("<<< QCache with %ld elements (%s)\n", qc->nInUse, who);
941 for (i = 0; i < qc->nInUse; i++) {
942 VG_(printf)(" [%#lx,+%#lx) ", qc->elems[i].addr, qc->elems[i].szB);
943 pp_Invar(&qc->elems[i].inv);
944 VG_(printf)("\n");
945 }
946 VG_(printf)(">>>\n");
947}
948
949static ULong stats__qcache_queries = 0;
950static ULong stats__qcache_misses = 0;
951static ULong stats__qcache_probes = 0;
952
953///
954//////////////////////////////////////////////////////////////
955
956/* Each thread has:
957 * a shadow stack of StackFrames, which is a doubly-linked list
958 * a stack block interval tree
959*/
960static struct _StackFrame* shadowStacks[VG_N_THREADS];
961
962static WordFM* /* StackTreeNode */ siTrees[VG_N_THREADS];
963
964static QCache qcaches[VG_N_THREADS];
965
966
967/* Additionally, there is one global variable interval tree
968 for the entire process.
969*/
970static WordFM* /* GlobalTreeNode */ giTree;
971
972
973static void invalidate_all_QCaches ( void )
974{
975 Word i;
976 for (i = 0; i < VG_N_THREADS; i++) {
977 QCache__invalidate( &qcaches[i] );
978 }
979}
980
981static void ourGlobals_init ( void )
982{
983 Word i;
984 for (i = 0; i < VG_N_THREADS; i++) {
985 shadowStacks[i] = NULL;
986 siTrees[i] = NULL;
987 }
988 invalidate_all_QCaches();
989 giTree = VG_(newFM)( sg_malloc, "di.sg_main.oGi.1", sg_free,
990 (Word(*)(UWord,UWord))cmp_intervals_GlobalTreeNode );
991}
992
993
994//////////////////////////////////////////////////////////////
995// //
996// Handle global variable load/unload events //
997// //
998//////////////////////////////////////////////////////////////
999
1000static void acquire_globals ( ULong di_handle )
1001{
1002 Word n, i;
1003 XArray* /* of GlobalBlock */ gbs;
1004 if (0) VG_(printf)("ACQUIRE GLOBALS %llu\n", di_handle );
1005 gbs = VG_(di_get_global_blocks_from_dihandle)
1006 (di_handle, True/*arrays only*/);
1007 if (0) VG_(printf)(" GOT %ld globals\n", VG_(sizeXA)( gbs ));
1008
1009 n = VG_(sizeXA)( gbs );
1010 for (i = 0; i < n; i++) {
1011 GlobalBlock* gbp;
1012 GlobalBlock* gb = VG_(indexXA)( gbs, i );
1013 if (0) VG_(printf)(" new Global size %2lu at %#lx: %s %s\n",
1014 gb->szB, gb->addr, gb->soname, gb->name );
1015 tl_assert(gb->szB > 0);
1016 /* Make a persistent copy of each GlobalBlock, and add it
1017 to the tree. */
1018 gbp = get_persistent_GlobalBlock( gb );
1019 add_block_to_GlobalTree( giTree, gbp );
1020 }
1021
1022 VG_(deleteXA)( gbs );
1023}
1024
1025
1026/* We only intercept these two because we need to see any di_handles
1027 that might arise from the mappings/allocations. */
1028void sg_new_mem_mmap( Addr a, SizeT len,
1029 Bool rr, Bool ww, Bool xx, ULong di_handle )
1030{
1031 if (di_handle > 0)
1032 acquire_globals(di_handle);
1033}
1034void sg_new_mem_startup( Addr a, SizeT len,
1035 Bool rr, Bool ww, Bool xx, ULong di_handle )
1036{
1037 if (di_handle > 0)
1038 acquire_globals(di_handle);
1039}
1040void sg_die_mem_munmap ( Addr a, SizeT len )
1041{
1042 Bool debug = (Bool)0;
1043 Bool overlap = False;
1044
1045 if (debug) VG_(printf)("MUNMAP %#lx %lu\n", a, len );
1046
1047 if (len == 0)
1048 return;
1049
1050 overlap = del_GlobalTree_range(giTree, a, len);
1051
1052 { /* redundant sanity check */
1053 UWord keyW, valW;
1054 VG_(initIterFM)( giTree );
1055 while (VG_(nextIterFM)( giTree, &keyW, &valW )) {
1056 GlobalTreeNode* nd = (GlobalTreeNode*)keyW;
1057 tl_assert(valW == 0);
1058 tl_assert(nd->szB > 0);
1059 tl_assert(nd->addr + nd->szB <= a
1060 || a + len <= nd->addr);
1061 }
1062 VG_(doneIterFM)( giTree );
1063 }
1064
1065 if (!overlap)
1066 return;
1067
1068 /* Ok, the range contained some blocks. Therefore we'll need to
1069 visit all the Invars in all the thread shadow stacks, and
1070 convert all Inv_Global{S,V} entries that intersect [a,a+len) to
1071 Inv_Unknown. */
1072 tl_assert(len > 0);
1073 preen_Invars( a, len, False/*!isHeap*/ );
1074 invalidate_all_QCaches();
1075}
1076
1077
1078//////////////////////////////////////////////////////////////
1079// //
1080// StackFrame //
1081// //
1082//////////////////////////////////////////////////////////////
1083
1084static ULong stats__total_accesses = 0;
1085static ULong stats__classify_Stack0 = 0;
1086static ULong stats__classify_StackN = 0;
1087static ULong stats__classify_Global = 0;
1088static ULong stats__classify_Unknown = 0;
1089static ULong stats__Invars_preened = 0;
1090static ULong stats__Invars_changed = 0;
1091static ULong stats__t_i_b_empty = 0;
1092static ULong stats__htab_fast = 0;
1093static ULong stats__htab_searches = 0;
1094static ULong stats__htab_probes = 0;
1095static ULong stats__htab_resizes = 0;
1096
1097
1098/* A dynamic instance of an instruction */
1099typedef
1100 struct {
1101 /* IMMUTABLE */
1102 Addr insn_addr; /* NB! zero means 'not in use' */
1103 XArray* blocks; /* XArray* of StackBlock, or NULL if none */
1104 /* MUTABLE */
1105 Invar invar;
1106 }
1107 IInstance;
1108
1109
1110#define N_HTAB_FIXED 64
1111
1112typedef
1113 struct _StackFrame {
1114 /* The sp when the frame was created, so we know when to get rid
1115 of it. */
1116 Addr creation_sp;
1117 /* The stack frames for a thread are arranged as a doubly linked
1118 list. Obviously the outermost frame in the stack has .outer
1119 as NULL and the innermost in theory has .inner as NULL.
1120 However, when a function returns, we don't delete the
1121 just-vacated StackFrame. Instead, it is retained in the list
1122 and will be re-used when the next call happens. This is so
1123 as to avoid constantly having to dynamically allocate and
1124 deallocate frames. */
1125 struct _StackFrame* inner;
1126 struct _StackFrame* outer;
1127 Word depth; /* 0 for outermost; increases inwards */
1128 /* Information for each memory referencing instruction, for this
1129 instantiation of the function. The iinstances array is
1130 operated as a simple linear-probe hash table, which is
1131 dynamically expanded as necessary. One critical thing is
1132 that an IInstance with a .insn_addr of zero is interpreted to
1133 mean that hash table slot is unused. This means we can't
1134 store an IInstance for address zero. */
1135 /* Note that htab initially points to htab_fixed. If htab_fixed
1136 turns out not to be big enough then htab is made to point to
1137 dynamically allocated memory. But it's often the case that
1138 htab_fixed is big enough, so this optimisation saves a huge
1139 number of sg_malloc/sg_free call pairs. */
1140 IInstance* htab;
1141 UWord htab_size; /* size of hash table, MAY ONLY BE A POWER OF 2 */
1142 UWord htab_used; /* number of hash table slots currently in use */
1143 /* If this frame is currently making a call, then the following
1144 are relevant. */
1145 Addr sp_at_call;
1146 Addr fp_at_call;
1147 XArray* /* of Addr */ blocks_added_by_call;
1148 /* See comment just above */
1149 IInstance htab_fixed[N_HTAB_FIXED];
1150 }
1151 StackFrame;
1152
1153
1154
1155
1156
1157/* Move this somewhere else? */
1158/* Visit all Invars in the entire system. If 'isHeap' is True, change
1159 all Inv_Heap Invars that intersect [a,a+len) to Inv_Unknown. If
1160 'isHeap' is False, do the same but to the Inv_Global{S,V} Invars
1161 instead. */
1162
1163__attribute__((noinline))
1164static void preen_Invar ( Invar* inv, Addr a, SizeT len, Bool isHeap )
1165{
1166 stats__Invars_preened++;
1167 tl_assert(len > 0);
1168 tl_assert(inv);
1169 switch (inv->tag) {
1170#if 0
1171 case Inv_Heap:
1172 tl_assert(inv->Inv.Heap.len > 0);
1173 if (isHeap && rangesOverlap(a, len, inv->Inv.Heap.start,
1174 inv->Inv.Heap.len)) {
1175 inv->tag = Inv_Unknown;
1176 stats__Invars_changed++;
1177 }
1178 break;
1179 case Inv_GlobalS:
1180 case Inv_GlobalV:
1181 tl_assert(inv->Inv.Global.len > 0);
1182 if ((!isHeap)
1183 && rangesOverlap(a, len, inv->Inv.Global.start,
1184 inv->Inv.Global.len)) {
1185 inv->tag = Inv_Unknown;
1186 stats__Invars_changed++;
1187 }
1188 break;
1189 case Inv_StackS:
1190 case Inv_StackV:
1191 case Inv_Unknown:
1192 break;
1193#endif
1194 default: tl_assert(0);
1195 }
1196}
1197
1198__attribute__((noinline))
1199static void preen_Invars ( Addr a, SizeT len, Bool isHeap )
1200{
1201 tl_assert(0);
1202#if 0
1203 Int i;
1204 Word ixFrames, nFrames;
1205 UWord u;
1206 XArray* stack; /* XArray* of StackFrame */
1207 StackFrame* frame;
1208 tl_assert(len > 0);
1209 tl_assert(0);
1210 for (i = 0; i < VG_N_THREADS; i++) {
1211tl_assert(0);
1212 stack = shadowStacks[i];
1213 if (!stack)
1214 continue;
1215 nFrames = VG_(sizeXA)( stack );
1216 for (ixFrames = 0; ixFrames < nFrames; ixFrames++) {
1217 UWord xx = 0; /* sanity check only; count of used htab entries */
1218 frame = VG_(indexXA)( stack, ixFrames );
1219 tl_assert(frame->htab);
1220 for (u = 0; u < frame->htab_size; u++) {
1221 IInstance* ii = &frame->htab[u];
1222 if (ii->insn_addr == 0)
1223 continue; /* not in use */
1224 preen_Invar( &ii->invar, a, len, isHeap );
1225 xx++;
1226 }
1227 tl_assert(xx == frame->htab_used);
1228 }
1229 }
1230#endif
1231}
1232
1233
1234/* XXX this should be >> 2 on ppc32/64 since the bottom two bits
1235 of the ip are guaranteed to be zero */
1236inline static UWord compute_II_hash ( Addr ip, UWord htab_size ) {
1237 return (ip >> 0) & (htab_size - 1);
1238}
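/* Not compiled: a sketch of the adjustment the XXX comment above asks
   for.  On targets whose instructions are 4-byte aligned (ppc32/64),
   the bottom two bits of 'ip' are always zero, so shifting them away
   first would spread entries over more hash-table slots. */
#if 0
inline static UWord compute_II_hash_ppc ( Addr ip, UWord htab_size ) {
   return (ip >> 2) & (htab_size - 1);
}
#endif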
1239
1240__attribute__((noinline))
1241static void initialise_II_hash_table ( StackFrame* sf )
1242{
1243 UWord i;
1244 sf->htab_size = N_HTAB_FIXED; /* initial hash table size */
1245 sf->htab = &sf->htab_fixed[0];
1246 tl_assert(sf->htab);
1247 sf->htab_used = 0;
1248 for (i = 0; i < sf->htab_size; i++)
1249 sf->htab[i].insn_addr = 0; /* NOT IN USE */
1250}
1251
1252
1253__attribute__((noinline))
1254static void resize_II_hash_table ( StackFrame* sf )
1255{
1256 UWord i, j, ix, old_size, new_size;
1257 IInstance *old_htab, *new_htab, *old;
1258
1259 tl_assert(sf && sf->htab);
1260 old_size = sf->htab_size;
1261 new_size = 2 * old_size;
1262 old_htab = sf->htab;
1263 new_htab = sg_malloc( "di.sg_main.rIht.1",
1264 new_size * sizeof(IInstance) );
1265 for (i = 0; i < new_size; i++) {
1266 new_htab[i].insn_addr = 0; /* NOT IN USE */
1267 }
1268 for (i = 0; i < old_size; i++) {
1269 old = &old_htab[i];
1270 if (old->insn_addr == 0 /* NOT IN USE */)
1271 continue;
1272 ix = compute_II_hash(old->insn_addr, new_size);
1273 /* find out where to put this, in the new table */
1274 j = new_size;
1275 while (1) {
1276 if (new_htab[ix].insn_addr == 0)
1277 break;
1278 /* This can't ever happen, because it would mean the new
1279 table is full; that isn't allowed -- even the old table is
1280 only allowed to become half full. */
1281 tl_assert(j > 0);
1282 j--;
1283 ix++; if (ix == new_size) ix = 0;
1284 }
1285 /* copy the old entry to this location */
1286 tl_assert(ix < new_size);
1287 tl_assert(new_htab[ix].insn_addr == 0);
1288 new_htab[ix] = *old;
1289 tl_assert(new_htab[ix].insn_addr != 0);
1290 }
1291 /* all entries copied; free old table. */
1292 if (old_htab != &sf->htab_fixed[0])
1293 sg_free(old_htab);
1294 sf->htab = new_htab;
1295 sf->htab_size = new_size;
1296 /* check sf->htab_used is correct. Optional and a bit expensive
1297 but anyway: */
1298 j = 0;
1299 for (i = 0; i < new_size; i++) {
1300 if (new_htab[i].insn_addr != 0) {
1301 j++;
1302 }
1303 }
1304 tl_assert(j == sf->htab_used);
1305 if (0) VG_(printf)("resized tab for SF %p to %lu\n", sf, new_size);
1306}
1307
1308
1309__attribute__((noinline))
1310static IInstance* find_or_create_IInstance_SLOW (
1311 StackFrame* sf,
1312 Addr ip,
1313 XArray* /* StackBlock */ ip_frameblocks
1314 )
1315{
1316 UWord i, ix;
1317
1318 stats__htab_searches++;
1319
1320 tl_assert(sf);
1321 tl_assert(sf->htab);
1322
1323 /* Make sure the table loading doesn't get too high. */
1324 if (UNLIKELY(2 * sf->htab_used >= 1 * sf->htab_size)) {
1325 stats__htab_resizes++;
1326 resize_II_hash_table(sf);
1327 }
1328 tl_assert(2 * sf->htab_used <= sf->htab_size);
1329
1330 ix = compute_II_hash(ip, sf->htab_size);
1331 i = sf->htab_size;
1332 while (1) {
1333 stats__htab_probes++;
1334 /* Note that because of the way the fast-case handler works,
1335 these two tests are actually redundant in the first iteration
1336 of this loop. (Except they aren't redundant if the code just
1337 above resized the table first. :-) */
1338 if (sf->htab[ix].insn_addr == ip)
1339 return &sf->htab[ix];
1340 if (sf->htab[ix].insn_addr == 0)
1341 break;
1342 /* If i ever gets to zero and we have found neither what we're
1343 looking for nor an empty slot, the table must be full. Which
1344 isn't possible -- we monitor the load factor to ensure it
1345 doesn't get above say 50%; if that ever does happen the table
1346 is resized. */
1347 tl_assert(i > 0);
1348 i--;
1349 ix++;
1350 if (ix == sf->htab_size) ix = 0;
1351 }
1352
1353 /* So now we've found a free slot at ix, and we can use that. */
1354 tl_assert(sf->htab[ix].insn_addr == 0);
1355
1356 /* Add a new record in this slot. */
1357 tl_assert(ip != 0); /* CAN'T REPRESENT THIS */
1358 sf->htab[ix].insn_addr = ip;
1359 sf->htab[ix].blocks = ip_frameblocks;
1360 sf->htab[ix].invar.tag = Inv_Unset;
1361 sf->htab_used++;
1362 return &sf->htab[ix];
1363}
1364
1365
1366inline
1367static IInstance* find_or_create_IInstance (
1368 StackFrame* sf,
1369 Addr ip,
1370 XArray* /* StackBlock */ ip_frameblocks
1371 )
1372{
1373 UWord ix = compute_II_hash(ip, sf->htab_size);
1374 /* Is it in the first slot we come to? */
1375 if (LIKELY(sf->htab[ix].insn_addr == ip)) {
1376 stats__htab_fast++;
1377 return &sf->htab[ix];
1378 }
1379 /* If the first slot we come to is empty, bag it. */
1380 if (LIKELY(sf->htab[ix].insn_addr == 0)) {
1381 stats__htab_fast++;
1382 tl_assert(ip != 0);
1383 sf->htab[ix].insn_addr = ip;
1384 sf->htab[ix].blocks = ip_frameblocks;
1385 sf->htab[ix].invar.tag = Inv_Unset;
1386 sf->htab_used++;
1387 return &sf->htab[ix];
1388 }
1389 /* Otherwise we hand off to the slow case, which searches other
1390 slots, and optionally resizes the table if necessary. */
1391 return find_or_create_IInstance_SLOW( sf, ip, ip_frameblocks );
1392}
1393
1394
1395__attribute__((noinline))
1396static Addr calculate_StackBlock_EA ( StackBlock* descr,
1397 Addr sp, Addr fp ) {
1398 UWord w1 = (UWord)descr->base;
1399 UWord w2 = (UWord)(descr->spRel ? sp : fp);
1400 UWord ea = w1 + w2;
1401 return ea;
1402}
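/* Worked example (hypothetical values): a descriptor with .base == 16
   and .spRel == True, evaluated while sp == 0x7FF00100, yields
   ea == 0x7FF00110; an fp-relative descriptor uses fp instead.  The
   casts to UWord mean that negative offsets simply wrap around and
   still produce the intended address. */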
1403
1404/* Given an array of StackBlocks, return an array of Addrs, holding
1405 their effective addresses. Caller deallocates result array. */
1406__attribute__((noinline))
1407static XArray* /* Addr */ calculate_StackBlock_EAs (
1408 XArray* /* StackBlock */ blocks,
1409 Addr sp, Addr fp
1410 )
1411{
1412 XArray* res;
1413 Word i, n = VG_(sizeXA)( blocks );
1414 tl_assert(n > 0);
1415 res = VG_(newXA)( sg_malloc, "di.sg_main.cSBE.1", sg_free, sizeof(Addr) );
1416 for (i = 0; i < n; i++) {
1417 StackBlock* blk = VG_(indexXA)( blocks, i );
1418 Addr ea = calculate_StackBlock_EA( blk, sp, fp );
1419 VG_(addToXA)( res, &ea );
1420 }
1421 return res;
1422}
1423
1424
1425/* Try to classify the block into which a memory access falls, and
1426 write the result in 'inv'. This writes all relevant fields of
1427 'inv'. */
1428__attribute__((noinline))
1429static void classify_address ( /*OUT*/Invar* inv,
1430 ThreadId tid,
1431 Addr ea, Addr sp, Addr fp,
1432 UWord szB,
1433 XArray* /* of StackBlock */ thisInstrBlocks )
1434{
1435 tl_assert(szB > 0);
1436 /* First, look in the stack blocks accessible in this instruction's
1437 frame. */
1438 {
1439 Word i, nBlocks = VG_(sizeXA)( thisInstrBlocks );
1440 if (nBlocks == 0) stats__t_i_b_empty++;
1441 for (i = 0; i < nBlocks; i++) {
1442 StackBlock* descr = VG_(indexXA)( thisInstrBlocks, i );
1443 Addr bea = calculate_StackBlock_EA( descr, sp, fp );
1444 if (bea <= ea && ea + szB <= bea + descr->szB) {
1445 /* found it */
1446 inv->tag = Inv_Stack0;
1447 inv->Inv.Stack0.addr = bea;
1448 inv->Inv.Stack0.szB = descr->szB;
1449 inv->Inv.Stack0.descr = descr;
1450 stats__classify_Stack0++;
1451 return;
1452 }
1453 }
1454 }
1455 /* Look in this thread's query cache */
1456 { Word i;
1457 QCache* cache = &qcaches[tid];
1458 static UWord ctr = 0;
1459 stats__qcache_queries++;
1460 for (i = 0; i < cache->nInUse; i++) {
1461 if (0) /* expensive in a loop like this */
1462 tl_assert(cache->elems[i].addr + cache->elems[i].szB != 0);
1463 stats__qcache_probes++;
1464 if (is_subinterval_of(cache->elems[i].addr,
1465 cache->elems[i].szB, ea, szB)) {
1466 if (i > 0
1467 && (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0) {
1468 QCElem tmp;
1469 tmp = cache->elems[i-1];
1470 cache->elems[i-1] = cache->elems[i];
1471 cache->elems[i] = tmp;
1472 i--;
1473 }
1474 *inv = cache->elems[i].inv;
1475 return;
1476 }
1477 }
1478 stats__qcache_misses++;
1479 }
1480 /* Ok, so it's not a block in the top frame. Perhaps it's a block
1481 in some calling frame? Consult this thread's stack-block
1482 interval tree to find out. */
1483 { StackTreeNode* nd = find_StackTreeNode( siTrees[tid], ea );
1484 /* We know that [ea,ea+1) is in the block, but we need to
1485 restrict to the case where the whole access falls within
1486 it. */
1487 if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
1488 nd = NULL;
1489 }
1490 if (nd) {
1491 /* found it */
1492 inv->tag = Inv_StackN;
1493 inv->Inv.StackN.nd = nd;
1494 stats__classify_StackN++;
1495 goto out;
1496 }
1497 }
1498 /* Not in a stack block. Try the global pool. */
1499 { GlobalTreeNode* nd = find_GlobalTreeNode(giTree, ea);
1500 /* We know that [ea,ea+1) is in the block, but we need to
1501 restrict to the case where the whole access falls within
1502 it. */
1503 if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
1504 nd = NULL;
1505 }
1506 if (nd) {
1507 /* found it */
1508 inv->tag = Inv_Global;
1509 inv->Inv.Global.nd = nd;
1510 stats__classify_Global++;
1511 goto out;
1512 }
1513 }
1514 /* No idea - give up. */
1515 inv->tag = Inv_Unknown;
1516 stats__classify_Unknown++;
1517
1518 /* Update the cache */
1519 out:
1520 { Addr toadd_addr = 0;
1521 SizeT toadd_szB = 0;
1522 QCache* cache = &qcaches[tid];
1523
1524 static UWord ctr = 0;
1525 Bool show = False;
1526 if (0 && 0 == (ctr++ & 0x1FFFFF)) show = True;
1527
1528 if (show) QCache__pp(cache, "before upd");
1529
1530 switch (inv->tag) {
1531 case Inv_Global:
1532 toadd_addr = inv->Inv.Global.nd->addr;
1533 toadd_szB = inv->Inv.Global.nd->szB;
1534 break;
1535 case Inv_StackN:
1536 toadd_addr = inv->Inv.StackN.nd->addr;
1537 toadd_szB = inv->Inv.StackN.nd->szB;
1538 break;
1539 case Inv_Unknown: {
1540 /* This is more complex. We need to figure out the
1541 intersection of the "holes" in the global and stack
1542 interval trees into which [ea,ea+szB) falls. This is
1543 further complicated by the fact that [ea,ea+szB) might
1544 not fall cleanly into a hole; it may instead fall across
1545 the boundary of a stack or global block. In that case
1546 we just ignore it and don't update the cache, since we
1547 have no way to represent this situation precisely. */
1548 StackTreeNode sNegInf, sPosInf, sKey, *sLB, *sUB;
1549 GlobalTreeNode gNegInf, gPosInf, gKey, *gLB, *gUB;
1550 Addr gMin, gMax, sMin, sMax, uMin, uMax;
1551 Bool sOK, gOK;
1552 sNegInf.addr = 0;
1553 sNegInf.szB = 1;
1554 sPosInf.addr = ~(UWord)0;
1555 sPosInf.szB = 1;
1556 gNegInf.addr = 0;
1557 gNegInf.szB = 1;
1558 gPosInf.addr = ~(UWord)0;
1559 gPosInf.szB = 1;
1560 sKey.addr = ea;
1561 sKey.szB = szB;
1562 gKey.addr = ea;
1563 gKey.szB = szB;
1564 if (0) VG_(printf)("Tree sizes %ld %ld\n",
1565 VG_(sizeFM)(siTrees[tid]), VG_(sizeFM)(giTree));
1566 sOK = VG_(findBoundsFM)( siTrees[tid],
1567 (UWord*)&sLB, (UWord*)&sUB,
1568 (UWord)&sNegInf, (UWord)&sPosInf,
1569 (UWord)&sKey );
1570 gOK = VG_(findBoundsFM)( giTree,
1571 (UWord*)&gLB, (UWord*)&gUB,
1572 (UWord)&gNegInf, (UWord)&gPosInf,
1573 (UWord)&gKey );
1574 if (!(sOK && gOK)) {
1575 /* If this happens, then [ea,ea+szB) partially overlaps
1576 a global or stack block. We can't represent that, so
1577 just forget it (should be very rare). However, do
1578 maximum sanity checks first. In such a
1579 partial overlap case, it can't be the case that both
1580 [ea] and [ea+szB-1] overlap the same block, since if
1581 that were indeed the case then it wouldn't be a
1582 partial overlap; rather it would simply fall inside
1583 that block entirely and we shouldn't be inside this
1584 conditional at all. */
1585 if (!sOK) {
1586 StackTreeNode *ndFirst, *ndLast;
1587 ndFirst = find_StackTreeNode( siTrees[tid], ea );
1588 ndLast = find_StackTreeNode( siTrees[tid], ea+szB-1 );
1589 /* if both ends of the range fall inside a block,
1590 they can't be in the same block. */
1591 if (ndFirst && ndLast)
1592 tl_assert(ndFirst != ndLast);
1593 /* for each end of the range, if it is in a block,
1594 the range as a whole can't be entirely within the
1595 block. */
1596 if (ndFirst)
1597 tl_assert(!is_subinterval_of(ndFirst->addr,
1598 ndFirst->szB, ea, szB));
1599 if (ndLast)
1600 tl_assert(!is_subinterval_of(ndLast->addr,
1601 ndLast->szB, ea, szB));
1602 }
1603 if (!gOK) {
1604 GlobalTreeNode *ndFirst, *ndLast;
1605 ndFirst = find_GlobalTreeNode( giTree, ea );
1606 ndLast = find_GlobalTreeNode( giTree, ea+szB-1 );
1607 /* if both ends of the range fall inside a block,
1608 they can't be in the same block. */
1609 if (ndFirst && ndLast)
1610 tl_assert(ndFirst != ndLast);
1611 /* for each end of the range, if it is in a block,
1612 the range as a whole can't be entirely within the
1613 block. */
1614 if (ndFirst)
1615 tl_assert(!is_subinterval_of(ndFirst->addr,
1616 ndFirst->szB, ea, szB));
1617 if (ndLast)
1618 tl_assert(!is_subinterval_of(ndLast->addr,
1619 ndLast->szB, ea, szB));
1620 }
1621 if (0) VG_(printf)("overlapping blocks in cache\n");
1622 return;
1623 }
1624 sMin = sLB == &sNegInf ? 0 : (sLB->addr + sLB->szB);
1625 sMax = sUB == &sPosInf ? ~(UWord)0 : (sUB->addr - 1);
1626 gMin = gLB == &gNegInf ? 0 : (gLB->addr + gLB->szB);
1627 gMax = gUB == &gPosInf ? ~(UWord)0 : (gUB->addr - 1);
1628 if (0) VG_(printf)("sMin %lx sMax %lx gMin %lx gMax %lx\n",
1629 sMin, sMax, gMin, gMax);
1630 /* [sMin,sMax] and [gMin,gMax] must both contain
1631 [ea,ea+szB) (right?) That implies they must overlap at
1632 least over [ea,ea+szB). */
1633 tl_assert(sMin <= ea && ea+szB-1 <= sMax);
1634 tl_assert(gMin <= ea && ea+szB-1 <= gMax);
1635 /* So now compute their intersection. */
1636 uMin = Addr__max( sMin, gMin );
1637 uMax = Addr__min( sMax, gMax );
1638 if (0) VG_(printf)("uMin %lx uMax %lx\n", uMin, uMax);
1639 tl_assert(uMin <= uMax);
1640 tl_assert(uMin <= ea && ea+szB-1 <= uMax);
1641 /* Finally, we can park [uMin,uMax] in the cache. However,
1642 if uMax is ~0, we can't represent the difference; hence
1643 fudge uMax. */
1644 if (uMin < uMax && uMax == ~(UWord)0)
1645 uMax--;
1646 toadd_addr = uMin;
1647 toadd_szB = uMax - uMin + 1;
1648 break;
1649 }
1650 default:
1651 /* We should only be caching info for the above 3 cases */
1652 tl_assert(0);
1653 } /* switch (inv->tag) */
1654
1655 { /* and actually add this to the cache, finally */
1656 Word i;
1657 Word ip = cache->nInUse / 2; /* doesn't seem critical */
1658
1659 if (cache->nInUse < N_QCACHE)
1660 cache->nInUse++;
1661 for (i = cache->nInUse-1; i > ip; i--) {
1662 cache->elems[i] = cache->elems[i-1];
1663 }
1664
1665 tl_assert(toadd_szB > 0);
1666 cache->elems[ip].addr = toadd_addr;
1667 cache->elems[ip].szB = toadd_szB;
1668 cache->elems[ip].inv = *inv;
1669 }
1670
1671 if (show) QCache__pp(cache, "after upd");
1672
1673 }
1674}
1675
1676
1677/* CALLED FROM GENERATED CODE */
1678static
1679VG_REGPARM(3)
1680void helperc__mem_access ( /* Known only at run time: */
1681 Addr ea, Addr sp, Addr fp,
1682 /* Known at translation time: */
1683 Word sszB, Addr ip, XArray* ip_frameBlocks )
1684{
1685 UWord szB;
1686 IInstance* iinstance;
1687 Invar* inv;
1688 Invar new_inv;
1689 ThreadId tid = VG_(get_running_tid)();
1690 StackFrame* frame;
1691 HChar bufE[128], bufA[128];
1692
1693 stats__total_accesses++;
1694
1695 tl_assert(is_sane_TId(tid));
1696 frame = shadowStacks[tid];
1697 tl_assert(frame);
1698
1699 /* Find the instance info for this instruction. */
1700 tl_assert(ip_frameBlocks);
1701 iinstance = find_or_create_IInstance( frame, ip, ip_frameBlocks );
1702 tl_assert(iinstance);
1703 tl_assert(iinstance->blocks == ip_frameBlocks);
1704
1705 szB = (sszB < 0) ? (-sszB) : sszB;
1706 tl_assert(szB > 0);
1707
1708 inv = &iinstance->invar;
1709
1710 /* Deal with first uses of instruction instances. */
1711 if (inv->tag == Inv_Unset) {
1712 /* This is the first use of this instance of the instruction, so
1713 we can't make any check; we merely record what we saw, so we
1714 can compare it against what happens for 2nd and subsequent
1715 accesses. */
1716 classify_address( inv,
1717 tid, ea, sp, fp, szB, iinstance->blocks );
1718 tl_assert(inv->tag != Inv_Unset);
1719 return;
1720 }
1721
1722 /* So generate an Invar and see if it's different from what
1723 we had before. */
1724 classify_address( &new_inv,
1725 tid, ea, sp, fp, szB, iinstance->blocks );
1726 tl_assert(new_inv.tag != Inv_Unset);
1727
1728 /* Did we see something different from before? If no, then there's
1729 no error. */
1730 if (eq_Invar(&new_inv, inv))
1731 return;
1732
1733 tl_assert(inv->tag != Inv_Unset);
1734
1735 VG_(memset)(bufE, 0, sizeof(bufE));
1736 show_Invar( bufE, sizeof(bufE)-1, inv, frame->depth );
1737
1738 VG_(memset)(bufA, 0, sizeof(bufA));
1739 show_Invar( bufA, sizeof(bufA)-1, &new_inv, frame->depth );
1740
1741 sg_record_error_SorG( tid, ea, sszB, bufE, bufA );
1742
1743 /* And now install the new observation as "standard", so as to
1744 make future error messages make more sense. */
1745 *inv = new_inv;
1746}
1747
1748
1749////////////////////////////////////////
1750/* Primary push-a-new-frame routine. Called indirectly from
1751 generated code. */
1752
1753static UWord stats__max_sitree_size = 0;
1754static UWord stats__max_gitree_size = 0;
1755
1756static
1757void shadowStack_new_frame ( ThreadId tid,
1758 Addr sp_at_call_insn,
1759 Addr sp_post_call_insn,
1760 Addr fp_at_call_insn,
1761 Addr ip_post_call_insn,
1762 XArray* descrs_at_call_insn )
1763{
1764 StackFrame *callee, *caller;
1765 tl_assert(is_sane_TId(tid));
1766
1767 caller = shadowStacks[tid];
1768 tl_assert(caller);
1769
1770 if (caller->outer) { /* "this is not the outermost frame" */
1771 tl_assert(caller->outer->inner == caller);
1772 tl_assert(caller->outer->depth >= 0);
1773 tl_assert(1 + caller->outer->depth == caller->depth);
1774 } else {
1775 tl_assert(caller->depth == 0);
1776 }
1777
1778 caller->sp_at_call = sp_at_call_insn;
1779 caller->fp_at_call = fp_at_call_insn;
1780
1781 if (descrs_at_call_insn) {
1782 tl_assert( VG_(sizeXA)(descrs_at_call_insn) > 0 );
1783 caller->blocks_added_by_call
1784 = calculate_StackBlock_EAs( descrs_at_call_insn,
1785 sp_at_call_insn, fp_at_call_insn );
1786 if (caller->blocks_added_by_call)
1787 add_blocks_to_StackTree( siTrees[tid],
1788 descrs_at_call_insn,
1789 caller->blocks_added_by_call,
1790 caller->depth /* stack depth at which
1791 these blocks are
1792 considered to exist*/ );
1793 if (1) {
1794 UWord s = VG_(sizeFM)( siTrees[tid] );
1795 UWord g = VG_(sizeFM)( giTree );
1796 Bool sb = s > stats__max_sitree_size;
1797 Bool gb = g > stats__max_gitree_size;
1798 if (sb) stats__max_sitree_size = s;
1799 if (gb) stats__max_gitree_size = g;
1800 if (0 && (sb || gb))
1801 VG_(message)(Vg_DebugMsg,
1802 "exp-sgcheck: new max tree sizes: "
1803 "StackTree %ld, GlobalTree %ld",
1804 stats__max_sitree_size, stats__max_gitree_size );
1805 }
1806 } else {
1807 caller->blocks_added_by_call = NULL;
1808 }
1809
1810 /* caller->blocks_added_by_call is used again (and then freed) when
1811 this frame is removed from the stack. */
1812
1813 if (caller->inner) {
1814 callee = caller->inner;
1815 } else {
1816 callee = sg_malloc("di.sg_main.sSnf.1", sizeof(StackFrame));
1817 VG_(memset)(callee, 0, sizeof(StackFrame));
1818 callee->outer = caller;
1819 caller->inner = callee;
1820 callee->depth = 1 + caller->depth;
1821 tl_assert(callee->inner == NULL);
1822 }
1823
1824 /* This sets up .htab, .htab_size and .htab_used */
1825 initialise_II_hash_table( callee );
1826
1827 callee->creation_sp = sp_post_call_insn;
1828 callee->sp_at_call = 0; // not actually required ..
1829 callee->fp_at_call = 0; // .. these 3 initialisations are ..
1830 callee->blocks_added_by_call = NULL; // .. just for cleanness
1831
1832 /* record the new running stack frame */
1833 shadowStacks[tid] = callee;
1834
1835 /* and this thread's query cache is now invalid */
1836 QCache__invalidate( &qcaches[tid] );
1837
1838 if (0)
1839 { Word d = callee->depth;
1840 HChar fnname[80];
1841 Bool ok;
1842 Addr ip = ip_post_call_insn;
1843 ok = VG_(get_fnname_w_offset)( ip, fnname, sizeof(fnname) );
1844 while (d > 0) {
1845 VG_(printf)(" ");
1846 d--;
1847 }
1848 VG_(printf)("> %s %#lx\n", ok ? fnname : "???", ip);
1849 }
1850}
1851
1852/* CALLED FROM GENERATED CODE */
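/* Note on sp_adjust: sg_instrument_final_jump (below) passes the
   guest word size for this argument, i.e. the size of the return
   address which the call instruction pushed (x86/amd64 are assumed
   there).  Adding it to sp_post_call_insn therefore recovers the SP
   value at the call instruction itself. */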
1853static
1854VG_REGPARM(3)
1855void helperc__new_frame ( Addr sp_post_call_insn,
1856 Addr fp_at_call_insn,
1857 Addr ip_post_call_insn,
1858 XArray* blocks_at_call_insn,
1859 Word sp_adjust )
1860{
1861 ThreadId tid = VG_(get_running_tid)();
1862 Addr sp_at_call_insn = sp_post_call_insn + sp_adjust;
1863 shadowStack_new_frame( tid,
1864 sp_at_call_insn,
1865 sp_post_call_insn,
1866 fp_at_call_insn,
1867 ip_post_call_insn,
1868 blocks_at_call_insn );
1869}
1870
1871
1872////////////////////////////////////////
1873/* Primary remove-frame(s) routine. Called indirectly from
1874 generated code. */
1875
1876__attribute__((noinline))
1877static void shadowStack_unwind ( ThreadId tid, Addr sp_now )
1878{
1879 StackFrame *innermost, *innermostOrig;
1880 tl_assert(is_sane_TId(tid));
1881 innermost = shadowStacks[tid];
1882 tl_assert(innermost);
1883 innermostOrig = innermost;
1884 //VG_(printf)("UNWIND sp_new = %p\n", sp_now);
1885 while (1) {
1886 if (!innermost->outer)
1887 break;
1888 if (innermost->inner)
1889 tl_assert(innermost->inner->outer == innermost);
1890 tl_assert(innermost->outer->inner == innermost);
1891 tl_assert(innermost->blocks_added_by_call == NULL);
1892 if (sp_now <= innermost->creation_sp) break;
1893 //VG_(printf)("UNWIND dump %p\n", innermost->creation_sp);
1894 tl_assert(innermost->htab);
1895 if (innermost->htab != &innermost->htab_fixed[0])
1896 sg_free(innermost->htab);
1897 /* be on the safe side */
1898 innermost->creation_sp = 0;
1899 innermost->htab = NULL;
1900 innermost->htab_size = 0;
1901 innermost->htab_used = 0;
1902 innermost->sp_at_call = 0;
1903 innermost->fp_at_call = 0;
1904 innermost->blocks_added_by_call = NULL;
1905 innermost = innermost->outer;
1906
1907      /* So now we're "back" in the calling frame.  Remove from this
1908         thread's stack-interval-tree the blocks added at the time of
1909         the call. */
1910
1911      if (innermost->outer) { /* not at the outermost frame */
1912         /* nothing to do unless blocks were added at the call */
1913         if (innermost->blocks_added_by_call != NULL) {
1914            del_blocks_from_StackTree( siTrees[tid],
1915                                       innermost->blocks_added_by_call );
1916            VG_(deleteXA)( innermost->blocks_added_by_call );
1917            innermost->blocks_added_by_call = NULL;
1918         }
1919      }
1920 /* That completes the required tidying of the interval tree
1921 associated with the frame we just removed. */
1922
1923 if (0) {
1924 Word d = innermost->depth;
1925 while (d > 0) {
1926 VG_(printf)(" ");
1927 d--;
1928 }
1929 VG_(printf)("X\n");
1930 }
1931
1932 }
1933
1934 tl_assert(innermost);
1935
1936 if (innermost != innermostOrig) {
1937 shadowStacks[tid] = innermost;
1938 /* this thread's query cache is now invalid */
1939 QCache__invalidate( &qcaches[tid] );
1940 }
1941}
1942
1943
1944
1945//////////////////////////////////////////////////////////////
1946// //
1947// Instrumentation //
1948// //
1949//////////////////////////////////////////////////////////////
1950
1951/* What does instrumentation need to do?
1952
1953 - at each Call transfer, generate a call to shadowStack_new_frame
1954 do this by manually inspecting the IR
1955
1956   - at each SP change which shrinks the stack (that is, when frames
1957     are being popped), call shadowStack_unwind
1958     do this by asking for SP-change analysis
1959
1960 - for each memory referencing instruction,
1961 call helperc__mem_access
1962*/
1963
1964/* A complication: sg_ instrumentation and h_ instrumentation need to
1965 be interleaved. Since the latter is a lot more complex than the
1966 former, we split the sg_ instrumentation here into four functions
1967 and let the h_ instrumenter call the four functions as it goes.
1968 Hence the h_ instrumenter drives the sg_ instrumenter.
1969
1970 To make this viable, the sg_ instrumenter carries what running
1971 state it needs in 'struct _SGEnv'. This is exported only
1972 abstractly from this file.
1973*/
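
/* For orientation, this is roughly the calling pattern expected of
   the h_ instrumenter (a sketch only; the exact interleaving with the
   h_ instrumentation is the h_ instrumenter's business, not ours):

      struct _SGEnv* env = sg_instrument_init();
      for each statement 'st' in the superblock:
         sg_instrument_IRStmt( env, sbOut, st, layout, gWordTy, hWordTy );
         ... h_ instrumentation for 'st', then copy 'st' to sbOut ...
      sg_instrument_final_jump( env, sbOut, next, jumpkind,
                                layout, gWordTy, hWordTy );
      sg_instrument_fini( env );
*/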
1974
1975struct _SGEnv {
1976 /* the current insn's IP */
1977 Addr64 curr_IP;
1978 /* whether the above is actually known */
1979 Bool curr_IP_known;
1980   /* if we find a mem ref, is it the first for this insn?  Used to
1981      detect insns which make more than one memory ref, a situation
1982      we can't handle properly, so we ignore all but the first
1983      ref. */
1984 Bool firstRef;
1985};
1986
1987
1988/* --- Helper fns for instrumentation --- */
1989
1990static IRTemp gen_Get_SP ( IRSB* bbOut,
1991 VexGuestLayout* layout,
1992 Int hWordTy_szB )
1993{
1994 IRExpr* sp_expr;
1995 IRTemp sp_temp;
1996 IRType sp_type;
1997 /* This in effect forces the host and guest word sizes to be the
1998 same. */
1999 tl_assert(hWordTy_szB == layout->sizeof_SP);
2000 sp_type = layout->sizeof_SP == 8 ? Ity_I64 : Ity_I32;
2001 sp_expr = IRExpr_Get( layout->offset_SP, sp_type );
2002 sp_temp = newIRTemp( bbOut->tyenv, sp_type );
2003 addStmtToIRSB( bbOut, IRStmt_WrTmp( sp_temp, sp_expr ) );
2004 return sp_temp;
2005}
2006
2007static IRTemp gen_Get_FP ( IRSB* bbOut,
2008 VexGuestLayout* layout,
2009 Int hWordTy_szB )
2010{
2011 IRExpr* fp_expr;
2012 IRTemp fp_temp;
2013 IRType fp_type;
2014 /* This in effect forces the host and guest word sizes to be the
2015 same. */
2016 tl_assert(hWordTy_szB == layout->sizeof_SP);
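   /* (The check is against sizeof_SP, mirroring gen_Get_SP above;
      this presumes FP and SP have the same size on all supported
      targets.) */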
2017 fp_type = layout->sizeof_FP == 8 ? Ity_I64 : Ity_I32;
2018 fp_expr = IRExpr_Get( layout->offset_FP, fp_type );
2019 fp_temp = newIRTemp( bbOut->tyenv, fp_type );
2020 addStmtToIRSB( bbOut, IRStmt_WrTmp( fp_temp, fp_expr ) );
2021 return fp_temp;
2022}
2023
2024static void instrument_mem_access ( IRSB* bbOut,
2025 IRExpr* addr,
2026 Int szB,
2027 Bool isStore,
2028 Int hWordTy_szB,
2029 Addr curr_IP,
2030 VexGuestLayout* layout )
2031{
2032 IRType tyAddr = Ity_INVALID;
2033 XArray* frameBlocks = NULL;
2034
2035 tl_assert(isIRAtom(addr));
2036 tl_assert(hWordTy_szB == 4 || hWordTy_szB == 8);
2037
2038 tyAddr = typeOfIRExpr( bbOut->tyenv, addr );
2039 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
2040
2041#if defined(VGA_x86)
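   /* Peephole: if the bytes at curr_IP look like part of a standard
      x86 epilogue (pop %ebp; ret / ret $imm16) or prologue
      (push %ebp; mov %esp,%ebp), skip instrumenting this access --
      presumably because such frame set-up/tear-down accesses are not
      worth checking. */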
2042 { UChar* p = (UChar*)curr_IP;
2043 // pop %ebp; RET
2044 if (p[-1] == 0x5d && p[0] == 0xc3) return;
2045 // pop %ebp; RET $imm16
2046 if (p[-1] == 0x5d && p[0] == 0xc2) return;
2047 // PUSH %EBP; mov %esp,%ebp
2048 if (p[0] == 0x55 && p[1] == 0x89 && p[2] == 0xe5) return;
2049 }
2050#endif
2051
2052 /* First off, find or create the StackBlocks for this instruction. */
2053 frameBlocks = get_StackBlocks_for_IP( curr_IP );
2054 tl_assert(frameBlocks);
2055 //if (VG_(sizeXA)( frameBlocks ) == 0)
2056 // frameBlocks = NULL;
2057
2058 /* Generate a call to "helperc__mem_access", passing:
2059 addr current_SP current_FP szB curr_IP frameBlocks
2060 */
2061 { IRTemp t_SP = gen_Get_SP( bbOut, layout, hWordTy_szB );
2062 IRTemp t_FP = gen_Get_FP( bbOut, layout, hWordTy_szB );
2063 IRExpr** args
2064 = mkIRExprVec_6( addr,
2065 IRExpr_RdTmp(t_SP),
2066 IRExpr_RdTmp(t_FP),
2067 mkIRExpr_HWord( isStore ? (-szB) : szB ),
2068 mkIRExpr_HWord( curr_IP ),
2069 mkIRExpr_HWord( (HWord)frameBlocks ) );
2070 IRDirty* di
2071 = unsafeIRDirty_0_N( 3/*regparms*/,
2072 "helperc__mem_access",
2073 VG_(fnptr_to_fnentry)( &helperc__mem_access ),
2074 args );
2075
2076 addStmtToIRSB( bbOut, IRStmt_Dirty(di) );
2077 }
2078}
2079
2080
2081/* --- Instrumentation main (4 fns) --- */
2082
2083struct _SGEnv * sg_instrument_init ( void )
2084{
2085 struct _SGEnv * env = sg_malloc("di.sg_main.sii.1",
2086 sizeof(struct _SGEnv));
2087 tl_assert(env);
2088 env->curr_IP = 0;
2089 env->curr_IP_known = False;
2090 env->firstRef = True;
2091 return env;
2092}
2093
2094void sg_instrument_fini ( struct _SGEnv * env )
2095{
2096 sg_free(env);
2097}
2098
2099/* Add instrumentation for 'st' to 'sbOut', and possibly modify 'env'
2100 as required. This must be called before 'st' itself is added to
2101 'sbOut'. */
2102void sg_instrument_IRStmt ( /*MOD*/struct _SGEnv * env,
2103 /*MOD*/IRSB* sbOut,
2104 IRStmt* st,
2105 VexGuestLayout* layout,
2106 IRType gWordTy, IRType hWordTy )
2107{
2108 tl_assert(st);
2109 tl_assert(isFlatIRStmt(st));
2110 switch (st->tag) {
2111 case Ist_NoOp:
2112 case Ist_AbiHint:
2113 case Ist_Put:
2114 case Ist_PutI:
2115 case Ist_MBE:
2116 /* None of these can contain any memory references. */
2117 break;
2118
2119 case Ist_Exit:
2120 tl_assert(st->Ist.Exit.jk != Ijk_Call);
2121 /* else we must deal with a conditional call */
2122 break;
2123
2124 case Ist_IMark:
2125 env->curr_IP_known = True;
2126 env->curr_IP = (Addr)st->Ist.IMark.addr;
2127 env->firstRef = True;
2128 break;
2129
2130 case Ist_Store:
2131 tl_assert(env->curr_IP_known);
2132 if (env->firstRef) {
2133 instrument_mem_access(
2134 sbOut,
2135 st->Ist.Store.addr,
2136 sizeofIRType(typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data)),
2137 True/*isStore*/,
2138 sizeofIRType(hWordTy),
2139 env->curr_IP, layout
2140 );
2141 env->firstRef = False;
2142 }
2143 break;
2144
2145 case Ist_WrTmp: {
2146 IRExpr* data = st->Ist.WrTmp.data;
2147 if (data->tag == Iex_Load) {
2148 tl_assert(env->curr_IP_known);
2149 if (env->firstRef) {
2150 instrument_mem_access(
2151 sbOut,
2152 data->Iex.Load.addr,
2153 sizeofIRType(data->Iex.Load.ty),
2154 False/*!isStore*/,
2155 sizeofIRType(hWordTy),
2156 env->curr_IP, layout
2157 );
2158 env->firstRef = False;
2159 }
2160 }
2161 break;
2162 }
2163
2164 case Ist_Dirty: {
2165 Int dataSize;
2166 IRDirty* d = st->Ist.Dirty.details;
2167 if (d->mFx != Ifx_None) {
2168 /* This dirty helper accesses memory. Collect the
2169 details. */
2170 tl_assert(env->curr_IP_known);
2171 if (env->firstRef) {
2172 tl_assert(d->mAddr != NULL);
2173 tl_assert(d->mSize != 0);
2174 dataSize = d->mSize;
2175 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
2176 instrument_mem_access(
2177 sbOut, d->mAddr, dataSize, False/*!isStore*/,
2178 sizeofIRType(hWordTy), env->curr_IP, layout
2179 );
2180 }
2181 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
2182 instrument_mem_access(
2183 sbOut, d->mAddr, dataSize, True/*isStore*/,
2184 sizeofIRType(hWordTy), env->curr_IP, layout
2185 );
2186 }
2187 env->firstRef = False;
2188 }
2189 } else {
2190 tl_assert(d->mAddr == NULL);
2191 tl_assert(d->mSize == 0);
2192 }
2193 break;
2194 }
2195
2196 default:
2197 tl_assert(0);
2198
2199 } /* switch (st->tag) */
2200}
2201
2202
2203/* Add instrumentation for the final jump of an IRSB 'sbOut', and
2204 possibly modify 'env' as required. This must be the last
2205 instrumentation statement in the block. */
2206void sg_instrument_final_jump ( /*MOD*/struct _SGEnv * env,
2207 /*MOD*/IRSB* sbOut,
2208 IRExpr* next,
2209 IRJumpKind jumpkind,
2210 VexGuestLayout* layout,
2211 IRType gWordTy, IRType hWordTy )
2212{
2213 if (jumpkind == Ijk_Call) {
2214 // Assumes x86 or amd64
2215 IRTemp sp_post_call_insn, fp_post_call_insn;
2216 XArray* frameBlocks;
2217 IRExpr** args;
2218 IRDirty* di;
2219 sp_post_call_insn
2220 = gen_Get_SP( sbOut, layout, sizeofIRType(hWordTy) );
2221 fp_post_call_insn
2222 = gen_Get_FP( sbOut, layout, sizeofIRType(hWordTy) );
2223 tl_assert(env->curr_IP_known);
2224 frameBlocks = get_StackBlocks_for_IP( env->curr_IP );
2225 tl_assert(frameBlocks);
2226 if (VG_(sizeXA)(frameBlocks) == 0)
2227 frameBlocks = NULL;
2228 args
2229 = mkIRExprVec_5(
2230 IRExpr_RdTmp(sp_post_call_insn),
2231 IRExpr_RdTmp(fp_post_call_insn),
2232 /* assume the call doesn't change FP */
2233 next,
2234 mkIRExpr_HWord( (HWord)frameBlocks ),
2235 mkIRExpr_HWord( sizeofIRType(gWordTy) )
2236 );
2237 di = unsafeIRDirty_0_N(
2238 3/*regparms*/,
2239 "helperc__new_frame",
2240 VG_(fnptr_to_fnentry)( &helperc__new_frame ),
2241 args );
2242 addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
2243 }
2244}
2245
2246
2247//////////////////////////////////////////////////////////////
2248// //
2249// end Instrumentation //
2250// //
2251//////////////////////////////////////////////////////////////
2252
2253
2254//////////////////////////////////////////////////////////////
2255// //
2256// misc //
2257// //
2258//////////////////////////////////////////////////////////////
2259
2260/* Make a new empty stack frame that is suitable for being the
2261 outermost frame in a stack. It has a creation_sp of effectively
2262 infinity, so it can never be removed. */
2263static StackFrame* new_root_StackFrame ( void )
2264{
2265 StackFrame* sframe = sg_malloc("di.sg_main.nrS.1", sizeof(StackFrame));
2266 VG_(memset)( sframe, 0, sizeof(*sframe) );
2267 sframe->creation_sp = ~0UL;
2268
2269 /* This sets up .htab, .htab_size and .htab_used */
2270 initialise_II_hash_table( sframe );
2271
2272 /* ->depth, ->outer, ->inner are 0, NULL, NULL */
2273
2274 return sframe;
2275}
2276
2277/* Primary routine for setting up the shadow stack for a new thread.
2278 Note that this is used to create not only child thread stacks, but
2279 the root thread's stack too. We create a new stack with
2280 .creation_sp set to infinity, so that the outermost frame can never
2281 be removed (by shadowStack_unwind). The core calls this function
2282 as soon as a thread is created. We cannot yet get its SP value,
2283 since that may not yet be set. */
2284static void shadowStack_thread_create ( ThreadId parent, ThreadId child )
2285{
2286 tl_assert(is_sane_TId(child));
2287 if (parent == VG_INVALID_THREADID) {
2288 /* creating the main thread's stack */
2289 } else {
2290 tl_assert(is_sane_TId(parent));
2291 tl_assert(parent != child);
2292 tl_assert(shadowStacks[parent] != NULL);
2293 tl_assert(siTrees[parent] != NULL);
2294 }
2295
2296 /* Create the child's stack. Bear in mind we may be re-using
2297 it. */
2298 if (shadowStacks[child] == NULL) {
2299      /* First use of this stack.  Nothing to tear down here; the root frame is allocated below. */
2300 tl_assert(siTrees[child] == NULL);
2301 } else {
2302 StackFrame *frame, *frame2;
2303 /* re-using a stack. */
2304 /* get rid of the interval tree */
2305 tl_assert(siTrees[child] != NULL);
2306 delete_StackTree( siTrees[child] );
2307 siTrees[child] = NULL;
2308 /* Throw away all existing frames. */
2309 frame = shadowStacks[child];
2310 while (frame->outer)
2311 frame = frame->outer;
2312 tl_assert(frame->depth == 0);
2313 while (frame) {
2314 frame2 = frame->inner;
2315 if (frame2) tl_assert(1 + frame->depth == frame2->depth);
2316 sg_free(frame);
2317 frame = frame2;
2318 }
2319 shadowStacks[child] = NULL;
2320 }
2321
2322 tl_assert(shadowStacks[child] == NULL);
2323 tl_assert(siTrees[child] == NULL);
2324
2325 /* Set up the initial stack frame. */
2326 shadowStacks[child] = new_root_StackFrame();
2327
2328 /* and set up the child's stack block interval tree. */
2329 siTrees[child] = new_StackTree();
2330}
2331
2332/* Once a thread is ready to go, the core calls here. We take the
2333 opportunity to push a second frame on its stack, with the
2334 presumably valid SP value that is going to be used for the thread's
2335 startup. Hence we should always wind up with a valid outermost
2336 frame for the thread. */
2337static void shadowStack_set_initial_SP ( ThreadId tid )
2338{
2339 StackFrame* sf;
2340 tl_assert(is_sane_TId(tid));
2341 sf = shadowStacks[tid];
2342 tl_assert(sf != NULL);
2343 tl_assert(sf->outer == NULL);
2344 tl_assert(sf->inner == NULL);
2345 tl_assert(sf->creation_sp == ~0UL);
2346 shadowStack_new_frame( tid, 0, VG_(get_SP)(tid),
2347 0, VG_(get_IP)(tid), NULL );
2348}
2349
2350
2351//////////////////////////////////////////////////////////////
2352// //
2353// main-ish //
2354// //
2355//////////////////////////////////////////////////////////////
2356
2357/* CALLED indirectly FROM GENERATED CODE. Calls here are created by
2358   sp-change analysis, as requested in pc_pre_clo_init(). */
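/* old_SP is the base of the stack area which has just become dead and
   len is its size, so old_SP+len is the new (higher) SP value; we
   unwind the shadow stack back to that point. */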
2359void sg_die_mem_stack ( Addr old_SP, SizeT len ) {
2360 ThreadId tid = VG_(get_running_tid)();
2361 shadowStack_unwind( tid, old_SP+len );
2362}
2363
2364void sg_pre_clo_init ( void ) {
2365 ourGlobals_init();
2366 init_StackBlocks_set();
2367 init_GlobalBlock_set();
2368}
2369
2370void sg_post_clo_init ( void ) {
2371}
2372
2373void sg_pre_thread_ll_create ( ThreadId parent, ThreadId child ) {
2374 shadowStack_thread_create(parent, child);
2375}
2376
2377void sg_pre_thread_first_insn ( ThreadId tid ) {
2378 shadowStack_set_initial_SP(tid);
2379}
2380
2381void sg_fini(Int exitcode)
2382{
2383 if (VG_(clo_verbosity) >= 2) {
2384 VG_(message)(Vg_DebugMsg,
2385 " sg_: %'llu total accesses, of which:", stats__total_accesses);
2386 VG_(message)(Vg_DebugMsg,
2387 " sg_: stack0: %'12llu classify",
2388 stats__classify_Stack0);
2389 VG_(message)(Vg_DebugMsg,
2390 " sg_: stackN: %'12llu classify",
2391 stats__classify_StackN);
2392 VG_(message)(Vg_DebugMsg,
2393 " sg_: global: %'12llu classify",
2394 stats__classify_Global);
2395 VG_(message)(Vg_DebugMsg,
2396 " sg_: unknown: %'12llu classify",
2397 stats__classify_Unknown);
2398 VG_(message)(Vg_DebugMsg,
2399 " sg_: %'llu Invars preened, of which %'llu changed",
2400 stats__Invars_preened, stats__Invars_changed);
2401 VG_(message)(Vg_DebugMsg,
2402 " sg_: t_i_b_MT: %'12llu", stats__t_i_b_empty);
2403 VG_(message)(Vg_DebugMsg,
2404 " sg_: qcache: %'llu searches, %'llu probes, %'llu misses",
2405 stats__qcache_queries, stats__qcache_probes, stats__qcache_misses);
2406 VG_(message)(Vg_DebugMsg,
2407 " sg_: htab-fast: %'llu hits",
2408 stats__htab_fast);
2409 VG_(message)(Vg_DebugMsg,
2410 " sg_: htab-slow: %'llu searches, %'llu probes, %'llu resizes",
2411 stats__htab_searches, stats__htab_probes, stats__htab_resizes);
2412 }
2413}
2414
2415
2416/*--------------------------------------------------------------------*/
2417/*--- end sg_main.c ---*/
2418/*--------------------------------------------------------------------*/