
/*--------------------------------------------------------------------*/
/*--- Ptrcheck: a pointer-use checker.                             ---*/
/*--- This file checks stack and global array accesses.           ---*/
/*---                                                    sg_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Ptrcheck, a Valgrind tool for checking pointer
   use in programs.

   Copyright (C) 2008-2008 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "pub_tool_basics.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_xarray.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_machine.h"
#include "pub_tool_debuginfo.h"
#include "pub_tool_options.h"

#include "pc_common.h"

#include "sg_main.h"   // self


static
void preen_global_Invars ( Addr a, SizeT len ); /*fwds*/


//////////////////////////////////////////////////////////////
//                                                          //
// Basic Stuff                                              //
//                                                          //
//////////////////////////////////////////////////////////////

static inline Bool is_sane_TId ( ThreadId tid )
{
   return tid >= 0 && tid < VG_N_THREADS
          && tid != VG_INVALID_THREADID;
}

static void* sg_malloc ( HChar* cc, SizeT n ) {
   void* p;
   tl_assert(n > 0);
   p = VG_(malloc)( cc, n );
   tl_assert(p);
   return p;
}

static void sg_free ( void* p ) {
   tl_assert(p);
   VG_(free)(p);
}


/* Compare the intervals [a1,a1+n1) and [a2,a2+n2).  Return -1 if the
   first interval is lower, 1 if the first interval is higher, and 0
   if there is any overlap.  The redundant paranoia with casting is a
   response to what looked distinctly like a bug in gcc-4.1.2, in
   which some of the comparisons were done signedly instead of
   unsignedly. */
inline
static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
                                     Addr a2, SizeT n2 ) {
   UWord a1w = (UWord)a1;
   UWord n1w = (UWord)n1;
   UWord a2w = (UWord)a2;
   UWord n2w = (UWord)n2;
   tl_assert(n1w > 0 && n2w > 0);
   if (a1w + n1w <= a2w) return -1L;
   if (a2w + n2w <= a1w) return 1L;
   return 0;
}
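
/* Illustrative sketch (not part of the tool): the tri-state result
   above makes "any overlap" the equal case, which is what lets the
   WordFM-based interval trees below find a node intersecting a query
   range.  Assuming only the definitions above, this hypothetical
   self-test would pass: */
#if 0
static void demo_cmp_nonempty_intervals ( void )
{
   tl_assert(cmp_nonempty_intervals(  0, 8, 16, 8 ) == -1); /* disjoint, lower */
   tl_assert(cmp_nonempty_intervals( 16, 8,  0, 8 ) ==  1); /* disjoint, higher */
   tl_assert(cmp_nonempty_intervals(  0, 8,  4, 8 ) ==  0); /* overlap => "equal" */
   tl_assert(cmp_nonempty_intervals(  0, 8,  8, 8 ) == -1); /* adjacent, no overlap */
}
#endif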

/* Return true iff [aSmall,aSmall+nSmall) is entirely contained
   within [aBig,aBig+nBig). */
inline
static Bool is_subinterval_of ( Addr aBig, SizeT nBig,
                                Addr aSmall, SizeT nSmall ) {
   tl_assert(nBig > 0 && nSmall > 0);
   return aBig <= aSmall && aSmall + nSmall <= aBig + nBig;
}

inline
static Addr Addr__min ( Addr a1, Addr a2 ) {
   return a1 < a2 ? a1 : a2;
}

inline
static Addr Addr__max ( Addr a1, Addr a2 ) {
   return a1 < a2 ? a2 : a1;
}


//////////////////////////////////////////////////////////////
//                                                          //
// StackBlocks Persistent Cache                             //
//                                                          //
//////////////////////////////////////////////////////////////

/* We maintain a set of XArray* of StackBlocks.  These are never
   freed.  When a new StackBlock vector is acquired from
   VG_(di_get_stack_blocks_at_ip), we compare it to the existing set.
   If not present, it is added.  If present, the just-acquired one is
   freed and the copy used.

   This simplifies storage management elsewhere.  It allows us to
   assume that a pointer to an XArray* of StackBlock is valid forever.
   It also means there are no duplicates anywhere, which could be
   important from a space point of view for programs that generate a
   lot of translations, or where translations are frequently discarded
   and re-made.

   Note that we normalise the arrays by sorting the elements according
   to an arbitrary total order, so as to avoid the situation that two
   vectors describe the same set of variables but are not structurally
   identical. */

static inline Bool StackBlock__sane ( StackBlock* fb )
{
   if (fb->name[ sizeof(fb->name)-1 ] != 0)
      return False;
   if (fb->spRel != False && fb->spRel != True)
      return False;
   if (fb->isVec != False && fb->isVec != True)
      return False;
   return True;
}

/* Generate an arbitrary total ordering on StackBlocks. */
static Word StackBlock__cmp ( StackBlock* fb1, StackBlock* fb2 )
{
   Word r;
   tl_assert(StackBlock__sane(fb1));
   tl_assert(StackBlock__sane(fb2));
   /* Hopefully the .base test hits most of the time.  For the blocks
      associated with any particular instruction, if the .base values
      are the same then probably it doesn't make sense for the other
      fields to be different.  But this is supposed to be a completely
      general structural total order, so we have to compare everything
      anyway. */
   if (fb1->base < fb2->base) return -1;
   if (fb1->base > fb2->base) return 1;
   /* compare sizes */
   if (fb1->szB < fb2->szB) return -1;
   if (fb1->szB > fb2->szB) return 1;
   /* compare sp/fp flag */
   if (fb1->spRel < fb2->spRel) return -1;
   if (fb1->spRel > fb2->spRel) return 1;
   /* compare is/is-not array-typed flag */
   if (fb1->isVec < fb2->isVec) return -1;
   if (fb1->isVec > fb2->isVec) return 1;
   /* compare the name */
   r = (Word)VG_(strcmp)(fb1->name, fb2->name);
   return r;
}

/* Returns True if all fields except .szB are the same.  szBs may or
   may not be the same; they are simply not consulted. */
static Bool StackBlock__all_fields_except_szB_are_equal (
               StackBlock* fb1,
               StackBlock* fb2
            )
{
   tl_assert(StackBlock__sane(fb1));
   tl_assert(StackBlock__sane(fb2));
   return fb1->base == fb2->base
          && fb1->spRel == fb2->spRel
          && fb1->isVec == fb2->isVec
          && 0 == VG_(strcmp)(fb1->name, fb2->name);
}


/* Generate an arbitrary total ordering on vectors of StackBlocks. */
static Word StackBlocks__cmp ( XArray* fb1s, XArray* fb2s )
{
   Word i, r, n1, n2;
   n1 = VG_(sizeXA)( fb1s );
   n2 = VG_(sizeXA)( fb2s );
   if (n1 < n2) return -1;
   if (n1 > n2) return 1;
   for (i = 0; i < n1; i++) {
      StackBlock *fb1, *fb2;
      fb1 = VG_(indexXA)( fb1s, i );
      fb2 = VG_(indexXA)( fb2s, i );
      r = StackBlock__cmp( fb1, fb2 );
      if (r != 0) return r;
   }
   tl_assert(i == n1 && i == n2);
   return 0;
}

static void pp_StackBlock ( StackBlock* sb )
{
   VG_(printf)("StackBlock{ off %ld szB %lu spRel:%c isVec:%c \"%s\" }",
               sb->base, sb->szB, sb->spRel ? 'Y' : 'N',
               sb->isVec ? 'Y' : 'N', &sb->name[0] );
}

static void pp_StackBlocks ( XArray* sbs )
{
   Word i, n = VG_(sizeXA)( sbs );
   VG_(printf)("<<< STACKBLOCKS\n" );
   for (i = 0; i < n; i++) {
      VG_(printf)("   ");
      pp_StackBlock( (StackBlock*)VG_(indexXA)( sbs, i ) );
      VG_(printf)("\n");
   }
   VG_(printf)(">>> STACKBLOCKS\n" );
}


/* ---------- The StackBlock vector cache ---------- */

static WordFM* /* XArray* of StackBlock -> nothing */
       frameBlocks_set = NULL;

static void init_StackBlocks_set ( void )
{
   tl_assert(!frameBlocks_set);
   frameBlocks_set
      = VG_(newFM)( sg_malloc, "di.sg_main.iSBs.1", sg_free,
                    (Word(*)(UWord,UWord))StackBlocks__cmp );
   tl_assert(frameBlocks_set);
}

/* Find the given StackBlock-vector in our collection thereof.  If
   found, deallocate the supplied one, and return the address of the
   copy.  If not found, add the supplied one to our collection and
   return its address. */
static XArray* /* of StackBlock */
       StackBlocks__find_and_dealloc__or_add
          ( XArray* /* of StackBlock */ orig )
{
   UWord key, val;

   /* First, normalise, as per comments above. */
   VG_(setCmpFnXA)( orig, (Int(*)(void*,void*))StackBlock__cmp );
   VG_(sortXA)( orig );

   /* Now get rid of any exact duplicates. */
  nuke_dups:
   { Word r, w, nEQ, n = VG_(sizeXA)( orig );
     if (n >= 2) {
        w = 0;
        nEQ = 0;
        for (r = 0; r < n; r++) {
           if (r+1 < n) {
              StackBlock* pR0 = VG_(indexXA)( orig, r+0 );
              StackBlock* pR1 = VG_(indexXA)( orig, r+1 );
              Word c = StackBlock__cmp(pR0,pR1);
              tl_assert(c == -1 || c == 0);
              if (c == 0) { nEQ++; continue; }
           }
           if (w != r) {
              StackBlock* pW = VG_(indexXA)( orig, w );
              StackBlock* pR = VG_(indexXA)( orig, r );
              *pW = *pR;
           }
           w++;
        }
        tl_assert(r == n);
        tl_assert(w + nEQ == n);
        if (w < n) {
           VG_(dropTailXA)( orig, n-w );
        }
        if (0) VG_(printf)("delta %ld\n", n-w);
     }
   }

   /* Deal with the following strangeness, where two otherwise
      identical blocks are claimed to have different sizes.  In which
      case we use the larger size. */
   /* StackBlock{ off 16 szB 66 spRel:Y isVec:Y "sz" }
      StackBlock{ off 16 szB 130 spRel:Y isVec:Y "sz" }
      StackBlock{ off 208 szB 16 spRel:Y isVec:Y "ar" }
   */
   { Word i, n = VG_(sizeXA)( orig );
     if (n >= 2) {
        for (i = 0; i < n-1; i++) {
           StackBlock* sb0 = VG_(indexXA)( orig, i+0 );
           StackBlock* sb1 = VG_(indexXA)( orig, i+1 );
           if (StackBlock__all_fields_except_szB_are_equal(sb0, sb1)) {
              /* They can't be identical because the previous tidying
                 pass would have removed the duplicates.  And they
                 can't be > because the earlier sorting pass would
                 have ordered otherwise-identical descriptors
                 according to < on .szB fields.  Hence: */
              tl_assert(sb0->szB < sb1->szB);
              sb0->szB = sb1->szB;
              /* This makes the blocks identical, at the size of the
                 larger one.  Rather than go to all the hassle of
                 sliding the rest down, simply go back to the
                 remove-duplicates stage.  The assertion guarantees
                 that we eventually make progress, since the rm-dups
                 stage will get rid of one of the blocks.  This is
                 expected to happen only exceedingly rarely. */
              tl_assert(StackBlock__cmp(sb0,sb1) == 0);
              goto nuke_dups;
           }
        }
     }
   }

   /* A rather poor sanity check on the results. */
   { Word i, n = VG_(sizeXA)( orig );
     for (i = 0; i < n-1; i++) {
        StackBlock* sb1 = (StackBlock*)VG_(indexXA)( orig, i );
        StackBlock* sb2 = (StackBlock*)VG_(indexXA)( orig, i+1 );
        if (sb1->base == sb2->base)
           pp_StackBlocks(orig);
        tl_assert(sb1->base != sb2->base);
     }
   }

   /* Now, do we have it already? */
   if (VG_(lookupFM)( frameBlocks_set, &key, &val, (UWord)orig )) {
      /* yes */
      XArray* res;
      tl_assert(val == 0);
      tl_assert(key != (UWord)orig);
      VG_(deleteXA)(orig);
      res = (XArray*)key;
      return res;
   } else {
      /* no */
      VG_(addToFM)( frameBlocks_set, (UWord)orig, 0 );
      return orig;
   }
}

/* Top level function for getting the StackBlock vector for a given
   instruction.  It is guaranteed that the returned pointer will be
   valid for the entire rest of the run, and also that the addresses
   of the individual elements of the array will not change. */

static XArray* /* of StackBlock */ get_StackBlocks_for_IP ( Addr ip )
{
   XArray* blocks = VG_(di_get_stack_blocks_at_ip)( ip, True/*arrays only*/ );
   tl_assert(blocks);
   return StackBlocks__find_and_dealloc__or_add( blocks );
}
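
/* Illustrative sketch (not part of the tool): because vectors are
   interned in frameBlocks_set, repeated queries for the same
   instruction address are expected to return the same canonical
   XArray*, so callers can store and compare the pointer directly.
   A hypothetical check, assuming the definitions above: */
#if 0
static void demo_StackBlocks_interning ( Addr some_ip )
{
   XArray* xa1 = get_StackBlocks_for_IP( some_ip );
   XArray* xa2 = get_StackBlocks_for_IP( some_ip );
   /* Same normalised content => same canonical copy. */
   tl_assert(xa1 == xa2);
}
#endif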


//////////////////////////////////////////////////////////////
//                                                          //
// GlobalBlocks Persistent Cache                            //
//                                                          //
//////////////////////////////////////////////////////////////

/* Generate an arbitrary total ordering on GlobalBlocks. */
static Word GlobalBlock__cmp ( GlobalBlock* gb1, GlobalBlock* gb2 )
{
   Word r;
   /* compare addrs */
   if (gb1->addr < gb2->addr) return -1;
   if (gb1->addr > gb2->addr) return 1;
   /* compare sizes */
   if (gb1->szB < gb2->szB) return -1;
   if (gb1->szB > gb2->szB) return 1;
   /* compare is/is-not array-typed flag */
   if (gb1->isVec < gb2->isVec) return -1;
   if (gb1->isVec > gb2->isVec) return 1;
   /* compare the name */
   r = (Word)VG_(strcmp)(gb1->name, gb2->name);
   if (r != 0) return r;
   /* compare the soname */
   r = (Word)VG_(strcmp)(gb1->soname, gb2->soname);
   return r;
}

static WordFM* /* GlobalBlock* -> nothing */
       globalBlock_set = NULL;

static void init_GlobalBlock_set ( void )
{
   tl_assert(!globalBlock_set);
   globalBlock_set
      = VG_(newFM)( sg_malloc, "di.sg_main.iGBs.1", sg_free,
                    (Word(*)(UWord,UWord))GlobalBlock__cmp );
   tl_assert(globalBlock_set);
}


/* Top level function for making GlobalBlocks persistent.  Call here
   with a non-persistent version, and the returned one is guaranteed
   to be valid for the entire rest of the run.  The supplied one is
   copied, not stored, so can be freed after the call. */

static GlobalBlock* get_persistent_GlobalBlock ( GlobalBlock* orig )
{
   UWord key, val;
   /* Now, do we have it already? */
   if (VG_(lookupFM)( globalBlock_set, &key, &val, (UWord)orig )) {
      /* yes, return the copy */
      GlobalBlock* res;
      tl_assert(val == 0);
      res = (GlobalBlock*)key;
      tl_assert(res != orig);
      return res;
   } else {
      /* no.  clone it, store the clone and return the clone's
         address. */
      GlobalBlock* clone = sg_malloc( "di.sg_main.gpGB.1",
                                      sizeof(GlobalBlock) );
      tl_assert(clone);
      *clone = *orig;
      VG_(addToFM)( globalBlock_set, (UWord)clone, 0 );
      return clone;
   }
}


//////////////////////////////////////////////////////////////
//                                                          //
// Interval tree of StackTreeBlock                          //
//                                                          //
//////////////////////////////////////////////////////////////

/* A node in a stack interval tree.  Zero length intervals (.szB == 0)
   are not allowed.

   A stack interval tree is a (WordFM StackTreeNode* void).  There is
   one stack interval tree for each thread.
*/
typedef
   struct {
      Addr        addr;
      SizeT       szB;   /* copied from .descr->szB */
      StackBlock* descr; /* it's an instance of this block */
      UWord       depth; /* depth of stack at time block was pushed */
   }
   StackTreeNode;

static void pp_StackTree ( WordFM* sitree, HChar* who )
{
   UWord keyW, valW;
   VG_(printf)("<<< BEGIN pp_StackTree %s\n", who );
   VG_(initIterFM)( sitree );
   while (VG_(nextIterFM)( sitree, &keyW, &valW )) {
      StackTreeNode* nd = (StackTreeNode*)keyW;
      VG_(printf)("  [%#lx,+%lu) descr=%p %s %lu\n", nd->addr, nd->szB,
                  nd->descr, nd->descr->name, nd->descr->szB);
   }
   VG_(printf)(">>> END   pp_StackTree %s\n", who );
}

/* Interval comparison function for StackTreeNode */
static Word cmp_intervals_StackTreeNode ( StackTreeNode* sn1,
                                          StackTreeNode* sn2 )
{
   return cmp_nonempty_intervals(sn1->addr, sn1->szB,
                                 sn2->addr, sn2->szB);
}

/* Find the node holding 'a', if any. */
static StackTreeNode* find_StackTreeNode ( WordFM* sitree, Addr a )
{
   UWord keyW, valW;
   StackTreeNode key;
   tl_assert(sitree);
   key.addr = a;
   key.szB  = 1;
   if (VG_(lookupFM)( sitree, &keyW, &valW, (UWord)&key )) {
      StackTreeNode* res = (StackTreeNode*)keyW;
      tl_assert(valW == 0);
      tl_assert(res != &key);
      return res;
   } else {
      return NULL;
   }
}
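
/* Illustrative note (not part of the tool): the lookup above is a
   "stabbing query".  Since cmp_nonempty_intervals reports 0 for any
   overlap, probing the WordFM with a 1-byte key [a,a+1) matches
   exactly the node whose interval contains 'a', if one exists.  A
   hypothetical use, assuming a tree that holds a block [0x1000,+64): */
#if 0
static void demo_stabbing_query ( WordFM* sitree )
{
   StackTreeNode* nd = find_StackTreeNode( sitree, 0x1010 );
   if (nd)
      tl_assert(nd->addr <= 0x1010 && 0x1010 < nd->addr + nd->szB);
}
#endif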

/* Note that the supplied XArray of StackBlock must have been
   made persistent already. */
__attribute__((noinline))
static void add_blocks_to_StackTree (
               /*MOD*/WordFM* sitree,
               XArray* /* StackBlock */ descrs,
               XArray* /* Addr */ bases,
               UWord depth
            )
{
   Bool debug = (Bool)0;
   Word i, nDescrs, nBases;

   nDescrs = VG_(sizeXA)( descrs );
   nBases  = VG_(sizeXA)( bases );
   tl_assert(nDescrs == nBases);

   if (nDescrs == 0) return;

   tl_assert(sitree);
   if (debug) {
      VG_(printf)("\n");
      pp_StackTree( sitree, "add_blocks_to_StackTree-pre" );
   }

   for (i = 0; i < nDescrs; i++) {
      Bool already_present;
      StackTreeNode* nyu;
      Addr        addr  = *(Addr*)VG_(indexXA)( bases, i );
      StackBlock* descr = (StackBlock*)VG_(indexXA)( descrs, i );
      tl_assert(descr->szB > 0);
      nyu = sg_malloc( "di.sg_main.abtST.1", sizeof(StackTreeNode) );
      nyu->addr  = addr;
      nyu->szB   = descr->szB;
      nyu->descr = descr;
      nyu->depth = depth;
      if (debug) VG_(printf)("ADD %#lx %lu\n", addr, descr->szB);
      already_present = VG_(addToFM)( sitree, (UWord)nyu, 0 );
      /* The interval can't already be there; else we have
         overlapping stack blocks. */
      tl_assert(!already_present);
      if (debug) {
         pp_StackTree( sitree, "add_blocks_to_StackTree-step" );
      }
   }
   if (debug) {
      pp_StackTree( sitree, "add_blocks_to_StackTree-post" );
      VG_(printf)("\n");
   }
}

static void del_blocks_from_StackTree ( /*MOD*/WordFM* sitree,
                                        XArray* /* Addr */ bases )
{
   UWord oldK, oldV;
   Word i, nBases = VG_(sizeXA)( bases );
   for (i = 0; i < nBases; i++) {
      Bool b;
      Addr addr = *(Addr*)VG_(indexXA)( bases, i );
      StackTreeNode* nd = find_StackTreeNode(sitree, addr);
      /* The interval must be there; we added it earlier when
         the associated frame was created. */
      tl_assert(nd);
      b = VG_(delFromFM)( sitree, &oldK, &oldV, (UWord)nd );
      /* we just found the block! */
      tl_assert(b);
      tl_assert(oldV == 0);
      tl_assert(nd == (StackTreeNode*)oldK);
      sg_free(nd);
   }
}


static void delete_StackTree__kFin ( UWord keyW ) {
   StackTreeNode* nd = (StackTreeNode*)keyW;
   tl_assert(nd);
   sg_free(nd);
}
static void delete_StackTree__vFin ( UWord valW ) {
   tl_assert(valW == 0);
}
static void delete_StackTree ( WordFM* sitree )
{
   VG_(deleteFM)( sitree,
                  delete_StackTree__kFin, delete_StackTree__vFin );
}

static WordFM* new_StackTree ( void ) {
   return VG_(newFM)( sg_malloc, "di.sg_main.nST.1", sg_free,
                      (Word(*)(UWord,UWord))cmp_intervals_StackTreeNode );
}


//////////////////////////////////////////////////////////////
//                                                          //
// Interval tree of GlobalTreeBlock                         //
//                                                          //
//////////////////////////////////////////////////////////////

/* A node in a global interval tree.  Zero length intervals
   (.szB == 0) are not allowed.

   A global interval tree is a (WordFM GlobalTreeNode* void).  There
   is one global interval tree for the entire process.
*/
typedef
   struct {
      Addr         addr;  /* copied from .descr->addr */
      SizeT        szB;   /* copied from .descr->szB */
      GlobalBlock* descr; /* it's this block */
   }
   GlobalTreeNode;

static void GlobalTreeNode__pp ( GlobalTreeNode* nd ) {
   tl_assert(nd->descr);
   VG_(printf)("GTNode [%#lx,+%lu) %s",
               nd->addr, nd->szB, nd->descr->name);
}

static void GlobalTree__pp ( WordFM* /* of (GlobalTreeNode,void) */ gitree,
                             HChar* who )
{
   UWord keyW, valW;
   GlobalTreeNode* nd;
   VG_(printf)("<<< GlobalBlockTree (%s)\n", who);
   VG_(initIterFM)( gitree );
   while (VG_(nextIterFM)( gitree, &keyW, &valW )) {
      tl_assert(valW == 0);
      nd = (GlobalTreeNode*)keyW;
      VG_(printf)("  ");
      GlobalTreeNode__pp(nd);
      VG_(printf)("\n");
   }
   VG_(doneIterFM)( gitree );
   VG_(printf)(">>>\n");
}

/* Interval comparison function for GlobalTreeNode */
static Word cmp_intervals_GlobalTreeNode ( GlobalTreeNode* gn1,
                                           GlobalTreeNode* gn2 )
{
   return cmp_nonempty_intervals( gn1->addr, gn1->szB,
                                  gn2->addr, gn2->szB );
}

/* Find the node holding 'a', if any. */
static GlobalTreeNode* find_GlobalTreeNode ( WordFM* gitree, Addr a )
{
   UWord keyW, valW;
   GlobalTreeNode key;
   key.addr = a;
   key.szB  = 1;
   if (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
      GlobalTreeNode* res = (GlobalTreeNode*)keyW;
      tl_assert(valW == 0);
      tl_assert(res != &key);
      return res;
   } else {
      return NULL;
   }
}

/* Note that the supplied GlobalBlock must have been made persistent
   already. */
static void add_block_to_GlobalTree (
               /*MOD*/WordFM* gitree,
               GlobalBlock* descr
            )
{
   Bool already_present;
   GlobalTreeNode *nyu, *nd;
   UWord keyW, valW;

   tl_assert(descr->szB > 0);
   nyu = sg_malloc( "di.sg_main.abtG.1", sizeof(GlobalTreeNode) );
   nyu->addr  = descr->addr;
   nyu->szB   = descr->szB;
   nyu->descr = descr;

   /* Basically it's an error to add a global block to the tree that
      is already in the tree.  However, detect and ignore attempts to
      insert exact duplicates; they do appear for some reason
      (possibly a bug in m_debuginfo?) */
   already_present = VG_(lookupFM)( gitree, &keyW, &valW, (UWord)nyu );
   if (already_present) {
      tl_assert(valW == 0);
      nd = (GlobalTreeNode*)keyW;
      tl_assert(nd);
      tl_assert(nd != nyu);
      tl_assert(nd->descr);
      tl_assert(nyu->descr);
      if (nd->addr == nyu->addr && nd->szB == nyu->szB
          /* && 0 == VG_(strcmp)(nd->descr->name, nyu->descr->name) */
          /* Although it seems reasonable to demand that duplicate
             blocks have identical names, that is too strict.  For
             example, reading debuginfo from glibc produces two
             otherwise identical blocks with names "tzname" and
             "__tzname".  A constraint of the form "must be identical,
             or one must be a substring of the other" would fix that.
             However, such trickery is scuppered by the fact that we
             truncate all variable names to 15 characters to make
             storage management simpler, hence giving pairs like
             "__EI___pthread_" (truncated) vs "__pthread_keys".  So
             it's simplest just to skip the name comparison
             completely. */
          && 0 == VG_(strcmp)(nd->descr->soname, nyu->descr->soname)) {
         /* exact duplicate; ignore it */
         sg_free(nyu);
         return;
      }
      /* else fall through; the assertion below will catch it */
   }

   already_present = VG_(addToFM)( gitree, (UWord)nyu, 0 );
   /* The interval can't already be there; else we have
      overlapping global blocks. */
   if (already_present) {
      GlobalTree__pp( gitree, "add_block_to_GlobalTree: non-exact duplicate" );
      VG_(printf)("Overlapping block: ");
      GlobalTreeNode__pp(nyu);
      VG_(printf)("\n");
   }
   tl_assert(!already_present);
}

static Bool del_GlobalTree_range ( /*MOD*/WordFM* gitree,
                                   Addr a, SizeT szB )
{
   /* One easy way to do this: look up [a,a+szB) in the tree.  That
      will either succeed, producing a block which intersects that
      range, in which case we delete it and repeat; or it will fail,
      in which case there are no blocks intersecting the range, and we
      can bring the process to a halt. */
   UWord keyW, valW, oldK, oldV;
   GlobalTreeNode key, *nd;
   Bool b, anyFound;

   tl_assert(szB > 0);

   anyFound = False;

   key.addr = a;
   key.szB  = szB;

   while (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
      anyFound = True;
      nd = (GlobalTreeNode*)keyW;
      tl_assert(valW == 0);
      tl_assert(nd != &key);
      tl_assert(cmp_nonempty_intervals(a, szB, nd->addr, nd->szB) == 0);

      b = VG_(delFromFM)( gitree, &oldK, &oldV, (UWord)&key );
      tl_assert(b);
      tl_assert(oldV == 0);
      tl_assert(oldK == keyW); /* check we deleted the node we just found */
   }

   return anyFound;
}


//////////////////////////////////////////////////////////////
//                                                          //
// Invar                                                    //
//                                                          //
//////////////////////////////////////////////////////////////

/* An invariant, as resulting from watching the destination of a
   memory referencing instruction.  Initially is Inv_Unset until the
   instruction makes a first access. */

typedef
   enum {
      Inv_Unset=1,  /* not established yet */
      Inv_Unknown,  /* unknown location */
      Inv_Stack0,   /* array-typed stack block in innermost frame */
      Inv_StackN,   /* array-typed stack block in non-innermost frame */
      Inv_Global,   /* array-typed global block */
   }
   InvarTag;

typedef
   struct {
      InvarTag tag;
      union {
         struct {
         } Unset;
         struct {
         } Unknown;
         struct {
            Addr  addr;
            SizeT szB;
            StackBlock* descr;
         } Stack0; /* innermost stack frame */
         struct {
            /* Pointer to a node in the interval tree for
               this thread. */
            StackTreeNode* nd;
         } StackN; /* non-innermost stack frame */
         struct {
            /* Pointer to a GlobalBlock in the interval tree of
               global blocks. */
            GlobalTreeNode* nd;
         } Global;
      }
      Inv;
   }
   Invar;

/* Partial debugging printing for an Invar. */
static void pp_Invar ( Invar* i )
{
   switch (i->tag) {
      case Inv_Unset:
         VG_(printf)("Unset");
         break;
      case Inv_Unknown:
         VG_(printf)("Unknown");
         break;
      case Inv_Stack0:
         VG_(printf)("Stack0 [%#lx,+%lu)",
                     i->Inv.Stack0.addr, i->Inv.Stack0.szB);
         break;
      case Inv_StackN:
         VG_(printf)("StackN [%#lx,+%lu)",
                     i->Inv.StackN.nd->addr, i->Inv.StackN.nd->szB);
         break;
      case Inv_Global:
         VG_(printf)("Global [%#lx,+%lu)",
                     i->Inv.Global.nd->addr, i->Inv.Global.nd->szB);
         break;
      default:
         tl_assert(0);
   }
}

/* Compare two Invars for equality. */
static Bool eq_Invar ( Invar* i1, Invar* i2 )
{
   tl_assert(i1->tag != Inv_Unset);
   tl_assert(i2->tag != Inv_Unset);
   if (i1->tag != i2->tag)
      return False;
   switch (i1->tag) {
      case Inv_Unknown:
         return True;
      case Inv_Stack0:
         return i1->Inv.Stack0.addr == i2->Inv.Stack0.addr
                && i1->Inv.Stack0.szB == i2->Inv.Stack0.szB;
      case Inv_StackN:
         return i1->Inv.StackN.nd == i2->Inv.StackN.nd;
      case Inv_Global:
         return i1->Inv.Global.nd == i2->Inv.Global.nd;
      default:
         tl_assert(0);
   }
   /*NOTREACHED*/
   tl_assert(0);
}

/* Print selected parts of an Invar, suitable for use in error
   messages. */
static void show_Invar( HChar* buf, Word nBuf, Invar* inv, Word depth )
{
   HChar* str;
   tl_assert(nBuf >= 96);
   buf[0] = 0;
   switch (inv->tag) {
      case Inv_Unknown:
         VG_(sprintf)(buf, "%s", "unknown");
         break;
      case Inv_Stack0:
         str = "array";
         VG_(sprintf)(buf, "stack %s \"%s\" in this frame",
                      str, inv->Inv.Stack0.descr->name );
         break;
      case Inv_StackN:
         str = "array";
         VG_(sprintf)(buf, "stack %s \"%s\" in frame %lu back from here",
                      str, inv->Inv.StackN.nd->descr->name,
                           depth - inv->Inv.StackN.nd->depth );
         break;
      case Inv_Global:
         str = "array";
         VG_(sprintf)(buf, "global %s \"%s\" in object with soname \"%s\"",
                      str, inv->Inv.Global.nd->descr->name,
                           inv->Inv.Global.nd->descr->soname );
         break;
      case Inv_Unset:
         VG_(sprintf)(buf, "%s", "Unset!");
         break;
      default:
         tl_assert(0);
   }
}


//////////////////////////////////////////////////////////////
//                                                          //
// our globals                                              //
//                                                          //
//////////////////////////////////////////////////////////////

//////////////////////////////////////////////////////////////
///

#define N_QCACHE 16

/* Powers of two only, else the result will be chaos */
#define QCACHE_ADVANCE_EVERY 16

/* Per-thread query cache.  Note that the invar can only be Inv_StackN
   (but not Inv_Stack0), Inv_Global or Inv_Unknown. */
typedef
   struct {
      Addr  addr;
      SizeT szB;
      Invar inv;
   }
   QCElem;

typedef
   struct {
      Word   nInUse;
      QCElem elems[N_QCACHE];
   }
   QCache;
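
/* Illustrative sketch (not part of the tool): the cache is scanned
   linearly from slot 0, and (in classify_address below) a hit is
   occasionally swapped one slot towards the front, so hot entries
   migrate to the cheap end.  A minimal lookup loop in that style,
   assuming only the types above: */
#if 0
static Invar* demo_qcache_lookup ( QCache* qc, Addr ea, SizeT szB )
{
   Word i;
   for (i = 0; i < qc->nInUse; i++) {
      if (is_subinterval_of(qc->elems[i].addr, qc->elems[i].szB, ea, szB))
         return &qc->elems[i].inv; /* hit: reuse the cached classification */
   }
   return NULL; /* miss: caller must do a full classification */
}
#endif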

static void QCache__invalidate ( QCache* qc ) {
   tl_assert(qc->nInUse >= 0);
   qc->nInUse = 0;
}

static void QCache__pp ( QCache* qc, HChar* who )
{
   Word i;
   VG_(printf)("<<< QCache with %ld elements (%s)\n", qc->nInUse, who);
   for (i = 0; i < qc->nInUse; i++) {
      VG_(printf)("  [%#lx,+%#lx) ", qc->elems[i].addr, qc->elems[i].szB);
      pp_Invar(&qc->elems[i].inv);
      VG_(printf)("\n");
   }
   VG_(printf)(">>>\n");
}

static ULong stats__qcache_queries = 0;
static ULong stats__qcache_misses  = 0;
static ULong stats__qcache_probes  = 0;

///
//////////////////////////////////////////////////////////////

/* Each thread has:
   * a shadow stack of StackFrames, which is a doubly-linked list
   * a stack block interval tree
*/
static struct _StackFrame* shadowStacks[VG_N_THREADS];

static WordFM* /* StackTreeNode */ siTrees[VG_N_THREADS];

static QCache qcaches[VG_N_THREADS];


/* Additionally, there is one global variable interval tree
   for the entire process.
*/
static WordFM* /* GlobalTreeNode */ giTree;


static void invalidate_all_QCaches ( void )
{
   Word i;
   for (i = 0; i < VG_N_THREADS; i++) {
      QCache__invalidate( &qcaches[i] );
   }
}

static void ourGlobals_init ( void )
{
   Word i;
   for (i = 0; i < VG_N_THREADS; i++) {
      shadowStacks[i] = NULL;
      siTrees[i] = NULL;
   }
   invalidate_all_QCaches();
   giTree = VG_(newFM)( sg_malloc, "di.sg_main.oGi.1", sg_free,
                        (Word(*)(UWord,UWord))cmp_intervals_GlobalTreeNode );
}


//////////////////////////////////////////////////////////////
//                                                          //
// Handle global variable load/unload events                //
//                                                          //
//////////////////////////////////////////////////////////////

static void acquire_globals ( ULong di_handle )
{
   Word n, i;
   XArray* /* of GlobalBlock */ gbs;
   if (0) VG_(printf)("ACQUIRE GLOBALS %llu\n", di_handle );
   gbs = VG_(di_get_global_blocks_from_dihandle)
            (di_handle, True/*arrays only*/);
   if (0) VG_(printf)("   GOT %ld globals\n", VG_(sizeXA)( gbs ));

   n = VG_(sizeXA)( gbs );
   for (i = 0; i < n; i++) {
      GlobalBlock* gbp;
      GlobalBlock* gb = VG_(indexXA)( gbs, i );
      if (0) VG_(printf)("   new Global size %2lu at %#lx:  %s %s\n",
                         gb->szB, gb->addr, gb->soname, gb->name );
      tl_assert(gb->szB > 0);
      /* Make a persistent copy of each GlobalBlock, and add it
         to the tree. */
      gbp = get_persistent_GlobalBlock( gb );
      add_block_to_GlobalTree( giTree, gbp );
   }

   VG_(deleteXA)( gbs );
}


/* We only intercept these two because we need to see any di_handles
   that might arise from the mappings/allocations. */
void sg_new_mem_mmap( Addr a, SizeT len,
                      Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   if (di_handle > 0)
      acquire_globals(di_handle);
}
void sg_new_mem_startup( Addr a, SizeT len,
                         Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   if (di_handle > 0)
      acquire_globals(di_handle);
}
void sg_die_mem_munmap ( Addr a, SizeT len )
{
   Bool debug = (Bool)0;
   Bool overlap = False;

   if (debug) VG_(printf)("MUNMAP %#lx %lu\n", a, len );

   if (len == 0)
      return;

   overlap = del_GlobalTree_range(giTree, a, len);

   { /* redundant sanity check */
     UWord keyW, valW;
     VG_(initIterFM)( giTree );
     while (VG_(nextIterFM)( giTree, &keyW, &valW )) {
       GlobalTreeNode* nd = (GlobalTreeNode*)keyW;
       tl_assert(valW == 0);
       tl_assert(nd->szB > 0);
       tl_assert(nd->addr + nd->szB <= a
                 || a + len <= nd->addr);
     }
     VG_(doneIterFM)( giTree );
   }

   if (!overlap)
      return;

   /* Ok, the range contained some blocks.  Therefore we'll need to
      visit all the Invars in all the thread shadow stacks, and
      convert all Inv_Global entries that intersect [a,a+len) to
      Inv_Unknown. */
   tl_assert(len > 0);
   preen_global_Invars( a, len );
   invalidate_all_QCaches();
}


//////////////////////////////////////////////////////////////
//                                                          //
// StackFrame                                               //
//                                                          //
//////////////////////////////////////////////////////////////

static ULong stats__total_accesses   = 0;
static ULong stats__classify_Stack0  = 0;
static ULong stats__classify_StackN  = 0;
static ULong stats__classify_Global  = 0;
static ULong stats__classify_Unknown = 0;
static ULong stats__Invars_preened   = 0;
static ULong stats__Invars_changed   = 0;
static ULong stats__t_i_b_empty      = 0;
static ULong stats__htab_fast        = 0;
static ULong stats__htab_searches    = 0;
static ULong stats__htab_probes      = 0;
static ULong stats__htab_resizes     = 0;


/* A dynamic instance of an instruction */
typedef
   struct {
      /* IMMUTABLE */
      Addr    insn_addr; /* NB! zero means 'not in use' */
      XArray* blocks;    /* XArray* of StackBlock, or NULL if none */
      /* MUTABLE */
      Invar invar;
   }
   IInstance;


#define N_HTAB_FIXED 64

typedef
   struct _StackFrame {
      /* The sp when the frame was created, so we know when to get rid
         of it. */
      Addr creation_sp;
      /* The stack frames for a thread are arranged as a doubly linked
         list.  Obviously the outermost frame in the stack has .outer
         as NULL and the innermost in theory has .inner as NULL.
         However, when a function returns, we don't delete the
         just-vacated StackFrame.  Instead, it is retained in the list
         and will be re-used when the next call happens.  This is so
         as to avoid constantly having to dynamically allocate and
         deallocate frames. */
      struct _StackFrame* inner;
      struct _StackFrame* outer;
      Word depth; /* 0 for outermost; increases inwards */
      /* Information for each memory referencing instruction, for this
         instantiation of the function.  The iinstances array is
         operated as a simple linear-probe hash table, which is
         dynamically expanded as necessary.  One critical thing is
         that an IInstance with a .insn_addr of zero is interpreted to
         mean that hash table slot is unused.  This means we can't
         store an IInstance for address zero. */
      /* Note that htab initially points to htab_fixed.  If htab_fixed
         turns out not to be big enough then htab is made to point to
         dynamically allocated memory.  But it's often the case that
         htab_fixed is big enough, so this optimisation saves a huge
         number of sg_malloc/sg_free call pairs. */
      IInstance* htab;
      UWord      htab_size; /* size of hash table, MAY ONLY BE A POWER OF 2 */
      UWord      htab_used; /* number of hash table slots currently in use */
      /* If this frame is currently making a call, then the following
         are relevant. */
      Addr sp_at_call;
      Addr fp_at_call;
      XArray* /* of Addr */ blocks_added_by_call;
      /* See comment just above */
      IInstance htab_fixed[N_HTAB_FIXED];
   }
   StackFrame;




/* Move this somewhere else? */
/* Visit all Invars in the entire system, and convert all Inv_Global
   Invars that intersect [a,a+len) to Inv_Unknown. */

__attribute__((noinline))
static void preen_global_Invar ( Invar* inv, Addr a, SizeT len )
{
   stats__Invars_preened++;
   tl_assert(len > 0);
   tl_assert(inv);
   switch (inv->tag) {
      case Inv_Global:
         tl_assert(inv->Inv.Global.nd);
         tl_assert(inv->Inv.Global.nd->szB > 0);
         if (0) VG_(printf)("preen_Invar Global %#lx %lu\n",
                            inv->Inv.Global.nd->addr,
                            inv->Inv.Global.nd->szB);
         if (0 == cmp_nonempty_intervals(a, len, inv->Inv.Global.nd->addr,
                                         inv->Inv.Global.nd->szB)) {
            inv->tag = Inv_Unknown;
            stats__Invars_changed++;
         }
         break;
      case Inv_Stack0:
      case Inv_StackN:
      case Inv_Unknown:
         break;
      default:
         tl_assert(0);
   }
}

__attribute__((noinline))
static void preen_global_Invars ( Addr a, SizeT len )
{
   Int         i;
   UWord       u;
   StackFrame* frame;
   tl_assert(len > 0);
   for (i = 0; i < VG_N_THREADS; i++) {
      frame = shadowStacks[i];
      if (!frame)
         continue; /* no frames for this thread */
      /* start from the innermost frame */
      while (frame->inner)
         frame = frame->inner;
      tl_assert(frame->outer);
      /* work through the frames from innermost to outermost.  The
         order isn't important; we just need to ensure we visit each
         frame once (including those which are not actually active,
         more 'inner' than the 'innermost active frame', viz, just
         hanging around waiting to be used, when the current innermost
         active frame makes more calls.  See comments on definition of
         struct _StackFrame. */
      for (; frame; frame = frame->outer) {
         UWord xx = 0; /* sanity check only; count of used htab entries */
         if (!frame->htab)
            continue; /* frame not in use.  See shadowStack_unwind(). */
         for (u = 0; u < frame->htab_size; u++) {
            IInstance* ii = &frame->htab[u];
            if (ii->insn_addr == 0)
               continue; /* not in use */
            if (0) { pp_Invar(&ii->invar); VG_(printf)(" x\n"); }
            preen_global_Invar( &ii->invar, a, len );
            xx++;
         }
         tl_assert(xx == frame->htab_used);
      }
   }
}


/* XXX this should be >> 2 on ppc32/64 since the bottom two bits
   of the ip are guaranteed to be zero */
inline static UWord compute_II_hash ( Addr ip, UWord htab_size ) {
   return (ip >> 0) & (htab_size - 1);
}
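
/* Illustrative note (not part of the tool): the '& (htab_size - 1)'
   trick is why htab_size may only be a power of 2 -- for such sizes
   the mask keeps exactly the low log2(htab_size) bits of the ip,
   giving a slot index in [0, htab_size).  For example, with the
   initial size of 64 (N_HTAB_FIXED):
   compute_II_hash(0x40123457, 64) == 0x57 & 0x3F == 0x17. */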

__attribute__((noinline))
static void initialise_II_hash_table ( StackFrame* sf )
{
   UWord i;
   sf->htab_size = N_HTAB_FIXED; /* initial hash table size */
   sf->htab = &sf->htab_fixed[0];
   tl_assert(sf->htab);
   sf->htab_used = 0;
   for (i = 0; i < sf->htab_size; i++)
      sf->htab[i].insn_addr = 0; /* NOT IN USE */
}


__attribute__((noinline))
static void resize_II_hash_table ( StackFrame* sf )
{
   UWord i, j, ix, old_size, new_size;
   IInstance *old_htab, *new_htab, *old;

   tl_assert(sf && sf->htab);
   old_size = sf->htab_size;
   new_size = 2 * old_size;
   old_htab = sf->htab;
   new_htab = sg_malloc( "di.sg_main.rIht.1",
                         new_size * sizeof(IInstance) );
   for (i = 0; i < new_size; i++) {
      new_htab[i].insn_addr = 0; /* NOT IN USE */
   }
   for (i = 0; i < old_size; i++) {
      old = &old_htab[i];
      if (old->insn_addr == 0 /* NOT IN USE */)
         continue;
      ix = compute_II_hash(old->insn_addr, new_size);
      /* find out where to put this, in the new table */
      j = new_size;
      while (1) {
         if (new_htab[ix].insn_addr == 0)
            break;
         /* This can't ever happen, because it would mean the new
            table is full; that isn't allowed -- even the old table is
            only allowed to become half full. */
         tl_assert(j > 0);
         j--;
         ix++; if (ix == new_size) ix = 0;
      }
      /* copy the old entry to this location */
      tl_assert(ix < new_size);
      tl_assert(new_htab[ix].insn_addr == 0);
      new_htab[ix] = *old;
      tl_assert(new_htab[ix].insn_addr != 0);
   }
   /* all entries copied; free old table. */
   if (old_htab != &sf->htab_fixed[0])
      sg_free(old_htab);
   sf->htab = new_htab;
   sf->htab_size = new_size;
   /* check sf->htab_used is correct.  Optional and a bit expensive
      but anyway: */
   j = 0;
   for (i = 0; i < new_size; i++) {
      if (new_htab[i].insn_addr != 0) {
         j++;
      }
   }
   tl_assert(j == sf->htab_used);
   if (0) VG_(printf)("resized tab for SF %p to %lu\n", sf, new_size);
}


__attribute__((noinline))
static IInstance* find_or_create_IInstance_SLOW (
                     StackFrame* sf,
                     Addr ip,
                     XArray* /* StackBlock */ ip_frameblocks
                  )
{
   UWord i, ix;

   stats__htab_searches++;

   tl_assert(sf);
   tl_assert(sf->htab);

   /* Make sure the table loading doesn't get too high. */
   if (UNLIKELY(2 * sf->htab_used >= 1 * sf->htab_size)) {
      stats__htab_resizes++;
      resize_II_hash_table(sf);
   }
   tl_assert(2 * sf->htab_used <= sf->htab_size);

   ix = compute_II_hash(ip, sf->htab_size);
   i = sf->htab_size;
   while (1) {
      stats__htab_probes++;
      /* Note that because of the way the fast-case handler works,
         these two tests are actually redundant in the first iteration
         of this loop.  (Except they aren't redundant if the code just
         above resized the table first. :-) */
      if (sf->htab[ix].insn_addr == ip)
         return &sf->htab[ix];
      if (sf->htab[ix].insn_addr == 0)
         break;
      /* If i ever gets to zero and we have found neither what we're
         looking for nor an empty slot, the table must be full.  Which
         isn't possible -- we monitor the load factor to ensure it
         doesn't get above say 50%; if that ever does happen the table
         is resized. */
      tl_assert(i > 0);
      i--;
      ix++;
      if (ix == sf->htab_size) ix = 0;
   }

   /* So now we've found a free slot at ix, and we can use that. */
   tl_assert(sf->htab[ix].insn_addr == 0);

   /* Add a new record in this slot. */
   tl_assert(ip != 0); /* CAN'T REPRESENT THIS */
   sf->htab[ix].insn_addr = ip;
   sf->htab[ix].blocks    = ip_frameblocks;
   sf->htab[ix].invar.tag = Inv_Unset;
   sf->htab_used++;
   return &sf->htab[ix];
}


inline
static IInstance* find_or_create_IInstance (
                     StackFrame* sf,
                     Addr ip,
                     XArray* /* StackBlock */ ip_frameblocks
                  )
{
   UWord ix = compute_II_hash(ip, sf->htab_size);
   /* Is it in the first slot we come to? */
   if (LIKELY(sf->htab[ix].insn_addr == ip)) {
      stats__htab_fast++;
      return &sf->htab[ix];
   }
   /* If the first slot we come to is empty, bag it. */
   if (LIKELY(sf->htab[ix].insn_addr == 0)) {
      stats__htab_fast++;
      tl_assert(ip != 0);
      sf->htab[ix].insn_addr = ip;
      sf->htab[ix].blocks    = ip_frameblocks;
      sf->htab[ix].invar.tag = Inv_Unset;
      sf->htab_used++;
      return &sf->htab[ix];
   }
   /* Otherwise we hand off to the slow case, which searches other
      slots, and optionally resizes the table if necessary. */
   return find_or_create_IInstance_SLOW( sf, ip, ip_frameblocks );
}


__attribute__((noinline))
static Addr calculate_StackBlock_EA ( StackBlock* descr,
                                      Addr sp, Addr fp ) {
   UWord w1 = (UWord)descr->base;
   UWord w2 = (UWord)(descr->spRel ? sp : fp);
   UWord ea = w1 + w2;
   return ea;
}

/* Given an array of StackBlocks, return an array of Addrs, holding
   their effective addresses.  Caller deallocates result array. */
__attribute__((noinline))
static XArray* /* Addr */ calculate_StackBlock_EAs (
                             XArray* /* StackBlock */ blocks,
                             Addr sp, Addr fp
                          )
{
   XArray* res;
   Word i, n = VG_(sizeXA)( blocks );
   tl_assert(n > 0);
   res = VG_(newXA)( sg_malloc, "di.sg_main.cSBE.1", sg_free, sizeof(Addr) );
   for (i = 0; i < n; i++) {
      StackBlock* blk = VG_(indexXA)( blocks, i );
      Addr ea = calculate_StackBlock_EA( blk, sp, fp );
      VG_(addToXA)( res, &ea );
   }
   return res;
}


/* Try to classify the block into which a memory access falls, and
   write the result in 'inv'.  This writes all relevant fields of
   'inv'. */
__attribute__((noinline))
static void classify_address ( /*OUT*/Invar* inv,
                               ThreadId tid,
                               Addr ea, Addr sp, Addr fp,
                               UWord szB,
                               XArray* /* of StackBlock */ thisInstrBlocks )
{
   tl_assert(szB > 0);
   /* First, look in the stack blocks accessible in this instruction's
      frame. */
   {
     Word i, nBlocks = VG_(sizeXA)( thisInstrBlocks );
     if (nBlocks == 0) stats__t_i_b_empty++;
     for (i = 0; i < nBlocks; i++) {
        StackBlock* descr = VG_(indexXA)( thisInstrBlocks, i );
        Addr bea = calculate_StackBlock_EA( descr, sp, fp );
        if (bea <= ea && ea + szB <= bea + descr->szB) {
           /* found it */
           inv->tag = Inv_Stack0;
           inv->Inv.Stack0.addr  = bea;
           inv->Inv.Stack0.szB   = descr->szB;
           inv->Inv.Stack0.descr = descr;
           stats__classify_Stack0++;
           return;
        }
     }
   }
   /* Look in this thread's query cache */
   { Word i;
     QCache* cache = &qcaches[tid];
     static UWord ctr = 0;
     stats__qcache_queries++;
     for (i = 0; i < cache->nInUse; i++) {
        if (0) /* expensive in a loop like this */
           tl_assert(cache->elems[i].addr + cache->elems[i].szB != 0);
        stats__qcache_probes++;
        if (is_subinterval_of(cache->elems[i].addr,
                              cache->elems[i].szB, ea, szB)) {
           if (i > 0
               && (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0) {
              QCElem tmp;
              tmp = cache->elems[i-1];
              cache->elems[i-1] = cache->elems[i];
              cache->elems[i] = tmp;
              i--;
           }
           *inv = cache->elems[i].inv;
           return;
        }
     }
     stats__qcache_misses++;
   }
   /* Ok, so it's not a block in the top frame.  Perhaps it's a block
      in some calling frame?  Consult this thread's stack-block
      interval tree to find out. */
   { StackTreeNode* nd = find_StackTreeNode( siTrees[tid], ea );
     /* We know that [ea,ea+1) is in the block, but we need to
        restrict to the case where the whole access falls within
        it. */
     if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
        nd = NULL;
     }
     if (nd) {
        /* found it */
        inv->tag = Inv_StackN;
        inv->Inv.StackN.nd = nd;
        stats__classify_StackN++;
        goto out;
     }
   }
   /* Not in a stack block.  Try the global pool. */
   { GlobalTreeNode* nd = find_GlobalTreeNode(giTree, ea);
     /* We know that [ea,ea+1) is in the block, but we need to
        restrict to the case where the whole access falls within
        it. */
     if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
        nd = NULL;
     }
     if (nd) {
        /* found it */
        inv->tag = Inv_Global;
        inv->Inv.Global.nd = nd;
        stats__classify_Global++;
        goto out;
     }
   }
   /* No idea - give up. */
   inv->tag = Inv_Unknown;
   stats__classify_Unknown++;

   /* Update the cache */
  out:
   { Addr  toadd_addr = 0;
     SizeT toadd_szB  = 0;
     QCache* cache = &qcaches[tid];

     static UWord ctr = 0;
     Bool show = False;
     if (0 && 0 == (ctr++ & 0x1FFFFF)) show = True;

     if (show) QCache__pp(cache, "before upd");

     switch (inv->tag) {
        case Inv_Global:
           toadd_addr = inv->Inv.Global.nd->addr;
           toadd_szB  = inv->Inv.Global.nd->szB;
           break;
        case Inv_StackN:
           toadd_addr = inv->Inv.StackN.nd->addr;
           toadd_szB  = inv->Inv.StackN.nd->szB;
           break;
        case Inv_Unknown: {
           /* This is more complex.  We need to figure out the
              intersection of the "holes" in the global and stack
              interval trees into which [ea,ea+szB) falls.  This is
              further complicated by the fact that [ea,ea+szB) might
              not fall cleanly into a hole; it may instead fall across
              the boundary of a stack or global block.  In that case
              we just ignore it and don't update the cache, since we
              have no way to represent this situation precisely. */
           StackTreeNode  sNegInf, sPosInf, sKey, *sLB, *sUB;
           GlobalTreeNode gNegInf, gPosInf, gKey, *gLB, *gUB;
           Addr gMin, gMax, sMin, sMax, uMin, uMax;
           Bool sOK, gOK;
           sNegInf.addr = 0;
           sNegInf.szB  = 1;
           sPosInf.addr = ~(UWord)0;
           sPosInf.szB  = 1;
           gNegInf.addr = 0;
           gNegInf.szB  = 1;
           gPosInf.addr = ~(UWord)0;
           gPosInf.szB  = 1;
           sKey.addr = ea;
           sKey.szB  = szB;
           gKey.addr = ea;
           gKey.szB  = szB;
           if (0) VG_(printf)("Tree sizes %ld %ld\n",
                              VG_(sizeFM)(siTrees[tid]), VG_(sizeFM)(giTree));
           sOK = VG_(findBoundsFM)( siTrees[tid],
                                    (UWord*)&sLB,    NULL/*unused*/,
                                    (UWord*)&sUB,    NULL/*unused*/,
                                    (UWord)&sNegInf, 0/*unused*/,
                                    (UWord)&sPosInf, 0/*unused*/,
                                    (UWord)&sKey );
           gOK = VG_(findBoundsFM)( giTree,
                                    (UWord*)&gLB,    NULL/*unused*/,
                                    (UWord*)&gUB,    NULL/*unused*/,
                                    (UWord)&gNegInf, 0/*unused*/,
                                    (UWord)&gPosInf, 0/*unused*/,
                                    (UWord)&gKey );
           if (!(sOK && gOK)) {
              /* If this happens, then [ea,ea+szB) partially overlaps
                 a global or stack block.  We can't represent that, so
                 just forget it (should be very rare).  However, do
                 maximum sanity checks first.  In such a
                 partial overlap case, it can't be the case that both
                 [ea] and [ea+szB-1] overlap the same block, since if
                 that were indeed the case then it wouldn't be a
                 partial overlap; rather it would simply fall inside
                 that block entirely and we shouldn't be inside this
                 conditional at all. */
              if (!sOK) {
                 StackTreeNode *ndFirst, *ndLast;
                 ndFirst = find_StackTreeNode( siTrees[tid], ea );
                 ndLast  = find_StackTreeNode( siTrees[tid], ea+szB-1 );
                 /* if both ends of the range fall inside a block,
                    they can't be in the same block. */
                 if (ndFirst && ndLast)
                    tl_assert(ndFirst != ndLast);
                 /* for each end of the range, if it is in a block,
                    the range as a whole can't be entirely within the
                    block. */
                 if (ndFirst)
                    tl_assert(!is_subinterval_of(ndFirst->addr,
                                                 ndFirst->szB, ea, szB));
                 if (ndLast)
                    tl_assert(!is_subinterval_of(ndLast->addr,
                                                 ndLast->szB, ea, szB));
              }
              if (!gOK) {
                 GlobalTreeNode *ndFirst, *ndLast;
                 ndFirst = find_GlobalTreeNode( giTree, ea );
                 ndLast  = find_GlobalTreeNode( giTree, ea+szB-1 );
                 /* if both ends of the range fall inside a block,
                    they can't be in the same block. */
                 if (ndFirst && ndLast)
                    tl_assert(ndFirst != ndLast);
                 /* for each end of the range, if it is in a block,
                    the range as a whole can't be entirely within the
                    block. */
                 if (ndFirst)
                    tl_assert(!is_subinterval_of(ndFirst->addr,
                                                 ndFirst->szB, ea, szB));
                 if (ndLast)
                    tl_assert(!is_subinterval_of(ndLast->addr,
                                                 ndLast->szB, ea, szB));
              }
              if (0) VG_(printf)("overlapping blocks in cache\n");
              return;
           }
           sMin = sLB == &sNegInf ? 0         : (sLB->addr + sLB->szB);
           sMax = sUB == &sPosInf ? ~(UWord)0 : (sUB->addr - 1);
           gMin = gLB == &gNegInf ? 0         : (gLB->addr + gLB->szB);
           gMax = gUB == &gPosInf ? ~(UWord)0 : (gUB->addr - 1);
           if (0) VG_(printf)("sMin %lx sMax %lx gMin %lx gMax %lx\n",
                              sMin, sMax, gMin, gMax);
           /* [sMin,sMax] and [gMin,gMax] must both contain
              [ea,ea+szB) (right?)  That implies they must overlap at
              least over [ea,ea+szB). */
           tl_assert(sMin <= ea && ea+szB-1 <= sMax);
           tl_assert(gMin <= ea && ea+szB-1 <= gMax);
           /* So now compute their intersection. */
           uMin = Addr__max( sMin, gMin );
           uMax = Addr__min( sMax, gMax );
           if (0) VG_(printf)("uMin %lx uMax %lx\n", uMin, uMax);
           tl_assert(uMin <= uMax);
           tl_assert(uMin <= ea && ea+szB-1 <= uMax);
           /* Finally, we can park [uMin,uMax] in the cache.  However,
              if uMax is ~0, we can't represent the difference; hence
              fudge uMax. */
           if (uMin < uMax && uMax == ~(UWord)0)
              uMax--;
           toadd_addr = uMin;
           toadd_szB  = uMax - uMin + 1;
           break;
        }
        default:
           /* We should only be caching info for the above 3 cases */
           tl_assert(0);
     } /* switch (inv->tag) */

     { /* and actually add this to the cache, finally */
       Word i;
       Word ip = cache->nInUse / 2; /* doesn't seem critical */

       if (cache->nInUse < N_QCACHE)
          cache->nInUse++;
       for (i = cache->nInUse-1; i > ip; i--) {
          cache->elems[i] = cache->elems[i-1];
       }

       tl_assert(toadd_szB > 0);
       cache->elems[ip].addr = toadd_addr;
       cache->elems[ip].szB  = toadd_szB;
       cache->elems[ip].inv  = *inv;
     }

     if (show) QCache__pp(cache, "after upd");

   }
}


/* CALLED FROM GENERATED CODE */
static
VG_REGPARM(3)
void helperc__mem_access ( /* Known only at run time: */
                           Addr ea, Addr sp, Addr fp,
                           /* Known at translation time: */
                           Word sszB, Addr ip, XArray* ip_frameBlocks )
{
   UWord szB;
   IInstance* iinstance;
   Invar* inv;
   Invar new_inv;
   ThreadId tid = VG_(get_running_tid)();
   StackFrame* frame;
   HChar bufE[128], bufA[128];

   stats__total_accesses++;

   tl_assert(is_sane_TId(tid));
   frame = shadowStacks[tid];
   tl_assert(frame);

   /* Find the instance info for this instruction. */
   tl_assert(ip_frameBlocks);
   iinstance = find_or_create_IInstance( frame, ip, ip_frameBlocks );
   tl_assert(iinstance);
   tl_assert(iinstance->blocks == ip_frameBlocks);

   szB = (sszB < 0) ? (-sszB) : sszB;
   tl_assert(szB > 0);

   inv = &iinstance->invar;

   /* Deal with first uses of instruction instances. */
   if (inv->tag == Inv_Unset) {
      /* This is the first use of this instance of the instruction, so
         we can't make any check; we merely record what we saw, so we
         can compare it against what happens for 2nd and subsequent
         accesses. */
      classify_address( inv,
                        tid, ea, sp, fp, szB, iinstance->blocks );
      tl_assert(inv->tag != Inv_Unset);
      return;
   }

   /* So generate an Invar and see if it's different from what
      we had before. */
   classify_address( &new_inv,
                     tid, ea, sp, fp, szB, iinstance->blocks );
   tl_assert(new_inv.tag != Inv_Unset);

   /* Did we see something different from before?  If no, then there's
      no error. */
   if (eq_Invar(&new_inv, inv))
      return;

   tl_assert(inv->tag != Inv_Unset);

   VG_(memset)(bufE, 0, sizeof(bufE));
   show_Invar( bufE, sizeof(bufE)-1, inv, frame->depth );

   VG_(memset)(bufA, 0, sizeof(bufA));
   show_Invar( bufA, sizeof(bufA)-1, &new_inv, frame->depth );

   sg_record_error_SorG( tid, ea, sszB, bufE, bufA );

   /* And now install the new observation as "standard", so as to
      make future error messages make more sense. */
   *inv = new_inv;
}
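
/* Illustrative scenario (not part of the tool; a sketch of how the
   checks above fire).  For code like

      int a[10], b[10], i;
      for (i = 0; i <= 10; i++) a[i] = 0;

   the store's first execution classifies its target as Inv_Stack0
   ("stack array \"a\" in this frame").  On the i == 10 iteration the
   effective address no longer falls inside 'a', so the freshly
   computed Invar differs from the recorded one, and
   sg_record_error_SorG reports the mismatch: expected block 'a'
   versus whatever block (or "unknown") the overrun lands in. */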
1748
1749
1750////////////////////////////////////////
1751/* Primary push-a-new-frame routine. Called indirectly from
1752 generated code. */
1753
1754static UWord stats__max_sitree_size = 0;
1755static UWord stats__max_gitree_size = 0;
1756
1757static
1758void shadowStack_new_frame ( ThreadId tid,
1759 Addr sp_at_call_insn,
1760 Addr sp_post_call_insn,
1761 Addr fp_at_call_insn,
1762 Addr ip_post_call_insn,
1763 XArray* descrs_at_call_insn )
1764{
1765 StackFrame *callee, *caller;
1766 tl_assert(is_sane_TId(tid));
1767
1768 caller = shadowStacks[tid];
1769 tl_assert(caller);
1770
1771 if (caller->outer) { /* "this is not the outermost frame" */
1772 tl_assert(caller->outer->inner == caller);
1773 tl_assert(caller->outer->depth >= 0);
1774 tl_assert(1 + caller->outer->depth == caller->depth);
1775 } else {
1776 tl_assert(caller->depth == 0);
1777 }
1778
1779 caller->sp_at_call = sp_at_call_insn;
1780 caller->fp_at_call = fp_at_call_insn;
1781
1782 if (descrs_at_call_insn) {
1783 tl_assert( VG_(sizeXA)(descrs_at_call_insn) > 0 );
1784 caller->blocks_added_by_call
1785 = calculate_StackBlock_EAs( descrs_at_call_insn,
1786 sp_at_call_insn, fp_at_call_insn );
1787 if (caller->blocks_added_by_call)
1788 add_blocks_to_StackTree( siTrees[tid],
1789 descrs_at_call_insn,
1790 caller->blocks_added_by_call,
                                  caller->depth /* stack depth at which
                                                   these blocks are
                                                   considered to exist */ );
1794 if (1) {
1795 UWord s = VG_(sizeFM)( siTrees[tid] );
1796 UWord g = VG_(sizeFM)( giTree );
1797 Bool sb = s > stats__max_sitree_size;
1798 Bool gb = g > stats__max_gitree_size;
1799 if (sb) stats__max_sitree_size = s;
1800 if (gb) stats__max_gitree_size = g;
1801 if (0 && (sb || gb))
1802 VG_(message)(Vg_DebugMsg,
1803 "exp-sgcheck: new max tree sizes: "
1804 "StackTree %ld, GlobalTree %ld",
1805 stats__max_sitree_size, stats__max_gitree_size );
1806 }
1807 } else {
1808 caller->blocks_added_by_call = NULL;
1809 }
1810
1811 /* caller->blocks_added_by_call is used again (and then freed) when
1812 this frame is removed from the stack. */
1813
1814 if (caller->inner) {
1815 callee = caller->inner;
1816 } else {
1817 callee = sg_malloc("di.sg_main.sSnf.1", sizeof(StackFrame));
1818 VG_(memset)(callee, 0, sizeof(StackFrame));
1819 callee->outer = caller;
1820 caller->inner = callee;
1821 callee->depth = 1 + caller->depth;
1822 tl_assert(callee->inner == NULL);
1823 }
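
   /* Note: frames are cached via caller->inner and are not freed on
      return (shadowStack_unwind only clears their fields), so
      steady-state call/return cycles allocate nothing. */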
1824
1825 /* This sets up .htab, .htab_size and .htab_used */
1826 initialise_II_hash_table( callee );
1827
1828 callee->creation_sp = sp_post_call_insn;
1829 callee->sp_at_call = 0; // not actually required ..
1830 callee->fp_at_call = 0; // .. these 3 initialisations are ..
1831 callee->blocks_added_by_call = NULL; // .. just for cleanness
1832
1833 /* record the new running stack frame */
1834 shadowStacks[tid] = callee;
1835
1836 /* and this thread's query cache is now invalid */
1837 QCache__invalidate( &qcaches[tid] );
1838
1839 if (0)
1840 { Word d = callee->depth;
1841 HChar fnname[80];
1842 Bool ok;
1843 Addr ip = ip_post_call_insn;
1844 ok = VG_(get_fnname_w_offset)( ip, fnname, sizeof(fnname) );
1845 while (d > 0) {
1846 VG_(printf)(" ");
1847 d--;
1848 }
1849 VG_(printf)("> %s %#lx\n", ok ? fnname : "???", ip);
1850 }
1851}
1852
1853/* CALLED FROM GENERATED CODE */
1854static
1855VG_REGPARM(3)
1856void helperc__new_frame ( Addr sp_post_call_insn,
1857 Addr fp_at_call_insn,
1858 Addr ip_post_call_insn,
1859 XArray* blocks_at_call_insn,
1860 Word sp_adjust )
1861{
1862 ThreadId tid = VG_(get_running_tid)();
1863 Addr sp_at_call_insn = sp_post_call_insn + sp_adjust;
1864 shadowStack_new_frame( tid,
1865 sp_at_call_insn,
1866 sp_post_call_insn,
1867 fp_at_call_insn,
1868 ip_post_call_insn,
1869 blocks_at_call_insn );
1870}
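
/* Worked example, assuming x86: CALL pushes a 4-byte return address,
   and the instrumenter passes sp_adjust = sizeofIRType(gWordTy) = 4.
   Hence sp_at_call_insn = sp_post_call_insn + 4, recovering SP as it
   was at the call instruction itself. */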
1871
1872
1873////////////////////////////////////////
/* Primary remove-frame(s) routine.  Called indirectly from
   generated code.  Note that a single SP change can pop several
   frames at once (eg, on a longjmp), so the loop below keeps
   discarding frames until the innermost survivor's creation_sp is
   at or above the new SP. */
1876
1877__attribute__((noinline))
1878static void shadowStack_unwind ( ThreadId tid, Addr sp_now )
1879{
1880 StackFrame *innermost, *innermostOrig;
1881 tl_assert(is_sane_TId(tid));
1882 innermost = shadowStacks[tid];
1883 tl_assert(innermost);
1884 innermostOrig = innermost;
1885 //VG_(printf)("UNWIND sp_new = %p\n", sp_now);
1886 while (1) {
1887 if (!innermost->outer)
1888 break;
1889 if (innermost->inner)
1890 tl_assert(innermost->inner->outer == innermost);
1891 tl_assert(innermost->outer->inner == innermost);
1892 tl_assert(innermost->blocks_added_by_call == NULL);
1893 if (sp_now <= innermost->creation_sp) break;
1894 //VG_(printf)("UNWIND dump %p\n", innermost->creation_sp);
1895 tl_assert(innermost->htab);
1896 if (innermost->htab != &innermost->htab_fixed[0])
1897 sg_free(innermost->htab);
1898 /* be on the safe side */
1899 innermost->creation_sp = 0;
1900 innermost->htab = NULL;
1901 innermost->htab_size = 0;
1902 innermost->htab_used = 0;
1903 innermost->sp_at_call = 0;
1904 innermost->fp_at_call = 0;
1905 innermost->blocks_added_by_call = NULL;
1906 innermost = innermost->outer;
1907
1908 /* So now we're "back" in the calling frame. Remove from this
1909 thread's stack-interval-tree, the blocks added at the time of
1910 the call. */
1911
      if (innermost->outer /* not at the outermost frame */
          && innermost->blocks_added_by_call != NULL) {
         del_blocks_from_StackTree( siTrees[tid],
                                    innermost->blocks_added_by_call );
         VG_(deleteXA)( innermost->blocks_added_by_call );
         innermost->blocks_added_by_call = NULL;
      }
1921 /* That completes the required tidying of the interval tree
1922 associated with the frame we just removed. */
1923
1924 if (0) {
1925 Word d = innermost->depth;
1926 while (d > 0) {
1927 VG_(printf)(" ");
1928 d--;
1929 }
1930 VG_(printf)("X\n");
1931 }
1932
1933 }
1934
1935 tl_assert(innermost);
1936
1937 if (innermost != innermostOrig) {
1938 shadowStacks[tid] = innermost;
1939 /* this thread's query cache is now invalid */
1940 QCache__invalidate( &qcaches[tid] );
1941 }
1942}
1943
1944
1945
1946//////////////////////////////////////////////////////////////
1947// //
1948// Instrumentation //
1949// //
1950//////////////////////////////////////////////////////////////
1951
1952/* What does instrumentation need to do?
1953
1954 - at each Call transfer, generate a call to shadowStack_new_frame
1955 do this by manually inspecting the IR
1956
1957 - at each sp change, if the sp change is negative,
1958 call shadowStack_unwind
1959 do this by asking for SP-change analysis
1960
1961 - for each memory referencing instruction,
1962 call helperc__mem_access
1963*/
1964
/* A complication: sg_ instrumentation and h_ instrumentation need to
   be interleaved.  Since the latter is a lot more complex than the
   former, we split the sg_ instrumentation here into four functions
   and let the h_ instrumenter call them as it goes.  Hence the h_
   instrumenter drives the sg_ instrumenter.  A sketch of such a
   driver loop is given after sg_instrument_final_jump, below.

   To make this viable, the sg_ instrumenter carries what running
   state it needs in 'struct _SGEnv'.  This is exported only
   abstractly from this file.
*/
1975
1976struct _SGEnv {
1977 /* the current insn's IP */
1978 Addr64 curr_IP;
1979 /* whether the above is actually known */
1980 Bool curr_IP_known;
   /* if we find a mem ref, is it the first for this insn?  Used to
      detect insns which make more than one memory ref, a situation
      this tool cannot handle properly; so we check only the first
      ref and ignore the rest. */
1985 Bool firstRef;
1986};
1987
1988
1989/* --- Helper fns for instrumentation --- */
1990
1991static IRTemp gen_Get_SP ( IRSB* bbOut,
1992 VexGuestLayout* layout,
1993 Int hWordTy_szB )
1994{
1995 IRExpr* sp_expr;
1996 IRTemp sp_temp;
1997 IRType sp_type;
1998 /* This in effect forces the host and guest word sizes to be the
1999 same. */
2000 tl_assert(hWordTy_szB == layout->sizeof_SP);
2001 sp_type = layout->sizeof_SP == 8 ? Ity_I64 : Ity_I32;
2002 sp_expr = IRExpr_Get( layout->offset_SP, sp_type );
2003 sp_temp = newIRTemp( bbOut->tyenv, sp_type );
2004 addStmtToIRSB( bbOut, IRStmt_WrTmp( sp_temp, sp_expr ) );
2005 return sp_temp;
2006}
2007
2008static IRTemp gen_Get_FP ( IRSB* bbOut,
2009 VexGuestLayout* layout,
2010 Int hWordTy_szB )
2011{
2012 IRExpr* fp_expr;
2013 IRTemp fp_temp;
2014 IRType fp_type;
2015 /* This in effect forces the host and guest word sizes to be the
2016 same. */
2017 tl_assert(hWordTy_szB == layout->sizeof_SP);
2018 fp_type = layout->sizeof_FP == 8 ? Ity_I64 : Ity_I32;
2019 fp_expr = IRExpr_Get( layout->offset_FP, fp_type );
2020 fp_temp = newIRTemp( bbOut->tyenv, fp_type );
2021 addStmtToIRSB( bbOut, IRStmt_WrTmp( fp_temp, fp_expr ) );
2022 return fp_temp;
2023}
2024
2025static void instrument_mem_access ( IRSB* bbOut,
2026 IRExpr* addr,
2027 Int szB,
2028 Bool isStore,
2029 Int hWordTy_szB,
2030 Addr curr_IP,
2031 VexGuestLayout* layout )
2032{
2033 IRType tyAddr = Ity_INVALID;
2034 XArray* frameBlocks = NULL;
2035
2036 tl_assert(isIRAtom(addr));
2037 tl_assert(hWordTy_szB == 4 || hWordTy_szB == 8);
2038
2039 tyAddr = typeOfIRExpr( bbOut->tyenv, addr );
2040 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
2041
2042#if defined(VGA_x86)
2043 { UChar* p = (UChar*)curr_IP;
2044 // pop %ebp; RET
2045 if (p[-1] == 0x5d && p[0] == 0xc3) return;
2046 // pop %ebp; RET $imm16
2047 if (p[-1] == 0x5d && p[0] == 0xc2) return;
2048 // PUSH %EBP; mov %esp,%ebp
2049 if (p[0] == 0x55 && p[1] == 0x89 && p[2] == 0xe5) return;
2050 }
2051#endif
2052
2053 /* First off, find or create the StackBlocks for this instruction. */
2054 frameBlocks = get_StackBlocks_for_IP( curr_IP );
2055 tl_assert(frameBlocks);
2056 //if (VG_(sizeXA)( frameBlocks ) == 0)
2057 // frameBlocks = NULL;
2058
2059 /* Generate a call to "helperc__mem_access", passing:
2060 addr current_SP current_FP szB curr_IP frameBlocks
2061 */
2062 { IRTemp t_SP = gen_Get_SP( bbOut, layout, hWordTy_szB );
2063 IRTemp t_FP = gen_Get_FP( bbOut, layout, hWordTy_szB );
2064 IRExpr** args
2065 = mkIRExprVec_6( addr,
2066 IRExpr_RdTmp(t_SP),
2067 IRExpr_RdTmp(t_FP),
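                        /* the access size, with its sign encoding the
                           direction: negative for a store, positive
                           for a load; decoded in helperc__mem_access */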
2068 mkIRExpr_HWord( isStore ? (-szB) : szB ),
2069 mkIRExpr_HWord( curr_IP ),
2070 mkIRExpr_HWord( (HWord)frameBlocks ) );
2071 IRDirty* di
2072 = unsafeIRDirty_0_N( 3/*regparms*/,
2073 "helperc__mem_access",
2074 VG_(fnptr_to_fnentry)( &helperc__mem_access ),
2075 args );
2076
2077 addStmtToIRSB( bbOut, IRStmt_Dirty(di) );
2078 }
2079}
2080
2081
2082/* --- Instrumentation main (4 fns) --- */
2083
2084struct _SGEnv * sg_instrument_init ( void )
2085{
2086 struct _SGEnv * env = sg_malloc("di.sg_main.sii.1",
2087 sizeof(struct _SGEnv));
2088 tl_assert(env);
2089 env->curr_IP = 0;
2090 env->curr_IP_known = False;
2091 env->firstRef = True;
2092 return env;
2093}
2094
2095void sg_instrument_fini ( struct _SGEnv * env )
2096{
2097 sg_free(env);
2098}
2099
2100/* Add instrumentation for 'st' to 'sbOut', and possibly modify 'env'
2101 as required. This must be called before 'st' itself is added to
2102 'sbOut'. */
2103void sg_instrument_IRStmt ( /*MOD*/struct _SGEnv * env,
2104 /*MOD*/IRSB* sbOut,
2105 IRStmt* st,
2106 VexGuestLayout* layout,
2107 IRType gWordTy, IRType hWordTy )
2108{
2109 tl_assert(st);
2110 tl_assert(isFlatIRStmt(st));
2111 switch (st->tag) {
2112 case Ist_NoOp:
2113 case Ist_AbiHint:
2114 case Ist_Put:
2115 case Ist_PutI:
2116 case Ist_MBE:
2117 /* None of these can contain any memory references. */
2118 break;
2119
2120 case Ist_Exit:
2121 tl_assert(st->Ist.Exit.jk != Ijk_Call);
2122 /* else we must deal with a conditional call */
2123 break;
2124
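      /* An IMark begins each guest instruction, so this is the point
         at which the per-instruction state is (re)set. */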
2125 case Ist_IMark:
2126 env->curr_IP_known = True;
2127 env->curr_IP = (Addr)st->Ist.IMark.addr;
2128 env->firstRef = True;
2129 break;
2130
2131 case Ist_Store:
2132 tl_assert(env->curr_IP_known);
2133 if (env->firstRef) {
2134 instrument_mem_access(
2135 sbOut,
2136 st->Ist.Store.addr,
2137 sizeofIRType(typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data)),
2138 True/*isStore*/,
2139 sizeofIRType(hWordTy),
2140 env->curr_IP, layout
2141 );
2142 env->firstRef = False;
2143 }
2144 break;
2145
2146 case Ist_WrTmp: {
2147 IRExpr* data = st->Ist.WrTmp.data;
2148 if (data->tag == Iex_Load) {
2149 tl_assert(env->curr_IP_known);
2150 if (env->firstRef) {
2151 instrument_mem_access(
2152 sbOut,
2153 data->Iex.Load.addr,
2154 sizeofIRType(data->Iex.Load.ty),
2155 False/*!isStore*/,
2156 sizeofIRType(hWordTy),
2157 env->curr_IP, layout
2158 );
2159 env->firstRef = False;
2160 }
2161 }
2162 break;
2163 }
2164
2165 case Ist_Dirty: {
2166 Int dataSize;
2167 IRDirty* d = st->Ist.Dirty.details;
2168 if (d->mFx != Ifx_None) {
2169 /* This dirty helper accesses memory. Collect the
2170 details. */
2171 tl_assert(env->curr_IP_known);
2172 if (env->firstRef) {
2173 tl_assert(d->mAddr != NULL);
2174 tl_assert(d->mSize != 0);
2175 dataSize = d->mSize;
2176 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
2177 instrument_mem_access(
2178 sbOut, d->mAddr, dataSize, False/*!isStore*/,
2179 sizeofIRType(hWordTy), env->curr_IP, layout
2180 );
2181 }
2182 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
2183 instrument_mem_access(
2184 sbOut, d->mAddr, dataSize, True/*isStore*/,
2185 sizeofIRType(hWordTy), env->curr_IP, layout
2186 );
2187 }
2188 env->firstRef = False;
2189 }
2190 } else {
2191 tl_assert(d->mAddr == NULL);
2192 tl_assert(d->mSize == 0);
2193 }
2194 break;
2195 }
2196
2197 default:
2198 tl_assert(0);
2199
2200 } /* switch (st->tag) */
2201}
2202
2203
2204/* Add instrumentation for the final jump of an IRSB 'sbOut', and
2205 possibly modify 'env' as required. This must be the last
2206 instrumentation statement in the block. */
2207void sg_instrument_final_jump ( /*MOD*/struct _SGEnv * env,
2208 /*MOD*/IRSB* sbOut,
2209 IRExpr* next,
2210 IRJumpKind jumpkind,
2211 VexGuestLayout* layout,
2212 IRType gWordTy, IRType hWordTy )
2213{
2214 if (jumpkind == Ijk_Call) {
2215 // Assumes x86 or amd64
2216 IRTemp sp_post_call_insn, fp_post_call_insn;
2217 XArray* frameBlocks;
2218 IRExpr** args;
2219 IRDirty* di;
2220 sp_post_call_insn
2221 = gen_Get_SP( sbOut, layout, sizeofIRType(hWordTy) );
2222 fp_post_call_insn
2223 = gen_Get_FP( sbOut, layout, sizeofIRType(hWordTy) );
2224 tl_assert(env->curr_IP_known);
2225 frameBlocks = get_StackBlocks_for_IP( env->curr_IP );
2226 tl_assert(frameBlocks);
2227 if (VG_(sizeXA)(frameBlocks) == 0)
2228 frameBlocks = NULL;
2229 args
2230 = mkIRExprVec_5(
2231 IRExpr_RdTmp(sp_post_call_insn),
2232 IRExpr_RdTmp(fp_post_call_insn),
2233 /* assume the call doesn't change FP */
2234 next,
2235 mkIRExpr_HWord( (HWord)frameBlocks ),
2236 mkIRExpr_HWord( sizeofIRType(gWordTy) )
2237 );
2238 di = unsafeIRDirty_0_N(
2239 3/*regparms*/,
2240 "helperc__new_frame",
2241 VG_(fnptr_to_fnentry)( &helperc__new_frame ),
2242 args );
2243 addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
2244 }
2245}
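
/* A minimal sketch, for illustration only, of how a driving
   instrumenter (here, the h_ instrumenter) might call the four sg_
   entry points above.  It is guarded out of compilation; the real
   driver also interleaves its own instrumentation between these
   calls.  'sbIn' and 'sbOut' are the usual input/output superblocks
   of a VG_(instrument)-style callback. */
#if 0
static IRSB* example_sg_driver ( IRSB* sbIn, IRSB* sbOut,
                                 VexGuestLayout* layout,
                                 IRType gWordTy, IRType hWordTy )
{
   Int i;
   struct _SGEnv* env = sg_instrument_init();
   for (i = 0; i < sbIn->stmts_used; i++) {
      IRStmt* st = sbIn->stmts[i];
      /* add sg_ instrumentation for 'st', then 'st' itself */
      sg_instrument_IRStmt( env, sbOut, st, layout, gWordTy, hWordTy );
      addStmtToIRSB( sbOut, st );
   }
   /* instrument the block's final jump; on a call transfer this
      generates the helperc__new_frame call */
   sg_instrument_final_jump( env, sbOut, sbIn->next, sbIn->jumpkind,
                             layout, gWordTy, hWordTy );
   sg_instrument_fini(env);
   return sbOut;
}
#endif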
2246
2247
2248//////////////////////////////////////////////////////////////
2249// //
2250// end Instrumentation //
2251// //
2252//////////////////////////////////////////////////////////////
2253
2254
2255//////////////////////////////////////////////////////////////
2256// //
2257// misc //
2258// //
2259//////////////////////////////////////////////////////////////
2260
2261/* Make a new empty stack frame that is suitable for being the
2262 outermost frame in a stack. It has a creation_sp of effectively
2263 infinity, so it can never be removed. */
2264static StackFrame* new_root_StackFrame ( void )
2265{
2266 StackFrame* sframe = sg_malloc("di.sg_main.nrS.1", sizeof(StackFrame));
2267 VG_(memset)( sframe, 0, sizeof(*sframe) );
2268 sframe->creation_sp = ~0UL;
2269
2270 /* This sets up .htab, .htab_size and .htab_used */
2271 initialise_II_hash_table( sframe );
2272
2273 /* ->depth, ->outer, ->inner are 0, NULL, NULL */
2274
2275 return sframe;
2276}
2277
2278/* Primary routine for setting up the shadow stack for a new thread.
2279 Note that this is used to create not only child thread stacks, but
2280 the root thread's stack too. We create a new stack with
2281 .creation_sp set to infinity, so that the outermost frame can never
2282 be removed (by shadowStack_unwind). The core calls this function
2283 as soon as a thread is created. We cannot yet get its SP value,
2284 since that may not yet be set. */
2285static void shadowStack_thread_create ( ThreadId parent, ThreadId child )
2286{
2287 tl_assert(is_sane_TId(child));
2288 if (parent == VG_INVALID_THREADID) {
2289 /* creating the main thread's stack */
2290 } else {
2291 tl_assert(is_sane_TId(parent));
2292 tl_assert(parent != child);
2293 tl_assert(shadowStacks[parent] != NULL);
2294 tl_assert(siTrees[parent] != NULL);
2295 }
2296
   /* Create the child's stack.  Bear in mind we may be re-using it,
      since the core recycles ThreadIds once threads exit. */
2299 if (shadowStacks[child] == NULL) {
2300 /* First use of this stack. Just allocate an initial frame. */
2301 tl_assert(siTrees[child] == NULL);
2302 } else {
2303 StackFrame *frame, *frame2;
2304 /* re-using a stack. */
2305 /* get rid of the interval tree */
2306 tl_assert(siTrees[child] != NULL);
2307 delete_StackTree( siTrees[child] );
2308 siTrees[child] = NULL;
2309 /* Throw away all existing frames. */
2310 frame = shadowStacks[child];
2311 while (frame->outer)
2312 frame = frame->outer;
2313 tl_assert(frame->depth == 0);
2314 while (frame) {
2315 frame2 = frame->inner;
2316 if (frame2) tl_assert(1 + frame->depth == frame2->depth);
2317 sg_free(frame);
2318 frame = frame2;
2319 }
2320 shadowStacks[child] = NULL;
2321 }
2322
2323 tl_assert(shadowStacks[child] == NULL);
2324 tl_assert(siTrees[child] == NULL);
2325
2326 /* Set up the initial stack frame. */
2327 shadowStacks[child] = new_root_StackFrame();
2328
2329 /* and set up the child's stack block interval tree. */
2330 siTrees[child] = new_StackTree();
2331}
2332
2333/* Once a thread is ready to go, the core calls here. We take the
2334 opportunity to push a second frame on its stack, with the
2335 presumably valid SP value that is going to be used for the thread's
2336 startup. Hence we should always wind up with a valid outermost
2337 frame for the thread. */
2338static void shadowStack_set_initial_SP ( ThreadId tid )
2339{
2340 StackFrame* sf;
2341 tl_assert(is_sane_TId(tid));
2342 sf = shadowStacks[tid];
2343 tl_assert(sf != NULL);
2344 tl_assert(sf->outer == NULL);
2345 tl_assert(sf->inner == NULL);
2346 tl_assert(sf->creation_sp == ~0UL);
2347 shadowStack_new_frame( tid, 0, VG_(get_SP)(tid),
2348 0, VG_(get_IP)(tid), NULL );
2349}
2350
2351
2352//////////////////////////////////////////////////////////////
2353// //
2354// main-ish //
2355// //
2356//////////////////////////////////////////////////////////////
2357
/* CALLED indirectly FROM GENERATED CODE.  Calls here are created by
   sp-change analysis, as requested in pc_pre_clo_init(). */
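/* Example: with a downward-growing stack, if SP moves from 0x9000 up
   to 0x9040, the core reports old_SP = 0x9000 and len = 0x40; the new
   SP is old_SP + len = 0x9040, which is what we unwind to. */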
2360void sg_die_mem_stack ( Addr old_SP, SizeT len ) {
2361 ThreadId tid = VG_(get_running_tid)();
2362 shadowStack_unwind( tid, old_SP+len );
2363}
2364
2365void sg_pre_clo_init ( void ) {
2366 ourGlobals_init();
2367 init_StackBlocks_set();
2368 init_GlobalBlock_set();
2369}
2370
2371void sg_post_clo_init ( void ) {
2372}
2373
2374void sg_pre_thread_ll_create ( ThreadId parent, ThreadId child ) {
2375 shadowStack_thread_create(parent, child);
2376}
2377
2378void sg_pre_thread_first_insn ( ThreadId tid ) {
2379 shadowStack_set_initial_SP(tid);
2380}
2381
2382void sg_fini(Int exitcode)
2383{
2384 if (VG_(clo_verbosity) >= 2) {
2385 VG_(message)(Vg_DebugMsg,
2386 " sg_: %'llu total accesses, of which:", stats__total_accesses);
2387 VG_(message)(Vg_DebugMsg,
2388 " sg_: stack0: %'12llu classify",
2389 stats__classify_Stack0);
2390 VG_(message)(Vg_DebugMsg,
2391 " sg_: stackN: %'12llu classify",
2392 stats__classify_StackN);
2393 VG_(message)(Vg_DebugMsg,
2394 " sg_: global: %'12llu classify",
2395 stats__classify_Global);
2396 VG_(message)(Vg_DebugMsg,
2397 " sg_: unknown: %'12llu classify",
2398 stats__classify_Unknown);
2399 VG_(message)(Vg_DebugMsg,
2400 " sg_: %'llu Invars preened, of which %'llu changed",
2401 stats__Invars_preened, stats__Invars_changed);
2402 VG_(message)(Vg_DebugMsg,
2403 " sg_: t_i_b_MT: %'12llu", stats__t_i_b_empty);
2404 VG_(message)(Vg_DebugMsg,
2405 " sg_: qcache: %'llu searches, %'llu probes, %'llu misses",
2406 stats__qcache_queries, stats__qcache_probes, stats__qcache_misses);
2407 VG_(message)(Vg_DebugMsg,
2408 " sg_: htab-fast: %'llu hits",
2409 stats__htab_fast);
2410 VG_(message)(Vg_DebugMsg,
2411 " sg_: htab-slow: %'llu searches, %'llu probes, %'llu resizes",
2412 stats__htab_searches, stats__htab_probes, stats__htab_resizes);
2413 }
2414}
2415
2416
2417/*--------------------------------------------------------------------*/
2418/*--- end sg_main.c ---*/
2419/*--------------------------------------------------------------------*/