1
2/*--------------------------------------------------------------------*/
3/*--- Ptrcheck: a pointer-use checker. ---*/
4/*--- This file checks stack and global array accesses. ---*/
5/*--- sg_main.c ---*/
6/*--------------------------------------------------------------------*/
7
8/*
9 This file is part of Ptrcheck, a Valgrind tool for checking pointer
10 use in programs.
11
12 Copyright (C) 2008-2008 OpenWorks Ltd
13 info@open-works.co.uk
14
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file COPYING.
31
32 Neither the names of the U.S. Department of Energy nor the
33 University of California nor the names of its contributors may be
34 used to endorse or promote products derived from this software
35 without prior written permission.
36*/
37
38#include "pub_tool_basics.h"
39#include "pub_tool_libcbase.h"
40#include "pub_tool_libcassert.h"
41#include "pub_tool_libcprint.h"
42#include "pub_tool_tooliface.h"
43#include "pub_tool_wordfm.h"
44#include "pub_tool_xarray.h"
45#include "pub_tool_threadstate.h"
46#include "pub_tool_mallocfree.h"
47#include "pub_tool_machine.h"
48#include "pub_tool_debuginfo.h"
49#include "pub_tool_options.h"
50
51#include "pc_common.h"
52
53#include "sg_main.h" // self
54
55
56static
57void preen_global_Invars ( Addr a, SizeT len ); /*fwds*/
58
59
60//////////////////////////////////////////////////////////////
61// //
62// Basic Stuff //
63// //
64//////////////////////////////////////////////////////////////
65
66static inline Bool is_sane_TId ( ThreadId tid )
67{
68 return tid >= 0 && tid < VG_N_THREADS
69 && tid != VG_INVALID_THREADID;
70}
71
72static void* sg_malloc ( HChar* cc, SizeT n ) {
73 void* p;
74 tl_assert(n > 0);
75 p = VG_(malloc)( cc, n );
76 tl_assert(p);
77 return p;
78}
79
80static void sg_free ( void* p ) {
81 tl_assert(p);
82 VG_(free)(p);
83}
84
85
86/* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
87 first interval is lower, 1 if the first interval is higher, and 0
88 if there is any overlap. Redundant paranoia with casting is there
89 following what looked distinctly like a bug in gcc-4.1.2, in which
90 some of the comparisons were done signedly instead of
91 unsignedly. */
92inline
93static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
94 Addr a2, SizeT n2 ) {
95 UWord a1w = (UWord)a1;
96 UWord n1w = (UWord)n1;
97 UWord a2w = (UWord)a2;
98 UWord n2w = (UWord)n2;
99 tl_assert(n1w > 0 && n2w > 0);
100 if (a1w + n1w <= a2w) return -1L;
101 if (a2w + n2w <= a1w) return 1L;
102 return 0;
103}
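/* Worked example of the ordering above: cmp_nonempty_intervals(0x1000,
   16, 0x1010, 8) == -1, since [0x1000,0x1010) lies entirely below
   [0x1010,0x1018); cmp_nonempty_intervals(0x1000, 16, 0x100F, 4) == 0,
   since the two ranges share the byte at 0x100F. */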
104
105/* Return true iff [aSmall,aSmall+nSmall) is entirely contained
106 within [aBig,aBig+nBig). */
107inline
108static Bool is_subinterval_of ( Addr aBig, SizeT nBig,
109 Addr aSmall, SizeT nSmall ) {
110 tl_assert(nBig > 0 && nSmall > 0);
111 return aBig <= aSmall && aSmall + nSmall <= aBig + nBig;
112}
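/* For example, is_subinterval_of(0x1000, 16, 0x1004, 4) is True,
   whereas is_subinterval_of(0x1000, 16, 0x100C, 8) is False, because
   the smaller interval runs past the end of the larger one. */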
113
114inline
115static Addr Addr__min ( Addr a1, Addr a2 ) {
116 return a1 < a2 ? a1 : a2;
117}
118
119inline
120static Addr Addr__max ( Addr a1, Addr a2 ) {
121 return a1 < a2 ? a2 : a1;
122}
123
124
125//////////////////////////////////////////////////////////////
126// //
127// StackBlocks Persistent Cache //
128// //
129//////////////////////////////////////////////////////////////
130
131/* We maintain a set of XArray* of StackBlocks. These are never
132 freed. When a new StackBlock vector is acquired from
133 VG_(di_get_local_blocks_at_ip), we compare it to the existing set.
134 If not present, it is added. If present, the just-acquired one is
135 freed and the copy used.
136
137 This simplifies storage management elsewhere. It allows us to
138 assume that a pointer to an XArray* of StackBlock is valid forever.
139 It also means there are no duplicates anywhere, which could be
140 important from a space point of view for programs that generate a
141 lot of translations, or where translations are frequently discarded
142 and re-made.
143
144 Note that we normalise the arrays by sorting the elements according
145 to an arbitrary total order, so as to avoid the situation that two
146 vectors describe the same set of variables but are not structurally
147 identical. */
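/* For instance (a sketch, with invented variable names): if one
   translation produces the block descriptors { "buf", "arr" } in one
   order and another produces the same two blocks in the opposite
   order, sorting with the total order defined below makes both
   vectors structurally identical, so only one copy ends up in the
   persistent set. */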
148
149static inline Bool StackBlock__sane ( StackBlock* fb )
150{
151 if (fb->name[ sizeof(fb->name)-1 ] != 0)
152 return False;
153 if (fb->spRel != False && fb->spRel != True)
154 return False;
155 if (fb->isVec != False && fb->isVec != True)
156 return False;
157 return True;
158}
159
160/* Generate an arbitrary total ordering on StackBlocks. */
161static Word StackBlock__cmp ( StackBlock* fb1, StackBlock* fb2 )
162{
163 Word r;
164 tl_assert(StackBlock__sane(fb1));
165 tl_assert(StackBlock__sane(fb2));
166 /* Hopefully the .base test hits most of the time. For the blocks
167 associated with any particular instruction, if the .base values
168 are the same then probably it doesn't make sense for the other
169 fields to be different. But this is supposed to be a completely
170 general structural total order, so we have to compare everything
171 anyway. */
172 if (fb1->base < fb2->base) return -1;
173 if (fb1->base > fb2->base) return 1;
174 /* compare sizes */
175 if (fb1->szB < fb2->szB) return -1;
176 if (fb1->szB > fb2->szB) return 1;
177 /* compare sp/fp flag */
178 if (fb1->spRel < fb2->spRel) return -1;
179 if (fb1->spRel > fb2->spRel) return 1;
180 /* compare is/is-not array-typed flag */
181 if (fb1->isVec < fb2->isVec) return -1;
182 if (fb1->isVec > fb2->isVec) return 1;
183 /* compare the name */
184 r = (Word)VG_(strcmp)(fb1->name, fb2->name);
185 return r;
186}
187
188/* Returns True if all fields except .szB are the same. szBs may or
189 may not be the same; they are simply not consulted. */
190static Bool StackBlock__all_fields_except_szB_are_equal (
191 StackBlock* fb1,
192 StackBlock* fb2
193 )
194{
195 tl_assert(StackBlock__sane(fb1));
196 tl_assert(StackBlock__sane(fb2));
197 return fb1->base == fb2->base
198 && fb1->spRel == fb2->spRel
199 && fb1->isVec == fb2->isVec
200 && 0 == VG_(strcmp)(fb1->name, fb2->name);
201}
202
203
204/* Generate an arbitrary total ordering on vectors of StackBlocks. */
205static Word StackBlocks__cmp ( XArray* fb1s, XArray* fb2s )
206{
207 Word i, r, n1, n2;
208 n1 = VG_(sizeXA)( fb1s );
209 n2 = VG_(sizeXA)( fb2s );
210 if (n1 < n2) return -1;
211 if (n1 > n2) return 1;
212 for (i = 0; i < n1; i++) {
213 StackBlock *fb1, *fb2;
214 fb1 = VG_(indexXA)( fb1s, i );
215 fb2 = VG_(indexXA)( fb2s, i );
216 r = StackBlock__cmp( fb1, fb2 );
217 if (r != 0) return r;
218 }
219 tl_assert(i == n1 && i == n2);
220 return 0;
221}
222
223static void pp_StackBlock ( StackBlock* sb )
224{
225 VG_(printf)("StackBlock{ off %ld szB %lu spRel:%c isVec:%c \"%s\" }",
226 sb->base, sb->szB, sb->spRel ? 'Y' : 'N',
227 sb->isVec ? 'Y' : 'N', &sb->name[0] );
228}
229
230static void pp_StackBlocks ( XArray* sbs )
231{
232 Word i, n = VG_(sizeXA)( sbs );
233 VG_(printf)("<<< STACKBLOCKS\n" );
234 for (i = 0; i < n; i++) {
235 VG_(printf)(" ");
236 pp_StackBlock( (StackBlock*)VG_(indexXA)( sbs, i ) );
237 VG_(printf)("\n");
238 }
239 VG_(printf)(">>> STACKBLOCKS\n" );
240}
241
242
243/* ---------- The StackBlock vector cache ---------- */
244
245static WordFM* /* XArray* of StackBlock -> nothing */
246 frameBlocks_set = NULL;
247
248static void init_StackBlocks_set ( void )
249{
250 tl_assert(!frameBlocks_set);
251 frameBlocks_set
252 = VG_(newFM)( sg_malloc, "di.sg_main.iSBs.1", sg_free,
253 (Word(*)(UWord,UWord))StackBlocks__cmp );
254 tl_assert(frameBlocks_set);
255}
256
257/* Find the given StackBlock-vector in our collection thereof. If
258 found, deallocate the supplied one, and return the address of the
259 copy. If not found, add the supplied one to our collection and
260 return its address. */
261static XArray* /* of StackBlock */
262 StackBlocks__find_and_dealloc__or_add
263 ( XArray* /* of StackBlock */ orig )
264{
265 UWord key, val;
266
267 /* First, normalise, as per comments above. */
268 VG_(setCmpFnXA)( orig, (Int(*)(void*,void*))StackBlock__cmp );
269 VG_(sortXA)( orig );
270
271 /* Now get rid of any exact duplicates. */
272 nuke_dups:
273 { Word r, w, nEQ, n = VG_(sizeXA)( orig );
274 if (n >= 2) {
275 w = 0;
276 nEQ = 0;
277 for (r = 0; r < n; r++) {
278 if (r+1 < n) {
279 StackBlock* pR0 = VG_(indexXA)( orig, r+0 );
280 StackBlock* pR1 = VG_(indexXA)( orig, r+1 );
281 Word c = StackBlock__cmp(pR0,pR1);
282 tl_assert(c == -1 || c == 0);
283 if (c == 0) { nEQ++; continue; }
284 }
285 if (w != r) {
286 StackBlock* pW = VG_(indexXA)( orig, w );
287 StackBlock* pR = VG_(indexXA)( orig, r );
288 *pW = *pR;
289 }
290 w++;
291 }
292 tl_assert(r == n);
293 tl_assert(w + nEQ == n);
294 if (w < n) {
295 VG_(dropTailXA)( orig, n-w );
296 }
297 if (0) VG_(printf)("delta %ld\n", n-w);
298 }
299 }
300
301 /* Deal with the following strangeness, where two otherwise
302 identical blocks are claimed to have different sizes; in that
303 case we use the larger size. */
304 /* StackBlock{ off 16 szB 66 spRel:Y isVec:Y "sz" }
305 StackBlock{ off 16 szB 130 spRel:Y isVec:Y "sz" }
306 StackBlock{ off 208 szB 16 spRel:Y isVec:Y "ar" }
307 */
308 { Word i, n = VG_(sizeXA)( orig );
309 if (n >= 2) {
310 for (i = 0; i < n-1; i++) {
311 StackBlock* sb0 = VG_(indexXA)( orig, i+0 );
312 StackBlock* sb1 = VG_(indexXA)( orig, i+1 );
313 if (StackBlock__all_fields_except_szB_are_equal(sb0, sb1)) {
314 /* They can't be identical because the previous tidying
315 pass would have removed the duplicates. And they
316 can't be > because the earlier sorting pass would
317 have ordered otherwise-identical descriptors
318 according to < on .szB fields. Hence: */
319 tl_assert(sb0->szB < sb1->szB);
320 sb0->szB = sb1->szB;
321 /* This makes the blocks identical, at the size of the
322 larger one. Rather than go to all the hassle of
323 sliding the rest down, simply go back to the
324 remove-duplicates stage. The assertion guarantees
325 that we eventually make progress, since the rm-dups
326 stage will get rid of one of the blocks. This is
327 expected to happen only exceedingly rarely. */
328 tl_assert(StackBlock__cmp(sb0,sb1) == 0);
329 goto nuke_dups;
330 }
331 }
332 }
333 }
334
335 /* A rather poor sanity check on the results. */
336 { Word i, n = VG_(sizeXA)( orig );
337 for (i = 0; i < n-1; i++) {
338 StackBlock* sb1 = (StackBlock*)VG_(indexXA)( orig, i );
339 StackBlock* sb2 = (StackBlock*)VG_(indexXA)( orig, i+1 );
340 if (sb1->base == sb2->base)
341 pp_StackBlocks(orig);
342 tl_assert(sb1->base != sb2->base);
343 }
344 }
345
346 /* Now, do we have it already? */
347 if (VG_(lookupFM)( frameBlocks_set, &key, &val, (UWord)orig )) {
348 /* yes */
349 XArray* res;
350 tl_assert(val == 0);
351 tl_assert(key != (UWord)orig);
352 VG_(deleteXA)(orig);
353 res = (XArray*)key;
354 return res;
355 } else {
356 /* no */
357 VG_(addToFM)( frameBlocks_set, (UWord)orig, 0 );
358 return orig;
359 }
360}
361
362/* Top level function for getting the StackBlock vector for a given
363 instruction. It is guaranteed that the returned pointer will be
364 valid for the entire rest of the run, and also that the addresses
365 of the individual elements of the array will not change. */
366
367static XArray* /* of StackBlock */ get_StackBlocks_for_IP ( Addr ip )
368{
369 XArray* blocks = VG_(di_get_stack_blocks_at_ip)( ip, True/*arrays only*/ );
370 tl_assert(blocks);
371 return StackBlocks__find_and_dealloc__or_add( blocks );
372}
373
374
375//////////////////////////////////////////////////////////////
376// //
377// GlobalBlocks Persistent Cache //
378// //
379//////////////////////////////////////////////////////////////
380
381/* Generate an arbitrary total ordering on GlobalBlocks. */
382static Word GlobalBlock__cmp ( GlobalBlock* gb1, GlobalBlock* gb2 )
383{
384 Word r;
385 /* compare addrs */
386 if (gb1->addr < gb2->addr) return -1;
387 if (gb1->addr > gb2->addr) return 1;
388 /* compare sizes */
389 if (gb1->szB < gb2->szB) return -1;
390 if (gb1->szB > gb2->szB) return 1;
391 /* compare is/is-not array-typed flag */
392 if (gb1->isVec < gb2->isVec) return -1;
393 if (gb1->isVec > gb2->isVec) return 1;
394 /* compare the name */
395 r = (Word)VG_(strcmp)(gb1->name, gb2->name);
396 if (r != 0) return r;
397 /* compare the soname */
398 r = (Word)VG_(strcmp)(gb1->soname, gb2->soname);
399 return r;
400}
401
402static WordFM* /* GlobalBlock* -> nothing */
403 globalBlock_set = NULL;
404
405static void init_GlobalBlock_set ( void )
406{
407 tl_assert(!globalBlock_set);
408 globalBlock_set
409 = VG_(newFM)( sg_malloc, "di.sg_main.iGBs.1", sg_free,
410 (Word(*)(UWord,UWord))GlobalBlock__cmp );
411 tl_assert(globalBlock_set);
412}
413
414
415/* Top level function for making GlobalBlocks persistent. Call here
416 with a non-persistent version, and the returned one is guaranteed
417 to be valid for the entire rest of the run. The supplied one is
418 copied, not stored, so can be freed after the call. */
419
420static GlobalBlock* get_persistent_GlobalBlock ( GlobalBlock* orig )
421{
422 UWord key, val;
423 /* Now, do we have it already? */
424 if (VG_(lookupFM)( globalBlock_set, &key, &val, (UWord)orig )) {
425 /* yes, return the copy */
426 GlobalBlock* res;
427 tl_assert(val == 0);
428 res = (GlobalBlock*)key;
429 tl_assert(res != orig);
430 return res;
431 } else {
432 /* no. clone it, store the clone and return the clone's
433 address. */
434 GlobalBlock* clone = sg_malloc( "di.sg_main.gpGB.1",
435 sizeof(GlobalBlock) );
436 tl_assert(clone);
437 *clone = *orig;
438 VG_(addToFM)( globalBlock_set, (UWord)clone, 0 );
439 return clone;
440 }
441}
442
443
444//////////////////////////////////////////////////////////////
445// //
446// Interval tree of StackTreeBlock //
447// //
448//////////////////////////////////////////////////////////////
449
450/* A node in a stack interval tree. Zero length intervals (.szB == 0)
451 are not allowed.
452
453 A stack interval tree is a (WordFM StackTreeNode* void). There is
454 one stack interval tree for each thread.
455*/
456typedef
457 struct {
458 Addr addr;
459 SizeT szB; /* copied from .descr->szB */
460 StackBlock* descr; /* it's an instance of this block */
461 UWord depth; /* depth of stack at time block was pushed */
462 }
463 StackTreeNode;
464
465static void pp_StackTree ( WordFM* sitree, HChar* who )
466{
467 UWord keyW, valW;
468 VG_(printf)("<<< BEGIN pp_StackTree %s\n", who );
469 VG_(initIterFM)( sitree );
470 while (VG_(nextIterFM)( sitree, &keyW, &valW )) {
471 StackTreeNode* nd = (StackTreeNode*)keyW;
472 VG_(printf)(" [%#lx,+%lu) descr=%p %s %lu\n", nd->addr, nd->szB,
473 nd->descr, nd->descr->name, nd->descr->szB);
474 }
475 VG_(printf)(">>> END pp_StackTree %s\n", who );
476}
477
478/* Interval comparison function for StackTreeNode */
479static Word cmp_intervals_StackTreeNode ( StackTreeNode* sn1,
480 StackTreeNode* sn2 )
481{
482 return cmp_nonempty_intervals(sn1->addr, sn1->szB,
483 sn2->addr, sn2->szB);
484}
485
486/* Find the node holding 'a', if any. */
487static StackTreeNode* find_StackTreeNode ( WordFM* sitree, Addr a )
488{
489 UWord keyW, valW;
490 StackTreeNode key;
491 tl_assert(sitree);
492 key.addr = a;
493 key.szB = 1;
494 if (VG_(lookupFM)( sitree, &keyW, &valW, (UWord)&key )) {
495 StackTreeNode* res = (StackTreeNode*)keyW;
496 tl_assert(valW == 0);
497 tl_assert(res != &key);
498 return res;
499 } else {
500 return NULL;
501 }
502}
503
504/* Note that the supplied XArray of StackBlock must have been
505 made persistent already. */
506__attribute__((noinline))
507static void add_blocks_to_StackTree (
508 /*MOD*/WordFM* sitree,
509 XArray* /* StackBlock */ descrs,
510 XArray* /* Addr */ bases,
511 UWord depth
512 )
513{
514 Bool debug = (Bool)0;
515 Word i, nDescrs, nBases;
516
517 nDescrs = VG_(sizeXA)( descrs );
518 nBases = VG_(sizeXA)( bases );
519 tl_assert(nDescrs == nBases);
520
521 if (nDescrs == 0) return;
522
523 tl_assert(sitree);
524 if (debug) {
525 VG_(printf)("\ndepth = %lu\n", depth);
526 pp_StackTree( sitree, "add_blocks_to_StackTree-pre" );
527 pp_StackBlocks(descrs);
528 }
529
530 for (i = 0; i < nDescrs; i++) {
531 Bool already_present;
532 StackTreeNode* nyu;
533 Addr addr = *(Addr*)VG_(indexXA)( bases, i );
534 StackBlock* descr = (StackBlock*)VG_(indexXA)( descrs, i );
535 tl_assert(descr->szB > 0);
536 nyu = sg_malloc( "di.sg_main.abtST.1", sizeof(StackTreeNode) );
537 nyu->addr = addr;
538 nyu->szB = descr->szB;
539 nyu->descr = descr;
540 nyu->depth = depth;
541 if (debug) VG_(printf)("ADD %#lx %lu\n", addr, descr->szB);
542 already_present = VG_(addToFM)( sitree, (UWord)nyu, 0 );
543 /* The interval can't already be there; else we have
544 overlapping stack blocks. */
545 tl_assert(!already_present);
546 if (debug) {
547 pp_StackTree( sitree, "add_blocks_to_StackTree-step" );
548 }
549 }
550 if (debug) {
551 pp_StackTree( sitree, "add_blocks_to_StackTree-post" );
552 VG_(printf)("\n");
553 }
554}
555
556static void del_blocks_from_StackTree ( /*MOD*/WordFM* sitree,
557 XArray* /* Addr */ bases )
558{
559 UWord oldK, oldV;
560 Word i, nBases = VG_(sizeXA)( bases );
561 for (i = 0; i < nBases; i++) {
562 Bool b;
563 Addr addr = *(Addr*)VG_(indexXA)( bases, i );
564 StackTreeNode* nd = find_StackTreeNode(sitree, addr);
565 /* The interval must be there; we added it earlier when
566 the associated frame was created. */
567 tl_assert(nd);
568 b = VG_(delFromFM)( sitree, &oldK, &oldV, (UWord)nd );
569 /* we just found the block! */
570 tl_assert(b);
571 tl_assert(oldV == 0);
572 tl_assert(nd == (StackTreeNode*)oldK);
573 sg_free(nd);
574 }
575}
576
577
578static void delete_StackTree__kFin ( UWord keyW ) {
579 StackTreeNode* nd = (StackTreeNode*)keyW;
580 tl_assert(nd);
581 sg_free(nd);
582}
583static void delete_StackTree__vFin ( UWord valW ) {
584 tl_assert(valW == 0);
585}
586static void delete_StackTree ( WordFM* sitree )
587{
588 VG_(deleteFM)( sitree,
589 delete_StackTree__kFin, delete_StackTree__vFin );
590}
591
592static WordFM* new_StackTree ( void ) {
593 return VG_(newFM)( sg_malloc, "di.sg_main.nST.1", sg_free,
594 (Word(*)(UWord,UWord))cmp_intervals_StackTreeNode );
595}
596
597
598//////////////////////////////////////////////////////////////
599// //
600// Interval tree of GlobalTreeBlock //
601// //
602//////////////////////////////////////////////////////////////
603
604/* A node in a global interval tree. Zero length intervals
605 (.szB == 0) are not allowed.
606
607 A global interval tree is a (WordFM GlobalTreeNode* void). There
608 is one global interval tree for the entire process.
609*/
610typedef
611 struct {
612 Addr addr; /* copied from .descr->addr */
613 SizeT szB; /* copied from .descr->szB */
614 GlobalBlock* descr; /* it's this block */
615 }
616 GlobalTreeNode;
617
618static void GlobalTreeNode__pp ( GlobalTreeNode* nd ) {
619 tl_assert(nd->descr);
620 VG_(printf)("GTNode [%#lx,+%ld) %s",
621 nd->addr, nd->szB, nd->descr->name);
622}
623
624static void GlobalTree__pp ( WordFM* /* of (GlobalTreeNode,void) */ gitree,
625 HChar* who )
626{
627 UWord keyW, valW;
628 GlobalTreeNode* nd;
629 VG_(printf)("<<< GlobalBlockTree (%s)\n", who);
630 VG_(initIterFM)( gitree );
631 while (VG_(nextIterFM)( gitree, &keyW, &valW )) {
632 tl_assert(valW == 0);
633 nd = (GlobalTreeNode*)keyW;
634 VG_(printf)(" ");
635 GlobalTreeNode__pp(nd);
636 VG_(printf)("\n");
637 }
638 VG_(doneIterFM)( gitree );
639 VG_(printf)(">>>\n");
640}
641
642/* Interval comparison function for GlobalTreeNode */
643static Word cmp_intervals_GlobalTreeNode ( GlobalTreeNode* gn1,
644 GlobalTreeNode* gn2 )
645{
646 return cmp_nonempty_intervals( gn1->addr, gn1->szB,
647 gn2->addr, gn2->szB );
648}
649
650/* Find the node holding 'a', if any. */
651static GlobalTreeNode* find_GlobalTreeNode ( WordFM* gitree, Addr a )
652{
653 UWord keyW, valW;
654 GlobalTreeNode key;
655 key.addr = a;
656 key.szB = 1;
657 if (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
658 GlobalTreeNode* res = (GlobalTreeNode*)keyW;
659 tl_assert(valW == 0);
660 tl_assert(res != &key);
661 return res;
662 } else {
663 return NULL;
664 }
665}
666
667/* Note that the supplied GlobalBlock must have been made persistent
668 already. */
669static void add_block_to_GlobalTree (
670 /*MOD*/WordFM* gitree,
671 GlobalBlock* descr
672 )
673{
674 Bool already_present;
675 GlobalTreeNode *nyu, *nd;
676 UWord keyW, valW;
677
678 tl_assert(descr->szB > 0);
679 nyu = sg_malloc( "di.sg_main.abtG.1", sizeof(GlobalTreeNode) );
680 nyu->addr = descr->addr;
681 nyu->szB = descr->szB;
682 nyu->descr = descr;
683
684 /* Basically it's an error to add a global block to the tree that
685 is already in the tree. However, detect and ignore attempts to
686 insert exact duplicates; they do appear for some reason
687 (possibly a bug in m_debuginfo?) */
688 already_present = VG_(lookupFM)( gitree, &keyW, &valW, (UWord)nyu );
689 if (already_present) {
690 tl_assert(valW == 0);
691 nd = (GlobalTreeNode*)keyW;
692 tl_assert(nd);
693 tl_assert(nd != nyu);
694 tl_assert(nd->descr);
695 tl_assert(nyu->descr);
696 if (nd->addr == nyu->addr && nd->szB == nyu->szB
697 /* && 0 == VG_(strcmp)(nd->descr->name, nyu->descr->name) */
698 /* Although it seems reasonable to demand that duplicate
699 blocks have identical names, that is too strict. For
700 example, reading debuginfo from glibc produces two
701 otherwise identical blocks with names "tzname" and
702 "__tzname". A constraint of the form "must be identical,
703 or one must be a substring of the other" would fix that.
704 However, such trickery is scuppered by the fact that we
705 truncate all variable names to 15 characters to make
706 storage management simpler, hence giving pairs like
707 "__EI___pthread_" (truncated) vs "__pthread_keys". So
708 it's simplest just to skip the name comparison
709 completely. */
710 && 0 == VG_(strcmp)(nd->descr->soname, nyu->descr->soname)) {
711 /* exact duplicate; ignore it */
712 sg_free(nyu);
713 return;
714 }
715 /* else fall through; the assertion below will catch it */
716 }
717
718 already_present = VG_(addToFM)( gitree, (UWord)nyu, 0 );
719 /* The interval can't already be there; else we have
720 overlapping global blocks. */
721 if (already_present) {
722 GlobalTree__pp( gitree, "add_block_to_GlobalTree: non-exact duplicate" );
723 VG_(printf)("Overlapping block: ");
724 GlobalTreeNode__pp(nyu);
725 VG_(printf)("\n");
726 }
727 tl_assert(!already_present);
728}
729
730static Bool del_GlobalTree_range ( /*MOD*/WordFM* gitree,
731 Addr a, SizeT szB )
732{
733 /* One easy way to do this: look up [a,a+szB) in the tree. That
734 will either succeed, producing a block which intersects that
735 range, in which case we delete it and repeat; or it will fail,
736 in which case there are no blocks intersecting the range, and we
737 can bring the process to a halt. */
738 UWord keyW, valW, oldK, oldV;
739 GlobalTreeNode key, *nd;
740 Bool b, anyFound;
741
742 tl_assert(szB > 0);
743
744 anyFound = False;
745
746 key.addr = a;
747 key.szB = szB;
748
749 while (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
750 anyFound = True;
751 nd = (GlobalTreeNode*)keyW;
752 tl_assert(valW == 0);
753 tl_assert(nd != &key);
754 tl_assert(cmp_nonempty_intervals(a, szB, nd->addr, nd->szB) == 0);
755
756 b = VG_(delFromFM)( gitree, &oldK, &oldV, (UWord)&key );
757 tl_assert(b);
758 tl_assert(oldV == 0);
759 tl_assert(oldK == keyW); /* check we deleted the node we just found */
760 }
761
762 return anyFound;
763}
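/* Sketch of the loop above (addresses invented for illustration): if
   the tree holds blocks at [0x5000,+64) and [0x6000,+32), then
   del_GlobalTree_range(gitree, 0x5000, 0x2000) looks up the range
   repeatedly, deleting each intersecting block it finds, and returns
   True; an identical second call would find nothing and return False. */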
764
765
766//////////////////////////////////////////////////////////////
767// //
768// Invar //
769// //
770//////////////////////////////////////////////////////////////
771
772/* An invariant, as resulting from watching the destination of a
773 memory referencing instruction. Initially it is Inv_Unset until the
774 instruction makes a first access. */
775
776typedef
777 enum {
778 Inv_Unset=1, /* not established yet */
779 Inv_Unknown, /* unknown location */
780 Inv_Stack0, /* array-typed stack block in innermost frame */
781 Inv_StackN, /* array-typed stack block in non-innermost frame */
782 Inv_Global, /* array-typed global block */
783 }
784 InvarTag;
785
786typedef
787 struct {
788 InvarTag tag;
789 union {
790 struct {
791 } Unset;
792 struct {
793 } Unknown;
794 struct {
795 Addr addr;
796 SizeT szB;
797 StackBlock* descr;
798 } Stack0; /* innermost stack frame */
799 struct {
800 /* Pointer to a node in the interval tree for
801 this thread. */
802 StackTreeNode* nd;
803 } StackN; /* non-innermost stack frame */
804 struct {
805 /* Pointer to a GlobalBlock in the interval tree of
806 global blocks. */
807 GlobalTreeNode* nd;
808 } Global;
809 }
810 Inv;
811 }
812 Invar;
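/* Sketch of the Invar life-cycle (see helperc__mem_access below): the
   first access by a given instruction instance classifies the address
   and records, say, Inv_Stack0 for some local array; if a later
   execution of that same instruction is classified differently, e.g.
   Inv_Global for a global array, the two Invars compare unequal, an
   error is reported, and the new observation replaces the old one. */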
813
814/* Partial debugging printing for an Invar. */
815static void pp_Invar ( Invar* i )
816{
817 switch (i->tag) {
818 case Inv_Unset:
819 VG_(printf)("Unset");
820 break;
821 case Inv_Unknown:
822 VG_(printf)("Unknown");
823 break;
824 case Inv_Stack0:
825 VG_(printf)("Stack0 [%#lx,+%lu)",
826 i->Inv.Stack0.addr, i->Inv.Stack0.szB);
827 break;
828 case Inv_StackN:
829 VG_(printf)("StackN [%#lx,+%lu)",
830 i->Inv.StackN.nd->addr, i->Inv.StackN.nd->szB);
831 break;
832 case Inv_Global:
833 VG_(printf)("Global [%#lx,+%lu)",
834 i->Inv.Global.nd->addr, i->Inv.Global.nd->szB);
835 break;
836 default:
837 tl_assert(0);
838 }
839}
840
841/* Compare two Invars for equality. */
842static Bool eq_Invar ( Invar* i1, Invar* i2 )
843{
844 tl_assert(i1->tag != Inv_Unset);
845 tl_assert(i2->tag != Inv_Unset);
846 if (i1->tag != i2->tag)
847 return False;
848 switch (i1->tag) {
849 case Inv_Unknown:
850 return True;
851 case Inv_Stack0:
852 return i1->Inv.Stack0.addr == i2->Inv.Stack0.addr
853 && i1->Inv.Stack0.szB == i2->Inv.Stack0.szB;
854 case Inv_StackN:
855 return i1->Inv.StackN.nd == i2->Inv.StackN.nd;
856 case Inv_Global:
857 return i1->Inv.Global.nd == i2->Inv.Global.nd;
858 default:
859 tl_assert(0);
860 }
861 /*NOTREACHED*/
862 tl_assert(0);
863}
864
865/* Print selected parts of an Invar, suitable for use in error
866 messages. */
867static void show_Invar( HChar* buf, Word nBuf, Invar* inv, Word depth )
868{
869 HChar* str;
870 tl_assert(nBuf >= 96);
871 buf[0] = 0;
872 switch (inv->tag) {
873 case Inv_Unknown:
874 VG_(sprintf)(buf, "%s", "unknown");
875 break;
876 case Inv_Stack0:
877 str = "array";
878 VG_(sprintf)(buf, "stack %s \"%s\" in this frame",
879 str, inv->Inv.Stack0.descr->name );
880 break;
881 case Inv_StackN:
882 str = "array";
883 VG_(sprintf)(buf, "stack %s \"%s\" in frame %lu back from here",
884 str, inv->Inv.StackN.nd->descr->name,
885 depth - inv->Inv.StackN.nd->depth );
886 break;
887 case Inv_Global:
888 str = "array";
889 VG_(sprintf)(buf, "global %s \"%s\" in object with soname \"%s\"",
890 str, inv->Inv.Global.nd->descr->name,
891 inv->Inv.Global.nd->descr->soname );
892 break;
893 case Inv_Unset:
894 VG_(sprintf)(buf, "%s", "Unset!");
895 break;
896 default:
897 tl_assert(0);
898 }
899}
900
901
902//////////////////////////////////////////////////////////////
903// //
904// our globals //
905// //
906//////////////////////////////////////////////////////////////
907
908//////////////////////////////////////////////////////////////
909///
910
911#define N_QCACHE 16
912
913/* Powers of two only, else the result will be chaos */
914#define QCACHE_ADVANCE_EVERY 16
915
916/* Per-thread query cache. Note that the invar can only be Inv_StackN
917 (but not Inv_Stack0), Inv_Global or Inv_Unknown. */
918typedef
919 struct {
920 Addr addr;
921 SizeT szB;
922 Invar inv;
923 }
924 QCElem;
925
926typedef
927 struct {
928 Word nInUse;
929 QCElem elems[N_QCACHE];
930 }
931 QCache;
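/* Sketch of the cache policy (as implemented in classify_address
   below): lookups scan elems[] linearly for an entry covering the
   queried range; on a hit, roughly every QCACHE_ADVANCE_EVERY-th time,
   the matching entry is swapped one slot towards the front, so hot
   entries migrate forwards; after a miss, the freshly classified
   interval (when representable) is inserted near the middle of
   elems[]. */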
932
933static void QCache__invalidate ( QCache* qc ) {
934 tl_assert(qc->nInUse >= 0);
935 qc->nInUse = 0;
936}
937
938static void QCache__pp ( QCache* qc, HChar* who )
939{
940 Word i;
941 VG_(printf)("<<< QCache with %ld elements (%s)\n", qc->nInUse, who);
942 for (i = 0; i < qc->nInUse; i++) {
943 VG_(printf)(" [%#lx,+%#lx) ", qc->elems[i].addr, qc->elems[i].szB);
944 pp_Invar(&qc->elems[i].inv);
945 VG_(printf)("\n");
946 }
947 VG_(printf)(">>>\n");
948}
949
950static ULong stats__qcache_queries = 0;
951static ULong stats__qcache_misses = 0;
952static ULong stats__qcache_probes = 0;
953
954///
955//////////////////////////////////////////////////////////////
956
957/* Each thread has:
958 * a shadow stack of StackFrames, which is a doubly-linked list
959 * a stack block interval tree
960*/
961static struct _StackFrame* shadowStacks[VG_N_THREADS];
962
963static WordFM* /* StackTreeNode */ siTrees[VG_N_THREADS];
964
965static QCache qcaches[VG_N_THREADS];
966
967
968/* Additionally, there is one global variable interval tree
969 for the entire process.
970*/
971static WordFM* /* GlobalTreeNode */ giTree;
972
973
974static void invalidate_all_QCaches ( void )
975{
976 Word i;
977 for (i = 0; i < VG_N_THREADS; i++) {
978 QCache__invalidate( &qcaches[i] );
979 }
980}
981
982static void ourGlobals_init ( void )
983{
984 Word i;
985 for (i = 0; i < VG_N_THREADS; i++) {
986 shadowStacks[i] = NULL;
987 siTrees[i] = NULL;
988 }
989 invalidate_all_QCaches();
990 giTree = VG_(newFM)( sg_malloc, "di.sg_main.oGi.1", sg_free,
991 (Word(*)(UWord,UWord))cmp_intervals_GlobalTreeNode );
992}
993
994
995//////////////////////////////////////////////////////////////
996// //
997// Handle global variable load/unload events //
998// //
999//////////////////////////////////////////////////////////////
1000
1001static void acquire_globals ( ULong di_handle )
1002{
1003 Word n, i;
1004 XArray* /* of GlobalBlock */ gbs;
1005 if (0) VG_(printf)("ACQUIRE GLOBALS %llu\n", di_handle );
1006 gbs = VG_(di_get_global_blocks_from_dihandle)
1007 (di_handle, True/*arrays only*/);
1008 if (0) VG_(printf)(" GOT %ld globals\n", VG_(sizeXA)( gbs ));
1009
1010 n = VG_(sizeXA)( gbs );
1011 for (i = 0; i < n; i++) {
1012 GlobalBlock* gbp;
1013 GlobalBlock* gb = VG_(indexXA)( gbs, i );
1014 if (0) VG_(printf)(" new Global size %2lu at %#lx: %s %s\n",
1015 gb->szB, gb->addr, gb->soname, gb->name );
1016 tl_assert(gb->szB > 0);
1017 /* Make a persistent copy of each GlobalBlock, and add it
1018 to the tree. */
1019 gbp = get_persistent_GlobalBlock( gb );
1020 add_block_to_GlobalTree( giTree, gbp );
1021 }
1022
1023 VG_(deleteXA)( gbs );
1024}
1025
1026
1027/* We only intercept these two because we need to see any di_handles
1028 that might arise from the mappings/allocations. */
1029void sg_new_mem_mmap( Addr a, SizeT len,
1030 Bool rr, Bool ww, Bool xx, ULong di_handle )
1031{
1032 if (di_handle > 0)
1033 acquire_globals(di_handle);
1034}
1035void sg_new_mem_startup( Addr a, SizeT len,
1036 Bool rr, Bool ww, Bool xx, ULong di_handle )
1037{
1038 if (di_handle > 0)
1039 acquire_globals(di_handle);
1040}
1041void sg_die_mem_munmap ( Addr a, SizeT len )
1042{
1043 Bool debug = (Bool)0;
1044 Bool overlap = False;
1045
1046 if (debug) VG_(printf)("MUNMAP %#lx %lu\n", a, len );
1047
1048 if (len == 0)
1049 return;
1050
1051 overlap = del_GlobalTree_range(giTree, a, len);
1052
1053 { /* redundant sanity check */
1054 UWord keyW, valW;
1055 VG_(initIterFM)( giTree );
1056 while (VG_(nextIterFM)( giTree, &keyW, &valW )) {
1057 GlobalTreeNode* nd = (GlobalTreeNode*)keyW;
1058 tl_assert(valW == 0);
1059 tl_assert(nd->szB > 0);
1060 tl_assert(nd->addr + nd->szB <= a
1061 || a + len <= nd->addr);
1062 }
1063 VG_(doneIterFM)( giTree );
1064 }
1065
1066 if (!overlap)
1067 return;
1068
1069 /* Ok, the range contained some blocks. Therefore we'll need to
1070 visit all the Invars in all the thread shadow stacks, and
1071 convert all Inv_Global entries that intersect [a,a+len) to
1072 Inv_Unknown. */
1073 tl_assert(len > 0);
1074 preen_global_Invars( a, len );
1075 invalidate_all_QCaches();
1076}
1077
1078
1079//////////////////////////////////////////////////////////////
1080// //
1081// StackFrame //
1082// //
1083//////////////////////////////////////////////////////////////
1084
1085static ULong stats__total_accesses = 0;
1086static ULong stats__classify_Stack0 = 0;
1087static ULong stats__classify_StackN = 0;
1088static ULong stats__classify_Global = 0;
1089static ULong stats__classify_Unknown = 0;
1090static ULong stats__Invars_preened = 0;
1091static ULong stats__Invars_changed = 0;
1092static ULong stats__t_i_b_empty = 0;
1093static ULong stats__htab_fast = 0;
1094static ULong stats__htab_searches = 0;
1095static ULong stats__htab_probes = 0;
1096static ULong stats__htab_resizes = 0;
1097
1098
1099/* A dynamic instance of an instruction */
1100typedef
1101 struct {
1102 /* IMMUTABLE */
1103 Addr insn_addr; /* NB! zero means 'not in use' */
1104 XArray* blocks; /* XArray* of StackBlock, or NULL if none */
1105 /* MUTABLE */
1106 Invar invar;
1107 }
1108 IInstance;
1109
1110
1111#define N_HTAB_FIXED 64
1112
1113typedef
1114 struct _StackFrame {
1115 /* The sp when the frame was created, so we know when to get rid
1116 of it. */
1117 Addr creation_sp;
1118 /* The stack frames for a thread are arranged as a doubly linked
1119 list. Obviously the outermost frame in the stack has .outer
1120 as NULL and the innermost in theory has .inner as NULL.
1121 However, when a function returns, we don't delete the
1122 just-vacated StackFrame. Instead, it is retained in the list
1123 and will be re-used when the next call happens. This is so
1124 as to avoid constantly having to dynamically allocate and
1125 deallocate frames. */
1126 struct _StackFrame* inner;
1127 struct _StackFrame* outer;
1128 Word depth; /* 0 for outermost; increases inwards */
1129 /* Information for each memory referencing instruction, for this
1130 instantiation of the function. The iinstances array is
1131 operated as a simple linear-probe hash table, which is
1132 dynamically expanded as necessary. Once critical thing is
1133 that an IInstance with a .insn_addr of zero is interpreted to
1134 mean that hash table slot is unused. This means we can't
1135 store an IInstance for address zero. */
1136 /* Note that htab initially points to htab_fixed. If htab_fixed
1137 turns out not to be big enough then htab is made to point to
1138 dynamically allocated memory. But it's often the case that
1139 htab_fixed is big enough, so this optimisation saves a huge
1140 number of sg_malloc/sg_free call pairs. */
1141 IInstance* htab;
1142 UWord htab_size; /* size of hash table, MAY ONLY BE A POWER OF 2 */
1143 UWord htab_used; /* number of hash table slots currently in use */
1144 /* If this frame is currently making a call, then the following
1145 are relevant. */
1146 Addr sp_at_call;
1147 Addr fp_at_call;
1148 XArray* /* of Addr */ blocks_added_by_call;
1149 /* See comment just above */
1150 IInstance htab_fixed[N_HTAB_FIXED];
1151 }
1152 StackFrame;
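/* Sketch of the list behaviour described above: after f calls g and g
   calls h, h's return leaves h's StackFrame on the list as an unused
   innermost node; the next call g makes re-uses that node instead of
   allocating a fresh StackFrame. */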
1153
1154
1155
1156
1157
1158/* Move this somewhere else? */
1159/* Visit all Invars in the entire system, and convert all Inv_Global
1160 Invars that intersect [a,a+len) to Inv_Unknown. Stack0, StackN and
1161 Unknown Invars are left unchanged; an Unset Invar is not expected
1162 here and is asserted against. */
1163
1164__attribute__((noinline))
1165static void preen_global_Invar ( Invar* inv, Addr a, SizeT len )
1166{
1167 stats__Invars_preened++;
1168 tl_assert(len > 0);
1169 tl_assert(inv);
1170 switch (inv->tag) {
1171 case Inv_Global:
1172 tl_assert(inv->Inv.Global.nd);
1173 tl_assert(inv->Inv.Global.nd->szB > 0);
1174 if (0) VG_(printf)("preen_Invar Global %#lx %lu\n",
1175 inv->Inv.Global.nd->addr,
1176 inv->Inv.Global.nd->szB);
1177 if (0 == cmp_nonempty_intervals(a, len, inv->Inv.Global.nd->addr,
1178 inv->Inv.Global.nd->szB)) {
1179 inv->tag = Inv_Unknown;
1180 stats__Invars_changed++;
1181 }
1182 break;
1183 case Inv_Stack0:
1184 case Inv_StackN:
1185 case Inv_Unknown:
1186 break;
1187 default:
1188 tl_assert(0);
1189 }
1190}
1191
1192__attribute__((noinline))
1193static void preen_global_Invars ( Addr a, SizeT len )
1194{
1195 Int i;
1196 UWord u;
1197 StackFrame* frame;
1198 tl_assert(len > 0);
1199 for (i = 0; i < VG_N_THREADS; i++) {
1200 frame = shadowStacks[i];
1201 if (!frame)
1202 continue; /* no frames for this thread */
1203 /* start from the innermost frame */
1204 while (frame->inner)
1205 frame = frame->inner;
1206 tl_assert(frame->outer);
1207 /* work through the frames from innermost to outermost. The
1208 order isn't important; we just need to ensure we visit each
1209 frame once (including those which are not actually active,
1210 more 'inner' than the 'innermost active frame', viz, just
1211 hanging around waiting to be used, when the current innermost
1212 active frame makes more calls. See comments on definition of
1213 struct _StackFrame. */
1214 for (; frame; frame = frame->outer) {
1215 UWord xx = 0; /* sanity check only; count of used htab entries */
1216 if (!frame->htab)
1217 continue; /* frame not in use. See shadowStack_unwind(). */
1218 for (u = 0; u < frame->htab_size; u++) {
1219 IInstance* ii = &frame->htab[u];
1220 if (ii->insn_addr == 0)
1221 continue; /* not in use */
1222 if (0) { pp_Invar(&ii->invar); VG_(printf)(" x\n"); }
1223 preen_global_Invar( &ii->invar, a, len );
1224 xx++;
1225 }
1226 tl_assert(xx == frame->htab_used);
1227 }
1228 }
1229}
1230
1231
1232/* XXX this should be >> 2 on ppc32/64 since the bottom two bits
1233 of the ip are guaranteed to be zero */
1234inline static UWord compute_II_hash ( Addr ip, UWord htab_size ) {
1235 return (ip >> 0) & (htab_size - 1);
1236}
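/* e.g. with htab_size == 64 the hash is just the low 6 bits of the
   instruction address, so ip 0x400123 maps to slot 0x23. */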
1237
1238__attribute__((noinline))
1239static void initialise_II_hash_table ( StackFrame* sf )
1240{
1241 UWord i;
1242 sf->htab_size = N_HTAB_FIXED; /* initial hash table size */
1243 sf->htab = &sf->htab_fixed[0];
1244 tl_assert(sf->htab);
1245 sf->htab_used = 0;
1246 for (i = 0; i < sf->htab_size; i++)
1247 sf->htab[i].insn_addr = 0; /* NOT IN USE */
1248}
1249
1250
1251__attribute__((noinline))
1252static void resize_II_hash_table ( StackFrame* sf )
1253{
1254 UWord i, j, ix, old_size, new_size;
1255 IInstance *old_htab, *new_htab, *old;
1256
1257 tl_assert(sf && sf->htab);
1258 old_size = sf->htab_size;
1259 new_size = 2 * old_size;
1260 old_htab = sf->htab;
1261 new_htab = sg_malloc( "di.sg_main.rIht.1",
1262 new_size * sizeof(IInstance) );
1263 for (i = 0; i < new_size; i++) {
1264 new_htab[i].insn_addr = 0; /* NOT IN USE */
1265 }
1266 for (i = 0; i < old_size; i++) {
1267 old = &old_htab[i];
1268 if (old->insn_addr == 0 /* NOT IN USE */)
1269 continue;
1270 ix = compute_II_hash(old->insn_addr, new_size);
1271 /* find out where to put this, in the new table */
1272 j = new_size;
1273 while (1) {
1274 if (new_htab[ix].insn_addr == 0)
1275 break;
1276 /* This can't ever happen, because it would mean the new
1277 table is full; that isn't allowed -- even the old table is
1278 only allowed to become half full. */
1279 tl_assert(j > 0);
1280 j--;
1281 ix++; if (ix == new_size) ix = 0;
1282 }
1283 /* copy the old entry to this location */
1284 tl_assert(ix < new_size);
1285 tl_assert(new_htab[ix].insn_addr == 0);
1286 new_htab[ix] = *old;
1287 tl_assert(new_htab[ix].insn_addr != 0);
1288 }
1289 /* all entries copied; free old table. */
1290 if (old_htab != &sf->htab_fixed[0])
1291 sg_free(old_htab);
1292 sf->htab = new_htab;
1293 sf->htab_size = new_size;
1294 /* check sf->htab_used is correct. Optional and a bit expensive
1295 but anyway: */
1296 j = 0;
1297 for (i = 0; i < new_size; i++) {
1298 if (new_htab[i].insn_addr != 0) {
1299 j++;
1300 }
1301 }
1302 tl_assert(j == sf->htab_used);
1303 if (0) VG_(printf)("resized tab for SF %p to %lu\n", sf, new_size);
1304}
1305
1306
1307__attribute__((noinline))
1308static IInstance* find_or_create_IInstance_SLOW (
1309 StackFrame* sf,
1310 Addr ip,
1311 XArray* /* StackBlock */ ip_frameblocks
1312 )
1313{
1314 UWord i, ix;
1315
1316 stats__htab_searches++;
1317
1318 tl_assert(sf);
1319 tl_assert(sf->htab);
1320
1321 /* Make sure the table loading doesn't get too high. */
1322 if (UNLIKELY(2 * sf->htab_used >= 1 * sf->htab_size)) {
1323 stats__htab_resizes++;
1324 resize_II_hash_table(sf);
1325 }
1326 tl_assert(2 * sf->htab_used <= sf->htab_size);
1327
1328 ix = compute_II_hash(ip, sf->htab_size);
1329 i = sf->htab_size;
1330 while (1) {
1331 stats__htab_probes++;
1332 /* Note that because of the way the fast-case handler works,
1333 these two tests are actually redundant in the first iteration
1334 of this loop. (Except they aren't redundant if the code just
1335 above resized the table first. :-) */
1336 if (sf->htab[ix].insn_addr == ip)
1337 return &sf->htab[ix];
1338 if (sf->htab[ix].insn_addr == 0)
1339 break;
1340 /* If i ever gets to zero and we have found neither what we're
1341 looking for nor an empty slot, the table must be full. Which
1342 isn't possible -- we monitor the load factor to ensure it
1343 doesn't get above say 50%; if that ever does happen the table
1344 is resized. */
1345 tl_assert(i > 0);
1346 i--;
1347 ix++;
1348 if (ix == sf->htab_size) ix = 0;
1349 }
1350
1351 /* So now we've found a free slot at ix, and we can use that. */
1352 tl_assert(sf->htab[ix].insn_addr == 0);
1353
1354 /* Add a new record in this slot. */
1355 tl_assert(ip != 0); /* CAN'T REPRESENT THIS */
1356 sf->htab[ix].insn_addr = ip;
1357 sf->htab[ix].blocks = ip_frameblocks;
1358 sf->htab[ix].invar.tag = Inv_Unset;
1359 sf->htab_used++;
1360 return &sf->htab[ix];
1361}
1362
1363
1364inline
1365static IInstance* find_or_create_IInstance (
1366 StackFrame* sf,
1367 Addr ip,
1368 XArray* /* StackBlock */ ip_frameblocks
1369 )
1370{
1371 UWord ix = compute_II_hash(ip, sf->htab_size);
1372 /* Is it in the first slot we come to? */
1373 if (LIKELY(sf->htab[ix].insn_addr == ip)) {
1374 stats__htab_fast++;
1375 return &sf->htab[ix];
1376 }
1377 /* If the first slot we come to is empty, bag it. */
1378 if (LIKELY(sf->htab[ix].insn_addr == 0)) {
1379 stats__htab_fast++;
1380 tl_assert(ip != 0);
1381 sf->htab[ix].insn_addr = ip;
1382 sf->htab[ix].blocks = ip_frameblocks;
1383 sf->htab[ix].invar.tag = Inv_Unset;
1384 sf->htab_used++;
1385 return &sf->htab[ix];
1386 }
1387 /* Otherwise we hand off to the slow case, which searches other
1388 slots, and optionally resizes the table if necessary. */
1389 return find_or_create_IInstance_SLOW( sf, ip, ip_frameblocks );
1390}
1391
1392
1393__attribute__((noinline))
1394static Addr calculate_StackBlock_EA ( StackBlock* descr,
1395 Addr sp, Addr fp ) {
1396 UWord w1 = (UWord)descr->base;
1397 UWord w2 = (UWord)(descr->spRel ? sp : fp);
1398 UWord ea = w1 + w2;
1399 return ea;
1400}
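/* Worked example (values invented for illustration): a descriptor
   with .base == -32 and .spRel == False, evaluated when fp == 0x1000,
   gives ea == 0xFE0; the sum is done on UWords, so negative offsets
   wrap around as intended. */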
1401
1402/* Given an array of StackBlocks, return an array of Addrs, holding
1403 their effective addresses. Caller deallocates result array. */
1404__attribute__((noinline))
1405static XArray* /* Addr */ calculate_StackBlock_EAs (
1406 XArray* /* StackBlock */ blocks,
1407 Addr sp, Addr fp
1408 )
1409{
1410 XArray* res;
1411 Word i, n = VG_(sizeXA)( blocks );
1412 tl_assert(n > 0);
1413 res = VG_(newXA)( sg_malloc, "di.sg_main.cSBE.1", sg_free, sizeof(Addr) );
1414 for (i = 0; i < n; i++) {
1415 StackBlock* blk = VG_(indexXA)( blocks, i );
1416 Addr ea = calculate_StackBlock_EA( blk, sp, fp );
1417 VG_(addToXA)( res, &ea );
1418 }
1419 return res;
1420}
1421
1422
1423/* Try to classify the block into which a memory access falls, and
1424 write the result in 'inv'. This writes all relevant fields of
1425 'inv'. */
1426__attribute__((noinline))
1427static void classify_address ( /*OUT*/Invar* inv,
1428 ThreadId tid,
1429 Addr ea, Addr sp, Addr fp,
1430 UWord szB,
1431 XArray* /* of StackBlock */ thisInstrBlocks )
1432{
1433 tl_assert(szB > 0);
1434 /* First, look in the stack blocks accessible in this instruction's
1435 frame. */
1436 {
1437 Word i, nBlocks = VG_(sizeXA)( thisInstrBlocks );
1438 if (nBlocks == 0) stats__t_i_b_empty++;
1439 for (i = 0; i < nBlocks; i++) {
1440 StackBlock* descr = VG_(indexXA)( thisInstrBlocks, i );
1441 Addr bea = calculate_StackBlock_EA( descr, sp, fp );
1442 if (bea <= ea && ea + szB <= bea + descr->szB) {
1443 /* found it */
1444 inv->tag = Inv_Stack0;
1445 inv->Inv.Stack0.addr = bea;
1446 inv->Inv.Stack0.szB = descr->szB;
1447 inv->Inv.Stack0.descr = descr;
1448 stats__classify_Stack0++;
1449 return;
1450 }
1451 }
1452 }
1453 /* Look in this thread's query cache */
1454 { Word i;
1455 QCache* cache = &qcaches[tid];
1456 static UWord ctr = 0;
1457 stats__qcache_queries++;
1458 for (i = 0; i < cache->nInUse; i++) {
1459 if (0) /* expensive in a loop like this */
1460 tl_assert(cache->elems[i].addr + cache->elems[i].szB != 0);
1461 stats__qcache_probes++;
1462 if (is_subinterval_of(cache->elems[i].addr,
1463 cache->elems[i].szB, ea, szB)) {
1464 if (i > 0
1465 && (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0) {
1466 QCElem tmp;
1467 tmp = cache->elems[i-1];
1468 cache->elems[i-1] = cache->elems[i];
1469 cache->elems[i] = tmp;
1470 i--;
1471 }
1472 *inv = cache->elems[i].inv;
1473 return;
1474 }
1475 }
1476 stats__qcache_misses++;
1477 }
1478 /* Ok, so it's not a block in the top frame. Perhaps it's a block
1479 in some calling frame? Consult this thread's stack-block
1480 interval tree to find out. */
1481 { StackTreeNode* nd = find_StackTreeNode( siTrees[tid], ea );
1482 /* We know that [ea,ea+1) is in the block, but we need to
1483 restrict to the case where the whole access falls within
1484 it. */
1485 if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
1486 nd = NULL;
1487 }
1488 if (nd) {
1489 /* found it */
1490 inv->tag = Inv_StackN;
1491 inv->Inv.StackN.nd = nd;
1492 stats__classify_StackN++;
1493 goto out;
1494 }
1495 }
1496 /* Not in a stack block. Try the global pool. */
1497 { GlobalTreeNode* nd = find_GlobalTreeNode(giTree, ea);
1498 /* We know that [ea,ea+1) is in the block, but we need to
1499 restrict to the case where the whole access falls within
1500 it. */
1501 if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
1502 nd = NULL;
1503 }
1504 if (nd) {
1505 /* found it */
1506 inv->tag = Inv_Global;
1507 inv->Inv.Global.nd = nd;
1508 stats__classify_Global++;
1509 goto out;
1510 }
1511 }
1512 /* No idea - give up. */
1513 inv->tag = Inv_Unknown;
1514 stats__classify_Unknown++;
1515
1516 /* Update the cache */
1517 out:
1518 { Addr toadd_addr = 0;
1519 SizeT toadd_szB = 0;
1520 QCache* cache = &qcaches[tid];
1521
1522 static UWord ctr = 0;
1523 Bool show = False;
1524 if (0 && 0 == (ctr++ & 0x1FFFFF)) show = True;
1525
1526 if (show) QCache__pp(cache, "before upd");
1527
1528 switch (inv->tag) {
1529 case Inv_Global:
1530 toadd_addr = inv->Inv.Global.nd->addr;
1531 toadd_szB = inv->Inv.Global.nd->szB;
1532 break;
1533 case Inv_StackN:
1534 toadd_addr = inv->Inv.StackN.nd->addr;
1535 toadd_szB = inv->Inv.StackN.nd->szB;
1536 break;
1537 case Inv_Unknown: {
1538 /* This is more complex. We need to figure out the
1539 intersection of the "holes" in the global and stack
1540 interval trees into which [ea,ea+szB) falls. This is
1541 further complicated by the fact that [ea,ea+szB) might
1542 not fall cleanly into a hole; it may instead fall across
1543 the boundary of a stack or global block. In that case
1544 we just ignore it and don't update the cache, since we
1545 have no way to represent this situation precisely. */
1546 StackTreeNode sNegInf, sPosInf, sKey, *sLB, *sUB;
1547 GlobalTreeNode gNegInf, gPosInf, gKey, *gLB, *gUB;
1548 Addr gMin, gMax, sMin, sMax, uMin, uMax;
1549 Bool sOK, gOK;
1550 sNegInf.addr = 0;
1551 sNegInf.szB = 1;
1552 sPosInf.addr = ~(UWord)0;
1553 sPosInf.szB = 1;
1554 gNegInf.addr = 0;
1555 gNegInf.szB = 1;
1556 gPosInf.addr = ~(UWord)0;
1557 gPosInf.szB = 1;
1558 sKey.addr = ea;
1559 sKey.szB = szB;
1560 gKey.addr = ea;
1561 gKey.szB = szB;
1562 if (0) VG_(printf)("Tree sizes %ld %ld\n",
1563 VG_(sizeFM)(siTrees[tid]), VG_(sizeFM)(giTree));
1564 sOK = VG_(findBoundsFM)( siTrees[tid],
1565 (UWord*)&sLB, NULL/*unused*/,
1566 (UWord*)&sUB, NULL/*unused*/,
1567 (UWord)&sNegInf, 0/*unused*/,
1568 (UWord)&sPosInf, 0/*unused*/,
1569 (UWord)&sKey );
1570 gOK = VG_(findBoundsFM)( giTree,
1571 (UWord*)&gLB, NULL/*unused*/,
1572 (UWord*)&gUB, NULL/*unused*/,
1573 (UWord)&gNegInf, 0/*unused*/,
1574 (UWord)&gPosInf, 0/*unused*/,
1575 (UWord)&gKey );
1576 if (!(sOK && gOK)) {
1577 /* If this happens, then [ea,ea+szB) partially overlaps
1578 a stack or global block. We can't represent that, so
1579 just forget it (should be very rare). However, do
1580 maximum sanity checks first. In such a
1581 partial overlap case, it can't be the case that both
1582 [ea] and [ea+szB-1] overlap the same block, since if
1583 that were indeed the case then it wouldn't be a
1584 partial overlap; rather it would simply fall inside
1585 that block entirely and we shouldn't be inside this
1586 conditional at all. */
1587 if (!sOK) {
1588 StackTreeNode *ndFirst, *ndLast;
1589 ndFirst = find_StackTreeNode( siTrees[tid], ea );
1590 ndLast = find_StackTreeNode( siTrees[tid], ea+szB-1 );
1591 /* if both ends of the range fall inside a block,
1592 they can't be in the same block. */
1593 if (ndFirst && ndLast)
1594 tl_assert(ndFirst != ndLast);
1595 /* for each end of the range, if it is in a block,
1596 the range as a whole can't be entirely within the
1597 block. */
1598 if (ndFirst)
1599 tl_assert(!is_subinterval_of(ndFirst->addr,
1600 ndFirst->szB, ea, szB));
1601 if (ndLast)
1602 tl_assert(!is_subinterval_of(ndLast->addr,
1603 ndLast->szB, ea, szB));
1604 }
1605 if (!gOK) {
1606 GlobalTreeNode *ndFirst, *ndLast;
1607 ndFirst = find_GlobalTreeNode( giTree, ea );
1608 ndLast = find_GlobalTreeNode( giTree, ea+szB-1 );
1609 /* if both ends of the range fall inside a block,
1610 they can't be in the same block. */
1611 if (ndFirst && ndLast)
1612 tl_assert(ndFirst != ndLast);
1613 /* for each end of the range, if it is in a block,
1614 the range as a whole can't be entirely within the
1615 block. */
1616 if (ndFirst)
1617 tl_assert(!is_subinterval_of(ndFirst->addr,
1618 ndFirst->szB, ea, szB));
1619 if (ndLast)
1620 tl_assert(!is_subinterval_of(ndLast->addr,
1621 ndLast->szB, ea, szB));
1622 }
1623 if (0) VG_(printf)("overlapping blocks in cache\n");
1624 return;
1625 }
1626 sMin = sLB == &sNegInf ? 0 : (sLB->addr + sLB->szB);
1627 sMax = sUB == &sPosInf ? ~(UWord)0 : (sUB->addr - 1);
1628 gMin = gLB == &gNegInf ? 0 : (gLB->addr + gLB->szB);
1629 gMax = gUB == &gPosInf ? ~(UWord)0 : (gUB->addr - 1);
1630 if (0) VG_(printf)("sMin %lx sMax %lx gMin %lx gMax %lx\n",
1631 sMin, sMax, gMin, gMax);
1632 /* [sMin,sMax] and [gMin,gMax] must both contain
1633 [ea,ea+szB) (right?) That implies they must overlap at
1634 least over [ea,ea+szB). */
1635 tl_assert(sMin <= ea && ea+szB-1 <= sMax);
1636 tl_assert(gMin <= ea && ea+szB-1 <= gMax);
1637 /* So now compute their intersection. */
1638 uMin = Addr__max( sMin, gMin );
1639 uMax = Addr__min( sMax, gMax );
1640 if (0) VG_(printf)("uMin %lx uMax %lx\n", uMin, uMax);
1641 tl_assert(uMin <= uMax);
1642 tl_assert(uMin <= ea && ea+szB-1 <= uMax);
1643 /* Finally, we can park [uMin,uMax] in the cache. However,
1644 if uMax is ~0, we can't represent the difference; hence
1645 fudge uMax. */
1646 if (uMin < uMax && uMax == ~(UWord)0)
1647 uMax--;
1648 toadd_addr = uMin;
1649 toadd_szB = uMax - uMin + 1;
1650 break;
1651 }
1652 default:
1653 /* We should only be caching info for the above 3 cases */
1654 tl_assert(0);
1655 } /* switch (inv->tag) */
1656
1657 { /* and actually add this to the cache, finally */
1658 Word i;
1659 Word ip = cache->nInUse / 2; /* doesn't seem critical */
1660
1661 if (cache->nInUse < N_QCACHE)
1662 cache->nInUse++;
1663 for (i = cache->nInUse-1; i > ip; i--) {
1664 cache->elems[i] = cache->elems[i-1];
1665 }
1666
1667 tl_assert(toadd_szB > 0);
1668 cache->elems[ip].addr = toadd_addr;
1669 cache->elems[ip].szB = toadd_szB;
1670 cache->elems[ip].inv = *inv;
1671 }
1672
1673 if (show) QCache__pp(cache, "after upd");
1674
1675 }
1676}
1677
1678
1679/* CALLED FROM GENERATED CODE */
1680static
1681VG_REGPARM(3)
1682void helperc__mem_access ( /* Known only at run time: */
1683 Addr ea, Addr sp, Addr fp,
1684 /* Known at translation time: */
1685 Word sszB, Addr ip, XArray* ip_frameBlocks )
1686{
1687 UWord szB;
1688 IInstance* iinstance;
1689 Invar* inv;
1690 Invar new_inv;
1691 ThreadId tid = VG_(get_running_tid)();
1692 StackFrame* frame;
1693 HChar bufE[128], bufA[128];
1694
1695 stats__total_accesses++;
1696
1697 tl_assert(is_sane_TId(tid));
1698 frame = shadowStacks[tid];
1699 tl_assert(frame);
1700
1701 /* Find the instance info for this instruction. */
1702 tl_assert(ip_frameBlocks);
1703 iinstance = find_or_create_IInstance( frame, ip, ip_frameBlocks );
1704 tl_assert(iinstance);
1705 tl_assert(iinstance->blocks == ip_frameBlocks);
1706
1707 szB = (sszB < 0) ? (-sszB) : sszB;
1708 tl_assert(szB > 0);
1709
1710 inv = &iinstance->invar;
1711
1712 /* Deal with first uses of instruction instances. */
1713 if (inv->tag == Inv_Unset) {
1714 /* This is the first use of this instance of the instruction, so
1715 we can't make any check; we merely record what we saw, so we
1716 can compare it against what happens for 2nd and subsequent
1717 accesses. */
1718 classify_address( inv,
1719 tid, ea, sp, fp, szB, iinstance->blocks );
1720 tl_assert(inv->tag != Inv_Unset);
1721 return;
1722 }
1723
1724 /* So generate an Invar and see if it's different from what
1725 we had before. */
1726 classify_address( &new_inv,
1727 tid, ea, sp, fp, szB, iinstance->blocks );
1728 tl_assert(new_inv.tag != Inv_Unset);
1729
1730 /* Did we see something different from before? If no, then there's
1731 no error. */
1732 if (eq_Invar(&new_inv, inv))
1733 return;
1734
1735 tl_assert(inv->tag != Inv_Unset);
1736
1737 VG_(memset)(bufE, 0, sizeof(bufE));
1738 show_Invar( bufE, sizeof(bufE)-1, inv, frame->depth );
1739
1740 VG_(memset)(bufA, 0, sizeof(bufA));
1741 show_Invar( bufA, sizeof(bufA)-1, &new_inv, frame->depth );
1742
1743 sg_record_error_SorG( tid, ea, sszB, bufE, bufA );
1744
1745 /* And now install the new observation as "standard", so as to
1746 make future error messages make more sense. */
1747 *inv = new_inv;
1748}
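/* Usage sketch: generated code is expected to call this helper for
   each checked memory access, passing the run-time effective address,
   SP and FP, plus the translation-time constants: the signed access
   size, the instruction address, and that instruction's persistent
   StackBlock vector (presumably as returned by get_StackBlocks_for_IP). */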
1749
1750
1751////////////////////////////////////////
1752/* Primary push-a-new-frame routine. Called indirectly from
1753 generated code. */
1754
1755static UWord stats__max_sitree_size = 0;
1756static UWord stats__max_gitree_size = 0;
1757
1758static
1759void shadowStack_new_frame ( ThreadId tid,
1760 Addr sp_at_call_insn,
1761 Addr sp_post_call_insn,
1762 Addr fp_at_call_insn,
1763 Addr ip_post_call_insn,
1764 XArray* descrs_at_call_insn )
1765{
1766 StackFrame *callee, *caller;
1767 tl_assert(is_sane_TId(tid));
1768
1769 caller = shadowStacks[tid];
1770 tl_assert(caller);
1771
1772 if (caller->outer) { /* "this is not the outermost frame" */
1773 tl_assert(caller->outer->inner == caller);
1774 tl_assert(caller->outer->depth >= 0);
1775 tl_assert(1 + caller->outer->depth == caller->depth);
1776 } else {
1777 tl_assert(caller->depth == 0);
1778 }
1779
1780 caller->sp_at_call = sp_at_call_insn;
1781 caller->fp_at_call = fp_at_call_insn;
1782
1783 if (descrs_at_call_insn) {
1784 tl_assert( VG_(sizeXA)(descrs_at_call_insn) > 0 );
1785 caller->blocks_added_by_call
1786 = calculate_StackBlock_EAs( descrs_at_call_insn,
1787 sp_at_call_insn, fp_at_call_insn );
1788 if (caller->blocks_added_by_call)
1789 add_blocks_to_StackTree( siTrees[tid],
1790 descrs_at_call_insn,
1791 caller->blocks_added_by_call,
1792 caller->depth /* stack depth at which
1793 these blocks are
1794 considered to exist*/ );
1795 if (1) {
1796 UWord s = VG_(sizeFM)( siTrees[tid] );
1797 UWord g = VG_(sizeFM)( giTree );
1798 Bool sb = s > stats__max_sitree_size;
1799 Bool gb = g > stats__max_gitree_size;
1800 if (sb) stats__max_sitree_size = s;
1801 if (gb) stats__max_gitree_size = g;
1802 if (0 && (sb || gb))
1803 VG_(message)(Vg_DebugMsg,
1804 "exp-sgcheck: new max tree sizes: "
1805 "StackTree %ld, GlobalTree %ld",
1806 stats__max_sitree_size, stats__max_gitree_size );
1807 }
1808 } else {
1809 caller->blocks_added_by_call = NULL;
1810 }
1811
1812 /* caller->blocks_added_by_call is used again (and then freed) when
1813 this frame is removed from the stack. */
1814
1815 if (caller->inner) {
1816 callee = caller->inner;
1817 } else {
1818 callee = sg_malloc("di.sg_main.sSnf.1", sizeof(StackFrame));
1819 VG_(memset)(callee, 0, sizeof(StackFrame));
1820 callee->outer = caller;
1821 caller->inner = callee;
1822 callee->depth = 1 + caller->depth;
1823 tl_assert(callee->inner == NULL);
1824 }
1825
1826 /* This sets up .htab, .htab_size and .htab_used */
1827 initialise_II_hash_table( callee );
1828
1829 callee->creation_sp = sp_post_call_insn;
1830 callee->sp_at_call = 0; // not actually required ..
1831 callee->fp_at_call = 0; // .. these 3 initialisations are ..
1832 callee->blocks_added_by_call = NULL; // .. just for cleanness
1833
1834 /* record the new running stack frame */
1835 shadowStacks[tid] = callee;
1836
1837 /* and this thread's query cache is now invalid */
1838 QCache__invalidate( &qcaches[tid] );
1839
1840 if (0)
1841 { Word d = callee->depth;
1842 HChar fnname[80];
1843 Bool ok;
1844 Addr ip = ip_post_call_insn;
1845 ok = VG_(get_fnname_w_offset)( ip, fnname, sizeof(fnname) );
1846 while (d > 0) {
1847 VG_(printf)(" ");
1848 d--;
1849 }
1850 VG_(printf)("> %s %#lx\n", ok ? fnname : "???", ip);
1851 }
1852}
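
/* A sketch of the shadow stack this builds (illustrative only;
   fields not mentioned are zero/NULL):

      root frame   depth 0, creation_sp = ~0   (never removed)
        inner v  ^ outer
      frame        depth 1, creation_sp = SP just after the 1st call
        inner v  ^ outer
      frame        depth 2, creation_sp = SP just after the 2nd call
                   <-- shadowStacks[tid] points here

   Note that a popped callee frame is not freed: it stays linked via
   ->inner so that a later call at the same depth can re-use it
   without a fresh sg_malloc. */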
1853
1854/* CALLED FROM GENERATED CODE */
1855static
1856VG_REGPARM(3)
1857void helperc__new_frame ( Addr sp_post_call_insn,
1858 Addr fp_at_call_insn,
1859 Addr ip_post_call_insn,
1860 XArray* blocks_at_call_insn,
1861 Word sp_adjust )
1862{
1863 ThreadId tid = VG_(get_running_tid)();
1864 Addr sp_at_call_insn = sp_post_call_insn + sp_adjust;
1865 shadowStack_new_frame( tid,
1866 sp_at_call_insn,
1867 sp_post_call_insn,
1868 fp_at_call_insn,
1869 ip_post_call_insn,
1870 blocks_at_call_insn );
1871}
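
/* Note on sp_adjust: the final-jump instrumentation below passes
   sizeofIRType(gWordTy) here, presumably the size of the return
   address which the call instruction pushed (an x86/amd64-style
   call is assumed there).  Adding it back to the post-call SP
   recovers the SP as it was at the call instruction itself, which
   is what shadowStack_new_frame wants for sp_at_call_insn. */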
1872
1873
1874////////////////////////////////////////
1875/* Primary remove-frame(s) routine. Called indirectly from
1876 generated code. */
1877
1878__attribute__((noinline))
1879static void shadowStack_unwind ( ThreadId tid, Addr sp_now )
1880{
1881 StackFrame *innermost, *innermostOrig;
1882 tl_assert(is_sane_TId(tid));
1883 innermost = shadowStacks[tid];
1884 tl_assert(innermost);
1885 innermostOrig = innermost;
1886 //VG_(printf)("UNWIND sp_new = %p\n", sp_now);
1887 while (1) {
1888 if (!innermost->outer)
1889 break;
1890 if (innermost->inner)
1891 tl_assert(innermost->inner->outer == innermost);
1892 tl_assert(innermost->outer->inner == innermost);
1893 tl_assert(innermost->blocks_added_by_call == NULL);
1894 if (sp_now <= innermost->creation_sp) break;
1895 //VG_(printf)("UNWIND dump %p\n", innermost->creation_sp);
1896 tl_assert(innermost->htab);
1897 if (innermost->htab != &innermost->htab_fixed[0])
1898 sg_free(innermost->htab);
1899 /* be on the safe side */
1900 innermost->creation_sp = 0;
1901 innermost->htab = NULL;
1902 innermost->htab_size = 0;
1903 innermost->htab_used = 0;
1904 innermost->sp_at_call = 0;
1905 innermost->fp_at_call = 0;
1906 innermost->blocks_added_by_call = NULL;
1907 innermost = innermost->outer;
1908
1909      /* So now we're "back" in the calling frame.  Remove from this
1910         thread's stack-interval-tree the blocks added at the time of
1911         the call. */
1912
1913 if (innermost->outer) { /* not at the outermost frame */
1914 if (innermost->blocks_added_by_call == NULL) {
1915 } else {
1916 del_blocks_from_StackTree( siTrees[tid],
1917 innermost->blocks_added_by_call );
1918 VG_(deleteXA)( innermost->blocks_added_by_call );
1919 innermost->blocks_added_by_call = NULL;
1920 }
1921 }
1922 /* That completes the required tidying of the interval tree
1923 associated with the frame we just removed. */
1924
1925 if (0) {
1926 Word d = innermost->depth;
1927 while (d > 0) {
1928 VG_(printf)(" ");
1929 d--;
1930 }
1931 VG_(printf)("X\n");
1932 }
1933
1934 }
1935
1936 tl_assert(innermost);
1937
1938 if (innermost != innermostOrig) {
1939 shadowStacks[tid] = innermost;
1940 /* this thread's query cache is now invalid */
1941 QCache__invalidate( &qcaches[tid] );
1942 }
1943}
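
/* Worked example of the loop above (addresses made up; the stack
   grows downwards): if the innermost frame has creation_sp = 0xFFE0
   and the SP now observed is 0xFFF0, then 0xFFF0 > 0xFFE0, so that
   frame has been exited and is removed.  Popping stops at the first
   frame whose creation_sp is >= sp_now, and in any case at the root
   frame, whose creation_sp of ~0 can never be exceeded. */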
1944
1945
1946
1947//////////////////////////////////////////////////////////////
1948// //
1949// Instrumentation //
1950// //
1951//////////////////////////////////////////////////////////////
1952
1953/* What does instrumentation need to do?
1954
1955 - at each Call transfer, generate a call to shadowStack_new_frame
1956 do this by manually inspecting the IR
1957
1958   - at each sp change which shrinks the stack (a stack-memory
1959     "die" event), call shadowStack_unwind
1960     do this by asking for SP-change analysis
1961
1962 - for each memory referencing instruction,
1963 call helperc__mem_access
1964*/
1965
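/* As a concrete (sketched) example of the last point: for a 4-byte
   load, instrument_mem_access below emits, just before the load
   itself, a GET of the guest SP, a GET of the guest FP, and a dirty
   call

      helperc__mem_access( addr, sp, fp, 4, <IP of the insn>,
                           <StackBlocks XArray for that IP> )

   with the size passed negated (-4) if the access is a store rather
   than a load. */
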
1966/* A complication: sg_ instrumentation and h_ instrumentation need to
1967 be interleaved. Since the latter is a lot more complex than the
1968 former, we split the sg_ instrumentation here into four functions
1969 and let the h_ instrumenter call the four functions as it goes.
1970 Hence the h_ instrumenter drives the sg_ instrumenter.
1971
1972 To make this viable, the sg_ instrumenter carries what running
1973 state it needs in 'struct _SGEnv'. This is exported only
1974 abstractly from this file.
1975*/
1976
1977struct _SGEnv {
1978 /* the current insn's IP */
1979 Addr64 curr_IP;
1980 /* whether the above is actually known */
1981 Bool curr_IP_known;
1982   /* if we find a mem ref, is it the first for this insn?  Used for
1983      detecting insns which make more than one memory ref, a
1984      situation we can't handle properly; so we ignore all but the
1985      first ref. */
1986 Bool firstRef;
1987};
1988
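/* A sketch of how the h_ instrumenter is expected to drive these
   four functions (the real driver lives in the h_ code, not in this
   file, and may differ in detail):

      env = sg_instrument_init();
      for each IRStmt 'st' in the superblock, in order:
         sg_instrument_IRStmt( env, sbOut, st, layout, gWordTy, hWordTy );
         ...h_ instrumentation for 'st'...
         addStmtToIRSB( sbOut, st );   (st itself is added only afterwards)
      sg_instrument_final_jump( env, sbOut, next, jumpkind,
                                layout, gWordTy, hWordTy );
      sg_instrument_fini( env );
*/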
1989
1990/* --- Helper fns for instrumentation --- */
1991
1992static IRTemp gen_Get_SP ( IRSB* bbOut,
1993 VexGuestLayout* layout,
1994 Int hWordTy_szB )
1995{
1996 IRExpr* sp_expr;
1997 IRTemp sp_temp;
1998 IRType sp_type;
1999 /* This in effect forces the host and guest word sizes to be the
2000 same. */
2001 tl_assert(hWordTy_szB == layout->sizeof_SP);
2002 sp_type = layout->sizeof_SP == 8 ? Ity_I64 : Ity_I32;
2003 sp_expr = IRExpr_Get( layout->offset_SP, sp_type );
2004 sp_temp = newIRTemp( bbOut->tyenv, sp_type );
2005 addStmtToIRSB( bbOut, IRStmt_WrTmp( sp_temp, sp_expr ) );
2006 return sp_temp;
2007}
2008
2009static IRTemp gen_Get_FP ( IRSB* bbOut,
2010 VexGuestLayout* layout,
2011 Int hWordTy_szB )
2012{
2013 IRExpr* fp_expr;
2014 IRTemp fp_temp;
2015 IRType fp_type;
2016 /* This in effect forces the host and guest word sizes to be the
2017 same. */
2018 tl_assert(hWordTy_szB == layout->sizeof_SP);
2019 fp_type = layout->sizeof_FP == 8 ? Ity_I64 : Ity_I32;
2020 fp_expr = IRExpr_Get( layout->offset_FP, fp_type );
2021 fp_temp = newIRTemp( bbOut->tyenv, fp_type );
2022 addStmtToIRSB( bbOut, IRStmt_WrTmp( fp_temp, fp_expr ) );
2023 return fp_temp;
2024}
2025
2026static void instrument_mem_access ( IRSB* bbOut,
2027 IRExpr* addr,
2028 Int szB,
2029 Bool isStore,
2030 Int hWordTy_szB,
2031 Addr curr_IP,
2032 VexGuestLayout* layout )
2033{
2034 IRType tyAddr = Ity_INVALID;
2035 XArray* frameBlocks = NULL;
2036
2037 tl_assert(isIRAtom(addr));
2038 tl_assert(hWordTy_szB == 4 || hWordTy_szB == 8);
2039
2040 tyAddr = typeOfIRExpr( bbOut->tyenv, addr );
2041 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
2042
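   /* (An inference, not documented in the original:) on x86, skip the
      access made by a RET that immediately follows "pop %ebp", and by
      the "push %ebp; mov %esp,%ebp" prologue; presumably these stack
      accesses sit exactly on a frame boundary and would otherwise be
      noisy. */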
2043#if defined(VGA_x86)
2044 { UChar* p = (UChar*)curr_IP;
2045 // pop %ebp; RET
2046 if (p[-1] == 0x5d && p[0] == 0xc3) return;
2047 // pop %ebp; RET $imm16
2048 if (p[-1] == 0x5d && p[0] == 0xc2) return;
2049 // PUSH %EBP; mov %esp,%ebp
2050 if (p[0] == 0x55 && p[1] == 0x89 && p[2] == 0xe5) return;
2051 }
2052#endif
2053
2054 /* First off, find or create the StackBlocks for this instruction. */
2055 frameBlocks = get_StackBlocks_for_IP( curr_IP );
2056 tl_assert(frameBlocks);
2057 //if (VG_(sizeXA)( frameBlocks ) == 0)
2058 // frameBlocks = NULL;
2059
2060 /* Generate a call to "helperc__mem_access", passing:
2061 addr current_SP current_FP szB curr_IP frameBlocks
2062 */
2063 { IRTemp t_SP = gen_Get_SP( bbOut, layout, hWordTy_szB );
2064 IRTemp t_FP = gen_Get_FP( bbOut, layout, hWordTy_szB );
2065 IRExpr** args
2066 = mkIRExprVec_6( addr,
2067 IRExpr_RdTmp(t_SP),
2068 IRExpr_RdTmp(t_FP),
2069 mkIRExpr_HWord( isStore ? (-szB) : szB ),
2070 mkIRExpr_HWord( curr_IP ),
2071 mkIRExpr_HWord( (HWord)frameBlocks ) );
2072 IRDirty* di
2073 = unsafeIRDirty_0_N( 3/*regparms*/,
2074 "helperc__mem_access",
2075 VG_(fnptr_to_fnentry)( &helperc__mem_access ),
2076 args );
2077
2078 addStmtToIRSB( bbOut, IRStmt_Dirty(di) );
2079 }
2080}
2081
2082
2083/* --- Instrumentation main (4 fns) --- */
2084
2085struct _SGEnv * sg_instrument_init ( void )
2086{
2087 struct _SGEnv * env = sg_malloc("di.sg_main.sii.1",
2088 sizeof(struct _SGEnv));
2089 tl_assert(env);
2090 env->curr_IP = 0;
2091 env->curr_IP_known = False;
2092 env->firstRef = True;
2093 return env;
2094}
2095
2096void sg_instrument_fini ( struct _SGEnv * env )
2097{
2098 sg_free(env);
2099}
2100
2101/* Add instrumentation for 'st' to 'sbOut', and possibly modify 'env'
2102 as required. This must be called before 'st' itself is added to
2103 'sbOut'. */
2104void sg_instrument_IRStmt ( /*MOD*/struct _SGEnv * env,
2105 /*MOD*/IRSB* sbOut,
2106 IRStmt* st,
2107 VexGuestLayout* layout,
2108 IRType gWordTy, IRType hWordTy )
2109{
2110   if (!sg_clo_enable_sg_checks)
2111 return;
2112
2113   tl_assert(st);
2114 tl_assert(isFlatIRStmt(st));
2115 switch (st->tag) {
2116 case Ist_NoOp:
2117 case Ist_AbiHint:
2118 case Ist_Put:
2119 case Ist_PutI:
2120 case Ist_MBE:
2121 /* None of these can contain any memory references. */
2122 break;
2123
2124 case Ist_Exit:
2125 tl_assert(st->Ist.Exit.jk != Ijk_Call);
2126 /* else we must deal with a conditional call */
2127 break;
2128
2129 case Ist_IMark:
2130 env->curr_IP_known = True;
2131 env->curr_IP = (Addr)st->Ist.IMark.addr;
2132 env->firstRef = True;
2133 break;
2134
2135 case Ist_Store:
2136 tl_assert(env->curr_IP_known);
2137 if (env->firstRef) {
2138 instrument_mem_access(
2139 sbOut,
2140 st->Ist.Store.addr,
2141 sizeofIRType(typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data)),
2142 True/*isStore*/,
2143 sizeofIRType(hWordTy),
2144 env->curr_IP, layout
2145 );
2146 env->firstRef = False;
2147 }
2148 break;
2149
2150 case Ist_WrTmp: {
2151 IRExpr* data = st->Ist.WrTmp.data;
2152 if (data->tag == Iex_Load) {
2153 tl_assert(env->curr_IP_known);
2154 if (env->firstRef) {
2155 instrument_mem_access(
2156 sbOut,
2157 data->Iex.Load.addr,
2158 sizeofIRType(data->Iex.Load.ty),
2159 False/*!isStore*/,
2160 sizeofIRType(hWordTy),
2161 env->curr_IP, layout
2162 );
2163 env->firstRef = False;
2164 }
2165 }
2166 break;
2167 }
2168
2169 case Ist_Dirty: {
2170 Int dataSize;
2171 IRDirty* d = st->Ist.Dirty.details;
2172 if (d->mFx != Ifx_None) {
2173 /* This dirty helper accesses memory. Collect the
2174 details. */
2175 tl_assert(env->curr_IP_known);
2176 if (env->firstRef) {
2177 tl_assert(d->mAddr != NULL);
2178 tl_assert(d->mSize != 0);
2179 dataSize = d->mSize;
2180 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
2181 instrument_mem_access(
2182 sbOut, d->mAddr, dataSize, False/*!isStore*/,
2183 sizeofIRType(hWordTy), env->curr_IP, layout
2184 );
2185 }
2186 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
2187 instrument_mem_access(
2188 sbOut, d->mAddr, dataSize, True/*isStore*/,
2189 sizeofIRType(hWordTy), env->curr_IP, layout
2190 );
2191 }
2192 env->firstRef = False;
2193 }
2194 } else {
2195 tl_assert(d->mAddr == NULL);
2196 tl_assert(d->mSize == 0);
2197 }
2198 break;
2199 }
2200
2201 default:
2202 tl_assert(0);
2203
2204 } /* switch (st->tag) */
2205}
2206
2207
2208/* Add instrumentation for the final jump of an IRSB 'sbOut', and
2209 possibly modify 'env' as required. This must be the last
2210 instrumentation statement in the block. */
2211void sg_instrument_final_jump ( /*MOD*/struct _SGEnv * env,
2212 /*MOD*/IRSB* sbOut,
2213 IRExpr* next,
2214 IRJumpKind jumpkind,
2215 VexGuestLayout* layout,
2216 IRType gWordTy, IRType hWordTy )
2217{
2218   if (!sg_clo_enable_sg_checks)
2219 return;
2220
2221   if (jumpkind == Ijk_Call) {
2222 // Assumes x86 or amd64
2223 IRTemp sp_post_call_insn, fp_post_call_insn;
2224 XArray* frameBlocks;
2225 IRExpr** args;
2226 IRDirty* di;
2227 sp_post_call_insn
2228 = gen_Get_SP( sbOut, layout, sizeofIRType(hWordTy) );
2229 fp_post_call_insn
2230 = gen_Get_FP( sbOut, layout, sizeofIRType(hWordTy) );
2231 tl_assert(env->curr_IP_known);
2232 frameBlocks = get_StackBlocks_for_IP( env->curr_IP );
2233 tl_assert(frameBlocks);
2234 if (VG_(sizeXA)(frameBlocks) == 0)
2235 frameBlocks = NULL;
2236 args
2237 = mkIRExprVec_5(
2238 IRExpr_RdTmp(sp_post_call_insn),
2239 IRExpr_RdTmp(fp_post_call_insn),
2240 /* assume the call doesn't change FP */
2241 next,
2242 mkIRExpr_HWord( (HWord)frameBlocks ),
2243 mkIRExpr_HWord( sizeofIRType(gWordTy) )
2244 );
2245 di = unsafeIRDirty_0_N(
2246 3/*regparms*/,
2247 "helperc__new_frame",
2248 VG_(fnptr_to_fnentry)( &helperc__new_frame ),
2249 args );
2250 addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
2251 }
2252}
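
/* Design note (inferred from the code above): an empty frameBlocks
   XArray is passed to helperc__new_frame as NULL rather than as an
   empty array; shadowStack_new_frame treats NULL as "no blocks added
   by this call", and indeed asserts that any non-NULL array it is
   given is non-empty. */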
2253
2254
2255//////////////////////////////////////////////////////////////
2256// //
2257// end Instrumentation //
2258// //
2259//////////////////////////////////////////////////////////////
2260
2261
2262//////////////////////////////////////////////////////////////
2263// //
2264// misc //
2265// //
2266//////////////////////////////////////////////////////////////
2267
2268/* Make a new empty stack frame that is suitable for being the
2269 outermost frame in a stack. It has a creation_sp of effectively
2270 infinity, so it can never be removed. */
2271static StackFrame* new_root_StackFrame ( void )
2272{
2273 StackFrame* sframe = sg_malloc("di.sg_main.nrS.1", sizeof(StackFrame));
2274 VG_(memset)( sframe, 0, sizeof(*sframe) );
2275 sframe->creation_sp = ~0UL;
2276
2277 /* This sets up .htab, .htab_size and .htab_used */
2278 initialise_II_hash_table( sframe );
2279
2280 /* ->depth, ->outer, ->inner are 0, NULL, NULL */
2281
2282 return sframe;
2283}
2284
2285/* Primary routine for setting up the shadow stack for a new thread.
2286 Note that this is used to create not only child thread stacks, but
2287 the root thread's stack too. We create a new stack with
2288 .creation_sp set to infinity, so that the outermost frame can never
2289 be removed (by shadowStack_unwind). The core calls this function
2290 as soon as a thread is created. We cannot yet get its SP value,
2291 since that may not yet be set. */
2292static void shadowStack_thread_create ( ThreadId parent, ThreadId child )
2293{
2294 tl_assert(is_sane_TId(child));
2295 if (parent == VG_INVALID_THREADID) {
2296 /* creating the main thread's stack */
2297 } else {
2298 tl_assert(is_sane_TId(parent));
2299 tl_assert(parent != child);
2300 tl_assert(shadowStacks[parent] != NULL);
2301 tl_assert(siTrees[parent] != NULL);
2302 }
2303
2304 /* Create the child's stack. Bear in mind we may be re-using
2305 it. */
2306 if (shadowStacks[child] == NULL) {
2307 /* First use of this stack. Just allocate an initial frame. */
2308 tl_assert(siTrees[child] == NULL);
2309 } else {
2310 StackFrame *frame, *frame2;
2311 /* re-using a stack. */
2312 /* get rid of the interval tree */
2313 tl_assert(siTrees[child] != NULL);
2314 delete_StackTree( siTrees[child] );
2315 siTrees[child] = NULL;
2316 /* Throw away all existing frames. */
2317 frame = shadowStacks[child];
2318 while (frame->outer)
2319 frame = frame->outer;
2320 tl_assert(frame->depth == 0);
2321 while (frame) {
2322 frame2 = frame->inner;
2323 if (frame2) tl_assert(1 + frame->depth == frame2->depth);
2324 sg_free(frame);
2325 frame = frame2;
2326 }
2327 shadowStacks[child] = NULL;
2328 }
2329
2330 tl_assert(shadowStacks[child] == NULL);
2331 tl_assert(siTrees[child] == NULL);
2332
2333 /* Set up the initial stack frame. */
2334 shadowStacks[child] = new_root_StackFrame();
2335
2336 /* and set up the child's stack block interval tree. */
2337 siTrees[child] = new_StackTree();
2338}
2339
2340/* Once a thread is ready to go, the core calls here. We take the
2341 opportunity to push a second frame on its stack, with the
2342 presumably valid SP value that is going to be used for the thread's
2343 startup. Hence we should always wind up with a valid outermost
2344 frame for the thread. */
2345static void shadowStack_set_initial_SP ( ThreadId tid )
2346{
2347 StackFrame* sf;
2348 tl_assert(is_sane_TId(tid));
2349 sf = shadowStacks[tid];
2350 tl_assert(sf != NULL);
2351 tl_assert(sf->outer == NULL);
2352 tl_assert(sf->inner == NULL);
2353 tl_assert(sf->creation_sp == ~0UL);
2354 shadowStack_new_frame( tid, 0, VG_(get_SP)(tid),
2355 0, VG_(get_IP)(tid), NULL );
2356}
2357
2358
2359//////////////////////////////////////////////////////////////
2360// //
2361// main-ish //
2362// //
2363//////////////////////////////////////////////////////////////
2364
2365/* CALLED indirectly FROM GENERATED CODE. Calls here are created by
2366   sp-change analysis, as requested in pc_pre_clo_init(). */
2367void sg_die_mem_stack ( Addr old_SP, SizeT len ) {
2368 ThreadId tid = VG_(get_running_tid)();
2369 shadowStack_unwind( tid, old_SP+len );
2370}
2371
2372void sg_pre_clo_init ( void ) {
2373 ourGlobals_init();
2374 init_StackBlocks_set();
2375 init_GlobalBlock_set();
2376}
2377
2378void sg_post_clo_init ( void ) {
2379}
2380
2381void sg_pre_thread_ll_create ( ThreadId parent, ThreadId child ) {
2382 shadowStack_thread_create(parent, child);
2383}
2384
2385void sg_pre_thread_first_insn ( ThreadId tid ) {
2386 shadowStack_set_initial_SP(tid);
2387}
2388
2389void sg_fini(Int exitcode)
2390{
2391 if (VG_(clo_verbosity) >= 2) {
2392 VG_(message)(Vg_DebugMsg,
2393 " sg_: %'llu total accesses, of which:", stats__total_accesses);
2394 VG_(message)(Vg_DebugMsg,
2395 " sg_: stack0: %'12llu classify",
2396 stats__classify_Stack0);
2397 VG_(message)(Vg_DebugMsg,
2398 " sg_: stackN: %'12llu classify",
2399 stats__classify_StackN);
2400 VG_(message)(Vg_DebugMsg,
2401 " sg_: global: %'12llu classify",
2402 stats__classify_Global);
2403 VG_(message)(Vg_DebugMsg,
2404 " sg_: unknown: %'12llu classify",
2405 stats__classify_Unknown);
2406 VG_(message)(Vg_DebugMsg,
2407 " sg_: %'llu Invars preened, of which %'llu changed",
2408 stats__Invars_preened, stats__Invars_changed);
2409 VG_(message)(Vg_DebugMsg,
2410 " sg_: t_i_b_MT: %'12llu", stats__t_i_b_empty);
2411 VG_(message)(Vg_DebugMsg,
2412 " sg_: qcache: %'llu searches, %'llu probes, %'llu misses",
2413 stats__qcache_queries, stats__qcache_probes, stats__qcache_misses);
2414 VG_(message)(Vg_DebugMsg,
2415 " sg_: htab-fast: %'llu hits",
2416 stats__htab_fast);
2417 VG_(message)(Vg_DebugMsg,
2418 " sg_: htab-slow: %'llu searches, %'llu probes, %'llu resizes",
2419 stats__htab_searches, stats__htab_probes, stats__htab_resizes);
2420 }
2421}
2422
2423
2424/*--------------------------------------------------------------------*/
2425/*--- end sg_main.c ---*/
2426/*--------------------------------------------------------------------*/