1
2/*--------------------------------------------------------------------*/
3/*--- LibHB: a library for implementing and checking ---*/
4/*--- the happens-before relationship in concurrent programs. ---*/
5/*--- libhb_main.c ---*/
6/*--------------------------------------------------------------------*/
7
8/*
9 This file is part of LibHB, a library for implementing and checking
10 the happens-before relationship in concurrent programs.
11
12 Copyright (C) 2008-2008 OpenWorks Ltd
13 info@open-works.co.uk
14
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file COPYING.
31*/
32
33#include "pub_tool_basics.h"
34#include "pub_tool_libcassert.h"
35#include "pub_tool_libcbase.h"
36#include "pub_tool_libcprint.h"
37#include "pub_tool_mallocfree.h"
38#include "pub_tool_wordfm.h"
39#include "pub_tool_sparsewa.h"
40#include "pub_tool_xarray.h"
41#include "pub_tool_oset.h"
42#include "pub_tool_threadstate.h"
43#include "pub_tool_aspacemgr.h"
44#include "pub_tool_execontext.h"
45#include "pub_tool_errormgr.h"
46#include "pub_tool_options.h" // VG_(clo_verbosity)
47#include "hg_basics.h"
48#include "hg_wordset.h"
49#include "hg_lock_n_thread.h"
50#include "hg_errors.h"
51
52#include "libhb.h"
53
54
55/////////////////////////////////////////////////////////////////
56/////////////////////////////////////////////////////////////////
57// //
58// Debugging #defines //
59// //
60/////////////////////////////////////////////////////////////////
61/////////////////////////////////////////////////////////////////
62
63/* Check the sanity of shadow values in the core memory state
64 machine. Change #if 0 to #if 1 to enable this. */
65#if 0
66# define CHECK_MSM 1
67#else
68# define CHECK_MSM 0
69#endif
70
71
72/* Check sanity (reference counts, etc) in the conflicting access
73 machinery. Change #if 0 to #if 1 to enable this. */
74#if 0
75# define CHECK_CEM 1
76#else
77# define CHECK_CEM 0
78#endif
79
80
81/* Check sanity in the compressed shadow memory machinery,
82 particularly in its caching innards. Unfortunately there's no
83 almost-zero-cost way to make them selectable at run time. Hence
84 set the #if 0 to #if 1 and rebuild if you want them. */
85#if 0
86# define CHECK_ZSM 1 /* do sanity-check CacheLine stuff */
87# define inline __attribute__((noinline))
88 /* probably want to ditch -fomit-frame-pointer too */
89#else
90# define CHECK_ZSM 0 /* don't sanity-check CacheLine stuff */
91#endif
92
93
94/////////////////////////////////////////////////////////////////
95/////////////////////////////////////////////////////////////////
96// //
97// Forward declarations //
98// //
99/////////////////////////////////////////////////////////////////
100/////////////////////////////////////////////////////////////////
101
102/* fwds for
103 Globals needed by other parts of the library. These are set
104 once at startup and then never changed. */
105static void (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
106static ExeContext* (*main_get_EC)( Thr* ) = NULL;
107
108
109
110/////////////////////////////////////////////////////////////////
111/////////////////////////////////////////////////////////////////
112// //
113// SECTION BEGIN compressed shadow memory //
114// //
115/////////////////////////////////////////////////////////////////
116/////////////////////////////////////////////////////////////////
117
118#ifndef __HB_ZSM_H
119#define __HB_ZSM_H
120
121typedef ULong SVal;
122
123/* This value has special significance to the implementation, and callers
124 may not store it in the shadow memory. */
125#define SVal_INVALID (3ULL << 62)
126
127/* This is the default value for shadow memory. Initially the shadow
128 memory contains no accessible areas and so all reads produce this
129 value. TODO: make this caller-definable. */
130#define SVal_NOACCESS (2ULL << 62)
131
132/* Initialise the library. Once initialised, it will (or may) call
133 rcinc and rcdec in response to all the calls below, in order to
134 allow the user to do reference counting on the SVals stored herein.
135 It is important to understand, however, that due to internal
136 caching, the reference counts are in general inaccurate, and can be
137 either above or below the true reference count for an item. In
138 particular, the library may indicate that the reference count for
139 an item is zero, when in fact it is not.
140
141 To make the reference counting exact and therefore non-pointless,
142 call zsm_flush_cache. Immediately after it returns, the reference
143 counts for all items, as deduced by the caller by observing calls
144 to rcinc and rcdec, will be correct, and so any items with a zero
145 reference count may be freed (or at least considered to be
146 unreferenced by this library).
147*/
148static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );
149
150static void zsm_set_range ( Addr, SizeT, SVal );
151static SVal zsm_read8 ( Addr );
152static void zsm_copy_range ( Addr, Addr, SizeT );
153static void zsm_flush_cache ( void );
154
155#endif /* ! __HB_ZSM_H */
156
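/* Illustrative sketch (added commentary, not part of the original
   sources): how a client might drive the zsm interface declared
   above.  The names prefixed 'example_' and the address/length used
   are hypothetical. */
#if 0
static void example_rcinc ( SVal sv ) { /* bump refcount for sv */ }
static void example_rcdec ( SVal sv ) { /* drop refcount for sv */ }

static void example_zsm_client ( void )
{
   zsm_init( example_rcinc, example_rcdec );
   /* Mark 64 bytes starting at 0x5000 as holding SVal_NOACCESS. */
   zsm_set_range( (Addr)0x5000, 64, SVal_NOACCESS );
   /* Reference counts observed via the callbacks are only exact
      immediately after a flush. */
   zsm_flush_cache();
}
#endif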
157
158/* Round a up to the next multiple of N. N must be a power of 2 */
159#define ROUNDUP(a, N) (((a) + (N) - 1) & ~((N)-1))
160/* Round a down to the next multiple of N. N must be a power of 2 */
161#define ROUNDDN(a, N) ((a) & ~((N)-1))
162
163
164
165/* ------ User-supplied RC functions ------ */
166static void(*rcinc)(SVal) = NULL;
167static void(*rcdec)(SVal) = NULL;
168
169
170/* ------ CacheLine ------ */
171
172#define N_LINE_BITS 6 /* must be >= 3 */
173#define N_LINE_ARANGE (1 << N_LINE_BITS)
174#define N_LINE_TREES (N_LINE_ARANGE >> 3)
175
176typedef
177 struct {
178 UShort descrs[N_LINE_TREES];
179 SVal svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
180 }
181 CacheLine;
182
183#define TREE_DESCR_16_0 (1<<0)
184#define TREE_DESCR_32_0 (1<<1)
185#define TREE_DESCR_16_1 (1<<2)
186#define TREE_DESCR_64 (1<<3)
187#define TREE_DESCR_16_2 (1<<4)
188#define TREE_DESCR_32_1 (1<<5)
189#define TREE_DESCR_16_3 (1<<6)
190#define TREE_DESCR_8_0 (1<<7)
191#define TREE_DESCR_8_1 (1<<8)
192#define TREE_DESCR_8_2 (1<<9)
193#define TREE_DESCR_8_3 (1<<10)
194#define TREE_DESCR_8_4 (1<<11)
195#define TREE_DESCR_8_5 (1<<12)
196#define TREE_DESCR_8_6 (1<<13)
197#define TREE_DESCR_8_7 (1<<14)
198#define TREE_DESCR_DTY (1<<15)
199
200typedef
201 struct {
202 SVal dict[4]; /* can represent up to 4 diff values in the line */
203 UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
204 dict indexes */
205 /* if dict[0] == SVal_INVALID then dict[1] is the index of the
206 LineF to use, and dict[2..] are also SVal_INVALID. */
207 }
208 LineZ; /* compressed rep for a cache line */
209
210typedef
211 struct {
212 Bool inUse;
213 SVal w64s[N_LINE_ARANGE];
214 }
215 LineF; /* full rep for a cache line */
216
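/* Worked example (added commentary): with N_LINE_BITS == 6 a line
   covers 64 bytes, so a LineZ carries 64 two-bit entries in ix2s[],
   each selecting one of the (at most 4) distinct SVals in dict[].
   A line holding, say, 48 bytes of SVal_NOACCESS followed by 16
   bytes of some value V therefore fits the compressed rep, with
   dict[0] == SVal_NOACCESS and dict[1] == V.  Only a line needing
   more than 4 distinct SVals spills to a LineF, which stores all 64
   values uncompressed. */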
217/* Shadow memory.
218 Primary map is a WordFM Addr SecMap*.
219 SecMaps cover some page-size-ish section of address space and hold
220 a compressed representation.
221 CacheLine-sized chunks of SecMaps are copied into a Cache, being
222 decompressed when moved into the cache and recompressed on the
223 way out. Because of this, the cache must operate as a writeback
224 cache, not a writethrough one.
225
226 Each SecMap must hold a power-of-2 number of CacheLines. Hence
227 N_SECMAP_BITS must be >= N_LINE_BITS.
228*/
229#define N_SECMAP_BITS 13
230#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)
231
232// # CacheLines held by a SecMap
233#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
234
235/* The data in the SecMap is held in the array of LineZs. Each LineZ
236 either carries the required data directly, in a compressed
237 representation, or it holds (in .dict[0]) an index to the LineF in
238 .linesF that holds the full representation.
239
240 Currently-unused LineF's have their .inUse bit set to zero.
241 Since each in-use LineF is referred to by exactly one LineZ,
242 the number of .linesZ[] that refer to .linesF should equal
243 the number of .linesF[] that have .inUse == True.
244
245 RC obligations: the RCs presented to the user include exactly
246 the values in:
247 * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
248 * F reps that are in use (.inUse == True)
249
250 Hence the following actions at the following transitions are required:
251
252 F rep: .inUse==True -> .inUse==False -- rcdec_LineF
253 F rep: .inUse==False -> .inUse==True -- rcinc_LineF
254 Z rep: .dict[0] from other to SVal_INVALID -- rcdec_LineZ
255 Z rep: .dict[0] from SVal_INVALID to other -- rcinc_LineZ
256*/
257typedef
258 struct {
259 UInt magic;
260 LineZ linesZ[N_SECMAP_ZLINES];
261 LineF* linesF;
262 UInt linesF_size;
263 }
264 SecMap;
265
266#define SecMap_MAGIC 0x571e58cbU
267
268static inline Bool is_sane_SecMap ( SecMap* sm ) {
269 return sm != NULL && sm->magic == SecMap_MAGIC;
270}
271
272/* ------ Cache ------ */
273
274#define N_WAY_BITS 16
275#define N_WAY_NENT (1 << N_WAY_BITS)
276
277/* Each tag is the address of the associated CacheLine, rounded down
278 to a CacheLine address boundary. A CacheLine size must be a power
279 of 2 and must be 8 or more. Hence an easy way to initialise the
280 cache so it is empty is to set all the tag values to any value % 8
281 != 0, eg 1. This means all queries in the cache initially miss.
282 It does however require us to detect, and not write back, any line
283 with a bogus tag. */
284typedef
285 struct {
286 CacheLine lyns0[N_WAY_NENT];
287 Addr tags0[N_WAY_NENT];
288 }
289 Cache;
290
291static inline Bool is_valid_scache_tag ( Addr tag ) {
292 /* a valid tag should be naturally aligned to the start of
293 a CacheLine. */
294 return 0 == (tag & (N_LINE_ARANGE - 1));
295}
296
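/* Added commentary: the cache is direct-mapped.  For an address 'a',
   the tag is 'a' with its low N_LINE_BITS cleared, and the entry used
   is (a >> N_LINE_BITS) & (N_WAY_NENT - 1), as get_cacheline below
   computes.  Setting every tag to 1 (which is not line-aligned) hence
   guarantees that every lookup misses, and that is how the cache is
   emptied. */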
297
298/* --------- Primary data structures --------- */
299
300/* Shadow memory primary map */
301static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
302static Cache cache_shmem;
303
304
305static UWord stats__secmaps_search = 0; // # SM finds
306static UWord stats__secmaps_search_slow = 0; // # SM lookupFMs
307static UWord stats__secmaps_allocd = 0; // # SecMaps issued
308static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
309static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
310static UWord stats__secmap_linesZ_bytes = 0; // .. using this much storage
311static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
312static UWord stats__secmap_linesF_bytes = 0; // .. using this much storage
313static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
314static UWord stats__cache_Z_fetches = 0; // # Z lines fetched
315static UWord stats__cache_Z_wbacks = 0; // # Z lines written back
316static UWord stats__cache_F_fetches = 0; // # F lines fetched
317static UWord stats__cache_F_wbacks = 0; // # F lines written back
318static UWord stats__cache_invals = 0; // # cache invals
319static UWord stats__cache_flushes = 0; // # cache flushes
320static UWord stats__cache_totrefs = 0; // # total accesses
321static UWord stats__cache_totmisses = 0; // # misses
322static ULong stats__cache_make_New_arange = 0; // total arange made New
323static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
324static UWord stats__cline_normalises = 0; // # calls to cacheline_normalise
325static UWord stats__cline_read64s = 0; // # calls to s_m_read64
326static UWord stats__cline_read32s = 0; // # calls to s_m_read32
327static UWord stats__cline_read16s = 0; // # calls to s_m_read16
328static UWord stats__cline_read8s = 0; // # calls to s_m_read8
329static UWord stats__cline_write64s = 0; // # calls to s_m_write64
330static UWord stats__cline_write32s = 0; // # calls to s_m_write32
331static UWord stats__cline_write16s = 0; // # calls to s_m_write16
332static UWord stats__cline_write8s = 0; // # calls to s_m_write8
333static UWord stats__cline_set64s = 0; // # calls to s_m_set64
334static UWord stats__cline_set32s = 0; // # calls to s_m_set32
335static UWord stats__cline_set16s = 0; // # calls to s_m_set16
336static UWord stats__cline_set8s = 0; // # calls to s_m_set8
337static UWord stats__cline_get8s = 0; // # calls to s_m_get8
338static UWord stats__cline_copy8s = 0; // # calls to s_m_copy8
339static UWord stats__cline_64to32splits = 0; // # 64-bit accesses split
340static UWord stats__cline_32to16splits = 0; // # 32-bit accesses split
341static UWord stats__cline_16to8splits = 0; // # 16-bit accesses split
342static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
343static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
344static UWord stats__cline_16to8pulldown = 0; // # calls to pulldown_to_8
345
346static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
347 return a & ~(N_SECMAP_ARANGE - 1);
348}
349static inline UWord shmem__get_SecMap_offset ( Addr a ) {
350 return a & (N_SECMAP_ARANGE - 1);
351}
352
353
354/*----------------------------------------------------------------*/
355/*--- map_shmem :: WordFM Addr SecMap ---*/
356/*--- shadow memory (low level handlers) (shmem__* fns) ---*/
357/*----------------------------------------------------------------*/
358
359/*--------------- SecMap allocation --------------- */
360
361static HChar* shmem__bigchunk_next = NULL;
362static HChar* shmem__bigchunk_end1 = NULL;
363
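/* Added commentary: shmem__bigchunk_alloc below is a simple bump
   allocator.  It hands out 16-byte-aligned pieces of large (4MB)
   chunks obtained from VG_(am_shadow_alloc); when the current chunk
   cannot satisfy a request, its remainder is abandoned and a fresh
   chunk is mapped.  Memory is never handed back. */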
364static void* shmem__bigchunk_alloc ( SizeT n )
365{
366 const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
367 tl_assert(n > 0);
368 n = VG_ROUNDUP(n, 16);
369 tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
370 tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
371 <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
372 if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
373 if (0)
374 VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
375 (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
376 shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
377 if (shmem__bigchunk_next == NULL)
378 VG_(out_of_memory_NORETURN)(
379 "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
380 shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
381 }
382 tl_assert(shmem__bigchunk_next);
383 tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
384 tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
385 shmem__bigchunk_next += n;
386 return shmem__bigchunk_next - n;
387}
388
389static SecMap* shmem__alloc_SecMap ( void )
390{
391 Word i, j;
392 SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
393 if (0) VG_(printf)("alloc_SecMap %p\n",sm);
394 tl_assert(sm);
395 sm->magic = SecMap_MAGIC;
396 for (i = 0; i < N_SECMAP_ZLINES; i++) {
397 sm->linesZ[i].dict[0] = SVal_NOACCESS;
398 sm->linesZ[i].dict[1] = SVal_INVALID;
399 sm->linesZ[i].dict[2] = SVal_INVALID;
400 sm->linesZ[i].dict[3] = SVal_INVALID;
401 for (j = 0; j < N_LINE_ARANGE/4; j++)
402 sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
403 }
404 sm->linesF = NULL;
405 sm->linesF_size = 0;
406 stats__secmaps_allocd++;
407 stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
408 stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
409 stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
410 return sm;
411}
412
413typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
414static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
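/* Added commentary: smCache is a tiny 3-entry cache in front of the
   WordFM lookup in shmem__find_SecMap.  Hits migrate towards the
   front, and misses that are found in the WordFM are installed at the
   front.  The initial gaKey of 1 can never equal a SecMap base
   address (those are multiples of N_SECMAP_ARANGE), so the cache
   starts out effectively empty. */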
415
416static SecMap* shmem__find_SecMap ( Addr ga )
417{
418 SecMap* sm = NULL;
419 Addr gaKey = shmem__round_to_SecMap_base(ga);
420 // Cache
421 stats__secmaps_search++;
422 if (LIKELY(gaKey == smCache[0].gaKey))
423 return smCache[0].sm;
424 if (LIKELY(gaKey == smCache[1].gaKey)) {
425 SMCacheEnt tmp = smCache[0];
426 smCache[0] = smCache[1];
427 smCache[1] = tmp;
428 return smCache[0].sm;
429 }
430 if (gaKey == smCache[2].gaKey) {
431 SMCacheEnt tmp = smCache[1];
432 smCache[1] = smCache[2];
433 smCache[2] = tmp;
434 return smCache[1].sm;
435 }
436 // end Cache
437 stats__secmaps_search_slow++;
438 if (VG_(lookupFM)( map_shmem,
439 NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
440 tl_assert(sm != NULL);
441 smCache[2] = smCache[1];
442 smCache[1] = smCache[0];
443 smCache[0].gaKey = gaKey;
444 smCache[0].sm = sm;
445 } else {
446 tl_assert(sm == NULL);
447 }
448 return sm;
449}
450
451static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
452{
453 SecMap* sm = shmem__find_SecMap ( ga );
454 if (LIKELY(sm)) {
455 return sm;
456 } else {
457 /* create a new one */
458 Addr gaKey = shmem__round_to_SecMap_base(ga);
459 sm = shmem__alloc_SecMap();
460 tl_assert(sm);
461 VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
462 return sm;
463 }
464}
465
466
467/* ------------ LineF and LineZ related ------------ */
468
469static void rcinc_LineF ( LineF* lineF ) {
470 UWord i;
471 tl_assert(lineF->inUse);
472 for (i = 0; i < N_LINE_ARANGE; i++)
473 rcinc(lineF->w64s[i]);
474}
475
476static void rcdec_LineF ( LineF* lineF ) {
477 UWord i;
478 tl_assert(lineF->inUse);
479 for (i = 0; i < N_LINE_ARANGE; i++)
480 rcdec(lineF->w64s[i]);
481}
482
483static void rcinc_LineZ ( LineZ* lineZ ) {
484 tl_assert(lineZ->dict[0] != SVal_INVALID);
485 rcinc(lineZ->dict[0]);
486 if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
487 if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
488 if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
489}
490
491static void rcdec_LineZ ( LineZ* lineZ ) {
492 tl_assert(lineZ->dict[0] != SVal_INVALID);
493 rcdec(lineZ->dict[0]);
494 if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
495 if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
496 if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
497}
498
499inline
500static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
501 Word bix, shft, mask, prep;
502 tl_assert(ix >= 0);
503 bix = ix >> 2;
504 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
505 mask = 3 << shft;
506 prep = b2 << shft;
507 arr[bix] = (arr[bix] & ~mask) | prep;
508}
509
510inline
511static UWord read_twobit_array ( UChar* arr, UWord ix ) {
512 Word bix, shft;
513 tl_assert(ix >= 0);
514 bix = ix >> 2;
515 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
516 return (arr[bix] >> shft) & 3;
517}
518
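/* Worked example (added commentary): the two-bit arrays handled by
   write_twobit_array and read_twobit_array above pack four dictionary
   indexes per byte.  For ix == 5, bix == 1 and shft == 2, so the
   index lives in bits 3:2 of arr[1]; the writer masks those two bits
   out and ORs in the new value, and the reader shifts and masks them
   back out. */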
519/* Given address 'tag', find either the Z or F line containing relevant
520 data, so it can be read into the cache.
521*/
522static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
523 /*OUT*/LineF** fp, Addr tag ) {
524 LineZ* lineZ;
525 LineF* lineF;
526 UWord zix;
527 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
528 UWord smoff = shmem__get_SecMap_offset(tag);
529 /* since smoff is derived from a valid tag, it should be
530 cacheline-aligned. */
531 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
532 zix = smoff >> N_LINE_BITS;
533 tl_assert(zix < N_SECMAP_ZLINES);
534 lineZ = &sm->linesZ[zix];
535 lineF = NULL;
536 if (lineZ->dict[0] == SVal_INVALID) {
537 UInt fix = (UInt)lineZ->dict[1];
538 tl_assert(sm->linesF);
539 tl_assert(sm->linesF_size > 0);
540 tl_assert(fix >= 0 && fix < sm->linesF_size);
541 lineF = &sm->linesF[fix];
542 tl_assert(lineF->inUse);
543 lineZ = NULL;
544 }
545 *zp = lineZ;
546 *fp = lineF;
547}
548
549/* Given address 'tag', return the relevant SecMap and the index of
550 the LineZ within it, in the expectation that the line is to be
551 overwritten. Regardless of whether 'tag' is currently associated
552 with a Z or F representation, rcdec the current
553 representation, in recognition of the fact that the contents are
554 just about to be overwritten. */
555static __attribute__((noinline))
556void find_Z_for_writing ( /*OUT*/SecMap** smp,
557 /*OUT*/Word* zixp,
558 Addr tag ) {
559 LineZ* lineZ;
560 LineF* lineF;
561 UWord zix;
562 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
563 UWord smoff = shmem__get_SecMap_offset(tag);
564 /* since smoff is derived from a valid tag, it should be
565 cacheline-aligned. */
566 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
567 zix = smoff >> N_LINE_BITS;
568 tl_assert(zix < N_SECMAP_ZLINES);
569 lineZ = &sm->linesZ[zix];
570 lineF = NULL;
571 /* re RCs, we are freeing up this LineZ/LineF so that new data can
572 be parked in it. Hence have to rcdec it accordingly. */
573 /* If lineZ has an associated lineF, free it up. */
574 if (lineZ->dict[0] == SVal_INVALID) {
575 UInt fix = (UInt)lineZ->dict[1];
576 tl_assert(sm->linesF);
577 tl_assert(sm->linesF_size > 0);
578 tl_assert(fix >= 0 && fix < sm->linesF_size);
579 lineF = &sm->linesF[fix];
580 tl_assert(lineF->inUse);
581 rcdec_LineF(lineF);
582 lineF->inUse = False;
583 } else {
584 rcdec_LineZ(lineZ);
585 }
586 *smp = sm;
587 *zixp = zix;
588}
589
590static __attribute__((noinline))
591void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
592 UInt i, new_size;
593 LineF* nyu;
594
595 if (sm->linesF) {
596 tl_assert(sm->linesF_size > 0);
597 } else {
598 tl_assert(sm->linesF_size == 0);
599 }
600
601 if (sm->linesF) {
602 for (i = 0; i < sm->linesF_size; i++) {
603 if (!sm->linesF[i].inUse) {
604 *fixp = (Word)i;
605 return;
606 }
607 }
608 }
609
610 /* No free F line found. Expand existing array and try again. */
611 new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
612 nyu = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
613 new_size * sizeof(LineF) );
614 tl_assert(nyu);
615
616 stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
617 stats__secmap_linesF_bytes += (new_size - sm->linesF_size)
618 * sizeof(LineF);
619
620 if (0)
621 VG_(printf)("SM %p: expand F array from %d to %d\n",
622 sm, (Int)sm->linesF_size, new_size);
623
624 for (i = 0; i < new_size; i++)
625 nyu[i].inUse = False;
626
627 if (sm->linesF) {
628 for (i = 0; i < sm->linesF_size; i++) {
629 tl_assert(sm->linesF[i].inUse);
630 nyu[i] = sm->linesF[i];
631 }
632 VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
633 HG_(free)(sm->linesF);
634 }
635
636 sm->linesF = nyu;
637 sm->linesF_size = new_size;
638
639 for (i = 0; i < sm->linesF_size; i++) {
640 if (!sm->linesF[i].inUse) {
641 *fixp = (Word)i;
642 return;
643 }
644 }
645
646 /*NOTREACHED*/
647 tl_assert(0);
648}
649
650
651/* ------------ CacheLine and implicit-tree related ------------ */
652
653__attribute__((unused))
654static void pp_CacheLine ( CacheLine* cl ) {
655 Word i;
656 if (!cl) {
657 VG_(printf)("%s","pp_CacheLine(NULL)\n");
658 return;
659 }
660 for (i = 0; i < N_LINE_TREES; i++)
661 VG_(printf)(" descr: %04lx\n", (UWord)cl->descrs[i]);
662 for (i = 0; i < N_LINE_ARANGE; i++)
663 VG_(printf)(" sval: %08lx\n", (UWord)cl->svals[i]);
664}
665
666static UChar descr_to_validbits ( UShort descr )
667{
668 /* a.k.a Party Time for gcc's constant folder */
669# define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
670 b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0) \
671 ( (UShort) ( ( (b8_7) << 14) | ( (b8_6) << 13) | \
672 ( (b8_5) << 12) | ( (b8_4) << 11) | \
673 ( (b8_3) << 10) | ( (b8_2) << 9) | \
674 ( (b8_1) << 8) | ( (b8_0) << 7) | \
675 ( (b16_3) << 6) | ( (b32_1) << 5) | \
676 ( (b16_2) << 4) | ( (b64) << 3) | \
677 ( (b16_1) << 2) | ( (b32_0) << 1) | \
678 ( (b16_0) << 0) ) )
679
680# define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
681 ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
682 ( (bit5) << 5) | ( (bit4) << 4) | \
683 ( (bit3) << 3) | ( (bit2) << 2) | \
684 ( (bit1) << 1) | ( (bit0) << 0) ) )
685
686 /* these should all get folded out at compile time */
687 tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
688 tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
689 tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
690 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
691 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
692 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
693 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
694 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
695 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);
696
697 switch (descr) {
698 /*
699 +--------------------------------- TREE_DESCR_8_7
700 | +------------------- TREE_DESCR_8_0
701 | | +---------------- TREE_DESCR_16_3
702 | | | +-------------- TREE_DESCR_32_1
703 | | | | +------------ TREE_DESCR_16_2
704 | | | | | +--------- TREE_DESCR_64
705 | | | | | | +------ TREE_DESCR_16_1
706 | | | | | | | +---- TREE_DESCR_32_0
707 | | | | | | | | +-- TREE_DESCR_16_0
708 | | | | | | | | |
709 | | | | | | | | | GRANULARITY, 7 -> 0 */
710 case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */
711 return BYTE(1,1,1,1,1,1,1,1);
712 case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */
713 return BYTE(1,1,0,1,1,1,1,1);
714 case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */
715 return BYTE(0,1,1,1,1,1,1,1);
716 case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */
717 return BYTE(0,1,0,1,1,1,1,1);
718
719 case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */
720 return BYTE(1,1,1,1,1,1,0,1);
721 case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */
722 return BYTE(1,1,0,1,1,1,0,1);
723 case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */
724 return BYTE(0,1,1,1,1,1,0,1);
725 case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */
726 return BYTE(0,1,0,1,1,1,0,1);
727
728 case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */
729 return BYTE(1,1,1,1,0,1,1,1);
730 case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */
731 return BYTE(1,1,0,1,0,1,1,1);
732 case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */
733 return BYTE(0,1,1,1,0,1,1,1);
734 case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */
735 return BYTE(0,1,0,1,0,1,1,1);
736
737 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */
738 return BYTE(1,1,1,1,0,1,0,1);
739 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */
740 return BYTE(1,1,0,1,0,1,0,1);
741 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */
742 return BYTE(0,1,1,1,0,1,0,1);
743 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */
744 return BYTE(0,1,0,1,0,1,0,1);
745
746 case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */
747 return BYTE(0,0,0,1,1,1,1,1);
748 case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */
749 return BYTE(0,0,0,1,1,1,0,1);
750 case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */
751 return BYTE(0,0,0,1,0,1,1,1);
752 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */
753 return BYTE(0,0,0,1,0,1,0,1);
754
755 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */
756 return BYTE(1,1,1,1,0,0,0,1);
757 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */
758 return BYTE(1,1,0,1,0,0,0,1);
759 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */
760 return BYTE(0,1,1,1,0,0,0,1);
761 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */
762 return BYTE(0,1,0,1,0,0,0,1);
763
764 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
765 return BYTE(0,0,0,1,0,0,0,1);
766
767 case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
768 return BYTE(0,0,0,0,0,0,0,1);
769
770 default: return BYTE(0,0,0,0,0,0,0,0);
771 /* INVALID - any valid descr produces at least one
772 valid bit in tree[0..7]*/
773 }
774 /* NOTREACHED*/
775 tl_assert(0);
776
777# undef DESCR
778# undef BYTE
779}
780
781__attribute__((unused))
782static Bool is_sane_Descr ( UShort descr ) {
783 return descr_to_validbits(descr) != 0;
784}
785
786static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
787 VG_(sprintf)(dst,
788 "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
789 (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
790 (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
791 (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
792 (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
793 (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
794 (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
795 (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
796 (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
797 (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
798 (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
799 (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
800 (Int)((descr & TREE_DESCR_64) ? 1 : 0),
801 (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
802 (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
803 (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
804 );
805}
806static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
807 VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
808 (Int)((byte & 128) ? 1 : 0),
809 (Int)((byte & 64) ? 1 : 0),
810 (Int)((byte & 32) ? 1 : 0),
811 (Int)((byte & 16) ? 1 : 0),
812 (Int)((byte & 8) ? 1 : 0),
813 (Int)((byte & 4) ? 1 : 0),
814 (Int)((byte & 2) ? 1 : 0),
815 (Int)((byte & 1) ? 1 : 0)
816 );
817}
818
819static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
820 Word i;
821 UChar validbits = descr_to_validbits(descr);
822 HChar buf[128], buf2[128];
823 if (validbits == 0)
824 goto bad;
825 for (i = 0; i < 8; i++) {
826 if (validbits & (1<<i)) {
827 if (tree[i] == SVal_INVALID)
828 goto bad;
829 } else {
830 if (tree[i] != SVal_INVALID)
831 goto bad;
832 }
833 }
834 return True;
835 bad:
836 sprintf_Descr( buf, descr );
837 sprintf_Byte( buf2, validbits );
838 VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
839 VG_(printf)(" validbits 0x%02lx %s\n", (UWord)validbits, buf2);
840 VG_(printf)(" descr 0x%04lx %s\n", (UWord)descr, buf);
841 for (i = 0; i < 8; i++)
842 VG_(printf)(" [%ld] 0x%016llx\n", i, tree[i]);
843 VG_(printf)("%s","}\n");
844 return 0;
845}
846
847static Bool is_sane_CacheLine ( CacheLine* cl )
848{
849 Word tno, cloff;
850
851 if (!cl) goto bad;
852
853 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
854 UShort descr = cl->descrs[tno];
855 SVal* tree = &cl->svals[cloff];
856 if (!is_sane_Descr_and_Tree(descr, tree))
857 goto bad;
858 }
859 tl_assert(cloff == N_LINE_ARANGE);
860 return True;
861 bad:
862 pp_CacheLine(cl);
863 return False;
864}
865
866static UShort normalise_tree ( /*MOD*/SVal* tree )
867{
868 UShort descr;
869 /* pre: incoming tree[0..7] does not have any invalid shvals, in
870 particular no zeroes. */
871 if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
872 || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
873 || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
874 || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
875 tl_assert(0);
876
877 descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
878 | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
879 | TREE_DESCR_8_1 | TREE_DESCR_8_0;
880 /* build 16-bit layer */
881 if (tree[1] == tree[0]) {
882 tree[1] = SVal_INVALID;
883 descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
884 descr |= TREE_DESCR_16_0;
885 }
886 if (tree[3] == tree[2]) {
887 tree[3] = SVal_INVALID;
888 descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
889 descr |= TREE_DESCR_16_1;
890 }
891 if (tree[5] == tree[4]) {
892 tree[5] = SVal_INVALID;
893 descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
894 descr |= TREE_DESCR_16_2;
895 }
896 if (tree[7] == tree[6]) {
897 tree[7] = SVal_INVALID;
898 descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
899 descr |= TREE_DESCR_16_3;
900 }
901 /* build 32-bit layer */
902 if (tree[2] == tree[0]
903 && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
904 tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
905 descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
906 descr |= TREE_DESCR_32_0;
907 }
908 if (tree[6] == tree[4]
909 && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
910 tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
911 descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
912 descr |= TREE_DESCR_32_1;
913 }
914 /* build 64-bit layer */
915 if (tree[4] == tree[0]
916 && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
917 tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
918 descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
919 descr |= TREE_DESCR_64;
920 }
921 return descr;
922}
923
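/* Worked example (added commentary): if all eight leaves of a tree
   hold the same SVal, normalise_tree first merges the four adjacent
   pairs (16-bit layer), then the two pairs of pairs (32-bit layer),
   then the whole tree (64-bit layer).  The result is a descriptor of
   exactly TREE_DESCR_64 with only tree[0] still valid; tree[1..7]
   end up as SVal_INVALID. */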
924/* This takes a cacheline where all the data is at the leaves
925 (w8[..]) and builds a correctly normalised tree. */
926static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
927{
928 Word tno, cloff;
929 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
930 SVal* tree = &cl->svals[cloff];
931 cl->descrs[tno] = normalise_tree( tree );
932 }
933 tl_assert(cloff == N_LINE_ARANGE);
934 if (CHECK_ZSM)
935 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
936 stats__cline_normalises++;
937}
938
939
940typedef struct { UChar count; SVal sval; } CountedSVal;
941
942static
943void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
944 /*OUT*/Word* dstUsedP,
945 Word nDst, CacheLine* src )
946{
947 Word tno, cloff, dstUsed;
948
949 tl_assert(nDst == N_LINE_ARANGE);
950 dstUsed = 0;
951
952 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
953 UShort descr = src->descrs[tno];
954 SVal* tree = &src->svals[cloff];
955
956 /* sequentialise the tree described by (descr,tree). */
957# define PUT(_n,_v) \
958 do { dst[dstUsed ].count = (_n); \
959 dst[dstUsed++].sval = (_v); \
960 } while (0)
961
962 /* byte 0 */
963 if (descr & TREE_DESCR_64) PUT(8, tree[0]); else
964 if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
965 if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
966 if (descr & TREE_DESCR_8_0) PUT(1, tree[0]);
967 /* byte 1 */
968 if (descr & TREE_DESCR_8_1) PUT(1, tree[1]);
969 /* byte 2 */
970 if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
971 if (descr & TREE_DESCR_8_2) PUT(1, tree[2]);
972 /* byte 3 */
973 if (descr & TREE_DESCR_8_3) PUT(1, tree[3]);
974 /* byte 4 */
975 if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
976 if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
977 if (descr & TREE_DESCR_8_4) PUT(1, tree[4]);
978 /* byte 5 */
979 if (descr & TREE_DESCR_8_5) PUT(1, tree[5]);
980 /* byte 6 */
981 if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
982 if (descr & TREE_DESCR_8_6) PUT(1, tree[6]);
983 /* byte 7 */
984 if (descr & TREE_DESCR_8_7) PUT(1, tree[7]);
985
986# undef PUT
987 /* END sequentialise the tree described by (descr,tree). */
988
989 }
990 tl_assert(cloff == N_LINE_ARANGE);
991 tl_assert(dstUsed <= nDst);
992
993 *dstUsedP = dstUsed;
994}
995
996/* Write the cacheline 'wix' to backing store. Where it ends up
997 is determined by its tag field. */
998static __attribute__((noinline)) void cacheline_wback ( UWord wix )
999{
1000 Word i, j, k, m;
1001 Addr tag;
1002 SecMap* sm;
1003 CacheLine* cl;
1004 LineZ* lineZ;
1005 LineF* lineF;
1006 Word zix, fix, csvalsUsed;
1007 CountedSVal csvals[N_LINE_ARANGE];
1008 SVal sv;
1009
1010 if (0)
1011 VG_(printf)("scache wback line %d\n", (Int)wix);
1012
1013 tl_assert(wix >= 0 && wix < N_WAY_NENT);
1014
1015 tag = cache_shmem.tags0[wix];
1016 cl = &cache_shmem.lyns0[wix];
1017
1018 /* The cache line may have been invalidated; if so, ignore it. */
1019 if (!is_valid_scache_tag(tag))
1020 return;
1021
1022 /* Where are we going to put it? */
1023 sm = NULL;
1024 lineZ = NULL;
1025 lineF = NULL;
1026 zix = fix = -1;
1027
1028 /* find the Z line to write in and rcdec it or the associated F
1029 line. */
1030 find_Z_for_writing( &sm, &zix, tag );
1031
1032 tl_assert(sm);
1033 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
1034 lineZ = &sm->linesZ[zix];
1035
1036 /* Generate the data to be stored */
1037 if (CHECK_ZSM)
1038 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1039
1040 csvalsUsed = -1;
1041 sequentialise_CacheLine( csvals, &csvalsUsed,
1042 N_LINE_ARANGE, cl );
1043 tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
1044 if (0) VG_(printf)("%lu ", csvalsUsed);
1045
1046 lineZ->dict[0] = lineZ->dict[1]
1047 = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1048
1049 /* i indexes actual shadow values, k is cursor in csvals */
1050 i = 0;
1051 for (k = 0; k < csvalsUsed; k++) {
1052
1053 sv = csvals[k].sval;
1054 if (CHECK_ZSM)
1055 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1056 /* do we already have it? */
1057 if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
1058 if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
1059 if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
1060 if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
1061 /* no. look for a free slot. */
1062 if (CHECK_ZSM)
1063 tl_assert(sv != SVal_INVALID);
1064 if (lineZ->dict[0]
1065 == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
1066 if (lineZ->dict[1]
1067 == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
1068 if (lineZ->dict[2]
1069 == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
1070 if (lineZ->dict[3]
1071 == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
1072 break; /* we'll have to use the f rep */
1073 dict_ok:
1074 m = csvals[k].count;
1075 if (m == 8) {
1076 write_twobit_array( lineZ->ix2s, i+0, j );
1077 write_twobit_array( lineZ->ix2s, i+1, j );
1078 write_twobit_array( lineZ->ix2s, i+2, j );
1079 write_twobit_array( lineZ->ix2s, i+3, j );
1080 write_twobit_array( lineZ->ix2s, i+4, j );
1081 write_twobit_array( lineZ->ix2s, i+5, j );
1082 write_twobit_array( lineZ->ix2s, i+6, j );
1083 write_twobit_array( lineZ->ix2s, i+7, j );
1084 i += 8;
1085 }
1086 else if (m == 4) {
1087 write_twobit_array( lineZ->ix2s, i+0, j );
1088 write_twobit_array( lineZ->ix2s, i+1, j );
1089 write_twobit_array( lineZ->ix2s, i+2, j );
1090 write_twobit_array( lineZ->ix2s, i+3, j );
1091 i += 4;
1092 }
1093 else if (m == 1) {
1094 write_twobit_array( lineZ->ix2s, i+0, j );
1095 i += 1;
1096 }
1097 else if (m == 2) {
1098 write_twobit_array( lineZ->ix2s, i+0, j );
1099 write_twobit_array( lineZ->ix2s, i+1, j );
1100 i += 2;
1101 }
1102 else {
1103 tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
1104 }
1105
1106 }
1107
1108 if (LIKELY(i == N_LINE_ARANGE)) {
1109 /* Construction of the compressed representation was
1110 successful. */
1111 rcinc_LineZ(lineZ);
1112 stats__cache_Z_wbacks++;
1113 } else {
1114 /* Cannot use the compressed(z) representation. Use the full(f)
1115 rep instead. */
1116 tl_assert(i >= 0 && i < N_LINE_ARANGE);
1117 alloc_F_for_writing( sm, &fix );
1118 tl_assert(sm->linesF);
1119 tl_assert(sm->linesF_size > 0);
1120 tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
1121 lineF = &sm->linesF[fix];
1122 tl_assert(!lineF->inUse);
1123 lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1124 lineZ->dict[1] = (SVal)fix;
1125 lineF->inUse = True;
1126 i = 0;
1127 for (k = 0; k < csvalsUsed; k++) {
1128 if (CHECK_ZSM)
1129 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1130 sv = csvals[k].sval;
1131 if (CHECK_ZSM)
1132 tl_assert(sv != SVal_INVALID);
1133 for (m = csvals[k].count; m > 0; m--) {
1134 lineF->w64s[i] = sv;
1135 i++;
1136 }
1137 }
1138 tl_assert(i == N_LINE_ARANGE);
1139 rcinc_LineF(lineF);
1140 stats__cache_F_wbacks++;
1141 }
1142}
1143
1144/* Fetch the cacheline 'wix' from the backing store. The tag
1145 associated with 'wix' is assumed to have already been filled in;
1146 hence that is used to determine where in the backing store to read
1147 from. */
1148static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
1149{
1150 Word i;
1151 Addr tag;
1152 CacheLine* cl;
1153 LineZ* lineZ;
1154 LineF* lineF;
1155
1156 if (0)
1157 VG_(printf)("scache fetch line %d\n", (Int)wix);
1158
1159 tl_assert(wix >= 0 && wix < N_WAY_NENT);
1160
1161 tag = cache_shmem.tags0[wix];
1162 cl = &cache_shmem.lyns0[wix];
1163
1164 /* reject nonsense requests */
1165 tl_assert(is_valid_scache_tag(tag));
1166
1167 lineZ = NULL;
1168 lineF = NULL;
1169 find_ZF_for_reading( &lineZ, &lineF, tag );
1170 tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
1171
1172 /* expand the data into the bottom layer of the tree, then get
1173 cacheline_normalise to build the descriptor array. */
1174 if (lineF) {
1175 tl_assert(lineF->inUse);
1176 for (i = 0; i < N_LINE_ARANGE; i++) {
1177 cl->svals[i] = lineF->w64s[i];
1178 }
1179 stats__cache_F_fetches++;
1180 } else {
1181 for (i = 0; i < N_LINE_ARANGE; i++) {
1182 SVal sv;
1183 UWord ix = read_twobit_array( lineZ->ix2s, i );
1184 /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
1185 sv = lineZ->dict[ix];
1186 tl_assert(sv != SVal_INVALID);
1187 cl->svals[i] = sv;
1188 }
1189 stats__cache_Z_fetches++;
1190 }
1191 normalise_CacheLine( cl );
1192}
1193
1194static void shmem__invalidate_scache ( void ) {
1195 Word wix;
1196 if (0) VG_(printf)("%s","scache inval\n");
1197 tl_assert(!is_valid_scache_tag(1));
1198 for (wix = 0; wix < N_WAY_NENT; wix++) {
1199 cache_shmem.tags0[wix] = 1/*INVALID*/;
1200 }
1201 stats__cache_invals++;
1202}
1203
1204static void shmem__flush_and_invalidate_scache ( void ) {
1205 Word wix;
1206 Addr tag;
1207 if (0) VG_(printf)("%s","scache flush and invalidate\n");
1208 tl_assert(!is_valid_scache_tag(1));
1209 for (wix = 0; wix < N_WAY_NENT; wix++) {
1210 tag = cache_shmem.tags0[wix];
1211 if (tag == 1/*INVALID*/) {
1212 /* already invalid; nothing to do */
1213 } else {
1214 tl_assert(is_valid_scache_tag(tag));
1215 cacheline_wback( wix );
1216 }
1217 cache_shmem.tags0[wix] = 1/*INVALID*/;
1218 }
1219 stats__cache_flushes++;
1220 stats__cache_invals++;
1221}
1222
1223
1224static inline Bool aligned16 ( Addr a ) {
1225 return 0 == (a & 1);
1226}
1227static inline Bool aligned32 ( Addr a ) {
1228 return 0 == (a & 3);
1229}
1230static inline Bool aligned64 ( Addr a ) {
1231 return 0 == (a & 7);
1232}
1233static inline UWord get_cacheline_offset ( Addr a ) {
1234 return (UWord)(a & (N_LINE_ARANGE - 1));
1235}
1236static inline Addr cacheline_ROUNDUP ( Addr a ) {
1237 return ROUNDUP(a, N_LINE_ARANGE);
1238}
1239static inline Addr cacheline_ROUNDDN ( Addr a ) {
1240 return ROUNDDN(a, N_LINE_ARANGE);
1241}
1242static inline UWord get_treeno ( Addr a ) {
1243 return get_cacheline_offset(a) >> 3;
1244}
1245static inline UWord get_tree_offset ( Addr a ) {
1246 return a & 7;
1247}
1248
1249static __attribute__((noinline))
1250 CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
1251static inline CacheLine* get_cacheline ( Addr a )
1252{
1253 /* tag is 'a' with the in-line offset masked out,
1254 eg a[31]..a[4] 0000 */
1255 Addr tag = a & ~(N_LINE_ARANGE - 1);
1256 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1257 stats__cache_totrefs++;
1258 if (LIKELY(tag == cache_shmem.tags0[wix])) {
1259 return &cache_shmem.lyns0[wix];
1260 } else {
1261 return get_cacheline_MISS( a );
1262 }
1263}
1264
1265static __attribute__((noinline))
1266 CacheLine* get_cacheline_MISS ( Addr a )
1267{
1268 /* tag is 'a' with the in-line offset masked out,
1269 eg a[31]..a[4] 0000 */
1270
1271 CacheLine* cl;
1272 Addr* tag_old_p;
1273 Addr tag = a & ~(N_LINE_ARANGE - 1);
1274 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1275
1276 tl_assert(tag != cache_shmem.tags0[wix]);
1277
1278 /* Dump the old line into the backing store. */
1279 stats__cache_totmisses++;
1280
1281 cl = &cache_shmem.lyns0[wix];
1282 tag_old_p = &cache_shmem.tags0[wix];
1283
1284 if (is_valid_scache_tag( *tag_old_p )) {
1285 /* EXPENSIVE and REDUNDANT: callee does it */
1286 if (CHECK_ZSM)
1287 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1288 cacheline_wback( wix );
1289 }
1290 /* and reload the new one */
1291 *tag_old_p = tag;
1292 cacheline_fetch( wix );
1293 if (CHECK_ZSM)
1294 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1295 return cl;
1296}
1297
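/* Added commentary: the pulldown_to_* functions below split a node
   that currently holds a wide (64-, 32- or 16-bit) value into
   narrower pieces, copying the value into the newly exposed slot, so
   that a narrower access can then address its own leaf directly. */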
1298static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1299 stats__cline_64to32pulldown++;
1300 switch (toff) {
1301 case 0: case 4:
1302 tl_assert(descr & TREE_DESCR_64);
1303 tree[4] = tree[0];
1304 descr &= ~TREE_DESCR_64;
1305 descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
1306 break;
1307 default:
1308 tl_assert(0);
1309 }
1310 return descr;
1311}
1312
1313static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1314 stats__cline_32to16pulldown++;
1315 switch (toff) {
1316 case 0: case 2:
1317 if (!(descr & TREE_DESCR_32_0)) {
1318 descr = pulldown_to_32(tree, 0, descr);
1319 }
1320 tl_assert(descr & TREE_DESCR_32_0);
1321 tree[2] = tree[0];
1322 descr &= ~TREE_DESCR_32_0;
1323 descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
1324 break;
1325 case 4: case 6:
1326 if (!(descr & TREE_DESCR_32_1)) {
1327 descr = pulldown_to_32(tree, 4, descr);
1328 }
1329 tl_assert(descr & TREE_DESCR_32_1);
1330 tree[6] = tree[4];
1331 descr &= ~TREE_DESCR_32_1;
1332 descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
1333 break;
1334 default:
1335 tl_assert(0);
1336 }
1337 return descr;
1338}
1339
1340static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1341 stats__cline_16to8pulldown++;
1342 switch (toff) {
1343 case 0: case 1:
1344 if (!(descr & TREE_DESCR_16_0)) {
1345 descr = pulldown_to_16(tree, 0, descr);
1346 }
1347 tl_assert(descr & TREE_DESCR_16_0);
1348 tree[1] = tree[0];
1349 descr &= ~TREE_DESCR_16_0;
1350 descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
1351 break;
1352 case 2: case 3:
1353 if (!(descr & TREE_DESCR_16_1)) {
1354 descr = pulldown_to_16(tree, 2, descr);
1355 }
1356 tl_assert(descr & TREE_DESCR_16_1);
1357 tree[3] = tree[2];
1358 descr &= ~TREE_DESCR_16_1;
1359 descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
1360 break;
1361 case 4: case 5:
1362 if (!(descr & TREE_DESCR_16_2)) {
1363 descr = pulldown_to_16(tree, 4, descr);
1364 }
1365 tl_assert(descr & TREE_DESCR_16_2);
1366 tree[5] = tree[4];
1367 descr &= ~TREE_DESCR_16_2;
1368 descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
1369 break;
1370 case 6: case 7:
1371 if (!(descr & TREE_DESCR_16_3)) {
1372 descr = pulldown_to_16(tree, 6, descr);
1373 }
1374 tl_assert(descr & TREE_DESCR_16_3);
1375 tree[7] = tree[6];
1376 descr &= ~TREE_DESCR_16_3;
1377 descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
1378 break;
1379 default:
1380 tl_assert(0);
1381 }
1382 return descr;
1383}
1384
1385
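/* Added commentary: the pullup_descr_to_* functions below are the
   descriptor-level converse of the pulldowns: they mark a group of
   narrower nodes as covered by a single wider node, typically so that
   a wider store can then record one value for the whole range.  Only
   the descriptor is adjusted here; the tree values themselves are
   handled by the callers. */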
1386static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
1387 UShort mask;
1388 switch (toff) {
1389 case 0:
1390 mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
1391 tl_assert( (descr & mask) == mask );
1392 descr &= ~mask;
1393 descr |= TREE_DESCR_16_0;
1394 break;
1395 case 2:
1396 mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
1397 tl_assert( (descr & mask) == mask );
1398 descr &= ~mask;
1399 descr |= TREE_DESCR_16_1;
1400 break;
1401 case 4:
1402 mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
1403 tl_assert( (descr & mask) == mask );
1404 descr &= ~mask;
1405 descr |= TREE_DESCR_16_2;
1406 break;
1407 case 6:
1408 mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
1409 tl_assert( (descr & mask) == mask );
1410 descr &= ~mask;
1411 descr |= TREE_DESCR_16_3;
1412 break;
1413 default:
1414 tl_assert(0);
1415 }
1416 return descr;
1417}
1418
1419static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
1420 UShort mask;
1421 switch (toff) {
1422 case 0:
1423 if (!(descr & TREE_DESCR_16_0))
1424 descr = pullup_descr_to_16(descr, 0);
1425 if (!(descr & TREE_DESCR_16_1))
1426 descr = pullup_descr_to_16(descr, 2);
1427 mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
1428 tl_assert( (descr & mask) == mask );
1429 descr &= ~mask;
1430 descr |= TREE_DESCR_32_0;
1431 break;
1432 case 4:
1433 if (!(descr & TREE_DESCR_16_2))
1434 descr = pullup_descr_to_16(descr, 4);
1435 if (!(descr & TREE_DESCR_16_3))
1436 descr = pullup_descr_to_16(descr, 6);
1437 mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
1438 tl_assert( (descr & mask) == mask );
1439 descr &= ~mask;
1440 descr |= TREE_DESCR_32_1;
1441 break;
1442 default:
1443 tl_assert(0);
1444 }
1445 return descr;
1446}
1447
1448static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
1449 switch (toff) {
1450 case 0: case 4:
1451 return 0 != (descr & TREE_DESCR_64);
1452 default:
1453 tl_assert(0);
1454 }
1455}
1456
1457static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
1458 switch (toff) {
1459 case 0:
1460 return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
1461 case 2:
1462 return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
1463 case 4:
1464 return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
1465 case 6:
1466 return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
1467 default:
1468 tl_assert(0);
1469 }
1470}
1471
1472/* ------------ Cache management ------------ */
1473
1474static void zsm_flush_cache ( void )
1475{
1476 shmem__flush_and_invalidate_scache();
1477}
1478
1479
1480static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
1481{
1482 tl_assert( sizeof(UWord) == sizeof(Addr) );
1483
1484 rcinc = p_rcinc;
1485 rcdec = p_rcdec;
1486
1487 tl_assert(map_shmem == NULL);
1488 map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
1489 HG_(free),
1490 NULL/*unboxed UWord cmp*/);
1491 tl_assert(map_shmem != NULL);
1492 shmem__invalidate_scache();
1493
1494 /* a SecMap must contain an integral number of CacheLines */
1495 tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
1496 /* also ... a CacheLine holds an integral number of trees */
1497 tl_assert(0 == (N_LINE_ARANGE % 8));
1498}
1499
1500/////////////////////////////////////////////////////////////////
1501/////////////////////////////////////////////////////////////////
1502// //
1503// SECTION END compressed shadow memory //
1504// //
1505/////////////////////////////////////////////////////////////////
1506/////////////////////////////////////////////////////////////////
1507
1508
1509
1510/////////////////////////////////////////////////////////////////
1511/////////////////////////////////////////////////////////////////
1512// //
1513// SECTION BEGIN vts primitives //
1514// //
1515/////////////////////////////////////////////////////////////////
1516/////////////////////////////////////////////////////////////////
1517
1518#ifndef __HB_VTS_H
1519#define __HB_VTS_H
1520
1521/* VtsIDs can't exceed 30 bits, since they have to be packed into the
1522 lowest 30 bits of an SVal. */
1523typedef UInt VtsID;
1524#define VtsID_INVALID 0xFFFFFFFF
1525
1526/* A VTS contains .ts, its vector clock, and also .id, a field to hold
1527 a backlink for the caller's convenience. Since we have no idea
1528 what to set that to in the library, it always gets set to
1529 VtsID_INVALID. */
1530typedef
1531 struct {
1532 VtsID id;
1533 XArray* ts; /* XArray* ScalarTS(abstract) */
1534 }
1535 VTS;
1536
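/* Worked example (added commentary; the thread names are
   hypothetical): a VTS is kept sorted by Thr*, and threads that are
   absent are implicitly at timestamp 0.  If A = { T1:3, T2:5 } and
   B = { T2:7, T3:1 }, then VTS__join(A,B) = { T1:3, T2:7, T3:1 }
   (pointwise maximum), VTS__tick(T1,A) = { T1:4, T2:5 }, and
   VTS__cmp(A,B) = POrd_UN, since neither vector dominates the
   other. */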
1537
1538/* Create a new, empty VTS. */
1539VTS* VTS__new ( void );
1540
1541/* Delete this VTS in its entirety. */
1542void VTS__delete ( VTS* vts );
1543
1544/* Create a new singleton VTS. */
1545VTS* VTS__singleton ( Thr* thr, ULong tym );
1546
1547/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
1548 not modified. */
1549VTS* VTS__tick ( Thr* me, VTS* vts );
1550
1551/* Return a new VTS constructed as the join (max) of the 2 args.
1552 Neither arg is modified. */
1553VTS* VTS__join ( VTS* a, VTS* b );
1554
1555/* Compute the partial ordering relation of the two args. */
1556typedef
1557 enum { POrd_EQ=4, POrd_LT, POrd_GT, POrd_UN }
1558 POrd;
1559
1560POrd VTS__cmp ( VTS* a, VTS* b );
1561
1562/* Compute an arbitrary structural (total) ordering on the two args,
1563 based on their VCs, so they can be looked up in a table, tree, etc.
1564 Returns -1, 0 or 1. */
1565Word VTS__cmp_structural ( VTS* a, VTS* b );
1566
1567/* Debugging only. Display the given VTS in the buffer. */
1568void VTS__show ( HChar* buf, Int nBuf, VTS* vts );
1569
1570/* Debugging only. Return vts[index], so to speak. */
1571ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
1572
1573#endif /* ! __HB_VTS_H */
1574
1575
1576/*--------------- to do with Vector Timestamps ---------------*/
1577
1578/* Scalar Timestamp */
1579typedef
1580 struct {
1581 Thr* thr;
1582 ULong tym;
1583 }
1584 ScalarTS;
1585
1586
1587static Bool is_sane_VTS ( VTS* vts )
1588{
1589 UWord i, n;
1590 ScalarTS *st1, *st2;
1591 if (!vts) return False;
1592 if (!vts->ts) return False;
1593 n = VG_(sizeXA)( vts->ts );
1594 if (n >= 2) {
1595 for (i = 0; i < n-1; i++) {
1596 st1 = VG_(indexXA)( vts->ts, i );
1597 st2 = VG_(indexXA)( vts->ts, i+1 );
1598 if (st1->thr >= st2->thr)
1599 return False;
1600 if (st1->tym == 0 || st2->tym == 0)
1601 return False;
1602 }
1603 }
1604 return True;
1605}
1606
1607
1608/* Create a new, empty VTS.
1609*/
1610VTS* VTS__new ( void )
1611{
1612 VTS* vts;
1613 vts = HG_(zalloc)( "libhb.VTS__new.1", sizeof(VTS) );
1614 tl_assert(vts);
1615 vts->id = VtsID_INVALID;
1616 vts->ts = VG_(newXA)( HG_(zalloc), "libhb.VTS__new.2",
1617 HG_(free), sizeof(ScalarTS) );
1618 tl_assert(vts->ts);
1619 return vts;
1620}
1621
1622
1623/* Delete this VTS in its entirety.
1624*/
1625void VTS__delete ( VTS* vts )
1626{
1627 tl_assert(vts);
1628 tl_assert(vts->ts);
1629 VG_(deleteXA)( vts->ts );
1630 HG_(free)(vts);
1631}
1632
1633
1634/* Create a new singleton VTS.
1635*/
1636VTS* VTS__singleton ( Thr* thr, ULong tym ) {
1637 ScalarTS st;
1638 VTS* vts;
1639 tl_assert(thr);
1640 tl_assert(tym >= 1);
1641 vts = VTS__new();
1642 st.thr = thr;
1643 st.tym = tym;
1644 VG_(addToXA)( vts->ts, &st );
1645 return vts;
1646}
1647
1648
1649/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
1650 not modified.
1651*/
1652VTS* VTS__tick ( Thr* me, VTS* vts )
1653{
1654 ScalarTS* here = NULL;
1655 ScalarTS tmp;
1656 VTS* res;
1657 Word i, n;
1658 tl_assert(me);
1659 tl_assert(is_sane_VTS(vts));
1660 //if (0) VG_(printf)("tick vts thrno %ld szin %d\n",
1661 // (Word)me->errmsg_index, (Int)VG_(sizeXA)(vts) );
1662 res = VTS__new();
1663 n = VG_(sizeXA)( vts->ts );
1664
1665 /* main loop doesn't handle zero-entry case correctly, so
1666 special-case it. */
1667 if (n == 0) {
1668 tmp.thr = me;
1669 tmp.tym = 1;
1670 VG_(addToXA)( res->ts, &tmp );
1671 tl_assert(is_sane_VTS(res));
1672 return res;
1673 }
1674
1675 for (i = 0; i < n; i++) {
1676 here = VG_(indexXA)( vts->ts, i );
1677 if (me < here->thr) {
1678 /* We just went past 'me', without seeing it. */
1679 tmp.thr = me;
1680 tmp.tym = 1;
1681 VG_(addToXA)( res->ts, &tmp );
1682 tmp = *here;
1683 VG_(addToXA)( res->ts, &tmp );
1684 i++;
1685 break;
1686 }
1687 else if (me == here->thr) {
1688 tmp = *here;
1689 tmp.tym++;
1690 VG_(addToXA)( res->ts, &tmp );
1691 i++;
1692 break;
1693 }
1694 else /* me > here->thr */ {
1695 tmp = *here;
1696 VG_(addToXA)( res->ts, &tmp );
1697 }
1698 }
1699 tl_assert(i >= 0 && i <= n);
1700 if (i == n && here && here->thr < me) {
1701 tmp.thr = me;
1702 tmp.tym = 1;
1703 VG_(addToXA)( res->ts, &tmp );
1704 } else {
1705 for (/*keepgoing*/; i < n; i++) {
1706 here = VG_(indexXA)( vts->ts, i );
1707 tmp = *here;
1708 VG_(addToXA)( res->ts, &tmp );
1709 }
1710 }
1711 tl_assert(is_sane_VTS(res));
1712 //if (0) VG_(printf)("tick vts thrno %ld szou %d\n",
1713 // (Word)me->errmsg_index, (Int)VG_(sizeXA)(res) );
1714 return res;
1715}
1716
1717
1718/* Return a new VTS constructed as the join (max) of the 2 args.
1719 Neither arg is modified.
1720*/
1721VTS* VTS__join ( VTS* a, VTS* b )
1722{
1723 Word ia, ib, useda, usedb;
1724 ULong tyma, tymb, tymMax;
1725 Thr* thr;
1726 VTS* res;
1727 ScalarTS *tmpa, *tmpb;
1728
1729 tl_assert(a && a->ts);
1730 tl_assert(b && b->ts);
1731 useda = VG_(sizeXA)( a->ts );
1732 usedb = VG_(sizeXA)( b->ts );
1733
1734 res = VTS__new();
1735 ia = ib = 0;
1736
1737 while (1) {
1738
1739 /* This logic is to enumerate triples (thr, tyma, tymb) drawn
1740 from a and b in order, where thr is the next Thr*
1741 occurring in either a or b, and tyma/b are the relevant
1742 scalar timestamps, taking into account implicit zeroes. */
1743 tl_assert(ia >= 0 && ia <= useda);
1744 tl_assert(ib >= 0 && ib <= usedb);
1745 tmpa = tmpb = NULL;
1746
1747 if (ia == useda && ib == usedb) {
1748 /* both empty - done */
1749 break;
1750 }
1751 else
1752 if (ia == useda && ib != usedb) {
1753 /* a empty, use up b */
1754 tmpb = VG_(indexXA)( b->ts, ib );
1755 thr = tmpb->thr;
1756 tyma = 0;
1757 tymb = tmpb->tym;
1758 ib++;
1759 }
1760 else
1761 if (ia != useda && ib == usedb) {
1762 /* b empty, use up a */
1763 tmpa = VG_(indexXA)( a->ts, ia );
1764 thr = tmpa->thr;
1765 tyma = tmpa->tym;
1766 tymb = 0;
1767 ia++;
1768 }
1769 else {
1770 /* both not empty; extract lowest-Thr*'d triple */
1771 tmpa = VG_(indexXA)( a->ts, ia );
1772 tmpb = VG_(indexXA)( b->ts, ib );
1773 if (tmpa->thr < tmpb->thr) {
1774 /* a has the lowest unconsidered Thr* */
1775 thr = tmpa->thr;
1776 tyma = tmpa->tym;
1777 tymb = 0;
1778 ia++;
1779 }
1780 else
1781 if (tmpa->thr > tmpb->thr) {
1782 /* b has the lowest unconsidered Thr* */
1783 thr = tmpb->thr;
1784 tyma = 0;
1785 tymb = tmpb->tym;
1786 ib++;
1787 } else {
1788 /* they both next mention the same Thr* */
1789 tl_assert(tmpa->thr == tmpb->thr);
1790 thr = tmpa->thr; /* == tmpb->thr */
1791 tyma = tmpa->tym;
1792 tymb = tmpb->tym;
1793 ia++;
1794 ib++;
1795 }
1796 }
1797
1798 /* having laboriously determined (thr, tyma, tymb), do something
1799 useful with it. */
1800 tymMax = tyma > tymb ? tyma : tymb;
1801 if (tymMax > 0) {
1802 ScalarTS st;
1803 st.thr = thr;
1804 st.tym = tymMax;
1805 VG_(addToXA)( res->ts, &st );
1806 }
1807
1808 }
1809
1810 tl_assert(is_sane_VTS( res ));
1811
1812 return res;
1813}
1814
1815
1816/* Compute the partial ordering relation of the two args.
1817*/
1818POrd VTS__cmp ( VTS* a, VTS* b )
1819{
1820 Word ia, ib, useda, usedb;
1821 ULong tyma, tymb;
1822 Thr* thr;
1823 ScalarTS *tmpa, *tmpb;
1824
1825 Bool all_leq = True;
1826 Bool all_geq = True;
1827
1828 tl_assert(a && a->ts);
1829 tl_assert(b && b->ts);
1830 useda = VG_(sizeXA)( a->ts );
1831 usedb = VG_(sizeXA)( b->ts );
1832
1833 ia = ib = 0;
1834
1835 while (1) {
1836
1837 /* This logic is to enumerate triples (thr, tyma, tymb) drawn
1838 from a and b in order, where thr is the next Thr*
1839 occurring in either a or b, and tyma/b are the relevant
1840 scalar timestamps, taking into account implicit zeroes. */
1841 tl_assert(ia >= 0 && ia <= useda);
1842 tl_assert(ib >= 0 && ib <= usedb);
1843 tmpa = tmpb = NULL;
1844
1845 if (ia == useda && ib == usedb) {
1846 /* both empty - done */
1847 break;
1848 }
1849 else
1850 if (ia == useda && ib != usedb) {
1851 /* a empty, use up b */
1852 tmpb = VG_(indexXA)( b->ts, ib );
1853 thr = tmpb->thr;
1854 tyma = 0;
1855 tymb = tmpb->tym;
1856 ib++;
1857 }
1858 else
1859 if (ia != useda && ib == usedb) {
1860 /* b empty, use up a */
1861 tmpa = VG_(indexXA)( a->ts, ia );
1862 thr = tmpa->thr;
1863 tyma = tmpa->tym;
1864 tymb = 0;
1865 ia++;
1866 }
1867 else {
1868 /* both not empty; extract lowest-Thr*'d triple */
1869 tmpa = VG_(indexXA)( a->ts, ia );
1870 tmpb = VG_(indexXA)( b->ts, ib );
1871 if (tmpa->thr < tmpb->thr) {
1872 /* a has the lowest unconsidered Thr* */
1873 thr = tmpa->thr;
1874 tyma = tmpa->tym;
1875 tymb = 0;
1876 ia++;
1877 }
1878 else
1879 if (tmpa->thr > tmpb->thr) {
1880 /* b has the lowest unconsidered Thr* */
1881 thr = tmpb->thr;
1882 tyma = 0;
1883 tymb = tmpb->tym;
1884 ib++;
1885 } else {
1886 /* they both next mention the same Thr* */
1887 tl_assert(tmpa->thr == tmpb->thr);
1888 thr = tmpa->thr; /* == tmpb->thr */
1889 tyma = tmpa->tym;
1890 tymb = tmpb->tym;
1891 ia++;
1892 ib++;
1893 }
1894 }
1895
1896 /* having laboriously determined (thr, tyma, tymb), do something
1897 useful with it. */
1898 if (tyma < tymb)
1899 all_geq = False;
1900 if (tyma > tymb)
1901 all_leq = False;
1902 }
1903
1904 if (all_leq && all_geq)
1905 return POrd_EQ;
1906 /* now we know they aren't equal, so either all_leq or all_geq or
1907 both are false. */
1908 if (all_leq)
1909 return POrd_LT;
1910 if (all_geq)
1911 return POrd_GT;
1912 /* hmm, neither all_geq nor all_leq. This means unordered. */
1913 return POrd_UN;
1914}
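
/* Illustrative sketch only (not part of the library): a worked
   example of the partial order computed above, using VTSs built from
   the primitives earlier in this section.  The Thr* values t1 and t2
   are assumed to come from Thr__new(). */
#if 0
static void example__vts_cmp ( Thr* t1, Thr* t2 )
{
   VTS *s1, *s2, *a, *b;
   s1 = VTS__singleton( t1, 1 );      /* [t1:1]        */
   s2 = VTS__singleton( t2, 1 );      /* [t2:1]        */
   a  = VTS__tick( t1, s1 );          /* [t1:2]        */
   b  = VTS__join( s1, s2 );          /* [t1:1, t2:1]  */
   /* s1 is dominated by a, since a is strictly ahead on t1. */
   tl_assert( VTS__cmp( s1, a ) == POrd_LT );
   /* a and b are unordered: a is ahead on t1, b is ahead on t2. */
   tl_assert( VTS__cmp( a, b ) == POrd_UN );
   VTS__delete(s1); VTS__delete(s2); VTS__delete(a); VTS__delete(b);
}
#endif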
1915
1916
1917/* Compute an arbitrary structural (total) ordering on the two args,
1918 based on their VCs, so they can be looked up in a table, tree, etc.
1919 Returns -1, 0 or 1. (really just 'deriving Ord' :-)
1920*/
1921Word VTS__cmp_structural ( VTS* a, VTS* b )
1922{
1923 /* We just need to generate an arbitrary total ordering based on
1924 a->ts and b->ts. Preferably do it in a way which comes across likely
1925 differences relatively quickly. */
1926 Word i, useda, usedb;
1927 ScalarTS *tmpa, *tmpb;
1928
1929 tl_assert(a && a->ts);
1930 tl_assert(b && b->ts);
1931 useda = VG_(sizeXA)( a->ts );
1932 usedb = VG_(sizeXA)( b->ts );
1933
1934 if (useda < usedb) return -1;
1935 if (useda > usedb) return 1;
1936
1937 /* Same length vectors, so let's step through them together. */
1938 tl_assert(useda == usedb);
1939 for (i = 0; i < useda; i++) {
1940 tmpa = VG_(indexXA)( a->ts, i );
1941 tmpb = VG_(indexXA)( b->ts, i );
1942 if (tmpa->tym < tmpb->tym) return -1;
1943 if (tmpa->tym > tmpb->tym) return 1;
1944 if (tmpa->thr < tmpb->thr) return -1;
1945 if (tmpa->thr > tmpb->thr) return 1;
1946 }
1947
1948 /* They're identical. */
1949 return 0;
1950}
1951
1952
1953/* Debugging only. Display the given VTS in the buffer.
1954*/
1955void VTS__show ( HChar* buf, Int nBuf, VTS* vts ) {
1956 ScalarTS* st;
1957 HChar unit[64];
1958 Word i, n;
1959 Int avail = nBuf;
1960 tl_assert(vts && vts->ts);
1961 tl_assert(nBuf > 16);
1962 buf[0] = '[';
1963 buf[1] = 0;
1964 n = VG_(sizeXA)( vts->ts );
1965 for (i = 0; i < n; i++) {
1966 tl_assert(avail >= 40);
1967 st = VG_(indexXA)( vts->ts, i );
1968 VG_(memset)(unit, 0, sizeof(unit));
1969 VG_(sprintf)(unit, i < n-1 ? "%p:%lld " : "%p:%lld",
1970 st->thr, st->tym);
1971 if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
1972 VG_(strcat)(buf, " ...]");
1973 buf[nBuf-1] = 0;
1974 return;
1975 }
1976 VG_(strcat)(buf, unit);
1977 avail -= VG_(strlen)(unit);
1978 }
1979 VG_(strcat)(buf, "]");
1980 buf[nBuf-1] = 0;
1981}
1982
1983
1984/* Debugging only. Return vts[index], so to speak.
1985*/
1986ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) {
1987 UWord i, n;
1988 tl_assert(vts && vts->ts);
1989 n = VG_(sizeXA)( vts->ts );
1990 for (i = 0; i < n; i++) {
1991 ScalarTS* st = VG_(indexXA)( vts->ts, i );
1992 if (st->thr == idx)
1993 return st->tym;
1994 }
1995 return 0;
1996}
1997
1998
1999/////////////////////////////////////////////////////////////////
2000/////////////////////////////////////////////////////////////////
2001// //
2002// SECTION END vts primitives //
2003// //
2004/////////////////////////////////////////////////////////////////
2005/////////////////////////////////////////////////////////////////
2006
2007
2008
2009/////////////////////////////////////////////////////////////////
2010/////////////////////////////////////////////////////////////////
2011// //
2012// SECTION BEGIN main library //
2013// //
2014/////////////////////////////////////////////////////////////////
2015/////////////////////////////////////////////////////////////////
2016
2017
2018/////////////////////////////////////////////////////////
2019// //
2020// VTS set //
2021// //
2022/////////////////////////////////////////////////////////
2023
2024static WordFM* /* VTS* void void */ vts_set = NULL;
2025
2026static void vts_set_init ( void )
2027{
2028 tl_assert(!vts_set);
2029 vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
2030 HG_(free),
2031 (Word(*)(UWord,UWord))VTS__cmp_structural );
2032 tl_assert(vts_set);
2033}
2034
2035/* Given a newly made VTS, look in vts_set to see if we already have
2036 an identical one. If yes, free up this one and return instead a
2037 pointer to the existing one. If no, add this one to the set and
2038 return the same pointer. Caller differentiates the two cases by
2039 comparing returned pointer with the supplied one (although that
2040 does require that the supplied VTS is not already in the set).
2041*/
2042static VTS* vts_set__find_and_dealloc__or_add ( VTS* cand )
2043{
2044 UWord keyW, valW;
2045 /* lookup cand (by value) */
2046 if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
2047 /* found it */
2048 tl_assert(valW == 0);
2049 /* if this fails, cand (by ref) was already present (!) */
2050 tl_assert(keyW != (UWord)cand);
2051 VTS__delete(cand);
2052 return (VTS*)keyW;
2053 } else {
2054 /* not present. Add and return pointer to same. */
2055 VG_(addToFM)( vts_set, (UWord)cand, 0/*val is unused*/ );
2056 return cand;
2057 }
2058}
2059
2060
2061/////////////////////////////////////////////////////////
2062// //
2063// VTS table //
2064// //
2065/////////////////////////////////////////////////////////
2066
2067static void VtsID__invalidate_caches ( void ); /* fwds */
2068
2069/* A type to hold VTS table entries. Invariants:
2070 If .vts == NULL, then this entry is not in use, so:
2071 - .rc == 0
2072 - this entry is on the freelist (unfortunately, does not imply
2073 any constraints on value for .freelink)
2074 If .vts != NULL, then this entry is in use:
2075 - .vts is findable in vts_set
2076 - .vts->id == this entry number
2077 - no specific value for .rc (even 0 is OK)
2078 - this entry is not on freelist, so .freelink == VtsID_INVALID
2079*/
2080typedef
2081 struct {
2082 VTS* vts; /* vts, in vts_set */
2083 UWord rc; /* reference count - enough for entire aspace */
2084 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
2085 }
2086 VtsTE;
2087
2088/* The VTS table. */
2089static XArray* /* of VtsTE */ vts_tab = NULL;
2090
2091/* An index into the VTS table, indicating the start of the list of
2092 free (available for use) entries. If the list is empty, this is
2093 VtsID_INVALID. */
2094static VtsID vts_tab_freelist = VtsID_INVALID;
2095
2096/* Do a GC of vts_tab when the freelist becomes empty AND the size of
2097 vts_tab equals or exceeds this size. After GC, the value here is
2098 set appropriately so as to check for the next GC point. */
2099static Word vts_next_GC_at = 1000;
2100
2101static void vts_tab_init ( void )
2102{
2103 vts_tab
2104 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2105 HG_(free), sizeof(VtsTE) );
2106 vts_tab_freelist
2107 = VtsID_INVALID;
2108 tl_assert(vts_tab);
2109}
2110
2111/* Add ii to the free list, checking that it looks out-of-use. */
2112static void add_to_free_list ( VtsID ii )
2113{
2114 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2115 tl_assert(ie->vts == NULL);
2116 tl_assert(ie->rc == 0);
2117 tl_assert(ie->freelink == VtsID_INVALID);
2118 ie->freelink = vts_tab_freelist;
2119 vts_tab_freelist = ii;
2120}
2121
2122/* Get an entry from the free list. This will return VtsID_INVALID if
2123 the free list is empty. */
2124static VtsID get_from_free_list ( void )
2125{
2126 VtsID ii;
2127 VtsTE* ie;
2128 if (vts_tab_freelist == VtsID_INVALID)
2129 return VtsID_INVALID;
2130 ii = vts_tab_freelist;
2131 ie = VG_(indexXA)( vts_tab, ii );
2132 tl_assert(ie->vts == NULL);
2133 tl_assert(ie->rc == 0);
2134 vts_tab_freelist = ie->freelink;
2135 return ii;
2136}
2137
2138/* Produce a new VtsID that can be used, either by getting it from
2139 the freelist, or, if that is empty, by expanding vts_tab. */
2140static VtsID get_new_VtsID ( void )
2141{
2142 VtsID ii;
2143 VtsTE te;
2144 ii = get_from_free_list();
2145 if (ii != VtsID_INVALID)
2146 return ii;
2147 te.vts = NULL;
2148 te.rc = 0;
2149 te.freelink = VtsID_INVALID;
2150 ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2151 return ii;
2152}
2153
2154
2155/* Indirect callback from lib_zsm. */
2156static void VtsID__rcinc ( VtsID ii )
2157{
2158 VtsTE* ie;
2159 /* VG_(indexXA) does a range check for us */
2160 ie = VG_(indexXA)( vts_tab, ii );
2161 tl_assert(ie->vts); /* else it's not in use */
2162 tl_assert(ie->rc < ~0UL); /* else we can't continue */
2163 tl_assert(ie->vts->id == ii);
2164 ie->rc++;
2165}
2166
2167/* Indirect callback from lib_zsm. */
2168static void VtsID__rcdec ( VtsID ii )
2169{
2170 VtsTE* ie;
2171 /* VG_(indexXA) does a range check for us */
2172 ie = VG_(indexXA)( vts_tab, ii );
2173 tl_assert(ie->vts); /* else it's not in use */
2174 tl_assert(ie->rc > 0); /* else RC snafu */
2175 tl_assert(ie->vts->id == ii);
2176 ie->rc--;
2177}
2178
2179
2180/* Look up 'cand' in our collection of VTSs. If present, deallocate
2181 it and return the VtsID for the pre-existing version. If not
2182 present, add it to both vts_tab and vts_set, allocate a fresh VtsID
2183 for it, and return that. */
2184static VtsID vts_tab__find_and_dealloc__or_add ( VTS* cand )
2185{
2186 VTS* auld;
2187 tl_assert(cand->id == VtsID_INVALID);
2188 auld = vts_set__find_and_dealloc__or_add(cand);
2189 if (auld != cand) {
2190 /* We already have an Aulde one. Use that. */
2191 VtsTE* ie;
2192 tl_assert(auld->id != VtsID_INVALID);
2193 ie = VG_(indexXA)( vts_tab, auld->id );
2194 tl_assert(ie->vts == auld);
2195 return auld->id;
2196 } else {
2197 VtsID ii = get_new_VtsID();
2198 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2199 ie->vts = cand;
2200 ie->rc = 0;
2201 ie->freelink = VtsID_INVALID;
2202 cand->id = ii;
2203 return ii;
2204 }
2205}
2206
2207
2208static void show_vts_stats ( HChar* caller )
2209{
2210 UWord nSet, nTab, nLive;
2211 ULong totrc;
2212 UWord n, i;
2213 nSet = VG_(sizeFM)( vts_set );
2214 nTab = VG_(sizeXA)( vts_tab );
2215 totrc = 0;
2216 nLive = 0;
2217 n = VG_(sizeXA)( vts_tab );
2218 for (i = 0; i < n; i++) {
2219 VtsTE* ie = VG_(indexXA)( vts_tab, i );
2220 if (ie->vts) {
2221 nLive++;
2222 totrc += (ULong)ie->rc;
2223 } else {
2224 tl_assert(ie->rc == 0);
2225 }
2226 }
2227 VG_(printf)(" show_vts_stats %s\n", caller);
2228 VG_(printf)(" vts_tab size %4lu\n", nTab);
2229 VG_(printf)(" vts_tab live %4lu\n", nLive);
2230 VG_(printf)(" vts_set size %4lu\n", nSet);
2231 VG_(printf)(" total rc %4llu\n", totrc);
2232}
2233
2234/* NOT TO BE CALLED FROM WITHIN libzsm. */
sewardj8fd92d32008-11-20 23:17:01 +00002235__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00002236static void vts_tab__do_GC ( Bool show_stats )
2237{
2238 UWord i, nTab, nLive, nFreed;
2239
2240 /* check this is actually necessary. */
2241 tl_assert(vts_tab_freelist == VtsID_INVALID);
2242
2243 /* empty the caches for partial order checks and binary joins. We
2244 could do better and prune out the entries to be deleted, but it
2245 ain't worth the hassle. */
2246 VtsID__invalidate_caches();
2247
2248 /* First, make the reference counts up to date. */
2249 zsm_flush_cache();
2250
2251 nTab = VG_(sizeXA)( vts_tab );
2252
2253 if (show_stats) {
2254 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
2255 show_vts_stats("before GC");
2256 }
2257
2258 /* Now we can inspect the entire vts_tab. Any entries
2259 with zero .rc fields are now no longer in use and can be
2260 free list, removed from vts_set, and deleted. */
2261 nFreed = 0;
2262 for (i = 0; i < nTab; i++) {
2263 Bool present;
2264 UWord oldK = 0, oldV = 0;
2265 VtsTE* te = VG_(indexXA)( vts_tab, i );
2266 if (te->vts == NULL) {
2267 tl_assert(te->rc == 0);
2268 continue; /* already on the free list (presumably) */
2269 }
2270 if (te->rc > 0)
2271 continue; /* in use */
2272 /* Ok, we got one we can free. */
2273 tl_assert(te->vts->id == i);
2274 /* first, remove it from vts_set. */
2275 present = VG_(delFromFM)( vts_set,
2276 &oldK, &oldV, (UWord)te->vts );
2277 tl_assert(present); /* else it isn't in vts_set ?! */
2278 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2279 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
2280 /* now free the VTS itself */
2281 VTS__delete(te->vts);
2282 te->vts = NULL;
2283 /* and finally put this entry on the free list */
2284 tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
2285 add_to_free_list( i );
2286 nFreed++;
2287 }
2288
2289 /* Now figure out when the next GC should be. We'll allow the
2290 number of VTSs to double before GCing again. Except of course
2291 that since we can't (or, at least, don't) shrink vts_tab, we
2292 can't set the threshold value smaller than it. */
2293 tl_assert(nFreed <= nTab);
2294 nLive = nTab - nFreed;
2295 tl_assert(nLive >= 0 && nLive <= nTab);
2296 vts_next_GC_at = 2 * nLive;
2297 if (vts_next_GC_at < nTab)
2298 vts_next_GC_at = nTab;
2299
2300 if (show_stats) {
2301 show_vts_stats("after GC");
2302 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
2303 }
2304
sewardjd024ae52008-11-09 20:47:57 +00002305 if (VG_(clo_verbosity) > 1) {
sewardjf98e1c02008-10-25 16:22:41 +00002306 static UInt ctr = 0;
2307 tl_assert(nTab > 0);
sewardjd024ae52008-11-09 20:47:57 +00002308 VG_(message)(Vg_DebugMsg,
2309 "libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)",
sewardjf98e1c02008-10-25 16:22:41 +00002310 ctr++, nTab, nLive, (100ULL * nLive) / nTab);
2311 }
2312}
2313
2314
2315/////////////////////////////////////////////////////////
2316// //
2317// Vts IDs //
2318// //
2319/////////////////////////////////////////////////////////
2320
2321//////////////////////////
2322static ULong stats__getOrdering_queries = 0;
2323static ULong stats__getOrdering_misses = 0;
2324static ULong stats__join2_queries = 0;
2325static ULong stats__join2_misses = 0;
2326
2327static inline UInt ROL32 ( UInt w, Int n ) {
2328 w = (w << n) | (w >> (32-n));
2329 return w;
2330}
2331static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
2332 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
2333 return hash % nTab;
2334}
2335
2336#define N_GETORDERING_CACHE 1023
2337static
2338 struct { VtsID vi1; VtsID vi2; POrd ord; }
2339 getOrdering_cache[N_GETORDERING_CACHE];
2340
2341#define N_JOIN2_CACHE 1023
2342static
2343 struct { VtsID vi1; VtsID vi2; VtsID res; }
2344 join2_cache[N_JOIN2_CACHE];
2345
2346static void VtsID__invalidate_caches ( void ) {
2347 Int i;
2348 for (i = 0; i < N_GETORDERING_CACHE; i++) {
2349 getOrdering_cache[i].vi1 = VtsID_INVALID;
2350 getOrdering_cache[i].vi2 = VtsID_INVALID;
2351 getOrdering_cache[i].ord = 0; /* an invalid POrd value */
2352 }
2353 for (i = 0; i < N_JOIN2_CACHE; i++) {
2354 join2_cache[i].vi1 = VtsID_INVALID;
2355 join2_cache[i].vi2 = VtsID_INVALID;
2356 join2_cache[i].res = VtsID_INVALID;
2357 }
2358}
2359//////////////////////////
2360
sewardjd52392d2008-11-08 20:36:26 +00002361//static Bool VtsID__is_valid ( VtsID vi ) {
2362// VtsTE* ve;
2363// if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
2364// return False;
2365// ve = VG_(indexXA)( vts_tab, vi );
2366// if (!ve->vts)
2367// return False;
2368// tl_assert(ve->vts->id == vi);
2369// return True;
2370//}
sewardjf98e1c02008-10-25 16:22:41 +00002371
2372static VTS* VtsID__to_VTS ( VtsID vi ) {
2373 VtsTE* te = VG_(indexXA)( vts_tab, vi );
2374 tl_assert(te->vts);
2375 return te->vts;
2376}
2377
2378static void VtsID__pp ( VtsID vi ) {
2379 HChar buf[100];
2380 VTS* vts = VtsID__to_VTS(vi);
2381 VTS__show( buf, sizeof(buf)-1, vts );
2382 buf[sizeof(buf)-1] = 0;
2383 VG_(printf)("%s", buf);
2384}
2385
2386/* compute partial ordering relation of vi1 and vi2. */
2387__attribute__((noinline))
2388static POrd VtsID__getOrdering_WRK ( VtsID vi1, VtsID vi2 ) {
2389 UInt hash;
2390 POrd ord;
2391 VTS *v1, *v2;
2392 //if (vi1 == vi2) return POrd_EQ;
2393 tl_assert(vi1 != vi2);
2394 ////++
2395 stats__getOrdering_queries++;
2396 hash = hash_VtsIDs(vi1, vi2, N_GETORDERING_CACHE);
2397 if (getOrdering_cache[hash].vi1 == vi1
2398 && getOrdering_cache[hash].vi2 == vi2)
2399 return getOrdering_cache[hash].ord;
2400 stats__getOrdering_misses++;
2401 ////--
2402 v1 = VtsID__to_VTS(vi1);
2403 v2 = VtsID__to_VTS(vi2);
2404 ord = VTS__cmp( v1, v2 );
2405 ////++
2406 getOrdering_cache[hash].vi1 = vi1;
2407 getOrdering_cache[hash].vi2 = vi2;
2408 getOrdering_cache[hash].ord = ord;
2409 ////--
2410 return ord;
2411}
2412static inline POrd VtsID__getOrdering ( VtsID vi1, VtsID vi2 ) {
2413 return vi1 == vi2 ? POrd_EQ : VtsID__getOrdering_WRK(vi1, vi2);
2414}
2415
2416/* compute binary join */
2417__attribute__((noinline))
2418static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
2419 UInt hash;
2420 VtsID res;
2421 VTS *vts1, *vts2, *nyu;
2422 //if (vi1 == vi2) return vi1;
2423 tl_assert(vi1 != vi2);
2424 ////++
2425 stats__join2_queries++;
2426 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
2427 if (join2_cache[hash].vi1 == vi1
2428 && join2_cache[hash].vi2 == vi2)
2429 return join2_cache[hash].res;
2430 stats__join2_misses++;
2431 ////--
2432 vts1 = VtsID__to_VTS(vi1);
2433 vts2 = VtsID__to_VTS(vi2);
2434 nyu = VTS__join(vts1,vts2);
2435 res = vts_tab__find_and_dealloc__or_add(nyu);
2436 ////++
2437 join2_cache[hash].vi1 = vi1;
2438 join2_cache[hash].vi2 = vi2;
2439 join2_cache[hash].res = res;
2440 ////--
2441 return res;
2442}
2443static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
2444 return vi1 == vi2 ? vi1 : VtsID__join2_WRK(vi1, vi2);
2445}
2446
2447/* create a singleton VTS, namely [thr:1] */
2448static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
2449 VTS* nyu = VTS__singleton(thr,tym);
2450 return vts_tab__find_and_dealloc__or_add(nyu);
2451}
2452
2453/* tick operation, creates value 1 if specified index is absent */
2454static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
2455 VTS* vts = VtsID__to_VTS(vi);
2456 VTS* nyu = VTS__tick(idx,vts);
2457 return vts_tab__find_and_dealloc__or_add(nyu);
2458}
2459
2460/* index into a VTS (only for assertions) */
2461static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
2462 VTS* vts = VtsID__to_VTS(vi);
2463 return VTS__indexAt_SLOW( vts, idx );
2464}
2465
2466
2467/////////////////////////////////////////////////////////
2468// //
2469// Threads //
2470// //
2471/////////////////////////////////////////////////////////
2472
2473struct _Thr {
2474 /* Current VTSs for this thread. They change as we go along. viR
2475 is the VTS to be used for reads, viW for writes. Usually they
2476 are the same, but can differ when we deal with reader-writer
2477 locks. It is always the case that VtsID__getOrdering(viW,viR)
2478 == POrd_LT or POrd_EQ -- that is, viW must be the same as, or
2479 lagging behind, viR. */
2480 VtsID viR;
2481 VtsID viW;
2482 /* opaque (to us) data we hold on behalf of the library's user. */
2483 void* opaque;
2484};
2485
2486static Thr* Thr__new ( void ) {
2487 Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
2488 thr->viR = VtsID_INVALID;
2489 thr->viW = VtsID_INVALID;
2490 return thr;
2491}
2492
2493
2494/////////////////////////////////////////////////////////
2495// //
2496// Shadow Values //
2497// //
2498/////////////////////////////////////////////////////////
2499
2500// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
2501// hb_zsm.h. We have to do everything else here.
2502
2503/* SVal is 64 bit unsigned int.
2504
2505 <---------30---------> <---------30--------->
2506 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
2507 01 X--------------------X XX X--------------------X E(rror)
2508 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
2509 11 X--------------------X XX X--------------------X I: SVal_INVALID
2510*/
2511#define SVAL_TAGMASK (3ULL << 62)
2512
2513static inline Bool SVal__isC ( SVal s ) {
2514 return (0ULL << 62) == (s & SVAL_TAGMASK);
2515}
2516static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
2517 //tl_assert(VtsID__is_valid(rmini));
2518 //tl_assert(VtsID__is_valid(wmini));
2519 return (((ULong)rmini) << 32) | ((ULong)wmini);
2520}
2521static inline VtsID SVal__unC_Rmin ( SVal s ) {
2522 tl_assert(SVal__isC(s));
2523 return (VtsID)(s >> 32);
2524}
2525static inline VtsID SVal__unC_Wmin ( SVal s ) {
2526 tl_assert(SVal__isC(s));
2527 return (VtsID)(s & 0xFFFFFFFFULL);
2528}
2529
2530static Bool SVal__isE ( SVal s ) {
2531 return (1ULL << 62) == (s & SVAL_TAGMASK);
2532}
2533static SVal SVal__mkE ( void ) {
2534 return 1ULL << 62;
2535}
2536
2537static Bool SVal__isA ( SVal s ) {
2538 return (2ULL << 62) == (s & SVAL_TAGMASK);
2539}
2540static SVal SVal__mkA ( void ) {
2541 return 2ULL << 62;
2542}
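
/* Illustrative sketch only: a round trip through the C(Rmin,Wmin)
   encoding described in the layout comment above.  The VtsID values
   3 and 5 are arbitrary small constants chosen for the example. */
#if 0
static void example__sval_encoding ( void )
{
   SVal sv = SVal__mkC( 3/*Rmin*/, 5/*Wmin*/ );
   tl_assert( SVal__isC(sv) );               /* tag bits are 00 */
   tl_assert( SVal__unC_Rmin(sv) == 3 );     /* top 32 bits     */
   tl_assert( SVal__unC_Wmin(sv) == 5 );     /* bottom 32 bits  */
   tl_assert( !SVal__isA(sv) && !SVal__isE(sv) );
}
#endif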
2543
2544/* Direct callback from lib_zsm. */
2545static void SVal__rcinc ( SVal s ) {
2546 if (SVal__isC(s)) {
2547 VtsID__rcinc( SVal__unC_Rmin(s) );
2548 VtsID__rcinc( SVal__unC_Wmin(s) );
2549 }
2550}
2551
2552/* Direct callback from lib_zsm. */
2553static void SVal__rcdec ( SVal s ) {
2554 if (SVal__isC(s)) {
2555 VtsID__rcdec( SVal__unC_Rmin(s) );
2556 VtsID__rcdec( SVal__unC_Wmin(s) );
2557 }
2558}
2559
2560
2561/////////////////////////////////////////////////////////
2562// //
sewardjd86e3a22008-12-03 11:39:37 +00002563// A simple group (memory) allocator //
2564// //
2565/////////////////////////////////////////////////////////
2566
2567//////////////// BEGIN general group allocator
2568typedef
2569 struct {
2570 UWord elemSzB; /* element size */
2571 UWord nPerGroup; /* # elems per group */
2572 void* (*alloc)(HChar*, SizeT); /* group allocator */
2573 HChar* cc; /* group allocator's cc */
2574 void (*free)(void*); /* group allocator's free-er (unused) */
2575 /* XArray of void* (pointers to groups). The groups themselves.
2576 Each element is a pointer to a block of size (elemSzB *
2577 nPerGroup) bytes. */
2578 XArray* groups;
2579 /* next free element. Is a pointer to an element in one of the
2580 groups pointed to by .groups. */
2581 void* nextFree;
2582 }
2583 GroupAlloc;
2584
2585static void init_GroupAlloc ( /*MOD*/GroupAlloc* ga,
2586 UWord elemSzB,
2587 UWord nPerGroup,
2588 void* (*alloc)(HChar*, SizeT),
2589 HChar* cc,
2590 void (*free)(void*) )
2591{
2592 tl_assert(0 == (elemSzB % sizeof(UWord)));
2593 tl_assert(elemSzB >= sizeof(UWord));
2594 tl_assert(nPerGroup >= 100); /* let's say */
2595 tl_assert(alloc);
2596 tl_assert(cc);
2597 tl_assert(free);
2598 tl_assert(ga);
2599 VG_(memset)(ga, 0, sizeof(*ga));
2600 ga->elemSzB = elemSzB;
2601 ga->nPerGroup = nPerGroup;
2602 ga->groups = NULL;
2603 ga->alloc = alloc;
2604 ga->cc = cc;
2605 ga->free = free;
2606 ga->groups = VG_(newXA)( alloc, cc, free, sizeof(void*) );
2607 ga->nextFree = NULL;
2608 tl_assert(ga->groups);
2609}
2610
2611/* The freelist is empty. Allocate a new group and put all the new
2612 elements in it onto the freelist. */
2613__attribute__((noinline))
2614static void gal_add_new_group ( GroupAlloc* ga )
2615{
2616 Word i;
2617 UWord* group;
2618 tl_assert(ga);
2619 tl_assert(ga->nextFree == NULL);
2620 group = ga->alloc( ga->cc, ga->elemSzB * ga->nPerGroup );
2621 tl_assert(group);
2622 /* extend the freelist through the new group. Place the freelist
2623 pointer in the first word of each element. That's why the
2624 element size must be at least one word. */
2625 for (i = ga->nPerGroup-1; i >= 0; i--) {
2626 UChar* elemC = ((UChar*)group) + i * ga->elemSzB;
2627 UWord* elem = (UWord*)elemC;
2628 tl_assert(0 == (((UWord)elem) % sizeof(UWord)));
2629 *elem = (UWord)ga->nextFree;
2630 ga->nextFree = elem;
2631 }
2632 /* and add to our collection of groups */
2633 VG_(addToXA)( ga->groups, &group );
2634}
2635
2636inline static void* gal_Alloc ( GroupAlloc* ga )
2637{
2638 UWord* elem;
2639 if (UNLIKELY(ga->nextFree == NULL)) {
2640 gal_add_new_group(ga);
2641 }
2642 elem = ga->nextFree;
2643 ga->nextFree = (void*)*elem;
2644 *elem = 0; /* unnecessary, but just to be on the safe side */
2645 return elem;
2646}
2647
2648inline static void* gal_Alloc_w_size_check ( GroupAlloc* ga, SizeT n )
2649{
2650 tl_assert(n == ga->elemSzB);
2651 return gal_Alloc( ga );
2652}
2653
2654inline static void gal_Free ( GroupAlloc* ga, void* p )
2655{
2656 UWord* elem = (UWord*)p;
2657 *elem = (UWord)ga->nextFree;
2658 ga->nextFree = elem;
2659}
2660//////////////// END general group allocator
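
/* Illustrative sketch only: how a client of the group allocator
   above is expected to set it up and recycle elements.  'DemoElem'
   and 'demo_group_allocator' are made up for this example; the real
   clients are the RCEC and OldRef allocators further down. */
#if 0
typedef struct { UWord w1; UWord w2; } DemoElem;

static GroupAlloc demo_group_allocator;

static void example__group_alloc ( void )
{
   DemoElem *e1, *e2;
   init_GroupAlloc( &demo_group_allocator, sizeof(DemoElem),
                    1000/*elems per group*/, HG_(zalloc),
                    "libhb.example.DemoElem", HG_(free) );
   e1 = gal_Alloc( &demo_group_allocator );  /* allocates 1st group */
   gal_Free( &demo_group_allocator, e1 );    /* back onto freelist  */
   e2 = gal_Alloc( &demo_group_allocator );
   tl_assert( e1 == e2 );                    /* freed slots are reused */
}
#endif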
2661
2662
2663/////////////////////////////////////////////////////////
2664// //
sewardjf98e1c02008-10-25 16:22:41 +00002665// Change-event map2 //
2666// //
2667/////////////////////////////////////////////////////////
2668
sewardjf98e1c02008-10-25 16:22:41 +00002669#define EVENT_MAP_GC_DISCARD_FRACTION 0.5
2670
2671/* This is in two parts:
2672
2673 1. An OSet of RCECs. This is a set of reference-counted stack
2674 traces. When the reference count of a stack trace becomes zero,
2675 it is removed from the set and freed up. The intent is to have
2676 a set of stack traces which can be referred to from (2), but to
2677 only represent each one once. The set is indexed/searched by
2678 ordering on the stack trace vectors.
2679
sewardj849b0ed2008-12-21 10:43:10 +00002680 2. A SparseWA of OldRefs. These store information about each old
2681 ref that we need to record. It is indexed by address of the
sewardjf98e1c02008-10-25 16:22:41 +00002682 location for which the information is recorded. For LRU
2683 purposes, each OldRef also contains a generation number,
2684 indicating when it was most recently accessed.
2685
2686 The important part of an OldRef is, however, its accs[] array.
sewardj849b0ed2008-12-21 10:43:10 +00002687 This is an array of N_OLDREF_ACCS which binds (thread, R/W,
2688 size) triples to RCECs. This allows us to collect the last
2689 access-traceback by up to N_OLDREF_ACCS different triples for
2690 this location. The accs[] array is a move-to-front (MTF) array. If a binding
2691 falls off the end, that's too bad -- we will lose info about
2692 that triple's access to this location.
sewardjf98e1c02008-10-25 16:22:41 +00002693
sewardj849b0ed2008-12-21 10:43:10 +00002694 When the SparseWA becomes too big, we can throw away the OldRefs
sewardjf98e1c02008-10-25 16:22:41 +00002695 whose generation numbers are below some threshold; hence doing
2696 approximate LRU discarding. For each discarded OldRef we must
2697 of course decrement the reference count on the all RCECs it
2698 refers to, in order that entries from (1) eventually get
2699 discarded too.
sewardj849b0ed2008-12-21 10:43:10 +00002700
2701 A major improvement in reliability of this mechanism would be to
2702 have a dynamically sized OldRef.accs[] array, so no entries ever
2703 fall off the end. In investigations (Dec 08) it appears that a
2704 major cause for the non-availability of conflicting-access traces
2705 in race reports is caused by the fixed size of this array. I
2706 suspect for most OldRefs, only a few entries are used, but for a
2707 minority of cases there is an overflow, leading to info lossage.
2708 Investigations also suggest this is very workload and scheduling
2709 sensitive. Therefore a dynamic sizing would be better.
2710
2711 However, dynamic sizing would defeat the use of a GroupAllocator
2712 for OldRef structures. And that's important for performance. So
2713 it's not straightforward to do.
sewardjf98e1c02008-10-25 16:22:41 +00002714*/
2715
2716
2717static UWord stats__ctxt_rcdec1 = 0;
2718static UWord stats__ctxt_rcdec2 = 0;
2719static UWord stats__ctxt_rcdec3 = 0;
2720static UWord stats__ctxt_rcdec_calls = 0;
2721static UWord stats__ctxt_rcdec_discards = 0;
2722static UWord stats__ctxt_rcdec1_eq = 0;
2723
2724static UWord stats__ctxt_tab_curr = 0;
2725static UWord stats__ctxt_tab_max = 0;
2726
2727static UWord stats__ctxt_tab_qs = 0;
2728static UWord stats__ctxt_tab_cmps = 0;
2729
2730
2731///////////////////////////////////////////////////////
2732//// Part (1): An OSet of RCECs
2733///
2734
2735#define N_FRAMES 8
2736
2737// (UInt) `echo "Reference Counted Execution Context" | md5sum`
2738#define RCEC_MAGIC 0xab88abb2UL
2739
2740//#define N_RCEC_TAB 98317 /* prime */
2741#define N_RCEC_TAB 196613 /* prime */
2742
2743typedef
2744 struct _RCEC {
sewardjd86e3a22008-12-03 11:39:37 +00002745 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00002746 struct _RCEC* next;
sewardjf98e1c02008-10-25 16:22:41 +00002747 UWord rc;
2748 UWord rcX; /* used for crosschecking */
2749 UWord frames[1 + N_FRAMES]; /* first word is hash of all the rest */
2750 }
2751 RCEC;
2752
2753static RCEC** contextTab = NULL; /* hash table of RCEC*s */
2754
2755
2756/* Gives an arbitrary total order on RCEC .frames fields */
2757static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
2758 Word i;
2759 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
2760 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
2761 if (ec1->frames[0] < ec2->frames[0]) return -1;
2762 if (ec1->frames[0] > ec2->frames[0]) return 1;
2763 for (i = 1; i < 1 + N_FRAMES; i++) {
2764 if (ec1->frames[i] < ec2->frames[i]) return -1;
2765 if (ec1->frames[i] > ec2->frames[i]) return 1;
2766 }
2767 return 0;
2768}
2769
2770
2771/* Dec the ref of this RCEC. */
2772static void ctxt__rcdec ( RCEC* ec )
2773{
2774 stats__ctxt_rcdec_calls++;
2775 tl_assert(ec && ec->magic == RCEC_MAGIC);
2776 tl_assert(ec->rc > 0);
2777 ec->rc--;
2778}
2779
2780static void ctxt__rcinc ( RCEC* ec )
2781{
2782 tl_assert(ec && ec->magic == RCEC_MAGIC);
2783 ec->rc++;
2784}
2785
2786
sewardjd86e3a22008-12-03 11:39:37 +00002787//////////// BEGIN RCEC group allocator
2788static GroupAlloc rcec_group_allocator;
2789
2790static RCEC* alloc_RCEC ( void ) {
2791 return gal_Alloc ( &rcec_group_allocator );
2792}
2793
2794static void free_RCEC ( RCEC* rcec ) {
2795 tl_assert(rcec->magic == RCEC_MAGIC);
2796 gal_Free( &rcec_group_allocator, rcec );
2797}
2798//////////// END RCEC group allocator
2799
2800
sewardjf98e1c02008-10-25 16:22:41 +00002801/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
2802 move it one step closer to the front of the list, so as to make
2803 subsequent searches for it cheaper. */
2804static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
2805{
2806 RCEC *ec0, *ec1, *ec2;
2807 if (ec == *headp)
2808 tl_assert(0); /* already at head of list */
2809 tl_assert(ec != NULL);
2810 ec0 = *headp;
2811 ec1 = NULL;
2812 ec2 = NULL;
2813 while (True) {
2814 if (ec0 == NULL || ec0 == ec) break;
2815 ec2 = ec1;
2816 ec1 = ec0;
2817 ec0 = ec0->next;
2818 }
2819 tl_assert(ec0 == ec);
2820 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
2821 RCEC* tmp;
2822 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
2823 predecessor. Swap ec0 and ec1, that is, move ec0 one step
2824 closer to the start of the list. */
2825 tl_assert(ec2->next == ec1);
2826 tl_assert(ec1->next == ec0);
2827 tmp = ec0->next;
2828 ec2->next = ec0;
2829 ec0->next = ec1;
2830 ec1->next = tmp;
2831 }
2832 else
2833 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
2834 /* it's second in the list. */
2835 tl_assert(*headp == ec1);
2836 tl_assert(ec1->next == ec0);
2837 ec1->next = ec0->next;
2838 ec0->next = ec1;
2839 *headp = ec0;
2840 }
2841}
2842
2843
2844/* Find the given RCEC in the tree, and return a pointer to it. Or,
2845 if not present, add the given one to the tree (by making a copy of
2846 it, so the caller can immediately deallocate the original) and
2847 return a pointer to the copy. The caller can safely have 'example'
2848 on its stack, since we will always return a pointer to a copy of
2849 it, not to the original. Note that the inserted node will have .rc
2850 of zero and so the caller must immediately increment it. */
2851__attribute__((noinline))
2852static RCEC* ctxt__find_or_add ( RCEC* example )
2853{
2854 UWord hent;
2855 RCEC* copy;
2856 tl_assert(example && example->magic == RCEC_MAGIC);
2857 tl_assert(example->rc == 0);
2858
2859 /* Search the hash table to see if we already have it. */
2860 stats__ctxt_tab_qs++;
2861 hent = example->frames[0] % N_RCEC_TAB;
2862 copy = contextTab[hent];
2863 while (1) {
2864 if (!copy) break;
2865 tl_assert(copy->magic == RCEC_MAGIC);
2866 stats__ctxt_tab_cmps++;
2867 if (0 == RCEC__cmp_by_frames(copy, example)) break;
2868 copy = copy->next;
2869 }
2870
2871 if (copy) {
2872 tl_assert(copy != example);
2873 /* optimisation: if it's not at the head of its list, move 1
2874 step fwds, to make future searches cheaper */
2875 if (copy != contextTab[hent]) {
2876 move_RCEC_one_step_forward( &contextTab[hent], copy );
2877 }
2878 } else {
sewardjd86e3a22008-12-03 11:39:37 +00002879 copy = alloc_RCEC();
sewardjf98e1c02008-10-25 16:22:41 +00002880 tl_assert(copy != example);
2881 *copy = *example;
2882 copy->next = contextTab[hent];
2883 contextTab[hent] = copy;
2884 stats__ctxt_tab_curr++;
2885 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
2886 stats__ctxt_tab_max = stats__ctxt_tab_curr;
2887 }
2888 return copy;
2889}
2890
2891static inline UWord ROLW ( UWord w, Int n )
2892{
2893 Int bpw = 8 * sizeof(UWord);
2894 w = (w << n) | (w >> (bpw-n));
2895 return w;
2896}
2897
2898__attribute__((noinline))
2899static RCEC* get_RCEC ( Thr* thr )
2900{
2901 UWord hash, i;
2902 RCEC example;
2903 example.magic = RCEC_MAGIC;
2904 example.rc = 0;
2905 example.rcX = 0;
2906 main_get_stacktrace( thr, &example.frames[1], N_FRAMES );
2907 hash = 0;
2908 for (i = 1; i < 1 + N_FRAMES; i++) {
2909 hash ^= example.frames[i];
2910 hash = ROLW(hash, 19);
2911 }
2912 example.frames[0] = hash;
2913 return ctxt__find_or_add( &example );
2914}
2915
2916///////////////////////////////////////////////////////
sewardjbc307e52008-12-06 22:10:54 +00002917//// Part (2):
2918/// A SparseWA guest-addr -> OldRef, that refers to (1)
sewardjf98e1c02008-10-25 16:22:41 +00002919///
2920
2921// (UInt) `echo "Old Reference Information" | md5sum`
2922#define OldRef_MAGIC 0x30b1f075UL
2923
sewardjc5ea9962008-12-07 01:41:46 +00002924/* Records an access: a thread and a context. The size
2925 (1,2,4,8) and read-or-writeness are also encoded as
2926 follows: bottom bit of .thr is 1 if write, 0 if read
2927 bottom 2 bits of .rcec encode the size:
2928 00 = 1, 01 = 2, 10 = 4, 11 = 8
2929*/
sewardjf98e1c02008-10-25 16:22:41 +00002930typedef struct { Thr* thr; RCEC* rcec; } Thr_n_RCEC;
2931
sewardj849b0ed2008-12-21 10:43:10 +00002932#define N_OLDREF_ACCS 5
sewardjf98e1c02008-10-25 16:22:41 +00002933
2934typedef
2935 struct {
sewardjd86e3a22008-12-03 11:39:37 +00002936 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00002937 UWord gen; /* when most recently accessed */
sewardjd86e3a22008-12-03 11:39:37 +00002938 /* or free list when not in use */
sewardjf98e1c02008-10-25 16:22:41 +00002939 /* unused slots in this array have .thr == NULL */
2940 Thr_n_RCEC accs[N_OLDREF_ACCS];
2941 }
2942 OldRef;
2943
sewardjd86e3a22008-12-03 11:39:37 +00002944
2945//////////// BEGIN OldRef group allocator
2946static GroupAlloc oldref_group_allocator;
2947
2948static OldRef* alloc_OldRef ( void ) {
2949 return gal_Alloc ( &oldref_group_allocator );
2950}
2951
2952static void free_OldRef ( OldRef* r ) {
2953 tl_assert(r->magic == OldRef_MAGIC);
2954 gal_Free( &oldref_group_allocator, r );
2955}
2956//////////// END OldRef group allocator
2957
sewardjd86e3a22008-12-03 11:39:37 +00002958
sewardjbc307e52008-12-06 22:10:54 +00002959static SparseWA* oldrefTree = NULL; /* SparseWA* OldRef* */
2960static UWord oldrefGen = 0; /* current LRU generation # */
2961static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
2962static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */
sewardjf98e1c02008-10-25 16:22:41 +00002963
sewardjc5ea9962008-12-07 01:41:46 +00002964inline static void* ptr_or_UWord ( void* p, UWord w ) {
2965 return (void*)( ((UWord)p) | ((UWord)w) );
2966}
2967inline static void* ptr_and_UWord ( void* p, UWord w ) {
2968 return (void*)( ((UWord)p) & ((UWord)w) );
2969}
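
/* Illustrative sketch only: the low-bit packing scheme used for
   Thr_n_RCEC entries, as described above -- writeness goes in bit 0
   of .thr and the size code (00=1, 01=2, 10=4, 11=8) in bits 1:0 of
   .rcec.  'thr' and 'rcec' are assumed to be word-aligned pointers,
   which the allocators guarantee. */
#if 0
static void example__pack_access ( Thr* thr, RCEC* rcec,
                                   Bool isW, SizeT szB )
{
   Thr_n_RCEC acc;
   UWord szCode = szB == 1 ? 0 : szB == 2 ? 1 : szB == 4 ? 2 : 3;
   acc.thr  = ptr_or_UWord( thr,  isW ? 1 : 0 );
   acc.rcec = ptr_or_UWord( rcec, szCode );
   /* decoding recovers the original pointers and flags */
   tl_assert( ptr_and_UWord( acc.thr,  ~3 ) == (void*)thr );
   tl_assert( ptr_and_UWord( acc.rcec, ~3 ) == (void*)rcec );
   if (isW) tl_assert( 1 == (UWord)ptr_and_UWord( acc.thr, 1 ) );
   else     tl_assert( 0 == (UWord)ptr_and_UWord( acc.thr, 1 ) );
}
#endif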
2970
sewardj1669cc72008-12-13 01:20:21 +00002971inline static UInt min_UInt ( UInt a, UInt b ) {
2972 return a < b ? a : b;
2973}
2974
sewardja781be62008-12-08 00:12:28 +00002975/* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
2976 first interval is lower, 1 if the first interval is higher, and 0
2977 if there is any overlap. Redundant paranoia with casting is there
2978 following what looked distinctly like a bug in gcc-4.1.2, in which
2979 some of the comparisons were done signedly instead of
2980 unsignedly. */
2981/* Copied from exp-ptrcheck/sg_main.c */
2982static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
2983 Addr a2, SizeT n2 ) {
2984 UWord a1w = (UWord)a1;
2985 UWord n1w = (UWord)n1;
2986 UWord a2w = (UWord)a2;
2987 UWord n2w = (UWord)n2;
2988 tl_assert(n1w > 0 && n2w > 0);
2989 if (a1w + n1w <= a2w) return -1L;
2990 if (a2w + n2w <= a1w) return 1L;
2991 return 0;
2992}
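
/* Illustrative sketch only: the comparison above returns 0 for any
   overlap, which is exactly what the conflicting-access search below
   relies on.  The addresses and sizes are arbitrary. */
#if 0
static void example__intervals ( void )
{
   tl_assert( cmp_nonempty_intervals( 0x1000, 4, 0x1004, 4 ) == -1 );
   tl_assert( cmp_nonempty_intervals( 0x1004, 4, 0x1000, 4 ) ==  1 );
   tl_assert( cmp_nonempty_intervals( 0x1000, 8, 0x1004, 2 ) ==  0 );
}
#endif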
2993
sewardjc5ea9962008-12-07 01:41:46 +00002994static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
sewardjf98e1c02008-10-25 16:22:41 +00002995{
sewardjd86e3a22008-12-03 11:39:37 +00002996 OldRef* ref;
sewardjc5ea9962008-12-07 01:41:46 +00002997 RCEC* rcec;
sewardjd86e3a22008-12-03 11:39:37 +00002998 Word i, j;
2999 UWord keyW, valW;
3000 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00003001
sewardjc5ea9962008-12-07 01:41:46 +00003002 rcec = get_RCEC( thr );
3003 ctxt__rcinc(rcec);
3004
3005 /* encode the writeness of the transaction in the bottom bit of
3006 thr, and its size in the bottom two bits of rcec. */
3007 thr = ptr_or_UWord(thr, isW ? 1 : 0);
3008 switch (szB) {
3009 /* This doesn't look particularly branch-predictor friendly. */
3010 case 1: rcec = ptr_or_UWord(rcec, 0); break;
3011 case 2: rcec = ptr_or_UWord(rcec, 1); break;
3012 case 4: rcec = ptr_or_UWord(rcec, 2); break;
3013 case 8: rcec = ptr_or_UWord(rcec, 3); break;
3014 default: tl_assert(0);
3015 }
3016
3017 /* Look in the map to see if we already have this. */
sewardjbc307e52008-12-06 22:10:54 +00003018 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, a );
sewardjf98e1c02008-10-25 16:22:41 +00003019
sewardjd86e3a22008-12-03 11:39:37 +00003020 if (b) {
sewardjf98e1c02008-10-25 16:22:41 +00003021
3022 /* We already have a record for this address. We now need to
sewardj849b0ed2008-12-21 10:43:10 +00003023 see if we have a stack trace pertaining to this (thread, R/W,
3024 size) triple. */
sewardjd86e3a22008-12-03 11:39:37 +00003025 tl_assert(keyW == a);
3026 ref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00003027 tl_assert(ref->magic == OldRef_MAGIC);
3028
3029 tl_assert(thr);
3030 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardj849b0ed2008-12-21 10:43:10 +00003031 if (ref->accs[i].thr != thr)
3032 continue;
3033 /* since .thr encodes both the accessing thread and the
3034 read/writeness, we know now that at least those features
3035 of the access match this entry. So we just need to check
3036 the size indication. Do this by inspecting the lowest 2 bits of
3037 .rcec, which contain the encoded size info. */
3038 if (ptr_and_UWord(ref->accs[i].rcec,3) != ptr_and_UWord(rcec,3))
3039 continue;
3040 /* else we have a match, so stop looking. */
3041 break;
sewardjf98e1c02008-10-25 16:22:41 +00003042 }
3043
3044 if (i < N_OLDREF_ACCS) {
3045 /* thread 'thr' has an entry at index 'i'. Update it. */
3046 if (i > 0) {
3047 Thr_n_RCEC tmp = ref->accs[i-1];
3048 ref->accs[i-1] = ref->accs[i];
3049 ref->accs[i] = tmp;
3050 i--;
3051 }
sewardjc5ea9962008-12-07 01:41:46 +00003052 if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
sewardjf98e1c02008-10-25 16:22:41 +00003053 stats__ctxt_rcdec1++;
sewardjc5ea9962008-12-07 01:41:46 +00003054 ctxt__rcdec( ptr_and_UWord(ref->accs[i].rcec, ~3) );
3055 ref->accs[i].rcec = rcec;
sewardjf98e1c02008-10-25 16:22:41 +00003056 tl_assert(ref->accs[i].thr == thr);
3057 } else {
sewardj849b0ed2008-12-21 10:43:10 +00003058 /* No entry for this (thread, R/W, size) triple. Shuffle all
3059 of them down one slot, and put the new entry at the start
3060 of the array. */
sewardjf98e1c02008-10-25 16:22:41 +00003061 if (ref->accs[N_OLDREF_ACCS-1].thr) {
3062 /* the last slot is in use. We must dec the rc on the
3063 associated rcec. */
3064 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
3065 stats__ctxt_rcdec2++;
sewardj849b0ed2008-12-21 10:43:10 +00003066 if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
3067 VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
sewardjc5ea9962008-12-07 01:41:46 +00003068 ctxt__rcdec( ptr_and_UWord(ref->accs[N_OLDREF_ACCS-1].rcec, ~3) );
sewardjf98e1c02008-10-25 16:22:41 +00003069 } else {
3070 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
3071 }
3072 for (j = N_OLDREF_ACCS-1; j >= 1; j--)
3073 ref->accs[j] = ref->accs[j-1];
3074 ref->accs[0].thr = thr;
sewardjc5ea9962008-12-07 01:41:46 +00003075 ref->accs[0].rcec = rcec;
3076 /* thr==NULL is used to signify an empty slot, so we can't
3077 add a NULL thr. */
3078 tl_assert(ptr_and_UWord(thr, ~3) != 0);
sewardjf98e1c02008-10-25 16:22:41 +00003079 }
3080
3081 ref->gen = oldrefGen;
sewardjf98e1c02008-10-25 16:22:41 +00003082
3083 } else {
3084
3085 /* We don't have a record for this address. Create a new one. */
3086 if (oldrefTreeN >= oldrefGenIncAt) {
3087 oldrefGen++;
3088 oldrefGenIncAt = oldrefTreeN + 50000;
3089 if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
3090 oldrefGen, oldrefTreeN );
3091 }
sewardjd86e3a22008-12-03 11:39:37 +00003092
3093 ref = alloc_OldRef();
sewardjf98e1c02008-10-25 16:22:41 +00003094 ref->magic = OldRef_MAGIC;
3095 ref->gen = oldrefGen;
sewardjc5ea9962008-12-07 01:41:46 +00003096 ref->accs[0].rcec = rcec;
sewardjf98e1c02008-10-25 16:22:41 +00003097 ref->accs[0].thr = thr;
sewardj849b0ed2008-12-21 10:43:10 +00003098 /* thr==NULL is used to signify an empty slot, so we can't add a
3099 NULL thr. */
3100 tl_assert(ptr_and_UWord(thr, ~3) != 0);
sewardjf98e1c02008-10-25 16:22:41 +00003101 for (j = 1; j < N_OLDREF_ACCS; j++) {
3102 ref->accs[j].thr = NULL;
3103 ref->accs[j].rcec = NULL;
3104 }
sewardjbc307e52008-12-06 22:10:54 +00003105 VG_(addToSWA)( oldrefTree, a, (UWord)ref );
sewardjf98e1c02008-10-25 16:22:41 +00003106 oldrefTreeN++;
3107
3108 }
3109}
3110
3111
sewardjc5ea9962008-12-07 01:41:46 +00003112Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
3113 /*OUT*/Thr** resThr,
3114 /*OUT*/SizeT* resSzB,
3115 /*OUT*/Bool* resIsW,
3116 Thr* thr, Addr a, SizeT szB, Bool isW )
sewardjf98e1c02008-10-25 16:22:41 +00003117{
sewardja781be62008-12-08 00:12:28 +00003118 Word i, j;
sewardjd86e3a22008-12-03 11:39:37 +00003119 OldRef* ref;
3120 UWord keyW, valW;
3121 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00003122
sewardjc5ea9962008-12-07 01:41:46 +00003123 Thr* cand_thr;
3124 RCEC* cand_rcec;
3125 Bool cand_isW;
3126 SizeT cand_szB;
sewardja781be62008-12-08 00:12:28 +00003127 Addr cand_a;
3128
3129 Addr toCheck[15];
3130 Int nToCheck = 0;
sewardjc5ea9962008-12-07 01:41:46 +00003131
3132 tl_assert(thr);
3133 tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
sewardjf98e1c02008-10-25 16:22:41 +00003134
sewardja781be62008-12-08 00:12:28 +00003135 toCheck[nToCheck++] = a;
3136 for (i = -7; i < (Word)szB; i++) {
3137 if (i != 0)
3138 toCheck[nToCheck++] = a + i;
3139 }
3140 tl_assert(nToCheck <= 15);
3141
3142 /* Now see if we can find a suitable matching event for
3143 any of the addresses in toCheck[0 .. nToCheck-1]. */
3144 for (j = 0; j < nToCheck; j++) {
3145
3146 cand_a = toCheck[j];
3147 // VG_(printf)("test %ld %p\n", j, cand_a);
3148
3149 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, cand_a );
3150 if (!b)
3151 continue;
3152
sewardjd86e3a22008-12-03 11:39:37 +00003153 ref = (OldRef*)valW;
sewardja781be62008-12-08 00:12:28 +00003154 tl_assert(keyW == cand_a);
sewardjf98e1c02008-10-25 16:22:41 +00003155 tl_assert(ref->magic == OldRef_MAGIC);
3156 tl_assert(ref->accs[0].thr); /* first slot must always be used */
3157
sewardjc5ea9962008-12-07 01:41:46 +00003158 cand_thr = NULL;
3159 cand_rcec = NULL;
3160 cand_isW = False;
3161 cand_szB = 0;
sewardjf98e1c02008-10-25 16:22:41 +00003162
sewardjc5ea9962008-12-07 01:41:46 +00003163 for (i = 0; i < N_OLDREF_ACCS; i++) {
3164 Thr_n_RCEC* cand = &ref->accs[i];
3165 cand_thr = ptr_and_UWord(cand->thr, ~3);
3166 cand_rcec = ptr_and_UWord(cand->rcec, ~3);
3167 /* Decode the writeness from the bottom bit of .thr. */
3168 cand_isW = 1 == (UWord)ptr_and_UWord(cand->thr, 1);
3169 /* Decode the size from the bottom two bits of .rcec. */
3170 switch ((UWord)ptr_and_UWord(cand->rcec, 3)) {
3171 case 0: cand_szB = 1; break;
3172 case 1: cand_szB = 2; break;
3173 case 2: cand_szB = 4; break;
3174 case 3: cand_szB = 8; break;
3175 default: tl_assert(0);
3176 }
3177
3178 if (cand_thr == NULL)
3179 /* This slot isn't in use. Ignore it. */
3180 continue;
3181
3182 if (cand_thr == thr)
3183 /* This is an access by the same thread, but we're only
3184 interested in accesses from other threads. Ignore. */
3185 continue;
3186
3187 if ((!cand_isW) && (!isW))
3188 /* We don't want to report a read racing against another
3189 read; that's stupid. So in this case move on. */
3190 continue;
3191
sewardja781be62008-12-08 00:12:28 +00003192 if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
3193 /* No overlap with the access we're asking about. Ignore. */
3194 continue;
3195
sewardjc5ea9962008-12-07 01:41:46 +00003196 /* We have a match. Stop searching. */
3197 break;
3198 }
3199
3200 tl_assert(i >= 0 && i <= N_OLDREF_ACCS);
3201
sewardja781be62008-12-08 00:12:28 +00003202 if (i < N_OLDREF_ACCS) {
3203 /* return with success */
3204 tl_assert(cand_thr);
3205 tl_assert(cand_rcec);
3206 tl_assert(cand_rcec->magic == RCEC_MAGIC);
3207 tl_assert(cand_szB >= 1);
3208 *resEC = VG_(make_ExeContext_from_StackTrace)(
sewardj1669cc72008-12-13 01:20:21 +00003209 &cand_rcec->frames[1],
3210 min_UInt(N_FRAMES, VG_(clo_backtrace_size))
sewardja781be62008-12-08 00:12:28 +00003211 );
3212 *resThr = cand_thr;
3213 *resSzB = cand_szB;
3214 *resIsW = cand_isW;
3215 return True;
3216 }
sewardjc5ea9962008-12-07 01:41:46 +00003217
sewardja781be62008-12-08 00:12:28 +00003218 /* consider next address in toCheck[] */
3219 } /* for (j = 0; j < nToCheck; j++) */
sewardjf98e1c02008-10-25 16:22:41 +00003220
sewardja781be62008-12-08 00:12:28 +00003221 /* really didn't find anything. */
3222 return False;
sewardjf98e1c02008-10-25 16:22:41 +00003223}
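
/* Illustrative sketch only: how a caller (in practice, the Helgrind
   error-reporting code) might query the event map when it has just
   detected a race.  The 4-byte write at 'a' by 'thr' is a made-up
   access for the example. */
#if 0
static void example__lookup_conflicting_access ( Thr* thr, Addr a )
{
   ExeContext* whereEC = NULL;
   Thr*        confThr = NULL;
   SizeT       confSzB = 0;
   Bool        confIsW = False;
   if (libhb_event_map_lookup( &whereEC, &confThr, &confSzB, &confIsW,
                               thr, a, 4/*szB*/, True/*isW*/ )) {
      /* A conflicting earlier access by confThr overlapping [a, a+4)
         was found; whereEC describes where it happened. */
   }
}
#endif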
3224
3225static void event_map_init ( void )
3226{
3227 Word i;
sewardjd86e3a22008-12-03 11:39:37 +00003228
3229 /* Context (RCEC) group allocator */
3230 init_GroupAlloc ( &rcec_group_allocator,
3231 sizeof(RCEC),
3232 1000 /* RCECs per group */,
3233 HG_(zalloc),
3234 "libhb.event_map_init.1 (RCEC groups)",
3235 HG_(free) );
3236
3237 /* Context table */
sewardjf98e1c02008-10-25 16:22:41 +00003238 tl_assert(!contextTab);
sewardjd86e3a22008-12-03 11:39:37 +00003239 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
sewardjf98e1c02008-10-25 16:22:41 +00003240 N_RCEC_TAB * sizeof(RCEC*) );
3241 tl_assert(contextTab);
3242 for (i = 0; i < N_RCEC_TAB; i++)
3243 contextTab[i] = NULL;
3244
sewardjd86e3a22008-12-03 11:39:37 +00003245 /* Oldref group allocator */
3246 init_GroupAlloc ( &oldref_group_allocator,
3247 sizeof(OldRef),
3248 1000 /* OldRefs per group */,
3249 HG_(zalloc),
3250 "libhb.event_map_init.3 (OldRef groups)",
3251 HG_(free) );
3252
sewardjd86e3a22008-12-03 11:39:37 +00003253 /* Oldref tree */
sewardjf98e1c02008-10-25 16:22:41 +00003254 tl_assert(!oldrefTree);
sewardjbc307e52008-12-06 22:10:54 +00003255 oldrefTree = VG_(newSWA)(
3256 HG_(zalloc),
sewardjd86e3a22008-12-03 11:39:37 +00003257 "libhb.event_map_init.4 (oldref tree)",
sewardjbc307e52008-12-06 22:10:54 +00003258 HG_(free)
sewardjf98e1c02008-10-25 16:22:41 +00003259 );
3260 tl_assert(oldrefTree);
3261
3262 oldrefGen = 0;
3263 oldrefGenIncAt = 0;
3264 oldrefTreeN = 0;
3265}
3266
3267static void event_map__check_reference_counts ( Bool before )
3268{
3269 RCEC* rcec;
3270 OldRef* oldref;
3271 Word i;
3272 UWord nEnts = 0;
sewardjd86e3a22008-12-03 11:39:37 +00003273 UWord keyW, valW;
sewardjf98e1c02008-10-25 16:22:41 +00003274
3275 /* Set the 'check' reference counts to zero. Also, optionally
3276 check that the real reference counts are non-zero. We allow
3277 these to fall to zero before a GC, but the GC must get rid of
3278 all those that are zero, hence none should be zero after a
3279 GC. */
3280 for (i = 0; i < N_RCEC_TAB; i++) {
3281 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
3282 nEnts++;
3283 tl_assert(rcec);
3284 tl_assert(rcec->magic == RCEC_MAGIC);
3285 if (!before)
3286 tl_assert(rcec->rc > 0);
3287 rcec->rcX = 0;
3288 }
3289 }
3290
3291 /* check that the stats are sane */
3292 tl_assert(nEnts == stats__ctxt_tab_curr);
3293 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
3294
3295 /* visit all the referencing points, inc check ref counts */
sewardjbc307e52008-12-06 22:10:54 +00003296 VG_(initIterSWA)( oldrefTree );
3297 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00003298 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00003299 tl_assert(oldref->magic == OldRef_MAGIC);
3300 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardjc5ea9962008-12-07 01:41:46 +00003301 Thr* aThr = ptr_and_UWord(oldref->accs[i].thr, ~3);
3302 RCEC* aRef = ptr_and_UWord(oldref->accs[i].rcec, ~3);
3303 if (aThr) {
3304 tl_assert(aRef);
3305 tl_assert(aRef->magic == RCEC_MAGIC);
3306 aRef->rcX++;
sewardjf98e1c02008-10-25 16:22:41 +00003307 } else {
sewardjc5ea9962008-12-07 01:41:46 +00003308 tl_assert(!aRef);
sewardjf98e1c02008-10-25 16:22:41 +00003309 }
3310 }
3311 }
3312
3313 /* compare check ref counts with actual */
3314 for (i = 0; i < N_RCEC_TAB; i++) {
3315 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
3316 tl_assert(rcec->rc == rcec->rcX);
3317 }
3318 }
3319}
3320
sewardj8fd92d32008-11-20 23:17:01 +00003321__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00003322static void event_map_maybe_GC ( void )
3323{
3324 OldRef* oldref;
3325 UWord keyW, valW, retained, maxGen;
sewardjf98e1c02008-10-25 16:22:41 +00003326 XArray* refs2del;
3327 Word i, j, n2del;
3328
sewardj8fd92d32008-11-20 23:17:01 +00003329 UWord* genMap = NULL;
3330 UWord genMap_min = 0;
3331 UWord genMap_size = 0;
3332
sewardj849b0ed2008-12-21 10:43:10 +00003333 if (LIKELY(oldrefTreeN < HG_(clo_conflict_cache_size)))
sewardjf98e1c02008-10-25 16:22:41 +00003334 return;
3335
3336 if (0)
3337 VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
3338
sewardj849b0ed2008-12-21 10:43:10 +00003339 /* Check for sane command line params. Limit values must match
3340 those in hg_process_cmd_line_option. */
3341 tl_assert( HG_(clo_conflict_cache_size) >= 10*1000 );
3342 tl_assert( HG_(clo_conflict_cache_size) <= 10*1000*1000 );
3343
sewardj8f5374e2008-12-07 11:40:17 +00003344 /* Check our counting is sane (expensive) */
3345 if (CHECK_CEM)
3346 tl_assert(oldrefTreeN == VG_(sizeSWA)( oldrefTree ));
sewardjf98e1c02008-10-25 16:22:41 +00003347
sewardj8f5374e2008-12-07 11:40:17 +00003348 /* Check the reference counts (expensive) */
3349 if (CHECK_CEM)
3350 event_map__check_reference_counts( True/*before*/ );
sewardjf98e1c02008-10-25 16:22:41 +00003351
sewardj8fd92d32008-11-20 23:17:01 +00003352 /* Compute the distribution of generation values in the ref tree.
3353 There are likely only to be a few different generation numbers
3354 in the whole tree, but we don't know what they are. Hence use a
3355 dynamically resized array of counters. The array is genMap[0
3356 .. genMap_size-1], where genMap[0] is the count for the
3357 generation number genMap_min, genMap[1] is the count for
3358 genMap_min+1, etc. If a new number is seen outside the range
3359 [genMap_min .. genMap_min + genMap_size - 1] then the array is
3360 copied into a larger array, and genMap_min and genMap_size are
3361 adjusted accordingly. */
3362
sewardjf98e1c02008-10-25 16:22:41 +00003363 /* genMap :: generation-number -> count-of-nodes-with-that-number */
sewardjf98e1c02008-10-25 16:22:41 +00003364
sewardjbc307e52008-12-06 22:10:54 +00003365 VG_(initIterSWA)( oldrefTree );
3366 while ( VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardj8fd92d32008-11-20 23:17:01 +00003367
sewardjd86e3a22008-12-03 11:39:37 +00003368 UWord ea, key;
3369 oldref = (OldRef*)valW;
3370 key = oldref->gen;
sewardj8fd92d32008-11-20 23:17:01 +00003371
3372 /* BEGIN find 'ea', which is the index in genMap holding the
3373 count for generation number 'key'. */
3374 if (UNLIKELY(genMap == NULL)) {
3375 /* deal with the first key to be seen, so that the following
3376 cases don't need to handle the complexity of a NULL count
3377 array. */
3378 genMap_min = key;
3379 genMap_size = 1;
3380 genMap = HG_(zalloc)( "libhb.emmG.1a",
3381 genMap_size * sizeof(UWord) );
3382 ea = 0;
3383 if (0) VG_(printf)("(%lu) case 1 [%lu .. %lu]\n",
3384 key, genMap_min, genMap_min+genMap_size- 1 );
sewardjf98e1c02008-10-25 16:22:41 +00003385 }
sewardj8fd92d32008-11-20 23:17:01 +00003386 else
3387 if (LIKELY(key >= genMap_min && key < genMap_min + genMap_size)) {
3388 /* this is the expected (almost-always-happens) case: 'key'
3389 is already mapped in the array. */
3390 ea = key - genMap_min;
3391 }
3392 else
3393 if (key < genMap_min) {
3394 /* 'key' appears before the start of the current array.
3395 Extend the current array by allocating a larger one and
3396 copying the current one to the upper end of it. */
3397 Word more;
3398 UWord* map2;
3399 more = genMap_min - key;
3400 tl_assert(more > 0);
3401 map2 = HG_(zalloc)( "libhb.emmG.1b",
3402 (genMap_size + more) * sizeof(UWord) );
3403 VG_(memcpy)( &map2[more], genMap, genMap_size * sizeof(UWord) );
3404 HG_(free)( genMap );
3405 genMap = map2;
3406 genMap_size += more;
3407 genMap_min -= more;
3408 ea = 0;
3409 tl_assert(genMap_min == key);
3410 if (0) VG_(printf)("(%lu) case 2 [%lu .. %lu]\n",
3411 key, genMap_min, genMap_min + genMap_size - 1 );
3412 }
3413 else {
3414 /* 'key' appears after the end of the current array. Extend
3415 the current array by allocating a larger one and copying
3416 the current one to the lower end of it. */
3417 Word more;
3418 UWord* map2;
3419 tl_assert(key >= genMap_min + genMap_size);
3420 more = key - (genMap_min + genMap_size) + 1;
3421 tl_assert(more > 0);
3422 map2 = HG_(zalloc)( "libhb.emmG.1c",
3423 (genMap_size + more) * sizeof(UWord) );
3424 VG_(memcpy)( &map2[0], genMap, genMap_size * sizeof(UWord) );
3425 HG_(free)( genMap );
3426 genMap = map2;
3427 genMap_size += more;
3428 ea = genMap_size - 1;
3429 tl_assert(genMap_min + genMap_size - 1 == key);
3430 if (0) VG_(printf)("(%lu) case 3 [%lu .. %lu]\n",
3431 key, genMap_min, genMap_min + genMap_size - 1 );
3432 }
3433 /* END find 'ea' from 'key' */
3434
3435 tl_assert(ea >= 0 && ea < genMap_size);
sewardjd86e3a22008-12-03 11:39:37 +00003436 /* and the whole point of this elaborate computation of 'ea' is .. */
sewardj8fd92d32008-11-20 23:17:01 +00003437 genMap[ea]++;
sewardjf98e1c02008-10-25 16:22:41 +00003438 }
3439
sewardj8fd92d32008-11-20 23:17:01 +00003440 tl_assert(genMap);
3441 tl_assert(genMap_size > 0);
sewardjf98e1c02008-10-25 16:22:41 +00003442
sewardj8fd92d32008-11-20 23:17:01 +00003443 /* Sanity check what we just computed */
3444 { UWord sum = 0;
3445 for (i = 0; i < genMap_size; i++) {
3446 if (0) VG_(printf)(" xxx: gen %ld has %lu\n",
3447 i + genMap_min, genMap[i] );
3448 sum += genMap[i];
3449 }
3450 tl_assert(sum == oldrefTreeN);
3451 }
3452
3453 /* Figure out how many generations to throw away */
sewardjf98e1c02008-10-25 16:22:41 +00003454 retained = oldrefTreeN;
3455 maxGen = 0;
sewardj8fd92d32008-11-20 23:17:01 +00003456
3457 for (i = 0; i < genMap_size; i++) {
3458 keyW = i + genMap_min;
3459 valW = genMap[i];
sewardjf98e1c02008-10-25 16:22:41 +00003460 tl_assert(keyW > 0); /* can't allow a generation # 0 */
3461 if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW );
3462 tl_assert(keyW >= maxGen);
3463 tl_assert(retained >= valW);
3464 if (retained - valW
sewardj849b0ed2008-12-21 10:43:10 +00003465 > (UWord)(HG_(clo_conflict_cache_size)
3466 * EVENT_MAP_GC_DISCARD_FRACTION)) {
sewardjf98e1c02008-10-25 16:22:41 +00003467 retained -= valW;
3468 maxGen = keyW;
3469 } else {
3470 break;
3471 }
3472 }
sewardjf98e1c02008-10-25 16:22:41 +00003473
sewardj8fd92d32008-11-20 23:17:01 +00003474 HG_(free)(genMap);
sewardjf98e1c02008-10-25 16:22:41 +00003475
sewardj9b1f0fd2008-11-18 23:40:00 +00003476 tl_assert(retained >= 0 && retained <= oldrefTreeN);
sewardjf98e1c02008-10-25 16:22:41 +00003477
3478 /* Now make up a big list of the oldrefTree entries we want to
3479 delete. We can't simultaneously traverse the tree and delete
3480 stuff from it, so first we need to copy them off somewhere
3481 else. (sigh) */
sewardj8fd92d32008-11-20 23:17:01 +00003482 refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2",
sewardjd86e3a22008-12-03 11:39:37 +00003483 HG_(free), sizeof(Addr) );
sewardjf98e1c02008-10-25 16:22:41 +00003484
sewardj9b1f0fd2008-11-18 23:40:00 +00003485 if (retained < oldrefTreeN) {
3486
3487 /* This is the normal (expected) case. We discard any ref whose
3488 generation number <= maxGen. */
sewardjbc307e52008-12-06 22:10:54 +00003489 VG_(initIterSWA)( oldrefTree );
3490 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00003491 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00003492 tl_assert(oldref->magic == OldRef_MAGIC);
3493 if (oldref->gen <= maxGen) {
sewardjd86e3a22008-12-03 11:39:37 +00003494 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00003495 }
sewardjf98e1c02008-10-25 16:22:41 +00003496 }
sewardj9b1f0fd2008-11-18 23:40:00 +00003497 if (VG_(clo_verbosity) > 1) {
3498 VG_(message)(Vg_DebugMsg,
3499 "libhb: EvM GC: delete generations %lu and below, "
3500 "retaining %lu entries",
3501 maxGen, retained );
3502 }
3503
3504 } else {
3505
3506 static UInt rand_seed = 0; /* leave as static */
3507
3508 /* Degenerate case: there's only one generation in the entire
3509 tree, so we need to have some other way of deciding which
3510 refs to throw away. Just throw out half of them randomly. */
3511 tl_assert(retained == oldrefTreeN);
sewardjbc307e52008-12-06 22:10:54 +00003512 VG_(initIterSWA)( oldrefTree );
3513 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardj9b1f0fd2008-11-18 23:40:00 +00003514 UInt n;
sewardjd86e3a22008-12-03 11:39:37 +00003515 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00003516 tl_assert(oldref->magic == OldRef_MAGIC);
3517 n = VG_(random)( &rand_seed );
3518 if ((n & 0xFFF) < 0x800) {
sewardjd86e3a22008-12-03 11:39:37 +00003519 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00003520 retained--;
3521 }
3522 }
3523 if (VG_(clo_verbosity) > 1) {
3524 VG_(message)(Vg_DebugMsg,
3525 "libhb: EvM GC: randomly delete half the entries, "
3526 "retaining %lu entries",
3527 retained );
3528 }
3529
sewardjf98e1c02008-10-25 16:22:41 +00003530 }
3531
3532 n2del = VG_(sizeXA)( refs2del );
3533 tl_assert(n2del == (Word)(oldrefTreeN - retained));
3534
3535 if (0) VG_(printf)("%s","deleting entries\n");
3536 for (i = 0; i < n2del; i++) {
sewardjd86e3a22008-12-03 11:39:37 +00003537 Bool b;
3538 Addr ga2del = *(Addr*)VG_(indexXA)( refs2del, i );
sewardjbc307e52008-12-06 22:10:54 +00003539 b = VG_(delFromSWA)( oldrefTree, &keyW, &valW, ga2del );
sewardjd86e3a22008-12-03 11:39:37 +00003540 tl_assert(b);
3541 tl_assert(keyW == ga2del);
3542 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00003543 for (j = 0; j < N_OLDREF_ACCS; j++) {
sewardjc5ea9962008-12-07 01:41:46 +00003544 Thr* aThr = ptr_and_UWord(oldref->accs[j].thr, ~3);
3545 RCEC* aRef = ptr_and_UWord(oldref->accs[j].rcec, ~3);
3546 if (aRef) {
3547 tl_assert(aThr);
sewardjf98e1c02008-10-25 16:22:41 +00003548 stats__ctxt_rcdec3++;
sewardjc5ea9962008-12-07 01:41:46 +00003549 ctxt__rcdec( aRef );
sewardjf98e1c02008-10-25 16:22:41 +00003550 } else {
sewardjc5ea9962008-12-07 01:41:46 +00003551 tl_assert(!aThr);
sewardjf98e1c02008-10-25 16:22:41 +00003552 }
3553 }
sewardjd86e3a22008-12-03 11:39:37 +00003554
3555 free_OldRef( oldref );
sewardjf98e1c02008-10-25 16:22:41 +00003556 }
3557
3558 VG_(deleteXA)( refs2del );
3559
sewardjc5ea9962008-12-07 01:41:46 +00003560 tl_assert( VG_(sizeSWA)( oldrefTree ) == retained );
sewardjf98e1c02008-10-25 16:22:41 +00003561
3562 oldrefTreeN = retained;
3563 oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
3564
3565 /* Throw away all RCECs with zero reference counts */
3566 for (i = 0; i < N_RCEC_TAB; i++) {
3567 RCEC** pp = &contextTab[i];
3568 RCEC* p = *pp;
3569 while (p) {
3570 if (p->rc == 0) {
3571 *pp = p->next;
sewardjd86e3a22008-12-03 11:39:37 +00003572 free_RCEC(p);
sewardjf98e1c02008-10-25 16:22:41 +00003573 p = *pp;
3574 tl_assert(stats__ctxt_tab_curr > 0);
3575 stats__ctxt_tab_curr--;
3576 } else {
3577 pp = &p->next;
3578 p = p->next;
3579 }
3580 }
3581 }
3582
sewardj8f5374e2008-12-07 11:40:17 +00003583 /* Check the reference counts (expensive) */
3584 if (CHECK_CEM)
3585 event_map__check_reference_counts( False/*after*/ );
sewardjf98e1c02008-10-25 16:22:41 +00003586
3587 //if (0)
3588 //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
3589 // VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
3590
3591}
3592
3593
3594/////////////////////////////////////////////////////////
3595// //
3596// Core MSM //
3597// //
3598/////////////////////////////////////////////////////////
3599
sewardjb0e009d2008-11-19 16:35:15 +00003600/* Logic in msm_read/msm_write updated/verified after re-analysis,
3601 19 Nov 08. */
3602
sewardjb0e009d2008-11-19 16:35:15 +00003603/* 19 Nov 08: it seems that MSM_RACE2ERR == 1 is a bad idea. When
3604 nonzero, the effect is that when a race is detected for a location,
3605 that location is put into a special 'error' state and no further
3606 checking of it is done until it returns to a 'normal' state, which
3607 requires it to be deallocated and reallocated.
3608
3609 This is a bad idea, because of the interaction with suppressions.
3610 Suppose there is a race on the location, but the error is
3611 suppressed. The location now is marked as in-error. Now any
3612 subsequent race -- including ones we want to see -- will never be
3613 detected until the location is deallocated and reallocated.
3614
sewardj8f5374e2008-12-07 11:40:17 +00003615 Hence set MSM_RACE2ERR to zero. This causes raced-on locations to
sewardjb0e009d2008-11-19 16:35:15 +00003616 remain in the normal 'C' (constrained) state, but places on them
3617 the constraint that the next accesses happen-after both the
3618 existing constraint and the relevant vector clock of the thread
sewardj8f5374e2008-12-07 11:40:17 +00003619 doing the racing access.
sewardjb0e009d2008-11-19 16:35:15 +00003620*/
3621#define MSM_RACE2ERR 0
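
/* The shadow values handled by msm_read/msm_write below are in one
   of three states: C (constrained by a read-min/write-min pair of
   vector clocks), A (no-access) or E (in-error).  Each function maps
   (old shadow value, accessing thread) to a new shadow value, and
   calls record_race_info when the constraint is violated. */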
3622
sewardjf98e1c02008-10-25 16:22:41 +00003623static ULong stats__msm_read = 0;
3624static ULong stats__msm_read_change = 0;
3625static ULong stats__msm_write = 0;
3626static ULong stats__msm_write_change = 0;
3627
3628__attribute__((noinline))
3629static void record_race_info ( Thr* acc_thr,
sewardja781be62008-12-08 00:12:28 +00003630 Addr acc_addr, SizeT szB, Bool isWrite )
sewardjf98e1c02008-10-25 16:22:41 +00003631{
sewardjc5ea9962008-12-07 01:41:46 +00003632 /* Call here to report a race. We just hand it onwards to
3633 HG_(record_error_Race). If that in turn discovers that the
3634 error is going to be collected, then that queries the
3635 conflicting-event map. The alternative would be to query it
3636 right here. But that causes a lot of pointless queries for
3637 errors which will shortly be discarded as duplicates, and can
3638 become a performance overhead; so we defer the query until we
3639 know the error is not a duplicate. */
3640 tl_assert(acc_thr->opaque);
3641 HG_(record_error_Race)( acc_thr->opaque, acc_addr,
sewardja781be62008-12-08 00:12:28 +00003642 szB, isWrite, NULL/*mb_lastlock*/ );
sewardjf98e1c02008-10-25 16:22:41 +00003643}
3644
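/* A constrained (C) shadow value is sane iff its minimal read clock
   is ordered at-or-before its minimal write clock; A and E values
   are trivially sane. */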
3645static Bool is_sane_SVal_C ( SVal sv ) {
3646 POrd ord;
3647 if (!SVal__isC(sv)) return True;
3648 ord = VtsID__getOrdering( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
3649 if (ord == POrd_EQ || ord == POrd_LT) return True;
3650 return False;
3651}
3652
3653
3654/* Compute new state following a read */
3655static inline SVal msm_read ( SVal svOld,
3656 /* The following are only needed for
3657 creating error reports. */
3658 Thr* acc_thr,
3659 Addr acc_addr, SizeT szB )
3660{
3661 SVal svNew = SVal_INVALID;
3662 stats__msm_read++;
3663
3664 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00003665 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00003666 tl_assert(is_sane_SVal_C(svOld));
3667 }
3668
3669 if (SVal__isC(svOld)) {
3670 POrd ord;
3671 VtsID tviR = acc_thr->viR;
3672 VtsID tviW = acc_thr->viW;
3673 VtsID rmini = SVal__unC_Rmin(svOld);
3674 VtsID wmini = SVal__unC_Wmin(svOld);
3675
3676 ord = VtsID__getOrdering(rmini,tviR);
3677 if (ord == POrd_EQ || ord == POrd_LT) {
3678 /* no race */
3679 /* Note: RWLOCK subtlety: use tviW, not tviR */
3680 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
3681 goto out;
3682 } else {
sewardjb0e009d2008-11-19 16:35:15 +00003683 /* assert on sanity of constraints. */
3684 POrd ordxx = VtsID__getOrdering(rmini,wmini);
3685 tl_assert(ordxx == POrd_EQ || ordxx == POrd_LT);
sewardjf98e1c02008-10-25 16:22:41 +00003686 svNew = MSM_RACE2ERR
3687 ? SVal__mkE()
sewardj8f5374e2008-12-07 11:40:17 +00003688 /* see comments on corresponding fragment in
3689 msm_write for explanation. */
3690 /* aggressive setting: */
3691 /*
sewardjb0e009d2008-11-19 16:35:15 +00003692 : SVal__mkC( VtsID__join2(wmini,tviR),
3693 VtsID__join2(wmini,tviW) );
sewardj8f5374e2008-12-07 11:40:17 +00003694 */
3695 /* "consistent" setting: */
sewardj3b0c4d72008-11-20 11:20:50 +00003696 : SVal__mkC( VtsID__join2(rmini,tviR),
3697 VtsID__join2(wmini,tviW) );
sewardja781be62008-12-08 00:12:28 +00003698 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/ );
sewardjf98e1c02008-10-25 16:22:41 +00003699 goto out;
3700 }
3701 }
3702 if (SVal__isA(svOld)) {
3703 /* reading no-access memory (sigh); leave unchanged */
3704 /* check for no pollution */
3705 tl_assert(svOld == SVal_NOACCESS);
3706 svNew = SVal_NOACCESS;
3707 goto out;
3708 }
3709 if (SVal__isE(svOld)) {
3710 /* no race, location is already "in error" */
3711 svNew = SVal__mkE();
3712 goto out;
3713 }
3714 VG_(printf)("msm_read: bad svOld: 0x%016llx\n", svOld);
3715 tl_assert(0);
3716
3717 out:
sewardj8f5374e2008-12-07 11:40:17 +00003718 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00003719 tl_assert(is_sane_SVal_C(svNew));
3720 }
3721 tl_assert(svNew != SVal_INVALID);
sewardj849b0ed2008-12-21 10:43:10 +00003722 if (svNew != svOld && HG_(clo_show_conflicts)) {
sewardj8f5374e2008-12-07 11:40:17 +00003723 if (SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00003724 event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
sewardjf98e1c02008-10-25 16:22:41 +00003725 stats__msm_read_change++;
3726 }
3727 }
3728 return svNew;
3729}
3730
3731
3732/* Compute new state following a write */
3733static inline SVal msm_write ( SVal svOld,
3734 /* The following are only needed for
3735 creating error reports. */
3736 Thr* acc_thr,
3737 Addr acc_addr, SizeT szB )
3738{
3739 SVal svNew = SVal_INVALID;
3740 stats__msm_write++;
3741
3742 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00003743 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00003744 tl_assert(is_sane_SVal_C(svOld));
3745 }
3746
3747 if (SVal__isC(svOld)) {
3748 POrd ord;
3749 VtsID tviW = acc_thr->viW;
3750 VtsID wmini = SVal__unC_Wmin(svOld);
3751
3752 ord = VtsID__getOrdering(wmini,tviW);
3753 if (ord == POrd_EQ || ord == POrd_LT) {
3754 /* no race */
3755 svNew = SVal__mkC( tviW, tviW );
3756 goto out;
3757 } else {
sewardjb0e009d2008-11-19 16:35:15 +00003758 VtsID tviR = acc_thr->viR;
sewardjf98e1c02008-10-25 16:22:41 +00003759 VtsID rmini = SVal__unC_Rmin(svOld);
sewardjb0e009d2008-11-19 16:35:15 +00003760 /* assert on sanity of constraints. */
3761 POrd ordxx = VtsID__getOrdering(rmini,wmini);
3762 tl_assert(ordxx == POrd_EQ || ordxx == POrd_LT);
sewardjf98e1c02008-10-25 16:22:41 +00003763 svNew = MSM_RACE2ERR
3764 ? SVal__mkE()
sewardj8f5374e2008-12-07 11:40:17 +00003765 /* One possibility is, after a race is seen, to
3766 set the location's constraints as aggressively
3767 (as far ahead) as possible. However, that just
3768 causes lots more races to be reported, which is
3769 very confusing. Hence don't do this. */
3770 /*
sewardjb0e009d2008-11-19 16:35:15 +00003771 : SVal__mkC( VtsID__join2(wmini,tviR),
sewardjf98e1c02008-10-25 16:22:41 +00003772 VtsID__join2(wmini,tviW) );
sewardj8f5374e2008-12-07 11:40:17 +00003773 */
3774                  /* instead, re-set the constraints in a way which
3775                     is consistent with how they would have been
3776                     computed anyway, had no race been detected. */
sewardj3b0c4d72008-11-20 11:20:50 +00003777 : SVal__mkC( VtsID__join2(rmini,tviR),
3778 VtsID__join2(wmini,tviW) );
sewardja781be62008-12-08 00:12:28 +00003779 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/ );
sewardjf98e1c02008-10-25 16:22:41 +00003780 goto out;
3781 }
3782 }
3783 if (SVal__isA(svOld)) {
3784 /* writing no-access memory (sigh); leave unchanged */
3785 /* check for no pollution */
3786 tl_assert(svOld == SVal_NOACCESS);
3787 svNew = SVal_NOACCESS;
3788 goto out;
3789 }
3790 if (SVal__isE(svOld)) {
3791 /* no race, location is already "in error" */
3792 svNew = SVal__mkE();
3793 goto out;
3794 }
3795 VG_(printf)("msm_write: bad svOld: 0x%016llx\n", svOld);
3796 tl_assert(0);
3797
3798 out:
sewardj8f5374e2008-12-07 11:40:17 +00003799 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00003800 tl_assert(is_sane_SVal_C(svNew));
3801 }
3802 tl_assert(svNew != SVal_INVALID);
sewardj849b0ed2008-12-21 10:43:10 +00003803 if (svNew != svOld && HG_(clo_show_conflicts)) {
sewardj8f5374e2008-12-07 11:40:17 +00003804 if (SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00003805 event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
sewardjf98e1c02008-10-25 16:22:41 +00003806 stats__msm_write_change++;
3807 }
3808 }
3809 return svNew;
3810}
3811
3812
3813/////////////////////////////////////////////////////////
3814// //
3815// Apply core MSM to specific memory locations //
3816// //
3817/////////////////////////////////////////////////////////
3818
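/* All of the handlers below follow the same shape: find the cache
   line and tree for the address, pull the shadow value down to the
   required granularity if it is currently held at a coarser one (or
   split the access and retry at a smaller size if it is held at a
   finer one, or if the address is misaligned -- the 'slowcase'
   paths), then push the old SVal through msm_read/msm_write and
   store the result back. */
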
3819/*------------- ZSM accesses: 8 bit apply ------------- */
3820
3821void zsm_apply8___msm_read ( Thr* thr, Addr a ) {
3822 CacheLine* cl;
3823 UWord cloff, tno, toff;
3824 SVal svOld, svNew;
3825 UShort descr;
3826 stats__cline_read8s++;
3827 cl = get_cacheline(a);
3828 cloff = get_cacheline_offset(a);
3829 tno = get_treeno(a);
3830 toff = get_tree_offset(a); /* == 0 .. 7 */
3831 descr = cl->descrs[tno];
3832 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3833 SVal* tree = &cl->svals[tno << 3];
3834 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00003835 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00003836 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3837 }
3838 svOld = cl->svals[cloff];
3839 svNew = msm_read( svOld, thr,a,1 );
3840 tl_assert(svNew != SVal_INVALID);
3841 cl->svals[cloff] = svNew;
3842}
3843
3844void zsm_apply8___msm_write ( Thr* thr, Addr a ) {
3845 CacheLine* cl;
3846 UWord cloff, tno, toff;
3847 SVal svOld, svNew;
3848 UShort descr;
3849    stats__cline_write8s++;
3850 cl = get_cacheline(a);
3851 cloff = get_cacheline_offset(a);
3852 tno = get_treeno(a);
3853 toff = get_tree_offset(a); /* == 0 .. 7 */
3854 descr = cl->descrs[tno];
3855 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3856 SVal* tree = &cl->svals[tno << 3];
3857 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00003858 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00003859 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3860 }
3861 svOld = cl->svals[cloff];
3862 svNew = msm_write( svOld, thr,a,1 );
3863 tl_assert(svNew != SVal_INVALID);
3864 cl->svals[cloff] = svNew;
3865}
3866
3867/*------------- ZSM accesses: 16 bit apply ------------- */
3868
3869void zsm_apply16___msm_read ( Thr* thr, Addr a ) {
3870 CacheLine* cl;
3871 UWord cloff, tno, toff;
3872 SVal svOld, svNew;
3873 UShort descr;
3874 stats__cline_read16s++;
3875 if (UNLIKELY(!aligned16(a))) goto slowcase;
3876 cl = get_cacheline(a);
3877 cloff = get_cacheline_offset(a);
3878 tno = get_treeno(a);
3879 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
3880 descr = cl->descrs[tno];
3881 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
3882 if (valid_value_is_below_me_16(descr, toff)) {
3883 goto slowcase;
3884 } else {
3885 SVal* tree = &cl->svals[tno << 3];
3886 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
3887 }
sewardj8f5374e2008-12-07 11:40:17 +00003888 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00003889 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3890 }
3891 svOld = cl->svals[cloff];
3892 svNew = msm_read( svOld, thr,a,2 );
3893 tl_assert(svNew != SVal_INVALID);
3894 cl->svals[cloff] = svNew;
3895 return;
3896 slowcase: /* misaligned, or must go further down the tree */
3897 stats__cline_16to8splits++;
3898 zsm_apply8___msm_read( thr, a + 0 );
3899 zsm_apply8___msm_read( thr, a + 1 );
3900}
3901
3902void zsm_apply16___msm_write ( Thr* thr, Addr a ) {
3903 CacheLine* cl;
3904 UWord cloff, tno, toff;
3905 SVal svOld, svNew;
3906 UShort descr;
3907    stats__cline_write16s++;
3908 if (UNLIKELY(!aligned16(a))) goto slowcase;
3909 cl = get_cacheline(a);
3910 cloff = get_cacheline_offset(a);
3911 tno = get_treeno(a);
3912 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
3913 descr = cl->descrs[tno];
3914 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
3915 if (valid_value_is_below_me_16(descr, toff)) {
3916 goto slowcase;
3917 } else {
3918 SVal* tree = &cl->svals[tno << 3];
3919 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
3920 }
sewardj8f5374e2008-12-07 11:40:17 +00003921 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00003922 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3923 }
3924 svOld = cl->svals[cloff];
3925 svNew = msm_write( svOld, thr,a,2 );
3926 tl_assert(svNew != SVal_INVALID);
3927 cl->svals[cloff] = svNew;
3928 return;
3929 slowcase: /* misaligned, or must go further down the tree */
3930 stats__cline_16to8splits++;
3931 zsm_apply8___msm_write( thr, a + 0 );
3932 zsm_apply8___msm_write( thr, a + 1 );
3933}
3934
3935/*------------- ZSM accesses: 32 bit apply ------------- */
3936
3937void zsm_apply32___msm_read ( Thr* thr, Addr a ) {
3938 CacheLine* cl;
3939 UWord cloff, tno, toff;
3940 SVal svOld, svNew;
3941 UShort descr;
3942 if (UNLIKELY(!aligned32(a))) goto slowcase;
3943 cl = get_cacheline(a);
3944 cloff = get_cacheline_offset(a);
3945 tno = get_treeno(a);
3946 toff = get_tree_offset(a); /* == 0 or 4 */
3947 descr = cl->descrs[tno];
3948 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
3949 if (valid_value_is_above_me_32(descr, toff)) {
3950 SVal* tree = &cl->svals[tno << 3];
3951 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
3952 } else {
3953 goto slowcase;
3954 }
sewardj8f5374e2008-12-07 11:40:17 +00003955 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00003956 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3957 }
3958 svOld = cl->svals[cloff];
3959 svNew = msm_read( svOld, thr,a,4 );
3960 tl_assert(svNew != SVal_INVALID);
3961 cl->svals[cloff] = svNew;
3962 return;
3963 slowcase: /* misaligned, or must go further down the tree */
3964 stats__cline_32to16splits++;
3965 zsm_apply16___msm_read( thr, a + 0 );
3966 zsm_apply16___msm_read( thr, a + 2 );
3967}
3968
3969void zsm_apply32___msm_write ( Thr* thr, Addr a ) {
3970 CacheLine* cl;
3971 UWord cloff, tno, toff;
3972 SVal svOld, svNew;
3973 UShort descr;
3974 if (UNLIKELY(!aligned32(a))) goto slowcase;
3975 cl = get_cacheline(a);
3976 cloff = get_cacheline_offset(a);
3977 tno = get_treeno(a);
3978 toff = get_tree_offset(a); /* == 0 or 4 */
3979 descr = cl->descrs[tno];
3980 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
3981 if (valid_value_is_above_me_32(descr, toff)) {
3982 SVal* tree = &cl->svals[tno << 3];
3983 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
3984 } else {
3985 goto slowcase;
3986 }
sewardj8f5374e2008-12-07 11:40:17 +00003987 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00003988 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3989 }
3990 svOld = cl->svals[cloff];
3991 svNew = msm_write( svOld, thr,a,4 );
3992 tl_assert(svNew != SVal_INVALID);
3993 cl->svals[cloff] = svNew;
3994 return;
3995 slowcase: /* misaligned, or must go further down the tree */
3996 stats__cline_32to16splits++;
3997 zsm_apply16___msm_write( thr, a + 0 );
3998 zsm_apply16___msm_write( thr, a + 2 );
3999}
4000
4001/*------------- ZSM accesses: 64 bit apply ------------- */
4002
4003void zsm_apply64___msm_read ( Thr* thr, Addr a ) {
4004 CacheLine* cl;
4005 UWord cloff, tno, toff;
4006 SVal svOld, svNew;
4007 UShort descr;
4008 stats__cline_read64s++;
4009 if (UNLIKELY(!aligned64(a))) goto slowcase;
4010 cl = get_cacheline(a);
4011 cloff = get_cacheline_offset(a);
4012 tno = get_treeno(a);
4013 toff = get_tree_offset(a); /* == 0, unused */
4014 descr = cl->descrs[tno];
4015 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
4016 goto slowcase;
4017 }
4018 svOld = cl->svals[cloff];
4019 svNew = msm_read( svOld, thr,a,8 );
4020 tl_assert(svNew != SVal_INVALID);
4021 cl->svals[cloff] = svNew;
4022 return;
4023 slowcase: /* misaligned, or must go further down the tree */
4024 stats__cline_64to32splits++;
4025 zsm_apply32___msm_read( thr, a + 0 );
4026 zsm_apply32___msm_read( thr, a + 4 );
4027}
4028
4029void zsm_apply64___msm_write ( Thr* thr, Addr a ) {
4030 CacheLine* cl;
4031 UWord cloff, tno, toff;
4032 SVal svOld, svNew;
4033 UShort descr;
4034    stats__cline_write64s++;
4035 if (UNLIKELY(!aligned64(a))) goto slowcase;
4036 cl = get_cacheline(a);
4037 cloff = get_cacheline_offset(a);
4038 tno = get_treeno(a);
4039 toff = get_tree_offset(a); /* == 0, unused */
4040 descr = cl->descrs[tno];
4041 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
4042 goto slowcase;
4043 }
4044 svOld = cl->svals[cloff];
4045 svNew = msm_write( svOld, thr,a,8 );
4046 tl_assert(svNew != SVal_INVALID);
4047 cl->svals[cloff] = svNew;
4048 return;
4049 slowcase: /* misaligned, or must go further down the tree */
4050 stats__cline_64to32splits++;
4051 zsm_apply32___msm_write( thr, a + 0 );
4052 zsm_apply32___msm_write( thr, a + 4 );
4053}
4054
4055/*--------------- ZSM accesses: 8 bit write --------------- */
4056
4057static
4058void zsm_write8 ( Addr a, SVal svNew ) {
4059 CacheLine* cl;
4060 UWord cloff, tno, toff;
4061 UShort descr;
4062 stats__cline_set8s++;
4063 cl = get_cacheline(a);
4064 cloff = get_cacheline_offset(a);
4065 tno = get_treeno(a);
4066 toff = get_tree_offset(a); /* == 0 .. 7 */
4067 descr = cl->descrs[tno];
4068 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
4069 SVal* tree = &cl->svals[tno << 3];
4070 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00004071 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004072 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4073 }
4074 tl_assert(svNew != SVal_INVALID);
4075 cl->svals[cloff] = svNew;
4076}
4077
4078/*--------------- ZSM accesses: 16 bit write --------------- */
4079
4080static
4081void zsm_write16 ( Addr a, SVal svNew ) {
4082 CacheLine* cl;
4083 UWord cloff, tno, toff;
4084 UShort descr;
4085 stats__cline_set16s++;
4086 if (UNLIKELY(!aligned16(a))) goto slowcase;
4087 cl = get_cacheline(a);
4088 cloff = get_cacheline_offset(a);
4089 tno = get_treeno(a);
4090 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
4091 descr = cl->descrs[tno];
4092 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
4093 if (valid_value_is_below_me_16(descr, toff)) {
4094 /* Writing at this level. Need to fix up 'descr'. */
4095 cl->descrs[tno] = pullup_descr_to_16(descr, toff);
4096 /* At this point, the tree does not match cl->descr[tno] any
4097 more. The assignments below will fix it up. */
4098 } else {
4099 /* We can't indiscriminately write on the w16 node as in the
4100 w64 case, as that might make the node inconsistent with
4101 its parent. So first, pull down to this level. */
4102 SVal* tree = &cl->svals[tno << 3];
4103 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00004104 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004105 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4106 }
4107 }
4108 tl_assert(svNew != SVal_INVALID);
4109 cl->svals[cloff + 0] = svNew;
4110 cl->svals[cloff + 1] = SVal_INVALID;
4111 return;
4112 slowcase: /* misaligned */
4113 stats__cline_16to8splits++;
4114 zsm_write8( a + 0, svNew );
4115 zsm_write8( a + 1, svNew );
4116}
4117
4118/*--------------- ZSM accesses: 32 bit write --------------- */
4119
4120static
4121void zsm_write32 ( Addr a, SVal svNew ) {
4122 CacheLine* cl;
4123 UWord cloff, tno, toff;
4124 UShort descr;
4125 stats__cline_set32s++;
4126 if (UNLIKELY(!aligned32(a))) goto slowcase;
4127 cl = get_cacheline(a);
4128 cloff = get_cacheline_offset(a);
4129 tno = get_treeno(a);
4130 toff = get_tree_offset(a); /* == 0 or 4 */
4131 descr = cl->descrs[tno];
4132 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
4133 if (valid_value_is_above_me_32(descr, toff)) {
4134 /* We can't indiscriminately write on the w32 node as in the
4135 w64 case, as that might make the node inconsistent with
4136 its parent. So first, pull down to this level. */
4137 SVal* tree = &cl->svals[tno << 3];
4138 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00004139 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004140 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4141 } else {
4142 /* Writing at this level. Need to fix up 'descr'. */
4143 cl->descrs[tno] = pullup_descr_to_32(descr, toff);
4144 /* At this point, the tree does not match cl->descr[tno] any
4145 more. The assignments below will fix it up. */
4146 }
4147 }
4148 tl_assert(svNew != SVal_INVALID);
4149 cl->svals[cloff + 0] = svNew;
4150 cl->svals[cloff + 1] = SVal_INVALID;
4151 cl->svals[cloff + 2] = SVal_INVALID;
4152 cl->svals[cloff + 3] = SVal_INVALID;
4153 return;
4154 slowcase: /* misaligned */
4155 stats__cline_32to16splits++;
4156 zsm_write16( a + 0, svNew );
4157 zsm_write16( a + 2, svNew );
4158}
4159
4160/*--------------- ZSM accesses: 64 bit write --------------- */
4161
4162static
4163void zsm_write64 ( Addr a, SVal svNew ) {
4164 CacheLine* cl;
4165 UWord cloff, tno, toff;
4166 stats__cline_set64s++;
4167 if (UNLIKELY(!aligned64(a))) goto slowcase;
4168 cl = get_cacheline(a);
4169 cloff = get_cacheline_offset(a);
4170 tno = get_treeno(a);
4171 toff = get_tree_offset(a); /* == 0 */
4172 cl->descrs[tno] = TREE_DESCR_64;
4173 tl_assert(svNew != SVal_INVALID);
4174 cl->svals[cloff + 0] = svNew;
4175 cl->svals[cloff + 1] = SVal_INVALID;
4176 cl->svals[cloff + 2] = SVal_INVALID;
4177 cl->svals[cloff + 3] = SVal_INVALID;
4178 cl->svals[cloff + 4] = SVal_INVALID;
4179 cl->svals[cloff + 5] = SVal_INVALID;
4180 cl->svals[cloff + 6] = SVal_INVALID;
4181 cl->svals[cloff + 7] = SVal_INVALID;
4182 return;
4183 slowcase: /* misaligned */
4184 stats__cline_64to32splits++;
4185 zsm_write32( a + 0, svNew );
4186 zsm_write32( a + 4, svNew );
4187}
4188
4189/*------------- ZSM accesses: 8 bit read/copy ------------- */
4190
4191static
4192SVal zsm_read8 ( Addr a ) {
4193 CacheLine* cl;
4194 UWord cloff, tno, toff;
4195 UShort descr;
4196 stats__cline_get8s++;
4197 cl = get_cacheline(a);
4198 cloff = get_cacheline_offset(a);
4199 tno = get_treeno(a);
4200 toff = get_tree_offset(a); /* == 0 .. 7 */
4201 descr = cl->descrs[tno];
4202 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
4203 SVal* tree = &cl->svals[tno << 3];
4204 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
4205 }
4206 return cl->svals[cloff];
4207}
4208
4209static void zsm_copy8 ( Addr src, Addr dst, Bool uu_normalise ) {
4210 SVal sv;
4211 stats__cline_copy8s++;
4212 sv = zsm_read8( src );
4213 zsm_write8( dst, sv );
4214}
4215
4216/* ------------ Shadow memory range setting ops ------------ */
4217
4218void zsm_apply_range___msm_read ( Thr* thr,
4219 Addr a, SizeT len )
4220{
4221 /* fast track a couple of common cases */
4222 if (len == 4 && aligned32(a)) {
4223 zsm_apply32___msm_read( thr, a );
4224 return;
4225 }
4226 if (len == 8 && aligned64(a)) {
4227 zsm_apply64___msm_read( thr, a );
4228 return;
4229 }
4230
4231 /* be completely general (but as efficient as possible) */
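   /* Strategy for the general case: peel off leading accesses until
      'a' is 8-aligned, process the bulk as aligned 64-bit accesses,
      then mop up the tail with progressively smaller accesses.  The
      write variant and zsm_set_range_SMALL below have the same
      shape. */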
4232 if (len == 0) return;
4233
4234 if (!aligned16(a) && len >= 1) {
4235 zsm_apply8___msm_read( thr, a );
4236 a += 1;
4237 len -= 1;
4238 tl_assert(aligned16(a));
4239 }
4240 if (len == 0) return;
4241
4242 if (!aligned32(a) && len >= 2) {
4243 zsm_apply16___msm_read( thr, a );
4244 a += 2;
4245 len -= 2;
4246 tl_assert(aligned32(a));
4247 }
4248 if (len == 0) return;
4249
4250 if (!aligned64(a) && len >= 4) {
4251 zsm_apply32___msm_read( thr, a );
4252 a += 4;
4253 len -= 4;
4254 tl_assert(aligned64(a));
4255 }
4256 if (len == 0) return;
4257
4258 if (len >= 8) {
4259 tl_assert(aligned64(a));
4260 while (len >= 8) {
4261 zsm_apply64___msm_read( thr, a );
4262 a += 8;
4263 len -= 8;
4264 }
4265 tl_assert(aligned64(a));
4266 }
4267 if (len == 0) return;
4268
4269 if (len >= 4)
4270 tl_assert(aligned32(a));
4271 if (len >= 4) {
4272 zsm_apply32___msm_read( thr, a );
4273 a += 4;
4274 len -= 4;
4275 }
4276 if (len == 0) return;
4277
4278 if (len >= 2)
4279 tl_assert(aligned16(a));
4280 if (len >= 2) {
4281 zsm_apply16___msm_read( thr, a );
4282 a += 2;
4283 len -= 2;
4284 }
4285 if (len == 0) return;
4286
4287 if (len >= 1) {
4288 zsm_apply8___msm_read( thr, a );
4289 a += 1;
4290 len -= 1;
4291 }
4292 tl_assert(len == 0);
4293}
4294
4295
4296
4297void zsm_apply_range___msm_write ( Thr* thr,
4298 Addr a, SizeT len )
4299{
4300 /* fast track a couple of common cases */
4301 if (len == 4 && aligned32(a)) {
4302 zsm_apply32___msm_write( thr, a );
4303 return;
4304 }
4305 if (len == 8 && aligned64(a)) {
4306 zsm_apply64___msm_write( thr, a );
4307 return;
4308 }
4309
4310 /* be completely general (but as efficient as possible) */
4311 if (len == 0) return;
4312
4313 if (!aligned16(a) && len >= 1) {
4314 zsm_apply8___msm_write( thr, a );
4315 a += 1;
4316 len -= 1;
4317 tl_assert(aligned16(a));
4318 }
4319 if (len == 0) return;
4320
4321 if (!aligned32(a) && len >= 2) {
4322 zsm_apply16___msm_write( thr, a );
4323 a += 2;
4324 len -= 2;
4325 tl_assert(aligned32(a));
4326 }
4327 if (len == 0) return;
4328
4329 if (!aligned64(a) && len >= 4) {
4330 zsm_apply32___msm_write( thr, a );
4331 a += 4;
4332 len -= 4;
4333 tl_assert(aligned64(a));
4334 }
4335 if (len == 0) return;
4336
4337 if (len >= 8) {
4338 tl_assert(aligned64(a));
4339 while (len >= 8) {
4340 zsm_apply64___msm_write( thr, a );
4341 a += 8;
4342 len -= 8;
4343 }
4344 tl_assert(aligned64(a));
4345 }
4346 if (len == 0) return;
4347
4348 if (len >= 4)
4349 tl_assert(aligned32(a));
4350 if (len >= 4) {
4351 zsm_apply32___msm_write( thr, a );
4352 a += 4;
4353 len -= 4;
4354 }
4355 if (len == 0) return;
4356
4357 if (len >= 2)
4358 tl_assert(aligned16(a));
4359 if (len >= 2) {
4360 zsm_apply16___msm_write( thr, a );
4361 a += 2;
4362 len -= 2;
4363 }
4364 if (len == 0) return;
4365
4366 if (len >= 1) {
4367 zsm_apply8___msm_write( thr, a );
4368 a += 1;
4369 len -= 1;
4370 }
4371 tl_assert(len == 0);
4372}
4373
4374
4375
4376
4377/* Block-copy states (needed for implementing realloc()). */
4378
4379static void zsm_copy_range ( Addr src, Addr dst, SizeT len )
4380{
4381 SizeT i;
4382 if (len == 0)
4383 return;
4384
4385 /* assert for non-overlappingness */
4386 tl_assert(src+len <= dst || dst+len <= src);
4387
4388 /* To be simple, just copy byte by byte. But so as not to wreck
4389 performance for later accesses to dst[0 .. len-1], normalise
4390 destination lines as we finish with them, and also normalise the
4391 line containing the first and last address. */
4392 for (i = 0; i < len; i++) {
4393 Bool normalise
4394 = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
4395 || i == 0 /* first in range */
4396 || i == len-1; /* last in range */
4397 zsm_copy8( src+i, dst+i, normalise );
4398 }
4399}
4400
4401
4402/* For setting address ranges to a given value. Has considerable
4403 sophistication so as to avoid generating large numbers of pointless
4404 cache loads/writebacks for large ranges. */
4405
4406/* Do small ranges in-cache, in the obvious way. */
4407static
4408void zsm_set_range_SMALL ( Addr a, SizeT len, SVal svNew )
4409{
4410 /* fast track a couple of common cases */
4411 if (len == 4 && aligned32(a)) {
4412 zsm_write32( a, svNew );
4413 return;
4414 }
4415 if (len == 8 && aligned64(a)) {
4416 zsm_write64( a, svNew );
4417 return;
4418 }
4419
4420 /* be completely general (but as efficient as possible) */
4421 if (len == 0) return;
4422
4423 if (!aligned16(a) && len >= 1) {
4424 zsm_write8( a, svNew );
4425 a += 1;
4426 len -= 1;
4427 tl_assert(aligned16(a));
4428 }
4429 if (len == 0) return;
4430
4431 if (!aligned32(a) && len >= 2) {
4432 zsm_write16( a, svNew );
4433 a += 2;
4434 len -= 2;
4435 tl_assert(aligned32(a));
4436 }
4437 if (len == 0) return;
4438
4439 if (!aligned64(a) && len >= 4) {
4440 zsm_write32( a, svNew );
4441 a += 4;
4442 len -= 4;
4443 tl_assert(aligned64(a));
4444 }
4445 if (len == 0) return;
4446
4447 if (len >= 8) {
4448 tl_assert(aligned64(a));
4449 while (len >= 8) {
4450 zsm_write64( a, svNew );
4451 a += 8;
4452 len -= 8;
4453 }
4454 tl_assert(aligned64(a));
4455 }
4456 if (len == 0) return;
4457
4458 if (len >= 4)
4459 tl_assert(aligned32(a));
4460 if (len >= 4) {
4461 zsm_write32( a, svNew );
4462 a += 4;
4463 len -= 4;
4464 }
4465 if (len == 0) return;
4466
4467 if (len >= 2)
4468 tl_assert(aligned16(a));
4469 if (len >= 2) {
4470 zsm_write16( a, svNew );
4471 a += 2;
4472 len -= 2;
4473 }
4474 if (len == 0) return;
4475
4476 if (len >= 1) {
4477 zsm_write8( a, svNew );
4478 a += 1;
4479 len -= 1;
4480 }
4481 tl_assert(len == 0);
4482}
4483
4484
4485/* If we're doing a small range, hand off to zsm_set_range_SMALL. But
4486 for larger ranges, try to operate directly on the out-of-cache
4487 representation, rather than dragging lines into the cache,
4488 overwriting them, and forcing them out. This turns out to be an
4489 important performance optimisation. */
4490
4491static void zsm_set_range ( Addr a, SizeT len, SVal svNew )
4492{
4493 tl_assert(svNew != SVal_INVALID);
4494 stats__cache_make_New_arange += (ULong)len;
4495
4496 if (0 && len > 500)
4497 VG_(printf)("make New ( %#lx, %ld )\n", a, len );
4498
4499 if (0) {
4500 static UWord n_New_in_cache = 0;
4501 static UWord n_New_not_in_cache = 0;
4502 /* tag is 'a' with the in-line offset masked out,
4503 eg a[31]..a[4] 0000 */
4504 Addr tag = a & ~(N_LINE_ARANGE - 1);
4505 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
4506 if (LIKELY(tag == cache_shmem.tags0[wix])) {
4507 n_New_in_cache++;
4508 } else {
4509 n_New_not_in_cache++;
4510 }
4511 if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
4512 VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
4513 n_New_in_cache, n_New_not_in_cache );
4514 }
4515
4516 if (LIKELY(len < 2 * N_LINE_ARANGE)) {
4517 zsm_set_range_SMALL( a, len, svNew );
4518 } else {
4519 Addr before_start = a;
4520 Addr aligned_start = cacheline_ROUNDUP(a);
4521 Addr after_start = cacheline_ROUNDDN(a + len);
4522 UWord before_len = aligned_start - before_start;
4523 UWord aligned_len = after_start - aligned_start;
4524 UWord after_len = a + len - after_start;
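      /* Illustrative split (made-up numbers, assuming N_LINE_ARANGE
         is 0x40): for a = 0x1003 and len = 0x2100, 'before' covers
         0x1003 .. 0x103F, the aligned middle covers the whole lines
         0x1040 .. 0x30FF, and 'after' covers 0x3100 .. 0x3102. */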
4525 tl_assert(before_start <= aligned_start);
4526 tl_assert(aligned_start <= after_start);
4527 tl_assert(before_len < N_LINE_ARANGE);
4528 tl_assert(after_len < N_LINE_ARANGE);
4529 tl_assert(get_cacheline_offset(aligned_start) == 0);
4530 if (get_cacheline_offset(a) == 0) {
4531 tl_assert(before_len == 0);
4532 tl_assert(a == aligned_start);
4533 }
4534 if (get_cacheline_offset(a+len) == 0) {
4535 tl_assert(after_len == 0);
4536 tl_assert(after_start == a+len);
4537 }
4538 if (before_len > 0) {
4539 zsm_set_range_SMALL( before_start, before_len, svNew );
4540 }
4541 if (after_len > 0) {
4542 zsm_set_range_SMALL( after_start, after_len, svNew );
4543 }
4544 stats__cache_make_New_inZrep += (ULong)aligned_len;
4545
4546 while (1) {
4547 Addr tag;
4548 UWord wix;
4549 if (aligned_start >= after_start)
4550 break;
4551 tl_assert(get_cacheline_offset(aligned_start) == 0);
4552 tag = aligned_start & ~(N_LINE_ARANGE - 1);
4553 wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
4554 if (tag == cache_shmem.tags0[wix]) {
4555 UWord i;
4556 for (i = 0; i < N_LINE_ARANGE / 8; i++)
4557 zsm_write64( aligned_start + i * 8, svNew );
4558 } else {
4559 UWord i;
4560 Word zix;
4561 SecMap* sm;
4562 LineZ* lineZ;
4563 /* This line is not in the cache. Do not force it in; instead
4564 modify it in-place. */
4565 /* find the Z line to write in and rcdec it or the
4566 associated F line. */
4567 find_Z_for_writing( &sm, &zix, tag );
4568 tl_assert(sm);
4569 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
4570 lineZ = &sm->linesZ[zix];
4571 lineZ->dict[0] = svNew;
4572 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
4573 for (i = 0; i < N_LINE_ARANGE/4; i++)
4574 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
4575 rcinc_LineZ(lineZ);
4576 }
4577 aligned_start += N_LINE_ARANGE;
4578 aligned_len -= N_LINE_ARANGE;
4579 }
4580 tl_assert(aligned_start == after_start);
4581 tl_assert(aligned_len == 0);
4582 }
4583}
4584
4585
4586/////////////////////////////////////////////////////////
4587// //
4588// Synchronisation objects //
4589// //
4590/////////////////////////////////////////////////////////
4591
4592// (UInt) `echo "Synchronisation object" | md5sum`
4593#define SO_MAGIC 0x56b3c5b0U
4594
4595struct _SO {
4596 VtsID viR; /* r-clock of sender */
4597 VtsID viW; /* w-clock of sender */
4598 UInt magic;
4599};
4600
4601static SO* SO__Alloc ( void ) {
4602 SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
4603 so->viR = VtsID_INVALID;
4604 so->viW = VtsID_INVALID;
4605 so->magic = SO_MAGIC;
4606 return so;
4607}
4608static void SO__Dealloc ( SO* so ) {
4609 tl_assert(so);
4610 tl_assert(so->magic == SO_MAGIC);
4611 if (so->viR == VtsID_INVALID) {
4612 tl_assert(so->viW == VtsID_INVALID);
4613 } else {
4614 tl_assert(so->viW != VtsID_INVALID);
4615 VtsID__rcdec(so->viR);
4616 VtsID__rcdec(so->viW);
4617 }
4618 so->magic = 0;
4619 HG_(free)( so );
4620}
4621
4622
4623/////////////////////////////////////////////////////////
4624// //
4625// Top Level API //
4626// //
4627/////////////////////////////////////////////////////////
4628
4629static void show_thread_state ( HChar* str, Thr* t )
4630{
4631 if (1) return;
4632 if (t->viR == t->viW) {
4633 VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
4634 VtsID__pp( t->viR );
4635 VG_(printf)("%s","\n");
4636 } else {
4637 VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
4638 VtsID__pp( t->viR );
4639 VG_(printf)(" viW %u==", t->viW);
4640 VtsID__pp( t->viW );
4641 VG_(printf)("%s","\n");
4642 }
4643}
4644
4645
4646Thr* libhb_init (
4647 void (*get_stacktrace)( Thr*, Addr*, UWord ),
sewardjd52392d2008-11-08 20:36:26 +00004648 ExeContext* (*get_EC)( Thr* )
sewardjf98e1c02008-10-25 16:22:41 +00004649 )
4650{
4651 Thr* thr;
4652 VtsID vi;
4653 tl_assert(get_stacktrace);
sewardjf98e1c02008-10-25 16:22:41 +00004654 tl_assert(get_EC);
4655 main_get_stacktrace = get_stacktrace;
sewardjf98e1c02008-10-25 16:22:41 +00004656 main_get_EC = get_EC;
4657
4658 // No need to initialise hg_wordfm.
4659 // No need to initialise hg_wordset.
4660
4661 vts_set_init();
4662 vts_tab_init();
4663 event_map_init();
4664 VtsID__invalidate_caches();
4665
4666 // initialise shadow memory
4667 zsm_init( SVal__rcinc, SVal__rcdec );
4668
4669 thr = Thr__new();
4670 vi = VtsID__mk_Singleton( thr, 1 );
4671 thr->viR = vi;
4672 thr->viW = vi;
4673 VtsID__rcinc(thr->viR);
4674 VtsID__rcinc(thr->viW);
4675
4676 show_thread_state(" root", thr);
4677 return thr;
4678}
4679
4680Thr* libhb_create ( Thr* parent )
4681{
4682 /* The child's VTSs are copies of the parent's VTSs, but ticked at
4683 the child's index. Since the child's index is guaranteed
4684 unique, it has never been seen before, so the implicit value
4685 before the tick is zero and after that is one. */
4686 Thr* child = Thr__new();
4687
4688 child->viR = VtsID__tick( parent->viR, child );
4689 child->viW = VtsID__tick( parent->viW, child );
4690 VtsID__rcinc(child->viR);
4691 VtsID__rcinc(child->viW);
4692
4693 tl_assert(VtsID__indexAt( child->viR, child ) == 1);
4694 tl_assert(VtsID__indexAt( child->viW, child ) == 1);
4695
4696 /* and the parent has to move along too */
4697 VtsID__rcdec(parent->viR);
4698 VtsID__rcdec(parent->viW);
4699 parent->viR = VtsID__tick( parent->viR, parent );
4700 parent->viW = VtsID__tick( parent->viW, parent );
4701 VtsID__rcinc(parent->viR);
4702 VtsID__rcinc(parent->viW);
4703
4704 show_thread_state(" child", child);
4705 show_thread_state("parent", parent);
4706
4707 return child;
4708}
4709
4710/* Shut down the library, and print stats (in fact that's _all_
4711 this is for). */
4712void libhb_shutdown ( Bool show_stats )
4713{
4714 if (show_stats) {
4715 VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
4716 VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
4717 stats__secmaps_allocd,
4718 stats__secmap_ga_space_covered);
4719 VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
4720 stats__secmap_linesZ_allocd,
4721 stats__secmap_linesZ_bytes);
4722 VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
4723 stats__secmap_linesF_allocd,
4724 stats__secmap_linesF_bytes);
4725 VG_(printf)(" secmaps: %'10lu iterator steppings\n",
4726 stats__secmap_iterator_steppings);
4727 VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
4728 stats__secmaps_search, stats__secmaps_search_slow);
4729
4730 VG_(printf)("%s","\n");
4731 VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
4732 stats__cache_totrefs, stats__cache_totmisses );
4733 VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
4734 stats__cache_Z_fetches, stats__cache_F_fetches );
4735 VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
4736 stats__cache_Z_wbacks, stats__cache_F_wbacks );
4737 VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n",
4738 stats__cache_invals, stats__cache_flushes );
4739 VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
4740 stats__cache_make_New_arange,
4741 stats__cache_make_New_inZrep);
4742
4743 VG_(printf)("%s","\n");
4744 VG_(printf)(" cline: %'10lu normalises\n",
4745 stats__cline_normalises );
4746 VG_(printf)(" cline: rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4747 stats__cline_read64s,
4748 stats__cline_read32s,
4749 stats__cline_read16s,
4750 stats__cline_read8s );
4751 VG_(printf)(" cline: wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4752 stats__cline_write64s,
4753 stats__cline_write32s,
4754 stats__cline_write16s,
4755 stats__cline_write8s );
4756 VG_(printf)(" cline: sets 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4757 stats__cline_set64s,
4758 stats__cline_set32s,
4759 stats__cline_set16s,
4760 stats__cline_set8s );
4761 VG_(printf)(" cline: get1s %'lu, copy1s %'lu\n",
4762 stats__cline_get8s, stats__cline_copy8s );
4763 VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
4764 stats__cline_64to32splits,
4765 stats__cline_32to16splits,
4766 stats__cline_16to8splits );
4767 VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
4768 stats__cline_64to32pulldown,
4769 stats__cline_32to16pulldown,
4770 stats__cline_16to8pulldown );
4771 if (0)
4772 VG_(printf)(" cline: sizeof(CacheLineZ) %ld, covers %ld bytes of arange\n",
4773 (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);
4774
4775 VG_(printf)("%s","\n");
4776
4777 VG_(printf)(" libhb: %'13llu msm_read (%'llu changed)\n",
4778 stats__msm_read, stats__msm_read_change);
4779 VG_(printf)(" libhb: %'13llu msm_write (%'llu changed)\n",
4780 stats__msm_write, stats__msm_write_change);
4781 VG_(printf)(" libhb: %'13llu getOrd queries (%'llu misses)\n",
4782 stats__getOrdering_queries, stats__getOrdering_misses);
4783 VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
4784 stats__join2_queries, stats__join2_misses);
4785
4786 VG_(printf)("%s","\n");
4787 VG_(printf)(
4788 " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
4789 VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
4790 );
4791 VG_(printf)( " libhb: %lu entries in vts_set\n",
4792 VG_(sizeFM)( vts_set ) );
4793
4794 VG_(printf)("%s","\n");
4795 VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
4796 stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
4797 stats__ctxt_rcdec2,
4798 stats__ctxt_rcdec3 );
4799 VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
4800 stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
4801 VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n",
4802 (UWord)N_RCEC_TAB,
4803 stats__ctxt_tab_curr );
4804 VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
4805 stats__ctxt_tab_qs,
4806 stats__ctxt_tab_cmps );
4807#if 0
4808 VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
4809 VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
4810 VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
4811 VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
4812 VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
4813 VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
4814 VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
4815 VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
4816 VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
4817 VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
4818 VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
4819 VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
4820 VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
4821 VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
4822
4823 VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
4824 VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
4825 VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
4826 VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
4827#endif
4828
4829 VG_(printf)("%s","<<< END libhb stats >>>\n");
4830 VG_(printf)("%s","\n");
4831
4832 }
4833}
4834
4835void libhb_async_exit ( Thr* thr )
4836{
4837 /* is there anything we need to do? */
4838}
4839
4840/* Both Segs and SOs point to VTSs. However, there is no sharing, so
4841 a Seg that points at a VTS is its one-and-only owner, and ditto for
4842 a SO that points at a VTS. */
4843
4844SO* libhb_so_alloc ( void )
4845{
4846 return SO__Alloc();
4847}
4848
4849void libhb_so_dealloc ( SO* so )
4850{
4851 tl_assert(so);
4852 tl_assert(so->magic == SO_MAGIC);
4853 SO__Dealloc(so);
4854}
4855
4856/* See comments in libhb.h for details on the meaning of
4857 strong vs weak sends and strong vs weak receives. */
4858void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
4859{
4860 /* Copy the VTSs from 'thr' into the sync object, and then move
4861 the thread along one step. */
4862
4863 tl_assert(so);
4864 tl_assert(so->magic == SO_MAGIC);
4865
4866 /* stay sane .. a thread's read-clock must always lead or be the
4867 same as its write-clock */
4868 { POrd ord = VtsID__getOrdering(thr->viW, thr->viR);
4869 tl_assert(ord == POrd_EQ || ord == POrd_LT);
4870 }
4871
4872 /* since we're overwriting the VtsIDs in the SO, we need to drop
4873 any references made by the previous contents thereof */
4874 if (so->viR == VtsID_INVALID) {
4875 tl_assert(so->viW == VtsID_INVALID);
4876 so->viR = thr->viR;
4877 so->viW = thr->viW;
4878 VtsID__rcinc(so->viR);
4879 VtsID__rcinc(so->viW);
4880 } else {
4881 /* In a strong send, we dump any previous VC in the SO and
4882 install the sending thread's VC instead. For a weak send we
4883 must join2 with what's already there. */
4884 tl_assert(so->viW != VtsID_INVALID);
4885 VtsID__rcdec(so->viR);
4886 VtsID__rcdec(so->viW);
4887 so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
4888 so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
4889 VtsID__rcinc(so->viR);
4890 VtsID__rcinc(so->viW);
4891 }
4892
4893 /* move both parent clocks along */
4894 VtsID__rcdec(thr->viR);
4895 VtsID__rcdec(thr->viW);
4896 thr->viR = VtsID__tick( thr->viR, thr );
4897 thr->viW = VtsID__tick( thr->viW, thr );
4898 VtsID__rcinc(thr->viR);
4899 VtsID__rcinc(thr->viW);
4900 if (strong_send)
4901 show_thread_state("s-send", thr);
4902 else
4903 show_thread_state("w-send", thr);
4904}
4905
4906void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
4907{
4908 tl_assert(so);
4909 tl_assert(so->magic == SO_MAGIC);
4910
4911 if (so->viR != VtsID_INVALID) {
4912 tl_assert(so->viW != VtsID_INVALID);
4913
4914 /* Weak receive (basically, an R-acquisition of a R-W lock).
4915 This advances the read-clock of the receiver, but not the
4916 write-clock. */
4917 VtsID__rcdec(thr->viR);
4918 thr->viR = VtsID__join2( thr->viR, so->viR );
4919 VtsID__rcinc(thr->viR);
4920
4921 /* For a strong receive, we also advance the receiver's write
4922 clock, which means the receive as a whole is essentially
4923 equivalent to a W-acquisition of a R-W lock. */
4924 if (strong_recv) {
4925 VtsID__rcdec(thr->viW);
4926 thr->viW = VtsID__join2( thr->viW, so->viW );
4927 VtsID__rcinc(thr->viW);
4928 }
4929
4930 if (strong_recv)
4931 show_thread_state("s-recv", thr);
4932 else
4933 show_thread_state("w-recv", thr);
4934
4935 } else {
4936 tl_assert(so->viW == VtsID_INVALID);
4937 /* Deal with degenerate case: 'so' has no vts, so there has been
4938 no message posted to it. Just ignore this case. */
4939 show_thread_state("d-recv", thr);
4940 }
4941}
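
/* Illustrative only (not part of the library): a client typically
   attaches one SO to each user-level lock and drives it as follows,
   which makes everything the unlocking thread did before the unlock
   happen-before everything the locking thread does after the lock:

      SO* so = libhb_so_alloc();
      ...
      libhb_so_send( thr_unlocker, so, True );   .. strong send, at unlock
      ...
      libhb_so_recv( thr_locker, so, True );     .. strong recv, at lock
*/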
4942
4943Bool libhb_so_everSent ( SO* so )
4944{
4945 if (so->viR == VtsID_INVALID) {
4946 tl_assert(so->viW == VtsID_INVALID);
4947 return False;
4948 } else {
4949 tl_assert(so->viW != VtsID_INVALID);
4950 return True;
4951 }
4952}
4953
4954#define XXX1 0 // 0x67a106c
4955#define XXX2 0
4956
4957static Bool TRACEME(Addr a, SizeT szB) {
4958 if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
4959 if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
4960 return False;
4961}
4962static void trace ( Thr* thr, Addr a, SizeT szB, HChar* s ) {
4963 SVal sv = zsm_read8(a);
4964 VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
4965 show_thread_state("", thr);
4966 VG_(printf)("%s","\n");
4967}
4968
4969void libhb_range_new ( Thr* thr, Addr a, SizeT szB )
4970{
4971 SVal sv = SVal__mkC(thr->viW, thr->viW);
4972 tl_assert(is_sane_SVal_C(sv));
4973    if (TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
4974    zsm_set_range( a, szB, sv );
4975    if (TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
4976}
4977
4978void libhb_range_noaccess ( Thr* thr, Addr a, SizeT szB )
4979{
4980    if (TRACEME(a,szB)) trace(thr,a,szB,"NA-before");
4981    zsm_set_range( a, szB, SVal__mkA() );
4982    if (TRACEME(a,szB)) trace(thr,a,szB,"NA-after ");
4983}
4984
4985void* libhb_get_Thr_opaque ( Thr* thr ) {
4986 tl_assert(thr);
4987 return thr->opaque;
4988}
4989
4990void libhb_set_Thr_opaque ( Thr* thr, void* v ) {
4991 tl_assert(thr);
4992 thr->opaque = v;
4993}
4994
4995void libhb_copy_shadow_state ( Addr dst, Addr src, SizeT len )
4996{
4997 zsm_copy_range(dst, src, len);
4998}
4999
5000void libhb_maybe_GC ( void )
5001{
5002 event_map_maybe_GC();
5003 /* If there are still freelist entries available, no need for a
5004 GC. */
5005 if (vts_tab_freelist != VtsID_INVALID)
5006 return;
5007 /* So all the table entries are full, and we're having to expand
5008 the table. But did we hit the threshold point yet? */
5009 if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
5010 return;
5011 vts_tab__do_GC( False/*don't show stats*/ );
5012}
5013
5014
5015/////////////////////////////////////////////////////////////////
5016/////////////////////////////////////////////////////////////////
5017// //
5018// SECTION END main library //
5019// //
5020/////////////////////////////////////////////////////////////////
5021/////////////////////////////////////////////////////////////////
5022
5023/*--------------------------------------------------------------------*/
5024/*--- end libhb_main.c ---*/
5025/*--------------------------------------------------------------------*/