2/*--------------------------------------------------------------------*/
3/*--- LibHB: a library for implementing and checking ---*/
4/*--- the happens-before relationship in concurrent programs. ---*/
5/*--- libhb_main.c ---*/
6/*--------------------------------------------------------------------*/
7
8/*
9 This file is part of LibHB, a library for implementing and checking
10 the happens-before relationship in concurrent programs.
11
12 Copyright (C) 2008-2008 OpenWorks Ltd
13 info@open-works.co.uk
14
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file COPYING.
31*/
32
33#include "pub_tool_basics.h"
34#include "pub_tool_libcassert.h"
35#include "pub_tool_libcbase.h"
36#include "pub_tool_libcprint.h"
37#include "pub_tool_mallocfree.h"
38#include "pub_tool_wordfm.h"
39#include "pub_tool_xarray.h"
40#include "pub_tool_oset.h"
41#include "pub_tool_threadstate.h"
42#include "pub_tool_aspacemgr.h"
43#include "pub_tool_execontext.h"
44#include "pub_tool_errormgr.h"
45
46#include "hg_basics.h"
47#include "hg_wordset.h"
48#include "hg_lock_n_thread.h"
49#include "hg_errors.h"
50
51#include "libhb.h"
52
53
 54/* Globals needed by other parts of the library. These are set
 55 once at startup and then never changed. */
 56
57static void (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
58static struct _EC* (*main_stacktrace_to_EC)( Addr*, UWord ) = NULL;
59static struct _EC* (*main_get_EC)( Thr* ) = NULL;
60
61/////////////////////////////////////////////////////////////////
62/////////////////////////////////////////////////////////////////
63// //
64// //
65// //
66/////////////////////////////////////////////////////////////////
67/////////////////////////////////////////////////////////////////
68
69
70/////////////////////////////////////////////////////////////////
71/////////////////////////////////////////////////////////////////
72// //
73// SECTION BEGIN compressed shadow memory //
74// //
75/////////////////////////////////////////////////////////////////
76/////////////////////////////////////////////////////////////////
77
78#ifndef __HB_ZSM_H
79#define __HB_ZSM_H
80
81typedef ULong SVal;
82
83/* This value has special significance to the implementation, and callers
84 may not store it in the shadow memory. */
85#define SVal_INVALID (3ULL << 62)
86
87/* This is the default value for shadow memory. Initially the shadow
88 memory contains no accessible areas and so all reads produce this
 89 value. TODO: make this caller-definable. */
90#define SVal_NOACCESS (2ULL << 62)
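/* For concreteness: with the two definitions above, SVal_INVALID is
   0xC000000000000000 and SVal_NOACCESS is 0x8000000000000000. The
   check below is illustrative only and is not part of the library. */
#if 0
static void example_sval_reserved_values_sanity ( void ) {
   tl_assert( SVal_INVALID  == 0xC000000000000000ULL );
   tl_assert( SVal_NOACCESS == 0x8000000000000000ULL );
}
#endif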
91
92/* Initialise the library. Once initialised, it will (or may) call
93 rcinc and rcdec in response to all the calls below, in order to
94 allow the user to do reference counting on the SVals stored herein.
95 It is important to understand, however, that due to internal
96 caching, the reference counts are in general inaccurate, and can be
97 both above or below the true reference count for an item. In
98 particular, the library may indicate that the reference count for
99 an item is zero, when in fact it is not.
100
101 To make the reference counting exact and therefore non-pointless,
102 call zsm_flush_cache. Immediately after it returns, the reference
103 counts for all items, as deduced by the caller by observing calls
104 to rcinc and rcdec, will be correct, and so any items with a zero
105 reference count may be freed (or at least considered to be
106 unreferenced by this library).
107*/
108static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );
109
110static void zsm_set_range ( Addr, SizeT, SVal );
111static SVal zsm_read8 ( Addr );
112static void zsm_copy_range ( Addr, Addr, SizeT );
113static void zsm_flush_cache ( void );
114
115#endif /* ! __HB_ZSM_H */
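/* Illustrative sketch only, not part of the library: one way a
   client could satisfy the rcinc/rcdec contract described above is
   to keep a WordFM mapping SVal -> reference count, and to trust
   zero counts only immediately after zsm_flush_cache() has returned.
   The table and helper names here are hypothetical, and the sketch
   assumes a 64-bit host so that an SVal fits in a UWord key. */
#if 0
static WordFM* example_rc_table = NULL; /* maps SVal to refcount */

static void example_set_count ( SVal sv, UWord n ) {
   /* delete-then-add, so any existing binding is simply replaced */
   VG_(delFromFM)( example_rc_table, NULL, NULL, (UWord)sv );
   VG_(addToFM)( example_rc_table, (UWord)sv, n );
}

static void example_rcinc ( SVal sv ) {
   UWord n = 0;
   if (!example_rc_table)
      example_rc_table = VG_(newFM)( HG_(zalloc), "example.rc.1",
                                     HG_(free), NULL/*unboxed cmp*/ );
   VG_(lookupFM)( example_rc_table, NULL, &n, (UWord)sv );
   example_set_count( sv, n+1 );
}

static void example_rcdec ( SVal sv ) {
   UWord n = 0;
   Bool found = VG_(lookupFM)( example_rc_table, NULL, &n, (UWord)sv );
   tl_assert(found && n > 0);
   example_set_count( sv, n-1 );
}

/* at startup: zsm_init( example_rcinc, example_rcdec );
   before freeing SVals whose count is zero: zsm_flush_cache(); */
#endif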
116
117
118/* For the shadow mem cache stuff we may want more intrusive
119 checks. Unfortunately there's no almost-zero-cost way to make them
120 selectable at run time. Hence set the #if 0 to #if 1 and
121 rebuild if you want them. */
122#if 0
123# define SCE_CACHELINE 1 /* do sanity-check CacheLine stuff */
124# define inline __attribute__((noinline))
125 /* probably want to ditch -fomit-frame-pointer too */
126#else
127# define SCE_CACHELINE 0 /* don't sanity-check CacheLine stuff */
128#endif
129
130/* For the SegmentID, SegmentSet and SVal stuff we may want more
131 intrusive checks. Again there's no zero cost way to do this. Set
132 the #if 0 to #if 1 and rebuild if you want them. */
133#if 0
134# define SCE_SVALS 1 /* sanity-check shadow value stuff */
135#else
136# define SCE_SVALS 0
137#endif
138
139
140/* Round a up to the next multiple of N. N must be a power of 2 */
 141#define ROUNDUP(a, N) (((a) + (N) - 1) & ~((N)-1))
 142/* Round a down to the next multiple of N. N must be a power of 2 */
 143#define ROUNDDN(a, N) ((a) & ~((N)-1))
144
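/* Worked example, for clarity only (the asserts are never compiled):
   rounding to a 64-byte granularity, ROUNDUP(0x1003,64) == 0x1040,
   ROUNDDN(0x1003,64) == 0x1000, and addresses already on a 64-byte
   boundary are unchanged by both. */
#if 0
static void example_rounding_sanity ( void ) {
   tl_assert( ROUNDUP(0x1003, 64) == 0x1040 );
   tl_assert( ROUNDDN(0x1003, 64) == 0x1000 );
   tl_assert( ROUNDUP(0x1000, 64) == 0x1000 );
   tl_assert( ROUNDDN(0x1000, 64) == 0x1000 );
}
#endif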
145
146
147/* ------ User-supplied RC functions ------ */
148static void(*rcinc)(SVal) = NULL;
149static void(*rcdec)(SVal) = NULL;
150
151
152/* ------ CacheLine ------ */
153
154#define N_LINE_BITS 6 /* must be >= 3 */
155#define N_LINE_ARANGE (1 << N_LINE_BITS)
156#define N_LINE_TREES (N_LINE_ARANGE >> 3)
157
158typedef
159 struct {
160 UShort descrs[N_LINE_TREES];
161 SVal svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
162 }
163 CacheLine;
164
165#define TREE_DESCR_16_0 (1<<0)
166#define TREE_DESCR_32_0 (1<<1)
167#define TREE_DESCR_16_1 (1<<2)
168#define TREE_DESCR_64 (1<<3)
169#define TREE_DESCR_16_2 (1<<4)
170#define TREE_DESCR_32_1 (1<<5)
171#define TREE_DESCR_16_3 (1<<6)
172#define TREE_DESCR_8_0 (1<<7)
173#define TREE_DESCR_8_1 (1<<8)
174#define TREE_DESCR_8_2 (1<<9)
175#define TREE_DESCR_8_3 (1<<10)
176#define TREE_DESCR_8_4 (1<<11)
177#define TREE_DESCR_8_5 (1<<12)
178#define TREE_DESCR_8_6 (1<<13)
179#define TREE_DESCR_8_7 (1<<14)
180#define TREE_DESCR_DTY (1<<15)
181
182typedef
183 struct {
184 SVal dict[4]; /* can represent up to 4 diff values in the line */
185 UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
186 dict indexes */
187 /* if dict[0] == SVal_INVALID then dict[1] is the index of the
188 LineF to use, and dict[2..] are also SVal_INVALID. */
189 }
190 LineZ; /* compressed rep for a cache line */
191
192typedef
193 struct {
194 Bool inUse;
195 SVal w64s[N_LINE_ARANGE];
196 }
197 LineF; /* full rep for a cache line */
198
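/* Size arithmetic for the two representations above (commentary
   only): a LineZ needs 4 * 8 bytes of dictionary plus
   N_LINE_ARANGE/4 == 16 bytes of 2-bit indexes, so a 64-byte line
   whose shadow values use at most 4 distinct SVals costs roughly 48
   bytes. A line needing 5 or more distinct SVals cannot be
   dictionary-coded and is spilled to a LineF of 64 * 8 == 512 bytes,
   with dict[0] set to SVal_INVALID and dict[1] holding the LineF
   index, as noted in the LineZ comment above. */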
199/* Shadow memory.
200 Primary map is a WordFM Addr SecMap*.
201 SecMaps cover some page-size-ish section of address space and hold
202 a compressed representation.
203 CacheLine-sized chunks of SecMaps are copied into a Cache, being
204 decompressed when moved into the cache and recompressed on the
205 way out. Because of this, the cache must operate as a writeback
206 cache, not a writethrough one.
207
208 Each SecMap must hold a power-of-2 number of CacheLines. Hence
 209 N_SECMAP_BITS must be >= N_LINE_BITS.
210*/
211#define N_SECMAP_BITS 13
212#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)
213
214// # CacheLines held by a SecMap
215#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
216
217/* The data in the SecMap is held in the array of LineZs. Each LineZ
218 either carries the required data directly, in a compressed
219 representation, or it holds (in .dict[0]) an index to the LineF in
220 .linesF that holds the full representation.
221
222 Currently-unused LineF's have their .inUse bit set to zero.
 223 Since each in-use LineF is referred to by exactly one LineZ,
224 the number of .linesZ[] that refer to .linesF should equal
225 the number of .linesF[] that have .inUse == True.
226
227 RC obligations: the RCs presented to the user include exactly
228 the values in:
229 * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
230 * F reps that are in use (.inUse == True)
231
232 Hence the following actions at the following transitions are required:
233
234 F rep: .inUse==True -> .inUse==False -- rcdec_LineF
235 F rep: .inUse==False -> .inUse==True -- rcinc_LineF
236 Z rep: .dict[0] from other to SVal_INVALID -- rcdec_LineZ
237 Z rep: .dict[0] from SVal_INVALID to other -- rcinc_LineZ
238*/
239typedef
240 struct {
241 UInt magic;
242 LineZ linesZ[N_SECMAP_ZLINES];
243 LineF* linesF;
244 UInt linesF_size;
245 }
246 SecMap;
247
248#define SecMap_MAGIC 0x571e58cbU
249
250static inline Bool is_sane_SecMap ( SecMap* sm ) {
251 return sm != NULL && sm->magic == SecMap_MAGIC;
252}
253
254/* ------ Cache ------ */
255
256#define N_WAY_BITS 16
257#define N_WAY_NENT (1 << N_WAY_BITS)
258
259/* Each tag is the address of the associated CacheLine, rounded down
260 to a CacheLine address boundary. A CacheLine size must be a power
261 of 2 and must be 8 or more. Hence an easy way to initialise the
262 cache so it is empty is to set all the tag values to any value % 8
 263 != 0, e.g. 1. This means all queries in the cache initially miss.
 264 It does, however, require us to detect, and not write back, any
 265 line with a bogus tag. */
266typedef
267 struct {
268 CacheLine lyns0[N_WAY_NENT];
269 Addr tags0[N_WAY_NENT];
270 }
271 Cache;
272
273static inline Bool is_valid_scache_tag ( Addr tag ) {
274 /* a valid tag should be naturally aligned to the start of
275 a CacheLine. */
276 return 0 == (tag & (N_LINE_ARANGE - 1));
277}
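/* Small illustration of the tag scheme described above (not part of
   the library): an access to address 0x4005C3 carries the tag
   0x4005C0 (low N_LINE_BITS bits cleared), which is a valid tag,
   whereas the value 1 used to mark empty ways can never be valid. */
#if 0
static void example_tag_sanity ( void ) {
   Addr a = 0x4005C3;
   tl_assert( (a & ~(Addr)(N_LINE_ARANGE-1)) == 0x4005C0 );
   tl_assert( is_valid_scache_tag( 0x4005C0 ) );
   tl_assert( !is_valid_scache_tag( 1 ) );
}
#endif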
278
279
280/* --------- Primary data structures --------- */
281
282/* Shadow memory primary map */
283static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
284static Cache cache_shmem;
285
286
287static UWord stats__secmaps_search = 0; // # SM finds
288static UWord stats__secmaps_search_slow = 0; // # SM lookupFMs
289static UWord stats__secmaps_allocd = 0; // # SecMaps issued
290static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
291static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
292static UWord stats__secmap_linesZ_bytes = 0; // .. using this much storage
293static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
294static UWord stats__secmap_linesF_bytes = 0; // .. using this much storage
295static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
296static UWord stats__cache_Z_fetches = 0; // # Z lines fetched
297static UWord stats__cache_Z_wbacks = 0; // # Z lines written back
298static UWord stats__cache_F_fetches = 0; // # F lines fetched
299static UWord stats__cache_F_wbacks = 0; // # F lines written back
300static UWord stats__cache_invals = 0; // # cache invals
301static UWord stats__cache_flushes = 0; // # cache flushes
302static UWord stats__cache_totrefs = 0; // # total accesses
303static UWord stats__cache_totmisses = 0; // # misses
304static ULong stats__cache_make_New_arange = 0; // total arange made New
305static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
306static UWord stats__cline_normalises = 0; // # calls to cacheline_normalise
307static UWord stats__cline_read64s = 0; // # calls to s_m_read64
308static UWord stats__cline_read32s = 0; // # calls to s_m_read32
309static UWord stats__cline_read16s = 0; // # calls to s_m_read16
310static UWord stats__cline_read8s = 0; // # calls to s_m_read8
311static UWord stats__cline_write64s = 0; // # calls to s_m_write64
312static UWord stats__cline_write32s = 0; // # calls to s_m_write32
313static UWord stats__cline_write16s = 0; // # calls to s_m_write16
314static UWord stats__cline_write8s = 0; // # calls to s_m_write8
315static UWord stats__cline_set64s = 0; // # calls to s_m_set64
316static UWord stats__cline_set32s = 0; // # calls to s_m_set32
317static UWord stats__cline_set16s = 0; // # calls to s_m_set16
318static UWord stats__cline_set8s = 0; // # calls to s_m_set8
319static UWord stats__cline_get8s = 0; // # calls to s_m_get8
320static UWord stats__cline_copy8s = 0; // # calls to s_m_copy8
321static UWord stats__cline_64to32splits = 0; // # 64-bit accesses split
322static UWord stats__cline_32to16splits = 0; // # 32-bit accesses split
323static UWord stats__cline_16to8splits = 0; // # 16-bit accesses split
324static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
325static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
326static UWord stats__cline_16to8pulldown = 0; // # calls to pulldown_to_8
327
328static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
329 return a & ~(N_SECMAP_ARANGE - 1);
330}
331static inline UWord shmem__get_SecMap_offset ( Addr a ) {
332 return a & (N_SECMAP_ARANGE - 1);
333}
334
335
336/*----------------------------------------------------------------*/
337/*--- map_shmem :: WordFM Addr SecMap ---*/
338/*--- shadow memory (low level handlers) (shmem__* fns) ---*/
339/*----------------------------------------------------------------*/
340
341/*--------------- SecMap allocation --------------- */
342
343static HChar* shmem__bigchunk_next = NULL;
344static HChar* shmem__bigchunk_end1 = NULL;
345
346static void* shmem__bigchunk_alloc ( SizeT n )
347{
348 const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
349 tl_assert(n > 0);
350 n = VG_ROUNDUP(n, 16);
351 tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
352 tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
353 <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
354 if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
355 if (0)
356 VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
357 (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
358 shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
359 if (shmem__bigchunk_next == NULL)
360 VG_(out_of_memory_NORETURN)(
361 "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
362 shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
363 }
364 tl_assert(shmem__bigchunk_next);
365 tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
366 tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
367 shmem__bigchunk_next += n;
368 return shmem__bigchunk_next - n;
369}
370
371static SecMap* shmem__alloc_SecMap ( void )
372{
373 Word i, j;
374 SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
375 if (0) VG_(printf)("alloc_SecMap %p\n",sm);
376 tl_assert(sm);
377 sm->magic = SecMap_MAGIC;
378 for (i = 0; i < N_SECMAP_ZLINES; i++) {
379 sm->linesZ[i].dict[0] = SVal_NOACCESS;
380 sm->linesZ[i].dict[1] = SVal_INVALID;
381 sm->linesZ[i].dict[2] = SVal_INVALID;
382 sm->linesZ[i].dict[3] = SVal_INVALID;
383 for (j = 0; j < N_LINE_ARANGE/4; j++)
384 sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
385 }
386 sm->linesF = NULL;
387 sm->linesF_size = 0;
388 stats__secmaps_allocd++;
389 stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
390 stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
391 stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
392 return sm;
393}
394
395typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
396static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
397
398static SecMap* shmem__find_SecMap ( Addr ga )
399{
400 SecMap* sm = NULL;
401 Addr gaKey = shmem__round_to_SecMap_base(ga);
402 // Cache
403 stats__secmaps_search++;
404 if (LIKELY(gaKey == smCache[0].gaKey))
405 return smCache[0].sm;
406 if (LIKELY(gaKey == smCache[1].gaKey)) {
407 SMCacheEnt tmp = smCache[0];
408 smCache[0] = smCache[1];
409 smCache[1] = tmp;
410 return smCache[0].sm;
411 }
412 if (gaKey == smCache[2].gaKey) {
413 SMCacheEnt tmp = smCache[1];
414 smCache[1] = smCache[2];
415 smCache[2] = tmp;
416 return smCache[1].sm;
417 }
418 // end Cache
419 stats__secmaps_search_slow++;
420 if (VG_(lookupFM)( map_shmem,
421 NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
422 tl_assert(sm != NULL);
423 smCache[2] = smCache[1];
424 smCache[1] = smCache[0];
425 smCache[0].gaKey = gaKey;
426 smCache[0].sm = sm;
427 } else {
428 tl_assert(sm == NULL);
429 }
430 return sm;
431}
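/* Behaviour sketch for the 3-entry smCache above (commentary only):
   a lookup that misses all three slots falls through to the WordFM
   and, on success, installs the result in slot 0, demoting the other
   two; a hit in slot 1 or 2 swaps the entry one place towards slot 0,
   so frequently used SecMaps migrate to the cheapest test. The
   initial {1,NULL} entries can never match, since gaKey values are
   always SecMap-base aligned and hence never equal to 1. */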
432
433static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
434{
435 SecMap* sm = shmem__find_SecMap ( ga );
436 if (LIKELY(sm)) {
437 return sm;
438 } else {
439 /* create a new one */
440 Addr gaKey = shmem__round_to_SecMap_base(ga);
441 sm = shmem__alloc_SecMap();
442 tl_assert(sm);
443 VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
444 return sm;
445 }
446}
447
448
449/* ------------ LineF and LineZ related ------------ */
450
451static void rcinc_LineF ( LineF* lineF ) {
452 UWord i;
453 tl_assert(lineF->inUse);
454 for (i = 0; i < N_LINE_ARANGE; i++)
455 rcinc(lineF->w64s[i]);
456}
457
458static void rcdec_LineF ( LineF* lineF ) {
459 UWord i;
460 tl_assert(lineF->inUse);
461 for (i = 0; i < N_LINE_ARANGE; i++)
462 rcdec(lineF->w64s[i]);
463}
464
465static void rcinc_LineZ ( LineZ* lineZ ) {
466 tl_assert(lineZ->dict[0] != SVal_INVALID);
467 rcinc(lineZ->dict[0]);
468 if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
469 if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
470 if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
471}
472
473static void rcdec_LineZ ( LineZ* lineZ ) {
474 tl_assert(lineZ->dict[0] != SVal_INVALID);
475 rcdec(lineZ->dict[0]);
476 if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
477 if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
478 if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
479}
480
481inline
482static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
483 Word bix, shft, mask, prep;
484 tl_assert(ix >= 0);
485 bix = ix >> 2;
486 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
487 mask = 3 << shft;
488 prep = b2 << shft;
489 arr[bix] = (arr[bix] & ~mask) | prep;
490}
491
492inline
493static UWord read_twobit_array ( UChar* arr, UWord ix ) {
494 Word bix, shft;
495 tl_assert(ix >= 0);
496 bix = ix >> 2;
497 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
498 return (arr[bix] >> shft) & 3;
499}
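/* Worked example of the packing above (illustrative only): index ix
   selects byte ix>>2 and the 2-bit field at bit offset 2*(ix&3)
   within it, so writing value 3 at ix == 5 touches only bits 2..3 of
   arr[1]. */
#if 0
static void example_twobit_sanity ( void ) {
   UChar arr[2] = { 0, 0 };
   write_twobit_array( arr, 5, 3 );
   tl_assert( read_twobit_array( arr, 5 ) == 3 );
   tl_assert( read_twobit_array( arr, 4 ) == 0 );
   tl_assert( arr[0] == 0 && arr[1] == 0x0C );
}
#endif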
500
501/* Given address 'tag', find either the Z or F line containing relevant
502 data, so it can be read into the cache.
503*/
504static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
505 /*OUT*/LineF** fp, Addr tag ) {
506 LineZ* lineZ;
507 LineF* lineF;
508 UWord zix;
509 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
510 UWord smoff = shmem__get_SecMap_offset(tag);
511 /* since smoff is derived from a valid tag, it should be
512 cacheline-aligned. */
513 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
514 zix = smoff >> N_LINE_BITS;
515 tl_assert(zix < N_SECMAP_ZLINES);
516 lineZ = &sm->linesZ[zix];
517 lineF = NULL;
518 if (lineZ->dict[0] == SVal_INVALID) {
519 UInt fix = (UInt)lineZ->dict[1];
520 tl_assert(sm->linesF);
521 tl_assert(sm->linesF_size > 0);
522 tl_assert(fix >= 0 && fix < sm->linesF_size);
523 lineF = &sm->linesF[fix];
524 tl_assert(lineF->inUse);
525 lineZ = NULL;
526 }
527 *zp = lineZ;
528 *fp = lineF;
529}
530
531/* Given address 'tag', return the relevant SecMap and the index of
532 the LineZ within it, in the expectation that the line is to be
 533 overwritten. Regardless of whether 'tag' is currently associated
 534 with a Z or F representation, rcdec the current representation,
 535 in recognition of the fact that its contents are just about to
 536 be overwritten. */
537static __attribute__((noinline))
538void find_Z_for_writing ( /*OUT*/SecMap** smp,
539 /*OUT*/Word* zixp,
540 Addr tag ) {
541 LineZ* lineZ;
542 LineF* lineF;
543 UWord zix;
544 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
545 UWord smoff = shmem__get_SecMap_offset(tag);
546 /* since smoff is derived from a valid tag, it should be
547 cacheline-aligned. */
548 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
549 zix = smoff >> N_LINE_BITS;
550 tl_assert(zix < N_SECMAP_ZLINES);
551 lineZ = &sm->linesZ[zix];
552 lineF = NULL;
553 /* re RCs, we are freeing up this LineZ/LineF so that new data can
554 be parked in it. Hence have to rcdec it accordingly. */
555 /* If lineZ has an associated lineF, free it up. */
556 if (lineZ->dict[0] == SVal_INVALID) {
557 UInt fix = (UInt)lineZ->dict[1];
558 tl_assert(sm->linesF);
559 tl_assert(sm->linesF_size > 0);
560 tl_assert(fix >= 0 && fix < sm->linesF_size);
561 lineF = &sm->linesF[fix];
562 tl_assert(lineF->inUse);
563 rcdec_LineF(lineF);
564 lineF->inUse = False;
565 } else {
566 rcdec_LineZ(lineZ);
567 }
568 *smp = sm;
569 *zixp = zix;
570}
571
572static __attribute__((noinline))
573void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
574 UInt i, new_size;
575 LineF* nyu;
576
577 if (sm->linesF) {
578 tl_assert(sm->linesF_size > 0);
579 } else {
580 tl_assert(sm->linesF_size == 0);
581 }
582
583 if (sm->linesF) {
584 for (i = 0; i < sm->linesF_size; i++) {
585 if (!sm->linesF[i].inUse) {
586 *fixp = (Word)i;
587 return;
588 }
589 }
590 }
591
592 /* No free F line found. Expand existing array and try again. */
593 new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
594 nyu = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
595 new_size * sizeof(LineF) );
596 tl_assert(nyu);
597
598 stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
599 stats__secmap_linesF_bytes += (new_size - sm->linesF_size)
600 * sizeof(LineF);
601
602 if (0)
603 VG_(printf)("SM %p: expand F array from %d to %d\n",
604 sm, (Int)sm->linesF_size, new_size);
605
606 for (i = 0; i < new_size; i++)
607 nyu[i].inUse = False;
608
609 if (sm->linesF) {
610 for (i = 0; i < sm->linesF_size; i++) {
611 tl_assert(sm->linesF[i].inUse);
612 nyu[i] = sm->linesF[i];
613 }
614 VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
615 HG_(free)(sm->linesF);
616 }
617
618 sm->linesF = nyu;
619 sm->linesF_size = new_size;
620
621 for (i = 0; i < sm->linesF_size; i++) {
622 if (!sm->linesF[i].inUse) {
623 *fixp = (Word)i;
624 return;
625 }
626 }
627
628 /*NOTREACHED*/
629 tl_assert(0);
630}
631
632
633/* ------------ CacheLine and implicit-tree related ------------ */
634
635__attribute__((unused))
636static void pp_CacheLine ( CacheLine* cl ) {
637 Word i;
638 if (!cl) {
639 VG_(printf)("%s","pp_CacheLine(NULL)\n");
640 return;
641 }
642 for (i = 0; i < N_LINE_TREES; i++)
643 VG_(printf)(" descr: %04lx\n", (UWord)cl->descrs[i]);
644 for (i = 0; i < N_LINE_ARANGE; i++)
645 VG_(printf)(" sval: %08lx\n", (UWord)cl->svals[i]);
646}
647
648static UChar descr_to_validbits ( UShort descr )
649{
650 /* a.k.a Party Time for gcc's constant folder */
651# define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
652 b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0) \
653 ( (UShort) ( ( (b8_7) << 14) | ( (b8_6) << 13) | \
654 ( (b8_5) << 12) | ( (b8_4) << 11) | \
655 ( (b8_3) << 10) | ( (b8_2) << 9) | \
656 ( (b8_1) << 8) | ( (b8_0) << 7) | \
657 ( (b16_3) << 6) | ( (b32_1) << 5) | \
658 ( (b16_2) << 4) | ( (b64) << 3) | \
659 ( (b16_1) << 2) | ( (b32_0) << 1) | \
660 ( (b16_0) << 0) ) )
661
662# define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
663 ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
664 ( (bit5) << 5) | ( (bit4) << 4) | \
665 ( (bit3) << 3) | ( (bit2) << 2) | \
666 ( (bit1) << 1) | ( (bit0) << 0) ) )
667
668 /* these should all get folded out at compile time */
669 tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
670 tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
671 tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
672 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
673 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
674 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
675 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
676 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
677 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);
678
679 switch (descr) {
680 /*
681 +--------------------------------- TREE_DESCR_8_7
682 | +------------------- TREE_DESCR_8_0
683 | | +---------------- TREE_DESCR_16_3
684 | | | +-------------- TREE_DESCR_32_1
685 | | | | +------------ TREE_DESCR_16_2
686 | | | | | +--------- TREE_DESCR_64
687 | | | | | | +------ TREE_DESCR_16_1
688 | | | | | | | +---- TREE_DESCR_32_0
689 | | | | | | | | +-- TREE_DESCR_16_0
690 | | | | | | | | |
691 | | | | | | | | | GRANULARITY, 7 -> 0 */
692 case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */
693 return BYTE(1,1,1,1,1,1,1,1);
694 case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */
695 return BYTE(1,1,0,1,1,1,1,1);
696 case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */
697 return BYTE(0,1,1,1,1,1,1,1);
698 case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */
699 return BYTE(0,1,0,1,1,1,1,1);
700
701 case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */
702 return BYTE(1,1,1,1,1,1,0,1);
703 case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */
704 return BYTE(1,1,0,1,1,1,0,1);
705 case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */
706 return BYTE(0,1,1,1,1,1,0,1);
707 case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */
708 return BYTE(0,1,0,1,1,1,0,1);
709
710 case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */
711 return BYTE(1,1,1,1,0,1,1,1);
712 case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */
713 return BYTE(1,1,0,1,0,1,1,1);
714 case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */
715 return BYTE(0,1,1,1,0,1,1,1);
716 case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */
717 return BYTE(0,1,0,1,0,1,1,1);
718
719 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */
720 return BYTE(1,1,1,1,0,1,0,1);
721 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */
722 return BYTE(1,1,0,1,0,1,0,1);
723 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */
724 return BYTE(0,1,1,1,0,1,0,1);
725 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */
726 return BYTE(0,1,0,1,0,1,0,1);
727
728 case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */
729 return BYTE(0,0,0,1,1,1,1,1);
730 case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */
731 return BYTE(0,0,0,1,1,1,0,1);
732 case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */
733 return BYTE(0,0,0,1,0,1,1,1);
734 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */
735 return BYTE(0,0,0,1,0,1,0,1);
736
737 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */
738 return BYTE(1,1,1,1,0,0,0,1);
739 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */
740 return BYTE(1,1,0,1,0,0,0,1);
741 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */
742 return BYTE(0,1,1,1,0,0,0,1);
743 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */
744 return BYTE(0,1,0,1,0,0,0,1);
745
746 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
747 return BYTE(0,0,0,1,0,0,0,1);
748
749 case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
750 return BYTE(0,0,0,0,0,0,0,1);
751
752 default: return BYTE(0,0,0,0,0,0,0,0);
753 /* INVALID - any valid descr produces at least one
 754 valid bit in tree[0..7] */
755 }
756 /* NOTREACHED*/
757 tl_assert(0);
758
759# undef DESCR
760# undef BYTE
761}
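/* Two concrete instances of the mapping above, illustrative only: a
   tree holding a single 64-bit value is valid only at tree[0], and a
   tree of eight independent bytes is valid at all of tree[0..7]. */
#if 0
static void example_validbits_sanity ( void ) {
   tl_assert( descr_to_validbits( TREE_DESCR_64 ) == 0x01 );
   tl_assert( descr_to_validbits( TREE_DESCR_8_7 | TREE_DESCR_8_6
                                  | TREE_DESCR_8_5 | TREE_DESCR_8_4
                                  | TREE_DESCR_8_3 | TREE_DESCR_8_2
                                  | TREE_DESCR_8_1 | TREE_DESCR_8_0 )
              == 0xFF );
}
#endif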
762
763__attribute__((unused))
764static Bool is_sane_Descr ( UShort descr ) {
765 return descr_to_validbits(descr) != 0;
766}
767
768static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
769 VG_(sprintf)(dst,
770 "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
771 (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
772 (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
773 (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
774 (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
775 (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
776 (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
777 (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
778 (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
779 (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
780 (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
781 (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
782 (Int)((descr & TREE_DESCR_64) ? 1 : 0),
783 (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
784 (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
785 (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
786 );
787}
788static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
789 VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
790 (Int)((byte & 128) ? 1 : 0),
791 (Int)((byte & 64) ? 1 : 0),
792 (Int)((byte & 32) ? 1 : 0),
793 (Int)((byte & 16) ? 1 : 0),
794 (Int)((byte & 8) ? 1 : 0),
795 (Int)((byte & 4) ? 1 : 0),
796 (Int)((byte & 2) ? 1 : 0),
797 (Int)((byte & 1) ? 1 : 0)
798 );
799}
800
801static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
802 Word i;
803 UChar validbits = descr_to_validbits(descr);
804 HChar buf[128], buf2[128];
805 if (validbits == 0)
806 goto bad;
807 for (i = 0; i < 8; i++) {
808 if (validbits & (1<<i)) {
809 if (tree[i] == SVal_INVALID)
810 goto bad;
811 } else {
812 if (tree[i] != SVal_INVALID)
813 goto bad;
814 }
815 }
816 return True;
817 bad:
818 sprintf_Descr( buf, descr );
819 sprintf_Byte( buf2, validbits );
820 VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
821 VG_(printf)(" validbits 0x%02lx %s\n", (UWord)validbits, buf2);
822 VG_(printf)(" descr 0x%04lx %s\n", (UWord)descr, buf);
823 for (i = 0; i < 8; i++)
824 VG_(printf)(" [%ld] 0x%016llx\n", i, tree[i]);
825 VG_(printf)("%s","}\n");
 826 return False;
827}
828
829static Bool is_sane_CacheLine ( CacheLine* cl )
830{
831 Word tno, cloff;
832
833 if (!cl) goto bad;
834
835 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
836 UShort descr = cl->descrs[tno];
837 SVal* tree = &cl->svals[cloff];
838 if (!is_sane_Descr_and_Tree(descr, tree))
839 goto bad;
840 }
841 tl_assert(cloff == N_LINE_ARANGE);
842 return True;
843 bad:
844 pp_CacheLine(cl);
845 return False;
846}
847
848static UShort normalise_tree ( /*MOD*/SVal* tree )
849{
850 UShort descr;
851 /* pre: incoming tree[0..7] does not have any invalid shvals, in
852 particular no zeroes. */
853 if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
854 || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
855 || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
856 || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
857 tl_assert(0);
858
859 descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
860 | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
861 | TREE_DESCR_8_1 | TREE_DESCR_8_0;
862 /* build 16-bit layer */
863 if (tree[1] == tree[0]) {
864 tree[1] = SVal_INVALID;
865 descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
866 descr |= TREE_DESCR_16_0;
867 }
868 if (tree[3] == tree[2]) {
869 tree[3] = SVal_INVALID;
870 descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
871 descr |= TREE_DESCR_16_1;
872 }
873 if (tree[5] == tree[4]) {
874 tree[5] = SVal_INVALID;
875 descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
876 descr |= TREE_DESCR_16_2;
877 }
878 if (tree[7] == tree[6]) {
879 tree[7] = SVal_INVALID;
880 descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
881 descr |= TREE_DESCR_16_3;
882 }
883 /* build 32-bit layer */
884 if (tree[2] == tree[0]
885 && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
886 tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
887 descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
888 descr |= TREE_DESCR_32_0;
889 }
890 if (tree[6] == tree[4]
891 && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
892 tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
893 descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
894 descr |= TREE_DESCR_32_1;
895 }
896 /* build 64-bit layer */
897 if (tree[4] == tree[0]
898 && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
899 tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
900 descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
901 descr |= TREE_DESCR_64;
902 }
903 return descr;
904}
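/* Worked example of the collapse above (commentary only): if all
   eight leaves hold the same SVal, the 16-bit, 32-bit and 64-bit
   merge steps all fire, leaving tree[0] as the only valid entry,
   tree[1..7] == SVal_INVALID and descr == TREE_DESCR_64. If only
   tree[0] == tree[1], the result is TREE_DESCR_16_0 plus the six
   remaining 8-bit bits, with just tree[1] invalidated. */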
905
906/* This takes a cacheline where all the data is at the leaves
907 (w8[..]) and builds a correctly normalised tree. */
908static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
909{
910 Word tno, cloff;
911 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
912 SVal* tree = &cl->svals[cloff];
913 cl->descrs[tno] = normalise_tree( tree );
914 }
915 tl_assert(cloff == N_LINE_ARANGE);
916 if (SCE_CACHELINE)
917 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
918 stats__cline_normalises++;
919}
920
921
922typedef struct { UChar count; SVal sval; } CountedSVal;
923
924static
925void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
926 /*OUT*/Word* dstUsedP,
927 Word nDst, CacheLine* src )
928{
929 Word tno, cloff, dstUsed;
930
931 tl_assert(nDst == N_LINE_ARANGE);
932 dstUsed = 0;
933
934 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
935 UShort descr = src->descrs[tno];
936 SVal* tree = &src->svals[cloff];
937
938 /* sequentialise the tree described by (descr,tree). */
939# define PUT(_n,_v) \
940 do { dst[dstUsed ].count = (_n); \
941 dst[dstUsed++].sval = (_v); \
942 } while (0)
943
944 /* byte 0 */
945 if (descr & TREE_DESCR_64) PUT(8, tree[0]); else
946 if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
947 if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
948 if (descr & TREE_DESCR_8_0) PUT(1, tree[0]);
949 /* byte 1 */
950 if (descr & TREE_DESCR_8_1) PUT(1, tree[1]);
951 /* byte 2 */
952 if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
953 if (descr & TREE_DESCR_8_2) PUT(1, tree[2]);
954 /* byte 3 */
955 if (descr & TREE_DESCR_8_3) PUT(1, tree[3]);
956 /* byte 4 */
957 if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
958 if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
959 if (descr & TREE_DESCR_8_4) PUT(1, tree[4]);
960 /* byte 5 */
961 if (descr & TREE_DESCR_8_5) PUT(1, tree[5]);
962 /* byte 6 */
963 if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
964 if (descr & TREE_DESCR_8_6) PUT(1, tree[6]);
965 /* byte 7 */
966 if (descr & TREE_DESCR_8_7) PUT(1, tree[7]);
967
968# undef PUT
969 /* END sequentialise the tree described by (descr,tree). */
970
971 }
972 tl_assert(cloff == N_LINE_ARANGE);
973 tl_assert(dstUsed <= nDst);
974
975 *dstUsedP = dstUsed;
976}
977
978/* Write the cacheline 'wix' to backing store. Where it ends up
979 is determined by its tag field. */
980static __attribute__((noinline)) void cacheline_wback ( UWord wix )
981{
982 Word i, j, k, m;
983 Addr tag;
984 SecMap* sm;
985 CacheLine* cl;
986 LineZ* lineZ;
987 LineF* lineF;
988 Word zix, fix, csvalsUsed;
989 CountedSVal csvals[N_LINE_ARANGE];
990 SVal sv;
991
992 if (0)
993 VG_(printf)("scache wback line %d\n", (Int)wix);
994
995 tl_assert(wix >= 0 && wix < N_WAY_NENT);
996
997 tag = cache_shmem.tags0[wix];
998 cl = &cache_shmem.lyns0[wix];
999
1000 /* The cache line may have been invalidated; if so, ignore it. */
1001 if (!is_valid_scache_tag(tag))
1002 return;
1003
1004 /* Where are we going to put it? */
1005 sm = NULL;
1006 lineZ = NULL;
1007 lineF = NULL;
1008 zix = fix = -1;
1009
1010 /* find the Z line to write in and rcdec it or the associated F
1011 line. */
1012 find_Z_for_writing( &sm, &zix, tag );
1013
1014 tl_assert(sm);
1015 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
1016 lineZ = &sm->linesZ[zix];
1017
1018 /* Generate the data to be stored */
1019 if (SCE_CACHELINE)
1020 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1021
1022 csvalsUsed = -1;
1023 sequentialise_CacheLine( csvals, &csvalsUsed,
1024 N_LINE_ARANGE, cl );
1025 tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
1026 if (0) VG_(printf)("%lu ", csvalsUsed);
1027
1028 lineZ->dict[0] = lineZ->dict[1]
1029 = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1030
1031 /* i indexes actual shadow values, k is cursor in csvals */
1032 i = 0;
1033 for (k = 0; k < csvalsUsed; k++) {
1034
1035 sv = csvals[k].sval;
1036 if (SCE_SVALS)
1037 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1038 /* do we already have it? */
1039 if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
1040 if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
1041 if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
1042 if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
1043 /* no. look for a free slot. */
1044 if (SCE_SVALS)
1045 tl_assert(sv != SVal_INVALID);
1046 if (lineZ->dict[0]
1047 == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
1048 if (lineZ->dict[1]
1049 == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
1050 if (lineZ->dict[2]
1051 == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
1052 if (lineZ->dict[3]
1053 == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
1054 break; /* we'll have to use the f rep */
1055 dict_ok:
1056 m = csvals[k].count;
1057 if (m == 8) {
1058 write_twobit_array( lineZ->ix2s, i+0, j );
1059 write_twobit_array( lineZ->ix2s, i+1, j );
1060 write_twobit_array( lineZ->ix2s, i+2, j );
1061 write_twobit_array( lineZ->ix2s, i+3, j );
1062 write_twobit_array( lineZ->ix2s, i+4, j );
1063 write_twobit_array( lineZ->ix2s, i+5, j );
1064 write_twobit_array( lineZ->ix2s, i+6, j );
1065 write_twobit_array( lineZ->ix2s, i+7, j );
1066 i += 8;
1067 }
1068 else if (m == 4) {
1069 write_twobit_array( lineZ->ix2s, i+0, j );
1070 write_twobit_array( lineZ->ix2s, i+1, j );
1071 write_twobit_array( lineZ->ix2s, i+2, j );
1072 write_twobit_array( lineZ->ix2s, i+3, j );
1073 i += 4;
1074 }
1075 else if (m == 1) {
1076 write_twobit_array( lineZ->ix2s, i+0, j );
1077 i += 1;
1078 }
1079 else if (m == 2) {
1080 write_twobit_array( lineZ->ix2s, i+0, j );
1081 write_twobit_array( lineZ->ix2s, i+1, j );
1082 i += 2;
1083 }
1084 else {
1085 tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
1086 }
1087
1088 }
1089
1090 if (LIKELY(i == N_LINE_ARANGE)) {
1091 /* Construction of the compressed representation was
1092 successful. */
1093 rcinc_LineZ(lineZ);
1094 stats__cache_Z_wbacks++;
1095 } else {
1096 /* Cannot use the compressed(z) representation. Use the full(f)
1097 rep instead. */
1098 tl_assert(i >= 0 && i < N_LINE_ARANGE);
1099 alloc_F_for_writing( sm, &fix );
1100 tl_assert(sm->linesF);
1101 tl_assert(sm->linesF_size > 0);
1102 tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
1103 lineF = &sm->linesF[fix];
1104 tl_assert(!lineF->inUse);
1105 lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1106 lineZ->dict[1] = (SVal)fix;
1107 lineF->inUse = True;
1108 i = 0;
1109 for (k = 0; k < csvalsUsed; k++) {
1110 if (SCE_SVALS)
1111 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1112 sv = csvals[k].sval;
1113 if (SCE_SVALS)
1114 tl_assert(sv != SVal_INVALID);
1115 for (m = csvals[k].count; m > 0; m--) {
1116 lineF->w64s[i] = sv;
1117 i++;
1118 }
1119 }
1120 tl_assert(i == N_LINE_ARANGE);
1121 rcinc_LineF(lineF);
1122 stats__cache_F_wbacks++;
1123 }
1124
1125 //if (anyShared)
1126 // sm->mbHasShared = True;
1127
1128 /* mb_tidy_one_cacheline(); */
1129}
1130
1131/* Fetch the cacheline 'wix' from the backing store. The tag
1132 associated with 'wix' is assumed to have already been filled in;
1133 hence that is used to determine where in the backing store to read
1134 from. */
1135static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
1136{
1137 Word i;
1138 Addr tag;
1139 CacheLine* cl;
1140 LineZ* lineZ;
1141 LineF* lineF;
1142
1143 if (0)
1144 VG_(printf)("scache fetch line %d\n", (Int)wix);
1145
1146 tl_assert(wix >= 0 && wix < N_WAY_NENT);
1147
1148 tag = cache_shmem.tags0[wix];
1149 cl = &cache_shmem.lyns0[wix];
1150
1151 /* reject nonsense requests */
1152 tl_assert(is_valid_scache_tag(tag));
1153
1154 lineZ = NULL;
1155 lineF = NULL;
1156 find_ZF_for_reading( &lineZ, &lineF, tag );
1157 tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
1158
1159 /* expand the data into the bottom layer of the tree, then get
1160 cacheline_normalise to build the descriptor array. */
1161 if (lineF) {
1162 tl_assert(lineF->inUse);
1163 for (i = 0; i < N_LINE_ARANGE; i++) {
1164 cl->svals[i] = lineF->w64s[i];
1165 }
1166 stats__cache_F_fetches++;
1167 } else {
1168 for (i = 0; i < N_LINE_ARANGE; i++) {
1169 SVal sv;
1170 UWord ix = read_twobit_array( lineZ->ix2s, i );
1171 /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
1172 sv = lineZ->dict[ix];
1173 tl_assert(sv != SVal_INVALID);
1174 cl->svals[i] = sv;
1175 }
1176 stats__cache_Z_fetches++;
1177 }
1178 normalise_CacheLine( cl );
1179}
1180
1181static void shmem__invalidate_scache ( void ) {
1182 Word wix;
1183 if (0) VG_(printf)("%s","scache inval\n");
1184 tl_assert(!is_valid_scache_tag(1));
1185 for (wix = 0; wix < N_WAY_NENT; wix++) {
1186 cache_shmem.tags0[wix] = 1/*INVALID*/;
1187 }
1188 stats__cache_invals++;
1189}
1190
1191static void shmem__flush_and_invalidate_scache ( void ) {
1192 Word wix;
1193 Addr tag;
1194 if (0) VG_(printf)("%s","scache flush and invalidate\n");
1195 tl_assert(!is_valid_scache_tag(1));
1196 for (wix = 0; wix < N_WAY_NENT; wix++) {
1197 tag = cache_shmem.tags0[wix];
1198 if (tag == 1/*INVALID*/) {
1199 /* already invalid; nothing to do */
1200 } else {
1201 tl_assert(is_valid_scache_tag(tag));
1202 cacheline_wback( wix );
1203 }
1204 cache_shmem.tags0[wix] = 1/*INVALID*/;
1205 }
1206 stats__cache_flushes++;
1207 stats__cache_invals++;
1208}
1209
1210
1211static inline Bool aligned16 ( Addr a ) {
1212 return 0 == (a & 1);
1213}
1214static inline Bool aligned32 ( Addr a ) {
1215 return 0 == (a & 3);
1216}
1217static inline Bool aligned64 ( Addr a ) {
1218 return 0 == (a & 7);
1219}
1220static inline UWord get_cacheline_offset ( Addr a ) {
1221 return (UWord)(a & (N_LINE_ARANGE - 1));
1222}
1223static inline Addr cacheline_ROUNDUP ( Addr a ) {
1224 return ROUNDUP(a, N_LINE_ARANGE);
1225}
1226static inline Addr cacheline_ROUNDDN ( Addr a ) {
1227 return ROUNDDN(a, N_LINE_ARANGE);
1228}
1229static inline UWord get_treeno ( Addr a ) {
1230 return get_cacheline_offset(a) >> 3;
1231}
1232static inline UWord get_tree_offset ( Addr a ) {
1233 return a & 7;
1234}
1235
1236static __attribute__((noinline))
1237 CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
1238static inline CacheLine* get_cacheline ( Addr a )
1239{
1240 /* tag is 'a' with the in-line offset masked out,
1241 eg a[31]..a[4] 0000 */
1242 Addr tag = a & ~(N_LINE_ARANGE - 1);
1243 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1244 stats__cache_totrefs++;
1245 if (LIKELY(tag == cache_shmem.tags0[wix])) {
1246 return &cache_shmem.lyns0[wix];
1247 } else {
1248 return get_cacheline_MISS( a );
1249 }
1250}
1251
1252static __attribute__((noinline))
1253 CacheLine* get_cacheline_MISS ( Addr a )
1254{
1255 /* tag is 'a' with the in-line offset masked out,
1256 eg a[31]..a[4] 0000 */
1257
1258 CacheLine* cl;
1259 Addr* tag_old_p;
1260 Addr tag = a & ~(N_LINE_ARANGE - 1);
1261 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1262
1263 tl_assert(tag != cache_shmem.tags0[wix]);
1264
1265 /* Dump the old line into the backing store. */
1266 stats__cache_totmisses++;
1267
1268 cl = &cache_shmem.lyns0[wix];
1269 tag_old_p = &cache_shmem.tags0[wix];
1270
1271 if (is_valid_scache_tag( *tag_old_p )) {
1272 /* EXPENSIVE and REDUNDANT: callee does it */
1273 if (SCE_CACHELINE)
1274 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1275 cacheline_wback( wix );
1276 }
1277 /* and reload the new one */
1278 *tag_old_p = tag;
1279 cacheline_fetch( wix );
1280 if (SCE_CACHELINE)
1281 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1282 return cl;
1283}
1284
1285static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1286 stats__cline_64to32pulldown++;
1287 switch (toff) {
1288 case 0: case 4:
1289 tl_assert(descr & TREE_DESCR_64);
1290 tree[4] = tree[0];
1291 descr &= ~TREE_DESCR_64;
1292 descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
1293 break;
1294 default:
1295 tl_assert(0);
1296 }
1297 return descr;
1298}
1299
1300static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1301 stats__cline_32to16pulldown++;
1302 switch (toff) {
1303 case 0: case 2:
1304 if (!(descr & TREE_DESCR_32_0)) {
1305 descr = pulldown_to_32(tree, 0, descr);
1306 }
1307 tl_assert(descr & TREE_DESCR_32_0);
1308 tree[2] = tree[0];
1309 descr &= ~TREE_DESCR_32_0;
1310 descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
1311 break;
1312 case 4: case 6:
1313 if (!(descr & TREE_DESCR_32_1)) {
1314 descr = pulldown_to_32(tree, 4, descr);
1315 }
1316 tl_assert(descr & TREE_DESCR_32_1);
1317 tree[6] = tree[4];
1318 descr &= ~TREE_DESCR_32_1;
1319 descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
1320 break;
1321 default:
1322 tl_assert(0);
1323 }
1324 return descr;
1325}
1326
1327static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1328 stats__cline_16to8pulldown++;
1329 switch (toff) {
1330 case 0: case 1:
1331 if (!(descr & TREE_DESCR_16_0)) {
1332 descr = pulldown_to_16(tree, 0, descr);
1333 }
1334 tl_assert(descr & TREE_DESCR_16_0);
1335 tree[1] = tree[0];
1336 descr &= ~TREE_DESCR_16_0;
1337 descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
1338 break;
1339 case 2: case 3:
1340 if (!(descr & TREE_DESCR_16_1)) {
1341 descr = pulldown_to_16(tree, 2, descr);
1342 }
1343 tl_assert(descr & TREE_DESCR_16_1);
1344 tree[3] = tree[2];
1345 descr &= ~TREE_DESCR_16_1;
1346 descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
1347 break;
1348 case 4: case 5:
1349 if (!(descr & TREE_DESCR_16_2)) {
1350 descr = pulldown_to_16(tree, 4, descr);
1351 }
1352 tl_assert(descr & TREE_DESCR_16_2);
1353 tree[5] = tree[4];
1354 descr &= ~TREE_DESCR_16_2;
1355 descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
1356 break;
1357 case 6: case 7:
1358 if (!(descr & TREE_DESCR_16_3)) {
1359 descr = pulldown_to_16(tree, 6, descr);
1360 }
1361 tl_assert(descr & TREE_DESCR_16_3);
1362 tree[7] = tree[6];
1363 descr &= ~TREE_DESCR_16_3;
1364 descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
1365 break;
1366 default:
1367 tl_assert(0);
1368 }
1369 return descr;
1370}
1371
1372
1373static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
1374 UShort mask;
1375 switch (toff) {
1376 case 0:
1377 mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
1378 tl_assert( (descr & mask) == mask );
1379 descr &= ~mask;
1380 descr |= TREE_DESCR_16_0;
1381 break;
1382 case 2:
1383 mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
1384 tl_assert( (descr & mask) == mask );
1385 descr &= ~mask;
1386 descr |= TREE_DESCR_16_1;
1387 break;
1388 case 4:
1389 mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
1390 tl_assert( (descr & mask) == mask );
1391 descr &= ~mask;
1392 descr |= TREE_DESCR_16_2;
1393 break;
1394 case 6:
1395 mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
1396 tl_assert( (descr & mask) == mask );
1397 descr &= ~mask;
1398 descr |= TREE_DESCR_16_3;
1399 break;
1400 default:
1401 tl_assert(0);
1402 }
1403 return descr;
1404}
1405
1406static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
1407 UShort mask;
1408 switch (toff) {
1409 case 0:
1410 if (!(descr & TREE_DESCR_16_0))
1411 descr = pullup_descr_to_16(descr, 0);
1412 if (!(descr & TREE_DESCR_16_1))
1413 descr = pullup_descr_to_16(descr, 2);
1414 mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
1415 tl_assert( (descr & mask) == mask );
1416 descr &= ~mask;
1417 descr |= TREE_DESCR_32_0;
1418 break;
1419 case 4:
1420 if (!(descr & TREE_DESCR_16_2))
1421 descr = pullup_descr_to_16(descr, 4);
1422 if (!(descr & TREE_DESCR_16_3))
1423 descr = pullup_descr_to_16(descr, 6);
1424 mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
1425 tl_assert( (descr & mask) == mask );
1426 descr &= ~mask;
1427 descr |= TREE_DESCR_32_1;
1428 break;
1429 default:
1430 tl_assert(0);
1431 }
1432 return descr;
1433}
1434
1435static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
1436 switch (toff) {
1437 case 0: case 4:
1438 return 0 != (descr & TREE_DESCR_64);
1439 default:
1440 tl_assert(0);
1441 }
1442}
1443
1444static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
1445 switch (toff) {
1446 case 0:
1447 return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
1448 case 2:
1449 return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
1450 case 4:
1451 return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
1452 case 6:
1453 return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
1454 default:
1455 tl_assert(0);
1456 }
1457}
1458
1459/* ------------ Cache management ------------ */
1460
1461static void zsm_flush_cache ( void )
1462{
1463 shmem__flush_and_invalidate_scache();
1464}
1465
1466
1467static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
1468{
1469 tl_assert( sizeof(UWord) == sizeof(Addr) );
1470
1471 rcinc = p_rcinc;
1472 rcdec = p_rcdec;
1473
1474 tl_assert(map_shmem == NULL);
1475 map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
1476 HG_(free),
1477 NULL/*unboxed UWord cmp*/);
1478 tl_assert(map_shmem != NULL);
1479 shmem__invalidate_scache();
1480
1481 /* a SecMap must contain an integral number of CacheLines */
1482 tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
1483 /* also ... a CacheLine holds an integral number of trees */
1484 tl_assert(0 == (N_LINE_ARANGE % 8));
1485}
1486
1487/////////////////////////////////////////////////////////////////
1488/////////////////////////////////////////////////////////////////
1489// //
1490// SECTION END compressed shadow memory //
1491// //
1492/////////////////////////////////////////////////////////////////
1493/////////////////////////////////////////////////////////////////
1494
1495
1496
1497/////////////////////////////////////////////////////////////////
1498/////////////////////////////////////////////////////////////////
1499// //
1500// SECTION BEGIN vts primitives //
1501// //
1502/////////////////////////////////////////////////////////////////
1503/////////////////////////////////////////////////////////////////
1504
1505#ifndef __HB_VTS_H
1506#define __HB_VTS_H
1507
1508/* VtsIDs can't exceed 30 bits, since they have to be packed into the
1509 lowest 30 bits of an SVal. */
1510typedef UInt VtsID;
1511#define VtsID_INVALID 0xFFFFFFFF
1512
1513/* A VTS contains .ts, its vector clock, and also .id, a field to hold
1514 a backlink for the caller's convenience. Since we have no idea
1515 what to set that to in the library, it always gets set to
1516 VtsID_INVALID. */
1517typedef
1518 struct {
1519 VtsID id;
1520 XArray* ts; /* XArray* ScalarTS(abstract) */
1521 }
1522 VTS;
1523
1524
1525/* Create a new, empty VTS. */
1526VTS* VTS__new ( void );
1527
1528/* Delete this VTS in its entirety. */
1529void VTS__delete ( VTS* vts );
1530
1531/* Create a new singleton VTS. */
1532VTS* VTS__singleton ( Thr* thr, ULong tym );
1533
1534/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
1535 not modified. */
1536VTS* VTS__tick ( Thr* me, VTS* vts );
1537
1538/* Return a new VTS constructed as the join (max) of the 2 args.
1539 Neither arg is modified. */
1540VTS* VTS__join ( VTS* a, VTS* b );
1541
1542/* Compute the partial ordering relation of the two args. */
1543typedef
1544 enum { POrd_EQ=4, POrd_LT, POrd_GT, POrd_UN }
1545 POrd;
1546
1547POrd VTS__cmp ( VTS* a, VTS* b );
1548
1549/* Compute an arbitrary structural (total) ordering on the two args,
1550 based on their VCs, so they can be looked up in a table, tree, etc.
1551 Returns -1, 0 or 1. */
1552Word VTS__cmp_structural ( VTS* a, VTS* b );
1553
1554/* Debugging only. Display the given VTS in the buffer. */
1555void VTS__show ( HChar* buf, Int nBuf, VTS* vts );
1556
1557/* Debugging only. Return vts[index], so to speak. */
1558ULong VTS__indexAt_SLOW ( VTS* vts, Thr* index );
1559
1560#endif /* ! __HB_VTS_H */
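/* Usage sketch for the API above (illustrative only; t1 and t2 stand
   for two distinct Thr* values owned by the caller). It uses only
   the operations declared above and frees everything it creates. */
#if 0
static void example_vts_usage ( Thr* t1, Thr* t2 )
{
   VTS *a, *b, *a2, *j;
   a  = VTS__singleton( t1, 1 );  /* { t1:1 } */
   b  = VTS__singleton( t2, 5 );  /* { t2:5 } */
   a2 = VTS__tick( t1, a );       /* { t1:2 }; 'a' is unchanged */
   j  = VTS__join( a2, b );       /* { t1:2, t2:5 } */
   tl_assert( VTS__cmp( a, a2 ) == POrd_LT ); /* a precedes a2 */
   tl_assert( VTS__cmp( a, b )  == POrd_UN ); /* unrelated */
   tl_assert( VTS__cmp( j, j )  == POrd_EQ );
   VTS__delete(a); VTS__delete(b); VTS__delete(a2); VTS__delete(j);
}
#endif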
1561
1562
1563/*--------------- to do with Vector Timestamps ---------------*/
1564
1565/* Scalar Timestamp */
1566typedef
1567 struct {
1568 Thr* thr;
1569 ULong tym;
1570 }
1571 ScalarTS;
1572
1573
1574static Bool is_sane_VTS ( VTS* vts )
1575{
1576 UWord i, n;
1577 ScalarTS *st1, *st2;
1578 if (!vts) return False;
1579 if (!vts->ts) return False;
1580 n = VG_(sizeXA)( vts->ts );
1581 if (n >= 2) {
1582 for (i = 0; i < n-1; i++) {
1583 st1 = VG_(indexXA)( vts->ts, i );
1584 st2 = VG_(indexXA)( vts->ts, i+1 );
1585 if (st1->thr >= st2->thr)
1586 return False;
1587 if (st1->tym == 0 || st2->tym == 0)
1588 return False;
1589 }
1590 }
1591 return True;
1592}
1593
1594
1595/* Create a new, empty VTS.
1596*/
1597VTS* VTS__new ( void )
1598{
1599 VTS* vts;
1600 vts = HG_(zalloc)( "libhb.VTS__new.1", sizeof(VTS) );
1601 tl_assert(vts);
1602 vts->id = VtsID_INVALID;
1603 vts->ts = VG_(newXA)( HG_(zalloc), "libhb.VTS__new.2",
1604 HG_(free), sizeof(ScalarTS) );
1605 tl_assert(vts->ts);
1606 return vts;
1607}
1608
1609
1610/* Delete this VTS in its entirety.
1611*/
1612void VTS__delete ( VTS* vts )
1613{
1614 tl_assert(vts);
1615 tl_assert(vts->ts);
1616 VG_(deleteXA)( vts->ts );
1617 HG_(free)(vts);
1618}
1619
1620
1621/* Create a new singleton VTS.
1622*/
1623VTS* VTS__singleton ( Thr* thr, ULong tym ) {
1624 ScalarTS st;
1625 VTS* vts;
1626 tl_assert(thr);
1627 tl_assert(tym >= 1);
1628 vts = VTS__new();
1629 st.thr = thr;
1630 st.tym = tym;
1631 VG_(addToXA)( vts->ts, &st );
1632 return vts;
1633}
1634
1635
1636/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
1637 not modified.
1638*/
1639VTS* VTS__tick ( Thr* me, VTS* vts )
1640{
1641 ScalarTS* here = NULL;
1642 ScalarTS tmp;
1643 VTS* res;
1644 Word i, n;
1645 tl_assert(me);
1646 tl_assert(is_sane_VTS(vts));
1647 //if (0) VG_(printf)("tick vts thrno %ld szin %d\n",
1648 // (Word)me->errmsg_index, (Int)VG_(sizeXA)(vts) );
1649 res = VTS__new();
1650 n = VG_(sizeXA)( vts->ts );
1651
1652 /* main loop doesn't handle zero-entry case correctly, so
1653 special-case it. */
1654 if (n == 0) {
1655 tmp.thr = me;
1656 tmp.tym = 1;
1657 VG_(addToXA)( res->ts, &tmp );
1658 tl_assert(is_sane_VTS(res));
1659 return res;
1660 }
1661
1662 for (i = 0; i < n; i++) {
1663 here = VG_(indexXA)( vts->ts, i );
1664 if (me < here->thr) {
1665 /* We just went past 'me', without seeing it. */
1666 tmp.thr = me;
1667 tmp.tym = 1;
1668 VG_(addToXA)( res->ts, &tmp );
1669 tmp = *here;
1670 VG_(addToXA)( res->ts, &tmp );
1671 i++;
1672 break;
1673 }
1674 else if (me == here->thr) {
1675 tmp = *here;
1676 tmp.tym++;
1677 VG_(addToXA)( res->ts, &tmp );
1678 i++;
1679 break;
1680 }
1681 else /* me > here->thr */ {
1682 tmp = *here;
1683 VG_(addToXA)( res->ts, &tmp );
1684 }
1685 }
1686 tl_assert(i >= 0 && i <= n);
1687 if (i == n && here && here->thr < me) {
1688 tmp.thr = me;
1689 tmp.tym = 1;
1690 VG_(addToXA)( res->ts, &tmp );
1691 } else {
1692 for (/*keepgoing*/; i < n; i++) {
1693 here = VG_(indexXA)( vts->ts, i );
1694 tmp = *here;
1695 VG_(addToXA)( res->ts, &tmp );
1696 }
1697 }
1698 tl_assert(is_sane_VTS(res));
1699 //if (0) VG_(printf)("tick vts thrno %ld szou %d\n",
1700 // (Word)me->errmsg_index, (Int)VG_(sizeXA)(res) );
1701 return res;
1702}
1703
1704
1705/* Return a new VTS constructed as the join (max) of the 2 args.
1706 Neither arg is modified.
1707*/
1708VTS* VTS__join ( VTS* a, VTS* b )
1709{
1710 Word ia, ib, useda, usedb;
1711 ULong tyma, tymb, tymMax;
1712 Thr* thr;
1713 VTS* res;
1714 ScalarTS *tmpa, *tmpb;
1715
1716 tl_assert(a && a->ts);
1717 tl_assert(b && b->ts);
1718 useda = VG_(sizeXA)( a->ts );
1719 usedb = VG_(sizeXA)( b->ts );
1720
1721 res = VTS__new();
1722 ia = ib = 0;
1723
1724 while (1) {
1725
1726 /* This logic is to enumerate triples (thr, tyma, tymb) drawn
1727 from a and b in order, where thr is the next Thr*
1728 occurring in either a or b, and tyma/b are the relevant
1729 scalar timestamps, taking into account implicit zeroes. */
1730 tl_assert(ia >= 0 && ia <= useda);
1731 tl_assert(ib >= 0 && ib <= usedb);
1732 tmpa = tmpb = NULL;
1733
1734 if (ia == useda && ib == usedb) {
1735 /* both empty - done */
1736 break;
1737 }
1738 else
1739 if (ia == useda && ib != usedb) {
1740 /* a empty, use up b */
1741 tmpb = VG_(indexXA)( b->ts, ib );
1742 thr = tmpb->thr;
1743 tyma = 0;
1744 tymb = tmpb->tym;
1745 ib++;
1746 }
1747 else
1748 if (ia != useda && ib == usedb) {
1749 /* b empty, use up a */
1750 tmpa = VG_(indexXA)( a->ts, ia );
1751 thr = tmpa->thr;
1752 tyma = tmpa->tym;
1753 tymb = 0;
1754 ia++;
1755 }
1756 else {
1757 /* both not empty; extract lowest-Thr*'d triple */
1758 tmpa = VG_(indexXA)( a->ts, ia );
1759 tmpb = VG_(indexXA)( b->ts, ib );
1760 if (tmpa->thr < tmpb->thr) {
1761 /* a has the lowest unconsidered Thr* */
1762 thr = tmpa->thr;
1763 tyma = tmpa->tym;
1764 tymb = 0;
1765 ia++;
1766 }
1767 else
1768 if (tmpa->thr > tmpb->thr) {
1769 /* b has the lowest unconsidered Thr* */
1770 thr = tmpb->thr;
1771 tyma = 0;
1772 tymb = tmpb->tym;
1773 ib++;
1774 } else {
1775 /* they both next mention the same Thr* */
1776 tl_assert(tmpa->thr == tmpb->thr);
1777 thr = tmpa->thr; /* == tmpb->thr */
1778 tyma = tmpa->tym;
1779 tymb = tmpb->tym;
1780 ia++;
1781 ib++;
1782 }
1783 }
1784
1785 /* having laboriously determined (thr, tyma, tymb), do something
1786 useful with it. */
1787 tymMax = tyma > tymb ? tyma : tymb;
1788 if (tymMax > 0) {
1789 ScalarTS st;
1790 st.thr = thr;
1791 st.tym = tymMax;
1792 VG_(addToXA)( res->ts, &st );
1793 }
1794
1795 }
1796
1797 tl_assert(is_sane_VTS( res ));
1798
1799 return res;
1800}
1801
1802
1803/* Compute the partial ordering relation of the two args.
1804*/
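/* Illustrative examples (assuming thread pointers order as T1 < T2):
     [T1:2]       vs [T1:3]       -> POrd_LT
     [T1:3, T2:1] vs [T1:3, T2:1] -> POrd_EQ
     [T1:2, T2:5] vs [T1:4, T2:3] -> POrd_UN
   As in VTS__join, a thread missing from one argument is treated as
   having timestamp zero in that argument. */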
1805POrd VTS__cmp ( VTS* a, VTS* b )
1806{
1807 Word ia, ib, useda, usedb;
1808 ULong tyma, tymb;
1809 Thr* thr;
1810 ScalarTS *tmpa, *tmpb;
1811
1812 Bool all_leq = True;
1813 Bool all_geq = True;
1814
1815 tl_assert(a && a->ts);
1816 tl_assert(b && b->ts);
1817 useda = VG_(sizeXA)( a->ts );
1818 usedb = VG_(sizeXA)( b->ts );
1819
1820 ia = ib = 0;
1821
1822 while (1) {
1823
1824 /* This logic is to enumerate triples (thr, tyma, tymb) drawn
1825 from a and b in order, where thr is the next Thr*
1826 occurring in either a or b, and tyma/b are the relevant
1827 scalar timestamps, taking into account implicit zeroes. */
1828 tl_assert(ia >= 0 && ia <= useda);
1829 tl_assert(ib >= 0 && ib <= usedb);
1830 tmpa = tmpb = NULL;
1831
1832 if (ia == useda && ib == usedb) {
1833 /* both empty - done */
1834 break;
1835 }
1836 else
1837 if (ia == useda && ib != usedb) {
1838 /* a empty, use up b */
1839 tmpb = VG_(indexXA)( b->ts, ib );
1840 thr = tmpb->thr;
1841 tyma = 0;
1842 tymb = tmpb->tym;
1843 ib++;
1844 }
1845 else
1846 if (ia != useda && ib == usedb) {
1847 /* b empty, use up a */
1848 tmpa = VG_(indexXA)( a->ts, ia );
1849 thr = tmpa->thr;
1850 tyma = tmpa->tym;
1851 tymb = 0;
1852 ia++;
1853 }
1854 else {
1855 /* both not empty; extract lowest-Thr*'d triple */
1856 tmpa = VG_(indexXA)( a->ts, ia );
1857 tmpb = VG_(indexXA)( b->ts, ib );
1858 if (tmpa->thr < tmpb->thr) {
1859 /* a has the lowest unconsidered Thr* */
1860 thr = tmpa->thr;
1861 tyma = tmpa->tym;
1862 tymb = 0;
1863 ia++;
1864 }
1865 else
1866 if (tmpa->thr > tmpb->thr) {
1867 /* b has the lowest unconsidered Thr* */
1868 thr = tmpb->thr;
1869 tyma = 0;
1870 tymb = tmpb->tym;
1871 ib++;
1872 } else {
1873 /* they both next mention the same Thr* */
1874 tl_assert(tmpa->thr == tmpb->thr);
1875 thr = tmpa->thr; /* == tmpb->thr */
1876 tyma = tmpa->tym;
1877 tymb = tmpb->tym;
1878 ia++;
1879 ib++;
1880 }
1881 }
1882
1883 /* having laboriously determined (thr, tyma, tymb), do something
1884 useful with it. */
1885 if (tyma < tymb)
1886 all_geq = False;
1887 if (tyma > tymb)
1888 all_leq = False;
1889 }
1890
1891 if (all_leq && all_geq)
1892 return POrd_EQ;
1893 /* now we know they aren't equal, so either all_leq or all_geq or
1894 both are false. */
1895 if (all_leq)
1896 return POrd_LT;
1897 if (all_geq)
1898 return POrd_GT;
1899 /* hmm, neither all_geq or all_leq. This means unordered. */
1900 return POrd_UN;
1901}
1902
1903
1904/* Compute an arbitrary structural (total) ordering on the two args,
1905 based on their VCs, so they can be looked up in a table, tree, etc.
1906 Returns -1, 0 or 1. (really just 'deriving Ord' :-)
1907*/
1908Word VTS__cmp_structural ( VTS* a, VTS* b )
1909{
1910 /* We just need to generate an arbitrary total ordering based on
1911 a->ts and b->ts. Preferably do it in a way which detects likely
1912 differences relatively quickly. */
1913 Word i, useda, usedb;
1914 ScalarTS *tmpa, *tmpb;
1915
1916 tl_assert(a && a->ts);
1917 tl_assert(b && b->ts);
1918 useda = VG_(sizeXA)( a->ts );
1919 usedb = VG_(sizeXA)( b->ts );
1920
1921 if (useda < usedb) return -1;
1922 if (useda > usedb) return 1;
1923
1924 /* Same length vectors, so let's step through them together. */
1925 tl_assert(useda == usedb);
1926 for (i = 0; i < useda; i++) {
1927 tmpa = VG_(indexXA)( a->ts, i );
1928 tmpb = VG_(indexXA)( b->ts, i );
1929 if (tmpa->tym < tmpb->tym) return -1;
1930 if (tmpa->tym > tmpb->tym) return 1;
1931 if (tmpa->thr < tmpb->thr) return -1;
1932 if (tmpa->thr > tmpb->thr) return 1;
1933 }
1934
1935 /* They're identical. */
1936 return 0;
1937}
1938
1939
1940/* Debugging only. Display the given VTS in the buffer.
1941*/
1942void VTS__show ( HChar* buf, Int nBuf, VTS* vts ) {
1943 ScalarTS* st;
1944 HChar unit[64];
1945 Word i, n;
1946 Int avail = nBuf;
1947 tl_assert(vts && vts->ts);
1948 tl_assert(nBuf > 16);
1949 buf[0] = '[';
1950 buf[1] = 0;
1951 n = VG_(sizeXA)( vts->ts );
1952 for (i = 0; i < n; i++) {
1953 tl_assert(avail >= 40);
1954 st = VG_(indexXA)( vts->ts, i );
1955 VG_(memset)(unit, 0, sizeof(unit));
1956 VG_(sprintf)(unit, i < n-1 ? "%p:%lld " : "%p:%lld",
1957 st->thr, st->tym);
1958 if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
1959 VG_(strcat)(buf, " ...]");
1960 buf[nBuf-1] = 0;
1961 return;
1962 }
1963 VG_(strcat)(buf, unit);
1964 avail -= VG_(strlen)(unit);
1965 }
1966 VG_(strcat)(buf, "]");
1967 buf[nBuf-1] = 0;
1968}
1969
1970
1971/* Debugging only. Return vts[index], so to speak.
1972*/
1973ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) {
1974 UWord i, n;
1975 tl_assert(vts && vts->ts);
1976 n = VG_(sizeXA)( vts->ts );
1977 for (i = 0; i < n; i++) {
1978 ScalarTS* st = VG_(indexXA)( vts->ts, i );
1979 if (st->thr == idx)
1980 return st->tym;
1981 }
1982 return 0;
1983}
1984
1985
1986/////////////////////////////////////////////////////////////////
1987/////////////////////////////////////////////////////////////////
1988// //
1989// SECTION END vts primitives //
1990// //
1991/////////////////////////////////////////////////////////////////
1992/////////////////////////////////////////////////////////////////
1993
1994
1995
1996/////////////////////////////////////////////////////////////////
1997/////////////////////////////////////////////////////////////////
1998// //
1999// SECTION BEGIN main library //
2000// //
2001/////////////////////////////////////////////////////////////////
2002/////////////////////////////////////////////////////////////////
2003
2004
2005/////////////////////////////////////////////////////////
2006// //
2007// VTS set //
2008// //
2009/////////////////////////////////////////////////////////
2010
2011static WordFM* /* VTS* void void */ vts_set = NULL;
2012
2013static void vts_set_init ( void )
2014{
2015 tl_assert(!vts_set);
2016 vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
2017 HG_(free),
2018 (Word(*)(UWord,UWord))VTS__cmp_structural );
2019 tl_assert(vts_set);
2020}
2021
2022/* Given a newly made VTS, look in vts_set to see if we already have
2023 an identical one. If yes, free up this one and return instead a
2024 pointer to the existing one. If no, add this one to the set and
2025 return the same pointer. Caller differentiates the two cases by
2026 comparing returned pointer with the supplied one (although that
2027 does require that the supplied VTS is not already in the set).
2028*/
2029static VTS* vts_set__find_and_dealloc__or_add ( VTS* cand )
2030{
2031 UWord keyW, valW;
2032 /* lookup cand (by value) */
2033 if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
2034 /* found it */
2035 tl_assert(valW == 0);
2036 /* if this fails, cand (by ref) was already present (!) */
2037 tl_assert(keyW != (UWord)cand);
2038 VTS__delete(cand);
2039 return (VTS*)keyW;
2040 } else {
2041 /* not present. Add and return pointer to same. */
2042 VG_(addToFM)( vts_set, (UWord)cand, 0/*val is unused*/ );
2043 return cand;
2044 }
2045}
2046
2047
2048/////////////////////////////////////////////////////////
2049// //
2050// VTS table //
2051// //
2052/////////////////////////////////////////////////////////
2053
2054static void VtsID__invalidate_caches ( void ); /* fwds */
2055
2056/* A type to hold VTS table entries. Invariants:
2057 If .vts == NULL, then this entry is not in use, so:
2058 - .rc == 0
2059 - this entry is on the freelist (unfortunately, does not imply
2060 any constraints on value for .freelink)
2061 If .vts != NULL, then this entry is in use:
2062 - .vts is findable in vts_set
2063 - .vts->id == this entry number
2064 - no specific value for .rc (even 0 is OK)
2065 - this entry is not on freelist, so .freelink == VtsID_INVALID
2066*/
2067typedef
2068 struct {
2069 VTS* vts; /* vts, in vts_set */
2070 UWord rc; /* reference count - enough for entire aspace */
2071 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
2072 }
2073 VtsTE;
2074
2075/* The VTS table. */
2076static XArray* /* of VtsTE */ vts_tab = NULL;
2077
2078/* An index into the VTS table, indicating the start of the list of
2079 free (available for use) entries. If the list is empty, this is
2080 VtsID_INVALID. */
2081static VtsID vts_tab_freelist = VtsID_INVALID;
2082
2083/* Do a GC of vts_tab when the freelist becomes empty AND the size of
2084 vts_tab equals or exceeds this size. After GC, the value here is
2085 set appropriately so as to check for the next GC point. */
2086static Word vts_next_GC_at = 1000;
2087
2088static void vts_tab_init ( void )
2089{
2090 vts_tab
2091 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2092 HG_(free), sizeof(VtsTE) );
2093 vts_tab_freelist
2094 = VtsID_INVALID;
2095 tl_assert(vts_tab);
2096}
2097
2098/* Add ii to the free list, checking that it looks out-of-use. */
2099static void add_to_free_list ( VtsID ii )
2100{
2101 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2102 tl_assert(ie->vts == NULL);
2103 tl_assert(ie->rc == 0);
2104 tl_assert(ie->freelink == VtsID_INVALID);
2105 ie->freelink = vts_tab_freelist;
2106 vts_tab_freelist = ii;
2107}
2108
2109/* Get an entry from the free list. This will return VtsID_INVALID if
2110 the free list is empty. */
2111static VtsID get_from_free_list ( void )
2112{
2113 VtsID ii;
2114 VtsTE* ie;
2115 if (vts_tab_freelist == VtsID_INVALID)
2116 return VtsID_INVALID;
2117 ii = vts_tab_freelist;
2118 ie = VG_(indexXA)( vts_tab, ii );
2119 tl_assert(ie->vts == NULL);
2120 tl_assert(ie->rc == 0);
2121 vts_tab_freelist = ie->freelink;
2122 return ii;
2123}
2124
2125/* Produce a new VtsID that can be used, either by getting it from
2126 the freelist, or, if that is empty, by expanding vts_tab. */
2127static VtsID get_new_VtsID ( void )
2128{
2129 VtsID ii;
2130 VtsTE te;
2131 ii = get_from_free_list();
2132 if (ii != VtsID_INVALID)
2133 return ii;
2134 te.vts = NULL;
2135 te.rc = 0;
2136 te.freelink = VtsID_INVALID;
2137 ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2138 return ii;
2139}
2140
2141
2142/* Indirect callback from lib_zsm. */
2143static void VtsID__rcinc ( VtsID ii )
2144{
2145 VtsTE* ie;
2146 /* VG_(indexXA) does a range check for us */
2147 ie = VG_(indexXA)( vts_tab, ii );
2148 tl_assert(ie->vts); /* else it's not in use */
2149 tl_assert(ie->rc < ~0UL); /* else we can't continue */
2150 tl_assert(ie->vts->id == ii);
2151 ie->rc++;
2152}
2153
2154/* Indirect callback from lib_zsm. */
2155static void VtsID__rcdec ( VtsID ii )
2156{
2157 VtsTE* ie;
2158 /* VG_(indexXA) does a range check for us */
2159 ie = VG_(indexXA)( vts_tab, ii );
2160 tl_assert(ie->vts); /* else it's not in use */
2161 tl_assert(ie->rc > 0); /* else RC snafu */
2162 tl_assert(ie->vts->id == ii);
2163 ie->rc--;
2164}
2165
2166
2167/* Look up 'cand' in our collection of VTSs. If present, deallocate
2168 it and return the VtsID for the pre-existing version. If not
2169 present, add it to both vts_tab and vts_set, allocate a fresh VtsID
2170 for it, and return that. */
2171static VtsID vts_tab__find_and_dealloc__or_add ( VTS* cand )
2172{
2173 VTS* auld;
2174 tl_assert(cand->id == VtsID_INVALID);
2175 auld = vts_set__find_and_dealloc__or_add(cand);
2176 if (auld != cand) {
2177 /* We already have an Aulde one. Use that. */
2178 VtsTE* ie;
2179 tl_assert(auld->id != VtsID_INVALID);
2180 ie = VG_(indexXA)( vts_tab, auld->id );
2181 tl_assert(ie->vts == auld);
2182 return auld->id;
2183 } else {
2184 VtsID ii = get_new_VtsID();
2185 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2186 ie->vts = cand;
2187 ie->rc = 0;
2188 ie->freelink = VtsID_INVALID;
2189 cand->id = ii;
2190 return ii;
2191 }
2192}
2193
2194
2195static void show_vts_stats ( HChar* caller )
2196{
2197 UWord nSet, nTab, nLive;
2198 ULong totrc;
2199 UWord n, i;
2200 nSet = VG_(sizeFM)( vts_set );
2201 nTab = VG_(sizeXA)( vts_tab );
2202 totrc = 0;
2203 nLive = 0;
2204 n = VG_(sizeXA)( vts_tab );
2205 for (i = 0; i < n; i++) {
2206 VtsTE* ie = VG_(indexXA)( vts_tab, i );
2207 if (ie->vts) {
2208 nLive++;
2209 totrc += (ULong)ie->rc;
2210 } else {
2211 tl_assert(ie->rc == 0);
2212 }
2213 }
2214 VG_(printf)(" show_vts_stats %s\n", caller);
2215 VG_(printf)(" vts_tab size %4lu\n", nTab);
2216 VG_(printf)(" vts_tab live %4lu\n", nLive);
2217 VG_(printf)(" vts_set size %4lu\n", nSet);
2218 VG_(printf)(" total rc %4llu\n", totrc);
2219}
2220
2221/* NOT TO BE CALLED FROM WITHIN libzsm. */
2222static void vts_tab__do_GC ( Bool show_stats )
2223{
2224 UWord i, nTab, nLive, nFreed;
2225
2226 /* check this is actually necessary. */
2227 tl_assert(vts_tab_freelist == VtsID_INVALID);
2228
2229 /* empty the caches for partial order checks and binary joins. We
2230 could do better and prune out the entries to be deleted, but it
2231 ain't worth the hassle. */
2232 VtsID__invalidate_caches();
2233
2234 /* First, make the reference counts up to date. */
2235 zsm_flush_cache();
2236
2237 nTab = VG_(sizeXA)( vts_tab );
2238
2239 if (show_stats) {
2240 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
2241 show_vts_stats("before GC");
2242 }
2243
2244 /* Now we can inspect the entire vts_tab. Any entries
2245 with zero .rc fields are now no longer in use, and so can be put
2246 on the free list, removed from vts_set, and deleted. */
2247 nFreed = 0;
2248 for (i = 0; i < nTab; i++) {
2249 Bool present;
2250 UWord oldK = 0, oldV = 0;
2251 VtsTE* te = VG_(indexXA)( vts_tab, i );
2252 if (te->vts == NULL) {
2253 tl_assert(te->rc == 0);
2254 continue; /* already on the free list (presumably) */
2255 }
2256 if (te->rc > 0)
2257 continue; /* in use */
2258 /* Ok, we got one we can free. */
2259 tl_assert(te->vts->id == i);
2260 /* first, remove it from vts_set. */
2261 present = VG_(delFromFM)( vts_set,
2262 &oldK, &oldV, (UWord)te->vts );
2263 tl_assert(present); /* else it isn't in vts_set ?! */
2264 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2265 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
2266 /* now free the VTS itself */
2267 VTS__delete(te->vts);
2268 te->vts = NULL;
2269 /* and finally put this entry on the free list */
2270 tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
2271 add_to_free_list( i );
2272 nFreed++;
2273 }
2274
2275 /* Now figure out when the next GC should be. We'll allow the
2276 number of VTSs to double before GCing again. Except of course
2277 that since we can't (or, at least, don't) shrink vts_tab, we
2278 can't set the threshold value smaller than it. */
2279 tl_assert(nFreed <= nTab);
2280 nLive = nTab - nFreed;
2281 tl_assert(nLive >= 0 && nLive <= nTab);
2282 vts_next_GC_at = 2 * nLive;
2283 if (vts_next_GC_at < nTab)
2284 vts_next_GC_at = nTab;
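   /* Worked example: with nTab == 1000, if 300 entries survive the GC
      then vts_next_GC_at becomes max(2*300, 1000) == 1000; if 800
      survive it becomes 2*800 == 1600. */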
2285
2286 if (show_stats) {
2287 show_vts_stats("after GC");
2288 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
2289 }
2290
2291 if (1) {
2292 static UInt ctr = 0;
2293 tl_assert(nTab > 0);
2294 VG_(printf)("libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)\n",
2295 ctr++, nTab, nLive, (100ULL * nLive) / nTab);
2296 }
2297}
2298
2299
2300/////////////////////////////////////////////////////////
2301// //
2302// Vts IDs //
2303// //
2304/////////////////////////////////////////////////////////
2305
2306//////////////////////////
2307static ULong stats__getOrdering_queries = 0;
2308static ULong stats__getOrdering_misses = 0;
2309static ULong stats__join2_queries = 0;
2310static ULong stats__join2_misses = 0;
2311
2312static inline UInt ROL32 ( UInt w, Int n ) {
2313 w = (w << n) | (w >> (32-n));
2314 return w;
2315}
2316static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
2317 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
2318 return hash % nTab;
2319}
2320
2321#define N_GETORDERING_CACHE 1023
2322static
2323 struct { VtsID vi1; VtsID vi2; POrd ord; }
2324 getOrdering_cache[N_GETORDERING_CACHE];
2325
2326#define N_JOIN2_CACHE 1023
2327static
2328 struct { VtsID vi1; VtsID vi2; VtsID res; }
2329 join2_cache[N_JOIN2_CACHE];
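/* Both caches are simple direct-mapped memo tables: a query (vi1,vi2)
   hashes to exactly one slot, which is overwritten on a miss.  They
   must be flushed (VtsID__invalidate_caches below) whenever VtsIDs are
   recycled by vts_tab__do_GC, since a recycled ID may subsequently
   denote a different VTS. */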
2330
2331static void VtsID__invalidate_caches ( void ) {
2332 Int i;
2333 for (i = 0; i < N_GETORDERING_CACHE; i++) {
2334 getOrdering_cache[i].vi1 = VtsID_INVALID;
2335 getOrdering_cache[i].vi2 = VtsID_INVALID;
2336 getOrdering_cache[i].ord = 0; /* an invalid POrd value */
2337 }
2338 for (i = 0; i < N_JOIN2_CACHE; i++) {
2339 join2_cache[i].vi1 = VtsID_INVALID;
2340 join2_cache[i].vi2 = VtsID_INVALID;
2341 join2_cache[i].res = VtsID_INVALID;
2342 }
2343}
2344//////////////////////////
2345
2346static Bool VtsID__is_valid ( VtsID vi ) {
2347 VtsTE* ve;
2348 if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
2349 return False;
2350 ve = VG_(indexXA)( vts_tab, vi );
2351 if (!ve->vts)
2352 return False;
2353 tl_assert(ve->vts->id == vi);
2354 return True;
2355}
2356
2357static VTS* VtsID__to_VTS ( VtsID vi ) {
2358 VtsTE* te = VG_(indexXA)( vts_tab, vi );
2359 tl_assert(te->vts);
2360 return te->vts;
2361}
2362
2363static void VtsID__pp ( VtsID vi ) {
2364 HChar buf[100];
2365 VTS* vts = VtsID__to_VTS(vi);
2366 VTS__show( buf, sizeof(buf)-1, vts );
2367 buf[sizeof(buf)-1] = 0;
2368 VG_(printf)("%s", buf);
2369}
2370
2371/* compute partial ordering relation of vi1 and vi2. */
2372__attribute__((noinline))
2373static POrd VtsID__getOrdering_WRK ( VtsID vi1, VtsID vi2 ) {
2374 UInt hash;
2375 POrd ord;
2376 VTS *v1, *v2;
2377 //if (vi1 == vi2) return POrd_EQ;
2378 tl_assert(vi1 != vi2);
2379 ////++
2380 stats__getOrdering_queries++;
2381 hash = hash_VtsIDs(vi1, vi2, N_GETORDERING_CACHE);
2382 if (getOrdering_cache[hash].vi1 == vi1
2383 && getOrdering_cache[hash].vi2 == vi2)
2384 return getOrdering_cache[hash].ord;
2385 stats__getOrdering_misses++;
2386 ////--
2387 v1 = VtsID__to_VTS(vi1);
2388 v2 = VtsID__to_VTS(vi2);
2389 ord = VTS__cmp( v1, v2 );
2390 ////++
2391 getOrdering_cache[hash].vi1 = vi1;
2392 getOrdering_cache[hash].vi2 = vi2;
2393 getOrdering_cache[hash].ord = ord;
2394 ////--
2395 return ord;
2396}
2397static inline POrd VtsID__getOrdering ( VtsID vi1, VtsID vi2 ) {
2398 return vi1 == vi2 ? POrd_EQ : VtsID__getOrdering_WRK(vi1, vi2);
2399}
2400
2401/* compute binary join */
2402__attribute__((noinline))
2403static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
2404 UInt hash;
2405 VtsID res;
2406 VTS *vts1, *vts2, *nyu;
2407 //if (vi1 == vi2) return vi1;
2408 tl_assert(vi1 != vi2);
2409 ////++
2410 stats__join2_queries++;
2411 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
2412 if (join2_cache[hash].vi1 == vi1
2413 && join2_cache[hash].vi2 == vi2)
2414 return join2_cache[hash].res;
2415 stats__join2_misses++;
2416 ////--
2417 vts1 = VtsID__to_VTS(vi1);
2418 vts2 = VtsID__to_VTS(vi2);
2419 nyu = VTS__join(vts1,vts2);
2420 res = vts_tab__find_and_dealloc__or_add(nyu);
2421 ////++
2422 join2_cache[hash].vi1 = vi1;
2423 join2_cache[hash].vi2 = vi2;
2424 join2_cache[hash].res = res;
2425 ////--
2426 return res;
2427}
2428static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
2429 return vi1 == vi2 ? vi1 : VtsID__join2_WRK(vi1, vi2);
2430}
2431
2432/* create a singleton VTS, namely [thr:1] */
2433static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
2434 VTS* nyu = VTS__singleton(thr,tym);
2435 return vts_tab__find_and_dealloc__or_add(nyu);
2436}
2437
2438/* tick operation, creates value 1 if specified index is absent */
2439static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
2440 VTS* vts = VtsID__to_VTS(vi);
2441 VTS* nyu = VTS__tick(idx,vts);
2442 return vts_tab__find_and_dealloc__or_add(nyu);
2443}
2444
2445/* index into a VTS (only for assertions) */
2446static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
2447 VTS* vts = VtsID__to_VTS(vi);
2448 return VTS__indexAt_SLOW( vts, idx );
2449}
2450
2451
2452/////////////////////////////////////////////////////////
2453// //
2454// Threads //
2455// //
2456/////////////////////////////////////////////////////////
2457
2458struct _Thr {
2459 /* Current VTSs for this thread. They change as we go along. viR
2460 is the VTS to be used for reads, viW for writes. Usually they
2461 are the same, but can differ when we deal with reader-writer
2462 locks. It is always the case that VtsID__getOrdering(viW,viR)
2463 == POrd_LT or POrd_EQ -- that is, viW must be the same as, or
2464 lagging behind, viR. */
2465 VtsID viR;
2466 VtsID viW;
2467 /* opaque (to us) data we hold on behalf of the library's user. */
2468 void* opaque;
2469};
2470
2471static Thr* Thr__new ( void ) {
2472 Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
2473 thr->viR = VtsID_INVALID;
2474 thr->viW = VtsID_INVALID;
2475 return thr;
2476}
2477
2478
2479/////////////////////////////////////////////////////////
2480// //
2481// Shadow Values //
2482// //
2483/////////////////////////////////////////////////////////
2484
2485// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
2486// hb_zsm.h. We have to do everything else here.
2487
2488/* SVal is 64 bit unsigned int.
2489
2490 <---------30---------> <---------30--------->
2491 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
2492 01 X--------------------X XX X--------------------X E(rror)
2493 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
2494 11 X--------------------X XX X--------------------X I: SVal_INVALID
2495*/
2496#define SVAL_TAGMASK (3ULL << 62)
2497
2498static inline Bool SVal__isC ( SVal s ) {
2499 return (0ULL << 62) == (s & SVAL_TAGMASK);
2500}
2501static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
2502 //tl_assert(VtsID__is_valid(rmini));
2503 //tl_assert(VtsID__is_valid(wmini));
2504 return (((ULong)rmini) << 32) | ((ULong)wmini);
2505}
2506static inline VtsID SVal__unC_Rmin ( SVal s ) {
2507 tl_assert(SVal__isC(s));
2508 return (VtsID)(s >> 32);
2509}
2510static inline VtsID SVal__unC_Wmin ( SVal s ) {
2511 tl_assert(SVal__isC(s));
2512 return (VtsID)(s & 0xFFFFFFFFULL);
2513}
2514
2515static Bool SVal__isE ( SVal s ) {
2516 return (1ULL << 62) == (s & SVAL_TAGMASK);
2517}
2518static SVal SVal__mkE ( void ) {
2519 return 1ULL << 62;
2520}
2521
2522static Bool SVal__isA ( SVal s ) {
2523 return (2ULL << 62) == (s & SVAL_TAGMASK);
2524}
2525static SVal SVal__mkA ( void ) {
2526 return 2ULL << 62;
2527}
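#if 0
/* Illustrative sanity check of the layout above; not compiled in.  The
   concrete VtsID values 5 and 7 are hypothetical, chosen only to show
   how the two 32-bit halves pack and unpack. */
static void SVal__layout_example ( void )
{
   SVal s = SVal__mkC( 5, 7 );
   tl_assert(s == 0x0000000500000007ULL);  /* tag bits are 00, so isC */
   tl_assert(SVal__isC(s));
   tl_assert(SVal__unC_Rmin(s) == 5);
   tl_assert(SVal__unC_Wmin(s) == 7);
}
#endif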
2528
2529/* Direct callback from lib_zsm. */
2530static void SVal__rcinc ( SVal s ) {
2531 if (SVal__isC(s)) {
2532 VtsID__rcinc( SVal__unC_Rmin(s) );
2533 VtsID__rcinc( SVal__unC_Wmin(s) );
2534 }
2535}
2536
2537/* Direct callback from lib_zsm. */
2538static void SVal__rcdec ( SVal s ) {
2539 if (SVal__isC(s)) {
2540 VtsID__rcdec( SVal__unC_Rmin(s) );
2541 VtsID__rcdec( SVal__unC_Wmin(s) );
2542 }
2543}
2544
2545
2546/////////////////////////////////////////////////////////
2547// //
2548// Change-event map2 //
2549// //
2550/////////////////////////////////////////////////////////
2551
2552#define EVENT_MAP_GC_AT (1 * 1000 * 1000)
2553#define EVENT_MAP_GC_DISCARD_FRACTION 0.5
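/* That is (see event_map_maybe_GC below): a GC is considered once the
   OldRef tree reaches one million entries, and it then discards whole
   generations, oldest first, for as long as more than
   EVENT_MAP_GC_AT * EVENT_MAP_GC_DISCARD_FRACTION entries would still
   remain afterwards. */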
2554
2555/* This is in two parts:
2556
2557 1. An OSet of RCECs. This is a set of reference-counted stack
2558 traces. When the reference count of a stack trace becomes zero,
2559 it is removed from the set and freed up. The intent is to have
2560 a set of stack traces which can be referred to from (2), but to
2561 only represent each one once. The set is indexed/searched by
2562 ordering on the stack trace vectors.
2563
2564 2. An OSet of OldRefs. These store information about each old ref
2565 that we need to record. It is indexed by address of the
2566 location for which the information is recorded. For LRU
2567 purposes, each OldRef also contains a generation number,
2568 indicating when it was most recently accessed.
2569
2570 The important part of an OldRef is, however, its accs[] array.
2571 This is an array of N_OLDREF_ACCS (Thr, RCEC) pairs. This
2572 allows us to collect the last access-traceback by up to
2573 N_OLDREF_ACCS different threads for this location. The accs[]
2574 array is a move-to-front (MTF) array: if a pair falls off the
2575 end, we lose the information about that thread's access to
2576 this location.
2577
2578 When this OSet becomes too big, we can throw away the entries
2579 whose generation numbers are below some threshold; hence doing
2580 approximate LRU discarding. For each discarded OldRef we must
2581 of course decrement the reference count on the all RCECs it
2582 refers to, in order that entries from (1) eventually get
2583 discarded too.
2584*/
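/* Illustrative example of the accs[] behaviour described above: with
   N_OLDREF_ACCS == 3, successive accesses to one location by threads
   T1, T2, T3 and then T2 again leave accs[] as [T2, T3, T1].  A
   previously-unseen thread is inserted at slot 0 (shuffling the others
   down one place), whereas a repeat access moves that thread's entry
   just one slot closer to the front.  See event_map_bind below. */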
2585
2586
2587static UWord stats__ctxt_rcdec1 = 0;
2588static UWord stats__ctxt_rcdec2 = 0;
2589static UWord stats__ctxt_rcdec3 = 0;
2590static UWord stats__ctxt_rcdec_calls = 0;
2591static UWord stats__ctxt_rcdec_discards = 0;
2592static UWord stats__ctxt_rcdec1_eq = 0;
2593
2594static UWord stats__ctxt_tab_curr = 0;
2595static UWord stats__ctxt_tab_max = 0;
2596
2597static UWord stats__ctxt_tab_qs = 0;
2598static UWord stats__ctxt_tab_cmps = 0;
2599
2600
2601///////////////////////////////////////////////////////
2602//// Part (1): An OSet of RCECs
2603///
2604
2605#define N_FRAMES 8
2606
2607// (UInt) `echo "Reference Counted Execution Context" | md5sum`
2608#define RCEC_MAGIC 0xab88abb2UL
2609
2610//#define N_RCEC_TAB 98317 /* prime */
2611#define N_RCEC_TAB 196613 /* prime */
2612
2613typedef
2614 struct _RCEC {
2615 struct _RCEC* next;
2616 UWord magic;
2617 UWord rc;
2618 UWord rcX; /* used for crosschecking */
2619 UWord frames[1 + N_FRAMES]; /* first word is hash of all the rest */
2620 }
2621 RCEC;
2622
2623static RCEC** contextTab = NULL; /* hash table of RCEC*s */
2624
2625
2626/* Gives an arbitrary total order on RCEC .frames fields */
2627static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
2628 Word i;
2629 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
2630 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
2631 if (ec1->frames[0] < ec2->frames[0]) return -1;
2632 if (ec1->frames[0] > ec2->frames[0]) return 1;
2633 for (i = 1; i < 1 + N_FRAMES; i++) {
2634 if (ec1->frames[i] < ec2->frames[i]) return -1;
2635 if (ec1->frames[i] > ec2->frames[i]) return 1;
2636 }
2637 return 0;
2638}
2639
2640
2641/* Dec the ref of this RCEC. */
2642static void ctxt__rcdec ( RCEC* ec )
2643{
2644 stats__ctxt_rcdec_calls++;
2645 tl_assert(ec && ec->magic == RCEC_MAGIC);
2646 tl_assert(ec->rc > 0);
2647 ec->rc--;
2648}
2649
2650static void ctxt__rcinc ( RCEC* ec )
2651{
2652 tl_assert(ec && ec->magic == RCEC_MAGIC);
2653 ec->rc++;
2654}
2655
2656
2657/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
2658 move it one step closer to the front of the list, so as to make
2659 subsequent searches for it cheaper. */
2660static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
2661{
2662 RCEC *ec0, *ec1, *ec2;
2663 if (ec == *headp)
2664 tl_assert(0); /* already at head of list */
2665 tl_assert(ec != NULL);
2666 ec0 = *headp;
2667 ec1 = NULL;
2668 ec2 = NULL;
2669 while (True) {
2670 if (ec0 == NULL || ec0 == ec) break;
2671 ec2 = ec1;
2672 ec1 = ec0;
2673 ec0 = ec0->next;
2674 }
2675 tl_assert(ec0 == ec);
2676 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
2677 RCEC* tmp;
2678 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
2679 predecessor. Swap ec0 and ec1, that is, move ec0 one step
2680 closer to the start of the list. */
2681 tl_assert(ec2->next == ec1);
2682 tl_assert(ec1->next == ec0);
2683 tmp = ec0->next;
2684 ec2->next = ec0;
2685 ec0->next = ec1;
2686 ec1->next = tmp;
2687 }
2688 else
2689 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
2690 /* it's second in the list. */
2691 tl_assert(*headp == ec1);
2692 tl_assert(ec1->next == ec0);
2693 ec1->next = ec0->next;
2694 ec0->next = ec1;
2695 *headp = ec0;
2696 }
2697}
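/* Illustrative example: if the chain at *headp is A -> B -> C -> D,
   then move_RCEC_one_step_forward(headp, C) rewires it to
   A -> C -> B -> D: the sought-after node swaps places with its
   immediate predecessor only, rather than moving all the way to the
   front. */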
2698
2699
2700/* Find the given RCEC in the contextTab hash table, and return a
2701 pointer to it. Or, if not present, add the given one to the table
2702 (by making a copy of it, so the caller can immediately deallocate
2703 the original) and return a pointer to the copy. The caller can
2704 safely have 'example' on its stack, since we will always return a
2705 pointer to a copy, not to the original. Note that the inserted node
2706 will have .rc of zero and so the caller must immediately increment it. */
2707__attribute__((noinline))
2708static RCEC* ctxt__find_or_add ( RCEC* example )
2709{
2710 UWord hent;
2711 RCEC* copy;
2712 tl_assert(example && example->magic == RCEC_MAGIC);
2713 tl_assert(example->rc == 0);
2714
2715 /* Search the hash table to see if we already have it. */
2716 stats__ctxt_tab_qs++;
2717 hent = example->frames[0] % N_RCEC_TAB;
2718 copy = contextTab[hent];
2719 while (1) {
2720 if (!copy) break;
2721 tl_assert(copy->magic == RCEC_MAGIC);
2722 stats__ctxt_tab_cmps++;
2723 if (0 == RCEC__cmp_by_frames(copy, example)) break;
2724 copy = copy->next;
2725 }
2726
2727 if (copy) {
2728 tl_assert(copy != example);
2729 /* optimisation: if it's not at the head of its list, move 1
2730 step fwds, to make future searches cheaper */
2731 if (copy != contextTab[hent]) {
2732 move_RCEC_one_step_forward( &contextTab[hent], copy );
2733 }
2734 } else {
2735 copy = HG_(zalloc)( "libhb.cfoa.1", sizeof(RCEC) );
2736 tl_assert(copy != example);
2737 *copy = *example;
2738 copy->next = contextTab[hent];
2739 contextTab[hent] = copy;
2740 stats__ctxt_tab_curr++;
2741 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
2742 stats__ctxt_tab_max = stats__ctxt_tab_curr;
2743 }
2744 return copy;
2745}
2746
2747static inline UWord ROLW ( UWord w, Int n )
2748{
2749 Int bpw = 8 * sizeof(UWord);
2750 w = (w << n) | (w >> (bpw-n));
2751 return w;
2752}
2753
2754__attribute__((noinline))
2755static RCEC* get_RCEC ( Thr* thr )
2756{
2757 UWord hash, i;
2758 RCEC example;
2759 example.magic = RCEC_MAGIC;
2760 example.rc = 0;
2761 example.rcX = 0;
2762 main_get_stacktrace( thr, &example.frames[1], N_FRAMES );
2763 hash = 0;
2764 for (i = 1; i < 1 + N_FRAMES; i++) {
2765 hash ^= example.frames[i];
2766 hash = ROLW(hash, 19);
2767 }
2768 example.frames[0] = hash;
2769 return ctxt__find_or_add( &example );
2770}
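/* Note: frames[0] holds the hash of frames[1 .. N_FRAMES].  It serves
   both as the bucket index into contextTab (modulo N_RCEC_TAB, in
   ctxt__find_or_add) and as a cheap first key in RCEC__cmp_by_frames. */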
2771
2772///////////////////////////////////////////////////////
2773//// Part (2): An OSet of OldRefs, that refer to (1)
2774///
2775
2776// (UInt) `echo "Old Reference Information" | md5sum`
2777#define OldRef_MAGIC 0x30b1f075UL
2778
2779typedef struct { Thr* thr; RCEC* rcec; } Thr_n_RCEC;
2780
2781#define N_OLDREF_ACCS 3
2782
2783typedef
2784 struct {
2785 Addr ea;
2786 UWord magic;
2787 UWord gen; /* when most recently accessed */
2788 /* unused slots in this array have .thr == NULL */
2789 Thr_n_RCEC accs[N_OLDREF_ACCS];
2790 }
2791 OldRef;
2792
2793static Word OldRef__cmp_by_EA ( OldRef* r1, OldRef* r2 ) {
2794 tl_assert(r1 && r1->magic == OldRef_MAGIC);
2795 tl_assert(r2 && r2->magic == OldRef_MAGIC);
2796 if (r1->ea < r2->ea) return -1;
2797 if (r1->ea > r2->ea) return 1;
2798 return 0;
2799}
2800
2801static OSet* oldrefTree = NULL; /* OSet* of OldRef */
2802static UWord oldrefGen = 0; /* current LRU generation # */
2803static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
2804static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */
2805
2806static void event_map_bind ( Addr a, Thr* thr )
2807{
2808 OldRef key, *ref;
2809 RCEC* here;
2810 Word i, j;
2811
2812 key.ea = a;
2813 key.magic = OldRef_MAGIC;
2814
2815 ref = VG_(OSetGen_Lookup)( oldrefTree, &key );
2816
2817 if (ref) {
2818
2819 /* We already have a record for this address. We now need to
2820 see if we have a stack trace pertaining to this thread's
2821 access. */
2822 tl_assert(ref->magic == OldRef_MAGIC);
2823
2824 tl_assert(thr);
2825 for (i = 0; i < N_OLDREF_ACCS; i++) {
2826 if (ref->accs[i].thr == thr)
2827 break;
2828 }
2829
2830 if (i < N_OLDREF_ACCS) {
2831 /* thread 'thr' has an entry at index 'i'. Update it. */
2832 if (i > 0) {
2833 Thr_n_RCEC tmp = ref->accs[i-1];
2834 ref->accs[i-1] = ref->accs[i];
2835 ref->accs[i] = tmp;
2836 i--;
2837 }
2838 here = get_RCEC( thr );
2839 if (here == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
2840 ctxt__rcinc( here );
2841 stats__ctxt_rcdec1++;
2842 ctxt__rcdec( ref->accs[i].rcec );
2843 ref->accs[i].rcec = here;
2844 tl_assert(ref->accs[i].thr == thr);
2845 } else {
2846 here = get_RCEC( thr );
2847 ctxt__rcinc( here );
2848 /* No entry for this thread. Shuffle all of them down one
2849 slot, and put the new entry at the start of the array. */
2850 if (ref->accs[N_OLDREF_ACCS-1].thr) {
2851 /* the last slot is in use. We must dec the rc on the
2852 associated rcec. */
2853 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
2854 stats__ctxt_rcdec2++;
2855 ctxt__rcdec(ref->accs[N_OLDREF_ACCS-1].rcec);
2856 } else {
2857 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
2858 }
2859 for (j = N_OLDREF_ACCS-1; j >= 1; j--)
2860 ref->accs[j] = ref->accs[j-1];
2861 ref->accs[0].thr = thr;
2862 ref->accs[0].rcec = here;
2863 tl_assert(thr); /* thr==NULL is used to signify an empty slot,
2864 so we can't add a NULL thr. */
2865 }
2866
2867 ref->gen = oldrefGen;
2868 tl_assert(ref->ea == a);
2869
2870 } else {
2871
2872 /* We don't have a record for this address. Create a new one. */
2873 if (oldrefTreeN >= oldrefGenIncAt) {
2874 oldrefGen++;
2875 oldrefGenIncAt = oldrefTreeN + 50000;
2876 if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
2877 oldrefGen, oldrefTreeN );
2878 }
2879 here = get_RCEC( thr );
2880 ctxt__rcinc(here);
2881 ref = VG_(OSetGen_AllocNode)( oldrefTree, sizeof(OldRef) );
2882 ref->magic = OldRef_MAGIC;
2883 ref->gen = oldrefGen;
2884 ref->ea = a;
2885 ref->accs[0].rcec = here;
2886 ref->accs[0].thr = thr;
2887 tl_assert(thr); /* thr==NULL is used to signify an empty slot,
2888 so we can't add a NULL thr. */
2889 for (j = 1; j < N_OLDREF_ACCS; j++) {
2890 ref->accs[j].thr = NULL;
2891 ref->accs[j].rcec = NULL;
2892 }
2893 VG_(OSetGen_Insert)( oldrefTree, ref );
2894 oldrefTreeN++;
2895
2896 }
2897}
2898
2899
2900static
2901Bool event_map_lookup ( /*OUT*/struct _EC** resEC,
2902 /*OUT*/Thr** resThr,
2903 Thr* thr_acc, Addr a )
2904{
2905 Word i;
2906 OldRef key, *ref;
2907
2908 tl_assert(thr_acc);
2909
2910 key.ea = a;
2911 key.magic = OldRef_MAGIC;
2912
2913 ref = VG_(OSetGen_Lookup)( oldrefTree, &key );
2914 if (ref) {
2915 tl_assert(ref->magic == OldRef_MAGIC);
2916 tl_assert(ref->accs[0].thr); /* first slot must always be used */
2917
2918 for (i = 0; i < N_OLDREF_ACCS; i++) {
2919 if (ref->accs[i].thr != NULL
2920 && ref->accs[i].thr != thr_acc)
2921 break;
2922 }
2923 /* If we didn't find an entry for some thread other than
2924 thr_acc, just fall back to the entry in slot 0; the resulting
2925 report will be less useful to the user, though. */
2926 if (i == N_OLDREF_ACCS)
2927 i = 0;
2928
2929 tl_assert(i >= 0 && i < N_OLDREF_ACCS);
2930 tl_assert(ref->accs[i].thr);
2931 tl_assert(ref->accs[i].rcec);
2932 tl_assert(ref->accs[i].rcec->magic == RCEC_MAGIC);
2933
2934 *resEC = main_stacktrace_to_EC(&ref->accs[i].rcec->frames[1], N_FRAMES);
2935 *resThr = ref->accs[i].thr;
2936 return True;
2937 } else {
2938 return False;
2939 }
2940}
2941
2942static void event_map_init ( void )
2943{
2944 Word i;
2945 tl_assert(!contextTab);
2946 contextTab = HG_(zalloc)( "libhb.event_map_init.1 (context table)",
2947 N_RCEC_TAB * sizeof(RCEC*) );
2948 tl_assert(contextTab);
2949 for (i = 0; i < N_RCEC_TAB; i++)
2950 contextTab[i] = NULL;
2951
2952 tl_assert(!oldrefTree);
2953 tl_assert(offsetof(OldRef,ea) == 0); /* prereq for unboxed cmps */
2954 oldrefTree = VG_(OSetGen_Create)(
2955 offsetof(OldRef,ea), /* == 0 */
2956 NULL, /* use unboxed cmp on OldRefs */
2957 HG_(zalloc), "libhb.event_map_init.2 (oldref tree)",
2958 HG_(free)
2959 );
2960 tl_assert(oldrefTree);
2961
2962 oldrefGen = 0;
2963 oldrefGenIncAt = 0;
2964 oldrefTreeN = 0;
2965}
2966
2967static void event_map__check_reference_counts ( Bool before )
2968{
2969 RCEC* rcec;
2970 OldRef* oldref;
2971 Word i;
2972 UWord nEnts = 0;
2973
2974 /* Set the 'check' reference counts to zero. Also, optionally
2975 check that the real reference counts are non-zero. We allow
2976 these to fall to zero before a GC, but the GC must get rid of
2977 all those that are zero, hence none should be zero after a
2978 GC. */
2979 for (i = 0; i < N_RCEC_TAB; i++) {
2980 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
2981 nEnts++;
2982 tl_assert(rcec);
2983 tl_assert(rcec->magic == RCEC_MAGIC);
2984 if (!before)
2985 tl_assert(rcec->rc > 0);
2986 rcec->rcX = 0;
2987 }
2988 }
2989
2990 /* check that the stats are sane */
2991 tl_assert(nEnts == stats__ctxt_tab_curr);
2992 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
2993
2994 /* visit all the referencing points, inc check ref counts */
2995 VG_(OSetGen_ResetIter)( oldrefTree );
2996 while ( (oldref = VG_(OSetGen_Next)( oldrefTree )) ) {
2997 tl_assert(oldref->magic == OldRef_MAGIC);
2998 for (i = 0; i < N_OLDREF_ACCS; i++) {
2999 if (oldref->accs[i].thr) {
3000 tl_assert(oldref->accs[i].rcec);
3001 tl_assert(oldref->accs[i].rcec->magic == RCEC_MAGIC);
3002 oldref->accs[i].rcec->rcX++;
3003 } else {
3004 tl_assert(!oldref->accs[i].rcec);
3005 }
3006 }
3007 }
3008
3009 /* compare check ref counts with actual */
3010 for (i = 0; i < N_RCEC_TAB; i++) {
3011 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
3012 tl_assert(rcec->rc == rcec->rcX);
3013 }
3014 }
3015}
3016
3017static void event_map_maybe_GC ( void )
3018{
3019 OldRef* oldref;
3020 UWord keyW, valW, retained, maxGen;
3021 WordFM* genMap;
3022 XArray* refs2del;
3023 Word i, j, n2del;
3024
3025 if (LIKELY(oldrefTreeN < EVENT_MAP_GC_AT))
3026 return;
3027
3028 if (0)
3029 VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
3030
3031 /* Check our counting is sane */
3032 tl_assert(oldrefTreeN == (UWord) VG_(OSetGen_Size)( oldrefTree ));
3033
3034 /* Check the reference counts */
3035 event_map__check_reference_counts( True/*before*/ );
3036
3037 /* Compute the distribution of generation values in the ref tree */
3038 /* genMap :: generation-number -> count-of-nodes-with-that-number */
3039 genMap = VG_(newFM)( HG_(zalloc), "libhb.emmG.1",
3040 HG_(free), NULL );
3041
3042 VG_(OSetGen_ResetIter)( oldrefTree );
3043 while ( (oldref = VG_(OSetGen_Next)( oldrefTree )) ) {
3044 UWord key = oldref->gen;
3045 keyW = valW = 0;
3046 if (VG_(lookupFM)(genMap, &keyW, &valW, key )) {
3047 tl_assert(keyW == key);
3048 tl_assert(valW > 0);
3049 }
3050 /* now valW is the old count for generation 'key' */
3051 VG_(addToFM)(genMap, key, valW+1);
3052 }
3053
3054 tl_assert(VG_(sizeFM)(genMap) > 0);
3055
3056 retained = oldrefTreeN;
3057 maxGen = 0;
3058 VG_(initIterFM)( genMap );
3059 while (VG_(nextIterFM)( genMap, &keyW, &valW )) {
3060 tl_assert(keyW > 0); /* can't allow a generation # 0 */
3061 if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW );
3062 tl_assert(keyW >= maxGen);
3063 tl_assert(retained >= valW);
3064 if (retained - valW
3065 > (UWord)(EVENT_MAP_GC_AT * EVENT_MAP_GC_DISCARD_FRACTION)) {
3066 retained -= valW;
3067 maxGen = keyW;
3068 } else {
3069 break;
3070 }
3071 }
3072 VG_(doneIterFM)( genMap );
3073
3074 VG_(printf)(
3075 "libhb: EvM GC: delete generations %lu and below, "
3076 "retaining %lu entries\n",
3077 maxGen, retained );
3078
3079 VG_(deleteFM)( genMap, NULL, NULL );
3080
3081 /* If this assertion fails, it means the entire tree is a single
3082 generation, so the scheme above cannot discard anything. Some
3083 stop-gap measure would then be needed instead, such as randomly
3084 deleting half the entries. */
3085 tl_assert(retained < oldrefTreeN);
3086
3087 /* Now make up a big list of the oldrefTree entries we want to
3088 delete. We can't simultaneously traverse the tree and delete
3089 stuff from it, so first we need to copy them off somewhere
3090 else. (sigh) */
3091 refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.1",
3092 HG_(free), sizeof(OldRef*) );
3093
3094 VG_(OSetGen_ResetIter)( oldrefTree );
3095 while ( (oldref = VG_(OSetGen_Next)( oldrefTree )) ) {
3096 tl_assert(oldref->magic == OldRef_MAGIC);
3097 if (oldref->gen <= maxGen) {
3098 VG_(addToXA)( refs2del, &oldref );
3099 }
3100 }
3101
3102 n2del = VG_(sizeXA)( refs2del );
3103 tl_assert(n2del == (Word)(oldrefTreeN - retained));
3104
3105 if (0) VG_(printf)("%s","deleting entries\n");
3106 for (i = 0; i < n2del; i++) {
3107 void* nd;
3108 OldRef* ref = *(OldRef**)VG_(indexXA)( refs2del, i );
3109 tl_assert(ref);
3110 tl_assert(ref->magic == OldRef_MAGIC);
3111 for (j = 0; j < N_OLDREF_ACCS; j++) {
3112 if (ref->accs[j].rcec) {
3113 tl_assert(ref->accs[j].thr);
3114 stats__ctxt_rcdec3++;
3115 ctxt__rcdec( ref->accs[j].rcec );
3116 } else {
3117 tl_assert(!ref->accs[j].thr);
3118 }
3119 }
3120 nd = VG_(OSetGen_Remove)( oldrefTree, ref );
3121 VG_(OSetGen_FreeNode)( oldrefTree, nd );
3122 }
3123
3124 VG_(deleteXA)( refs2del );
3125
3126 tl_assert( VG_(OSetGen_Size)( oldrefTree ) == retained );
3127
3128 oldrefTreeN = retained;
3129 oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
3130
3131 /* Throw away all RCECs with zero reference counts */
3132 for (i = 0; i < N_RCEC_TAB; i++) {
3133 RCEC** pp = &contextTab[i];
3134 RCEC* p = *pp;
3135 while (p) {
3136 if (p->rc == 0) {
3137 *pp = p->next;
3138 HG_(free)(p);
3139 p = *pp;
3140 tl_assert(stats__ctxt_tab_curr > 0);
3141 stats__ctxt_tab_curr--;
3142 } else {
3143 pp = &p->next;
3144 p = p->next;
3145 }
3146 }
3147 }
3148
3149 /* Check the reference counts */
3150 event_map__check_reference_counts( False/*after*/ );
3151
3152 //if (0)
3153 //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
3154 // VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
3155
3156}
3157
3158
3159/////////////////////////////////////////////////////////
3160// //
3161// Core MSM //
3162// //
3163/////////////////////////////////////////////////////////
3164
3165#define MSM_CONFACC 1
3166
3167#define MSM_RACE2ERR 1
3168
3169#define MSM_CHECK 0
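/* The flags above, as used by the code below: MSM_CONFACC causes
   accesses which change a C state to be recorded in the event map
   (event_map_bind), so that record_race_info can later report the
   conflicting thread's stack trace.  MSM_RACE2ERR causes a racing
   access to push the location into the error state E, rather than
   merely joining the vector clocks.  MSM_CHECK enables (expensive)
   sanity checks on SVals at entry to and exit from msm_read and
   msm_write. */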
3170
3171static ULong stats__msm_read = 0;
3172static ULong stats__msm_read_change = 0;
3173static ULong stats__msm_write = 0;
3174static ULong stats__msm_write_change = 0;
3175
3176__attribute__((noinline))
3177static void record_race_info ( Thr* acc_thr,
3178 Addr acc_addr, SizeT szB, Bool isWrite,
3179 SVal svOld, SVal svNew )
3180{
3181 Bool found;
3182 Thr* thrp = NULL;
3183 struct _EC* where = NULL;
3184 struct _EC* wherep = NULL;
3185 where = main_get_EC( acc_thr );
3186 found = event_map_lookup( &wherep, &thrp, acc_thr, acc_addr );
3187 if (found) {
3188 tl_assert(wherep);
3189 tl_assert(thrp);
3190 tl_assert(thrp->opaque);
3191 tl_assert(acc_thr->opaque);
3192 HG_(record_error_Race)( acc_thr->opaque, acc_addr,
3193 isWrite, szB, NULL/*mb_lastlock*/,
3194 wherep, thrp->opaque );
3195 } else {
3196 tl_assert(!wherep);
3197 tl_assert(!thrp);
3198 tl_assert(acc_thr->opaque);
3199 HG_(record_error_Race)( acc_thr->opaque, acc_addr,
3200 isWrite, szB, NULL/*mb_lastlock*/,
3201 NULL, NULL );
3202 }
3203}
3204
3205static Bool is_sane_SVal_C ( SVal sv ) {
3206 POrd ord;
3207 if (!SVal__isC(sv)) return True;
3208 ord = VtsID__getOrdering( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
3209 if (ord == POrd_EQ || ord == POrd_LT) return True;
3210 return False;
3211}
3212
3213
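/* In outline, the transitions implemented by msm_read and msm_write
   are as follows (a summary only; the code below is authoritative):

     read by thread T of a location in state C(Rmin,Wmin):
       race-free iff Rmin <= T.viR; new state C(Rmin, Wmin |_| T.viW)
     write by thread T of a location in state C(Rmin,Wmin):
       race-free iff Wmin <= T.viW; new state C(T.viW, T.viW)

   where "<=" is the partial order computed by VtsID__getOrdering and
   "|_|" is the join computed by VtsID__join2.  On a race, with
   MSM_RACE2ERR set as above, the location moves to the error state E;
   locations already in state A (no access) or E are left unchanged by
   both operations. */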
3214/* Compute new state following a read */
3215static inline SVal msm_read ( SVal svOld,
3216 /* The following are only needed for
3217 creating error reports. */
3218 Thr* acc_thr,
3219 Addr acc_addr, SizeT szB )
3220{
3221 SVal svNew = SVal_INVALID;
3222 stats__msm_read++;
3223
3224 /* Redundant sanity check on the constraints */
3225 if (MSM_CHECK) {
3226 tl_assert(is_sane_SVal_C(svOld));
3227 }
3228
3229 if (SVal__isC(svOld)) {
3230 POrd ord;
3231 VtsID tviR = acc_thr->viR;
3232 VtsID tviW = acc_thr->viW;
3233 VtsID rmini = SVal__unC_Rmin(svOld);
3234 VtsID wmini = SVal__unC_Wmin(svOld);
3235
3236 ord = VtsID__getOrdering(rmini,tviR);
3237 if (ord == POrd_EQ || ord == POrd_LT) {
3238 /* no race */
3239 /* Note: RWLOCK subtlety: use tviW, not tviR */
3240 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
3241 goto out;
3242 } else {
3243 svNew = MSM_RACE2ERR
3244 ? SVal__mkE()
3245 : SVal__mkC( rmini, VtsID__join2(wmini,tviR) );
3246 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
3247 svOld, svNew );
3248 goto out;
3249 }
3250 }
3251 if (SVal__isA(svOld)) {
3252 /* reading no-access memory (sigh); leave unchanged */
3253 /* check for no pollution */
3254 tl_assert(svOld == SVal_NOACCESS);
3255 svNew = SVal_NOACCESS;
3256 goto out;
3257 }
3258 if (SVal__isE(svOld)) {
3259 /* no race, location is already "in error" */
3260 svNew = SVal__mkE();
3261 goto out;
3262 }
3263 VG_(printf)("msm_read: bad svOld: 0x%016llx\n", svOld);
3264 tl_assert(0);
3265
3266 out:
3267 if (MSM_CHECK) {
3268 tl_assert(is_sane_SVal_C(svNew));
3269 }
3270 tl_assert(svNew != SVal_INVALID);
3271 if (svNew != svOld) {
3272 if (MSM_CONFACC && SVal__isC(svOld) && SVal__isC(svNew)) {
3273 event_map_bind( acc_addr, acc_thr );
3274 stats__msm_read_change++;
3275 }
3276 }
3277 return svNew;
3278}
3279
3280
3281/* Compute new state following a write */
3282static inline SVal msm_write ( SVal svOld,
3283 /* The following are only needed for
3284 creating error reports. */
3285 Thr* acc_thr,
3286 Addr acc_addr, SizeT szB )
3287{
3288 SVal svNew = SVal_INVALID;
3289 stats__msm_write++;
3290
3291 /* Redundant sanity check on the constraints */
3292 if (MSM_CHECK) {
3293 tl_assert(is_sane_SVal_C(svOld));
3294 }
3295
3296 if (SVal__isC(svOld)) {
3297 POrd ord;
3298 VtsID tviW = acc_thr->viW;
3299 VtsID wmini = SVal__unC_Wmin(svOld);
3300
3301 ord = VtsID__getOrdering(wmini,tviW);
3302 if (ord == POrd_EQ || ord == POrd_LT) {
3303 /* no race */
3304 svNew = SVal__mkC( tviW, tviW );
3305 goto out;
3306 } else {
3307 VtsID rmini = SVal__unC_Rmin(svOld);
3308 svNew = MSM_RACE2ERR
3309 ? SVal__mkE()
3310 : SVal__mkC( VtsID__join2(rmini,tviW),
3311 VtsID__join2(wmini,tviW) );
3312 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
3313 svOld, svNew );
3314 goto out;
3315 }
3316 }
3317 if (SVal__isA(svOld)) {
3318 /* writing no-access memory (sigh); leave unchanged */
3319 /* check for no pollution */
3320 tl_assert(svOld == SVal_NOACCESS);
3321 svNew = SVal_NOACCESS;
3322 goto out;
3323 }
3324 if (SVal__isE(svOld)) {
3325 /* no race, location is already "in error" */
3326 svNew = SVal__mkE();
3327 goto out;
3328 }
3329 VG_(printf)("msm_write: bad svOld: 0x%016llx\n", svOld);
3330 tl_assert(0);
3331
3332 out:
3333 if (MSM_CHECK) {
3334 tl_assert(is_sane_SVal_C(svNew));
3335 }
3336 tl_assert(svNew != SVal_INVALID);
3337 if (svNew != svOld) {
3338 if (MSM_CONFACC && SVal__isC(svOld) && SVal__isC(svNew)) {
3339 event_map_bind( acc_addr, acc_thr );
3340 stats__msm_write_change++;
3341 }
3342 }
3343 return svNew;
3344}
3345
3346
3347/////////////////////////////////////////////////////////
3348// //
3349// Apply core MSM to specific memory locations //
3350// //
3351/////////////////////////////////////////////////////////
3352
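/* The zsm_apply* functions below all follow the same pattern (a
   summary only; the code is authoritative): locate the cache line and
   tree node for the address; if the shadow tree already holds a value
   at the required granularity, apply the MSM to it in place; otherwise
   either pull the value down to this level, or -- for misaligned
   accesses, or where the valid value lives further down the tree --
   split the access into two half-sized accesses ("slowcase"). */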
3353/*------------- ZSM accesses: 8 bit apply ------------- */
3354
3355void zsm_apply8___msm_read ( Thr* thr, Addr a ) {
3356 CacheLine* cl;
3357 UWord cloff, tno, toff;
3358 SVal svOld, svNew;
3359 UShort descr;
3360 stats__cline_read8s++;
3361 cl = get_cacheline(a);
3362 cloff = get_cacheline_offset(a);
3363 tno = get_treeno(a);
3364 toff = get_tree_offset(a); /* == 0 .. 7 */
3365 descr = cl->descrs[tno];
3366 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3367 SVal* tree = &cl->svals[tno << 3];
3368 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3369 if (SCE_CACHELINE)
3370 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3371 }
3372 svOld = cl->svals[cloff];
3373 svNew = msm_read( svOld, thr,a,1 );
3374 tl_assert(svNew != SVal_INVALID);
3375 cl->svals[cloff] = svNew;
3376}
3377
3378void zsm_apply8___msm_write ( Thr* thr, Addr a ) {
3379 CacheLine* cl;
3380 UWord cloff, tno, toff;
3381 SVal svOld, svNew;
3382 UShort descr;
3383 stats__cline_read8s++;
3384 cl = get_cacheline(a);
3385 cloff = get_cacheline_offset(a);
3386 tno = get_treeno(a);
3387 toff = get_tree_offset(a); /* == 0 .. 7 */
3388 descr = cl->descrs[tno];
3389 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3390 SVal* tree = &cl->svals[tno << 3];
3391 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3392 if (SCE_CACHELINE)
3393 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3394 }
3395 svOld = cl->svals[cloff];
3396 svNew = msm_write( svOld, thr,a,1 );
3397 tl_assert(svNew != SVal_INVALID);
3398 cl->svals[cloff] = svNew;
3399}
3400
3401/*------------- ZSM accesses: 16 bit apply ------------- */
3402
3403void zsm_apply16___msm_read ( Thr* thr, Addr a ) {
3404 CacheLine* cl;
3405 UWord cloff, tno, toff;
3406 SVal svOld, svNew;
3407 UShort descr;
3408 stats__cline_read16s++;
3409 if (UNLIKELY(!aligned16(a))) goto slowcase;
3410 cl = get_cacheline(a);
3411 cloff = get_cacheline_offset(a);
3412 tno = get_treeno(a);
3413 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
3414 descr = cl->descrs[tno];
3415 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
3416 if (valid_value_is_below_me_16(descr, toff)) {
3417 goto slowcase;
3418 } else {
3419 SVal* tree = &cl->svals[tno << 3];
3420 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
3421 }
3422 if (SCE_CACHELINE)
3423 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3424 }
3425 svOld = cl->svals[cloff];
3426 svNew = msm_read( svOld, thr,a,2 );
3427 tl_assert(svNew != SVal_INVALID);
3428 cl->svals[cloff] = svNew;
3429 return;
3430 slowcase: /* misaligned, or must go further down the tree */
3431 stats__cline_16to8splits++;
3432 zsm_apply8___msm_read( thr, a + 0 );
3433 zsm_apply8___msm_read( thr, a + 1 );
3434}
3435
3436void zsm_apply16___msm_write ( Thr* thr, Addr a ) {
3437 CacheLine* cl;
3438 UWord cloff, tno, toff;
3439 SVal svOld, svNew;
3440 UShort descr;
3441 stats__cline_read16s++;
3442 if (UNLIKELY(!aligned16(a))) goto slowcase;
3443 cl = get_cacheline(a);
3444 cloff = get_cacheline_offset(a);
3445 tno = get_treeno(a);
3446 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
3447 descr = cl->descrs[tno];
3448 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
3449 if (valid_value_is_below_me_16(descr, toff)) {
3450 goto slowcase;
3451 } else {
3452 SVal* tree = &cl->svals[tno << 3];
3453 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
3454 }
3455 if (SCE_CACHELINE)
3456 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3457 }
3458 svOld = cl->svals[cloff];
3459 svNew = msm_write( svOld, thr,a,2 );
3460 tl_assert(svNew != SVal_INVALID);
3461 cl->svals[cloff] = svNew;
3462 return;
3463 slowcase: /* misaligned, or must go further down the tree */
3464 stats__cline_16to8splits++;
3465 zsm_apply8___msm_write( thr, a + 0 );
3466 zsm_apply8___msm_write( thr, a + 1 );
3467}
3468
3469/*------------- ZSM accesses: 32 bit apply ------------- */
3470
3471void zsm_apply32___msm_read ( Thr* thr, Addr a ) {
3472 CacheLine* cl;
3473 UWord cloff, tno, toff;
3474 SVal svOld, svNew;
3475 UShort descr;
3476 if (UNLIKELY(!aligned32(a))) goto slowcase;
3477 cl = get_cacheline(a);
3478 cloff = get_cacheline_offset(a);
3479 tno = get_treeno(a);
3480 toff = get_tree_offset(a); /* == 0 or 4 */
3481 descr = cl->descrs[tno];
3482 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
3483 if (valid_value_is_above_me_32(descr, toff)) {
3484 SVal* tree = &cl->svals[tno << 3];
3485 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
3486 } else {
3487 goto slowcase;
3488 }
3489 if (SCE_CACHELINE)
3490 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3491 }
3492 svOld = cl->svals[cloff];
3493 svNew = msm_read( svOld, thr,a,4 );
3494 tl_assert(svNew != SVal_INVALID);
3495 cl->svals[cloff] = svNew;
3496 return;
3497 slowcase: /* misaligned, or must go further down the tree */
3498 stats__cline_32to16splits++;
3499 zsm_apply16___msm_read( thr, a + 0 );
3500 zsm_apply16___msm_read( thr, a + 2 );
3501}
3502
3503void zsm_apply32___msm_write ( Thr* thr, Addr a ) {
3504 CacheLine* cl;
3505 UWord cloff, tno, toff;
3506 SVal svOld, svNew;
3507 UShort descr;
3508 if (UNLIKELY(!aligned32(a))) goto slowcase;
3509 cl = get_cacheline(a);
3510 cloff = get_cacheline_offset(a);
3511 tno = get_treeno(a);
3512 toff = get_tree_offset(a); /* == 0 or 4 */
3513 descr = cl->descrs[tno];
3514 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
3515 if (valid_value_is_above_me_32(descr, toff)) {
3516 SVal* tree = &cl->svals[tno << 3];
3517 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
3518 } else {
3519 goto slowcase;
3520 }
3521 if (SCE_CACHELINE)
3522 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3523 }
3524 svOld = cl->svals[cloff];
3525 svNew = msm_write( svOld, thr,a,4 );
3526 tl_assert(svNew != SVal_INVALID);
3527 cl->svals[cloff] = svNew;
3528 return;
3529 slowcase: /* misaligned, or must go further down the tree */
3530 stats__cline_32to16splits++;
3531 zsm_apply16___msm_write( thr, a + 0 );
3532 zsm_apply16___msm_write( thr, a + 2 );
3533}
3534
3535/*------------- ZSM accesses: 64 bit apply ------------- */
3536
3537void zsm_apply64___msm_read ( Thr* thr, Addr a ) {
3538 CacheLine* cl;
3539 UWord cloff, tno, toff;
3540 SVal svOld, svNew;
3541 UShort descr;
3542 stats__cline_read64s++;
3543 if (UNLIKELY(!aligned64(a))) goto slowcase;
3544 cl = get_cacheline(a);
3545 cloff = get_cacheline_offset(a);
3546 tno = get_treeno(a);
3547 toff = get_tree_offset(a); /* == 0, unused */
3548 descr = cl->descrs[tno];
3549 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
3550 goto slowcase;
3551 }
3552 svOld = cl->svals[cloff];
3553 svNew = msm_read( svOld, thr,a,8 );
3554 tl_assert(svNew != SVal_INVALID);
3555 cl->svals[cloff] = svNew;
3556 return;
3557 slowcase: /* misaligned, or must go further down the tree */
3558 stats__cline_64to32splits++;
3559 zsm_apply32___msm_read( thr, a + 0 );
3560 zsm_apply32___msm_read( thr, a + 4 );
3561}
3562
3563void zsm_apply64___msm_write ( Thr* thr, Addr a ) {
3564 CacheLine* cl;
3565 UWord cloff, tno, toff;
3566 SVal svOld, svNew;
3567 UShort descr;
3568 stats__cline_read64s++;
3569 if (UNLIKELY(!aligned64(a))) goto slowcase;
3570 cl = get_cacheline(a);
3571 cloff = get_cacheline_offset(a);
3572 tno = get_treeno(a);
3573 toff = get_tree_offset(a); /* == 0, unused */
3574 descr = cl->descrs[tno];
3575 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
3576 goto slowcase;
3577 }
3578 svOld = cl->svals[cloff];
3579 svNew = msm_write( svOld, thr,a,8 );
3580 tl_assert(svNew != SVal_INVALID);
3581 cl->svals[cloff] = svNew;
3582 return;
3583 slowcase: /* misaligned, or must go further down the tree */
3584 stats__cline_64to32splits++;
3585 zsm_apply32___msm_write( thr, a + 0 );
3586 zsm_apply32___msm_write( thr, a + 4 );
3587}
3588
3589/*--------------- ZSM accesses: 8 bit write --------------- */
3590
3591static
3592void zsm_write8 ( Addr a, SVal svNew ) {
3593 CacheLine* cl;
3594 UWord cloff, tno, toff;
3595 UShort descr;
3596 stats__cline_set8s++;
3597 cl = get_cacheline(a);
3598 cloff = get_cacheline_offset(a);
3599 tno = get_treeno(a);
3600 toff = get_tree_offset(a); /* == 0 .. 7 */
3601 descr = cl->descrs[tno];
3602 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3603 SVal* tree = &cl->svals[tno << 3];
3604 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3605 if (SCE_CACHELINE)
3606 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3607 }
3608 tl_assert(svNew != SVal_INVALID);
3609 cl->svals[cloff] = svNew;
3610}
3611
3612/*--------------- ZSM accesses: 16 bit write --------------- */
3613
3614static
3615void zsm_write16 ( Addr a, SVal svNew ) {
3616 CacheLine* cl;
3617 UWord cloff, tno, toff;
3618 UShort descr;
3619 stats__cline_set16s++;
3620 if (UNLIKELY(!aligned16(a))) goto slowcase;
3621 cl = get_cacheline(a);
3622 cloff = get_cacheline_offset(a);
3623 tno = get_treeno(a);
3624 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
3625 descr = cl->descrs[tno];
3626 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
3627 if (valid_value_is_below_me_16(descr, toff)) {
3628 /* Writing at this level. Need to fix up 'descr'. */
3629 cl->descrs[tno] = pullup_descr_to_16(descr, toff);
3630 /* At this point, the tree does not match cl->descrs[tno] any
3631 more. The assignments below will fix it up. */
3632 } else {
3633 /* We can't indiscriminately write on the w16 node as in the
3634 w64 case, as that might make the node inconsistent with
3635 its parent. So first, pull down to this level. */
3636 SVal* tree = &cl->svals[tno << 3];
3637 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
3638 if (SCE_CACHELINE)
3639 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3640 }
3641 }
3642 tl_assert(svNew != SVal_INVALID);
3643 cl->svals[cloff + 0] = svNew;
3644 cl->svals[cloff + 1] = SVal_INVALID;
3645 return;
3646 slowcase: /* misaligned */
3647 stats__cline_16to8splits++;
3648 zsm_write8( a + 0, svNew );
3649 zsm_write8( a + 1, svNew );
3650}
3651
3652/*--------------- ZSM accesses: 32 bit write --------------- */
3653
3654static
3655void zsm_write32 ( Addr a, SVal svNew ) {
3656 CacheLine* cl;
3657 UWord cloff, tno, toff;
3658 UShort descr;
3659 stats__cline_set32s++;
3660 if (UNLIKELY(!aligned32(a))) goto slowcase;
3661 cl = get_cacheline(a);
3662 cloff = get_cacheline_offset(a);
3663 tno = get_treeno(a);
3664 toff = get_tree_offset(a); /* == 0 or 4 */
3665 descr = cl->descrs[tno];
3666 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
3667 if (valid_value_is_above_me_32(descr, toff)) {
3668 /* We can't indiscriminately write on the w32 node as in the
3669 w64 case, as that might make the node inconsistent with
3670 its parent. So first, pull down to this level. */
3671 SVal* tree = &cl->svals[tno << 3];
3672 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
3673 if (SCE_CACHELINE)
3674 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3675 } else {
3676 /* Writing at this level. Need to fix up 'descr'. */
3677 cl->descrs[tno] = pullup_descr_to_32(descr, toff);
3678 /* At this point, the tree no longer matches cl->descrs[tno].
3679 The assignments below will fix it up. */
3680 }
3681 }
3682 tl_assert(svNew != SVal_INVALID);
3683 cl->svals[cloff + 0] = svNew;
3684 cl->svals[cloff + 1] = SVal_INVALID;
3685 cl->svals[cloff + 2] = SVal_INVALID;
3686 cl->svals[cloff + 3] = SVal_INVALID;
3687 return;
3688 slowcase: /* misaligned */
3689 stats__cline_32to16splits++;
3690 zsm_write16( a + 0, svNew );
3691 zsm_write16( a + 2, svNew );
3692}
3693
3694/*--------------- ZSM accesses: 64 bit write --------------- */
3695
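/* An aligned 64-bit write covers an entire tree, so no pulldown or
   pullup is needed: the descriptor is simply reset to TREE_DESCR_64,
   svNew becomes the single value for the tree in slot 0, and the
   remaining seven SVal slots are marked SVal_INVALID. */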
3696static
3697void zsm_write64 ( Addr a, SVal svNew ) {
3698 CacheLine* cl;
3699 UWord cloff, tno, toff;
3700 stats__cline_set64s++;
3701 if (UNLIKELY(!aligned64(a))) goto slowcase;
3702 cl = get_cacheline(a);
3703 cloff = get_cacheline_offset(a);
3704 tno = get_treeno(a);
3705 toff = get_tree_offset(a); /* == 0 */
3706 cl->descrs[tno] = TREE_DESCR_64;
3707 tl_assert(svNew != SVal_INVALID);
3708 cl->svals[cloff + 0] = svNew;
3709 cl->svals[cloff + 1] = SVal_INVALID;
3710 cl->svals[cloff + 2] = SVal_INVALID;
3711 cl->svals[cloff + 3] = SVal_INVALID;
3712 cl->svals[cloff + 4] = SVal_INVALID;
3713 cl->svals[cloff + 5] = SVal_INVALID;
3714 cl->svals[cloff + 6] = SVal_INVALID;
3715 cl->svals[cloff + 7] = SVal_INVALID;
3716 return;
3717 slowcase: /* misaligned */
3718 stats__cline_64to32splits++;
3719 zsm_write32( a + 0, svNew );
3720 zsm_write32( a + 4, svNew );
3721}
3722
3723/*------------- ZSM accesses: 8 bit read/copy ------------- */
3724
3725static
3726SVal zsm_read8 ( Addr a ) {
3727 CacheLine* cl;
3728 UWord cloff, tno, toff;
3729 UShort descr;
3730 stats__cline_get8s++;
3731 cl = get_cacheline(a);
3732 cloff = get_cacheline_offset(a);
3733 tno = get_treeno(a);
3734 toff = get_tree_offset(a); /* == 0 .. 7 */
3735 descr = cl->descrs[tno];
3736 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3737 SVal* tree = &cl->svals[tno << 3];
3738 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3739 }
3740 return cl->svals[cloff];
3741}
3742
3743static void zsm_copy8 ( Addr src, Addr dst, Bool uu_normalise ) {
3744 SVal sv;
3745 stats__cline_copy8s++;
3746 sv = zsm_read8( src );
3747 zsm_write8( dst, sv );
3748}
3749
3750/* ------------ Shadow memory range setting ops ------------ */
3751
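/* Both range-apply functions below use the same alignment ladder:
   peel off 1-, 2- and 4-byte prefixes until the address is 8-aligned,
   process the bulk of the range in 8-byte chunks, then handle any
   4-, 2- and 1-byte suffix.  This keeps most of the work on the
   widest (cheapest) access paths. */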
3752void zsm_apply_range___msm_read ( Thr* thr,
3753 Addr a, SizeT len )
3754{
3755 /* fast track a couple of common cases */
3756 if (len == 4 && aligned32(a)) {
3757 zsm_apply32___msm_read( thr, a );
3758 return;
3759 }
3760 if (len == 8 && aligned64(a)) {
3761 zsm_apply64___msm_read( thr, a );
3762 return;
3763 }
3764
3765 /* be completely general (but as efficient as possible) */
3766 if (len == 0) return;
3767
3768 if (!aligned16(a) && len >= 1) {
3769 zsm_apply8___msm_read( thr, a );
3770 a += 1;
3771 len -= 1;
3772 tl_assert(aligned16(a));
3773 }
3774 if (len == 0) return;
3775
3776 if (!aligned32(a) && len >= 2) {
3777 zsm_apply16___msm_read( thr, a );
3778 a += 2;
3779 len -= 2;
3780 tl_assert(aligned32(a));
3781 }
3782 if (len == 0) return;
3783
3784 if (!aligned64(a) && len >= 4) {
3785 zsm_apply32___msm_read( thr, a );
3786 a += 4;
3787 len -= 4;
3788 tl_assert(aligned64(a));
3789 }
3790 if (len == 0) return;
3791
3792 if (len >= 8) {
3793 tl_assert(aligned64(a));
3794 while (len >= 8) {
3795 zsm_apply64___msm_read( thr, a );
3796 a += 8;
3797 len -= 8;
3798 }
3799 tl_assert(aligned64(a));
3800 }
3801 if (len == 0) return;
3802
3803 if (len >= 4)
3804 tl_assert(aligned32(a));
3805 if (len >= 4) {
3806 zsm_apply32___msm_read( thr, a );
3807 a += 4;
3808 len -= 4;
3809 }
3810 if (len == 0) return;
3811
3812 if (len >= 2)
3813 tl_assert(aligned16(a));
3814 if (len >= 2) {
3815 zsm_apply16___msm_read( thr, a );
3816 a += 2;
3817 len -= 2;
3818 }
3819 if (len == 0) return;
3820
3821 if (len >= 1) {
3822 zsm_apply8___msm_read( thr, a );
3823 a += 1;
3824 len -= 1;
3825 }
3826 tl_assert(len == 0);
3827}
3828
3829
3830
3831void zsm_apply_range___msm_write ( Thr* thr,
3832 Addr a, SizeT len )
3833{
3834 /* fast track a couple of common cases */
3835 if (len == 4 && aligned32(a)) {
3836 zsm_apply32___msm_write( thr, a );
3837 return;
3838 }
3839 if (len == 8 && aligned64(a)) {
3840 zsm_apply64___msm_write( thr, a );
3841 return;
3842 }
3843
3844 /* be completely general (but as efficient as possible) */
3845 if (len == 0) return;
3846
3847 if (!aligned16(a) && len >= 1) {
3848 zsm_apply8___msm_write( thr, a );
3849 a += 1;
3850 len -= 1;
3851 tl_assert(aligned16(a));
3852 }
3853 if (len == 0) return;
3854
3855 if (!aligned32(a) && len >= 2) {
3856 zsm_apply16___msm_write( thr, a );
3857 a += 2;
3858 len -= 2;
3859 tl_assert(aligned32(a));
3860 }
3861 if (len == 0) return;
3862
3863 if (!aligned64(a) && len >= 4) {
3864 zsm_apply32___msm_write( thr, a );
3865 a += 4;
3866 len -= 4;
3867 tl_assert(aligned64(a));
3868 }
3869 if (len == 0) return;
3870
3871 if (len >= 8) {
3872 tl_assert(aligned64(a));
3873 while (len >= 8) {
3874 zsm_apply64___msm_write( thr, a );
3875 a += 8;
3876 len -= 8;
3877 }
3878 tl_assert(aligned64(a));
3879 }
3880 if (len == 0) return;
3881
3882 if (len >= 4)
3883 tl_assert(aligned32(a));
3884 if (len >= 4) {
3885 zsm_apply32___msm_write( thr, a );
3886 a += 4;
3887 len -= 4;
3888 }
3889 if (len == 0) return;
3890
3891 if (len >= 2)
3892 tl_assert(aligned16(a));
3893 if (len >= 2) {
3894 zsm_apply16___msm_write( thr, a );
3895 a += 2;
3896 len -= 2;
3897 }
3898 if (len == 0) return;
3899
3900 if (len >= 1) {
3901 zsm_apply8___msm_write( thr, a );
3902 a += 1;
3903 len -= 1;
3904 }
3905 tl_assert(len == 0);
3906}
3907
3908
3909
3910
3911/* Block-copy states (needed for implementing realloc()). */
3912
3913static void zsm_copy_range ( Addr src, Addr dst, SizeT len )
3914{
3915 SizeT i;
3916 if (len == 0)
3917 return;
3918
3919 /* assert for non-overlappingness */
3920 tl_assert(src+len <= dst || dst+len <= src);
3921
3922 /* Keep it simple: copy byte by byte. But, so as not to wreck
3923 performance for later accesses to dst[0 .. len-1], normalise
3924 destination lines as we finish with them, and also normalise the
3925 lines containing the first and last addresses. */
3926 for (i = 0; i < len; i++) {
3927 Bool normalise
3928 = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
3929 || i == 0 /* first in range */
3930 || i == len-1; /* last in range */
3931 zsm_copy8( src+i, dst+i, normalise );
3932 }
3933}
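/* Note: zsm_copy8 currently ignores its 'uu_normalise' argument (the
   uu_ prefix marks it unused), so the normalisation described above is
   computed but not actually applied at present. */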
3934
3935
3936/* For setting address ranges to a given value. Has considerable
3937 sophistication so as to avoid generating large numbers of pointless
3938 cache loads/writebacks for large ranges. */
3939
3940/* Do small ranges in-cache, in the obvious way. */
3941static
3942void zsm_set_range_SMALL ( Addr a, SizeT len, SVal svNew )
3943{
3944 /* fast track a couple of common cases */
3945 if (len == 4 && aligned32(a)) {
3946 zsm_write32( a, svNew );
3947 return;
3948 }
3949 if (len == 8 && aligned64(a)) {
3950 zsm_write64( a, svNew );
3951 return;
3952 }
3953
3954 /* be completely general (but as efficient as possible) */
3955 if (len == 0) return;
3956
3957 if (!aligned16(a) && len >= 1) {
3958 zsm_write8( a, svNew );
3959 a += 1;
3960 len -= 1;
3961 tl_assert(aligned16(a));
3962 }
3963 if (len == 0) return;
3964
3965 if (!aligned32(a) && len >= 2) {
3966 zsm_write16( a, svNew );
3967 a += 2;
3968 len -= 2;
3969 tl_assert(aligned32(a));
3970 }
3971 if (len == 0) return;
3972
3973 if (!aligned64(a) && len >= 4) {
3974 zsm_write32( a, svNew );
3975 a += 4;
3976 len -= 4;
3977 tl_assert(aligned64(a));
3978 }
3979 if (len == 0) return;
3980
3981 if (len >= 8) {
3982 tl_assert(aligned64(a));
3983 while (len >= 8) {
3984 zsm_write64( a, svNew );
3985 a += 8;
3986 len -= 8;
3987 }
3988 tl_assert(aligned64(a));
3989 }
3990 if (len == 0) return;
3991
3992 if (len >= 4)
3993 tl_assert(aligned32(a));
3994 if (len >= 4) {
3995 zsm_write32( a, svNew );
3996 a += 4;
3997 len -= 4;
3998 }
3999 if (len == 0) return;
4000
4001 if (len >= 2)
4002 tl_assert(aligned16(a));
4003 if (len >= 2) {
4004 zsm_write16( a, svNew );
4005 a += 2;
4006 len -= 2;
4007 }
4008 if (len == 0) return;
4009
4010 if (len >= 1) {
4011 zsm_write8( a, svNew );
4012 a += 1;
4013 len -= 1;
4014 }
4015 tl_assert(len == 0);
4016}
4017
4018
4019/* If we're doing a small range, hand off to zsm_set_range_SMALL. But
4020 for larger ranges, try to operate directly on the out-of-cache
4021 representation, rather than dragging lines into the cache,
4022 overwriting them, and forcing them out. This turns out to be an
4023 important performance optimisation. */
4024
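/* For example (assuming N_LINE_ARANGE is 64): zsm_set_range(0x1005, 200, sv)
   becomes a 59-byte prefix [0x1005, 0x1040), two whole lines
   [0x1040, 0x10C0) and a 13-byte suffix [0x10C0, 0x10CD).  The prefix and
   suffix go through zsm_set_range_SMALL; each whole line is overwritten in
   the cache if it already happens to be resident, and otherwise has its Z
   representation rewritten in place, so it is never dragged into the cache
   just to be overwritten. */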
4025static void zsm_set_range ( Addr a, SizeT len, SVal svNew )
4026{
4027 tl_assert(svNew != SVal_INVALID);
4028 stats__cache_make_New_arange += (ULong)len;
4029
4030 if (0 && len > 500)
4031 VG_(printf)("make New ( %#lx, %ld )\n", a, len );
4032
4033 if (0) {
4034 static UWord n_New_in_cache = 0;
4035 static UWord n_New_not_in_cache = 0;
4036 /* tag is 'a' with the in-line offset masked out,
4037 eg a[31]..a[4] 0000 */
4038 Addr tag = a & ~(N_LINE_ARANGE - 1);
4039 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
4040 if (LIKELY(tag == cache_shmem.tags0[wix])) {
4041 n_New_in_cache++;
4042 } else {
4043 n_New_not_in_cache++;
4044 }
4045 if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
4046 VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
4047 n_New_in_cache, n_New_not_in_cache );
4048 }
4049
4050 if (LIKELY(len < 2 * N_LINE_ARANGE)) {
4051 zsm_set_range_SMALL( a, len, svNew );
4052 } else {
4053 Addr before_start = a;
4054 Addr aligned_start = cacheline_ROUNDUP(a);
4055 Addr after_start = cacheline_ROUNDDN(a + len);
4056 UWord before_len = aligned_start - before_start;
4057 UWord aligned_len = after_start - aligned_start;
4058 UWord after_len = a + len - after_start;
4059 tl_assert(before_start <= aligned_start);
4060 tl_assert(aligned_start <= after_start);
4061 tl_assert(before_len < N_LINE_ARANGE);
4062 tl_assert(after_len < N_LINE_ARANGE);
4063 tl_assert(get_cacheline_offset(aligned_start) == 0);
4064 if (get_cacheline_offset(a) == 0) {
4065 tl_assert(before_len == 0);
4066 tl_assert(a == aligned_start);
4067 }
4068 if (get_cacheline_offset(a+len) == 0) {
4069 tl_assert(after_len == 0);
4070 tl_assert(after_start == a+len);
4071 }
4072 if (before_len > 0) {
4073 zsm_set_range_SMALL( before_start, before_len, svNew );
4074 }
4075 if (after_len > 0) {
4076 zsm_set_range_SMALL( after_start, after_len, svNew );
4077 }
4078 stats__cache_make_New_inZrep += (ULong)aligned_len;
4079
4080 while (1) {
4081 Addr tag;
4082 UWord wix;
4083 if (aligned_start >= after_start)
4084 break;
4085 tl_assert(get_cacheline_offset(aligned_start) == 0);
4086 tag = aligned_start & ~(N_LINE_ARANGE - 1);
4087 wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
4088 if (tag == cache_shmem.tags0[wix]) {
4089 UWord i;
4090 for (i = 0; i < N_LINE_ARANGE / 8; i++)
4091 zsm_write64( aligned_start + i * 8, svNew );
4092 } else {
4093 UWord i;
4094 Word zix;
4095 SecMap* sm;
4096 LineZ* lineZ;
4097 /* This line is not in the cache. Do not force it in; instead
4098 modify it in-place. */
4099 /* find the Z line to write in and rcdec it or the
4100 associated F line. */
4101 find_Z_for_writing( &sm, &zix, tag );
4102 tl_assert(sm);
4103 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
4104 lineZ = &sm->linesZ[zix];
4105 lineZ->dict[0] = svNew;
4106 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
4107 for (i = 0; i < N_LINE_ARANGE/4; i++)
4108 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
4109 rcinc_LineZ(lineZ);
4110 }
4111 aligned_start += N_LINE_ARANGE;
4112 aligned_len -= N_LINE_ARANGE;
4113 }
4114 tl_assert(aligned_start == after_start);
4115 tl_assert(aligned_len == 0);
4116 }
4117}
4118
4119
4120/////////////////////////////////////////////////////////
4121// //
4122// Synchronisation objects //
4123// //
4124/////////////////////////////////////////////////////////
4125
4126// (UInt) `echo "Synchronisation object" | md5sum`
4127#define SO_MAGIC 0x56b3c5b0U
4128
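/* A synchronisation object ("SO") is the channel through which one
   thread's vector clocks reach another: libhb_so_send copies (or joins)
   the sender's clocks into viR/viW, and libhb_so_recv joins them into
   the receiver.  'magic' is a simple corruption check. */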
4129struct _SO {
4130 VtsID viR; /* r-clock of sender */
4131 VtsID viW; /* w-clock of sender */
4132 UInt magic;
4133};
4134
4135static SO* SO__Alloc ( void ) {
4136 SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
4137 so->viR = VtsID_INVALID;
4138 so->viW = VtsID_INVALID;
4139 so->magic = SO_MAGIC;
4140 return so;
4141}
4142static void SO__Dealloc ( SO* so ) {
4143 tl_assert(so);
4144 tl_assert(so->magic == SO_MAGIC);
4145 if (so->viR == VtsID_INVALID) {
4146 tl_assert(so->viW == VtsID_INVALID);
4147 } else {
4148 tl_assert(so->viW != VtsID_INVALID);
4149 VtsID__rcdec(so->viR);
4150 VtsID__rcdec(so->viW);
4151 }
4152 so->magic = 0;
4153 HG_(free)( so );
4154}
4155
4156
4157/////////////////////////////////////////////////////////
4158// //
4159// Top Level API //
4160// //
4161/////////////////////////////////////////////////////////
4162
4163static void show_thread_state ( HChar* str, Thr* t )
4164{
4165 if (1) return; /* tracing disabled; change to 0 to enable */
4166 if (t->viR == t->viW) {
4167 VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
4168 VtsID__pp( t->viR );
4169 VG_(printf)("%s","\n");
4170 } else {
4171 VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
4172 VtsID__pp( t->viR );
4173 VG_(printf)(" viW %u==", t->viW);
4174 VtsID__pp( t->viW );
4175 VG_(printf)("%s","\n");
4176 }
4177}
4178
4179
4180Thr* libhb_init (
4181 void (*get_stacktrace)( Thr*, Addr*, UWord ),
4182 struct _EC* (*stacktrace_to_EC)( Addr*, UWord ),
4183 struct _EC* (*get_EC)( Thr* )
4184 )
4185{
4186 Thr* thr;
4187 VtsID vi;
4188 tl_assert(get_stacktrace);
4189 tl_assert(stacktrace_to_EC);
4190 tl_assert(get_EC);
4191 main_get_stacktrace = get_stacktrace;
4192 main_stacktrace_to_EC = stacktrace_to_EC;
4193 main_get_EC = get_EC;
4194
4195 // No need to initialise hg_wordfm.
4196 // No need to initialise hg_wordset.
4197
4198 vts_set_init();
4199 vts_tab_init();
4200 event_map_init();
4201 VtsID__invalidate_caches();
4202
4203 // initialise shadow memory
4204 zsm_init( SVal__rcinc, SVal__rcdec );
4205
4206 thr = Thr__new();
4207 vi = VtsID__mk_Singleton( thr, 1 );
4208 thr->viR = vi;
4209 thr->viW = vi;
4210 VtsID__rcinc(thr->viR);
4211 VtsID__rcinc(thr->viW);
4212
4213 show_thread_state(" root", thr);
4214 return thr;
4215}
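/* So after initialisation the root thread's read- and write-clocks are
   the same singleton VTS, in which the root thread's own entry is 1 and
   (implicitly) every other thread's entry is 0. */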
4216
4217Thr* libhb_create ( Thr* parent )
4218{
4219 /* The child's VTSs are copies of the parent's VTSs, but ticked at
4220 the child's index. Since the child's index is guaranteed
4221 unique, it has never been seen before, so the implicit value
4222 before the tick is zero and after that is one. */
4223 Thr* child = Thr__new();
4224
4225 child->viR = VtsID__tick( parent->viR, child );
4226 child->viW = VtsID__tick( parent->viW, child );
4227 VtsID__rcinc(child->viR);
4228 VtsID__rcinc(child->viW);
4229
4230 tl_assert(VtsID__indexAt( child->viR, child ) == 1);
4231 tl_assert(VtsID__indexAt( child->viW, child ) == 1);
4232
4233 /* and the parent has to move along too */
4234 VtsID__rcdec(parent->viR);
4235 VtsID__rcdec(parent->viW);
4236 parent->viR = VtsID__tick( parent->viR, parent );
4237 parent->viW = VtsID__tick( parent->viW, parent );
4238 VtsID__rcinc(parent->viR);
4239 VtsID__rcinc(parent->viW);
4240
4241 show_thread_state(" child", child);
4242 show_thread_state("parent", parent);
4243
4244 return child;
4245}
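/* Illustrative example (clock values made up): if the parent's clocks
   are [parent:5], the child starts with [parent:5, child:1] and the
   parent moves on to [parent:6].  Everything the parent did at or
   before time 5 therefore happens-before everything the child does. */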
4246
4247/* Shut down the library, and print stats (in fact that's _all_
4248 this is for). */
4249void libhb_shutdown ( Bool show_stats )
4250{
4251 if (show_stats) {
4252 VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
4253 VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
4254 stats__secmaps_allocd,
4255 stats__secmap_ga_space_covered);
4256 VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
4257 stats__secmap_linesZ_allocd,
4258 stats__secmap_linesZ_bytes);
4259 VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
4260 stats__secmap_linesF_allocd,
4261 stats__secmap_linesF_bytes);
4262 VG_(printf)(" secmaps: %'10lu iterator steppings\n",
4263 stats__secmap_iterator_steppings);
4264 VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
4265 stats__secmaps_search, stats__secmaps_search_slow);
4266
4267 VG_(printf)("%s","\n");
4268 VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
4269 stats__cache_totrefs, stats__cache_totmisses );
4270 VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
4271 stats__cache_Z_fetches, stats__cache_F_fetches );
4272 VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
4273 stats__cache_Z_wbacks, stats__cache_F_wbacks );
4274 VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n",
4275 stats__cache_invals, stats__cache_flushes );
4276 VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
4277 stats__cache_make_New_arange,
4278 stats__cache_make_New_inZrep);
4279
4280 VG_(printf)("%s","\n");
4281 VG_(printf)(" cline: %'10lu normalises\n",
4282 stats__cline_normalises );
4283 VG_(printf)(" cline: rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4284 stats__cline_read64s,
4285 stats__cline_read32s,
4286 stats__cline_read16s,
4287 stats__cline_read8s );
4288 VG_(printf)(" cline: wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4289 stats__cline_write64s,
4290 stats__cline_write32s,
4291 stats__cline_write16s,
4292 stats__cline_write8s );
4293 VG_(printf)(" cline: sets 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4294 stats__cline_set64s,
4295 stats__cline_set32s,
4296 stats__cline_set16s,
4297 stats__cline_set8s );
4298 VG_(printf)(" cline: get1s %'lu, copy1s %'lu\n",
4299 stats__cline_get8s, stats__cline_copy8s );
4300 VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
4301 stats__cline_64to32splits,
4302 stats__cline_32to16splits,
4303 stats__cline_16to8splits );
4304 VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
4305 stats__cline_64to32pulldown,
4306 stats__cline_32to16pulldown,
4307 stats__cline_16to8pulldown );
4308 if (0)
4309 VG_(printf)(" cline: sizeof(LineZ) %ld, covers %ld bytes of arange\n",
4310 (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);
4311
4312 VG_(printf)("%s","\n");
4313
4314 VG_(printf)(" libhb: %'13llu msm_read (%'llu changed)\n",
4315 stats__msm_read, stats__msm_read_change);
4316 VG_(printf)(" libhb: %'13llu msm_write (%'llu changed)\n",
4317 stats__msm_write, stats__msm_write_change);
4318 VG_(printf)(" libhb: %'13llu getOrd queries (%'llu misses)\n",
4319 stats__getOrdering_queries, stats__getOrdering_misses);
4320 VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
4321 stats__join2_queries, stats__join2_misses);
4322
4323 VG_(printf)("%s","\n");
4324 VG_(printf)(
4325 " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
4326 VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
4327 );
4328 VG_(printf)( " libhb: %lu entries in vts_set\n",
4329 VG_(sizeFM)( vts_set ) );
4330
4331 VG_(printf)("%s","\n");
4332 VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
4333 stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
4334 stats__ctxt_rcdec2,
4335 stats__ctxt_rcdec3 );
4336 VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
4337 stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
4338 VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n",
4339 (UWord)N_RCEC_TAB,
4340 stats__ctxt_tab_curr );
4341 VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
4342 stats__ctxt_tab_qs,
4343 stats__ctxt_tab_cmps );
4344#if 0
4345 VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
4346 VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
4347 VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
4348 VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
4349 VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
4350 VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
4351 VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
4352 VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
4353 VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
4354 VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
4355 VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
4356 VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
4357 VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
4358 VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
4359
4360 VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
4361 VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
4362 VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
4363 VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
4364#endif
4365
4366 VG_(printf)("%s","<<< END libhb stats >>>\n");
4367 VG_(printf)("%s","\n");
4368
4369 }
4370}
4371
4372void libhb_async_exit ( Thr* thr )
4373{
4374 /* is there anything we need to do? */
4375}
4376
4377/* Both Segs and SOs point to VTSs. However, there is no sharing, so
4378 a Seg that points at a VTS is its one-and-only owner, and ditto for
4379 a SO that points at a VTS. */
4380
4381SO* libhb_so_alloc ( void )
4382{
4383 return SO__Alloc();
4384}
4385
4386void libhb_so_dealloc ( SO* so )
4387{
4388 tl_assert(so);
4389 tl_assert(so->magic == SO_MAGIC);
4390 SO__Dealloc(so);
4391}
4392
4393/* See comments in libhb.h for details on the meaning of
4394 strong vs weak sends and strong vs weak receives. */
4395void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
4396{
4397 /* Copy the VTSs from 'thr' into the sync object, and then move
4398 the thread along one step. */
4399
4400 tl_assert(so);
4401 tl_assert(so->magic == SO_MAGIC);
4402
4403 /* stay sane: a thread's read-clock must always lead or equal
4404 its write-clock */
4405 { POrd ord = VtsID__getOrdering(thr->viW, thr->viR);
4406 tl_assert(ord == POrd_EQ || ord == POrd_LT);
4407 }
4408
4409 /* since we're overwriting the VtsIDs in the SO, we need to drop
4410 any references made by the previous contents thereof */
4411 if (so->viR == VtsID_INVALID) {
4412 tl_assert(so->viW == VtsID_INVALID);
4413 so->viR = thr->viR;
4414 so->viW = thr->viW;
4415 VtsID__rcinc(so->viR);
4416 VtsID__rcinc(so->viW);
4417 } else {
4418 /* In a strong send, we dump any previous VC in the SO and
4419 install the sending thread's VC instead. For a weak send we
4420 must join2 with what's already there. */
4421 tl_assert(so->viW != VtsID_INVALID);
4422 VtsID__rcdec(so->viR);
4423 VtsID__rcdec(so->viW);
4424 so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
4425 so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
4426 VtsID__rcinc(so->viR);
4427 VtsID__rcinc(so->viW);
4428 }
4429
4430 /* move both parent clocks along */
4431 VtsID__rcdec(thr->viR);
4432 VtsID__rcdec(thr->viW);
4433 thr->viR = VtsID__tick( thr->viR, thr );
4434 thr->viW = VtsID__tick( thr->viW, thr );
4435 VtsID__rcinc(thr->viR);
4436 VtsID__rcinc(thr->viW);
4437 if (strong_send)
4438 show_thread_state("s-send", thr);
4439 else
4440 show_thread_state("w-send", thr);
4441}
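/* Worked example (clock values made up): suppose the SO already holds
   clocks [T1:3, T2:7] and the sending thread's clocks are [T1:5, T2:2].
   A strong send leaves the SO holding [T1:5, T2:2]; a weak send leaves
   it holding the join (pointwise maximum), [T1:5, T2:7].  In both cases
   the sender's own clocks are then ticked. */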
4442
4443void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
4444{
4445 tl_assert(so);
4446 tl_assert(so->magic == SO_MAGIC);
4447
4448 if (so->viR != VtsID_INVALID) {
4449 tl_assert(so->viW != VtsID_INVALID);
4450
4451 /* Weak receive (basically, an R-acquisition of a R-W lock).
4452 This advances the read-clock of the receiver, but not the
4453 write-clock. */
4454 VtsID__rcdec(thr->viR);
4455 thr->viR = VtsID__join2( thr->viR, so->viR );
4456 VtsID__rcinc(thr->viR);
4457
4458 /* For a strong receive, we also advance the receiver's write
4459 clock, which means the receive as a whole is essentially
4460 equivalent to a W-acquisition of a R-W lock. */
4461 if (strong_recv) {
4462 VtsID__rcdec(thr->viW);
4463 thr->viW = VtsID__join2( thr->viW, so->viW );
4464 VtsID__rcinc(thr->viW);
4465 }
4466
4467 if (strong_recv)
4468 show_thread_state("s-recv", thr);
4469 else
4470 show_thread_state("w-recv", thr);
4471
4472 } else {
4473 tl_assert(so->viW == VtsID_INVALID);
4474 /* Deal with degenerate case: 'so' has no vts, so there has been
4475 no message posted to it. Just ignore this case. */
4476 show_thread_state("d-recv", thr);
4477 }
4478}
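/* Taken together, a send followed (later) by a recv on the same SO
   establishes a happens-before edge from sender to receiver.  Below is
   a minimal usage sketch, kept inside #if 0 since it is illustrative
   only; 'get_stacktrace', 'stacktrace_to_EC' and 'get_EC' stand for
   whatever callbacks the embedding tool supplies to libhb_init. */
#if 0
   Thr* main_thr  = libhb_init( get_stacktrace, stacktrace_to_EC, get_EC );
   Thr* child_thr = libhb_create( main_thr );
   SO*  so        = libhb_so_alloc();          /* e.g. one SO per mutex */

   /* main_thr releases the mutex: publish its clocks into the SO. */
   libhb_so_send( main_thr, so, True/*strong_send*/ );

   /* child_thr later acquires the same mutex: pull those clocks in.
      Everything main_thr did before the release now happens-before
      everything child_thr does after the acquire. */
   libhb_so_recv( child_thr, so, True/*strong_recv*/ );

   libhb_so_dealloc( so );
#endif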
4479
4480Bool libhb_so_everSent ( SO* so )
4481{
4482 if (so->viR == VtsID_INVALID) {
4483 tl_assert(so->viW == VtsID_INVALID);
4484 return False;
4485 } else {
4486 tl_assert(so->viW != VtsID_INVALID);
4487 return True;
4488 }
4489}
4490
4491#define XXX1 0 // 0x67a106c
4492#define XXX2 0
4493
4494static Bool TRACEME(Addr a, SizeT szB) {
4495 if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
4496 if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
4497 return False;
4498}
4499static void trace ( Thr* thr, Addr a, SizeT szB, HChar* s ) {
4500 SVal sv = zsm_read8(a);
4501 VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
4502 show_thread_state("", thr);
4503 VG_(printf)("%s","\n");
4504}
4505
4506void libhb_range_new ( Thr* thr, Addr a, SizeT szB )
4507{
4508 SVal sv = SVal__mkC(thr->viW, thr->viW);
4509 tl_assert(is_sane_SVal_C(sv));
4510 if (TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
4511 zsm_set_range( a, szB, sv );
4512 if (TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
4513}
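/* In effect, freshly allocated memory is put into the state it would
   have if the allocating thread had just written it: both the read- and
   write-clock components of the new SVal are the thread's current
   write-clock. */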
4514
4515void libhb_range_noaccess ( Thr* thr, Addr a, SizeT szB )
4516{
4517 if (TRACEME(a,szB)) trace(thr,a,szB,"NA-before");
4518 zsm_set_range( a, szB, SVal__mkA() );
4519 if (TRACEME(a,szB)) trace(thr,a,szB,"NA-after ");
4520}
4521
4522void* libhb_get_Thr_opaque ( Thr* thr ) {
4523 tl_assert(thr);
4524 return thr->opaque;
4525}
4526
4527void libhb_set_Thr_opaque ( Thr* thr, void* v ) {
4528 tl_assert(thr);
4529 thr->opaque = v;
4530}
4531
4532void libhb_copy_shadow_state ( Addr dst, Addr src, SizeT len )
4533{
4534 zsm_copy_range(dst, src, len);
4535}
4536
4537void libhb_maybe_GC ( void )
4538{
4539 event_map_maybe_GC();
4540 /* If there are still freelist entries available, no need for a
4541 GC. */
4542 if (vts_tab_freelist != VtsID_INVALID)
4543 return;
4544 /* So all the table entries are full, and we're having to expand
4545 the table. But have we hit the threshold point yet? */
4546 if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
4547 return;
4548 vts_tab__do_GC( False/*don't show stats*/ );
4549}
4550
4551
4552/////////////////////////////////////////////////////////////////
4553/////////////////////////////////////////////////////////////////
4554// //
4555// SECTION END main library //
4556// //
4557/////////////////////////////////////////////////////////////////
4558/////////////////////////////////////////////////////////////////
4559
4560/*--------------------------------------------------------------------*/
4561/*--- end libhb_main.c ---*/
4562/*--------------------------------------------------------------------*/