 1
2/*--------------------------------------------------------------------*/
3/*--- LibHB: a library for implementing and checking ---*/
4/*--- the happens-before relationship in concurrent programs. ---*/
5/*--- libhb_main.c ---*/
6/*--------------------------------------------------------------------*/
7
8/*
9 This file is part of LibHB, a library for implementing and checking
10 the happens-before relationship in concurrent programs.
11
12 Copyright (C) 2008-2008 OpenWorks Ltd
13 info@open-works.co.uk
14
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file COPYING.
31*/
32
33#include "pub_tool_basics.h"
34#include "pub_tool_libcassert.h"
35#include "pub_tool_libcbase.h"
36#include "pub_tool_libcprint.h"
37#include "pub_tool_mallocfree.h"
38#include "pub_tool_wordfm.h"
39#include "pub_tool_xarray.h"
40#include "pub_tool_oset.h"
41#include "pub_tool_threadstate.h"
42#include "pub_tool_aspacemgr.h"
43#include "pub_tool_execontext.h"
44#include "pub_tool_errormgr.h"
45
46#include "hg_basics.h"
47#include "hg_wordset.h"
48#include "hg_lock_n_thread.h"
49#include "hg_errors.h"
50
51#include "libhb.h"
52
53
 54/* Globals needed by other parts of the library.  These are set
 55 once at startup and then never changed: callbacks for getting a
 56 stack trace and an ExeContext for a given thread. */
57static void (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
 58static ExeContext* (*main_get_EC)( Thr* ) = NULL;
 59
60/////////////////////////////////////////////////////////////////
61/////////////////////////////////////////////////////////////////
62// //
63// //
64// //
65/////////////////////////////////////////////////////////////////
66/////////////////////////////////////////////////////////////////
67
68
69/////////////////////////////////////////////////////////////////
70/////////////////////////////////////////////////////////////////
71// //
72// SECTION BEGIN compressed shadow memory //
73// //
74/////////////////////////////////////////////////////////////////
75/////////////////////////////////////////////////////////////////
76
77#ifndef __HB_ZSM_H
78#define __HB_ZSM_H
79
80typedef ULong SVal;
81
82/* This value has special significance to the implementation, and callers
83 may not store it in the shadow memory. */
84#define SVal_INVALID (3ULL << 62)
85
86/* This is the default value for shadow memory. Initially the shadow
87 memory contains no accessible areas and so all reads produce this
 88 value. TODO: make this caller-definable. */
89#define SVal_NOACCESS (2ULL << 62)
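/* Note (illustrative): both reserved values are encoded purely in the
   top two bits of the 64-bit SVal (bit patterns 0b11 and 0b10
   respectively), so a quick test of bits 63:62 distinguishes them
   from each other. */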
90
91/* Initialise the library. Once initialised, it will (or may) call
92 rcinc and rcdec in response to all the calls below, in order to
93 allow the user to do reference counting on the SVals stored herein.
94 It is important to understand, however, that due to internal
95 caching, the reference counts are in general inaccurate, and can be
96 both above or below the true reference count for an item. In
97 particular, the library may indicate that the reference count for
98 an item is zero, when in fact it is not.
99
100 To make the reference counting exact and therefore non-pointless,
101 call zsm_flush_cache. Immediately after it returns, the reference
102 counts for all items, as deduced by the caller by observing calls
103 to rcinc and rcdec, will be correct, and so any items with a zero
104 reference count may be freed (or at least considered to be
105 unreferenced by this library).
106*/
107static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );
108
109static void zsm_set_range ( Addr, SizeT, SVal );
110static SVal zsm_read8 ( Addr );
111static void zsm_copy_range ( Addr, Addr, SizeT );
112static void zsm_flush_cache ( void );
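/* Illustrative usage sketch (not part of the library; my_rcinc and
   my_rcdec are hypothetical caller-side helpers that keep per-SVal
   reference counts):

      zsm_init( my_rcinc, my_rcdec );
      zsm_set_range( base, len, sv );   // library may call my_rcinc/my_rcdec
      ...                               // counts are approximate here
      zsm_flush_cache();                // counts observed via the callbacks
                                        // are now exact, so an SVal whose
                                        // count is zero is unreferenced
                                        // by this library.
*/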
113
114#endif /* ! __HB_ZSM_H */
115
116
117/* For the shadow mem cache stuff we may want more intrusive
118 checks. Unfortunately there's no almost-zero-cost way to make them
119 selectable at run time. Hence set the #if 0 to #if 1 and
120 rebuild if you want them. */
121#if 0
122# define SCE_CACHELINE 1 /* do sanity-check CacheLine stuff */
123# define inline __attribute__((noinline))
124 /* probably want to ditch -fomit-frame-pointer too */
125#else
126# define SCE_CACHELINE 0 /* don't sanity-check CacheLine stuff */
127#endif
128
129/* For the SegmentID, SegmentSet and SVal stuff we may want more
130 intrusive checks. Again there's no zero cost way to do this. Set
131 the #if 0 to #if 1 and rebuild if you want them. */
132#if 0
133# define SCE_SVALS 1 /* sanity-check shadow value stuff */
134#else
135# define SCE_SVALS 0
136#endif
137
138
139/* Round a up to the next multiple of N. N must be a power of 2 */
 140#define ROUNDUP(a, N) (((a) + (N) - 1) & ~((N)-1))
141/* Round a down to the next multiple of N. N must be a power of 2 */
 142#define ROUNDDN(a, N) ((a) & ~((N)-1))
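/* Worked example (illustrative): ROUNDUP(0x1234, 64) == 0x1240 and
   ROUNDDN(0x1234, 64) == 0x1200, since 0x1234 lies between the
   64-byte boundaries 0x1200 and 0x1240. */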
143
144
145
146/* ------ User-supplied RC functions ------ */
147static void(*rcinc)(SVal) = NULL;
148static void(*rcdec)(SVal) = NULL;
149
150
151/* ------ CacheLine ------ */
152
153#define N_LINE_BITS 6 /* must be >= 3 */
154#define N_LINE_ARANGE (1 << N_LINE_BITS)
155#define N_LINE_TREES (N_LINE_ARANGE >> 3)
156
157typedef
158 struct {
159 UShort descrs[N_LINE_TREES];
160 SVal svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
161 }
162 CacheLine;
163
164#define TREE_DESCR_16_0 (1<<0)
165#define TREE_DESCR_32_0 (1<<1)
166#define TREE_DESCR_16_1 (1<<2)
167#define TREE_DESCR_64 (1<<3)
168#define TREE_DESCR_16_2 (1<<4)
169#define TREE_DESCR_32_1 (1<<5)
170#define TREE_DESCR_16_3 (1<<6)
171#define TREE_DESCR_8_0 (1<<7)
172#define TREE_DESCR_8_1 (1<<8)
173#define TREE_DESCR_8_2 (1<<9)
174#define TREE_DESCR_8_3 (1<<10)
175#define TREE_DESCR_8_4 (1<<11)
176#define TREE_DESCR_8_5 (1<<12)
177#define TREE_DESCR_8_6 (1<<13)
178#define TREE_DESCR_8_7 (1<<14)
179#define TREE_DESCR_DTY (1<<15)
180
181typedef
182 struct {
183 SVal dict[4]; /* can represent up to 4 diff values in the line */
184 UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
185 dict indexes */
186 /* if dict[0] == SVal_INVALID then dict[1] is the index of the
187 LineF to use, and dict[2..] are also SVal_INVALID. */
188 }
189 LineZ; /* compressed rep for a cache line */
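/* Illustrative example of the compressed (Z) representation: suppose
   a 64-byte line holds only two distinct shadow values, A for the
   first 32 bytes and B for the rest.  Then dict[0] == A, dict[1] == B,
   dict[2] == dict[3] == SVal_INVALID, and ix2s[] packs sixty-four
   2-bit dictionary indexes (32 zeroes followed by 32 ones) into 16
   bytes.  A line with more than 4 distinct values cannot be encoded
   this way and must fall back to the full (F) representation below. */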
190
191typedef
192 struct {
193 Bool inUse;
194 SVal w64s[N_LINE_ARANGE];
195 }
196 LineF; /* full rep for a cache line */
197
198/* Shadow memory.
199 Primary map is a WordFM Addr SecMap*.
200 SecMaps cover some page-size-ish section of address space and hold
201 a compressed representation.
202 CacheLine-sized chunks of SecMaps are copied into a Cache, being
203 decompressed when moved into the cache and recompressed on the
204 way out. Because of this, the cache must operate as a writeback
205 cache, not a writethrough one.
206
207 Each SecMap must hold a power-of-2 number of CacheLines. Hence
 208 N_SECMAP_BITS must be >= N_LINE_BITS.
209*/
210#define N_SECMAP_BITS 13
211#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)
212
213// # CacheLines held by a SecMap
214#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
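/* With the current settings this gives N_SECMAP_ARANGE == 8192 bytes
   and N_LINE_ARANGE == 64 bytes, hence N_SECMAP_ZLINES == 128: each
   SecMap holds 128 LineZs, one per 64-byte stretch of the guest
   address space that it covers. */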
215
216/* The data in the SecMap is held in the array of LineZs. Each LineZ
217 either carries the required data directly, in a compressed
218 representation, or it holds (in .dict[0]) an index to the LineF in
219 .linesF that holds the full representation.
220
221 Currently-unused LineF's have their .inUse bit set to zero.
 222 Since each in-use LineF is referred to by exactly one LineZ,
223 the number of .linesZ[] that refer to .linesF should equal
224 the number of .linesF[] that have .inUse == True.
225
226 RC obligations: the RCs presented to the user include exactly
227 the values in:
228 * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
229 * F reps that are in use (.inUse == True)
230
231 Hence the following actions at the following transitions are required:
232
233 F rep: .inUse==True -> .inUse==False -- rcdec_LineF
234 F rep: .inUse==False -> .inUse==True -- rcinc_LineF
235 Z rep: .dict[0] from other to SVal_INVALID -- rcdec_LineZ
236 Z rep: .dict[0] from SVal_INVALID to other -- rcinc_LineZ
237*/
238typedef
239 struct {
240 UInt magic;
241 LineZ linesZ[N_SECMAP_ZLINES];
242 LineF* linesF;
243 UInt linesF_size;
244 }
245 SecMap;
246
247#define SecMap_MAGIC 0x571e58cbU
248
249static inline Bool is_sane_SecMap ( SecMap* sm ) {
250 return sm != NULL && sm->magic == SecMap_MAGIC;
251}
252
253/* ------ Cache ------ */
254
255#define N_WAY_BITS 16
256#define N_WAY_NENT (1 << N_WAY_BITS)
257
258/* Each tag is the address of the associated CacheLine, rounded down
259 to a CacheLine address boundary. A CacheLine size must be a power
260 of 2 and must be 8 or more. Hence an easy way to initialise the
261 cache so it is empty is to set all the tag values to any value % 8
262 != 0, eg 1. This means all queries in the cache initially miss.
 263 It does however require us to detect, and not write back, any line
264 with a bogus tag. */
265typedef
266 struct {
267 CacheLine lyns0[N_WAY_NENT];
268 Addr tags0[N_WAY_NENT];
269 }
270 Cache;
271
272static inline Bool is_valid_scache_tag ( Addr tag ) {
273 /* a valid tag should be naturally aligned to the start of
274 a CacheLine. */
275 return 0 == (tag & (N_LINE_ARANGE - 1));
276}
277
278
279/* --------- Primary data structures --------- */
280
281/* Shadow memory primary map */
282static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
283static Cache cache_shmem;
284
285
286static UWord stats__secmaps_search = 0; // # SM finds
287static UWord stats__secmaps_search_slow = 0; // # SM lookupFMs
288static UWord stats__secmaps_allocd = 0; // # SecMaps issued
289static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
290static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
291static UWord stats__secmap_linesZ_bytes = 0; // .. using this much storage
292static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
293static UWord stats__secmap_linesF_bytes = 0; // .. using this much storage
294static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
295static UWord stats__cache_Z_fetches = 0; // # Z lines fetched
296static UWord stats__cache_Z_wbacks = 0; // # Z lines written back
297static UWord stats__cache_F_fetches = 0; // # F lines fetched
298static UWord stats__cache_F_wbacks = 0; // # F lines written back
299static UWord stats__cache_invals = 0; // # cache invals
300static UWord stats__cache_flushes = 0; // # cache flushes
301static UWord stats__cache_totrefs = 0; // # total accesses
302static UWord stats__cache_totmisses = 0; // # misses
303static ULong stats__cache_make_New_arange = 0; // total arange made New
304static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
305static UWord stats__cline_normalises = 0; // # calls to cacheline_normalise
306static UWord stats__cline_read64s = 0; // # calls to s_m_read64
307static UWord stats__cline_read32s = 0; // # calls to s_m_read32
308static UWord stats__cline_read16s = 0; // # calls to s_m_read16
309static UWord stats__cline_read8s = 0; // # calls to s_m_read8
310static UWord stats__cline_write64s = 0; // # calls to s_m_write64
311static UWord stats__cline_write32s = 0; // # calls to s_m_write32
312static UWord stats__cline_write16s = 0; // # calls to s_m_write16
313static UWord stats__cline_write8s = 0; // # calls to s_m_write8
314static UWord stats__cline_set64s = 0; // # calls to s_m_set64
315static UWord stats__cline_set32s = 0; // # calls to s_m_set32
316static UWord stats__cline_set16s = 0; // # calls to s_m_set16
317static UWord stats__cline_set8s = 0; // # calls to s_m_set8
318static UWord stats__cline_get8s = 0; // # calls to s_m_get8
319static UWord stats__cline_copy8s = 0; // # calls to s_m_copy8
320static UWord stats__cline_64to32splits = 0; // # 64-bit accesses split
321static UWord stats__cline_32to16splits = 0; // # 32-bit accesses split
322static UWord stats__cline_16to8splits = 0; // # 16-bit accesses split
323static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
324static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
325static UWord stats__cline_16to8pulldown = 0; // # calls to pulldown_to_8
326
327static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
328 return a & ~(N_SECMAP_ARANGE - 1);
329}
330static inline UWord shmem__get_SecMap_offset ( Addr a ) {
331 return a & (N_SECMAP_ARANGE - 1);
332}
333
334
335/*----------------------------------------------------------------*/
336/*--- map_shmem :: WordFM Addr SecMap ---*/
337/*--- shadow memory (low level handlers) (shmem__* fns) ---*/
338/*----------------------------------------------------------------*/
339
340/*--------------- SecMap allocation --------------- */
341
342static HChar* shmem__bigchunk_next = NULL;
343static HChar* shmem__bigchunk_end1 = NULL;
344
345static void* shmem__bigchunk_alloc ( SizeT n )
346{
347 const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
348 tl_assert(n > 0);
349 n = VG_ROUNDUP(n, 16);
350 tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
351 tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
352 <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
353 if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
354 if (0)
355 VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
356 (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
357 shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
358 if (shmem__bigchunk_next == NULL)
359 VG_(out_of_memory_NORETURN)(
360 "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
361 shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
362 }
363 tl_assert(shmem__bigchunk_next);
364 tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
365 tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
366 shmem__bigchunk_next += n;
367 return shmem__bigchunk_next - n;
368}
369
370static SecMap* shmem__alloc_SecMap ( void )
371{
372 Word i, j;
373 SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
374 if (0) VG_(printf)("alloc_SecMap %p\n",sm);
375 tl_assert(sm);
376 sm->magic = SecMap_MAGIC;
377 for (i = 0; i < N_SECMAP_ZLINES; i++) {
378 sm->linesZ[i].dict[0] = SVal_NOACCESS;
379 sm->linesZ[i].dict[1] = SVal_INVALID;
380 sm->linesZ[i].dict[2] = SVal_INVALID;
381 sm->linesZ[i].dict[3] = SVal_INVALID;
382 for (j = 0; j < N_LINE_ARANGE/4; j++)
383 sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
384 }
385 sm->linesF = NULL;
386 sm->linesF_size = 0;
387 stats__secmaps_allocd++;
388 stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
389 stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
390 stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
391 return sm;
392}
393
394typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
395static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
396
397static SecMap* shmem__find_SecMap ( Addr ga )
398{
399 SecMap* sm = NULL;
400 Addr gaKey = shmem__round_to_SecMap_base(ga);
401 // Cache
402 stats__secmaps_search++;
403 if (LIKELY(gaKey == smCache[0].gaKey))
404 return smCache[0].sm;
405 if (LIKELY(gaKey == smCache[1].gaKey)) {
406 SMCacheEnt tmp = smCache[0];
407 smCache[0] = smCache[1];
408 smCache[1] = tmp;
409 return smCache[0].sm;
410 }
411 if (gaKey == smCache[2].gaKey) {
412 SMCacheEnt tmp = smCache[1];
413 smCache[1] = smCache[2];
414 smCache[2] = tmp;
415 return smCache[1].sm;
416 }
417 // end Cache
418 stats__secmaps_search_slow++;
419 if (VG_(lookupFM)( map_shmem,
420 NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
421 tl_assert(sm != NULL);
422 smCache[2] = smCache[1];
423 smCache[1] = smCache[0];
424 smCache[0].gaKey = gaKey;
425 smCache[0].sm = sm;
426 } else {
427 tl_assert(sm == NULL);
428 }
429 return sm;
430}
431
432static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
433{
434 SecMap* sm = shmem__find_SecMap ( ga );
435 if (LIKELY(sm)) {
436 return sm;
437 } else {
438 /* create a new one */
439 Addr gaKey = shmem__round_to_SecMap_base(ga);
440 sm = shmem__alloc_SecMap();
441 tl_assert(sm);
442 VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
443 return sm;
444 }
445}
446
447
448/* ------------ LineF and LineZ related ------------ */
449
450static void rcinc_LineF ( LineF* lineF ) {
451 UWord i;
452 tl_assert(lineF->inUse);
453 for (i = 0; i < N_LINE_ARANGE; i++)
454 rcinc(lineF->w64s[i]);
455}
456
457static void rcdec_LineF ( LineF* lineF ) {
458 UWord i;
459 tl_assert(lineF->inUse);
460 for (i = 0; i < N_LINE_ARANGE; i++)
461 rcdec(lineF->w64s[i]);
462}
463
464static void rcinc_LineZ ( LineZ* lineZ ) {
465 tl_assert(lineZ->dict[0] != SVal_INVALID);
466 rcinc(lineZ->dict[0]);
467 if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
468 if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
469 if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
470}
471
472static void rcdec_LineZ ( LineZ* lineZ ) {
473 tl_assert(lineZ->dict[0] != SVal_INVALID);
474 rcdec(lineZ->dict[0]);
475 if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
476 if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
477 if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
478}
479
480inline
481static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
482 Word bix, shft, mask, prep;
483 tl_assert(ix >= 0);
484 bix = ix >> 2;
485 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
486 mask = 3 << shft;
487 prep = b2 << shft;
488 arr[bix] = (arr[bix] & ~mask) | prep;
489}
490
491inline
492static UWord read_twobit_array ( UChar* arr, UWord ix ) {
493 Word bix, shft;
494 tl_assert(ix >= 0);
495 bix = ix >> 2;
496 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
497 return (arr[bix] >> shft) & 3;
498}
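/* Worked example (illustrative): for ix == 5 we get bix == 1 and
   shft == 2, so the 2-bit index lives in bits 3:2 of arr[1].  Thus
   write_twobit_array(arr, 5, 3) sets those two bits to 0b11, and
   read_twobit_array(arr, 5) then returns 3, without disturbing the
   other three indexes packed into arr[1]. */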
499
500/* Given address 'tag', find either the Z or F line containing relevant
501 data, so it can be read into the cache.
502*/
503static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
504 /*OUT*/LineF** fp, Addr tag ) {
505 LineZ* lineZ;
506 LineF* lineF;
507 UWord zix;
508 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
509 UWord smoff = shmem__get_SecMap_offset(tag);
510 /* since smoff is derived from a valid tag, it should be
511 cacheline-aligned. */
512 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
513 zix = smoff >> N_LINE_BITS;
514 tl_assert(zix < N_SECMAP_ZLINES);
515 lineZ = &sm->linesZ[zix];
516 lineF = NULL;
517 if (lineZ->dict[0] == SVal_INVALID) {
518 UInt fix = (UInt)lineZ->dict[1];
519 tl_assert(sm->linesF);
520 tl_assert(sm->linesF_size > 0);
521 tl_assert(fix >= 0 && fix < sm->linesF_size);
522 lineF = &sm->linesF[fix];
523 tl_assert(lineF->inUse);
524 lineZ = NULL;
525 }
526 *zp = lineZ;
527 *fp = lineF;
528}
529
530/* Given address 'tag', return the relevant SecMap and the index of
531 the LineZ within it, in the expectation that the line is to be
532 overwritten. Regardless of whether 'tag' is currently associated
 533 with a Z or F representation, rcdec is done on the current
534 representation, in recognition of the fact that the contents are
535 just about to be overwritten. */
536static __attribute__((noinline))
537void find_Z_for_writing ( /*OUT*/SecMap** smp,
538 /*OUT*/Word* zixp,
539 Addr tag ) {
540 LineZ* lineZ;
541 LineF* lineF;
542 UWord zix;
543 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
544 UWord smoff = shmem__get_SecMap_offset(tag);
545 /* since smoff is derived from a valid tag, it should be
546 cacheline-aligned. */
547 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
548 zix = smoff >> N_LINE_BITS;
549 tl_assert(zix < N_SECMAP_ZLINES);
550 lineZ = &sm->linesZ[zix];
551 lineF = NULL;
552 /* re RCs, we are freeing up this LineZ/LineF so that new data can
553 be parked in it. Hence have to rcdec it accordingly. */
554 /* If lineZ has an associated lineF, free it up. */
555 if (lineZ->dict[0] == SVal_INVALID) {
556 UInt fix = (UInt)lineZ->dict[1];
557 tl_assert(sm->linesF);
558 tl_assert(sm->linesF_size > 0);
559 tl_assert(fix >= 0 && fix < sm->linesF_size);
560 lineF = &sm->linesF[fix];
561 tl_assert(lineF->inUse);
562 rcdec_LineF(lineF);
563 lineF->inUse = False;
564 } else {
565 rcdec_LineZ(lineZ);
566 }
567 *smp = sm;
568 *zixp = zix;
569}
570
571static __attribute__((noinline))
572void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
573 UInt i, new_size;
574 LineF* nyu;
575
576 if (sm->linesF) {
577 tl_assert(sm->linesF_size > 0);
578 } else {
579 tl_assert(sm->linesF_size == 0);
580 }
581
582 if (sm->linesF) {
583 for (i = 0; i < sm->linesF_size; i++) {
584 if (!sm->linesF[i].inUse) {
585 *fixp = (Word)i;
586 return;
587 }
588 }
589 }
590
591 /* No free F line found. Expand existing array and try again. */
592 new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
593 nyu = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
594 new_size * sizeof(LineF) );
595 tl_assert(nyu);
596
597 stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
598 stats__secmap_linesF_bytes += (new_size - sm->linesF_size)
599 * sizeof(LineF);
600
601 if (0)
602 VG_(printf)("SM %p: expand F array from %d to %d\n",
603 sm, (Int)sm->linesF_size, new_size);
604
605 for (i = 0; i < new_size; i++)
606 nyu[i].inUse = False;
607
608 if (sm->linesF) {
609 for (i = 0; i < sm->linesF_size; i++) {
610 tl_assert(sm->linesF[i].inUse);
611 nyu[i] = sm->linesF[i];
612 }
613 VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
614 HG_(free)(sm->linesF);
615 }
616
617 sm->linesF = nyu;
618 sm->linesF_size = new_size;
619
620 for (i = 0; i < sm->linesF_size; i++) {
621 if (!sm->linesF[i].inUse) {
622 *fixp = (Word)i;
623 return;
624 }
625 }
626
627 /*NOTREACHED*/
628 tl_assert(0);
629}
630
631
632/* ------------ CacheLine and implicit-tree related ------------ */
633
634__attribute__((unused))
635static void pp_CacheLine ( CacheLine* cl ) {
636 Word i;
637 if (!cl) {
638 VG_(printf)("%s","pp_CacheLine(NULL)\n");
639 return;
640 }
641 for (i = 0; i < N_LINE_TREES; i++)
642 VG_(printf)(" descr: %04lx\n", (UWord)cl->descrs[i]);
643 for (i = 0; i < N_LINE_ARANGE; i++)
644 VG_(printf)(" sval: %08lx\n", (UWord)cl->svals[i]);
645}
646
647static UChar descr_to_validbits ( UShort descr )
648{
649 /* a.k.a Party Time for gcc's constant folder */
650# define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
651 b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0) \
652 ( (UShort) ( ( (b8_7) << 14) | ( (b8_6) << 13) | \
653 ( (b8_5) << 12) | ( (b8_4) << 11) | \
654 ( (b8_3) << 10) | ( (b8_2) << 9) | \
655 ( (b8_1) << 8) | ( (b8_0) << 7) | \
656 ( (b16_3) << 6) | ( (b32_1) << 5) | \
657 ( (b16_2) << 4) | ( (b64) << 3) | \
658 ( (b16_1) << 2) | ( (b32_0) << 1) | \
659 ( (b16_0) << 0) ) )
660
661# define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
662 ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
663 ( (bit5) << 5) | ( (bit4) << 4) | \
664 ( (bit3) << 3) | ( (bit2) << 2) | \
665 ( (bit1) << 1) | ( (bit0) << 0) ) )
666
667 /* these should all get folded out at compile time */
668 tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
669 tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
670 tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
671 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
672 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
673 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
674 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
675 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
676 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);
677
678 switch (descr) {
679 /*
680 +--------------------------------- TREE_DESCR_8_7
681 | +------------------- TREE_DESCR_8_0
682 | | +---------------- TREE_DESCR_16_3
683 | | | +-------------- TREE_DESCR_32_1
684 | | | | +------------ TREE_DESCR_16_2
685 | | | | | +--------- TREE_DESCR_64
686 | | | | | | +------ TREE_DESCR_16_1
687 | | | | | | | +---- TREE_DESCR_32_0
688 | | | | | | | | +-- TREE_DESCR_16_0
689 | | | | | | | | |
690 | | | | | | | | | GRANULARITY, 7 -> 0 */
691 case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */
692 return BYTE(1,1,1,1,1,1,1,1);
693 case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */
694 return BYTE(1,1,0,1,1,1,1,1);
695 case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */
696 return BYTE(0,1,1,1,1,1,1,1);
697 case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */
698 return BYTE(0,1,0,1,1,1,1,1);
699
700 case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */
701 return BYTE(1,1,1,1,1,1,0,1);
702 case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */
703 return BYTE(1,1,0,1,1,1,0,1);
704 case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */
705 return BYTE(0,1,1,1,1,1,0,1);
706 case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */
707 return BYTE(0,1,0,1,1,1,0,1);
708
709 case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */
710 return BYTE(1,1,1,1,0,1,1,1);
711 case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */
712 return BYTE(1,1,0,1,0,1,1,1);
713 case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */
714 return BYTE(0,1,1,1,0,1,1,1);
715 case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */
716 return BYTE(0,1,0,1,0,1,1,1);
717
718 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */
719 return BYTE(1,1,1,1,0,1,0,1);
720 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */
721 return BYTE(1,1,0,1,0,1,0,1);
722 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */
723 return BYTE(0,1,1,1,0,1,0,1);
724 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */
725 return BYTE(0,1,0,1,0,1,0,1);
726
727 case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */
728 return BYTE(0,0,0,1,1,1,1,1);
729 case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */
730 return BYTE(0,0,0,1,1,1,0,1);
731 case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */
732 return BYTE(0,0,0,1,0,1,1,1);
733 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */
734 return BYTE(0,0,0,1,0,1,0,1);
735
736 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */
737 return BYTE(1,1,1,1,0,0,0,1);
738 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */
739 return BYTE(1,1,0,1,0,0,0,1);
740 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */
741 return BYTE(0,1,1,1,0,0,0,1);
742 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */
743 return BYTE(0,1,0,1,0,0,0,1);
744
745 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
746 return BYTE(0,0,0,1,0,0,0,1);
747
748 case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
749 return BYTE(0,0,0,0,0,0,0,1);
750
751 default: return BYTE(0,0,0,0,0,0,0,0);
752 /* INVALID - any valid descr produces at least one
753 valid bit in tree[0..7]*/
754 }
755 /* NOTREACHED*/
756 tl_assert(0);
757
758# undef DESCR
759# undef BYTE
760}
761
762__attribute__((unused))
763static Bool is_sane_Descr ( UShort descr ) {
764 return descr_to_validbits(descr) != 0;
765}
766
767static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
768 VG_(sprintf)(dst,
769 "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
770 (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
771 (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
772 (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
773 (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
774 (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
775 (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
776 (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
777 (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
778 (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
779 (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
780 (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
781 (Int)((descr & TREE_DESCR_64) ? 1 : 0),
782 (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
783 (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
784 (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
785 );
786}
787static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
788 VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
789 (Int)((byte & 128) ? 1 : 0),
790 (Int)((byte & 64) ? 1 : 0),
791 (Int)((byte & 32) ? 1 : 0),
792 (Int)((byte & 16) ? 1 : 0),
793 (Int)((byte & 8) ? 1 : 0),
794 (Int)((byte & 4) ? 1 : 0),
795 (Int)((byte & 2) ? 1 : 0),
796 (Int)((byte & 1) ? 1 : 0)
797 );
798}
799
800static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
801 Word i;
802 UChar validbits = descr_to_validbits(descr);
803 HChar buf[128], buf2[128];
804 if (validbits == 0)
805 goto bad;
806 for (i = 0; i < 8; i++) {
807 if (validbits & (1<<i)) {
808 if (tree[i] == SVal_INVALID)
809 goto bad;
810 } else {
811 if (tree[i] != SVal_INVALID)
812 goto bad;
813 }
814 }
815 return True;
816 bad:
817 sprintf_Descr( buf, descr );
818 sprintf_Byte( buf2, validbits );
819 VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
820 VG_(printf)(" validbits 0x%02lx %s\n", (UWord)validbits, buf2);
821 VG_(printf)(" descr 0x%04lx %s\n", (UWord)descr, buf);
822 for (i = 0; i < 8; i++)
823 VG_(printf)(" [%ld] 0x%016llx\n", i, tree[i]);
824 VG_(printf)("%s","}\n");
825 return 0;
826}
827
828static Bool is_sane_CacheLine ( CacheLine* cl )
829{
830 Word tno, cloff;
831
832 if (!cl) goto bad;
833
834 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
835 UShort descr = cl->descrs[tno];
836 SVal* tree = &cl->svals[cloff];
837 if (!is_sane_Descr_and_Tree(descr, tree))
838 goto bad;
839 }
840 tl_assert(cloff == N_LINE_ARANGE);
841 return True;
842 bad:
843 pp_CacheLine(cl);
844 return False;
845}
846
847static UShort normalise_tree ( /*MOD*/SVal* tree )
848{
849 UShort descr;
850 /* pre: incoming tree[0..7] does not have any invalid shvals, in
851 particular no zeroes. */
852 if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
853 || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
854 || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
855 || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
856 tl_assert(0);
857
858 descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
859 | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
860 | TREE_DESCR_8_1 | TREE_DESCR_8_0;
861 /* build 16-bit layer */
862 if (tree[1] == tree[0]) {
863 tree[1] = SVal_INVALID;
864 descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
865 descr |= TREE_DESCR_16_0;
866 }
867 if (tree[3] == tree[2]) {
868 tree[3] = SVal_INVALID;
869 descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
870 descr |= TREE_DESCR_16_1;
871 }
872 if (tree[5] == tree[4]) {
873 tree[5] = SVal_INVALID;
874 descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
875 descr |= TREE_DESCR_16_2;
876 }
877 if (tree[7] == tree[6]) {
878 tree[7] = SVal_INVALID;
879 descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
880 descr |= TREE_DESCR_16_3;
881 }
882 /* build 32-bit layer */
883 if (tree[2] == tree[0]
884 && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
885 tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
886 descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
887 descr |= TREE_DESCR_32_0;
888 }
889 if (tree[6] == tree[4]
890 && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
891 tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
892 descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
893 descr |= TREE_DESCR_32_1;
894 }
895 /* build 64-bit layer */
896 if (tree[4] == tree[0]
897 && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
898 tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
899 descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
900 descr |= TREE_DESCR_64;
901 }
902 return descr;
903}
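/* Illustrative example: if all eight leaves tree[0..7] hold the same
   value, the pairwise merges above collapse them level by level --
   first into four 16-bit nodes, then two 32-bit nodes, then one
   64-bit node -- leaving only tree[0] valid and returning
   TREE_DESCR_64.  If all eight bytes differ, no merging happens and
   the returned descriptor keeps all eight TREE_DESCR_8_* bits set. */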
904
905/* This takes a cacheline where all the data is at the leaves
 906 (svals[..]) and builds a correctly normalised tree. */
907static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
908{
909 Word tno, cloff;
910 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
911 SVal* tree = &cl->svals[cloff];
912 cl->descrs[tno] = normalise_tree( tree );
913 }
914 tl_assert(cloff == N_LINE_ARANGE);
915 if (SCE_CACHELINE)
916 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
917 stats__cline_normalises++;
918}
919
920
921typedef struct { UChar count; SVal sval; } CountedSVal;
922
923static
924void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
925 /*OUT*/Word* dstUsedP,
926 Word nDst, CacheLine* src )
927{
928 Word tno, cloff, dstUsed;
929
930 tl_assert(nDst == N_LINE_ARANGE);
931 dstUsed = 0;
932
933 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
934 UShort descr = src->descrs[tno];
935 SVal* tree = &src->svals[cloff];
936
937 /* sequentialise the tree described by (descr,tree). */
938# define PUT(_n,_v) \
939 do { dst[dstUsed ].count = (_n); \
940 dst[dstUsed++].sval = (_v); \
941 } while (0)
942
943 /* byte 0 */
944 if (descr & TREE_DESCR_64) PUT(8, tree[0]); else
945 if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
946 if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
947 if (descr & TREE_DESCR_8_0) PUT(1, tree[0]);
948 /* byte 1 */
949 if (descr & TREE_DESCR_8_1) PUT(1, tree[1]);
950 /* byte 2 */
951 if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
952 if (descr & TREE_DESCR_8_2) PUT(1, tree[2]);
953 /* byte 3 */
954 if (descr & TREE_DESCR_8_3) PUT(1, tree[3]);
955 /* byte 4 */
956 if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
957 if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
958 if (descr & TREE_DESCR_8_4) PUT(1, tree[4]);
959 /* byte 5 */
960 if (descr & TREE_DESCR_8_5) PUT(1, tree[5]);
961 /* byte 6 */
962 if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
963 if (descr & TREE_DESCR_8_6) PUT(1, tree[6]);
964 /* byte 7 */
965 if (descr & TREE_DESCR_8_7) PUT(1, tree[7]);
966
967# undef PUT
968 /* END sequentialise the tree described by (descr,tree). */
969
970 }
971 tl_assert(cloff == N_LINE_ARANGE);
972 tl_assert(dstUsed <= nDst);
973
974 *dstUsedP = dstUsed;
975}
976
977/* Write the cacheline 'wix' to backing store. Where it ends up
978 is determined by its tag field. */
979static __attribute__((noinline)) void cacheline_wback ( UWord wix )
980{
981 Word i, j, k, m;
982 Addr tag;
983 SecMap* sm;
984 CacheLine* cl;
985 LineZ* lineZ;
986 LineF* lineF;
987 Word zix, fix, csvalsUsed;
988 CountedSVal csvals[N_LINE_ARANGE];
989 SVal sv;
990
991 if (0)
992 VG_(printf)("scache wback line %d\n", (Int)wix);
993
994 tl_assert(wix >= 0 && wix < N_WAY_NENT);
995
996 tag = cache_shmem.tags0[wix];
997 cl = &cache_shmem.lyns0[wix];
998
999 /* The cache line may have been invalidated; if so, ignore it. */
1000 if (!is_valid_scache_tag(tag))
1001 return;
1002
1003 /* Where are we going to put it? */
1004 sm = NULL;
1005 lineZ = NULL;
1006 lineF = NULL;
1007 zix = fix = -1;
1008
1009 /* find the Z line to write in and rcdec it or the associated F
1010 line. */
1011 find_Z_for_writing( &sm, &zix, tag );
1012
1013 tl_assert(sm);
1014 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
1015 lineZ = &sm->linesZ[zix];
1016
1017 /* Generate the data to be stored */
1018 if (SCE_CACHELINE)
1019 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1020
1021 csvalsUsed = -1;
1022 sequentialise_CacheLine( csvals, &csvalsUsed,
1023 N_LINE_ARANGE, cl );
1024 tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
1025 if (0) VG_(printf)("%lu ", csvalsUsed);
1026
1027 lineZ->dict[0] = lineZ->dict[1]
1028 = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1029
1030 /* i indexes actual shadow values, k is cursor in csvals */
1031 i = 0;
1032 for (k = 0; k < csvalsUsed; k++) {
1033
1034 sv = csvals[k].sval;
1035 if (SCE_SVALS)
1036 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1037 /* do we already have it? */
1038 if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
1039 if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
1040 if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
1041 if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
1042 /* no. look for a free slot. */
1043 if (SCE_SVALS)
1044 tl_assert(sv != SVal_INVALID);
1045 if (lineZ->dict[0]
1046 == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
1047 if (lineZ->dict[1]
1048 == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
1049 if (lineZ->dict[2]
1050 == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
1051 if (lineZ->dict[3]
1052 == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
1053 break; /* we'll have to use the f rep */
1054 dict_ok:
1055 m = csvals[k].count;
1056 if (m == 8) {
1057 write_twobit_array( lineZ->ix2s, i+0, j );
1058 write_twobit_array( lineZ->ix2s, i+1, j );
1059 write_twobit_array( lineZ->ix2s, i+2, j );
1060 write_twobit_array( lineZ->ix2s, i+3, j );
1061 write_twobit_array( lineZ->ix2s, i+4, j );
1062 write_twobit_array( lineZ->ix2s, i+5, j );
1063 write_twobit_array( lineZ->ix2s, i+6, j );
1064 write_twobit_array( lineZ->ix2s, i+7, j );
1065 i += 8;
1066 }
1067 else if (m == 4) {
1068 write_twobit_array( lineZ->ix2s, i+0, j );
1069 write_twobit_array( lineZ->ix2s, i+1, j );
1070 write_twobit_array( lineZ->ix2s, i+2, j );
1071 write_twobit_array( lineZ->ix2s, i+3, j );
1072 i += 4;
1073 }
1074 else if (m == 1) {
1075 write_twobit_array( lineZ->ix2s, i+0, j );
1076 i += 1;
1077 }
1078 else if (m == 2) {
1079 write_twobit_array( lineZ->ix2s, i+0, j );
1080 write_twobit_array( lineZ->ix2s, i+1, j );
1081 i += 2;
1082 }
1083 else {
1084 tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
1085 }
1086
1087 }
1088
1089 if (LIKELY(i == N_LINE_ARANGE)) {
1090 /* Construction of the compressed representation was
1091 successful. */
1092 rcinc_LineZ(lineZ);
1093 stats__cache_Z_wbacks++;
1094 } else {
1095 /* Cannot use the compressed(z) representation. Use the full(f)
1096 rep instead. */
1097 tl_assert(i >= 0 && i < N_LINE_ARANGE);
1098 alloc_F_for_writing( sm, &fix );
1099 tl_assert(sm->linesF);
1100 tl_assert(sm->linesF_size > 0);
1101 tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
1102 lineF = &sm->linesF[fix];
1103 tl_assert(!lineF->inUse);
1104 lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1105 lineZ->dict[1] = (SVal)fix;
1106 lineF->inUse = True;
1107 i = 0;
1108 for (k = 0; k < csvalsUsed; k++) {
1109 if (SCE_SVALS)
1110 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1111 sv = csvals[k].sval;
1112 if (SCE_SVALS)
1113 tl_assert(sv != SVal_INVALID);
1114 for (m = csvals[k].count; m > 0; m--) {
1115 lineF->w64s[i] = sv;
1116 i++;
1117 }
1118 }
1119 tl_assert(i == N_LINE_ARANGE);
1120 rcinc_LineF(lineF);
1121 stats__cache_F_wbacks++;
1122 }
1123
1124 //if (anyShared)
1125 // sm->mbHasShared = True;
1126
1127 /* mb_tidy_one_cacheline(); */
1128}
1129
1130/* Fetch the cacheline 'wix' from the backing store. The tag
1131 associated with 'wix' is assumed to have already been filled in;
1132 hence that is used to determine where in the backing store to read
1133 from. */
1134static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
1135{
1136 Word i;
1137 Addr tag;
1138 CacheLine* cl;
1139 LineZ* lineZ;
1140 LineF* lineF;
1141
1142 if (0)
1143 VG_(printf)("scache fetch line %d\n", (Int)wix);
1144
1145 tl_assert(wix >= 0 && wix < N_WAY_NENT);
1146
1147 tag = cache_shmem.tags0[wix];
1148 cl = &cache_shmem.lyns0[wix];
1149
1150 /* reject nonsense requests */
1151 tl_assert(is_valid_scache_tag(tag));
1152
1153 lineZ = NULL;
1154 lineF = NULL;
1155 find_ZF_for_reading( &lineZ, &lineF, tag );
1156 tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
1157
1158 /* expand the data into the bottom layer of the tree, then get
1159 cacheline_normalise to build the descriptor array. */
1160 if (lineF) {
1161 tl_assert(lineF->inUse);
1162 for (i = 0; i < N_LINE_ARANGE; i++) {
1163 cl->svals[i] = lineF->w64s[i];
1164 }
1165 stats__cache_F_fetches++;
1166 } else {
1167 for (i = 0; i < N_LINE_ARANGE; i++) {
1168 SVal sv;
1169 UWord ix = read_twobit_array( lineZ->ix2s, i );
1170 /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
1171 sv = lineZ->dict[ix];
1172 tl_assert(sv != SVal_INVALID);
1173 cl->svals[i] = sv;
1174 }
1175 stats__cache_Z_fetches++;
1176 }
1177 normalise_CacheLine( cl );
1178}
1179
1180static void shmem__invalidate_scache ( void ) {
1181 Word wix;
1182 if (0) VG_(printf)("%s","scache inval\n");
1183 tl_assert(!is_valid_scache_tag(1));
1184 for (wix = 0; wix < N_WAY_NENT; wix++) {
1185 cache_shmem.tags0[wix] = 1/*INVALID*/;
1186 }
1187 stats__cache_invals++;
1188}
1189
1190static void shmem__flush_and_invalidate_scache ( void ) {
1191 Word wix;
1192 Addr tag;
1193 if (0) VG_(printf)("%s","scache flush and invalidate\n");
1194 tl_assert(!is_valid_scache_tag(1));
1195 for (wix = 0; wix < N_WAY_NENT; wix++) {
1196 tag = cache_shmem.tags0[wix];
1197 if (tag == 1/*INVALID*/) {
1198 /* already invalid; nothing to do */
1199 } else {
1200 tl_assert(is_valid_scache_tag(tag));
1201 cacheline_wback( wix );
1202 }
1203 cache_shmem.tags0[wix] = 1/*INVALID*/;
1204 }
1205 stats__cache_flushes++;
1206 stats__cache_invals++;
1207}
1208
1209
1210static inline Bool aligned16 ( Addr a ) {
1211 return 0 == (a & 1);
1212}
1213static inline Bool aligned32 ( Addr a ) {
1214 return 0 == (a & 3);
1215}
1216static inline Bool aligned64 ( Addr a ) {
1217 return 0 == (a & 7);
1218}
1219static inline UWord get_cacheline_offset ( Addr a ) {
1220 return (UWord)(a & (N_LINE_ARANGE - 1));
1221}
1222static inline Addr cacheline_ROUNDUP ( Addr a ) {
1223 return ROUNDUP(a, N_LINE_ARANGE);
1224}
1225static inline Addr cacheline_ROUNDDN ( Addr a ) {
1226 return ROUNDDN(a, N_LINE_ARANGE);
1227}
1228static inline UWord get_treeno ( Addr a ) {
1229 return get_cacheline_offset(a) >> 3;
1230}
1231static inline UWord get_tree_offset ( Addr a ) {
1232 return a & 7;
1233}
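/* Worked example (illustrative): for a == 0x402A the cacheline offset
   is 0x402A & 63 == 42, the tree number is 42 >> 3 == 5, and the tree
   offset is 0x402A & 7 == 2; i.e. the address names byte 2 of tree 5
   within its 64-byte cache line. */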
1234
1235static __attribute__((noinline))
1236 CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
1237static inline CacheLine* get_cacheline ( Addr a )
1238{
1239 /* tag is 'a' with the in-line offset masked out,
 1240 eg a[31]..a[6] 000000 */
1241 Addr tag = a & ~(N_LINE_ARANGE - 1);
1242 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1243 stats__cache_totrefs++;
1244 if (LIKELY(tag == cache_shmem.tags0[wix])) {
1245 return &cache_shmem.lyns0[wix];
1246 } else {
1247 return get_cacheline_MISS( a );
1248 }
1249}
1250
1251static __attribute__((noinline))
1252 CacheLine* get_cacheline_MISS ( Addr a )
1253{
1254 /* tag is 'a' with the in-line offset masked out,
 1255 eg a[31]..a[6] 000000 */
1256
1257 CacheLine* cl;
1258 Addr* tag_old_p;
1259 Addr tag = a & ~(N_LINE_ARANGE - 1);
1260 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1261
1262 tl_assert(tag != cache_shmem.tags0[wix]);
1263
1264 /* Dump the old line into the backing store. */
1265 stats__cache_totmisses++;
1266
1267 cl = &cache_shmem.lyns0[wix];
1268 tag_old_p = &cache_shmem.tags0[wix];
1269
1270 if (is_valid_scache_tag( *tag_old_p )) {
1271 /* EXPENSIVE and REDUNDANT: callee does it */
1272 if (SCE_CACHELINE)
1273 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1274 cacheline_wback( wix );
1275 }
1276 /* and reload the new one */
1277 *tag_old_p = tag;
1278 cacheline_fetch( wix );
1279 if (SCE_CACHELINE)
1280 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1281 return cl;
1282}
1283
1284static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1285 stats__cline_64to32pulldown++;
1286 switch (toff) {
1287 case 0: case 4:
1288 tl_assert(descr & TREE_DESCR_64);
1289 tree[4] = tree[0];
1290 descr &= ~TREE_DESCR_64;
1291 descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
1292 break;
1293 default:
1294 tl_assert(0);
1295 }
1296 return descr;
1297}
1298
1299static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1300 stats__cline_32to16pulldown++;
1301 switch (toff) {
1302 case 0: case 2:
1303 if (!(descr & TREE_DESCR_32_0)) {
1304 descr = pulldown_to_32(tree, 0, descr);
1305 }
1306 tl_assert(descr & TREE_DESCR_32_0);
1307 tree[2] = tree[0];
1308 descr &= ~TREE_DESCR_32_0;
1309 descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
1310 break;
1311 case 4: case 6:
1312 if (!(descr & TREE_DESCR_32_1)) {
1313 descr = pulldown_to_32(tree, 4, descr);
1314 }
1315 tl_assert(descr & TREE_DESCR_32_1);
1316 tree[6] = tree[4];
1317 descr &= ~TREE_DESCR_32_1;
1318 descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
1319 break;
1320 default:
1321 tl_assert(0);
1322 }
1323 return descr;
1324}
1325
1326static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1327 stats__cline_16to8pulldown++;
1328 switch (toff) {
1329 case 0: case 1:
1330 if (!(descr & TREE_DESCR_16_0)) {
1331 descr = pulldown_to_16(tree, 0, descr);
1332 }
1333 tl_assert(descr & TREE_DESCR_16_0);
1334 tree[1] = tree[0];
1335 descr &= ~TREE_DESCR_16_0;
1336 descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
1337 break;
1338 case 2: case 3:
1339 if (!(descr & TREE_DESCR_16_1)) {
1340 descr = pulldown_to_16(tree, 2, descr);
1341 }
1342 tl_assert(descr & TREE_DESCR_16_1);
1343 tree[3] = tree[2];
1344 descr &= ~TREE_DESCR_16_1;
1345 descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
1346 break;
1347 case 4: case 5:
1348 if (!(descr & TREE_DESCR_16_2)) {
1349 descr = pulldown_to_16(tree, 4, descr);
1350 }
1351 tl_assert(descr & TREE_DESCR_16_2);
1352 tree[5] = tree[4];
1353 descr &= ~TREE_DESCR_16_2;
1354 descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
1355 break;
1356 case 6: case 7:
1357 if (!(descr & TREE_DESCR_16_3)) {
1358 descr = pulldown_to_16(tree, 6, descr);
1359 }
1360 tl_assert(descr & TREE_DESCR_16_3);
1361 tree[7] = tree[6];
1362 descr &= ~TREE_DESCR_16_3;
1363 descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
1364 break;
1365 default:
1366 tl_assert(0);
1367 }
1368 return descr;
1369}
1370
1371
1372static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
1373 UShort mask;
1374 switch (toff) {
1375 case 0:
1376 mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
1377 tl_assert( (descr & mask) == mask );
1378 descr &= ~mask;
1379 descr |= TREE_DESCR_16_0;
1380 break;
1381 case 2:
1382 mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
1383 tl_assert( (descr & mask) == mask );
1384 descr &= ~mask;
1385 descr |= TREE_DESCR_16_1;
1386 break;
1387 case 4:
1388 mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
1389 tl_assert( (descr & mask) == mask );
1390 descr &= ~mask;
1391 descr |= TREE_DESCR_16_2;
1392 break;
1393 case 6:
1394 mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
1395 tl_assert( (descr & mask) == mask );
1396 descr &= ~mask;
1397 descr |= TREE_DESCR_16_3;
1398 break;
1399 default:
1400 tl_assert(0);
1401 }
1402 return descr;
1403}
1404
1405static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
1406 UShort mask;
1407 switch (toff) {
1408 case 0:
1409 if (!(descr & TREE_DESCR_16_0))
1410 descr = pullup_descr_to_16(descr, 0);
1411 if (!(descr & TREE_DESCR_16_1))
1412 descr = pullup_descr_to_16(descr, 2);
1413 mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
1414 tl_assert( (descr & mask) == mask );
1415 descr &= ~mask;
1416 descr |= TREE_DESCR_32_0;
1417 break;
1418 case 4:
1419 if (!(descr & TREE_DESCR_16_2))
1420 descr = pullup_descr_to_16(descr, 4);
1421 if (!(descr & TREE_DESCR_16_3))
1422 descr = pullup_descr_to_16(descr, 6);
1423 mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
1424 tl_assert( (descr & mask) == mask );
1425 descr &= ~mask;
1426 descr |= TREE_DESCR_32_1;
1427 break;
1428 default:
1429 tl_assert(0);
1430 }
1431 return descr;
1432}
1433
1434static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
1435 switch (toff) {
1436 case 0: case 4:
1437 return 0 != (descr & TREE_DESCR_64);
1438 default:
1439 tl_assert(0);
1440 }
1441}
1442
1443static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
1444 switch (toff) {
1445 case 0:
1446 return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
1447 case 2:
1448 return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
1449 case 4:
1450 return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
1451 case 6:
1452 return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
1453 default:
1454 tl_assert(0);
1455 }
1456}
1457
1458/* ------------ Cache management ------------ */
1459
1460static void zsm_flush_cache ( void )
1461{
1462 shmem__flush_and_invalidate_scache();
1463}
1464
1465
1466static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
1467{
1468 tl_assert( sizeof(UWord) == sizeof(Addr) );
1469
1470 rcinc = p_rcinc;
1471 rcdec = p_rcdec;
1472
1473 tl_assert(map_shmem == NULL);
1474 map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
1475 HG_(free),
1476 NULL/*unboxed UWord cmp*/);
1477 tl_assert(map_shmem != NULL);
1478 shmem__invalidate_scache();
1479
1480 /* a SecMap must contain an integral number of CacheLines */
1481 tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
1482 /* also ... a CacheLine holds an integral number of trees */
1483 tl_assert(0 == (N_LINE_ARANGE % 8));
1484}
1485
1486/////////////////////////////////////////////////////////////////
1487/////////////////////////////////////////////////////////////////
1488// //
1489// SECTION END compressed shadow memory //
1490// //
1491/////////////////////////////////////////////////////////////////
1492/////////////////////////////////////////////////////////////////
1493
1494
1495
1496/////////////////////////////////////////////////////////////////
1497/////////////////////////////////////////////////////////////////
1498// //
1499// SECTION BEGIN vts primitives //
1500// //
1501/////////////////////////////////////////////////////////////////
1502/////////////////////////////////////////////////////////////////
1503
1504#ifndef __HB_VTS_H
1505#define __HB_VTS_H
1506
1507/* VtsIDs can't exceed 30 bits, since they have to be packed into the
1508 lowest 30 bits of an SVal. */
1509typedef UInt VtsID;
1510#define VtsID_INVALID 0xFFFFFFFF
1511
1512/* A VTS contains .ts, its vector clock, and also .id, a field to hold
1513 a backlink for the caller's convenience. Since we have no idea
1514 what to set that to in the library, it always gets set to
1515 VtsID_INVALID. */
1516typedef
1517 struct {
1518 VtsID id;
1519 XArray* ts; /* XArray* ScalarTS(abstract) */
1520 }
1521 VTS;
1522
1523
1524/* Create a new, empty VTS. */
1525VTS* VTS__new ( void );
1526
1527/* Delete this VTS in its entirety. */
1528void VTS__delete ( VTS* vts );
1529
1530/* Create a new singleton VTS. */
1531VTS* VTS__singleton ( Thr* thr, ULong tym );
1532
1533/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
1534 not modified. */
1535VTS* VTS__tick ( Thr* me, VTS* vts );
1536
1537/* Return a new VTS constructed as the join (max) of the 2 args.
1538 Neither arg is modified. */
1539VTS* VTS__join ( VTS* a, VTS* b );
1540
1541/* Compute the partial ordering relation of the two args. */
1542typedef
1543 enum { POrd_EQ=4, POrd_LT, POrd_GT, POrd_UN }
1544 POrd;
1545
1546POrd VTS__cmp ( VTS* a, VTS* b );
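/* Illustrative example: for vector clocks a = {T1:3, T2:5} and
   b = {T1:4, T2:5}, every component of a is <= the corresponding
   component of b, so VTS__cmp(a,b) == POrd_LT (a happens-before b).
   For a = {T1:3, T2:5} and b = {T1:4, T2:1} the comparison disagrees
   between the two components, so the result is POrd_UN: the clocks
   are unordered, i.e. describe concurrent states. */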
1547
1548/* Compute an arbitrary structural (total) ordering on the two args,
1549 based on their VCs, so they can be looked up in a table, tree, etc.
1550 Returns -1, 0 or 1. */
1551Word VTS__cmp_structural ( VTS* a, VTS* b );
1552
1553/* Debugging only. Display the given VTS in the buffer. */
1554void VTS__show ( HChar* buf, Int nBuf, VTS* vts );
1555
1556/* Debugging only. Return vts[index], so to speak. */
1557ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
1558
1559#endif /* ! __HB_VTS_H */
1560
1561
1562/*--------------- to do with Vector Timestamps ---------------*/
1563
1564/* Scalar Timestamp */
1565typedef
1566 struct {
1567 Thr* thr;
1568 ULong tym;
1569 }
1570 ScalarTS;
1571
1572
1573static Bool is_sane_VTS ( VTS* vts )
1574{
1575 UWord i, n;
1576 ScalarTS *st1, *st2;
1577 if (!vts) return False;
1578 if (!vts->ts) return False;
1579 n = VG_(sizeXA)( vts->ts );
1580 if (n >= 2) {
1581 for (i = 0; i < n-1; i++) {
1582 st1 = VG_(indexXA)( vts->ts, i );
1583 st2 = VG_(indexXA)( vts->ts, i+1 );
1584 if (st1->thr >= st2->thr)
1585 return False;
1586 if (st1->tym == 0 || st2->tym == 0)
1587 return False;
1588 }
1589 }
1590 return True;
1591}
1592
1593
1594/* Create a new, empty VTS.
1595*/
1596VTS* VTS__new ( void )
1597{
1598 VTS* vts;
1599 vts = HG_(zalloc)( "libhb.VTS__new.1", sizeof(VTS) );
1600 tl_assert(vts);
1601 vts->id = VtsID_INVALID;
1602 vts->ts = VG_(newXA)( HG_(zalloc), "libhb.VTS__new.2",
1603 HG_(free), sizeof(ScalarTS) );
1604 tl_assert(vts->ts);
1605 return vts;
1606}
1607
1608
1609/* Delete this VTS in its entirety.
1610*/
1611void VTS__delete ( VTS* vts )
1612{
1613 tl_assert(vts);
1614 tl_assert(vts->ts);
1615 VG_(deleteXA)( vts->ts );
1616 HG_(free)(vts);
1617}
1618
1619
1620/* Create a new singleton VTS.
1621*/
1622VTS* VTS__singleton ( Thr* thr, ULong tym ) {
1623 ScalarTS st;
1624 VTS* vts;
1625 tl_assert(thr);
1626 tl_assert(tym >= 1);
1627 vts = VTS__new();
1628 st.thr = thr;
1629 st.tym = tym;
1630 VG_(addToXA)( vts->ts, &st );
1631 return vts;
1632}
1633
1634
1635/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
1636 not modified.
1637*/
1638VTS* VTS__tick ( Thr* me, VTS* vts )
1639{
1640 ScalarTS* here = NULL;
1641 ScalarTS tmp;
1642 VTS* res;
1643 Word i, n;
1644 tl_assert(me);
1645 tl_assert(is_sane_VTS(vts));
1646 //if (0) VG_(printf)("tick vts thrno %ld szin %d\n",
1647 // (Word)me->errmsg_index, (Int)VG_(sizeXA)(vts) );
1648 res = VTS__new();
1649 n = VG_(sizeXA)( vts->ts );
1650
1651 /* main loop doesn't handle zero-entry case correctly, so
1652 special-case it. */
1653 if (n == 0) {
1654 tmp.thr = me;
1655 tmp.tym = 1;
1656 VG_(addToXA)( res->ts, &tmp );
1657 tl_assert(is_sane_VTS(res));
1658 return res;
1659 }
1660
1661 for (i = 0; i < n; i++) {
1662 here = VG_(indexXA)( vts->ts, i );
1663 if (me < here->thr) {
1664 /* We just went past 'me', without seeing it. */
1665 tmp.thr = me;
1666 tmp.tym = 1;
1667 VG_(addToXA)( res->ts, &tmp );
1668 tmp = *here;
1669 VG_(addToXA)( res->ts, &tmp );
1670 i++;
1671 break;
1672 }
1673 else if (me == here->thr) {
1674 tmp = *here;
1675 tmp.tym++;
1676 VG_(addToXA)( res->ts, &tmp );
1677 i++;
1678 break;
1679 }
1680 else /* me > here->thr */ {
1681 tmp = *here;
1682 VG_(addToXA)( res->ts, &tmp );
1683 }
1684 }
1685 tl_assert(i >= 0 && i <= n);
1686 if (i == n && here && here->thr < me) {
1687 tmp.thr = me;
1688 tmp.tym = 1;
1689 VG_(addToXA)( res->ts, &tmp );
1690 } else {
1691 for (/*keepgoing*/; i < n; i++) {
1692 here = VG_(indexXA)( vts->ts, i );
1693 tmp = *here;
1694 VG_(addToXA)( res->ts, &tmp );
1695 }
1696 }
1697 tl_assert(is_sane_VTS(res));
1698 //if (0) VG_(printf)("tick vts thrno %ld szou %d\n",
1699 // (Word)me->errmsg_index, (Int)VG_(sizeXA)(res) );
1700 return res;
1701}
1702
1703
1704/* Return a new VTS constructed as the join (max) of the 2 args.
1705 Neither arg is modified.
1706*/
1707VTS* VTS__join ( VTS* a, VTS* b )
1708{
1709 Word ia, ib, useda, usedb;
1710 ULong tyma, tymb, tymMax;
1711 Thr* thr;
1712 VTS* res;
1713 ScalarTS *tmpa, *tmpb;
1714
1715 tl_assert(a && a->ts);
1716 tl_assert(b && b->ts);
1717 useda = VG_(sizeXA)( a->ts );
1718 usedb = VG_(sizeXA)( b->ts );
1719
1720 res = VTS__new();
1721 ia = ib = 0;
1722
1723 while (1) {
1724
1725 /* This logic is to enumerate triples (thr, tyma, tymb) drawn
1726 from a and b in order, where thr is the next Thr*
1727 occurring in either a or b, and tyma/b are the relevant
1728 scalar timestamps, taking into account implicit zeroes. */
1729 tl_assert(ia >= 0 && ia <= useda);
1730 tl_assert(ib >= 0 && ib <= usedb);
1731 tmpa = tmpb = NULL;
1732
1733 if (ia == useda && ib == usedb) {
1734 /* both empty - done */
1735 break;
1736 }
1737 else
1738 if (ia == useda && ib != usedb) {
1739 /* a empty, use up b */
1740 tmpb = VG_(indexXA)( b->ts, ib );
1741 thr = tmpb->thr;
1742 tyma = 0;
1743 tymb = tmpb->tym;
1744 ib++;
1745 }
1746 else
1747 if (ia != useda && ib == usedb) {
1748 /* b empty, use up a */
1749 tmpa = VG_(indexXA)( a->ts, ia );
1750 thr = tmpa->thr;
1751 tyma = tmpa->tym;
1752 tymb = 0;
1753 ia++;
1754 }
1755 else {
1756 /* both not empty; extract lowest-Thr*'d triple */
1757 tmpa = VG_(indexXA)( a->ts, ia );
1758 tmpb = VG_(indexXA)( b->ts, ib );
1759 if (tmpa->thr < tmpb->thr) {
1760 /* a has the lowest unconsidered Thr* */
1761 thr = tmpa->thr;
1762 tyma = tmpa->tym;
1763 tymb = 0;
1764 ia++;
1765 }
1766 else
1767 if (tmpa->thr > tmpb->thr) {
1768 /* b has the lowest unconsidered Thr* */
1769 thr = tmpb->thr;
1770 tyma = 0;
1771 tymb = tmpb->tym;
1772 ib++;
1773 } else {
1774 /* they both next mention the same Thr* */
1775 tl_assert(tmpa->thr == tmpb->thr);
1776 thr = tmpa->thr; /* == tmpb->thr */
1777 tyma = tmpa->tym;
1778 tymb = tmpb->tym;
1779 ia++;
1780 ib++;
1781 }
1782 }
1783
1784 /* having laboriously determined (thr, tyma, tymb), do something
1785 useful with it. */
1786 tymMax = tyma > tymb ? tyma : tymb;
1787 if (tymMax > 0) {
1788 ScalarTS st;
1789 st.thr = thr;
1790 st.tym = tymMax;
1791 VG_(addToXA)( res->ts, &st );
1792 }
1793
1794 }
1795
1796 tl_assert(is_sane_VTS( res ));
1797
1798 return res;
1799}
1800
1801
1802/* Compute the partial ordering relation of the two args.
1803*/
1804POrd VTS__cmp ( VTS* a, VTS* b )
1805{
1806 Word ia, ib, useda, usedb;
1807 ULong tyma, tymb;
1808 Thr* thr;
1809 ScalarTS *tmpa, *tmpb;
1810
1811 Bool all_leq = True;
1812 Bool all_geq = True;
1813
1814 tl_assert(a && a->ts);
1815 tl_assert(b && b->ts);
1816 useda = VG_(sizeXA)( a->ts );
1817 usedb = VG_(sizeXA)( b->ts );
1818
1819 ia = ib = 0;
1820
1821 while (1) {
1822
1823 /* This logic is to enumerate triples (thr, tyma, tymb) drawn
1824 from a and b in order, where thr is the next Thr*
1825 occurring in either a or b, and tyma/b are the relevant
1826 scalar timestamps, taking into account implicit zeroes. */
1827 tl_assert(ia >= 0 && ia <= useda);
1828 tl_assert(ib >= 0 && ib <= usedb);
1829 tmpa = tmpb = NULL;
1830
1831 if (ia == useda && ib == usedb) {
1832 /* both empty - done */
1833 break;
1834 }
1835 else
1836 if (ia == useda && ib != usedb) {
1837 /* a empty, use up b */
1838 tmpb = VG_(indexXA)( b->ts, ib );
1839 thr = tmpb->thr;
1840 tyma = 0;
1841 tymb = tmpb->tym;
1842 ib++;
1843 }
1844 else
1845 if (ia != useda && ib == usedb) {
1846 /* b empty, use up a */
1847 tmpa = VG_(indexXA)( a->ts, ia );
1848 thr = tmpa->thr;
1849 tyma = tmpa->tym;
1850 tymb = 0;
1851 ia++;
1852 }
1853 else {
1854 /* both not empty; extract lowest-Thr*'d triple */
1855 tmpa = VG_(indexXA)( a->ts, ia );
1856 tmpb = VG_(indexXA)( b->ts, ib );
1857 if (tmpa->thr < tmpb->thr) {
1858 /* a has the lowest unconsidered Thr* */
1859 thr = tmpa->thr;
1860 tyma = tmpa->tym;
1861 tymb = 0;
1862 ia++;
1863 }
1864 else
1865 if (tmpa->thr > tmpb->thr) {
1866 /* b has the lowest unconsidered Thr* */
1867 thr = tmpb->thr;
1868 tyma = 0;
1869 tymb = tmpb->tym;
1870 ib++;
1871 } else {
1872 /* they both next mention the same Thr* */
1873 tl_assert(tmpa->thr == tmpb->thr);
1874 thr = tmpa->thr; /* == tmpb->thr */
1875 tyma = tmpa->tym;
1876 tymb = tmpb->tym;
1877 ia++;
1878 ib++;
1879 }
1880 }
1881
1882 /* having laboriously determined (thr, tyma, tymb), do something
1883 useful with it. */
1884 if (tyma < tymb)
1885 all_geq = False;
1886 if (tyma > tymb)
1887 all_leq = False;
1888 }
1889
1890 if (all_leq && all_geq)
1891 return POrd_EQ;
1892 /* now we know they aren't equal, so either all_leq or all_geq or
1893 both are false. */
1894 if (all_leq)
1895 return POrd_LT;
1896 if (all_geq)
1897 return POrd_GT;
1898 /* hmm, neither all_geq or all_leq. This means unordered. */
1899 return POrd_UN;
1900}
1901
1902
1903/* Compute an arbitrary structural (total) ordering on the two args,
1904 based on their VCs, so they can be looked up in a table, tree, etc.
1905 Returns -1, 0 or 1. (really just 'deriving Ord' :-)
1906*/
1907Word VTS__cmp_structural ( VTS* a, VTS* b )
1908{
1909 /* We just need to generate an arbitrary total ordering based on
1910      a->ts and b->ts.  Preferably do it in a way which notices likely
1911      differences relatively quickly. */
1912 Word i, useda, usedb;
1913 ScalarTS *tmpa, *tmpb;
1914
1915 tl_assert(a && a->ts);
1916 tl_assert(b && b->ts);
1917 useda = VG_(sizeXA)( a->ts );
1918 usedb = VG_(sizeXA)( b->ts );
1919
1920 if (useda < usedb) return -1;
1921 if (useda > usedb) return 1;
1922
1923 /* Same length vectors, so let's step through them together. */
1924 tl_assert(useda == usedb);
1925 for (i = 0; i < useda; i++) {
1926 tmpa = VG_(indexXA)( a->ts, i );
1927 tmpb = VG_(indexXA)( b->ts, i );
1928 if (tmpa->tym < tmpb->tym) return -1;
1929 if (tmpa->tym > tmpb->tym) return 1;
1930 if (tmpa->thr < tmpb->thr) return -1;
1931 if (tmpa->thr > tmpb->thr) return 1;
1932 }
1933
1934 /* They're identical. */
1935 return 0;
1936}
1937
1938
1939/* Debugging only. Display the given VTS in the buffer.
1940*/
1941void VTS__show ( HChar* buf, Int nBuf, VTS* vts ) {
1942 ScalarTS* st;
1943 HChar unit[64];
1944 Word i, n;
1945 Int avail = nBuf;
1946 tl_assert(vts && vts->ts);
1947 tl_assert(nBuf > 16);
1948 buf[0] = '[';
1949 buf[1] = 0;
1950 n = VG_(sizeXA)( vts->ts );
1951 for (i = 0; i < n; i++) {
1952 tl_assert(avail >= 40);
1953 st = VG_(indexXA)( vts->ts, i );
1954 VG_(memset)(unit, 0, sizeof(unit));
1955 VG_(sprintf)(unit, i < n-1 ? "%p:%lld " : "%p:%lld",
1956 st->thr, st->tym);
1957 if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
1958 VG_(strcat)(buf, " ...]");
1959 buf[nBuf-1] = 0;
1960 return;
1961 }
1962 VG_(strcat)(buf, unit);
1963 avail -= VG_(strlen)(unit);
1964 }
1965 VG_(strcat)(buf, "]");
1966 buf[nBuf-1] = 0;
1967}
1968
1969
1970/* Debugging only. Return vts[index], so to speak.
1971*/
1972ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) {
1973 UWord i, n;
1974 tl_assert(vts && vts->ts);
1975 n = VG_(sizeXA)( vts->ts );
1976 for (i = 0; i < n; i++) {
1977 ScalarTS* st = VG_(indexXA)( vts->ts, i );
1978 if (st->thr == idx)
1979 return st->tym;
1980 }
1981 return 0;
1982}
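
/* A minimal worked example of the primitives above, for orientation
   only (never compiled or called): it assumes two distinct Thr*
   values are to hand, and it leaks the intermediate VTSs, which is
   fine for an illustration but not for real use. */
#if 0
static void vts__worked_example ( Thr* t1, Thr* t2 )
{
   HChar buf[100];
   VTS *a, *b, *j;
   a = VTS__tick( t1, VTS__new() );       /* a = [ t1:1 ]       */
   a = VTS__tick( t1, a );                /* a = [ t1:2 ]       */
   b = VTS__tick( t2, VTS__new() );       /* b = [ t2:1 ]       */
   j = VTS__join( a, b );                 /* j = [ t1:2, t2:1 ] */
   tl_assert( VTS__cmp( a, b ) == POrd_UN );  /* a, b unordered */
   tl_assert( VTS__cmp( a, j ) == POrd_LT );  /* a precedes j   */
   tl_assert( VTS__indexAt_SLOW( j, t1 ) == 2 );
   tl_assert( VTS__indexAt_SLOW( j, t2 ) == 1 );
   VTS__show( buf, sizeof(buf), j );
   VG_(printf)("example join: %s\n", buf);
}
#endif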
1983
1984
1985/////////////////////////////////////////////////////////////////
1986/////////////////////////////////////////////////////////////////
1987// //
1988// SECTION END vts primitives //
1989// //
1990/////////////////////////////////////////////////////////////////
1991/////////////////////////////////////////////////////////////////
1992
1993
1994
1995/////////////////////////////////////////////////////////////////
1996/////////////////////////////////////////////////////////////////
1997// //
1998// SECTION BEGIN main library //
1999// //
2000/////////////////////////////////////////////////////////////////
2001/////////////////////////////////////////////////////////////////
2002
2003
2004/////////////////////////////////////////////////////////
2005// //
2006// VTS set //
2007// //
2008/////////////////////////////////////////////////////////
2009
2010static WordFM* /* VTS* void void */ vts_set = NULL;
2011
2012static void vts_set_init ( void )
2013{
2014 tl_assert(!vts_set);
2015 vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
2016 HG_(free),
2017 (Word(*)(UWord,UWord))VTS__cmp_structural );
2018 tl_assert(vts_set);
2019}
2020
2021/* Given a newly made VTS, look in vts_set to see if we already have
2022 an identical one. If yes, free up this one and return instead a
2023 pointer to the existing one. If no, add this one to the set and
2024 return the same pointer. Caller differentiates the two cases by
2025 comparing returned pointer with the supplied one (although that
2026 does require that the supplied VTS is not already in the set).
2027*/
2028static VTS* vts_set__find_and_dealloc__or_add ( VTS* cand )
2029{
2030 UWord keyW, valW;
2031 /* lookup cand (by value) */
2032 if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
2033 /* found it */
2034 tl_assert(valW == 0);
2035 /* if this fails, cand (by ref) was already present (!) */
2036 tl_assert(keyW != (UWord)cand);
2037 VTS__delete(cand);
2038 return (VTS*)keyW;
2039 } else {
2040 /* not present. Add and return pointer to same. */
2041 VG_(addToFM)( vts_set, (UWord)cand, 0/*val is unused*/ );
2042 return cand;
2043 }
2044}
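
/* Note: this is the interning (hash-consing) step for VTSs.  Once all
   VTSs are routed through here, each distinct vector timestamp is
   represented by at most one live VTS object, which is what later
   makes it sound to treat VtsID equality (vi1 == vi2) as value
   equality in the getOrdering/join2 fast paths below. */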
2045
2046
2047/////////////////////////////////////////////////////////
2048// //
2049// VTS table //
2050// //
2051/////////////////////////////////////////////////////////
2052
2053static void VtsID__invalidate_caches ( void ); /* fwds */
2054
2055/* A type to hold VTS table entries. Invariants:
2056 If .vts == NULL, then this entry is not in use, so:
2057 - .rc == 0
2058 - this entry is on the freelist (unfortunately, does not imply
2059        any constraints on value for .freelink)
2060 If .vts != NULL, then this entry is in use:
2061 - .vts is findable in vts_set
2062 - .vts->id == this entry number
2063 - no specific value for .rc (even 0 is OK)
2064      - this entry is not on freelist, so .freelink == VtsID_INVALID
2065*/
2066typedef
2067 struct {
2068 VTS* vts; /* vts, in vts_set */
2069 UWord rc; /* reference count - enough for entire aspace */
2070 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
2071 }
2072 VtsTE;
2073
2074/* The VTS table. */
2075static XArray* /* of VtsTE */ vts_tab = NULL;
2076
2077/* An index into the VTS table, indicating the start of the list of
2078 free (available for use) entries. If the list is empty, this is
2079 VtsID_INVALID. */
2080static VtsID vts_tab_freelist = VtsID_INVALID;
2081
2082/* Do a GC of vts_tab when the freelist becomes empty AND the size of
2083 vts_tab equals or exceeds this size. After GC, the value here is
2084 set appropriately so as to check for the next GC point. */
2085static Word vts_next_GC_at = 1000;
2086
2087static void vts_tab_init ( void )
2088{
2089 vts_tab
2090 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2091 HG_(free), sizeof(VtsTE) );
2092 vts_tab_freelist
2093 = VtsID_INVALID;
2094 tl_assert(vts_tab);
2095}
2096
2097/* Add ii to the free list, checking that it looks out-of-use. */
2098static void add_to_free_list ( VtsID ii )
2099{
2100 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2101 tl_assert(ie->vts == NULL);
2102 tl_assert(ie->rc == 0);
2103 tl_assert(ie->freelink == VtsID_INVALID);
2104 ie->freelink = vts_tab_freelist;
2105 vts_tab_freelist = ii;
2106}
2107
2108/* Get an entry from the free list. This will return VtsID_INVALID if
2109 the free list is empty. */
2110static VtsID get_from_free_list ( void )
2111{
2112 VtsID ii;
2113 VtsTE* ie;
2114 if (vts_tab_freelist == VtsID_INVALID)
2115 return VtsID_INVALID;
2116 ii = vts_tab_freelist;
2117 ie = VG_(indexXA)( vts_tab, ii );
2118 tl_assert(ie->vts == NULL);
2119 tl_assert(ie->rc == 0);
2120 vts_tab_freelist = ie->freelink;
2121 return ii;
2122}
2123
2124/* Produce a new VtsID that can be used, either by getting it from
2125 the freelist, or, if that is empty, by expanding vts_tab. */
2126static VtsID get_new_VtsID ( void )
2127{
2128 VtsID ii;
2129 VtsTE te;
2130 ii = get_from_free_list();
2131 if (ii != VtsID_INVALID)
2132 return ii;
2133 te.vts = NULL;
2134 te.rc = 0;
2135 te.freelink = VtsID_INVALID;
2136 ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2137 return ii;
2138}
2139
2140
2141/* Indirect callback from lib_zsm. */
2142static void VtsID__rcinc ( VtsID ii )
2143{
2144 VtsTE* ie;
2145 /* VG_(indexXA) does a range check for us */
2146 ie = VG_(indexXA)( vts_tab, ii );
2147 tl_assert(ie->vts); /* else it's not in use */
2148 tl_assert(ie->rc < ~0UL); /* else we can't continue */
2149 tl_assert(ie->vts->id == ii);
2150 ie->rc++;
2151}
2152
2153/* Indirect callback from lib_zsm. */
2154static void VtsID__rcdec ( VtsID ii )
2155{
2156 VtsTE* ie;
2157 /* VG_(indexXA) does a range check for us */
2158 ie = VG_(indexXA)( vts_tab, ii );
2159 tl_assert(ie->vts); /* else it's not in use */
2160 tl_assert(ie->rc > 0); /* else RC snafu */
2161 tl_assert(ie->vts->id == ii);
2162 ie->rc--;
2163}
2164
2165
2166/* Look up 'cand' in our collection of VTSs. If present, deallocate
2167 it and return the VtsID for the pre-existing version. If not
2168 present, add it to both vts_tab and vts_set, allocate a fresh VtsID
2169 for it, and return that. */
2170static VtsID vts_tab__find_and_dealloc__or_add ( VTS* cand )
2171{
2172 VTS* auld;
2173 tl_assert(cand->id == VtsID_INVALID);
2174 auld = vts_set__find_and_dealloc__or_add(cand);
2175 if (auld != cand) {
2176 /* We already have an Aulde one. Use that. */
2177 VtsTE* ie;
2178 tl_assert(auld->id != VtsID_INVALID);
2179 ie = VG_(indexXA)( vts_tab, auld->id );
2180 tl_assert(ie->vts == auld);
2181 return auld->id;
2182 } else {
2183 VtsID ii = get_new_VtsID();
2184 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2185 ie->vts = cand;
2186 ie->rc = 0;
2187 ie->freelink = VtsID_INVALID;
2188 cand->id = ii;
2189 return ii;
2190 }
2191}
2192
2193
2194static void show_vts_stats ( HChar* caller )
2195{
2196 UWord nSet, nTab, nLive;
2197 ULong totrc;
2198 UWord n, i;
2199 nSet = VG_(sizeFM)( vts_set );
2200 nTab = VG_(sizeXA)( vts_tab );
2201 totrc = 0;
2202 nLive = 0;
2203 n = VG_(sizeXA)( vts_tab );
2204 for (i = 0; i < n; i++) {
2205 VtsTE* ie = VG_(indexXA)( vts_tab, i );
2206 if (ie->vts) {
2207 nLive++;
2208 totrc += (ULong)ie->rc;
2209 } else {
2210 tl_assert(ie->rc == 0);
2211 }
2212 }
2213 VG_(printf)(" show_vts_stats %s\n", caller);
2214 VG_(printf)(" vts_tab size %4lu\n", nTab);
2215 VG_(printf)(" vts_tab live %4lu\n", nLive);
2216 VG_(printf)(" vts_set size %4lu\n", nSet);
2217 VG_(printf)(" total rc %4llu\n", totrc);
2218}
2219
2220/* NOT TO BE CALLED FROM WITHIN libzsm. */
2221static void vts_tab__do_GC ( Bool show_stats )
2222{
2223 UWord i, nTab, nLive, nFreed;
2224
2225 /* check this is actually necessary. */
2226 tl_assert(vts_tab_freelist == VtsID_INVALID);
2227
2228 /* empty the caches for partial order checks and binary joins. We
2229 could do better and prune out the entries to be deleted, but it
2230 ain't worth the hassle. */
2231 VtsID__invalidate_caches();
2232
2233 /* First, make the reference counts up to date. */
2234 zsm_flush_cache();
2235
2236 nTab = VG_(sizeXA)( vts_tab );
2237
2238 if (show_stats) {
2239 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
2240 show_vts_stats("before GC");
2241 }
2242
2243 /* Now we can inspect the entire vts_tab. Any entries
2244      with zero .rc fields are now no longer in use and can be put on
2245      the free list, removed from vts_set, and deleted. */
2246 nFreed = 0;
2247 for (i = 0; i < nTab; i++) {
2248 Bool present;
2249 UWord oldK = 0, oldV = 0;
2250 VtsTE* te = VG_(indexXA)( vts_tab, i );
2251 if (te->vts == NULL) {
2252 tl_assert(te->rc == 0);
2253 continue; /* already on the free list (presumably) */
2254 }
2255 if (te->rc > 0)
2256 continue; /* in use */
2257 /* Ok, we got one we can free. */
2258 tl_assert(te->vts->id == i);
2259 /* first, remove it from vts_set. */
2260 present = VG_(delFromFM)( vts_set,
2261 &oldK, &oldV, (UWord)te->vts );
2262 tl_assert(present); /* else it isn't in vts_set ?! */
2263 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2264 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
2265 /* now free the VTS itself */
2266 VTS__delete(te->vts);
2267 te->vts = NULL;
2268 /* and finally put this entry on the free list */
2269 tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
2270 add_to_free_list( i );
2271 nFreed++;
2272 }
2273
2274 /* Now figure out when the next GC should be. We'll allow the
2275 number of VTSs to double before GCing again. Except of course
2276 that since we can't (or, at least, don't) shrink vts_tab, we
2277      can't set the threshold value smaller than it. */
2278 tl_assert(nFreed <= nTab);
2279 nLive = nTab - nFreed;
2280 tl_assert(nLive >= 0 && nLive <= nTab);
2281 vts_next_GC_at = 2 * nLive;
2282 if (vts_next_GC_at < nTab)
2283 vts_next_GC_at = nTab;
2284
2285 if (show_stats) {
2286 show_vts_stats("after GC");
2287 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
2288 }
2289
2290 if (1) {
2291 static UInt ctr = 0;
2292 tl_assert(nTab > 0);
2293 VG_(printf)("libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)\n",
2294 ctr++, nTab, nLive, (100ULL * nLive) / nTab);
2295 }
2296}
2297
2298
2299/////////////////////////////////////////////////////////
2300// //
2301// Vts IDs //
2302// //
2303/////////////////////////////////////////////////////////
2304
2305//////////////////////////
2306static ULong stats__getOrdering_queries = 0;
2307static ULong stats__getOrdering_misses = 0;
2308static ULong stats__join2_queries = 0;
2309static ULong stats__join2_misses = 0;
2310
2311static inline UInt ROL32 ( UInt w, Int n ) {
2312 w = (w << n) | (w >> (32-n));
2313 return w;
2314}
2315static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
2316 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
2317 return hash % nTab;
2318}
2319
2320#define N_GETORDERING_CACHE 1023
2321static
2322 struct { VtsID vi1; VtsID vi2; POrd ord; }
2323 getOrdering_cache[N_GETORDERING_CACHE];
2324
2325#define N_JOIN2_CACHE 1023
2326static
2327 struct { VtsID vi1; VtsID vi2; VtsID res; }
2328 join2_cache[N_JOIN2_CACHE];
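
/* Both caches are direct-mapped: an entry lives at slot
   hash_VtsIDs(vi1,vi2,N_*_CACHE), and a colliding pair simply
   overwrites whatever was there before.  There is no chaining, so a
   miss costs one full VTS__cmp or VTS__join; only speed, never
   correctness, depends on what the caches happen to contain. */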
2329
2330static void VtsID__invalidate_caches ( void ) {
2331 Int i;
2332 for (i = 0; i < N_GETORDERING_CACHE; i++) {
2333 getOrdering_cache[i].vi1 = VtsID_INVALID;
2334 getOrdering_cache[i].vi2 = VtsID_INVALID;
2335 getOrdering_cache[i].ord = 0; /* an invalid POrd value */
2336 }
2337 for (i = 0; i < N_JOIN2_CACHE; i++) {
2338 join2_cache[i].vi1 = VtsID_INVALID;
2339 join2_cache[i].vi2 = VtsID_INVALID;
2340 join2_cache[i].res = VtsID_INVALID;
2341 }
2342}
2343//////////////////////////
2344
sewardjd52392d2008-11-08 20:36:26 +00002345//static Bool VtsID__is_valid ( VtsID vi ) {
2346// VtsTE* ve;
2347// if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
2348// return False;
2349// ve = VG_(indexXA)( vts_tab, vi );
2350// if (!ve->vts)
2351// return False;
2352// tl_assert(ve->vts->id == vi);
2353// return True;
2354//}
sewardjf98e1c02008-10-25 16:22:41 +00002355
2356static VTS* VtsID__to_VTS ( VtsID vi ) {
2357 VtsTE* te = VG_(indexXA)( vts_tab, vi );
2358 tl_assert(te->vts);
2359 return te->vts;
2360}
2361
2362static void VtsID__pp ( VtsID vi ) {
2363 HChar buf[100];
2364 VTS* vts = VtsID__to_VTS(vi);
2365 VTS__show( buf, sizeof(buf)-1, vts );
2366 buf[sizeof(buf)-1] = 0;
2367 VG_(printf)("%s", buf);
2368}
2369
2370/* compute partial ordering relation of vi1 and vi2. */
2371__attribute__((noinline))
2372static POrd VtsID__getOrdering_WRK ( VtsID vi1, VtsID vi2 ) {
2373 UInt hash;
2374 POrd ord;
2375 VTS *v1, *v2;
2376 //if (vi1 == vi2) return POrd_EQ;
2377 tl_assert(vi1 != vi2);
2378 ////++
2379 stats__getOrdering_queries++;
2380 hash = hash_VtsIDs(vi1, vi2, N_GETORDERING_CACHE);
2381 if (getOrdering_cache[hash].vi1 == vi1
2382 && getOrdering_cache[hash].vi2 == vi2)
2383 return getOrdering_cache[hash].ord;
2384 stats__getOrdering_misses++;
2385 ////--
2386 v1 = VtsID__to_VTS(vi1);
2387 v2 = VtsID__to_VTS(vi2);
2388 ord = VTS__cmp( v1, v2 );
2389 ////++
2390 getOrdering_cache[hash].vi1 = vi1;
2391 getOrdering_cache[hash].vi2 = vi2;
2392 getOrdering_cache[hash].ord = ord;
2393 ////--
2394 return ord;
2395}
2396static inline POrd VtsID__getOrdering ( VtsID vi1, VtsID vi2 ) {
2397 return vi1 == vi2 ? POrd_EQ : VtsID__getOrdering_WRK(vi1, vi2);
2398}
2399
2400/* compute binary join */
2401__attribute__((noinline))
2402static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
2403 UInt hash;
2404 VtsID res;
2405 VTS *vts1, *vts2, *nyu;
2406 //if (vi1 == vi2) return vi1;
2407 tl_assert(vi1 != vi2);
2408 ////++
2409 stats__join2_queries++;
2410 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
2411 if (join2_cache[hash].vi1 == vi1
2412 && join2_cache[hash].vi2 == vi2)
2413 return join2_cache[hash].res;
2414 stats__join2_misses++;
2415 ////--
2416 vts1 = VtsID__to_VTS(vi1);
2417 vts2 = VtsID__to_VTS(vi2);
2418 nyu = VTS__join(vts1,vts2);
2419 res = vts_tab__find_and_dealloc__or_add(nyu);
2420 ////++
2421 join2_cache[hash].vi1 = vi1;
2422 join2_cache[hash].vi2 = vi2;
2423 join2_cache[hash].res = res;
2424 ////--
2425 return res;
2426}
2427static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
2428 return vi1 == vi2 ? vi1 : VtsID__join2_WRK(vi1, vi2);
2429}
2430
2431/* create a singleton VTS, namely [thr:1] */
2432static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
2433 VTS* nyu = VTS__singleton(thr,tym);
2434 return vts_tab__find_and_dealloc__or_add(nyu);
2435}
2436
2437/* tick operation, creates value 1 if specified index is absent */
2438static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
2439 VTS* vts = VtsID__to_VTS(vi);
2440 VTS* nyu = VTS__tick(idx,vts);
2441 return vts_tab__find_and_dealloc__or_add(nyu);
2442}
2443
2444/* index into a VTS (only for assertions) */
2445static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
2446 VTS* vts = VtsID__to_VTS(vi);
2447 return VTS__indexAt_SLOW( vts, idx );
2448}
2449
2450
2451/////////////////////////////////////////////////////////
2452// //
2453// Threads //
2454// //
2455/////////////////////////////////////////////////////////
2456
2457struct _Thr {
2458 /* Current VTSs for this thread. They change as we go along. viR
2459 is the VTS to be used for reads, viW for writes. Usually they
2460 are the same, but can differ when we deal with reader-writer
2461 locks. It is always the case that VtsID__getOrdering(viW,viR)
2462      == POrd_LT or POrd_EQ -- that is, viW must be the same as,
2463      or lagging behind, viR. */
2464 VtsID viR;
2465 VtsID viW;
2466 /* opaque (to us) data we hold on behalf of the library's user. */
2467 void* opaque;
2468};
2469
2470static Thr* Thr__new ( void ) {
2471 Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
2472 thr->viR = VtsID_INVALID;
2473 thr->viW = VtsID_INVALID;
2474 return thr;
2475}
2476
2477
2478/////////////////////////////////////////////////////////
2479// //
2480// Shadow Values //
2481// //
2482/////////////////////////////////////////////////////////
2483
2484// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
2485// hb_zsm.h. We have to do everything else here.
2486
2487/* SVal is 64 bit unsigned int.
2488
2489 <---------30---------> <---------30--------->
2490 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
2491 01 X--------------------X XX X--------------------X E(rror)
2492 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
2493 11 X--------------------X XX X--------------------X I: SVal_INVALID
2494*/
2495#define SVAL_TAGMASK (3ULL << 62)
2496
2497static inline Bool SVal__isC ( SVal s ) {
2498 return (0ULL << 62) == (s & SVAL_TAGMASK);
2499}
2500static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
2501 //tl_assert(VtsID__is_valid(rmini));
2502 //tl_assert(VtsID__is_valid(wmini));
2503 return (((ULong)rmini) << 32) | ((ULong)wmini);
2504}
2505static inline VtsID SVal__unC_Rmin ( SVal s ) {
2506 tl_assert(SVal__isC(s));
2507 return (VtsID)(s >> 32);
2508}
2509static inline VtsID SVal__unC_Wmin ( SVal s ) {
2510 tl_assert(SVal__isC(s));
2511 return (VtsID)(s & 0xFFFFFFFFULL);
2512}
2513
2514static Bool SVal__isE ( SVal s ) {
2515 return (1ULL << 62) == (s & SVAL_TAGMASK);
2516}
2517static SVal SVal__mkE ( void ) {
2518 return 1ULL << 62;
2519}
2520
2521static Bool SVal__isA ( SVal s ) {
2522 return (2ULL << 62) == (s & SVAL_TAGMASK);
2523}
2524static SVal SVal__mkA ( void ) {
2525 return 2ULL << 62;
2526}
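
/* A small self-check sketch of the C encoding above, illustration
   only (never compiled).  The concrete VtsID values are made up; the
   key implicit assumption, taken from the layout diagram, is that
   live VtsIDs fit in 30 bits, so the top two bits of the packed Rmin
   stay 00 and the result is classified as a C value. */
#if 0
static void sval__packing_example ( void )
{
   VtsID r = 0x123, w = 0x456;        /* arbitrary small VtsIDs */
   SVal  s = SVal__mkC( r, w );
   tl_assert( SVal__isC(s) );         /* tag bits 63:62 are 00  */
   tl_assert( !SVal__isA(s) && !SVal__isE(s) );
   tl_assert( SVal__unC_Rmin(s) == r );
   tl_assert( SVal__unC_Wmin(s) == w );
}
#endif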
2527
2528/* Direct callback from lib_zsm. */
2529static void SVal__rcinc ( SVal s ) {
2530 if (SVal__isC(s)) {
2531 VtsID__rcinc( SVal__unC_Rmin(s) );
2532 VtsID__rcinc( SVal__unC_Wmin(s) );
2533 }
2534}
2535
2536/* Direct callback from lib_zsm. */
2537static void SVal__rcdec ( SVal s ) {
2538 if (SVal__isC(s)) {
2539 VtsID__rcdec( SVal__unC_Rmin(s) );
2540 VtsID__rcdec( SVal__unC_Wmin(s) );
2541 }
2542}
2543
2544
2545/////////////////////////////////////////////////////////
2546// //
2547// Change-event map2 //
2548// //
2549/////////////////////////////////////////////////////////
2550
2551#define EVENT_MAP_GC_AT (1 * 1000 * 1000)
2552#define EVENT_MAP_GC_DISCARD_FRACTION 0.5
2553
2554/* This is in two parts:
2555
2556   1. A set of RCECs.  This is a set of reference-counted stack
2557      traces.  When the reference count of a stack trace becomes zero,
2558      it is removed from the set and freed up.  The intent is to have
2559      a set of stack traces which can be referred to from (2), but to
2560      only represent each one once.  The set is implemented as a
2561      hash table (contextTab), searched by comparing the trace vectors.
2562
2563 2. An OSet of OldRefs. These store information about each old ref
2564 that we need to record. It is indexed by address of the
2565 location for which the information is recorded. For LRU
2566 purposes, each OldRef also contains a generation number,
2567 indicating when it was most recently accessed.
2568
2569 The important part of an OldRef is, however, its accs[] array.
2570 This is an array of N_OLDREF_ACCS pairs of Thr and a RCEC. This
2571 allows us to collect the last access-traceback by up to
2572 N_OLDREF_ACCS different threads for this location. The accs[]
2573      array is an MTF (move-to-front) array.  If a pair falls off the
2574      end, that's too bad -- we will lose info about that thread's
2575      access to this location.
2576
2577 When this OSet becomes too big, we can throw away the entries
2578 whose generation numbers are below some threshold; hence doing
2579 approximate LRU discarding. For each discarded OldRef we must
2580 of course decrement the reference count on the all RCECs it
2581 refers to, in order that entries from (1) eventually get
2582 discarded too.
2583*/
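
/* Worked example of the accs[] (move-to-front) behaviour, for
   orientation only, with N_OLDREF_ACCS == 3 and entries written as
   (thread, stack-trace):

     accs = [ (T3,e3), (T1,e1), (T2,e2) ]

   - another access by T2: T2's slot is swapped one place towards the
     front and its stack trace replaced by the new one, giving
     [ (T3,e3), (T2,new), (T1,e1) ];

   - an access by a previously unseen T4: the last slot's stack trace
     is rc-decremented and dropped, the rest shuffle down, and T4 goes
     in at the front, giving [ (T4,new), (T3,e3), (T1,e1) ] -- the
     record of T2's access is lost. */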
2584
2585
2586static UWord stats__ctxt_rcdec1 = 0;
2587static UWord stats__ctxt_rcdec2 = 0;
2588static UWord stats__ctxt_rcdec3 = 0;
2589static UWord stats__ctxt_rcdec_calls = 0;
2590static UWord stats__ctxt_rcdec_discards = 0;
2591static UWord stats__ctxt_rcdec1_eq = 0;
2592
2593static UWord stats__ctxt_tab_curr = 0;
2594static UWord stats__ctxt_tab_max = 0;
2595
2596static UWord stats__ctxt_tab_qs = 0;
2597static UWord stats__ctxt_tab_cmps = 0;
2598
2599
2600///////////////////////////////////////////////////////
2601//// Part (1): A hash table of RCECs
2602///
2603
2604#define N_FRAMES 8
2605
2606// (UInt) `echo "Reference Counted Execution Context" | md5sum`
2607#define RCEC_MAGIC 0xab88abb2UL
2608
2609//#define N_RCEC_TAB 98317 /* prime */
2610#define N_RCEC_TAB 196613 /* prime */
2611
2612typedef
2613 struct _RCEC {
2614 struct _RCEC* next;
2615 UWord magic;
2616 UWord rc;
2617 UWord rcX; /* used for crosschecking */
2618 UWord frames[1 + N_FRAMES]; /* first word is hash of all the rest */
2619 }
2620 RCEC;
2621
2622static RCEC** contextTab = NULL; /* hash table of RCEC*s */
2623
2624
2625/* Gives an arbitrary total order on RCEC .frames fields */
2626static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
2627 Word i;
2628 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
2629 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
2630 if (ec1->frames[0] < ec2->frames[0]) return -1;
2631 if (ec1->frames[0] > ec2->frames[0]) return 1;
2632 for (i = 1; i < 1 + N_FRAMES; i++) {
2633 if (ec1->frames[i] < ec2->frames[i]) return -1;
2634 if (ec1->frames[i] > ec2->frames[i]) return 1;
2635 }
2636 return 0;
2637}
2638
2639
2640/* Dec the ref of this RCEC. */
2641static void ctxt__rcdec ( RCEC* ec )
2642{
2643 stats__ctxt_rcdec_calls++;
2644 tl_assert(ec && ec->magic == RCEC_MAGIC);
2645 tl_assert(ec->rc > 0);
2646 ec->rc--;
2647}
2648
2649static void ctxt__rcinc ( RCEC* ec )
2650{
2651 tl_assert(ec && ec->magic == RCEC_MAGIC);
2652 ec->rc++;
2653}
2654
2655
2656/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
2657   move it one step closer to the front of the list, so as to make
2658 subsequent searches for it cheaper. */
2659static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
2660{
2661 RCEC *ec0, *ec1, *ec2;
2662 if (ec == *headp)
2663 tl_assert(0); /* already at head of list */
2664 tl_assert(ec != NULL);
2665 ec0 = *headp;
2666 ec1 = NULL;
2667 ec2 = NULL;
2668 while (True) {
2669 if (ec0 == NULL || ec0 == ec) break;
2670 ec2 = ec1;
2671 ec1 = ec0;
2672 ec0 = ec0->next;
2673 }
2674 tl_assert(ec0 == ec);
2675 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
2676 RCEC* tmp;
2677 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
2678 predecessor. Swap ec0 and ec1, that is, move ec0 one step
2679 closer to the start of the list. */
2680 tl_assert(ec2->next == ec1);
2681 tl_assert(ec1->next == ec0);
2682 tmp = ec0->next;
2683 ec2->next = ec0;
2684 ec0->next = ec1;
2685 ec1->next = tmp;
2686 }
2687 else
2688 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
2689 /* it's second in the list. */
2690 tl_assert(*headp == ec1);
2691 tl_assert(ec1->next == ec0);
2692 ec1->next = ec0->next;
2693 ec0->next = ec1;
2694 *headp = ec0;
2695 }
2696}
2697
2698
2699/* Find the given RCEC in the tree, and return a pointer to it. Or,
2700 if not present, add the given one to the tree (by making a copy of
2701 it, so the caller can immediately deallocate the original) and
2702 return a pointer to the copy. The caller can safely have 'example'
2703 on its stack, since we will always return a pointer to a copy of
2704 it, not to the original. Note that the inserted node will have .rc
2705 of zero and so the caller must immediatly increment it. */
2706   of zero and so the caller must immediately increment it. */
2707static RCEC* ctxt__find_or_add ( RCEC* example )
2708{
2709 UWord hent;
2710 RCEC* copy;
2711 tl_assert(example && example->magic == RCEC_MAGIC);
2712 tl_assert(example->rc == 0);
2713
2714 /* Search the hash table to see if we already have it. */
2715 stats__ctxt_tab_qs++;
2716 hent = example->frames[0] % N_RCEC_TAB;
2717 copy = contextTab[hent];
2718 while (1) {
2719 if (!copy) break;
2720 tl_assert(copy->magic == RCEC_MAGIC);
2721 stats__ctxt_tab_cmps++;
2722 if (0 == RCEC__cmp_by_frames(copy, example)) break;
2723 copy = copy->next;
2724 }
2725
2726 if (copy) {
2727 tl_assert(copy != example);
2728 /* optimisation: if it's not at the head of its list, move 1
2729 step fwds, to make future searches cheaper */
2730 if (copy != contextTab[hent]) {
2731 move_RCEC_one_step_forward( &contextTab[hent], copy );
2732 }
2733 } else {
2734 copy = HG_(zalloc)( "libhb.cfoa.1", sizeof(RCEC) );
2735 tl_assert(copy != example);
2736 *copy = *example;
2737 copy->next = contextTab[hent];
2738 contextTab[hent] = copy;
2739 stats__ctxt_tab_curr++;
2740 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
2741 stats__ctxt_tab_max = stats__ctxt_tab_curr;
2742 }
2743 return copy;
2744}
2745
2746static inline UWord ROLW ( UWord w, Int n )
2747{
2748 Int bpw = 8 * sizeof(UWord);
2749 w = (w << n) | (w >> (bpw-n));
2750 return w;
2751}
2752
2753__attribute__((noinline))
2754static RCEC* get_RCEC ( Thr* thr )
2755{
2756 UWord hash, i;
2757 RCEC example;
2758 example.magic = RCEC_MAGIC;
2759 example.rc = 0;
2760 example.rcX = 0;
2761 main_get_stacktrace( thr, &example.frames[1], N_FRAMES );
2762 hash = 0;
2763 for (i = 1; i < 1 + N_FRAMES; i++) {
2764 hash ^= example.frames[i];
2765 hash = ROLW(hash, 19);
2766 }
2767 example.frames[0] = hash;
2768 return ctxt__find_or_add( &example );
2769}
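
/* Note on get_RCEC/ctxt__find_or_add: frames[0] is a rotate-and-XOR
   hash of the N_FRAMES captured return addresses.  It serves double
   duty: contextTab is indexed by frames[0] % N_RCEC_TAB, and
   RCEC__cmp_by_frames compares frames[0] first, so unequal stack
   traces are usually told apart after a single word comparison. */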
2770
2771///////////////////////////////////////////////////////
2772//// Part (2): An OSet of OldRefs, that refer to (1)
2773///
2774
2775// (UInt) `echo "Old Reference Information" | md5sum`
2776#define OldRef_MAGIC 0x30b1f075UL
2777
2778typedef struct { Thr* thr; RCEC* rcec; } Thr_n_RCEC;
2779
2780#define N_OLDREF_ACCS 3
2781
2782typedef
2783 struct {
2784 Addr ea;
2785 UWord magic;
2786 UWord gen; /* when most recently accessed */
2787 /* unused slots in this array have .thr == NULL */
2788 Thr_n_RCEC accs[N_OLDREF_ACCS];
2789 }
2790 OldRef;
2791
sewardjf98e1c02008-10-25 16:22:41 +00002792static OSet* oldrefTree = NULL; /* OSet* of OldRef */
2793static UWord oldrefGen = 0; /* current LRU generation # */
2794static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
2795static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */
2796
2797static void event_map_bind ( Addr a, Thr* thr )
2798{
2799 OldRef key, *ref;
2800 RCEC* here;
2801 Word i, j;
2802
2803 key.ea = a;
2804 key.magic = OldRef_MAGIC;
2805
2806 ref = VG_(OSetGen_Lookup)( oldrefTree, &key );
2807
2808 if (ref) {
2809
2810 /* We already have a record for this address. We now need to
2811 see if we have a stack trace pertaining to this thread's
2812 access. */
2813 tl_assert(ref->magic == OldRef_MAGIC);
2814
2815 tl_assert(thr);
2816 for (i = 0; i < N_OLDREF_ACCS; i++) {
2817 if (ref->accs[i].thr == thr)
2818 break;
2819 }
2820
2821 if (i < N_OLDREF_ACCS) {
2822 /* thread 'thr' has an entry at index 'i'. Update it. */
2823 if (i > 0) {
2824 Thr_n_RCEC tmp = ref->accs[i-1];
2825 ref->accs[i-1] = ref->accs[i];
2826 ref->accs[i] = tmp;
2827 i--;
2828 }
2829 here = get_RCEC( thr );
2830 if (here == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
2831 ctxt__rcinc( here );
2832 stats__ctxt_rcdec1++;
2833 ctxt__rcdec( ref->accs[i].rcec );
2834 ref->accs[i].rcec = here;
2835 tl_assert(ref->accs[i].thr == thr);
2836 } else {
2837 here = get_RCEC( thr );
2838 ctxt__rcinc( here );
2839 /* No entry for this thread. Shuffle all of them down one
2840 slot, and put the new entry at the start of the array. */
2841 if (ref->accs[N_OLDREF_ACCS-1].thr) {
2842 /* the last slot is in use. We must dec the rc on the
2843 associated rcec. */
2844 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
2845 stats__ctxt_rcdec2++;
2846 ctxt__rcdec(ref->accs[N_OLDREF_ACCS-1].rcec);
2847 } else {
2848 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
2849 }
2850 for (j = N_OLDREF_ACCS-1; j >= 1; j--)
2851 ref->accs[j] = ref->accs[j-1];
2852 ref->accs[0].thr = thr;
2853 ref->accs[0].rcec = here;
2854 tl_assert(thr); /* thr==NULL is used to signify an empty slot,
2855 so we can't add a NULL thr. */
2856 }
2857
2858 ref->gen = oldrefGen;
2859 tl_assert(ref->ea == a);
2860
2861 } else {
2862
2863 /* We don't have a record for this address. Create a new one. */
2864 if (oldrefTreeN >= oldrefGenIncAt) {
2865 oldrefGen++;
2866 oldrefGenIncAt = oldrefTreeN + 50000;
2867 if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
2868 oldrefGen, oldrefTreeN );
2869 }
2870 here = get_RCEC( thr );
2871 ctxt__rcinc(here);
2872 ref = VG_(OSetGen_AllocNode)( oldrefTree, sizeof(OldRef) );
2873 ref->magic = OldRef_MAGIC;
2874 ref->gen = oldrefGen;
2875 ref->ea = a;
2876 ref->accs[0].rcec = here;
2877 ref->accs[0].thr = thr;
2878 tl_assert(thr); /* thr==NULL is used to signify an empty slot,
2879 so we can't add a NULL thr. */
2880 for (j = 1; j < N_OLDREF_ACCS; j++) {
2881 ref->accs[j].thr = NULL;
2882 ref->accs[j].rcec = NULL;
2883 }
2884 VG_(OSetGen_Insert)( oldrefTree, ref );
2885 oldrefTreeN++;
2886
2887 }
2888}
2889
2890
2891static
sewardjd52392d2008-11-08 20:36:26 +00002892Bool event_map_lookup ( /*OUT*/ExeContext** resEC,
sewardjf98e1c02008-10-25 16:22:41 +00002893 /*OUT*/Thr** resThr,
2894 Thr* thr_acc, Addr a )
2895{
2896 Word i;
2897 OldRef key, *ref;
2898
2899 tl_assert(thr_acc);
2900
2901 key.ea = a;
2902 key.magic = OldRef_MAGIC;
2903
2904 ref = VG_(OSetGen_Lookup)( oldrefTree, &key );
2905 if (ref) {
2906 tl_assert(ref->magic == OldRef_MAGIC);
2907 tl_assert(ref->accs[0].thr); /* first slot must always be used */
2908
2909 for (i = 0; i < N_OLDREF_ACCS; i++) {
2910 if (ref->accs[i].thr != NULL
2911 && ref->accs[i].thr != thr_acc)
2912 break;
2913 }
2914 /* If we didn't find an entry for some thread other than
2915 thr_acc, just return the entry for thread 0. It'll look
2916 pretty stupid to the user though. */
2917 if (i == N_OLDREF_ACCS)
2918 i = 0;
2919
2920 tl_assert(i >= 0 && i < N_OLDREF_ACCS);
2921 tl_assert(ref->accs[i].thr);
2922 tl_assert(ref->accs[i].rcec);
2923 tl_assert(ref->accs[i].rcec->magic == RCEC_MAGIC);
2924
sewardjd52392d2008-11-08 20:36:26 +00002925 *resEC = VG_(make_ExeContext_from_StackTrace)(
2926 &ref->accs[i].rcec->frames[1], N_FRAMES
2927 );
sewardjf98e1c02008-10-25 16:22:41 +00002928 *resThr = ref->accs[i].thr;
2929 return True;
2930 } else {
2931 return False;
2932 }
2933}
2934
2935static void event_map_init ( void )
2936{
2937 Word i;
2938 tl_assert(!contextTab);
2939 contextTab = HG_(zalloc)( "libhb.event_map_init.1 (context table)",
2940 N_RCEC_TAB * sizeof(RCEC*) );
2941 tl_assert(contextTab);
2942 for (i = 0; i < N_RCEC_TAB; i++)
2943 contextTab[i] = NULL;
2944
2945 tl_assert(!oldrefTree);
2946 tl_assert(offsetof(OldRef,ea) == 0); /* prereq for unboxed cmps */
2947 oldrefTree = VG_(OSetGen_Create)(
2948 offsetof(OldRef,ea), /* == 0 */
2949 NULL, /* use unboxed cmp on OldRefs */
2950 HG_(zalloc), "libhb.event_map_init.2 (oldref tree)",
2951 HG_(free)
2952 );
2953 tl_assert(oldrefTree);
2954
2955 oldrefGen = 0;
2956 oldrefGenIncAt = 0;
2957 oldrefTreeN = 0;
2958}
2959
2960static void event_map__check_reference_counts ( Bool before )
2961{
2962 RCEC* rcec;
2963 OldRef* oldref;
2964 Word i;
2965 UWord nEnts = 0;
2966
2967 /* Set the 'check' reference counts to zero. Also, optionally
2968 check that the real reference counts are non-zero. We allow
2969 these to fall to zero before a GC, but the GC must get rid of
2970 all those that are zero, hence none should be zero after a
2971 GC. */
2972 for (i = 0; i < N_RCEC_TAB; i++) {
2973 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
2974 nEnts++;
2975 tl_assert(rcec);
2976 tl_assert(rcec->magic == RCEC_MAGIC);
2977 if (!before)
2978 tl_assert(rcec->rc > 0);
2979 rcec->rcX = 0;
2980 }
2981 }
2982
2983 /* check that the stats are sane */
2984 tl_assert(nEnts == stats__ctxt_tab_curr);
2985 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
2986
2987 /* visit all the referencing points, inc check ref counts */
2988 VG_(OSetGen_ResetIter)( oldrefTree );
2989 while ( (oldref = VG_(OSetGen_Next)( oldrefTree )) ) {
2990 tl_assert(oldref->magic == OldRef_MAGIC);
2991 for (i = 0; i < N_OLDREF_ACCS; i++) {
2992 if (oldref->accs[i].thr) {
2993 tl_assert(oldref->accs[i].rcec);
2994 tl_assert(oldref->accs[i].rcec->magic == RCEC_MAGIC);
2995 oldref->accs[i].rcec->rcX++;
2996 } else {
2997 tl_assert(!oldref->accs[i].rcec);
2998 }
2999 }
3000 }
3001
3002 /* compare check ref counts with actual */
3003 for (i = 0; i < N_RCEC_TAB; i++) {
3004 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
3005 tl_assert(rcec->rc == rcec->rcX);
3006 }
3007 }
3008}
3009
3010static void event_map_maybe_GC ( void )
3011{
3012 OldRef* oldref;
3013 UWord keyW, valW, retained, maxGen;
3014 WordFM* genMap;
3015 XArray* refs2del;
3016 Word i, j, n2del;
3017
3018 if (LIKELY(oldrefTreeN < EVENT_MAP_GC_AT))
3019 return;
3020
3021 if (0)
3022 VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
3023
3024 /* Check our counting is sane */
3025 tl_assert(oldrefTreeN == (UWord) VG_(OSetGen_Size)( oldrefTree ));
3026
3027 /* Check the reference counts */
3028 event_map__check_reference_counts( True/*before*/ );
3029
3030 /* Compute the distribution of generation values in the ref tree */
3031 /* genMap :: generation-number -> count-of-nodes-with-that-number */
3032 genMap = VG_(newFM)( HG_(zalloc), "libhb.emmG.1",
3033 HG_(free), NULL );
3034
3035 VG_(OSetGen_ResetIter)( oldrefTree );
3036 while ( (oldref = VG_(OSetGen_Next)( oldrefTree )) ) {
3037 UWord key = oldref->gen;
3038 keyW = valW = 0;
3039 if (VG_(lookupFM)(genMap, &keyW, &valW, key )) {
3040 tl_assert(keyW == key);
3041 tl_assert(valW > 0);
3042 }
3043 /* now valW is the old count for generation 'key' */
3044 VG_(addToFM)(genMap, key, valW+1);
3045 }
3046
3047 tl_assert(VG_(sizeFM)(genMap) > 0);
3048
3049 retained = oldrefTreeN;
3050 maxGen = 0;
3051 VG_(initIterFM)( genMap );
3052 while (VG_(nextIterFM)( genMap, &keyW, &valW )) {
3053 tl_assert(keyW > 0); /* can't allow a generation # 0 */
3054 if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW );
3055 tl_assert(keyW >= maxGen);
3056 tl_assert(retained >= valW);
3057 if (retained - valW
3058 > (UWord)(EVENT_MAP_GC_AT * EVENT_MAP_GC_DISCARD_FRACTION)) {
3059 retained -= valW;
3060 maxGen = keyW;
3061 } else {
3062 break;
3063 }
3064 }
3065 VG_(doneIterFM)( genMap );
3066
3067 VG_(printf)(
3068 "libhb: EvM GC: delete generations %lu and below, "
3069 "retaining %lu entries\n",
3070 maxGen, retained );
3071
3072 VG_(deleteFM)( genMap, NULL, NULL );
3073
3074 /* If this fails, it means there's only one generation in the
3075 entire tree. So we're kind of in a bad situation, and need to
3076 do some stop-gap measure, such as randomly deleting half the
3077 entries. */
3078 tl_assert(retained < oldrefTreeN);
3079
3080 /* Now make up a big list of the oldrefTree entries we want to
3081 delete. We can't simultaneously traverse the tree and delete
3082 stuff from it, so first we need to copy them off somewhere
3083 else. (sigh) */
3084 refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.1",
3085 HG_(free), sizeof(OldRef*) );
3086
3087 VG_(OSetGen_ResetIter)( oldrefTree );
3088 while ( (oldref = VG_(OSetGen_Next)( oldrefTree )) ) {
3089 tl_assert(oldref->magic == OldRef_MAGIC);
3090 if (oldref->gen <= maxGen) {
3091 VG_(addToXA)( refs2del, &oldref );
3092 }
3093 }
3094
3095 n2del = VG_(sizeXA)( refs2del );
3096 tl_assert(n2del == (Word)(oldrefTreeN - retained));
3097
3098 if (0) VG_(printf)("%s","deleting entries\n");
3099 for (i = 0; i < n2del; i++) {
3100 void* nd;
3101 OldRef* ref = *(OldRef**)VG_(indexXA)( refs2del, i );
3102 tl_assert(ref);
3103 tl_assert(ref->magic == OldRef_MAGIC);
3104 for (j = 0; j < N_OLDREF_ACCS; j++) {
3105 if (ref->accs[j].rcec) {
3106 tl_assert(ref->accs[j].thr);
3107 stats__ctxt_rcdec3++;
3108 ctxt__rcdec( ref->accs[j].rcec );
3109 } else {
3110 tl_assert(!ref->accs[j].thr);
3111 }
3112 }
3113 nd = VG_(OSetGen_Remove)( oldrefTree, ref );
3114 VG_(OSetGen_FreeNode)( oldrefTree, nd );
3115 }
3116
3117 VG_(deleteXA)( refs2del );
3118
3119 tl_assert( VG_(OSetGen_Size)( oldrefTree ) == retained );
3120
3121 oldrefTreeN = retained;
3122 oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
3123
3124 /* Throw away all RCECs with zero reference counts */
3125 for (i = 0; i < N_RCEC_TAB; i++) {
3126 RCEC** pp = &contextTab[i];
3127 RCEC* p = *pp;
3128 while (p) {
3129 if (p->rc == 0) {
3130 *pp = p->next;
3131 HG_(free)(p);
3132 p = *pp;
3133 tl_assert(stats__ctxt_tab_curr > 0);
3134 stats__ctxt_tab_curr--;
3135 } else {
3136 pp = &p->next;
3137 p = p->next;
3138 }
3139 }
3140 }
3141
3142 /* Check the reference counts */
3143 event_map__check_reference_counts( False/*after*/ );
3144
3145 //if (0)
3146 //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
3147 // VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
3148
3149}
3150
3151
3152/////////////////////////////////////////////////////////
3153// //
3154// Core MSM //
3155// //
3156/////////////////////////////////////////////////////////
3157
3158#define MSM_CONFACC 1   /* 1: record old-access info (event_map_bind) on C-state changes, for race reports */
3159
3160#define MSM_RACE2ERR 1  /* 1: a race drives the location into the error state E; 0: keep a joined C state */
3161
3162#define MSM_CHECK 0     /* 1: enable (expensive) is_sane_SVal_C checks in msm_read/msm_write */
3163
3164static ULong stats__msm_read = 0;
3165static ULong stats__msm_read_change = 0;
3166static ULong stats__msm_write = 0;
3167static ULong stats__msm_write_change = 0;
3168
3169__attribute__((noinline))
3170static void record_race_info ( Thr* acc_thr,
3171 Addr acc_addr, SizeT szB, Bool isWrite,
3172 SVal svOld, SVal svNew )
3173{
3174 Bool found;
3175 Thr* thrp = NULL;
sewardjd52392d2008-11-08 20:36:26 +00003176 ExeContext* where = NULL;
3177 ExeContext* wherep = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00003178 where = main_get_EC( acc_thr );
3179 found = event_map_lookup( &wherep, &thrp, acc_thr, acc_addr );
3180 if (found) {
3181 tl_assert(wherep);
3182 tl_assert(thrp);
3183 tl_assert(thrp->opaque);
3184 tl_assert(acc_thr->opaque);
3185 HG_(record_error_Race)( acc_thr->opaque, acc_addr,
3186 isWrite, szB, NULL/*mb_lastlock*/,
3187 wherep, thrp->opaque );
3188 } else {
3189 tl_assert(!wherep);
3190 tl_assert(!thrp);
3191 tl_assert(acc_thr->opaque);
3192 HG_(record_error_Race)( acc_thr->opaque, acc_addr,
3193 isWrite, szB, NULL/*mb_lastlock*/,
3194 NULL, NULL );
3195 }
3196}
3197
3198static Bool is_sane_SVal_C ( SVal sv ) {
3199 POrd ord;
3200 if (!SVal__isC(sv)) return True;
3201 ord = VtsID__getOrdering( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
3202 if (ord == POrd_EQ || ord == POrd_LT) return True;
3203 return False;
3204}
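
/* In outline, the two transition functions below work as follows.
   For a location in state C(Rmin,Wmin) accessed by a thread with
   clocks (viR,viW):

     read : race-free iff Rmin <= viR; new state is C(Rmin, Wmin \/ viW)
     write: race-free iff Wmin <= viW; new state is C(viW, viW)

   where <= is the partial order computed by VtsID__getOrdering and
   \/ is VtsID__join2.  On a race the location moves to the error
   state E (given MSM_RACE2ERR) and the race is reported; accesses to
   A (no-access) and E locations leave the state unchanged. */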
3205
3206
3207/* Compute new state following a read */
3208static inline SVal msm_read ( SVal svOld,
3209 /* The following are only needed for
3210 creating error reports. */
3211 Thr* acc_thr,
3212 Addr acc_addr, SizeT szB )
3213{
3214 SVal svNew = SVal_INVALID;
3215 stats__msm_read++;
3216
3217 /* Redundant sanity check on the constraints */
3218 if (MSM_CHECK) {
3219 tl_assert(is_sane_SVal_C(svOld));
3220 }
3221
3222 if (SVal__isC(svOld)) {
3223 POrd ord;
3224 VtsID tviR = acc_thr->viR;
3225 VtsID tviW = acc_thr->viW;
3226 VtsID rmini = SVal__unC_Rmin(svOld);
3227 VtsID wmini = SVal__unC_Wmin(svOld);
3228
3229 ord = VtsID__getOrdering(rmini,tviR);
3230 if (ord == POrd_EQ || ord == POrd_LT) {
3231 /* no race */
3232 /* Note: RWLOCK subtlety: use tviW, not tviR */
3233 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
3234 goto out;
3235 } else {
3236 svNew = MSM_RACE2ERR
3237 ? SVal__mkE()
3238 : SVal__mkC( rmini, VtsID__join2(wmini,tviR) );
3239 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
3240 svOld, svNew );
3241 goto out;
3242 }
3243 }
3244 if (SVal__isA(svOld)) {
3245 /* reading no-access memory (sigh); leave unchanged */
3246 /* check for no pollution */
3247 tl_assert(svOld == SVal_NOACCESS);
3248 svNew = SVal_NOACCESS;
3249 goto out;
3250 }
3251 if (SVal__isE(svOld)) {
3252 /* no race, location is already "in error" */
3253 svNew = SVal__mkE();
3254 goto out;
3255 }
3256 VG_(printf)("msm_read: bad svOld: 0x%016llx\n", svOld);
3257 tl_assert(0);
3258
3259 out:
3260 if (MSM_CHECK) {
3261 tl_assert(is_sane_SVal_C(svNew));
3262 }
3263 tl_assert(svNew != SVal_INVALID);
3264 if (svNew != svOld) {
3265 if (MSM_CONFACC && SVal__isC(svOld) && SVal__isC(svNew)) {
3266 event_map_bind( acc_addr, acc_thr );
3267 stats__msm_read_change++;
3268 }
3269 }
3270 return svNew;
3271}
3272
3273
3274/* Compute new state following a write */
3275static inline SVal msm_write ( SVal svOld,
3276 /* The following are only needed for
3277 creating error reports. */
3278 Thr* acc_thr,
3279 Addr acc_addr, SizeT szB )
3280{
3281 SVal svNew = SVal_INVALID;
3282 stats__msm_write++;
3283
3284 /* Redundant sanity check on the constraints */
3285 if (MSM_CHECK) {
3286 tl_assert(is_sane_SVal_C(svOld));
3287 }
3288
3289 if (SVal__isC(svOld)) {
3290 POrd ord;
3291 VtsID tviW = acc_thr->viW;
3292 VtsID wmini = SVal__unC_Wmin(svOld);
3293
3294 ord = VtsID__getOrdering(wmini,tviW);
3295 if (ord == POrd_EQ || ord == POrd_LT) {
3296 /* no race */
3297 svNew = SVal__mkC( tviW, tviW );
3298 goto out;
3299 } else {
3300 VtsID rmini = SVal__unC_Rmin(svOld);
3301 svNew = MSM_RACE2ERR
3302 ? SVal__mkE()
3303 : SVal__mkC( VtsID__join2(rmini,tviW),
3304 VtsID__join2(wmini,tviW) );
3305 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
3306 svOld, svNew );
3307 goto out;
3308 }
3309 }
3310 if (SVal__isA(svOld)) {
3311 /* writing no-access memory (sigh); leave unchanged */
3312 /* check for no pollution */
3313 tl_assert(svOld == SVal_NOACCESS);
3314 svNew = SVal_NOACCESS;
3315 goto out;
3316 }
3317 if (SVal__isE(svOld)) {
3318 /* no race, location is already "in error" */
3319 svNew = SVal__mkE();
3320 goto out;
3321 }
3322 VG_(printf)("msm_write: bad svOld: 0x%016llx\n", svOld);
3323 tl_assert(0);
3324
3325 out:
3326 if (MSM_CHECK) {
3327 tl_assert(is_sane_SVal_C(svNew));
3328 }
3329 tl_assert(svNew != SVal_INVALID);
3330 if (svNew != svOld) {
3331 if (MSM_CONFACC && SVal__isC(svOld) && SVal__isC(svNew)) {
3332 event_map_bind( acc_addr, acc_thr );
3333 stats__msm_write_change++;
3334 }
3335 }
3336 return svNew;
3337}
3338
3339
3340/////////////////////////////////////////////////////////
3341// //
3342// Apply core MSM to specific memory locations //
3343// //
3344/////////////////////////////////////////////////////////
3345
3346/*------------- ZSM accesses: 8 bit apply ------------- */
3347
3348void zsm_apply8___msm_read ( Thr* thr, Addr a ) {
3349 CacheLine* cl;
3350 UWord cloff, tno, toff;
3351 SVal svOld, svNew;
3352 UShort descr;
3353 stats__cline_read8s++;
3354 cl = get_cacheline(a);
3355 cloff = get_cacheline_offset(a);
3356 tno = get_treeno(a);
3357 toff = get_tree_offset(a); /* == 0 .. 7 */
3358 descr = cl->descrs[tno];
3359 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3360 SVal* tree = &cl->svals[tno << 3];
3361 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3362 if (SCE_CACHELINE)
3363 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3364 }
3365 svOld = cl->svals[cloff];
3366 svNew = msm_read( svOld, thr,a,1 );
3367 tl_assert(svNew != SVal_INVALID);
3368 cl->svals[cloff] = svNew;
3369}
3370
3371void zsm_apply8___msm_write ( Thr* thr, Addr a ) {
3372 CacheLine* cl;
3373 UWord cloff, tno, toff;
3374 SVal svOld, svNew;
3375 UShort descr;
3376   stats__cline_read8s++; /* (sic: the write path reuses the read-stat counter) */
3377 cl = get_cacheline(a);
3378 cloff = get_cacheline_offset(a);
3379 tno = get_treeno(a);
3380 toff = get_tree_offset(a); /* == 0 .. 7 */
3381 descr = cl->descrs[tno];
3382 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3383 SVal* tree = &cl->svals[tno << 3];
3384 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3385 if (SCE_CACHELINE)
3386 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3387 }
3388 svOld = cl->svals[cloff];
3389 svNew = msm_write( svOld, thr,a,1 );
3390 tl_assert(svNew != SVal_INVALID);
3391 cl->svals[cloff] = svNew;
3392}
3393
3394/*------------- ZSM accesses: 16 bit apply ------------- */
3395
3396void zsm_apply16___msm_read ( Thr* thr, Addr a ) {
3397 CacheLine* cl;
3398 UWord cloff, tno, toff;
3399 SVal svOld, svNew;
3400 UShort descr;
3401 stats__cline_read16s++;
3402 if (UNLIKELY(!aligned16(a))) goto slowcase;
3403 cl = get_cacheline(a);
3404 cloff = get_cacheline_offset(a);
3405 tno = get_treeno(a);
3406 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
3407 descr = cl->descrs[tno];
3408 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
3409 if (valid_value_is_below_me_16(descr, toff)) {
3410 goto slowcase;
3411 } else {
3412 SVal* tree = &cl->svals[tno << 3];
3413 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
3414 }
3415 if (SCE_CACHELINE)
3416 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3417 }
3418 svOld = cl->svals[cloff];
3419 svNew = msm_read( svOld, thr,a,2 );
3420 tl_assert(svNew != SVal_INVALID);
3421 cl->svals[cloff] = svNew;
3422 return;
3423 slowcase: /* misaligned, or must go further down the tree */
3424 stats__cline_16to8splits++;
3425 zsm_apply8___msm_read( thr, a + 0 );
3426 zsm_apply8___msm_read( thr, a + 1 );
3427}
3428
3429void zsm_apply16___msm_write ( Thr* thr, Addr a ) {
3430 CacheLine* cl;
3431 UWord cloff, tno, toff;
3432 SVal svOld, svNew;
3433 UShort descr;
3434   stats__cline_read16s++; /* (sic: the write path reuses the read-stat counter) */
3435 if (UNLIKELY(!aligned16(a))) goto slowcase;
3436 cl = get_cacheline(a);
3437 cloff = get_cacheline_offset(a);
3438 tno = get_treeno(a);
3439 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
3440 descr = cl->descrs[tno];
3441 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
3442 if (valid_value_is_below_me_16(descr, toff)) {
3443 goto slowcase;
3444 } else {
3445 SVal* tree = &cl->svals[tno << 3];
3446 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
3447 }
3448 if (SCE_CACHELINE)
3449 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3450 }
3451 svOld = cl->svals[cloff];
3452 svNew = msm_write( svOld, thr,a,2 );
3453 tl_assert(svNew != SVal_INVALID);
3454 cl->svals[cloff] = svNew;
3455 return;
3456 slowcase: /* misaligned, or must go further down the tree */
3457 stats__cline_16to8splits++;
3458 zsm_apply8___msm_write( thr, a + 0 );
3459 zsm_apply8___msm_write( thr, a + 1 );
3460}
3461
3462/*------------- ZSM accesses: 32 bit apply ------------- */
3463
3464void zsm_apply32___msm_read ( Thr* thr, Addr a ) {
3465 CacheLine* cl;
3466 UWord cloff, tno, toff;
3467 SVal svOld, svNew;
3468 UShort descr;
3469 if (UNLIKELY(!aligned32(a))) goto slowcase;
3470 cl = get_cacheline(a);
3471 cloff = get_cacheline_offset(a);
3472 tno = get_treeno(a);
3473 toff = get_tree_offset(a); /* == 0 or 4 */
3474 descr = cl->descrs[tno];
3475 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
3476 if (valid_value_is_above_me_32(descr, toff)) {
3477 SVal* tree = &cl->svals[tno << 3];
3478 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
3479 } else {
3480 goto slowcase;
3481 }
3482 if (SCE_CACHELINE)
3483 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3484 }
3485 svOld = cl->svals[cloff];
3486 svNew = msm_read( svOld, thr,a,4 );
3487 tl_assert(svNew != SVal_INVALID);
3488 cl->svals[cloff] = svNew;
3489 return;
3490 slowcase: /* misaligned, or must go further down the tree */
3491 stats__cline_32to16splits++;
3492 zsm_apply16___msm_read( thr, a + 0 );
3493 zsm_apply16___msm_read( thr, a + 2 );
3494}
3495
3496void zsm_apply32___msm_write ( Thr* thr, Addr a ) {
3497 CacheLine* cl;
3498 UWord cloff, tno, toff;
3499 SVal svOld, svNew;
3500 UShort descr;
3501 if (UNLIKELY(!aligned32(a))) goto slowcase;
3502 cl = get_cacheline(a);
3503 cloff = get_cacheline_offset(a);
3504 tno = get_treeno(a);
3505 toff = get_tree_offset(a); /* == 0 or 4 */
3506 descr = cl->descrs[tno];
3507 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
3508 if (valid_value_is_above_me_32(descr, toff)) {
3509 SVal* tree = &cl->svals[tno << 3];
3510 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
3511 } else {
3512 goto slowcase;
3513 }
3514 if (SCE_CACHELINE)
3515 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3516 }
3517 svOld = cl->svals[cloff];
3518 svNew = msm_write( svOld, thr,a,4 );
3519 tl_assert(svNew != SVal_INVALID);
3520 cl->svals[cloff] = svNew;
3521 return;
3522 slowcase: /* misaligned, or must go further down the tree */
3523 stats__cline_32to16splits++;
3524 zsm_apply16___msm_write( thr, a + 0 );
3525 zsm_apply16___msm_write( thr, a + 2 );
3526}
3527
3528/*------------- ZSM accesses: 64 bit apply ------------- */
3529
3530void zsm_apply64___msm_read ( Thr* thr, Addr a ) {
3531 CacheLine* cl;
3532 UWord cloff, tno, toff;
3533 SVal svOld, svNew;
3534 UShort descr;
3535 stats__cline_read64s++;
3536 if (UNLIKELY(!aligned64(a))) goto slowcase;
3537 cl = get_cacheline(a);
3538 cloff = get_cacheline_offset(a);
3539 tno = get_treeno(a);
3540 toff = get_tree_offset(a); /* == 0, unused */
3541 descr = cl->descrs[tno];
3542 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
3543 goto slowcase;
3544 }
3545 svOld = cl->svals[cloff];
3546 svNew = msm_read( svOld, thr,a,8 );
3547 tl_assert(svNew != SVal_INVALID);
3548 cl->svals[cloff] = svNew;
3549 return;
3550 slowcase: /* misaligned, or must go further down the tree */
3551 stats__cline_64to32splits++;
3552 zsm_apply32___msm_read( thr, a + 0 );
3553 zsm_apply32___msm_read( thr, a + 4 );
3554}
3555
3556void zsm_apply64___msm_write ( Thr* thr, Addr a ) {
3557 CacheLine* cl;
3558 UWord cloff, tno, toff;
3559 SVal svOld, svNew;
3560 UShort descr;
3561   stats__cline_read64s++; /* (sic: the write path reuses the read-stat counter) */
3562 if (UNLIKELY(!aligned64(a))) goto slowcase;
3563 cl = get_cacheline(a);
3564 cloff = get_cacheline_offset(a);
3565 tno = get_treeno(a);
3566 toff = get_tree_offset(a); /* == 0, unused */
3567 descr = cl->descrs[tno];
3568 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
3569 goto slowcase;
3570 }
3571 svOld = cl->svals[cloff];
3572 svNew = msm_write( svOld, thr,a,8 );
3573 tl_assert(svNew != SVal_INVALID);
3574 cl->svals[cloff] = svNew;
3575 return;
3576 slowcase: /* misaligned, or must go further down the tree */
3577 stats__cline_64to32splits++;
3578 zsm_apply32___msm_write( thr, a + 0 );
3579 zsm_apply32___msm_write( thr, a + 4 );
3580}
3581
3582/*--------------- ZSM accesses: 8 bit write --------------- */
3583
3584static
3585void zsm_write8 ( Addr a, SVal svNew ) {
3586 CacheLine* cl;
3587 UWord cloff, tno, toff;
3588 UShort descr;
3589 stats__cline_set8s++;
3590 cl = get_cacheline(a);
3591 cloff = get_cacheline_offset(a);
3592 tno = get_treeno(a);
3593 toff = get_tree_offset(a); /* == 0 .. 7 */
3594 descr = cl->descrs[tno];
3595 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3596 SVal* tree = &cl->svals[tno << 3];
3597 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3598 if (SCE_CACHELINE)
3599 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3600 }
3601 tl_assert(svNew != SVal_INVALID);
3602 cl->svals[cloff] = svNew;
3603}
3604
3605/*--------------- ZSM accesses: 16 bit write --------------- */
3606
3607static
3608void zsm_write16 ( Addr a, SVal svNew ) {
3609 CacheLine* cl;
3610 UWord cloff, tno, toff;
3611 UShort descr;
3612 stats__cline_set16s++;
3613 if (UNLIKELY(!aligned16(a))) goto slowcase;
3614 cl = get_cacheline(a);
3615 cloff = get_cacheline_offset(a);
3616 tno = get_treeno(a);
3617 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
3618 descr = cl->descrs[tno];
3619 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
3620 if (valid_value_is_below_me_16(descr, toff)) {
3621 /* Writing at this level. Need to fix up 'descr'. */
3622 cl->descrs[tno] = pullup_descr_to_16(descr, toff);
3623 /* At this point, the tree does not match cl->descr[tno] any
3624 more. The assignments below will fix it up. */
3625 } else {
3626 /* We can't indiscriminately write on the w16 node as in the
3627 w64 case, as that might make the node inconsistent with
3628 its parent. So first, pull down to this level. */
3629 SVal* tree = &cl->svals[tno << 3];
3630 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
3631 if (SCE_CACHELINE)
3632 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3633 }
3634 }
3635 tl_assert(svNew != SVal_INVALID);
3636 cl->svals[cloff + 0] = svNew;
3637 cl->svals[cloff + 1] = SVal_INVALID;
3638 return;
3639 slowcase: /* misaligned */
3640 stats__cline_16to8splits++;
3641 zsm_write8( a + 0, svNew );
3642 zsm_write8( a + 1, svNew );
3643}
3644
3645/*--------------- ZSM accesses: 32 bit write --------------- */
3646
3647static
3648void zsm_write32 ( Addr a, SVal svNew ) {
3649 CacheLine* cl;
3650 UWord cloff, tno, toff;
3651 UShort descr;
3652 stats__cline_set32s++;
3653 if (UNLIKELY(!aligned32(a))) goto slowcase;
3654 cl = get_cacheline(a);
3655 cloff = get_cacheline_offset(a);
3656 tno = get_treeno(a);
3657 toff = get_tree_offset(a); /* == 0 or 4 */
3658 descr = cl->descrs[tno];
3659 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
3660 if (valid_value_is_above_me_32(descr, toff)) {
3661 /* We can't indiscriminately write on the w32 node as in the
3662 w64 case, as that might make the node inconsistent with
3663 its parent. So first, pull down to this level. */
3664 SVal* tree = &cl->svals[tno << 3];
3665 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
3666 if (SCE_CACHELINE)
3667 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3668 } else {
3669 /* Writing at this level. Need to fix up 'descr'. */
3670 cl->descrs[tno] = pullup_descr_to_32(descr, toff);
3671 /* At this point, the tree does not match cl->descr[tno] any
3672 more. The assignments below will fix it up. */
3673 }
3674 }
3675 tl_assert(svNew != SVal_INVALID);
3676 cl->svals[cloff + 0] = svNew;
3677 cl->svals[cloff + 1] = SVal_INVALID;
3678 cl->svals[cloff + 2] = SVal_INVALID;
3679 cl->svals[cloff + 3] = SVal_INVALID;
3680 return;
3681 slowcase: /* misaligned */
3682 stats__cline_32to16splits++;
3683 zsm_write16( a + 0, svNew );
3684 zsm_write16( a + 2, svNew );
3685}
3686
3687/*--------------- ZSM accesses: 64 bit write --------------- */
3688
3689static
3690void zsm_write64 ( Addr a, SVal svNew ) {
3691 CacheLine* cl;
3692 UWord cloff, tno, toff;
3693 stats__cline_set64s++;
3694 if (UNLIKELY(!aligned64(a))) goto slowcase;
3695 cl = get_cacheline(a);
3696 cloff = get_cacheline_offset(a);
3697 tno = get_treeno(a);
3698 toff = get_tree_offset(a); /* == 0 */
3699 cl->descrs[tno] = TREE_DESCR_64;
3700 tl_assert(svNew != SVal_INVALID);
3701 cl->svals[cloff + 0] = svNew;
3702 cl->svals[cloff + 1] = SVal_INVALID;
3703 cl->svals[cloff + 2] = SVal_INVALID;
3704 cl->svals[cloff + 3] = SVal_INVALID;
3705 cl->svals[cloff + 4] = SVal_INVALID;
3706 cl->svals[cloff + 5] = SVal_INVALID;
3707 cl->svals[cloff + 6] = SVal_INVALID;
3708 cl->svals[cloff + 7] = SVal_INVALID;
3709 return;
3710 slowcase: /* misaligned */
3711 stats__cline_64to32splits++;
3712 zsm_write32( a + 0, svNew );
3713 zsm_write32( a + 4, svNew );
3714}
3715
3716/*------------- ZSM accesses: 8 bit read/copy ------------- */
3717
3718static
3719SVal zsm_read8 ( Addr a ) {
3720 CacheLine* cl;
3721 UWord cloff, tno, toff;
3722 UShort descr;
3723 stats__cline_get8s++;
3724 cl = get_cacheline(a);
3725 cloff = get_cacheline_offset(a);
3726 tno = get_treeno(a);
3727 toff = get_tree_offset(a); /* == 0 .. 7 */
3728 descr = cl->descrs[tno];
3729 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3730 SVal* tree = &cl->svals[tno << 3];
3731 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3732 }
3733 return cl->svals[cloff];
3734}
3735
3736static void zsm_copy8 ( Addr src, Addr dst, Bool uu_normalise ) {
3737 SVal sv;
3738 stats__cline_copy8s++;
3739 sv = zsm_read8( src );
3740 zsm_write8( dst, sv );
3741}
3742
3743/* ------------ Shadow memory range setting ops ------------ */
3744
3745void zsm_apply_range___msm_read ( Thr* thr,
3746 Addr a, SizeT len )
3747{
3748 /* fast track a couple of common cases */
3749 if (len == 4 && aligned32(a)) {
3750 zsm_apply32___msm_read( thr, a );
3751 return;
3752 }
3753 if (len == 8 && aligned64(a)) {
3754 zsm_apply64___msm_read( thr, a );
3755 return;
3756 }
3757
3758 /* be completely general (but as efficient as possible) */
3759 if (len == 0) return;
3760
3761 if (!aligned16(a) && len >= 1) {
3762 zsm_apply8___msm_read( thr, a );
3763 a += 1;
3764 len -= 1;
3765 tl_assert(aligned16(a));
3766 }
3767 if (len == 0) return;
3768
3769 if (!aligned32(a) && len >= 2) {
3770 zsm_apply16___msm_read( thr, a );
3771 a += 2;
3772 len -= 2;
3773 tl_assert(aligned32(a));
3774 }
3775 if (len == 0) return;
3776
3777 if (!aligned64(a) && len >= 4) {
3778 zsm_apply32___msm_read( thr, a );
3779 a += 4;
3780 len -= 4;
3781 tl_assert(aligned64(a));
3782 }
3783 if (len == 0) return;
3784
3785 if (len >= 8) {
3786 tl_assert(aligned64(a));
3787 while (len >= 8) {
3788 zsm_apply64___msm_read( thr, a );
3789 a += 8;
3790 len -= 8;
3791 }
3792 tl_assert(aligned64(a));
3793 }
3794 if (len == 0) return;
3795
3796 if (len >= 4)
3797 tl_assert(aligned32(a));
3798 if (len >= 4) {
3799 zsm_apply32___msm_read( thr, a );
3800 a += 4;
3801 len -= 4;
3802 }
3803 if (len == 0) return;
3804
3805 if (len >= 2)
3806 tl_assert(aligned16(a));
3807 if (len >= 2) {
3808 zsm_apply16___msm_read( thr, a );
3809 a += 2;
3810 len -= 2;
3811 }
3812 if (len == 0) return;
3813
3814 if (len >= 1) {
3815 zsm_apply8___msm_read( thr, a );
3816 a += 1;
3817 len -= 1;
3818 }
3819 tl_assert(len == 0);
3820}
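/* Worked example of the peeling above (illustrative only): a 13-byte
   read starting at the odd address 0x5001 decomposes into

      zsm_apply8___msm_read ( thr, 0x5001 );   1 byte, now 16-bit aligned
      zsm_apply16___msm_read( thr, 0x5002 );   2 bytes, now 32-bit aligned
      zsm_apply32___msm_read( thr, 0x5004 );   4 bytes, now 64-bit aligned
      zsm_apply32___msm_read( thr, 0x5008 );   4-byte tail (fewer than 8 left)
      zsm_apply16___msm_read( thr, 0x500C );   2-byte tail

   covering 1+2+4+4+2 == 13 bytes in total.  The write variant below
   peels identically. */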
3821
3822
3823
3824void zsm_apply_range___msm_write ( Thr* thr,
3825 Addr a, SizeT len )
3826{
3827 /* fast track a couple of common cases */
3828 if (len == 4 && aligned32(a)) {
3829 zsm_apply32___msm_write( thr, a );
3830 return;
3831 }
3832 if (len == 8 && aligned64(a)) {
3833 zsm_apply64___msm_write( thr, a );
3834 return;
3835 }
3836
3837 /* be completely general (but as efficient as possible) */
3838 if (len == 0) return;
3839
3840 if (!aligned16(a) && len >= 1) {
3841 zsm_apply8___msm_write( thr, a );
3842 a += 1;
3843 len -= 1;
3844 tl_assert(aligned16(a));
3845 }
3846 if (len == 0) return;
3847
3848 if (!aligned32(a) && len >= 2) {
3849 zsm_apply16___msm_write( thr, a );
3850 a += 2;
3851 len -= 2;
3852 tl_assert(aligned32(a));
3853 }
3854 if (len == 0) return;
3855
3856 if (!aligned64(a) && len >= 4) {
3857 zsm_apply32___msm_write( thr, a );
3858 a += 4;
3859 len -= 4;
3860 tl_assert(aligned64(a));
3861 }
3862 if (len == 0) return;
3863
3864 if (len >= 8) {
3865 tl_assert(aligned64(a));
3866 while (len >= 8) {
3867 zsm_apply64___msm_write( thr, a );
3868 a += 8;
3869 len -= 8;
3870 }
3871 tl_assert(aligned64(a));
3872 }
3873 if (len == 0) return;
3874
3875 if (len >= 4)
3876 tl_assert(aligned32(a));
3877 if (len >= 4) {
3878 zsm_apply32___msm_write( thr, a );
3879 a += 4;
3880 len -= 4;
3881 }
3882 if (len == 0) return;
3883
3884 if (len >= 2)
3885 tl_assert(aligned16(a));
3886 if (len >= 2) {
3887 zsm_apply16___msm_write( thr, a );
3888 a += 2;
3889 len -= 2;
3890 }
3891 if (len == 0) return;
3892
3893 if (len >= 1) {
3894 zsm_apply8___msm_write( thr, a );
3895 a += 1;
3896 len -= 1;
3897 }
3898 tl_assert(len == 0);
3899}
3900
3901
3902
3903
3904/* Block-copy states (needed for implementing realloc()). */
3905
3906static void zsm_copy_range ( Addr src, Addr dst, SizeT len )
3907{
3908 SizeT i;
3909 if (len == 0)
3910 return;
3911
3912 /* assert for non-overlappingness */
3913 tl_assert(src+len <= dst || dst+len <= src);
3914
3915   /* To be simple, just copy byte by byte.  But so as not to wreck
3916      performance for later accesses to dst[0 .. len-1], normalise
3917      destination lines as we finish with them, and also normalise the
3918      line containing the first and last address.  (Note: zsm_copy8
          currently ignores its normalise hint, so this is advisory only.) */
3919 for (i = 0; i < len; i++) {
3920 Bool normalise
3921 = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
3922 || i == 0 /* first in range */
3923 || i == len-1; /* last in range */
3924 zsm_copy8( src+i, dst+i, normalise );
3925 }
3926}
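/* Illustrative use (an assumption about the client, not something this
   file enforces): on realloc, a tool can copy the old block's shadow
   state into the new block via libhb_copy_shadow_state (a thin wrapper
   over this function, see the Top Level API below) before marking the
   old block no-access, so the payload keeps its access history across
   the move. */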
3927
3928
3929/* For setting address ranges to a given value. Has considerable
3930 sophistication so as to avoid generating large numbers of pointless
3931 cache loads/writebacks for large ranges. */
3932
3933/* Do small ranges in-cache, in the obvious way. */
3934static
3935void zsm_set_range_SMALL ( Addr a, SizeT len, SVal svNew )
3936{
3937 /* fast track a couple of common cases */
3938 if (len == 4 && aligned32(a)) {
3939 zsm_write32( a, svNew );
3940 return;
3941 }
3942 if (len == 8 && aligned64(a)) {
3943 zsm_write64( a, svNew );
3944 return;
3945 }
3946
3947 /* be completely general (but as efficient as possible) */
3948 if (len == 0) return;
3949
3950 if (!aligned16(a) && len >= 1) {
3951 zsm_write8( a, svNew );
3952 a += 1;
3953 len -= 1;
3954 tl_assert(aligned16(a));
3955 }
3956 if (len == 0) return;
3957
3958 if (!aligned32(a) && len >= 2) {
3959 zsm_write16( a, svNew );
3960 a += 2;
3961 len -= 2;
3962 tl_assert(aligned32(a));
3963 }
3964 if (len == 0) return;
3965
3966 if (!aligned64(a) && len >= 4) {
3967 zsm_write32( a, svNew );
3968 a += 4;
3969 len -= 4;
3970 tl_assert(aligned64(a));
3971 }
3972 if (len == 0) return;
3973
3974 if (len >= 8) {
3975 tl_assert(aligned64(a));
3976 while (len >= 8) {
3977 zsm_write64( a, svNew );
3978 a += 8;
3979 len -= 8;
3980 }
3981 tl_assert(aligned64(a));
3982 }
3983 if (len == 0) return;
3984
3985 if (len >= 4)
3986 tl_assert(aligned32(a));
3987 if (len >= 4) {
3988 zsm_write32( a, svNew );
3989 a += 4;
3990 len -= 4;
3991 }
3992 if (len == 0) return;
3993
3994 if (len >= 2)
3995 tl_assert(aligned16(a));
3996 if (len >= 2) {
3997 zsm_write16( a, svNew );
3998 a += 2;
3999 len -= 2;
4000 }
4001 if (len == 0) return;
4002
4003 if (len >= 1) {
4004 zsm_write8( a, svNew );
4005 a += 1;
4006 len -= 1;
4007 }
4008 tl_assert(len == 0);
4009}
4010
4011
4012/* If we're doing a small range, hand off to zsm_set_range_SMALL. But
4013 for larger ranges, try to operate directly on the out-of-cache
4014 representation, rather than dragging lines into the cache,
4015 overwriting them, and forcing them out. This turns out to be an
4016 important performance optimisation. */
4017
4018static void zsm_set_range ( Addr a, SizeT len, SVal svNew )
4019{
4020 tl_assert(svNew != SVal_INVALID);
4021 stats__cache_make_New_arange += (ULong)len;
4022
4023 if (0 && len > 500)
4024 VG_(printf)("make New ( %#lx, %ld )\n", a, len );
4025
4026 if (0) {
4027 static UWord n_New_in_cache = 0;
4028 static UWord n_New_not_in_cache = 0;
4029 /* tag is 'a' with the in-line offset masked out,
4030 eg a[31]..a[4] 0000 */
4031 Addr tag = a & ~(N_LINE_ARANGE - 1);
4032 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
4033 if (LIKELY(tag == cache_shmem.tags0[wix])) {
4034 n_New_in_cache++;
4035 } else {
4036 n_New_not_in_cache++;
4037 }
4038 if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
4039 VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
4040 n_New_in_cache, n_New_not_in_cache );
4041 }
4042
4043 if (LIKELY(len < 2 * N_LINE_ARANGE)) {
4044 zsm_set_range_SMALL( a, len, svNew );
4045 } else {
4046 Addr before_start = a;
4047 Addr aligned_start = cacheline_ROUNDUP(a);
4048 Addr after_start = cacheline_ROUNDDN(a + len);
4049 UWord before_len = aligned_start - before_start;
4050 UWord aligned_len = after_start - aligned_start;
4051 UWord after_len = a + len - after_start;
4052 tl_assert(before_start <= aligned_start);
4053 tl_assert(aligned_start <= after_start);
4054 tl_assert(before_len < N_LINE_ARANGE);
4055 tl_assert(after_len < N_LINE_ARANGE);
4056 tl_assert(get_cacheline_offset(aligned_start) == 0);
4057 if (get_cacheline_offset(a) == 0) {
4058 tl_assert(before_len == 0);
4059 tl_assert(a == aligned_start);
4060 }
4061 if (get_cacheline_offset(a+len) == 0) {
4062 tl_assert(after_len == 0);
4063 tl_assert(after_start == a+len);
4064 }
4065 if (before_len > 0) {
4066 zsm_set_range_SMALL( before_start, before_len, svNew );
4067 }
4068 if (after_len > 0) {
4069 zsm_set_range_SMALL( after_start, after_len, svNew );
4070 }
4071 stats__cache_make_New_inZrep += (ULong)aligned_len;
4072
4073 while (1) {
4074 Addr tag;
4075 UWord wix;
4076 if (aligned_start >= after_start)
4077 break;
4078 tl_assert(get_cacheline_offset(aligned_start) == 0);
4079 tag = aligned_start & ~(N_LINE_ARANGE - 1);
4080 wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
4081 if (tag == cache_shmem.tags0[wix]) {
4082 UWord i;
4083 for (i = 0; i < N_LINE_ARANGE / 8; i++)
4084 zsm_write64( aligned_start + i * 8, svNew );
4085 } else {
4086 UWord i;
4087 Word zix;
4088 SecMap* sm;
4089 LineZ* lineZ;
4090 /* This line is not in the cache. Do not force it in; instead
4091 modify it in-place. */
4092 /* find the Z line to write in and rcdec it or the
4093 associated F line. */
4094 find_Z_for_writing( &sm, &zix, tag );
4095 tl_assert(sm);
4096 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
4097 lineZ = &sm->linesZ[zix];
4098 lineZ->dict[0] = svNew;
4099 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
4100 for (i = 0; i < N_LINE_ARANGE/4; i++)
4101 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
4102 rcinc_LineZ(lineZ);
4103 }
4104 aligned_start += N_LINE_ARANGE;
4105 aligned_len -= N_LINE_ARANGE;
4106 }
4107 tl_assert(aligned_start == after_start);
4108 tl_assert(aligned_len == 0);
4109 }
4110}
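/* Worked example (illustrative only, assuming the line size
   N_LINE_ARANGE is 64): zsm_set_range( 0x1003, 200, sv ), for which
   a+len == 0x10CB, splits into

      before part   0x1003 .. 0x103F  ( 61 bytes) -> zsm_set_range_SMALL
      aligned part  0x1040 .. 0x10BF  (128 bytes) -> two whole lines,
                      written in-cache if resident, else directly into
                      their Z representation
      after part    0x10C0 .. 0x10CA  ( 11 bytes) -> zsm_set_range_SMALL

   for a total of 61 + 128 + 11 == 200 bytes. */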
4111
4112
4113/////////////////////////////////////////////////////////
4114// //
4115// Synchronisation objects //
4116// //
4117/////////////////////////////////////////////////////////
4118
4119// (UInt) `echo "Synchronisation object" | md5sum`
4120#define SO_MAGIC 0x56b3c5b0U
4121
4122struct _SO {
4123 VtsID viR; /* r-clock of sender */
4124 VtsID viW; /* w-clock of sender */
4125 UInt magic;
4126};
4127
4128static SO* SO__Alloc ( void ) {
4129 SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
4130 so->viR = VtsID_INVALID;
4131 so->viW = VtsID_INVALID;
4132 so->magic = SO_MAGIC;
4133 return so;
4134}
4135static void SO__Dealloc ( SO* so ) {
4136 tl_assert(so);
4137 tl_assert(so->magic == SO_MAGIC);
4138 if (so->viR == VtsID_INVALID) {
4139 tl_assert(so->viW == VtsID_INVALID);
4140 } else {
4141 tl_assert(so->viW != VtsID_INVALID);
4142 VtsID__rcdec(so->viR);
4143 VtsID__rcdec(so->viW);
4144 }
4145 so->magic = 0;
4146 HG_(free)( so );
4147}
4148
4149
4150/////////////////////////////////////////////////////////
4151// //
4152// Top Level API //
4153// //
4154/////////////////////////////////////////////////////////
4155
4156static void show_thread_state ( HChar* str, Thr* t )
4157{
4158   if (1) return; /* set to 'if (0)' to enable debug tracing of thread clocks */
4159 if (t->viR == t->viW) {
4160 VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
4161 VtsID__pp( t->viR );
4162 VG_(printf)("%s","\n");
4163 } else {
4164 VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
4165 VtsID__pp( t->viR );
4166 VG_(printf)(" viW %u==", t->viW);
4167 VtsID__pp( t->viW );
4168 VG_(printf)("%s","\n");
4169 }
4170}
4171
4172
4173Thr* libhb_init (
4174 void (*get_stacktrace)( Thr*, Addr*, UWord ),
4175        ExeContext* (*get_EC)( Thr* )
4176     )
4177{
4178 Thr* thr;
4179 VtsID vi;
4180 tl_assert(get_stacktrace);
4181    tl_assert(get_EC);
4182 main_get_stacktrace = get_stacktrace;
4183    main_get_EC         = get_EC;
4184
4185 // No need to initialise hg_wordfm.
4186 // No need to initialise hg_wordset.
4187
4188 vts_set_init();
4189 vts_tab_init();
4190 event_map_init();
4191 VtsID__invalidate_caches();
4192
4193 // initialise shadow memory
4194 zsm_init( SVal__rcinc, SVal__rcdec );
4195
4196 thr = Thr__new();
4197 vi = VtsID__mk_Singleton( thr, 1 );
4198 thr->viR = vi;
4199 thr->viW = vi;
4200 VtsID__rcinc(thr->viR);
4201 VtsID__rcinc(thr->viW);
4202
4203 show_thread_state(" root", thr);
4204 return thr;
4205}
4206
4207Thr* libhb_create ( Thr* parent )
4208{
4209 /* The child's VTSs are copies of the parent's VTSs, but ticked at
4210 the child's index. Since the child's index is guaranteed
4211 unique, it has never been seen before, so the implicit value
4212 before the tick is zero and after that is one. */
4213 Thr* child = Thr__new();
4214
4215 child->viR = VtsID__tick( parent->viR, child );
4216 child->viW = VtsID__tick( parent->viW, child );
4217 VtsID__rcinc(child->viR);
4218 VtsID__rcinc(child->viW);
4219
4220 tl_assert(VtsID__indexAt( child->viR, child ) == 1);
4221 tl_assert(VtsID__indexAt( child->viW, child ) == 1);
4222
4223 /* and the parent has to move along too */
4224 VtsID__rcdec(parent->viR);
4225 VtsID__rcdec(parent->viW);
4226 parent->viR = VtsID__tick( parent->viR, parent );
4227 parent->viW = VtsID__tick( parent->viW, parent );
4228 VtsID__rcinc(parent->viR);
4229 VtsID__rcinc(parent->viW);
4230
4231 show_thread_state(" child", child);
4232 show_thread_state("parent", parent);
4233
4234 return child;
4235}
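/* Small worked example (illustrative only).  If the parent's clocks
   are both [P:3] just before the create, then afterwards

      child->viR  == child->viW  == [P:3, C:1]   (copy, ticked at C)
      parent->viR == parent->viW == [P:4]        (parent ticked at P)

   so everything the parent did before the create happens-before all of
   the child's accesses, while later parent and child accesses remain
   unordered until they synchronise through an SO. */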
4236
4237/* Shut down the library, and print stats (in fact that's _all_
4238   this is for). */
4239void libhb_shutdown ( Bool show_stats )
4240{
4241 if (show_stats) {
4242 VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
4243 VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
4244 stats__secmaps_allocd,
4245 stats__secmap_ga_space_covered);
4246 VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
4247 stats__secmap_linesZ_allocd,
4248 stats__secmap_linesZ_bytes);
4249 VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
4250 stats__secmap_linesF_allocd,
4251 stats__secmap_linesF_bytes);
4252 VG_(printf)(" secmaps: %'10lu iterator steppings\n",
4253 stats__secmap_iterator_steppings);
4254 VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
4255 stats__secmaps_search, stats__secmaps_search_slow);
4256
4257 VG_(printf)("%s","\n");
4258 VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
4259 stats__cache_totrefs, stats__cache_totmisses );
4260 VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
4261 stats__cache_Z_fetches, stats__cache_F_fetches );
4262 VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
4263 stats__cache_Z_wbacks, stats__cache_F_wbacks );
4264 VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n",
4265 stats__cache_invals, stats__cache_flushes );
4266 VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
4267 stats__cache_make_New_arange,
4268 stats__cache_make_New_inZrep);
4269
4270 VG_(printf)("%s","\n");
4271 VG_(printf)(" cline: %'10lu normalises\n",
4272 stats__cline_normalises );
4273 VG_(printf)(" cline: rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4274 stats__cline_read64s,
4275 stats__cline_read32s,
4276 stats__cline_read16s,
4277 stats__cline_read8s );
4278 VG_(printf)(" cline: wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4279 stats__cline_write64s,
4280 stats__cline_write32s,
4281 stats__cline_write16s,
4282 stats__cline_write8s );
4283 VG_(printf)(" cline: sets 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4284 stats__cline_set64s,
4285 stats__cline_set32s,
4286 stats__cline_set16s,
4287 stats__cline_set8s );
4288 VG_(printf)(" cline: get1s %'lu, copy1s %'lu\n",
4289 stats__cline_get8s, stats__cline_copy8s );
4290 VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
4291 stats__cline_64to32splits,
4292 stats__cline_32to16splits,
4293 stats__cline_16to8splits );
4294 VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
4295 stats__cline_64to32pulldown,
4296 stats__cline_32to16pulldown,
4297 stats__cline_16to8pulldown );
4298 if (0)
4299 VG_(printf)(" cline: sizeof(CacheLineZ) %ld, covers %ld bytes of arange\n",
4300 (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);
4301
4302 VG_(printf)("%s","\n");
4303
4304 VG_(printf)(" libhb: %'13llu msm_read (%'llu changed)\n",
4305 stats__msm_read, stats__msm_read_change);
4306 VG_(printf)(" libhb: %'13llu msm_write (%'llu changed)\n",
4307 stats__msm_write, stats__msm_write_change);
4308 VG_(printf)(" libhb: %'13llu getOrd queries (%'llu misses)\n",
4309 stats__getOrdering_queries, stats__getOrdering_misses);
4310 VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
4311 stats__join2_queries, stats__join2_misses);
4312
4313 VG_(printf)("%s","\n");
4314 VG_(printf)(
4315 " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
4316 VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
4317 );
4318 VG_(printf)( " libhb: %lu entries in vts_set\n",
4319 VG_(sizeFM)( vts_set ) );
4320
4321 VG_(printf)("%s","\n");
4322 VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
4323 stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
4324 stats__ctxt_rcdec2,
4325 stats__ctxt_rcdec3 );
4326 VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
4327 stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
4328 VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n",
4329 (UWord)N_RCEC_TAB,
4330 stats__ctxt_tab_curr );
4331 VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
4332 stats__ctxt_tab_qs,
4333 stats__ctxt_tab_cmps );
4334#if 0
4335 VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
4336 VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
4337 VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
4338 VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
4339 VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
4340 VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
4341 VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
4342 VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
4343 VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
4344 VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
4345 VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
4346 VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
4347 VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
4348 VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
4349
4350 VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
4351 VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
4352 VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
4353 VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
4354#endif
4355
4356 VG_(printf)("%s","<<< END libhb stats >>>\n");
4357 VG_(printf)("%s","\n");
4358
4359 }
4360}
4361
4362void libhb_async_exit ( Thr* thr )
4363{
4364 /* is there anything we need to do? */
4365}
4366
4367/* Both Segs and SOs point to VTSs. However, there is no sharing, so
4368 a Seg that points at a VTS is its one-and-only owner, and ditto for
4369 a SO that points at a VTS. */
4370
4371SO* libhb_so_alloc ( void )
4372{
4373 return SO__Alloc();
4374}
4375
4376void libhb_so_dealloc ( SO* so )
4377{
4378 tl_assert(so);
4379 tl_assert(so->magic == SO_MAGIC);
4380 SO__Dealloc(so);
4381}
4382
4383/* See comments in libhb.h for details on the meaning of
4384 strong vs weak sends and strong vs weak receives. */
4385void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
4386{
4387 /* Copy the VTSs from 'thr' into the sync object, and then move
4388 the thread along one step. */
4389
4390 tl_assert(so);
4391 tl_assert(so->magic == SO_MAGIC);
4392
4393 /* stay sane .. a thread's read-clock must always lead or be the
4394 same as its write-clock */
4395 { POrd ord = VtsID__getOrdering(thr->viW, thr->viR);
4396 tl_assert(ord == POrd_EQ || ord == POrd_LT);
4397 }
4398
4399 /* since we're overwriting the VtsIDs in the SO, we need to drop
4400 any references made by the previous contents thereof */
4401 if (so->viR == VtsID_INVALID) {
4402 tl_assert(so->viW == VtsID_INVALID);
4403 so->viR = thr->viR;
4404 so->viW = thr->viW;
4405 VtsID__rcinc(so->viR);
4406 VtsID__rcinc(so->viW);
4407 } else {
4408 /* In a strong send, we dump any previous VC in the SO and
4409 install the sending thread's VC instead. For a weak send we
4410 must join2 with what's already there. */
4411 tl_assert(so->viW != VtsID_INVALID);
4412 VtsID__rcdec(so->viR);
4413 VtsID__rcdec(so->viW);
4414 so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
4415 so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
4416 VtsID__rcinc(so->viR);
4417 VtsID__rcinc(so->viW);
4418 }
4419
4420 /* move both parent clocks along */
4421 VtsID__rcdec(thr->viR);
4422 VtsID__rcdec(thr->viW);
4423 thr->viR = VtsID__tick( thr->viR, thr );
4424 thr->viW = VtsID__tick( thr->viW, thr );
4425 VtsID__rcinc(thr->viR);
4426 VtsID__rcinc(thr->viW);
4427 if (strong_send)
4428 show_thread_state("s-send", thr);
4429 else
4430 show_thread_state("w-send", thr);
4431}
4432
4433void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
4434{
4435 tl_assert(so);
4436 tl_assert(so->magic == SO_MAGIC);
4437
4438 if (so->viR != VtsID_INVALID) {
4439 tl_assert(so->viW != VtsID_INVALID);
4440
4441 /* Weak receive (basically, an R-acquisition of a R-W lock).
4442 This advances the read-clock of the receiver, but not the
4443 write-clock. */
4444 VtsID__rcdec(thr->viR);
4445 thr->viR = VtsID__join2( thr->viR, so->viR );
4446 VtsID__rcinc(thr->viR);
4447
4448 /* For a strong receive, we also advance the receiver's write
4449 clock, which means the receive as a whole is essentially
4450 equivalent to a W-acquisition of a R-W lock. */
4451 if (strong_recv) {
4452 VtsID__rcdec(thr->viW);
4453 thr->viW = VtsID__join2( thr->viW, so->viW );
4454 VtsID__rcinc(thr->viW);
4455 }
4456
4457 if (strong_recv)
4458 show_thread_state("s-recv", thr);
4459 else
4460 show_thread_state("w-recv", thr);
4461
4462 } else {
4463 tl_assert(so->viW == VtsID_INVALID);
4464 /* Deal with degenerate case: 'so' has no vts, so there has been
4465 no message posted to it. Just ignore this case. */
4466 show_thread_state("d-recv", thr);
4467 }
4468}
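/* Usage sketch (illustrative only; the real caller is the tool layer
   above libhb, not this file).  A client can model a mutex by giving
   it an SO and doing a strong send on unlock and a strong receive on
   the next lock:

      unlock by T1:   libhb_so_send( thr_T1, so_of_mutex, True );
      lock   by T2:   libhb_so_recv( thr_T2, so_of_mutex, True );

   after which everything T1 did before the unlock happens-before
   everything T2 does after the lock.  The weak variants join with,
   rather than replace, the SO's clocks (weak send) or advance only the
   receiver's read-clock (weak receive), as described above and in
   libhb.h. */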
4469
4470Bool libhb_so_everSent ( SO* so )
4471{
4472 if (so->viR == VtsID_INVALID) {
4473 tl_assert(so->viW == VtsID_INVALID);
4474 return False;
4475 } else {
4476 tl_assert(so->viW != VtsID_INVALID);
4477 return True;
4478 }
4479}
4480
4481#define XXX1 0 // 0x67a106c
4482#define XXX2 0
4483
4484static Bool TRACEME(Addr a, SizeT szB) {
4485 if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
4486 if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
4487 return False;
4488}
4489static void trace ( Thr* thr, Addr a, SizeT szB, HChar* s ) {
4490 SVal sv = zsm_read8(a);
4491 VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
4492 show_thread_state("", thr);
4493 VG_(printf)("%s","\n");
4494}
4495
4496void libhb_range_new ( Thr* thr, Addr a, SizeT szB )
4497{
4498 SVal sv = SVal__mkC(thr->viW, thr->viW);
4499 tl_assert(is_sane_SVal_C(sv));
4500 if(TRACEME(a,szB))trace(thr,a,szB,"nw-before");
4501 zsm_set_range( a, szB, sv );
4502 if(TRACEME(a,szB))trace(thr,a,szB,"nw-after ");
4503}
4504
4505void libhb_range_noaccess ( Thr* thr, Addr a, SizeT szB )
4506{
4507 if(TRACEME(a,szB))trace(thr,a,szB,"NA-before");
4508 zsm_set_range( a, szB, SVal__mkA() );
4509 if(TRACEME(a,szB))trace(thr,a,szB,"NA-after ");
4510}
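/* Illustrative mapping (an assumption about the client): a tool would
   typically call libhb_range_new when memory first becomes usable
   (e.g. at malloc or stack growth), so the range's shadow state is
   initialised from 'thr's current write-clock, and libhb_range_noaccess
   when the memory is deallocated, so that later accesses meet the
   no-access state rather than stale clocks. */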
4511
4512void* libhb_get_Thr_opaque ( Thr* thr ) {
4513 tl_assert(thr);
4514 return thr->opaque;
4515}
4516
4517void libhb_set_Thr_opaque ( Thr* thr, void* v ) {
4518 tl_assert(thr);
4519 thr->opaque = v;
4520}
4521
4522void libhb_copy_shadow_state ( Addr dst, Addr src, SizeT len )
4523{
4524 zsm_copy_range(dst, src, len);
4525}
4526
4527void libhb_maybe_GC ( void )
4528{
4529 event_map_maybe_GC();
4530 /* If there are still freelist entries available, no need for a
4531 GC. */
4532 if (vts_tab_freelist != VtsID_INVALID)
4533 return;
4534 /* So all the table entries are full, and we're having to expand
4535      the table. But did we hit the threshold point yet? */
4536 if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
4537 return;
4538 vts_tab__do_GC( False/*don't show stats*/ );
4539}
4540
4541
4542/////////////////////////////////////////////////////////////////
4543/////////////////////////////////////////////////////////////////
4544// //
4545// SECTION END main library //
4546// //
4547/////////////////////////////////////////////////////////////////
4548/////////////////////////////////////////////////////////////////
4549
4550/*--------------------------------------------------------------------*/
4551/*--- end libhb_main.c ---*/
4552/*--------------------------------------------------------------------*/