1
2/*--------------------------------------------------------------------*/
3/*--- LibHB: a library for implementing and checking ---*/
4/*--- the happens-before relationship in concurrent programs. ---*/
5/*--- libhb_main.c ---*/
6/*--------------------------------------------------------------------*/
7
8/*
9 This file is part of LibHB, a library for implementing and checking
10 the happens-before relationship in concurrent programs.
11
12 Copyright (C) 2008-2008 OpenWorks Ltd
13 info@open-works.co.uk
14
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file COPYING.
31*/
32
33#include "pub_tool_basics.h"
34#include "pub_tool_libcassert.h"
35#include "pub_tool_libcbase.h"
36#include "pub_tool_libcprint.h"
37#include "pub_tool_mallocfree.h"
38#include "pub_tool_wordfm.h"
39#include "pub_tool_xarray.h"
40#include "pub_tool_oset.h"
41#include "pub_tool_threadstate.h"
42#include "pub_tool_aspacemgr.h"
43#include "pub_tool_execontext.h"
44#include "pub_tool_errormgr.h"
45#include "pub_tool_options.h" // VG_(clo_verbosity)
46#include "hg_basics.h"
47#include "hg_wordset.h"
48#include "hg_lock_n_thread.h"
49#include "hg_errors.h"
50
51#include "libhb.h"
52
53
54/* Globals needed by other parts of the library. These are set
55 once at startup and then never changed. */
56
57static void (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
58static ExeContext* (*main_get_EC)( Thr* ) = NULL;
59
60/////////////////////////////////////////////////////////////////
61/////////////////////////////////////////////////////////////////
62// //
63// //
64// //
65/////////////////////////////////////////////////////////////////
66/////////////////////////////////////////////////////////////////
67
68
69/////////////////////////////////////////////////////////////////
70/////////////////////////////////////////////////////////////////
71// //
72// SECTION BEGIN compressed shadow memory //
73// //
74/////////////////////////////////////////////////////////////////
75/////////////////////////////////////////////////////////////////
76
77#ifndef __HB_ZSM_H
78#define __HB_ZSM_H
79
80typedef ULong SVal;
81
82/* This value has special significance to the implementation, and callers
83 may not store it in the shadow memory. */
84#define SVal_INVALID (3ULL << 62)
85
86/* This is the default value for shadow memory. Initially the shadow
87 memory contains no accessible areas and so all reads produce this
88 value. TODO: make this caller-definable. */
89#define SVal_NOACCESS (2ULL << 62)
90
91/* Initialise the library. Once initialised, it will (or may) call
92 rcinc and rcdec in response to all the calls below, in order to
93 allow the user to do reference counting on the SVals stored herein.
94 It is important to understand, however, that due to internal
95 caching, the reference counts are in general inaccurate, and can be
96 both above or below the true reference count for an item. In
97 particular, the library may indicate that the reference count for
98 an item is zero, when in fact it is not.
99
100 To make the reference counting exact and therefore non-pointless,
101 call zsm_flush_cache. Immediately after it returns, the reference
102 counts for all items, as deduced by the caller by observing calls
103 to rcinc and rcdec, will be correct, and so any items with a zero
104 reference count may be freed (or at least considered to be
105 unreferenced by this library).
106*/
107static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );
108
109static void zsm_set_range ( Addr, SizeT, SVal );
110static SVal zsm_read8 ( Addr );
111static void zsm_copy_range ( Addr, Addr, SizeT );
112static void zsm_flush_cache ( void );
113
114#endif /* ! __HB_ZSM_H */
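/* Illustrative sketch only (not part of the library): one way a client
   might wire reference counting around the zsm_* interface declared
   above.  The names my_rcinc, my_rcdec and example_zsm_client are
   hypothetical and exist purely for illustration. */
#if 0
static void my_rcinc ( SVal s ) { /* bump s's count in the client's own table */ }
static void my_rcdec ( SVal s ) { /* drop s's count in the client's own table */ }

static void example_zsm_client ( void )
{
   zsm_init( my_rcinc, my_rcdec );
   /* Mark 64 bytes at 0x5000 with the default no-access value. */
   zsm_set_range( (Addr)0x5000, 64, SVal_NOACCESS );
   /* At this point, counts observed via my_rcinc/my_rcdec may be
      inaccurate because of internal caching ... */
   zsm_flush_cache();
   /* ... but are exact immediately after zsm_flush_cache returns, so
      any SVal whose observed count is zero can safely be reclaimed. */
}
#endif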
115
116
117/* For the shadow mem cache stuff we may want more intrusive
118 checks. Unfortunately there's no almost-zero-cost way to make them
119 selectable at run time. Hence set the #if 0 to #if 1 and
120 rebuild if you want them. */
121#if 0
122# define SCE_CACHELINE 1 /* do sanity-check CacheLine stuff */
123# define inline __attribute__((noinline))
124 /* probably want to ditch -fomit-frame-pointer too */
125#else
126# define SCE_CACHELINE 0 /* don't sanity-check CacheLine stuff */
127#endif
128
129/* For the SegmentID, SegmentSet and SVal stuff we may want more
130 intrusive checks. Again there's no zero cost way to do this. Set
131 the #if 0 to #if 1 and rebuild if you want them. */
132#if 0
133# define SCE_SVALS 1 /* sanity-check shadow value stuff */
134#else
135# define SCE_SVALS 0
136#endif
137
138
139/* Round a up to the next multiple of N. N must be a power of 2 */
140#define ROUNDUP(a, N) (((a) + (N) - 1) & ~((N)-1))
141/* Round a down to the next multiple of N. N must be a power of 2 */
142#define ROUNDDN(a, N) ((a) & ~((N)-1))
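/* Worked example: with N == 64, ROUNDUP(0x1234, 64) == 0x1240 and
   ROUNDDN(0x1234, 64) == 0x1200; values that are already 64-aligned
   are returned unchanged by both. */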
143
144
145
146/* ------ User-supplied RC functions ------ */
147static void(*rcinc)(SVal) = NULL;
148static void(*rcdec)(SVal) = NULL;
149
150
151/* ------ CacheLine ------ */
152
153#define N_LINE_BITS 6 /* must be >= 3 */
154#define N_LINE_ARANGE (1 << N_LINE_BITS)
155#define N_LINE_TREES (N_LINE_ARANGE >> 3)
156
157typedef
158 struct {
159 UShort descrs[N_LINE_TREES];
160 SVal svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
161 }
162 CacheLine;
163
164#define TREE_DESCR_16_0 (1<<0)
165#define TREE_DESCR_32_0 (1<<1)
166#define TREE_DESCR_16_1 (1<<2)
167#define TREE_DESCR_64 (1<<3)
168#define TREE_DESCR_16_2 (1<<4)
169#define TREE_DESCR_32_1 (1<<5)
170#define TREE_DESCR_16_3 (1<<6)
171#define TREE_DESCR_8_0 (1<<7)
172#define TREE_DESCR_8_1 (1<<8)
173#define TREE_DESCR_8_2 (1<<9)
174#define TREE_DESCR_8_3 (1<<10)
175#define TREE_DESCR_8_4 (1<<11)
176#define TREE_DESCR_8_5 (1<<12)
177#define TREE_DESCR_8_6 (1<<13)
178#define TREE_DESCR_8_7 (1<<14)
179#define TREE_DESCR_DTY (1<<15)
180
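/* Worked example of a descriptor: suppose that, within one 8-byte
   tree, bytes 0..3 were last accessed as a single 32-bit value,
   bytes 4..5 as a 16-bit value, and bytes 6 and 7 individually.
   The descriptor is then
      TREE_DESCR_32_0 | TREE_DESCR_16_2 | TREE_DESCR_8_6 | TREE_DESCR_8_7
   and the live SVals sit at tree[0], tree[4], tree[6] and tree[7];
   the remaining tree slots are unused (SVal_INVALID). */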
181typedef
182 struct {
183 SVal dict[4]; /* can represent up to 4 diff values in the line */
184 UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
185 dict indexes */
186 /* if dict[0] == SVal_INVALID then dict[1] is the index of the
187 LineF to use, and dict[2..] are also SVal_INVALID. */
188 }
189 LineZ; /* compressed rep for a cache line */
190
191typedef
192 struct {
193 Bool inUse;
194 SVal w64s[N_LINE_ARANGE];
195 }
196 LineF; /* full rep for a cache line */
197
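/* Example of the Z/F split: a 64-byte line whose bytes carry at most
   4 distinct SVals compresses into a LineZ -- 4 dictionary entries
   plus 64 two-bit indexes (16 bytes of ix2s).  A line with 5 or more
   distinct SVals cannot be dictionary-coded and falls back to a LineF,
   which stores all 64 SVals in full (512 bytes). */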
198/* Shadow memory.
199 Primary map is a WordFM Addr SecMap*.
200 SecMaps cover some page-size-ish section of address space and hold
201 a compressed representation.
202 CacheLine-sized chunks of SecMaps are copied into a Cache, being
203 decompressed when moved into the cache and recompressed on the
204 way out. Because of this, the cache must operate as a writeback
205 cache, not a writethrough one.
206
207 Each SecMap must hold a power-of-2 number of CacheLines. Hence
208 N_SECMAP_BITS must be >= N_LINE_BITS.
209*/
210#define N_SECMAP_BITS 13
211#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)
212
213// # CacheLines held by a SecMap
214#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
215
216/* The data in the SecMap is held in the array of LineZs. Each LineZ
217 either carries the required data directly, in a compressed
218 representation, or it holds (in .dict[0]) an index to the LineF in
219 .linesF that holds the full representation.
220
221 Currently-unused LineF's have their .inUse bit set to zero.
222 Since each in-use LineF is referred to by exactly one LineZ,
223 the number of .linesZ[] that refer to .linesF should equal
224 the number of .linesF[] that have .inUse == True.
225
226 RC obligations: the RCs presented to the user include exactly
227 the values in:
228 * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
229 * F reps that are in use (.inUse == True)
230
231 Hence the following actions at the following transitions are required:
232
233 F rep: .inUse==True -> .inUse==False -- rcdec_LineF
234 F rep: .inUse==False -> .inUse==True -- rcinc_LineF
235 Z rep: .dict[0] from other to SVal_INVALID -- rcdec_LineZ
236 Z rep: .dict[0] from SVal_INVALID to other -- rcinc_LineZ
237*/
238typedef
239 struct {
240 UInt magic;
241 LineZ linesZ[N_SECMAP_ZLINES];
242 LineF* linesF;
243 UInt linesF_size;
244 }
245 SecMap;
246
247#define SecMap_MAGIC 0x571e58cbU
248
249static inline Bool is_sane_SecMap ( SecMap* sm ) {
250 return sm != NULL && sm->magic == SecMap_MAGIC;
251}
252
253/* ------ Cache ------ */
254
255#define N_WAY_BITS 16
256#define N_WAY_NENT (1 << N_WAY_BITS)
257
258/* Each tag is the address of the associated CacheLine, rounded down
259 to a CacheLine address boundary. A CacheLine size must be a power
260 of 2 and must be 8 or more. Hence an easy way to initialise the
261 cache so it is empty is to set all the tag values to any value % 8
262 != 0, eg 1. This means all queries in the cache initially miss.
263 It does however require us to detect, and not write back, any line
264 with a bogus tag. */
265typedef
266 struct {
267 CacheLine lyns0[N_WAY_NENT];
268 Addr tags0[N_WAY_NENT];
269 }
270 Cache;
271
272static inline Bool is_valid_scache_tag ( Addr tag ) {
273 /* a valid tag should be naturally aligned to the start of
274 a CacheLine. */
275 return 0 == (tag & (N_LINE_ARANGE - 1));
276}
277
278
279/* --------- Primary data structures --------- */
280
281/* Shadow memory primary map */
282static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
283static Cache cache_shmem;
284
285
286static UWord stats__secmaps_search = 0; // # SM finds
287static UWord stats__secmaps_search_slow = 0; // # SM lookupFMs
288static UWord stats__secmaps_allocd = 0; // # SecMaps issued
289static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
290static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
291static UWord stats__secmap_linesZ_bytes = 0; // .. using this much storage
292static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
293static UWord stats__secmap_linesF_bytes = 0; // .. using this much storage
294static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
295static UWord stats__cache_Z_fetches = 0; // # Z lines fetched
296static UWord stats__cache_Z_wbacks = 0; // # Z lines written back
297static UWord stats__cache_F_fetches = 0; // # F lines fetched
298static UWord stats__cache_F_wbacks = 0; // # F lines written back
299static UWord stats__cache_invals = 0; // # cache invals
300static UWord stats__cache_flushes = 0; // # cache flushes
301static UWord stats__cache_totrefs = 0; // # total accesses
302static UWord stats__cache_totmisses = 0; // # misses
303static ULong stats__cache_make_New_arange = 0; // total arange made New
304static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
305static UWord stats__cline_normalises = 0; // # calls to cacheline_normalise
306static UWord stats__cline_read64s = 0; // # calls to s_m_read64
307static UWord stats__cline_read32s = 0; // # calls to s_m_read32
308static UWord stats__cline_read16s = 0; // # calls to s_m_read16
309static UWord stats__cline_read8s = 0; // # calls to s_m_read8
310static UWord stats__cline_write64s = 0; // # calls to s_m_write64
311static UWord stats__cline_write32s = 0; // # calls to s_m_write32
312static UWord stats__cline_write16s = 0; // # calls to s_m_write16
313static UWord stats__cline_write8s = 0; // # calls to s_m_write8
314static UWord stats__cline_set64s = 0; // # calls to s_m_set64
315static UWord stats__cline_set32s = 0; // # calls to s_m_set32
316static UWord stats__cline_set16s = 0; // # calls to s_m_set16
317static UWord stats__cline_set8s = 0; // # calls to s_m_set8
318static UWord stats__cline_get8s = 0; // # calls to s_m_get8
319static UWord stats__cline_copy8s = 0; // # calls to s_m_copy8
320static UWord stats__cline_64to32splits = 0; // # 64-bit accesses split
321static UWord stats__cline_32to16splits = 0; // # 32-bit accesses split
322static UWord stats__cline_16to8splits = 0; // # 16-bit accesses split
323static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
324static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
325static UWord stats__cline_16to8pulldown = 0; // # calls to pulldown_to_8
326
327static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
328 return a & ~(N_SECMAP_ARANGE - 1);
329}
330static inline UWord shmem__get_SecMap_offset ( Addr a ) {
331 return a & (N_SECMAP_ARANGE - 1);
332}
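/* Worked example of the address split implied by the two helpers above
   (with N_SECMAP_BITS == 13 and N_LINE_BITS == 6): for a == 0x4BCD1234,
     SecMap base          = a & ~0x1FFF  = 0x4BCD0000
     offset within SecMap = a &  0x1FFF  = 0x1234
     LineZ index          = 0x1234 >> 6  = 0x48   (of the 128 per SecMap)
     offset within line   = a &  0x3F    = 0x34
*/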
333
334
335/*----------------------------------------------------------------*/
336/*--- map_shmem :: WordFM Addr SecMap ---*/
337/*--- shadow memory (low level handlers) (shmem__* fns) ---*/
338/*----------------------------------------------------------------*/
339
340/*--------------- SecMap allocation --------------- */
341
342static HChar* shmem__bigchunk_next = NULL;
343static HChar* shmem__bigchunk_end1 = NULL;
344
345static void* shmem__bigchunk_alloc ( SizeT n )
346{
347 const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
348 tl_assert(n > 0);
349 n = VG_ROUNDUP(n, 16);
350 tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
351 tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
352 <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
353 if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
354 if (0)
355 VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
356 (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
357 shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
358 if (shmem__bigchunk_next == NULL)
359 VG_(out_of_memory_NORETURN)(
360 "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
361 shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
362 }
363 tl_assert(shmem__bigchunk_next);
364 tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
365 tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
366 shmem__bigchunk_next += n;
367 return shmem__bigchunk_next - n;
368}
369
370static SecMap* shmem__alloc_SecMap ( void )
371{
372 Word i, j;
373 SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
374 if (0) VG_(printf)("alloc_SecMap %p\n",sm);
375 tl_assert(sm);
376 sm->magic = SecMap_MAGIC;
377 for (i = 0; i < N_SECMAP_ZLINES; i++) {
378 sm->linesZ[i].dict[0] = SVal_NOACCESS;
379 sm->linesZ[i].dict[1] = SVal_INVALID;
380 sm->linesZ[i].dict[2] = SVal_INVALID;
381 sm->linesZ[i].dict[3] = SVal_INVALID;
382 for (j = 0; j < N_LINE_ARANGE/4; j++)
383 sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
384 }
385 sm->linesF = NULL;
386 sm->linesF_size = 0;
387 stats__secmaps_allocd++;
388 stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
389 stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
390 stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
391 return sm;
392}
393
394typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
395static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
396
397static SecMap* shmem__find_SecMap ( Addr ga )
398{
399 SecMap* sm = NULL;
400 Addr gaKey = shmem__round_to_SecMap_base(ga);
401 // Cache
402 stats__secmaps_search++;
403 if (LIKELY(gaKey == smCache[0].gaKey))
404 return smCache[0].sm;
405 if (LIKELY(gaKey == smCache[1].gaKey)) {
406 SMCacheEnt tmp = smCache[0];
407 smCache[0] = smCache[1];
408 smCache[1] = tmp;
409 return smCache[0].sm;
410 }
411 if (gaKey == smCache[2].gaKey) {
412 SMCacheEnt tmp = smCache[1];
413 smCache[1] = smCache[2];
414 smCache[2] = tmp;
415 return smCache[1].sm;
416 }
417 // end Cache
418 stats__secmaps_search_slow++;
419 if (VG_(lookupFM)( map_shmem,
420 NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
421 tl_assert(sm != NULL);
422 smCache[2] = smCache[1];
423 smCache[1] = smCache[0];
424 smCache[0].gaKey = gaKey;
425 smCache[0].sm = sm;
426 } else {
427 tl_assert(sm == NULL);
428 }
429 return sm;
430}
431
432static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
433{
434 SecMap* sm = shmem__find_SecMap ( ga );
435 if (LIKELY(sm)) {
436 return sm;
437 } else {
438 /* create a new one */
439 Addr gaKey = shmem__round_to_SecMap_base(ga);
440 sm = shmem__alloc_SecMap();
441 tl_assert(sm);
442 VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
443 return sm;
444 }
445}
446
447
448/* ------------ LineF and LineZ related ------------ */
449
450static void rcinc_LineF ( LineF* lineF ) {
451 UWord i;
452 tl_assert(lineF->inUse);
453 for (i = 0; i < N_LINE_ARANGE; i++)
454 rcinc(lineF->w64s[i]);
455}
456
457static void rcdec_LineF ( LineF* lineF ) {
458 UWord i;
459 tl_assert(lineF->inUse);
460 for (i = 0; i < N_LINE_ARANGE; i++)
461 rcdec(lineF->w64s[i]);
462}
463
464static void rcinc_LineZ ( LineZ* lineZ ) {
465 tl_assert(lineZ->dict[0] != SVal_INVALID);
466 rcinc(lineZ->dict[0]);
467 if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
468 if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
469 if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
470}
471
472static void rcdec_LineZ ( LineZ* lineZ ) {
473 tl_assert(lineZ->dict[0] != SVal_INVALID);
474 rcdec(lineZ->dict[0]);
475 if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
476 if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
477 if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
478}
479
480inline
481static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
482 Word bix, shft, mask, prep;
483 tl_assert(ix >= 0);
484 bix = ix >> 2;
485 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
486 mask = 3 << shft;
487 prep = b2 << shft;
488 arr[bix] = (arr[bix] & ~mask) | prep;
489}
490
491inline
492static UWord read_twobit_array ( UChar* arr, UWord ix ) {
493 Word bix, shft;
494 tl_assert(ix >= 0);
495 bix = ix >> 2;
496 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
497 return (arr[bix] >> shft) & 3;
498}
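/* Example of the two-bit packing used above: index ix == 5 lives in
   arr[1] (bix = 5 >> 2) at bit offset 2 (shft = 2 * (5 & 3) == 2), so
   write_twobit_array(arr, 5, 3) sets bits 3:2 of arr[1] to 0b11 and
   read_twobit_array(arr, 5) then returns 3. */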
499
500/* Given address 'tag', find either the Z or F line containing relevant
501 data, so it can be read into the cache.
502*/
503static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
504 /*OUT*/LineF** fp, Addr tag ) {
505 LineZ* lineZ;
506 LineF* lineF;
507 UWord zix;
508 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
509 UWord smoff = shmem__get_SecMap_offset(tag);
510 /* since smoff is derived from a valid tag, it should be
511 cacheline-aligned. */
512 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
513 zix = smoff >> N_LINE_BITS;
514 tl_assert(zix < N_SECMAP_ZLINES);
515 lineZ = &sm->linesZ[zix];
516 lineF = NULL;
517 if (lineZ->dict[0] == SVal_INVALID) {
518 UInt fix = (UInt)lineZ->dict[1];
519 tl_assert(sm->linesF);
520 tl_assert(sm->linesF_size > 0);
521 tl_assert(fix >= 0 && fix < sm->linesF_size);
522 lineF = &sm->linesF[fix];
523 tl_assert(lineF->inUse);
524 lineZ = NULL;
525 }
526 *zp = lineZ;
527 *fp = lineF;
528}
529
530/* Given address 'tag', return the relevant SecMap and the index of
531 the LineZ within it, in the expectation that the line is to be
532 overwritten. Regardless of whether 'tag' is currently associated
533 with a Z or F representation, do an rcdec on the current
534 representation, in recognition of the fact that the contents are
535 just about to be overwritten. */
536static __attribute__((noinline))
537void find_Z_for_writing ( /*OUT*/SecMap** smp,
538 /*OUT*/Word* zixp,
539 Addr tag ) {
540 LineZ* lineZ;
541 LineF* lineF;
542 UWord zix;
543 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
544 UWord smoff = shmem__get_SecMap_offset(tag);
545 /* since smoff is derived from a valid tag, it should be
546 cacheline-aligned. */
547 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
548 zix = smoff >> N_LINE_BITS;
549 tl_assert(zix < N_SECMAP_ZLINES);
550 lineZ = &sm->linesZ[zix];
551 lineF = NULL;
552 /* re RCs, we are freeing up this LineZ/LineF so that new data can
553 be parked in it. Hence have to rcdec it accordingly. */
554 /* If lineZ has an associated lineF, free it up. */
555 if (lineZ->dict[0] == SVal_INVALID) {
556 UInt fix = (UInt)lineZ->dict[1];
557 tl_assert(sm->linesF);
558 tl_assert(sm->linesF_size > 0);
559 tl_assert(fix >= 0 && fix < sm->linesF_size);
560 lineF = &sm->linesF[fix];
561 tl_assert(lineF->inUse);
562 rcdec_LineF(lineF);
563 lineF->inUse = False;
564 } else {
565 rcdec_LineZ(lineZ);
566 }
567 *smp = sm;
568 *zixp = zix;
569}
570
571static __attribute__((noinline))
572void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
573 UInt i, new_size;
574 LineF* nyu;
575
576 if (sm->linesF) {
577 tl_assert(sm->linesF_size > 0);
578 } else {
579 tl_assert(sm->linesF_size == 0);
580 }
581
582 if (sm->linesF) {
583 for (i = 0; i < sm->linesF_size; i++) {
584 if (!sm->linesF[i].inUse) {
585 *fixp = (Word)i;
586 return;
587 }
588 }
589 }
590
591 /* No free F line found. Expand existing array and try again. */
592 new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
593 nyu = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
594 new_size * sizeof(LineF) );
595 tl_assert(nyu);
596
597 stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
598 stats__secmap_linesF_bytes += (new_size - sm->linesF_size)
599 * sizeof(LineF);
600
601 if (0)
602 VG_(printf)("SM %p: expand F array from %d to %d\n",
603 sm, (Int)sm->linesF_size, new_size);
604
605 for (i = 0; i < new_size; i++)
606 nyu[i].inUse = False;
607
608 if (sm->linesF) {
609 for (i = 0; i < sm->linesF_size; i++) {
610 tl_assert(sm->linesF[i].inUse);
611 nyu[i] = sm->linesF[i];
612 }
613 VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
614 HG_(free)(sm->linesF);
615 }
616
617 sm->linesF = nyu;
618 sm->linesF_size = new_size;
619
620 for (i = 0; i < sm->linesF_size; i++) {
621 if (!sm->linesF[i].inUse) {
622 *fixp = (Word)i;
623 return;
624 }
625 }
626
627 /*NOTREACHED*/
628 tl_assert(0);
629}
630
631
632/* ------------ CacheLine and implicit-tree related ------------ */
633
634__attribute__((unused))
635static void pp_CacheLine ( CacheLine* cl ) {
636 Word i;
637 if (!cl) {
638 VG_(printf)("%s","pp_CacheLine(NULL)\n");
639 return;
640 }
641 for (i = 0; i < N_LINE_TREES; i++)
642 VG_(printf)(" descr: %04lx\n", (UWord)cl->descrs[i]);
643 for (i = 0; i < N_LINE_ARANGE; i++)
644 VG_(printf)(" sval: %08lx\n", (UWord)cl->svals[i]);
645}
646
647static UChar descr_to_validbits ( UShort descr )
648{
649 /* a.k.a Party Time for gcc's constant folder */
650# define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
651 b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0) \
652 ( (UShort) ( ( (b8_7) << 14) | ( (b8_6) << 13) | \
653 ( (b8_5) << 12) | ( (b8_4) << 11) | \
654 ( (b8_3) << 10) | ( (b8_2) << 9) | \
655 ( (b8_1) << 8) | ( (b8_0) << 7) | \
656 ( (b16_3) << 6) | ( (b32_1) << 5) | \
657 ( (b16_2) << 4) | ( (b64) << 3) | \
658 ( (b16_1) << 2) | ( (b32_0) << 1) | \
659 ( (b16_0) << 0) ) )
660
661# define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
662 ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
663 ( (bit5) << 5) | ( (bit4) << 4) | \
664 ( (bit3) << 3) | ( (bit2) << 2) | \
665 ( (bit1) << 1) | ( (bit0) << 0) ) )
666
667 /* these should all get folded out at compile time */
668 tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
669 tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
670 tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
671 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
672 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
673 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
674 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
675 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
676 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);
677
678 switch (descr) {
679 /*
680 +--------------------------------- TREE_DESCR_8_7
681 | +------------------- TREE_DESCR_8_0
682 | | +---------------- TREE_DESCR_16_3
683 | | | +-------------- TREE_DESCR_32_1
684 | | | | +------------ TREE_DESCR_16_2
685 | | | | | +--------- TREE_DESCR_64
686 | | | | | | +------ TREE_DESCR_16_1
687 | | | | | | | +---- TREE_DESCR_32_0
688 | | | | | | | | +-- TREE_DESCR_16_0
689 | | | | | | | | |
690 | | | | | | | | | GRANULARITY, 7 -> 0 */
691 case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */
692 return BYTE(1,1,1,1,1,1,1,1);
693 case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */
694 return BYTE(1,1,0,1,1,1,1,1);
695 case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */
696 return BYTE(0,1,1,1,1,1,1,1);
697 case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */
698 return BYTE(0,1,0,1,1,1,1,1);
699
700 case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */
701 return BYTE(1,1,1,1,1,1,0,1);
702 case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */
703 return BYTE(1,1,0,1,1,1,0,1);
704 case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */
705 return BYTE(0,1,1,1,1,1,0,1);
706 case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */
707 return BYTE(0,1,0,1,1,1,0,1);
708
709 case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */
710 return BYTE(1,1,1,1,0,1,1,1);
711 case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */
712 return BYTE(1,1,0,1,0,1,1,1);
713 case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */
714 return BYTE(0,1,1,1,0,1,1,1);
715 case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */
716 return BYTE(0,1,0,1,0,1,1,1);
717
718 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */
719 return BYTE(1,1,1,1,0,1,0,1);
720 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */
721 return BYTE(1,1,0,1,0,1,0,1);
722 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */
723 return BYTE(0,1,1,1,0,1,0,1);
724 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */
725 return BYTE(0,1,0,1,0,1,0,1);
726
727 case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */
728 return BYTE(0,0,0,1,1,1,1,1);
729 case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */
730 return BYTE(0,0,0,1,1,1,0,1);
731 case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */
732 return BYTE(0,0,0,1,0,1,1,1);
733 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */
734 return BYTE(0,0,0,1,0,1,0,1);
735
736 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */
737 return BYTE(1,1,1,1,0,0,0,1);
738 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */
739 return BYTE(1,1,0,1,0,0,0,1);
740 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */
741 return BYTE(0,1,1,1,0,0,0,1);
742 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */
743 return BYTE(0,1,0,1,0,0,0,1);
744
745 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
746 return BYTE(0,0,0,1,0,0,0,1);
747
748 case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
749 return BYTE(0,0,0,0,0,0,0,1);
750
751 default: return BYTE(0,0,0,0,0,0,0,0);
752 /* INVALID - any valid descr produces at least one
753 valid bit in tree[0..7]*/
754 }
755 /* NOTREACHED*/
756 tl_assert(0);
757
758# undef DESCR
759# undef BYTE
760}
761
762__attribute__((unused))
763static Bool is_sane_Descr ( UShort descr ) {
764 return descr_to_validbits(descr) != 0;
765}
766
767static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
768 VG_(sprintf)(dst,
769 "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
770 (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
771 (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
772 (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
773 (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
774 (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
775 (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
776 (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
777 (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
778 (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
779 (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
780 (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
781 (Int)((descr & TREE_DESCR_64) ? 1 : 0),
782 (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
783 (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
784 (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
785 );
786}
787static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
788 VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
789 (Int)((byte & 128) ? 1 : 0),
790 (Int)((byte & 64) ? 1 : 0),
791 (Int)((byte & 32) ? 1 : 0),
792 (Int)((byte & 16) ? 1 : 0),
793 (Int)((byte & 8) ? 1 : 0),
794 (Int)((byte & 4) ? 1 : 0),
795 (Int)((byte & 2) ? 1 : 0),
796 (Int)((byte & 1) ? 1 : 0)
797 );
798}
799
800static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
801 Word i;
802 UChar validbits = descr_to_validbits(descr);
803 HChar buf[128], buf2[128];
804 if (validbits == 0)
805 goto bad;
806 for (i = 0; i < 8; i++) {
807 if (validbits & (1<<i)) {
808 if (tree[i] == SVal_INVALID)
809 goto bad;
810 } else {
811 if (tree[i] != SVal_INVALID)
812 goto bad;
813 }
814 }
815 return True;
816 bad:
817 sprintf_Descr( buf, descr );
818 sprintf_Byte( buf2, validbits );
819 VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
820 VG_(printf)(" validbits 0x%02lx %s\n", (UWord)validbits, buf2);
821 VG_(printf)(" descr 0x%04lx %s\n", (UWord)descr, buf);
822 for (i = 0; i < 8; i++)
823 VG_(printf)(" [%ld] 0x%016llx\n", i, tree[i]);
824 VG_(printf)("%s","}\n");
825 return False;
826}
827
828static Bool is_sane_CacheLine ( CacheLine* cl )
829{
830 Word tno, cloff;
831
832 if (!cl) goto bad;
833
834 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
835 UShort descr = cl->descrs[tno];
836 SVal* tree = &cl->svals[cloff];
837 if (!is_sane_Descr_and_Tree(descr, tree))
838 goto bad;
839 }
840 tl_assert(cloff == N_LINE_ARANGE);
841 return True;
842 bad:
843 pp_CacheLine(cl);
844 return False;
845}
846
847static UShort normalise_tree ( /*MOD*/SVal* tree )
848{
849 UShort descr;
850 /* pre: incoming tree[0..7] does not have any invalid shvals, in
851 particular no zeroes. */
852 if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
853 || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
854 || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
855 || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
856 tl_assert(0);
857
858 descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
859 | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
860 | TREE_DESCR_8_1 | TREE_DESCR_8_0;
861 /* build 16-bit layer */
862 if (tree[1] == tree[0]) {
863 tree[1] = SVal_INVALID;
864 descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
865 descr |= TREE_DESCR_16_0;
866 }
867 if (tree[3] == tree[2]) {
868 tree[3] = SVal_INVALID;
869 descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
870 descr |= TREE_DESCR_16_1;
871 }
872 if (tree[5] == tree[4]) {
873 tree[5] = SVal_INVALID;
874 descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
875 descr |= TREE_DESCR_16_2;
876 }
877 if (tree[7] == tree[6]) {
878 tree[7] = SVal_INVALID;
879 descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
880 descr |= TREE_DESCR_16_3;
881 }
882 /* build 32-bit layer */
883 if (tree[2] == tree[0]
884 && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
885 tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
886 descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
887 descr |= TREE_DESCR_32_0;
888 }
889 if (tree[6] == tree[4]
890 && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
891 tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
892 descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
893 descr |= TREE_DESCR_32_1;
894 }
895 /* build 64-bit layer */
896 if (tree[4] == tree[0]
897 && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
898 tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
899 descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
900 descr |= TREE_DESCR_64;
901 }
902 return descr;
903}
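/* Worked example: if the leaves arrive as {A,A,A,A,B,B,C,D}, then
   normalise_tree collapses bytes 0..3 into a 32-bit node (tree[0] == A),
   bytes 4..5 into a 16-bit node (tree[4] == B), and leaves bytes 6 and 7
   as 8-bit nodes, returning
      TREE_DESCR_32_0 | TREE_DESCR_16_2 | TREE_DESCR_8_6 | TREE_DESCR_8_7
   with tree[1..3] and tree[5] set to SVal_INVALID. */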
904
905/* This takes a cacheline where all the data is at the leaves
906 (w8[..]) and builds a correctly normalised tree. */
907static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
908{
909 Word tno, cloff;
910 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
911 SVal* tree = &cl->svals[cloff];
912 cl->descrs[tno] = normalise_tree( tree );
913 }
914 tl_assert(cloff == N_LINE_ARANGE);
915 if (SCE_CACHELINE)
916 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
917 stats__cline_normalises++;
918}
919
920
921typedef struct { UChar count; SVal sval; } CountedSVal;
922
923static
924void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
925 /*OUT*/Word* dstUsedP,
926 Word nDst, CacheLine* src )
927{
928 Word tno, cloff, dstUsed;
929
930 tl_assert(nDst == N_LINE_ARANGE);
931 dstUsed = 0;
932
933 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
934 UShort descr = src->descrs[tno];
935 SVal* tree = &src->svals[cloff];
936
937 /* sequentialise the tree described by (descr,tree). */
938# define PUT(_n,_v) \
939 do { dst[dstUsed ].count = (_n); \
940 dst[dstUsed++].sval = (_v); \
941 } while (0)
942
943 /* byte 0 */
944 if (descr & TREE_DESCR_64) PUT(8, tree[0]); else
945 if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
946 if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
947 if (descr & TREE_DESCR_8_0) PUT(1, tree[0]);
948 /* byte 1 */
949 if (descr & TREE_DESCR_8_1) PUT(1, tree[1]);
950 /* byte 2 */
951 if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
952 if (descr & TREE_DESCR_8_2) PUT(1, tree[2]);
953 /* byte 3 */
954 if (descr & TREE_DESCR_8_3) PUT(1, tree[3]);
955 /* byte 4 */
956 if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
957 if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
958 if (descr & TREE_DESCR_8_4) PUT(1, tree[4]);
959 /* byte 5 */
960 if (descr & TREE_DESCR_8_5) PUT(1, tree[5]);
961 /* byte 6 */
962 if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
963 if (descr & TREE_DESCR_8_6) PUT(1, tree[6]);
964 /* byte 7 */
965 if (descr & TREE_DESCR_8_7) PUT(1, tree[7]);
966
967# undef PUT
968 /* END sequentialise the tree described by (descr,tree). */
969
970 }
971 tl_assert(cloff == N_LINE_ARANGE);
972 tl_assert(dstUsed <= nDst);
973
974 *dstUsedP = dstUsed;
975}
976
977/* Write the cacheline 'wix' to backing store. Where it ends up
978 is determined by its tag field. */
979static __attribute__((noinline)) void cacheline_wback ( UWord wix )
980{
981 Word i, j, k, m;
982 Addr tag;
983 SecMap* sm;
984 CacheLine* cl;
985 LineZ* lineZ;
986 LineF* lineF;
987 Word zix, fix, csvalsUsed;
988 CountedSVal csvals[N_LINE_ARANGE];
989 SVal sv;
990
991 if (0)
992 VG_(printf)("scache wback line %d\n", (Int)wix);
993
994 tl_assert(wix >= 0 && wix < N_WAY_NENT);
995
996 tag = cache_shmem.tags0[wix];
997 cl = &cache_shmem.lyns0[wix];
998
999 /* The cache line may have been invalidated; if so, ignore it. */
1000 if (!is_valid_scache_tag(tag))
1001 return;
1002
1003 /* Where are we going to put it? */
1004 sm = NULL;
1005 lineZ = NULL;
1006 lineF = NULL;
1007 zix = fix = -1;
1008
1009 /* find the Z line to write in and rcdec it or the associated F
1010 line. */
1011 find_Z_for_writing( &sm, &zix, tag );
1012
1013 tl_assert(sm);
1014 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
1015 lineZ = &sm->linesZ[zix];
1016
1017 /* Generate the data to be stored */
1018 if (SCE_CACHELINE)
1019 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1020
1021 csvalsUsed = -1;
1022 sequentialise_CacheLine( csvals, &csvalsUsed,
1023 N_LINE_ARANGE, cl );
1024 tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
1025 if (0) VG_(printf)("%lu ", csvalsUsed);
1026
1027 lineZ->dict[0] = lineZ->dict[1]
1028 = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1029
1030 /* i indexes actual shadow values, k is cursor in csvals */
1031 i = 0;
1032 for (k = 0; k < csvalsUsed; k++) {
1033
1034 sv = csvals[k].sval;
1035 if (SCE_SVALS)
1036 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1037 /* do we already have it? */
1038 if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
1039 if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
1040 if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
1041 if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
1042 /* no. look for a free slot. */
1043 if (SCE_SVALS)
1044 tl_assert(sv != SVal_INVALID);
1045 if (lineZ->dict[0]
1046 == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
1047 if (lineZ->dict[1]
1048 == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
1049 if (lineZ->dict[2]
1050 == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
1051 if (lineZ->dict[3]
1052 == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
1053 break; /* we'll have to use the f rep */
1054 dict_ok:
1055 m = csvals[k].count;
1056 if (m == 8) {
1057 write_twobit_array( lineZ->ix2s, i+0, j );
1058 write_twobit_array( lineZ->ix2s, i+1, j );
1059 write_twobit_array( lineZ->ix2s, i+2, j );
1060 write_twobit_array( lineZ->ix2s, i+3, j );
1061 write_twobit_array( lineZ->ix2s, i+4, j );
1062 write_twobit_array( lineZ->ix2s, i+5, j );
1063 write_twobit_array( lineZ->ix2s, i+6, j );
1064 write_twobit_array( lineZ->ix2s, i+7, j );
1065 i += 8;
1066 }
1067 else if (m == 4) {
1068 write_twobit_array( lineZ->ix2s, i+0, j );
1069 write_twobit_array( lineZ->ix2s, i+1, j );
1070 write_twobit_array( lineZ->ix2s, i+2, j );
1071 write_twobit_array( lineZ->ix2s, i+3, j );
1072 i += 4;
1073 }
1074 else if (m == 1) {
1075 write_twobit_array( lineZ->ix2s, i+0, j );
1076 i += 1;
1077 }
1078 else if (m == 2) {
1079 write_twobit_array( lineZ->ix2s, i+0, j );
1080 write_twobit_array( lineZ->ix2s, i+1, j );
1081 i += 2;
1082 }
1083 else {
1084 tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
1085 }
1086
1087 }
1088
1089 if (LIKELY(i == N_LINE_ARANGE)) {
1090 /* Construction of the compressed representation was
1091 successful. */
1092 rcinc_LineZ(lineZ);
1093 stats__cache_Z_wbacks++;
1094 } else {
1095 /* Cannot use the compressed(z) representation. Use the full(f)
1096 rep instead. */
1097 tl_assert(i >= 0 && i < N_LINE_ARANGE);
1098 alloc_F_for_writing( sm, &fix );
1099 tl_assert(sm->linesF);
1100 tl_assert(sm->linesF_size > 0);
1101 tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
1102 lineF = &sm->linesF[fix];
1103 tl_assert(!lineF->inUse);
1104 lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1105 lineZ->dict[1] = (SVal)fix;
1106 lineF->inUse = True;
1107 i = 0;
1108 for (k = 0; k < csvalsUsed; k++) {
1109 if (SCE_SVALS)
1110 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1111 sv = csvals[k].sval;
1112 if (SCE_SVALS)
1113 tl_assert(sv != SVal_INVALID);
1114 for (m = csvals[k].count; m > 0; m--) {
1115 lineF->w64s[i] = sv;
1116 i++;
1117 }
1118 }
1119 tl_assert(i == N_LINE_ARANGE);
1120 rcinc_LineF(lineF);
1121 stats__cache_F_wbacks++;
1122 }
1123
1124 //if (anyShared)
1125 // sm->mbHasShared = True;
1126
1127 /* mb_tidy_one_cacheline(); */
1128}
1129
1130/* Fetch the cacheline 'wix' from the backing store. The tag
1131 associated with 'wix' is assumed to have already been filled in;
1132 hence that is used to determine where in the backing store to read
1133 from. */
1134static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
1135{
1136 Word i;
1137 Addr tag;
1138 CacheLine* cl;
1139 LineZ* lineZ;
1140 LineF* lineF;
1141
1142 if (0)
1143 VG_(printf)("scache fetch line %d\n", (Int)wix);
1144
1145 tl_assert(wix >= 0 && wix < N_WAY_NENT);
1146
1147 tag = cache_shmem.tags0[wix];
1148 cl = &cache_shmem.lyns0[wix];
1149
1150 /* reject nonsense requests */
1151 tl_assert(is_valid_scache_tag(tag));
1152
1153 lineZ = NULL;
1154 lineF = NULL;
1155 find_ZF_for_reading( &lineZ, &lineF, tag );
1156 tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
1157
1158 /* expand the data into the bottom layer of the tree, then get
1159 cacheline_normalise to build the descriptor array. */
1160 if (lineF) {
1161 tl_assert(lineF->inUse);
1162 for (i = 0; i < N_LINE_ARANGE; i++) {
1163 cl->svals[i] = lineF->w64s[i];
1164 }
1165 stats__cache_F_fetches++;
1166 } else {
1167 for (i = 0; i < N_LINE_ARANGE; i++) {
1168 SVal sv;
1169 UWord ix = read_twobit_array( lineZ->ix2s, i );
1170 /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
1171 sv = lineZ->dict[ix];
1172 tl_assert(sv != SVal_INVALID);
1173 cl->svals[i] = sv;
1174 }
1175 stats__cache_Z_fetches++;
1176 }
1177 normalise_CacheLine( cl );
1178}
1179
1180static void shmem__invalidate_scache ( void ) {
1181 Word wix;
1182 if (0) VG_(printf)("%s","scache inval\n");
1183 tl_assert(!is_valid_scache_tag(1));
1184 for (wix = 0; wix < N_WAY_NENT; wix++) {
1185 cache_shmem.tags0[wix] = 1/*INVALID*/;
1186 }
1187 stats__cache_invals++;
1188}
1189
1190static void shmem__flush_and_invalidate_scache ( void ) {
1191 Word wix;
1192 Addr tag;
1193 if (0) VG_(printf)("%s","scache flush and invalidate\n");
1194 tl_assert(!is_valid_scache_tag(1));
1195 for (wix = 0; wix < N_WAY_NENT; wix++) {
1196 tag = cache_shmem.tags0[wix];
1197 if (tag == 1/*INVALID*/) {
1198 /* already invalid; nothing to do */
1199 } else {
1200 tl_assert(is_valid_scache_tag(tag));
1201 cacheline_wback( wix );
1202 }
1203 cache_shmem.tags0[wix] = 1/*INVALID*/;
1204 }
1205 stats__cache_flushes++;
1206 stats__cache_invals++;
1207}
1208
1209
1210static inline Bool aligned16 ( Addr a ) {
1211 return 0 == (a & 1);
1212}
1213static inline Bool aligned32 ( Addr a ) {
1214 return 0 == (a & 3);
1215}
1216static inline Bool aligned64 ( Addr a ) {
1217 return 0 == (a & 7);
1218}
1219static inline UWord get_cacheline_offset ( Addr a ) {
1220 return (UWord)(a & (N_LINE_ARANGE - 1));
1221}
1222static inline Addr cacheline_ROUNDUP ( Addr a ) {
1223 return ROUNDUP(a, N_LINE_ARANGE);
1224}
1225static inline Addr cacheline_ROUNDDN ( Addr a ) {
1226 return ROUNDDN(a, N_LINE_ARANGE);
1227}
1228static inline UWord get_treeno ( Addr a ) {
1229 return get_cacheline_offset(a) >> 3;
1230}
1231static inline UWord get_tree_offset ( Addr a ) {
1232 return a & 7;
1233}
1234
1235static __attribute__((noinline))
1236 CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
1237static inline CacheLine* get_cacheline ( Addr a )
1238{
1239 /* tag is 'a' with the in-line offset masked out,
1240 eg a[31]..a[6] 000000 */
1241 Addr tag = a & ~(N_LINE_ARANGE - 1);
1242 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1243 stats__cache_totrefs++;
1244 if (LIKELY(tag == cache_shmem.tags0[wix])) {
1245 return &cache_shmem.lyns0[wix];
1246 } else {
1247 return get_cacheline_MISS( a );
1248 }
1249}
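/* Example of the direct-mapped lookup above (N_LINE_BITS == 6,
   N_WAY_BITS == 16): for a == 0x4BCD1234, tag == 0x4BCD1200 and
   wix == (0x4BCD1234 >> 6) & 0xFFFF == 0x3448.  A hit requires
   tags0[0x3448] == 0x4BCD1200; otherwise get_cacheline_MISS writes
   back whatever line currently occupies way 0x3448 and fetches this
   one in its place. */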
1250
1251static __attribute__((noinline))
1252 CacheLine* get_cacheline_MISS ( Addr a )
1253{
1254 /* tag is 'a' with the in-line offset masked out,
1255 eg a[31]..a[6] 000000 */
1256
1257 CacheLine* cl;
1258 Addr* tag_old_p;
1259 Addr tag = a & ~(N_LINE_ARANGE - 1);
1260 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1261
1262 tl_assert(tag != cache_shmem.tags0[wix]);
1263
1264 /* Dump the old line into the backing store. */
1265 stats__cache_totmisses++;
1266
1267 cl = &cache_shmem.lyns0[wix];
1268 tag_old_p = &cache_shmem.tags0[wix];
1269
1270 if (is_valid_scache_tag( *tag_old_p )) {
1271 /* EXPENSIVE and REDUNDANT: callee does it */
1272 if (SCE_CACHELINE)
1273 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1274 cacheline_wback( wix );
1275 }
1276 /* and reload the new one */
1277 *tag_old_p = tag;
1278 cacheline_fetch( wix );
1279 if (SCE_CACHELINE)
1280 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1281 return cl;
1282}
1283
1284static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1285 stats__cline_64to32pulldown++;
1286 switch (toff) {
1287 case 0: case 4:
1288 tl_assert(descr & TREE_DESCR_64);
1289 tree[4] = tree[0];
1290 descr &= ~TREE_DESCR_64;
1291 descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
1292 break;
1293 default:
1294 tl_assert(0);
1295 }
1296 return descr;
1297}
1298
1299static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1300 stats__cline_32to16pulldown++;
1301 switch (toff) {
1302 case 0: case 2:
1303 if (!(descr & TREE_DESCR_32_0)) {
1304 descr = pulldown_to_32(tree, 0, descr);
1305 }
1306 tl_assert(descr & TREE_DESCR_32_0);
1307 tree[2] = tree[0];
1308 descr &= ~TREE_DESCR_32_0;
1309 descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
1310 break;
1311 case 4: case 6:
1312 if (!(descr & TREE_DESCR_32_1)) {
1313 descr = pulldown_to_32(tree, 4, descr);
1314 }
1315 tl_assert(descr & TREE_DESCR_32_1);
1316 tree[6] = tree[4];
1317 descr &= ~TREE_DESCR_32_1;
1318 descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
1319 break;
1320 default:
1321 tl_assert(0);
1322 }
1323 return descr;
1324}
1325
1326static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1327 stats__cline_16to8pulldown++;
1328 switch (toff) {
1329 case 0: case 1:
1330 if (!(descr & TREE_DESCR_16_0)) {
1331 descr = pulldown_to_16(tree, 0, descr);
1332 }
1333 tl_assert(descr & TREE_DESCR_16_0);
1334 tree[1] = tree[0];
1335 descr &= ~TREE_DESCR_16_0;
1336 descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
1337 break;
1338 case 2: case 3:
1339 if (!(descr & TREE_DESCR_16_1)) {
1340 descr = pulldown_to_16(tree, 2, descr);
1341 }
1342 tl_assert(descr & TREE_DESCR_16_1);
1343 tree[3] = tree[2];
1344 descr &= ~TREE_DESCR_16_1;
1345 descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
1346 break;
1347 case 4: case 5:
1348 if (!(descr & TREE_DESCR_16_2)) {
1349 descr = pulldown_to_16(tree, 4, descr);
1350 }
1351 tl_assert(descr & TREE_DESCR_16_2);
1352 tree[5] = tree[4];
1353 descr &= ~TREE_DESCR_16_2;
1354 descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
1355 break;
1356 case 6: case 7:
1357 if (!(descr & TREE_DESCR_16_3)) {
1358 descr = pulldown_to_16(tree, 6, descr);
1359 }
1360 tl_assert(descr & TREE_DESCR_16_3);
1361 tree[7] = tree[6];
1362 descr &= ~TREE_DESCR_16_3;
1363 descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
1364 break;
1365 default:
1366 tl_assert(0);
1367 }
1368 return descr;
1369}
1370
1371
1372static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
1373 UShort mask;
1374 switch (toff) {
1375 case 0:
1376 mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
1377 tl_assert( (descr & mask) == mask );
1378 descr &= ~mask;
1379 descr |= TREE_DESCR_16_0;
1380 break;
1381 case 2:
1382 mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
1383 tl_assert( (descr & mask) == mask );
1384 descr &= ~mask;
1385 descr |= TREE_DESCR_16_1;
1386 break;
1387 case 4:
1388 mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
1389 tl_assert( (descr & mask) == mask );
1390 descr &= ~mask;
1391 descr |= TREE_DESCR_16_2;
1392 break;
1393 case 6:
1394 mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
1395 tl_assert( (descr & mask) == mask );
1396 descr &= ~mask;
1397 descr |= TREE_DESCR_16_3;
1398 break;
1399 default:
1400 tl_assert(0);
1401 }
1402 return descr;
1403}
1404
1405static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
1406 UShort mask;
1407 switch (toff) {
1408 case 0:
1409 if (!(descr & TREE_DESCR_16_0))
1410 descr = pullup_descr_to_16(descr, 0);
1411 if (!(descr & TREE_DESCR_16_1))
1412 descr = pullup_descr_to_16(descr, 2);
1413 mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
1414 tl_assert( (descr & mask) == mask );
1415 descr &= ~mask;
1416 descr |= TREE_DESCR_32_0;
1417 break;
1418 case 4:
1419 if (!(descr & TREE_DESCR_16_2))
1420 descr = pullup_descr_to_16(descr, 4);
1421 if (!(descr & TREE_DESCR_16_3))
1422 descr = pullup_descr_to_16(descr, 6);
1423 mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
1424 tl_assert( (descr & mask) == mask );
1425 descr &= ~mask;
1426 descr |= TREE_DESCR_32_1;
1427 break;
1428 default:
1429 tl_assert(0);
1430 }
1431 return descr;
1432}
1433
1434static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
1435 switch (toff) {
1436 case 0: case 4:
1437 return 0 != (descr & TREE_DESCR_64);
1438 default:
1439 tl_assert(0);
1440 }
1441}
1442
1443static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
1444 switch (toff) {
1445 case 0:
1446 return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
1447 case 2:
1448 return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
1449 case 4:
1450 return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
1451 case 6:
1452 return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
1453 default:
1454 tl_assert(0);
1455 }
1456}
1457
1458/* ------------ Cache management ------------ */
1459
1460static void zsm_flush_cache ( void )
1461{
1462 shmem__flush_and_invalidate_scache();
1463}
1464
1465
1466static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
1467{
1468 tl_assert( sizeof(UWord) == sizeof(Addr) );
1469
1470 rcinc = p_rcinc;
1471 rcdec = p_rcdec;
1472
1473 tl_assert(map_shmem == NULL);
1474 map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
1475 HG_(free),
1476 NULL/*unboxed UWord cmp*/);
1477 tl_assert(map_shmem != NULL);
1478 shmem__invalidate_scache();
1479
1480 /* a SecMap must contain an integral number of CacheLines */
1481 tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
1482 /* also ... a CacheLine holds an integral number of trees */
1483 tl_assert(0 == (N_LINE_ARANGE % 8));
1484}
1485
1486/////////////////////////////////////////////////////////////////
1487/////////////////////////////////////////////////////////////////
1488// //
1489// SECTION END compressed shadow memory //
1490// //
1491/////////////////////////////////////////////////////////////////
1492/////////////////////////////////////////////////////////////////
1493
1494
1495
1496/////////////////////////////////////////////////////////////////
1497/////////////////////////////////////////////////////////////////
1498// //
1499// SECTION BEGIN vts primitives //
1500// //
1501/////////////////////////////////////////////////////////////////
1502/////////////////////////////////////////////////////////////////
1503
1504#ifndef __HB_VTS_H
1505#define __HB_VTS_H
1506
1507/* VtsIDs can't exceed 30 bits, since they have to be packed into the
1508 lowest 30 bits of an SVal. */
1509typedef UInt VtsID;
1510#define VtsID_INVALID 0xFFFFFFFF
1511
1512/* A VTS contains .ts, its vector clock, and also .id, a field to hold
1513 a backlink for the caller's convenience. Since we have no idea
1514 what to set that to in the library, it always gets set to
1515 VtsID_INVALID. */
1516typedef
1517 struct {
1518 VtsID id;
1519 XArray* ts; /* XArray* ScalarTS(abstract) */
1520 }
1521 VTS;
1522
1523
1524/* Create a new, empty VTS. */
1525VTS* VTS__new ( void );
1526
1527/* Delete this VTS in its entirety. */
1528void VTS__delete ( VTS* vts );
1529
1530/* Create a new singleton VTS. */
1531VTS* VTS__singleton ( Thr* thr, ULong tym );
1532
1533/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
1534 not modified. */
1535VTS* VTS__tick ( Thr* me, VTS* vts );
1536
1537/* Return a new VTS constructed as the join (max) of the 2 args.
1538 Neither arg is modified. */
1539VTS* VTS__join ( VTS* a, VTS* b );
1540
1541/* Compute the partial ordering relation of the two args. */
1542typedef
1543 enum { POrd_EQ=4, POrd_LT, POrd_GT, POrd_UN }
1544 POrd;
1545
1546POrd VTS__cmp ( VTS* a, VTS* b );
1547
1548/* Compute an arbitrary structural (total) ordering on the two args,
1549 based on their VCs, so they can be looked up in a table, tree, etc.
1550 Returns -1, 0 or 1. */
1551Word VTS__cmp_structural ( VTS* a, VTS* b );
1552
1553/* Debugging only. Display the given VTS in the buffer. */
1554void VTS__show ( HChar* buf, Int nBuf, VTS* vts );
1555
1556/* Debugging only. Return vts[index], so to speak. */
1557ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
1558
1559#endif /* ! __HB_VTS_H */
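/* Illustrative sketch only (not part of the library): how the VTS
   primitives declared above compose.  thrA and thrB stand for two
   Thr* values obtained elsewhere; example_vts_usage is hypothetical. */
#if 0
static void example_vts_usage ( Thr* thrA, Thr* thrB )
{
   VTS *a, *b, *a2, *j;
   a  = VTS__singleton( thrA, 1 );   /* a  = { A:1 } */
   b  = VTS__singleton( thrB, 1 );   /* b  = { B:1 } */
   a2 = VTS__tick( thrA, a );        /* a2 = { A:2 }; 'a' is unchanged */
   j  = VTS__join( a2, b );          /* j  = { A:2, B:1 } */
   tl_assert( VTS__cmp( a, a2 ) == POrd_LT );  /* a is dominated by a2 */
   tl_assert( VTS__cmp( a2, b ) == POrd_UN );  /* unordered */
   tl_assert( VTS__cmp( j, j )  == POrd_EQ );
   VTS__delete(a); VTS__delete(b); VTS__delete(a2); VTS__delete(j);
}
#endif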
1560
1561
1562/*--------------- to do with Vector Timestamps ---------------*/
1563
1564/* Scalar Timestamp */
1565typedef
1566 struct {
1567 Thr* thr;
1568 ULong tym;
1569 }
1570 ScalarTS;
1571
1572
1573static Bool is_sane_VTS ( VTS* vts )
1574{
1575 UWord i, n;
1576 ScalarTS *st1, *st2;
1577 if (!vts) return False;
1578 if (!vts->ts) return False;
1579 n = VG_(sizeXA)( vts->ts );
1580 if (n >= 2) {
1581 for (i = 0; i < n-1; i++) {
1582 st1 = VG_(indexXA)( vts->ts, i );
1583 st2 = VG_(indexXA)( vts->ts, i+1 );
1584 if (st1->thr >= st2->thr)
1585 return False;
1586 if (st1->tym == 0 || st2->tym == 0)
1587 return False;
1588 }
1589 }
1590 return True;
1591}
1592
1593
1594/* Create a new, empty VTS.
1595*/
1596VTS* VTS__new ( void )
1597{
1598 VTS* vts;
1599 vts = HG_(zalloc)( "libhb.VTS__new.1", sizeof(VTS) );
1600 tl_assert(vts);
1601 vts->id = VtsID_INVALID;
1602 vts->ts = VG_(newXA)( HG_(zalloc), "libhb.VTS__new.2",
1603 HG_(free), sizeof(ScalarTS) );
1604 tl_assert(vts->ts);
1605 return vts;
1606}
1607
1608
1609/* Delete this VTS in its entirety.
1610*/
1611void VTS__delete ( VTS* vts )
1612{
1613 tl_assert(vts);
1614 tl_assert(vts->ts);
1615 VG_(deleteXA)( vts->ts );
1616 HG_(free)(vts);
1617}
1618
1619
1620/* Create a new singleton VTS.
1621*/
1622VTS* VTS__singleton ( Thr* thr, ULong tym ) {
1623 ScalarTS st;
1624 VTS* vts;
1625 tl_assert(thr);
1626 tl_assert(tym >= 1);
1627 vts = VTS__new();
1628 st.thr = thr;
1629 st.tym = tym;
1630 VG_(addToXA)( vts->ts, &st );
1631 return vts;
1632}
1633
1634
1635/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
1636 not modified.
1637*/
1638VTS* VTS__tick ( Thr* me, VTS* vts )
1639{
1640 ScalarTS* here = NULL;
1641 ScalarTS tmp;
1642 VTS* res;
1643 Word i, n;
1644 tl_assert(me);
1645 tl_assert(is_sane_VTS(vts));
1646 //if (0) VG_(printf)("tick vts thrno %ld szin %d\n",
1647 // (Word)me->errmsg_index, (Int)VG_(sizeXA)(vts) );
1648 res = VTS__new();
1649 n = VG_(sizeXA)( vts->ts );
1650
1651 /* main loop doesn't handle zero-entry case correctly, so
1652 special-case it. */
1653 if (n == 0) {
1654 tmp.thr = me;
1655 tmp.tym = 1;
1656 VG_(addToXA)( res->ts, &tmp );
1657 tl_assert(is_sane_VTS(res));
1658 return res;
1659 }
1660
1661 for (i = 0; i < n; i++) {
1662 here = VG_(indexXA)( vts->ts, i );
1663 if (me < here->thr) {
1664 /* We just went past 'me', without seeing it. */
1665 tmp.thr = me;
1666 tmp.tym = 1;
1667 VG_(addToXA)( res->ts, &tmp );
1668 tmp = *here;
1669 VG_(addToXA)( res->ts, &tmp );
1670 i++;
1671 break;
1672 }
1673 else if (me == here->thr) {
1674 tmp = *here;
1675 tmp.tym++;
1676 VG_(addToXA)( res->ts, &tmp );
1677 i++;
1678 break;
1679 }
1680 else /* me > here->thr */ {
1681 tmp = *here;
1682 VG_(addToXA)( res->ts, &tmp );
1683 }
1684 }
1685 tl_assert(i >= 0 && i <= n);
1686 if (i == n && here && here->thr < me) {
1687 tmp.thr = me;
1688 tmp.tym = 1;
1689 VG_(addToXA)( res->ts, &tmp );
1690 } else {
1691 for (/*keepgoing*/; i < n; i++) {
1692 here = VG_(indexXA)( vts->ts, i );
1693 tmp = *here;
1694 VG_(addToXA)( res->ts, &tmp );
1695 }
1696 }
1697 tl_assert(is_sane_VTS(res));
1698 //if (0) VG_(printf)("tick vts thrno %ld szou %d\n",
1699 // (Word)me->errmsg_index, (Int)VG_(sizeXA)(res) );
1700 return res;
1701}
1702
1703
1704/* Return a new VTS constructed as the join (max) of the 2 args.
1705 Neither arg is modified.
1706*/
1707VTS* VTS__join ( VTS* a, VTS* b )
1708{
1709 Word ia, ib, useda, usedb;
1710 ULong tyma, tymb, tymMax;
1711 Thr* thr;
1712 VTS* res;
1713 ScalarTS *tmpa, *tmpb;
1714
1715 tl_assert(a && a->ts);
1716 tl_assert(b && b->ts);
1717 useda = VG_(sizeXA)( a->ts );
1718 usedb = VG_(sizeXA)( b->ts );
1719
1720 res = VTS__new();
1721 ia = ib = 0;
1722
1723 while (1) {
1724
1725 /* This logic is to enumerate triples (thr, tyma, tymb) drawn
1726 from a and b in order, where thr is the next Thr*
1727 occurring in either a or b, and tyma/b are the relevant
1728 scalar timestamps, taking into account implicit zeroes. */
1729 tl_assert(ia >= 0 && ia <= useda);
1730 tl_assert(ib >= 0 && ib <= usedb);
1731 tmpa = tmpb = NULL;
1732
1733 if (ia == useda && ib == usedb) {
1734 /* both empty - done */
1735 break;
1736 }
1737 else
1738 if (ia == useda && ib != usedb) {
1739 /* a empty, use up b */
1740 tmpb = VG_(indexXA)( b->ts, ib );
1741 thr = tmpb->thr;
1742 tyma = 0;
1743 tymb = tmpb->tym;
1744 ib++;
1745 }
1746 else
1747 if (ia != useda && ib == usedb) {
1748 /* b empty, use up a */
1749 tmpa = VG_(indexXA)( a->ts, ia );
1750 thr = tmpa->thr;
1751 tyma = tmpa->tym;
1752 tymb = 0;
1753 ia++;
1754 }
1755 else {
1756 /* both not empty; extract lowest-Thr*'d triple */
1757 tmpa = VG_(indexXA)( a->ts, ia );
1758 tmpb = VG_(indexXA)( b->ts, ib );
1759 if (tmpa->thr < tmpb->thr) {
1760 /* a has the lowest unconsidered Thr* */
1761 thr = tmpa->thr;
1762 tyma = tmpa->tym;
1763 tymb = 0;
1764 ia++;
1765 }
1766 else
1767 if (tmpa->thr > tmpb->thr) {
1768 /* b has the lowest unconsidered Thr* */
1769 thr = tmpb->thr;
1770 tyma = 0;
1771 tymb = tmpb->tym;
1772 ib++;
1773 } else {
1774 /* they both next mention the same Thr* */
1775 tl_assert(tmpa->thr == tmpb->thr);
1776 thr = tmpa->thr; /* == tmpb->thr */
1777 tyma = tmpa->tym;
1778 tymb = tmpb->tym;
1779 ia++;
1780 ib++;
1781 }
1782 }
1783
1784 /* having laboriously determined (thr, tyma, tymb), do something
1785 useful with it. */
1786 tymMax = tyma > tymb ? tyma : tymb;
1787 if (tymMax > 0) {
1788 ScalarTS st;
1789 st.thr = thr;
1790 st.tym = tymMax;
1791 VG_(addToXA)( res->ts, &st );
1792 }
1793
1794 }
1795
1796 tl_assert(is_sane_VTS( res ));
1797
1798 return res;
1799}
1800
1801
1802/* Compute the partial ordering relation of the two args.
1803*/
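/* Example: [T1:2, T2:5] vs [T1:3, T2:5] gives POrd_LT, whereas
   [T1:2] vs [T2:3] gives POrd_UN, since each side has a component
   the other lacks. */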
1804POrd VTS__cmp ( VTS* a, VTS* b )
1805{
1806 Word ia, ib, useda, usedb;
1807 ULong tyma, tymb;
1808 Thr* thr;
1809 ScalarTS *tmpa, *tmpb;
1810
1811 Bool all_leq = True;
1812 Bool all_geq = True;
1813
1814 tl_assert(a && a->ts);
1815 tl_assert(b && b->ts);
1816 useda = VG_(sizeXA)( a->ts );
1817 usedb = VG_(sizeXA)( b->ts );
1818
1819 ia = ib = 0;
1820
1821 while (1) {
1822
1823 /* This logic is to enumerate triples (thr, tyma, tymb) drawn
1824 from a and b in order, where thr is the next Thr*
1825 occurring in either a or b, and tyma/b are the relevant
1826 scalar timestamps, taking into account implicit zeroes. */
1827 tl_assert(ia >= 0 && ia <= useda);
1828 tl_assert(ib >= 0 && ib <= usedb);
1829 tmpa = tmpb = NULL;
1830
1831 if (ia == useda && ib == usedb) {
1832 /* both empty - done */
1833 break;
1834 }
1835 else
1836 if (ia == useda && ib != usedb) {
1837 /* a empty, use up b */
1838 tmpb = VG_(indexXA)( b->ts, ib );
1839 thr = tmpb->thr;
1840 tyma = 0;
1841 tymb = tmpb->tym;
1842 ib++;
1843 }
1844 else
1845 if (ia != useda && ib == usedb) {
1846 /* b empty, use up a */
1847 tmpa = VG_(indexXA)( a->ts, ia );
1848 thr = tmpa->thr;
1849 tyma = tmpa->tym;
1850 tymb = 0;
1851 ia++;
1852 }
1853 else {
1854 /* both not empty; extract lowest-Thr*'d triple */
1855 tmpa = VG_(indexXA)( a->ts, ia );
1856 tmpb = VG_(indexXA)( b->ts, ib );
1857 if (tmpa->thr < tmpb->thr) {
1858 /* a has the lowest unconsidered Thr* */
1859 thr = tmpa->thr;
1860 tyma = tmpa->tym;
1861 tymb = 0;
1862 ia++;
1863 }
1864 else
1865 if (tmpa->thr > tmpb->thr) {
1866 /* b has the lowest unconsidered Thr* */
1867 thr = tmpb->thr;
1868 tyma = 0;
1869 tymb = tmpb->tym;
1870 ib++;
1871 } else {
1872 /* they both next mention the same Thr* */
1873 tl_assert(tmpa->thr == tmpb->thr);
1874 thr = tmpa->thr; /* == tmpb->thr */
1875 tyma = tmpa->tym;
1876 tymb = tmpb->tym;
1877 ia++;
1878 ib++;
1879 }
1880 }
1881
1882 /* having laboriously determined (thr, tyma, tymb), do something
1883 useful with it. */
1884 if (tyma < tymb)
1885 all_geq = False;
1886 if (tyma > tymb)
1887 all_leq = False;
1888 }
1889
1890 if (all_leq && all_geq)
1891 return POrd_EQ;
1892 /* now we know they aren't equal, so either all_leq or all_geq or
1893 both are false. */
1894 if (all_leq)
1895 return POrd_LT;
1896 if (all_geq)
1897 return POrd_GT;
1898 /* hmm, neither all_geq or all_leq. This means unordered. */
1899 return POrd_UN;
1900}
1901
1902
1903/* Compute an arbitrary structural (total) ordering on the two args,
1904 based on their VCs, so they can be looked up in a table, tree, etc.
1905 Returns -1, 0 or 1. (really just 'deriving Ord' :-)
1906*/
1907Word VTS__cmp_structural ( VTS* a, VTS* b )
1908{
1909 /* We just need to generate an arbitrary total ordering based on
1910 a->ts and b->ts. Preferably do it in a way which detects likely
1911 differences relatively quickly. */
1912 Word i, useda, usedb;
1913 ScalarTS *tmpa, *tmpb;
1914
1915 tl_assert(a && a->ts);
1916 tl_assert(b && b->ts);
1917 useda = VG_(sizeXA)( a->ts );
1918 usedb = VG_(sizeXA)( b->ts );
1919
1920 if (useda < usedb) return -1;
1921 if (useda > usedb) return 1;
1922
1923 /* Same length vectors, so let's step through them together. */
1924 tl_assert(useda == usedb);
1925 for (i = 0; i < useda; i++) {
1926 tmpa = VG_(indexXA)( a->ts, i );
1927 tmpb = VG_(indexXA)( b->ts, i );
1928 if (tmpa->tym < tmpb->tym) return -1;
1929 if (tmpa->tym > tmpb->tym) return 1;
1930 if (tmpa->thr < tmpb->thr) return -1;
1931 if (tmpa->thr > tmpb->thr) return 1;
1932 }
1933
1934 /* They're identical. */
1935 return 0;
1936}
1937
1938
1939/* Debugging only. Display the given VTS in the buffer.
1940*/
1941void VTS__show ( HChar* buf, Int nBuf, VTS* vts ) {
1942 ScalarTS* st;
1943 HChar unit[64];
1944 Word i, n;
1945 Int avail = nBuf;
1946 tl_assert(vts && vts->ts);
1947 tl_assert(nBuf > 16);
1948 buf[0] = '[';
1949 buf[1] = 0;
1950 n = VG_(sizeXA)( vts->ts );
1951 for (i = 0; i < n; i++) {
1952 tl_assert(avail >= 40);
1953 st = VG_(indexXA)( vts->ts, i );
1954 VG_(memset)(unit, 0, sizeof(unit));
1955 VG_(sprintf)(unit, i < n-1 ? "%p:%lld " : "%p:%lld",
1956 st->thr, st->tym);
1957 if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
1958 VG_(strcat)(buf, " ...]");
1959 buf[nBuf-1] = 0;
1960 return;
1961 }
1962 VG_(strcat)(buf, unit);
1963 avail -= VG_(strlen)(unit);
1964 }
1965 VG_(strcat)(buf, "]");
1966 buf[nBuf-1] = 0;
1967}
1968
1969
1970/* Debugging only. Return vts[index], so to speak.
1971*/
1972ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) {
1973 UWord i, n;
1974 tl_assert(vts && vts->ts);
1975 n = VG_(sizeXA)( vts->ts );
1976 for (i = 0; i < n; i++) {
1977 ScalarTS* st = VG_(indexXA)( vts->ts, i );
1978 if (st->thr == idx)
1979 return st->tym;
1980 }
1981 return 0;
1982}
1983
1984
1985/////////////////////////////////////////////////////////////////
1986/////////////////////////////////////////////////////////////////
1987// //
1988// SECTION END vts primitives //
1989// //
1990/////////////////////////////////////////////////////////////////
1991/////////////////////////////////////////////////////////////////
1992
1993
1994
1995/////////////////////////////////////////////////////////////////
1996/////////////////////////////////////////////////////////////////
1997// //
1998// SECTION BEGIN main library //
1999// //
2000/////////////////////////////////////////////////////////////////
2001/////////////////////////////////////////////////////////////////
2002
2003
2004/////////////////////////////////////////////////////////
2005// //
2006// VTS set //
2007// //
2008/////////////////////////////////////////////////////////
2009
2010static WordFM* /* VTS* void void */ vts_set = NULL;
2011
2012static void vts_set_init ( void )
2013{
2014 tl_assert(!vts_set);
2015 vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
2016 HG_(free),
2017 (Word(*)(UWord,UWord))VTS__cmp_structural );
2018 tl_assert(vts_set);
2019}
2020
2021/* Given a newly made VTS, look in vts_set to see if we already have
2022 an identical one. If yes, free up this one and return instead a
2023 pointer to the existing one. If no, add this one to the set and
2024 return the same pointer. Caller differentiates the two cases by
2025 comparing returned pointer with the supplied one (although that
2026 does require that the supplied VTS is not already in the set).
2027*/
2028static VTS* vts_set__find_and_dealloc__or_add ( VTS* cand )
2029{
2030 UWord keyW, valW;
2031 /* lookup cand (by value) */
2032 if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
2033 /* found it */
2034 tl_assert(valW == 0);
2035 /* if this fails, cand (by ref) was already present (!) */
2036 tl_assert(keyW != (UWord)cand);
2037 VTS__delete(cand);
2038 return (VTS*)keyW;
2039 } else {
2040 /* not present. Add and return pointer to same. */
2041 VG_(addToFM)( vts_set, (UWord)cand, 0/*val is unused*/ );
2042 return cand;
2043 }
2044}
2045
2046
2047/////////////////////////////////////////////////////////
2048// //
2049// VTS table //
2050// //
2051/////////////////////////////////////////////////////////
2052
2053static void VtsID__invalidate_caches ( void ); /* fwds */
2054
2055/* A type to hold VTS table entries. Invariants:
2056 If .vts == NULL, then this entry is not in use, so:
2057 - .rc == 0
2058 - this entry is on the freelist (unfortunately, does not imply
2059 any constraints on value for .freelink)
2060 If .vts != NULL, then this entry is in use:
2061 - .vts is findable in vts_set
2062 - .vts->id == this entry number
2063 - no specific value for .rc (even 0 is OK)
2064 - this entry is not on the freelist, so .freelink == VtsID_INVALID
2065*/
2066typedef
2067 struct {
2068 VTS* vts; /* vts, in vts_set */
2069 UWord rc; /* reference count - enough for entire aspace */
2070 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
2071 }
2072 VtsTE;
2073
2074/* The VTS table. */
2075static XArray* /* of VtsTE */ vts_tab = NULL;
2076
2077/* An index into the VTS table, indicating the start of the list of
2078 free (available for use) entries. If the list is empty, this is
2079 VtsID_INVALID. */
2080static VtsID vts_tab_freelist = VtsID_INVALID;
2081
2082/* Do a GC of vts_tab when the freelist becomes empty AND the size of
2083 vts_tab equals or exceeds this size. After GC, the value here is
2084 set appropriately so as to check for the next GC point. */
2085static Word vts_next_GC_at = 1000;
2086
2087static void vts_tab_init ( void )
2088{
2089 vts_tab
2090 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2091 HG_(free), sizeof(VtsTE) );
2092 vts_tab_freelist
2093 = VtsID_INVALID;
2094 tl_assert(vts_tab);
2095}
2096
2097/* Add ii to the free list, checking that it looks out-of-use. */
2098static void add_to_free_list ( VtsID ii )
2099{
2100 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2101 tl_assert(ie->vts == NULL);
2102 tl_assert(ie->rc == 0);
2103 tl_assert(ie->freelink == VtsID_INVALID);
2104 ie->freelink = vts_tab_freelist;
2105 vts_tab_freelist = ii;
2106}
2107
2108/* Get an entry from the free list. This will return VtsID_INVALID if
2109 the free list is empty. */
2110static VtsID get_from_free_list ( void )
2111{
2112 VtsID ii;
2113 VtsTE* ie;
2114 if (vts_tab_freelist == VtsID_INVALID)
2115 return VtsID_INVALID;
2116 ii = vts_tab_freelist;
2117 ie = VG_(indexXA)( vts_tab, ii );
2118 tl_assert(ie->vts == NULL);
2119 tl_assert(ie->rc == 0);
2120 vts_tab_freelist = ie->freelink;
2121 return ii;
2122}
2123
2124/* Produce a new VtsID that can be used, either by getting it from
2125 the freelist, or, if that is empty, by expanding vts_tab. */
2126static VtsID get_new_VtsID ( void )
2127{
2128 VtsID ii;
2129 VtsTE te;
2130 ii = get_from_free_list();
2131 if (ii != VtsID_INVALID)
2132 return ii;
2133 te.vts = NULL;
2134 te.rc = 0;
2135 te.freelink = VtsID_INVALID;
2136 ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2137 return ii;
2138}
2139
2140
2141/* Indirect callback from lib_zsm. */
2142static void VtsID__rcinc ( VtsID ii )
2143{
2144 VtsTE* ie;
2145 /* VG_(indexXA) does a range check for us */
2146 ie = VG_(indexXA)( vts_tab, ii );
2147 tl_assert(ie->vts); /* else it's not in use */
2148 tl_assert(ie->rc < ~0UL); /* else we can't continue */
2149 tl_assert(ie->vts->id == ii);
2150 ie->rc++;
2151}
2152
2153/* Indirect callback from lib_zsm. */
2154static void VtsID__rcdec ( VtsID ii )
2155{
2156 VtsTE* ie;
2157 /* VG_(indexXA) does a range check for us */
2158 ie = VG_(indexXA)( vts_tab, ii );
2159 tl_assert(ie->vts); /* else it's not in use */
2160 tl_assert(ie->rc > 0); /* else RC snafu */
2161 tl_assert(ie->vts->id == ii);
2162 ie->rc--;
2163}
2164
2165
2166/* Look up 'cand' in our collection of VTSs. If present, deallocate
2167 it and return the VtsID for the pre-existing version. If not
2168 present, add it to both vts_tab and vts_set, allocate a fresh VtsID
2169 for it, and return that. */
2170static VtsID vts_tab__find_and_dealloc__or_add ( VTS* cand )
2171{
2172 VTS* auld;
2173 tl_assert(cand->id == VtsID_INVALID);
2174 auld = vts_set__find_and_dealloc__or_add(cand);
2175 if (auld != cand) {
2176 /* We already have an Aulde one. Use that. */
2177 VtsTE* ie;
2178 tl_assert(auld->id != VtsID_INVALID);
2179 ie = VG_(indexXA)( vts_tab, auld->id );
2180 tl_assert(ie->vts == auld);
2181 return auld->id;
2182 } else {
2183 VtsID ii = get_new_VtsID();
2184 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2185 ie->vts = cand;
2186 ie->rc = 0;
2187 ie->freelink = VtsID_INVALID;
2188 cand->id = ii;
2189 return ii;
2190 }
2191}
2192
2193
2194static void show_vts_stats ( HChar* caller )
2195{
2196 UWord nSet, nTab, nLive;
2197 ULong totrc;
2198 UWord n, i;
2199 nSet = VG_(sizeFM)( vts_set );
2200 nTab = VG_(sizeXA)( vts_tab );
2201 totrc = 0;
2202 nLive = 0;
2203 n = VG_(sizeXA)( vts_tab );
2204 for (i = 0; i < n; i++) {
2205 VtsTE* ie = VG_(indexXA)( vts_tab, i );
2206 if (ie->vts) {
2207 nLive++;
2208 totrc += (ULong)ie->rc;
2209 } else {
2210 tl_assert(ie->rc == 0);
2211 }
2212 }
2213 VG_(printf)(" show_vts_stats %s\n", caller);
2214 VG_(printf)(" vts_tab size %4lu\n", nTab);
2215 VG_(printf)(" vts_tab live %4lu\n", nLive);
2216 VG_(printf)(" vts_set size %4lu\n", nSet);
2217 VG_(printf)(" total rc %4llu\n", totrc);
2218}
2219
2220/* NOT TO BE CALLED FROM WITHIN libzsm. */
2221static void vts_tab__do_GC ( Bool show_stats )
2222{
2223 UWord i, nTab, nLive, nFreed;
2224
2225 /* check this is actually necessary. */
2226 tl_assert(vts_tab_freelist == VtsID_INVALID);
2227
2228 /* empty the caches for partial order checks and binary joins. We
2229 could do better and prune out the entries to be deleted, but it
2230 ain't worth the hassle. */
2231 VtsID__invalidate_caches();
2232
2233 /* First, make the reference counts up to date. */
2234 zsm_flush_cache();
2235
2236 nTab = VG_(sizeXA)( vts_tab );
2237
2238 if (show_stats) {
2239 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
2240 show_vts_stats("before GC");
2241 }
2242
2243 /* Now we can inspect the entire vts_tab. Any entries
2244 with zero .rc fields are now no longer in use and can be put on
2245 the free list, removed from vts_set, and deleted. */
2246 nFreed = 0;
2247 for (i = 0; i < nTab; i++) {
2248 Bool present;
2249 UWord oldK = 0, oldV = 0;
2250 VtsTE* te = VG_(indexXA)( vts_tab, i );
2251 if (te->vts == NULL) {
2252 tl_assert(te->rc == 0);
2253 continue; /* already on the free list (presumably) */
2254 }
2255 if (te->rc > 0)
2256 continue; /* in use */
2257 /* Ok, we got one we can free. */
2258 tl_assert(te->vts->id == i);
2259 /* first, remove it from vts_set. */
2260 present = VG_(delFromFM)( vts_set,
2261 &oldK, &oldV, (UWord)te->vts );
2262 tl_assert(present); /* else it isn't in vts_set ?! */
2263 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2264 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
2265 /* now free the VTS itself */
2266 VTS__delete(te->vts);
2267 te->vts = NULL;
2268 /* and finally put this entry on the free list */
2269 tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
2270 add_to_free_list( i );
2271 nFreed++;
2272 }
2273
2274 /* Now figure out when the next GC should be. We'll allow the
2275 number of VTSs to double before GCing again. Except of course
2276 that since we can't (or, at least, don't) shrink vts_tab, we
2277 can't set the threshold value smaller than it. */
2278 tl_assert(nFreed <= nTab);
2279 nLive = nTab - nFreed;
2280 tl_assert(nLive >= 0 && nLive <= nTab);
2281 vts_next_GC_at = 2 * nLive;
2282 if (vts_next_GC_at < nTab)
2283 vts_next_GC_at = nTab;
2284
2285 if (show_stats) {
2286 show_vts_stats("after GC");
2287 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
2288 }
2289
sewardjd024ae52008-11-09 20:47:57 +00002290 if (VG_(clo_verbosity) > 1) {
sewardjf98e1c02008-10-25 16:22:41 +00002291 static UInt ctr = 0;
2292 tl_assert(nTab > 0);
sewardjd024ae52008-11-09 20:47:57 +00002293 VG_(message)(Vg_DebugMsg,
2294 "libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)",
sewardjf98e1c02008-10-25 16:22:41 +00002295 ctr++, nTab, nLive, (100ULL * nLive) / nTab);
2296 }
2297}
2298
2299
2300/////////////////////////////////////////////////////////
2301// //
2302// Vts IDs //
2303// //
2304/////////////////////////////////////////////////////////
2305
2306//////////////////////////
2307static ULong stats__getOrdering_queries = 0;
2308static ULong stats__getOrdering_misses = 0;
2309static ULong stats__join2_queries = 0;
2310static ULong stats__join2_misses = 0;
2311
2312static inline UInt ROL32 ( UInt w, Int n ) {
2313 w = (w << n) | (w >> (32-n));
2314 return w;
2315}
2316static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
2317 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
2318 return hash % nTab;
2319}
2320
2321#define N_GETORDERING_CACHE 1023
2322static
2323 struct { VtsID vi1; VtsID vi2; POrd ord; }
2324 getOrdering_cache[N_GETORDERING_CACHE];
2325
2326#define N_JOIN2_CACHE 1023
2327static
2328 struct { VtsID vi1; VtsID vi2; VtsID res; }
2329 join2_cache[N_JOIN2_CACHE];
2330
2331static void VtsID__invalidate_caches ( void ) {
2332 Int i;
2333 for (i = 0; i < N_GETORDERING_CACHE; i++) {
2334 getOrdering_cache[i].vi1 = VtsID_INVALID;
2335 getOrdering_cache[i].vi2 = VtsID_INVALID;
2336 getOrdering_cache[i].ord = 0; /* an invalid POrd value */
2337 }
2338 for (i = 0; i < N_JOIN2_CACHE; i++) {
2339 join2_cache[i].vi1 = VtsID_INVALID;
2340 join2_cache[i].vi2 = VtsID_INVALID;
2341 join2_cache[i].res = VtsID_INVALID;
2342 }
2343}
2344//////////////////////////
2345
sewardjd52392d2008-11-08 20:36:26 +00002346//static Bool VtsID__is_valid ( VtsID vi ) {
2347// VtsTE* ve;
2348// if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
2349// return False;
2350// ve = VG_(indexXA)( vts_tab, vi );
2351// if (!ve->vts)
2352// return False;
2353// tl_assert(ve->vts->id == vi);
2354// return True;
2355//}
sewardjf98e1c02008-10-25 16:22:41 +00002356
2357static VTS* VtsID__to_VTS ( VtsID vi ) {
2358 VtsTE* te = VG_(indexXA)( vts_tab, vi );
2359 tl_assert(te->vts);
2360 return te->vts;
2361}
2362
2363static void VtsID__pp ( VtsID vi ) {
2364 HChar buf[100];
2365 VTS* vts = VtsID__to_VTS(vi);
2366 VTS__show( buf, sizeof(buf)-1, vts );
2367 buf[sizeof(buf)-1] = 0;
2368 VG_(printf)("%s", buf);
2369}
2370
2371/* compute partial ordering relation of vi1 and vi2. */
2372__attribute__((noinline))
2373static POrd VtsID__getOrdering_WRK ( VtsID vi1, VtsID vi2 ) {
2374 UInt hash;
2375 POrd ord;
2376 VTS *v1, *v2;
2377 //if (vi1 == vi2) return POrd_EQ;
2378 tl_assert(vi1 != vi2);
2379 ////++
2380 stats__getOrdering_queries++;
2381 hash = hash_VtsIDs(vi1, vi2, N_GETORDERING_CACHE);
2382 if (getOrdering_cache[hash].vi1 == vi1
2383 && getOrdering_cache[hash].vi2 == vi2)
2384 return getOrdering_cache[hash].ord;
2385 stats__getOrdering_misses++;
2386 ////--
2387 v1 = VtsID__to_VTS(vi1);
2388 v2 = VtsID__to_VTS(vi2);
2389 ord = VTS__cmp( v1, v2 );
2390 ////++
2391 getOrdering_cache[hash].vi1 = vi1;
2392 getOrdering_cache[hash].vi2 = vi2;
2393 getOrdering_cache[hash].ord = ord;
2394 ////--
2395 return ord;
2396}
2397static inline POrd VtsID__getOrdering ( VtsID vi1, VtsID vi2 ) {
2398 return vi1 == vi2 ? POrd_EQ : VtsID__getOrdering_WRK(vi1, vi2);
2399}
2400
2401/* compute binary join */
2402__attribute__((noinline))
2403static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
2404 UInt hash;
2405 VtsID res;
2406 VTS *vts1, *vts2, *nyu;
2407 //if (vi1 == vi2) return vi1;
2408 tl_assert(vi1 != vi2);
2409 ////++
2410 stats__join2_queries++;
2411 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
2412 if (join2_cache[hash].vi1 == vi1
2413 && join2_cache[hash].vi2 == vi2)
2414 return join2_cache[hash].res;
2415 stats__join2_misses++;
2416 ////--
2417 vts1 = VtsID__to_VTS(vi1);
2418 vts2 = VtsID__to_VTS(vi2);
2419 nyu = VTS__join(vts1,vts2);
2420 res = vts_tab__find_and_dealloc__or_add(nyu);
2421 ////++
2422 join2_cache[hash].vi1 = vi1;
2423 join2_cache[hash].vi2 = vi2;
2424 join2_cache[hash].res = res;
2425 ////--
2426 return res;
2427}
2428static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
2429 return vi1 == vi2 ? vi1 : VtsID__join2_WRK(vi1, vi2);
2430}
2431
2432/* create a singleton VTS, namely [thr:1] */
2433static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
2434 VTS* nyu = VTS__singleton(thr,tym);
2435 return vts_tab__find_and_dealloc__or_add(nyu);
2436}
2437
2438/* tick operation, creates value 1 if specified index is absent */
2439static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
2440 VTS* vts = VtsID__to_VTS(vi);
2441 VTS* nyu = VTS__tick(idx,vts);
2442 return vts_tab__find_and_dealloc__or_add(nyu);
2443}
2444
2445/* index into a VTS (only for assertions) */
2446static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
2447 VTS* vts = VtsID__to_VTS(vi);
2448 return VTS__indexAt_SLOW( vts, idx );
2449}
2450
2451
2452/////////////////////////////////////////////////////////
2453// //
2454// Threads //
2455// //
2456/////////////////////////////////////////////////////////
2457
2458struct _Thr {
2459 /* Current VTSs for this thread. They change as we go along. viR
2460 is the VTS to be used for reads, viW for writes. Usually they
2461 are the same, but can differ when we deal with reader-writer
2462 locks. It is always the case that VtsID__getOrdering(viW,viR)
2463 == POrd_LT or POrd_EQ -- that is, viW must be the same as, or
2464 lagging behind, viR. */
2465 VtsID viR;
2466 VtsID viW;
2467 /* opaque (to us) data we hold on behalf of the library's user. */
2468 void* opaque;
2469};
2470
2471static Thr* Thr__new ( void ) {
2472 Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
2473 thr->viR = VtsID_INVALID;
2474 thr->viW = VtsID_INVALID;
2475 return thr;
2476}
2477
2478
2479/////////////////////////////////////////////////////////
2480// //
2481// Shadow Values //
2482// //
2483/////////////////////////////////////////////////////////
2484
2485// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
2486// hb_zsm.h. We have to do everything else here.
2487
2488/* SVal is 64 bit unsigned int.
2489
2490 <---------30---------> <---------30--------->
2491 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
2492 01 X--------------------X XX X--------------------X E(rror)
2493 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
2494 11 X--------------------X XX X--------------------X I: SVal_INVALID
2495*/
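/* So in the C (constrained) case the read-min VtsID occupies the
   upper 32 bits and the write-min VtsID the lower 32 bits; both are
   assumed to fit in 30 bits, so that the two tag bit-pairs shown
   above stay zero. */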
2496#define SVAL_TAGMASK (3ULL << 62)
2497
2498static inline Bool SVal__isC ( SVal s ) {
2499 return (0ULL << 62) == (s & SVAL_TAGMASK);
2500}
2501static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
2502 //tl_assert(VtsID__is_valid(rmini));
2503 //tl_assert(VtsID__is_valid(wmini));
2504 return (((ULong)rmini) << 32) | ((ULong)wmini);
2505}
2506static inline VtsID SVal__unC_Rmin ( SVal s ) {
2507 tl_assert(SVal__isC(s));
2508 return (VtsID)(s >> 32);
2509}
2510static inline VtsID SVal__unC_Wmin ( SVal s ) {
2511 tl_assert(SVal__isC(s));
2512 return (VtsID)(s & 0xFFFFFFFFULL);
2513}
2514
2515static Bool SVal__isE ( SVal s ) {
2516 return (1ULL << 62) == (s & SVAL_TAGMASK);
2517}
2518static SVal SVal__mkE ( void ) {
2519 return 1ULL << 62;
2520}
2521
2522static Bool SVal__isA ( SVal s ) {
2523 return (2ULL << 62) == (s & SVAL_TAGMASK);
2524}
2525static SVal SVal__mkA ( void ) {
2526 return 2ULL << 62;
2527}
2528
2529/* Direct callback from lib_zsm. */
2530static void SVal__rcinc ( SVal s ) {
2531 if (SVal__isC(s)) {
2532 VtsID__rcinc( SVal__unC_Rmin(s) );
2533 VtsID__rcinc( SVal__unC_Wmin(s) );
2534 }
2535}
2536
2537/* Direct callback from lib_zsm. */
2538static void SVal__rcdec ( SVal s ) {
2539 if (SVal__isC(s)) {
2540 VtsID__rcdec( SVal__unC_Rmin(s) );
2541 VtsID__rcdec( SVal__unC_Wmin(s) );
2542 }
2543}
2544
2545
2546/////////////////////////////////////////////////////////
2547// //
2548// Change-event map2 //
2549// //
2550/////////////////////////////////////////////////////////
2551
2552#define EVENT_MAP_GC_AT (1 * 1000 * 1000)
2553#define EVENT_MAP_GC_DISCARD_FRACTION 0.5
2554
2555/* This is in two parts:
2556
2557 1. An OSet of RCECs. This is a set of reference-counted stack
2558 traces. When the reference count of a stack trace becomes zero,
2559 it is removed from the set and freed up. The intent is to have
2560 a set of stack traces which can be referred to from (2), but to
2561 only represent each one once. The set is indexed/searched by
2562 ordering on the stack trace vectors.
2563
2564 2. An OSet of OldRefs. These store information about each old ref
2565 that we need to record. It is indexed by address of the
2566 location for which the information is recorded. For LRU
2567 purposes, each OldRef also contains a generation number,
2568 indicating when it was most recently accessed.
2569
2570 The important part of an OldRef is, however, its accs[] array.
2571 This is an array of N_OLDREF_ACCS (Thr, RCEC) pairs. This
2572 allows us to collect the last access-traceback by up to
2573 N_OLDREF_ACCS different threads for this location. The accs[]
2574 array is an MTF (move-to-front) array. If a pair falls off the end, that's too
2575 bad -- we will lose info about that thread's access to this
2576 location.
2577
2578 When this OSet becomes too big, we can throw away the entries
2579 whose generation numbers are below some threshold; hence doing
2580 approximate LRU discarding. For each discarded OldRef we must
2581 of course decrement the reference count on the all RCECs it
2582 refers to, in order that entries from (1) eventually get
2583 discarded too.
2584*/
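/* Hence, when a race on address 'a' is later reported,
   event_map_lookup(a) can recover a stack trace for a previous
   access to 'a' by some other thread, provided that access is still
   among the most recent N_OLDREF_ACCS accessors and has not been
   discarded by the generation-based GC below. */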
2585
2586
2587static UWord stats__ctxt_rcdec1 = 0;
2588static UWord stats__ctxt_rcdec2 = 0;
2589static UWord stats__ctxt_rcdec3 = 0;
2590static UWord stats__ctxt_rcdec_calls = 0;
2591static UWord stats__ctxt_rcdec_discards = 0;
2592static UWord stats__ctxt_rcdec1_eq = 0;
2593
2594static UWord stats__ctxt_tab_curr = 0;
2595static UWord stats__ctxt_tab_max = 0;
2596
2597static UWord stats__ctxt_tab_qs = 0;
2598static UWord stats__ctxt_tab_cmps = 0;
2599
2600
2601///////////////////////////////////////////////////////
2602//// Part (1): An OSet of RCECs
2603///
2604
2605#define N_FRAMES 8
2606
2607// (UInt) `echo "Reference Counted Execution Context" | md5sum`
2608#define RCEC_MAGIC 0xab88abb2UL
2609
2610//#define N_RCEC_TAB 98317 /* prime */
2611#define N_RCEC_TAB 196613 /* prime */
2612
2613typedef
2614 struct _RCEC {
2615 struct _RCEC* next;
2616 UWord magic;
2617 UWord rc;
2618 UWord rcX; /* used for crosschecking */
2619 UWord frames[1 + N_FRAMES]; /* first word is hash of all the rest */
2620 }
2621 RCEC;
2622
2623static RCEC** contextTab = NULL; /* hash table of RCEC*s */
2624
2625
2626/* Gives an arbitrary total order on RCEC .frames fields */
2627static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
2628 Word i;
2629 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
2630 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
2631 if (ec1->frames[0] < ec2->frames[0]) return -1;
2632 if (ec1->frames[0] > ec2->frames[0]) return 1;
2633 for (i = 1; i < 1 + N_FRAMES; i++) {
2634 if (ec1->frames[i] < ec2->frames[i]) return -1;
2635 if (ec1->frames[i] > ec2->frames[i]) return 1;
2636 }
2637 return 0;
2638}
2639
2640
2641/* Dec the ref of this RCEC. */
2642static void ctxt__rcdec ( RCEC* ec )
2643{
2644 stats__ctxt_rcdec_calls++;
2645 tl_assert(ec && ec->magic == RCEC_MAGIC);
2646 tl_assert(ec->rc > 0);
2647 ec->rc--;
2648}
2649
2650static void ctxt__rcinc ( RCEC* ec )
2651{
2652 tl_assert(ec && ec->magic == RCEC_MAGIC);
2653 ec->rc++;
2654}
2655
2656
2657/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
2658 move it one step closer to the front of the list, so as to make
2659 subsequent searches for it cheaper. */
2660static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
2661{
2662 RCEC *ec0, *ec1, *ec2;
2663 if (ec == *headp)
2664 tl_assert(0); /* already at head of list */
2665 tl_assert(ec != NULL);
2666 ec0 = *headp;
2667 ec1 = NULL;
2668 ec2 = NULL;
2669 while (True) {
2670 if (ec0 == NULL || ec0 == ec) break;
2671 ec2 = ec1;
2672 ec1 = ec0;
2673 ec0 = ec0->next;
2674 }
2675 tl_assert(ec0 == ec);
2676 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
2677 RCEC* tmp;
2678 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
2679 predecessor. Swap ec0 and ec1, that is, move ec0 one step
2680 closer to the start of the list. */
2681 tl_assert(ec2->next == ec1);
2682 tl_assert(ec1->next == ec0);
2683 tmp = ec0->next;
2684 ec2->next = ec0;
2685 ec0->next = ec1;
2686 ec1->next = tmp;
2687 }
2688 else
2689 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
2690 /* it's second in the list. */
2691 tl_assert(*headp == ec1);
2692 tl_assert(ec1->next == ec0);
2693 ec1->next = ec0->next;
2694 ec0->next = ec1;
2695 *headp = ec0;
2696 }
2697}
2698
2699
2700/* Find the given RCEC in the tree, and return a pointer to it. Or,
2701 if not present, add the given one to the tree (by making a copy of
2702 it, so the caller can immediately deallocate the original) and
2703 return a pointer to the copy. The caller can safely have 'example'
2704 on its stack, since we will always return a pointer to a copy of
2705 it, not to the original. Note that the inserted node will have .rc
2706 of zero and so the caller must immediately increment it. */
2707__attribute__((noinline))
2708static RCEC* ctxt__find_or_add ( RCEC* example )
2709{
2710 UWord hent;
2711 RCEC* copy;
2712 tl_assert(example && example->magic == RCEC_MAGIC);
2713 tl_assert(example->rc == 0);
2714
2715 /* Search the hash table to see if we already have it. */
2716 stats__ctxt_tab_qs++;
2717 hent = example->frames[0] % N_RCEC_TAB;
2718 copy = contextTab[hent];
2719 while (1) {
2720 if (!copy) break;
2721 tl_assert(copy->magic == RCEC_MAGIC);
2722 stats__ctxt_tab_cmps++;
2723 if (0 == RCEC__cmp_by_frames(copy, example)) break;
2724 copy = copy->next;
2725 }
2726
2727 if (copy) {
2728 tl_assert(copy != example);
2729 /* optimisation: if it's not at the head of its list, move 1
2730 step fwds, to make future searches cheaper */
2731 if (copy != contextTab[hent]) {
2732 move_RCEC_one_step_forward( &contextTab[hent], copy );
2733 }
2734 } else {
2735 copy = HG_(zalloc)( "libhb.cfoa.1", sizeof(RCEC) );
2736 tl_assert(copy != example);
2737 *copy = *example;
2738 copy->next = contextTab[hent];
2739 contextTab[hent] = copy;
2740 stats__ctxt_tab_curr++;
2741 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
2742 stats__ctxt_tab_max = stats__ctxt_tab_curr;
2743 }
2744 return copy;
2745}
2746
2747static inline UWord ROLW ( UWord w, Int n )
2748{
2749 Int bpw = 8 * sizeof(UWord);
2750 w = (w << n) | (w >> (bpw-n));
2751 return w;
2752}
2753
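/* Snapshot the current stack of 'thr' (up to N_FRAMES frames), hash
   the frames into frames[0], and return the interned RCEC from the
   context table. The reference count of the returned node is not
   adjusted here; callers are expected to ctxt__rcinc it. */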
2754__attribute__((noinline))
2755static RCEC* get_RCEC ( Thr* thr )
2756{
2757 UWord hash, i;
2758 RCEC example;
2759 example.magic = RCEC_MAGIC;
2760 example.rc = 0;
2761 example.rcX = 0;
2762 main_get_stacktrace( thr, &example.frames[1], N_FRAMES );
2763 hash = 0;
2764 for (i = 1; i < 1 + N_FRAMES; i++) {
2765 hash ^= example.frames[i];
2766 hash = ROLW(hash, 19);
2767 }
2768 example.frames[0] = hash;
2769 return ctxt__find_or_add( &example );
2770}
2771
2772///////////////////////////////////////////////////////
2773//// Part (2): An OSet of OldRefs, that refer to (1)
2774///
2775
2776// (UInt) `echo "Old Reference Information" | md5sum`
2777#define OldRef_MAGIC 0x30b1f075UL
2778
2779typedef struct { Thr* thr; RCEC* rcec; } Thr_n_RCEC;
2780
2781#define N_OLDREF_ACCS 3
2782
2783typedef
2784 struct {
2785 Addr ea;
2786 UWord magic;
2787 UWord gen; /* when most recently accessed */
2788 /* unused slots in this array have .thr == NULL */
2789 Thr_n_RCEC accs[N_OLDREF_ACCS];
2790 }
2791 OldRef;
2792
sewardjf98e1c02008-10-25 16:22:41 +00002793static OSet* oldrefTree = NULL; /* OSet* of OldRef */
2794static UWord oldrefGen = 0; /* current LRU generation # */
2795static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
2796static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */
2797
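/* Record that 'thr' has just made an access to address 'a' which we
   may later need to report: bind thr's current stack trace into the
   OldRef for 'a', creating that OldRef if necessary. Each OldRef
   keeps at most N_OLDREF_ACCS (Thr, RCEC) pairs in rough
   move-to-front order, so the least recently active thread's entry
   is the one that falls off the end. */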
2798static void event_map_bind ( Addr a, Thr* thr )
2799{
2800 OldRef key, *ref;
2801 RCEC* here;
2802 Word i, j;
2803
2804 key.ea = a;
2805 key.magic = OldRef_MAGIC;
2806
2807 ref = VG_(OSetGen_Lookup)( oldrefTree, &key );
2808
2809 if (ref) {
2810
2811 /* We already have a record for this address. We now need to
2812 see if we have a stack trace pertaining to this thread's
2813 access. */
2814 tl_assert(ref->magic == OldRef_MAGIC);
2815
2816 tl_assert(thr);
2817 for (i = 0; i < N_OLDREF_ACCS; i++) {
2818 if (ref->accs[i].thr == thr)
2819 break;
2820 }
2821
2822 if (i < N_OLDREF_ACCS) {
2823 /* thread 'thr' has an entry at index 'i'. Update it. */
2824 if (i > 0) {
2825 Thr_n_RCEC tmp = ref->accs[i-1];
2826 ref->accs[i-1] = ref->accs[i];
2827 ref->accs[i] = tmp;
2828 i--;
2829 }
2830 here = get_RCEC( thr );
2831 if (here == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
2832 ctxt__rcinc( here );
2833 stats__ctxt_rcdec1++;
2834 ctxt__rcdec( ref->accs[i].rcec );
2835 ref->accs[i].rcec = here;
2836 tl_assert(ref->accs[i].thr == thr);
2837 } else {
2838 here = get_RCEC( thr );
2839 ctxt__rcinc( here );
2840 /* No entry for this thread. Shuffle all of them down one
2841 slot, and put the new entry at the start of the array. */
2842 if (ref->accs[N_OLDREF_ACCS-1].thr) {
2843 /* the last slot is in use. We must dec the rc on the
2844 associated rcec. */
2845 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
2846 stats__ctxt_rcdec2++;
2847 ctxt__rcdec(ref->accs[N_OLDREF_ACCS-1].rcec);
2848 } else {
2849 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
2850 }
2851 for (j = N_OLDREF_ACCS-1; j >= 1; j--)
2852 ref->accs[j] = ref->accs[j-1];
2853 ref->accs[0].thr = thr;
2854 ref->accs[0].rcec = here;
2855 tl_assert(thr); /* thr==NULL is used to signify an empty slot,
2856 so we can't add a NULL thr. */
2857 }
2858
2859 ref->gen = oldrefGen;
2860 tl_assert(ref->ea == a);
2861
2862 } else {
2863
2864 /* We don't have a record for this address. Create a new one. */
2865 if (oldrefTreeN >= oldrefGenIncAt) {
2866 oldrefGen++;
2867 oldrefGenIncAt = oldrefTreeN + 50000;
2868 if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
2869 oldrefGen, oldrefTreeN );
2870 }
2871 here = get_RCEC( thr );
2872 ctxt__rcinc(here);
2873 ref = VG_(OSetGen_AllocNode)( oldrefTree, sizeof(OldRef) );
2874 ref->magic = OldRef_MAGIC;
2875 ref->gen = oldrefGen;
2876 ref->ea = a;
2877 ref->accs[0].rcec = here;
2878 ref->accs[0].thr = thr;
2879 tl_assert(thr); /* thr==NULL is used to signify an empty slot,
2880 so we can't add a NULL thr. */
2881 for (j = 1; j < N_OLDREF_ACCS; j++) {
2882 ref->accs[j].thr = NULL;
2883 ref->accs[j].rcec = NULL;
2884 }
2885 VG_(OSetGen_Insert)( oldrefTree, ref );
2886 oldrefTreeN++;
2887
2888 }
2889}
2890
2891
2892static
sewardjd52392d2008-11-08 20:36:26 +00002893Bool event_map_lookup ( /*OUT*/ExeContext** resEC,
sewardjf98e1c02008-10-25 16:22:41 +00002894 /*OUT*/Thr** resThr,
2895 Thr* thr_acc, Addr a )
2896{
2897 Word i;
2898 OldRef key, *ref;
2899
2900 tl_assert(thr_acc);
2901
2902 key.ea = a;
2903 key.magic = OldRef_MAGIC;
2904
2905 ref = VG_(OSetGen_Lookup)( oldrefTree, &key );
2906 if (ref) {
2907 tl_assert(ref->magic == OldRef_MAGIC);
2908 tl_assert(ref->accs[0].thr); /* first slot must always be used */
2909
2910 for (i = 0; i < N_OLDREF_ACCS; i++) {
2911 if (ref->accs[i].thr != NULL
2912 && ref->accs[i].thr != thr_acc)
2913 break;
2914 }
2915 /* If we didn't find an entry for some thread other than
2916 thr_acc, just return the entry for thread 0. It'll look
2917 pretty stupid to the user though. */
2918 if (i == N_OLDREF_ACCS)
2919 i = 0;
2920
2921 tl_assert(i >= 0 && i < N_OLDREF_ACCS);
2922 tl_assert(ref->accs[i].thr);
2923 tl_assert(ref->accs[i].rcec);
2924 tl_assert(ref->accs[i].rcec->magic == RCEC_MAGIC);
2925
sewardjd52392d2008-11-08 20:36:26 +00002926 *resEC = VG_(make_ExeContext_from_StackTrace)(
2927 &ref->accs[i].rcec->frames[1], N_FRAMES
2928 );
sewardjf98e1c02008-10-25 16:22:41 +00002929 *resThr = ref->accs[i].thr;
2930 return True;
2931 } else {
2932 return False;
2933 }
2934}
2935
2936static void event_map_init ( void )
2937{
2938 Word i;
2939 tl_assert(!contextTab);
2940 contextTab = HG_(zalloc)( "libhb.event_map_init.1 (context table)",
2941 N_RCEC_TAB * sizeof(RCEC*) );
2942 tl_assert(contextTab);
2943 for (i = 0; i < N_RCEC_TAB; i++)
2944 contextTab[i] = NULL;
2945
2946 tl_assert(!oldrefTree);
2947 tl_assert(offsetof(OldRef,ea) == 0); /* prereq for unboxed cmps */
2948 oldrefTree = VG_(OSetGen_Create)(
2949 offsetof(OldRef,ea), /* == 0 */
2950 NULL, /* use unboxed cmp on OldRefs */
2951 HG_(zalloc), "libhb.event_map_init.2 (oldref tree)",
2952 HG_(free)
2953 );
2954 tl_assert(oldrefTree);
2955
2956 oldrefGen = 0;
2957 oldrefGenIncAt = 0;
2958 oldrefTreeN = 0;
2959}
2960
2961static void event_map__check_reference_counts ( Bool before )
2962{
2963 RCEC* rcec;
2964 OldRef* oldref;
2965 Word i;
2966 UWord nEnts = 0;
2967
2968 /* Set the 'check' reference counts to zero. Also, optionally
2969 check that the real reference counts are non-zero. We allow
2970 these to fall to zero before a GC, but the GC must get rid of
2971 all those that are zero, hence none should be zero after a
2972 GC. */
2973 for (i = 0; i < N_RCEC_TAB; i++) {
2974 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
2975 nEnts++;
2976 tl_assert(rcec);
2977 tl_assert(rcec->magic == RCEC_MAGIC);
2978 if (!before)
2979 tl_assert(rcec->rc > 0);
2980 rcec->rcX = 0;
2981 }
2982 }
2983
2984 /* check that the stats are sane */
2985 tl_assert(nEnts == stats__ctxt_tab_curr);
2986 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
2987
2988 /* visit all the referencing points, inc check ref counts */
2989 VG_(OSetGen_ResetIter)( oldrefTree );
2990 while ( (oldref = VG_(OSetGen_Next)( oldrefTree )) ) {
2991 tl_assert(oldref->magic == OldRef_MAGIC);
2992 for (i = 0; i < N_OLDREF_ACCS; i++) {
2993 if (oldref->accs[i].thr) {
2994 tl_assert(oldref->accs[i].rcec);
2995 tl_assert(oldref->accs[i].rcec->magic == RCEC_MAGIC);
2996 oldref->accs[i].rcec->rcX++;
2997 } else {
2998 tl_assert(!oldref->accs[i].rcec);
2999 }
3000 }
3001 }
3002
3003 /* compare check ref counts with actual */
3004 for (i = 0; i < N_RCEC_TAB; i++) {
3005 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
3006 tl_assert(rcec->rc == rcec->rcX);
3007 }
3008 }
3009}
3010
3011static void event_map_maybe_GC ( void )
3012{
3013 OldRef* oldref;
3014 UWord keyW, valW, retained, maxGen;
3015 WordFM* genMap;
3016 XArray* refs2del;
3017 Word i, j, n2del;
3018
3019 if (LIKELY(oldrefTreeN < EVENT_MAP_GC_AT))
3020 return;
3021
3022 if (0)
3023 VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
3024
3025 /* Check our counting is sane */
3026 tl_assert(oldrefTreeN == (UWord) VG_(OSetGen_Size)( oldrefTree ));
3027
3028 /* Check the reference counts */
3029 event_map__check_reference_counts( True/*before*/ );
3030
3031 /* Compute the distribution of generation values in the ref tree */
3032 /* genMap :: generation-number -> count-of-nodes-with-that-number */
3033 genMap = VG_(newFM)( HG_(zalloc), "libhb.emmG.1",
3034 HG_(free), NULL );
3035
3036 VG_(OSetGen_ResetIter)( oldrefTree );
3037 while ( (oldref = VG_(OSetGen_Next)( oldrefTree )) ) {
3038 UWord key = oldref->gen;
3039 keyW = valW = 0;
3040 if (VG_(lookupFM)(genMap, &keyW, &valW, key )) {
3041 tl_assert(keyW == key);
3042 tl_assert(valW > 0);
3043 }
3044 /* now valW is the old count for generation 'key' */
3045 VG_(addToFM)(genMap, key, valW+1);
3046 }
3047
3048 tl_assert(VG_(sizeFM)(genMap) > 0);
3049
3050 retained = oldrefTreeN;
3051 maxGen = 0;
3052 VG_(initIterFM)( genMap );
3053 while (VG_(nextIterFM)( genMap, &keyW, &valW )) {
3054 tl_assert(keyW > 0); /* can't allow a generation # 0 */
3055 if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW );
3056 tl_assert(keyW >= maxGen);
3057 tl_assert(retained >= valW);
3058 if (retained - valW
3059 > (UWord)(EVENT_MAP_GC_AT * EVENT_MAP_GC_DISCARD_FRACTION)) {
3060 retained -= valW;
3061 maxGen = keyW;
3062 } else {
3063 break;
3064 }
3065 }
3066 VG_(doneIterFM)( genMap );
3067
3068 VG_(printf)(
3069 "libhb: EvM GC: delete generations %lu and below, "
3070 "retaining %lu entries\n",
3071 maxGen, retained );
3072
3073 VG_(deleteFM)( genMap, NULL, NULL );
3074
3075 /* If this fails, it means there's only one generation in the
3076 entire tree. So we're kind of in a bad situation, and need to
3077 do some stop-gap measure, such as randomly deleting half the
3078 entries. */
3079 tl_assert(retained < oldrefTreeN);
3080
3081 /* Now make up a big list of the oldrefTree entries we want to
3082 delete. We can't simultaneously traverse the tree and delete
3083 stuff from it, so first we need to copy them off somewhere
3084 else. (sigh) */
3085 refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.1",
3086 HG_(free), sizeof(OldRef*) );
3087
3088 VG_(OSetGen_ResetIter)( oldrefTree );
3089 while ( (oldref = VG_(OSetGen_Next)( oldrefTree )) ) {
3090 tl_assert(oldref->magic == OldRef_MAGIC);
3091 if (oldref->gen <= maxGen) {
3092 VG_(addToXA)( refs2del, &oldref );
3093 }
3094 }
3095
3096 n2del = VG_(sizeXA)( refs2del );
3097 tl_assert(n2del == (Word)(oldrefTreeN - retained));
3098
3099 if (0) VG_(printf)("%s","deleting entries\n");
3100 for (i = 0; i < n2del; i++) {
3101 void* nd;
3102 OldRef* ref = *(OldRef**)VG_(indexXA)( refs2del, i );
3103 tl_assert(ref);
3104 tl_assert(ref->magic == OldRef_MAGIC);
3105 for (j = 0; j < N_OLDREF_ACCS; j++) {
3106 if (ref->accs[j].rcec) {
3107 tl_assert(ref->accs[j].thr);
3108 stats__ctxt_rcdec3++;
3109 ctxt__rcdec( ref->accs[j].rcec );
3110 } else {
3111 tl_assert(!ref->accs[j].thr);
3112 }
3113 }
3114 nd = VG_(OSetGen_Remove)( oldrefTree, ref );
3115 VG_(OSetGen_FreeNode)( oldrefTree, nd );
3116 }
3117
3118 VG_(deleteXA)( refs2del );
3119
3120 tl_assert( VG_(OSetGen_Size)( oldrefTree ) == retained );
3121
3122 oldrefTreeN = retained;
3123 oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
3124
3125 /* Throw away all RCECs with zero reference counts */
3126 for (i = 0; i < N_RCEC_TAB; i++) {
3127 RCEC** pp = &contextTab[i];
3128 RCEC* p = *pp;
3129 while (p) {
3130 if (p->rc == 0) {
3131 *pp = p->next;
3132 HG_(free)(p);
3133 p = *pp;
3134 tl_assert(stats__ctxt_tab_curr > 0);
3135 stats__ctxt_tab_curr--;
3136 } else {
3137 pp = &p->next;
3138 p = p->next;
3139 }
3140 }
3141 }
3142
3143 /* Check the reference counts */
3144 event_map__check_reference_counts( False/*after*/ );
3145
3146 //if (0)
3147 //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
3148 // VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
3149
3150}
3151
3152
3153/////////////////////////////////////////////////////////
3154// //
3155// Core MSM //
3156// //
3157/////////////////////////////////////////////////////////
3158
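/* Compile-time knobs for the state machine:
   MSM_CONFACC: when 1, event_map_bind is called on every C->C state
     change, building the per-address access history that
     record_race_info later queries to show the "other" access.
   MSM_RACE2ERR: when 1, a racing access sends the location to the
     error state E (so no further reports for it); when 0, the
     constraint is instead relaxed by joining in the accessing
     thread's clocks.
   MSM_CHECK: when 1, sanity-check svOld/svNew with is_sane_SVal_C on
     entry to and exit from msm_read/msm_write. */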
3159#define MSM_CONFACC 1
3160
3161#define MSM_RACE2ERR 1
3162
3163#define MSM_CHECK 0
3164
3165static ULong stats__msm_read = 0;
3166static ULong stats__msm_read_change = 0;
3167static ULong stats__msm_write = 0;
3168static ULong stats__msm_write_change = 0;
3169
3170__attribute__((noinline))
3171static void record_race_info ( Thr* acc_thr,
3172 Addr acc_addr, SizeT szB, Bool isWrite,
3173 SVal svOld, SVal svNew )
3174{
3175 Bool found;
3176 Thr* thrp = NULL;
sewardjd52392d2008-11-08 20:36:26 +00003177 ExeContext* where = NULL;
3178 ExeContext* wherep = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00003179 where = main_get_EC( acc_thr );
3180 found = event_map_lookup( &wherep, &thrp, acc_thr, acc_addr );
3181 if (found) {
3182 tl_assert(wherep);
3183 tl_assert(thrp);
3184 tl_assert(thrp->opaque);
3185 tl_assert(acc_thr->opaque);
3186 HG_(record_error_Race)( acc_thr->opaque, acc_addr,
3187 isWrite, szB, NULL/*mb_lastlock*/,
3188 wherep, thrp->opaque );
3189 } else {
3190 tl_assert(!wherep);
3191 tl_assert(!thrp);
3192 tl_assert(acc_thr->opaque);
3193 HG_(record_error_Race)( acc_thr->opaque, acc_addr,
3194 isWrite, szB, NULL/*mb_lastlock*/,
3195 NULL, NULL );
3196 }
3197}
3198
3199static Bool is_sane_SVal_C ( SVal sv ) {
3200 POrd ord;
3201 if (!SVal__isC(sv)) return True;
3202 ord = VtsID__getOrdering( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
3203 if (ord == POrd_EQ || ord == POrd_LT) return True;
3204 return False;
3205}
3206
3207
3208/* Compute new state following a read */
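/* For a constrained (C) location the read is race-free iff the
   location's read-clock Rmin is <= the reading thread's viR; in that
   case Rmin is kept and the thread's viW (note: viW, not viR -- the
   rwlock subtlety) is joined into Wmin. Otherwise a race is
   recorded. */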
3209static inline SVal msm_read ( SVal svOld,
3210 /* The following are only needed for
3211 creating error reports. */
3212 Thr* acc_thr,
3213 Addr acc_addr, SizeT szB )
3214{
3215 SVal svNew = SVal_INVALID;
3216 stats__msm_read++;
3217
3218 /* Redundant sanity check on the constraints */
3219 if (MSM_CHECK) {
3220 tl_assert(is_sane_SVal_C(svOld));
3221 }
3222
3223 if (SVal__isC(svOld)) {
3224 POrd ord;
3225 VtsID tviR = acc_thr->viR;
3226 VtsID tviW = acc_thr->viW;
3227 VtsID rmini = SVal__unC_Rmin(svOld);
3228 VtsID wmini = SVal__unC_Wmin(svOld);
3229
3230 ord = VtsID__getOrdering(rmini,tviR);
3231 if (ord == POrd_EQ || ord == POrd_LT) {
3232 /* no race */
3233 /* Note: RWLOCK subtlety: use tviW, not tviR */
3234 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
3235 goto out;
3236 } else {
3237 svNew = MSM_RACE2ERR
3238 ? SVal__mkE()
3239 : SVal__mkC( rmini, VtsID__join2(wmini,tviR) );
3240 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
3241 svOld, svNew );
3242 goto out;
3243 }
3244 }
3245 if (SVal__isA(svOld)) {
3246 /* reading no-access memory (sigh); leave unchanged */
3247 /* check for no pollution */
3248 tl_assert(svOld == SVal_NOACCESS);
3249 svNew = SVal_NOACCESS;
3250 goto out;
3251 }
3252 if (SVal__isE(svOld)) {
3253 /* no race, location is already "in error" */
3254 svNew = SVal__mkE();
3255 goto out;
3256 }
3257 VG_(printf)("msm_read: bad svOld: 0x%016llx\n", svOld);
3258 tl_assert(0);
3259
3260 out:
3261 if (MSM_CHECK) {
3262 tl_assert(is_sane_SVal_C(svNew));
3263 }
3264 tl_assert(svNew != SVal_INVALID);
3265 if (svNew != svOld) {
3266 if (MSM_CONFACC && SVal__isC(svOld) && SVal__isC(svNew)) {
3267 event_map_bind( acc_addr, acc_thr );
3268 stats__msm_read_change++;
3269 }
3270 }
3271 return svNew;
3272}
3273
3274
3275/* Compute new state following a write */
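/* For a constrained (C) location the write is race-free iff the
   location's write-clock Wmin is <= the writing thread's viW; in
   that case the new constraint becomes (viW, viW). Otherwise a race
   is recorded. */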
3276static inline SVal msm_write ( SVal svOld,
3277 /* The following are only needed for
3278 creating error reports. */
3279 Thr* acc_thr,
3280 Addr acc_addr, SizeT szB )
3281{
3282 SVal svNew = SVal_INVALID;
3283 stats__msm_write++;
3284
3285 /* Redundant sanity check on the constraints */
3286 if (MSM_CHECK) {
3287 tl_assert(is_sane_SVal_C(svOld));
3288 }
3289
3290 if (SVal__isC(svOld)) {
3291 POrd ord;
3292 VtsID tviW = acc_thr->viW;
3293 VtsID wmini = SVal__unC_Wmin(svOld);
3294
3295 ord = VtsID__getOrdering(wmini,tviW);
3296 if (ord == POrd_EQ || ord == POrd_LT) {
3297 /* no race */
3298 svNew = SVal__mkC( tviW, tviW );
3299 goto out;
3300 } else {
3301 VtsID rmini = SVal__unC_Rmin(svOld);
3302 svNew = MSM_RACE2ERR
3303 ? SVal__mkE()
3304 : SVal__mkC( VtsID__join2(rmini,tviW),
3305 VtsID__join2(wmini,tviW) );
3306 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
3307 svOld, svNew );
3308 goto out;
3309 }
3310 }
3311 if (SVal__isA(svOld)) {
3312 /* writing no-access memory (sigh); leave unchanged */
3313 /* check for no pollution */
3314 tl_assert(svOld == SVal_NOACCESS);
3315 svNew = SVal_NOACCESS;
3316 goto out;
3317 }
3318 if (SVal__isE(svOld)) {
3319 /* no race, location is already "in error" */
3320 svNew = SVal__mkE();
3321 goto out;
3322 }
3323 VG_(printf)("msm_write: bad svOld: 0x%016llx\n", svOld);
3324 tl_assert(0);
3325
3326 out:
3327 if (MSM_CHECK) {
3328 tl_assert(is_sane_SVal_C(svNew));
3329 }
3330 tl_assert(svNew != SVal_INVALID);
3331 if (svNew != svOld) {
3332 if (MSM_CONFACC && SVal__isC(svOld) && SVal__isC(svNew)) {
3333 event_map_bind( acc_addr, acc_thr );
3334 stats__msm_write_change++;
3335 }
3336 }
3337 return svNew;
3338}
3339
3340
3341/////////////////////////////////////////////////////////
3342// //
3343// Apply core MSM to specific memory locations //
3344// //
3345/////////////////////////////////////////////////////////
3346
3347/*------------- ZSM accesses: 8 bit apply ------------- */
3348
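/* The cacheline's tree descriptor may currently describe this
   address only at a coarser (16/32/64 bit) granularity; in that case
   pulldown_to_8 splits the relevant tree node so that the 8-bit leaf
   at 'toff' holds a valid SVal before the state machine is applied
   to it. */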
3349void zsm_apply8___msm_read ( Thr* thr, Addr a ) {
3350 CacheLine* cl;
3351 UWord cloff, tno, toff;
3352 SVal svOld, svNew;
3353 UShort descr;
3354 stats__cline_read8s++;
3355 cl = get_cacheline(a);
3356 cloff = get_cacheline_offset(a);
3357 tno = get_treeno(a);
3358 toff = get_tree_offset(a); /* == 0 .. 7 */
3359 descr = cl->descrs[tno];
3360 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3361 SVal* tree = &cl->svals[tno << 3];
3362 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3363 if (SCE_CACHELINE)
3364 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3365 }
3366 svOld = cl->svals[cloff];
3367 svNew = msm_read( svOld, thr,a,1 );
3368 tl_assert(svNew != SVal_INVALID);
3369 cl->svals[cloff] = svNew;
3370}
3371
3372void zsm_apply8___msm_write ( Thr* thr, Addr a ) {
3373 CacheLine* cl;
3374 UWord cloff, tno, toff;
3375 SVal svOld, svNew;
3376 UShort descr;
3377 stats__cline_read8s++;
3378 cl = get_cacheline(a);
3379 cloff = get_cacheline_offset(a);
3380 tno = get_treeno(a);
3381 toff = get_tree_offset(a); /* == 0 .. 7 */
3382 descr = cl->descrs[tno];
3383 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3384 SVal* tree = &cl->svals[tno << 3];
3385 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3386 if (SCE_CACHELINE)
3387 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3388 }
3389 svOld = cl->svals[cloff];
3390 svNew = msm_write( svOld, thr,a,1 );
3391 tl_assert(svNew != SVal_INVALID);
3392 cl->svals[cloff] = svNew;
3393}
3394
3395/*------------- ZSM accesses: 16 bit apply ------------- */
3396
3397void zsm_apply16___msm_read ( Thr* thr, Addr a ) {
3398 CacheLine* cl;
3399 UWord cloff, tno, toff;
3400 SVal svOld, svNew;
3401 UShort descr;
3402 stats__cline_read16s++;
3403 if (UNLIKELY(!aligned16(a))) goto slowcase;
3404 cl = get_cacheline(a);
3405 cloff = get_cacheline_offset(a);
3406 tno = get_treeno(a);
3407 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
3408 descr = cl->descrs[tno];
3409 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
3410 if (valid_value_is_below_me_16(descr, toff)) {
3411 goto slowcase;
3412 } else {
3413 SVal* tree = &cl->svals[tno << 3];
3414 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
3415 }
3416 if (SCE_CACHELINE)
3417 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3418 }
3419 svOld = cl->svals[cloff];
3420 svNew = msm_read( svOld, thr,a,2 );
3421 tl_assert(svNew != SVal_INVALID);
3422 cl->svals[cloff] = svNew;
3423 return;
3424 slowcase: /* misaligned, or must go further down the tree */
3425 stats__cline_16to8splits++;
3426 zsm_apply8___msm_read( thr, a + 0 );
3427 zsm_apply8___msm_read( thr, a + 1 );
3428}
3429
3430void zsm_apply16___msm_write ( Thr* thr, Addr a ) {
3431 CacheLine* cl;
3432 UWord cloff, tno, toff;
3433 SVal svOld, svNew;
3434 UShort descr;
3435 stats__cline_read16s++;
3436 if (UNLIKELY(!aligned16(a))) goto slowcase;
3437 cl = get_cacheline(a);
3438 cloff = get_cacheline_offset(a);
3439 tno = get_treeno(a);
3440 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
3441 descr = cl->descrs[tno];
3442 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
3443 if (valid_value_is_below_me_16(descr, toff)) {
3444 goto slowcase;
3445 } else {
3446 SVal* tree = &cl->svals[tno << 3];
3447 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
3448 }
3449 if (SCE_CACHELINE)
3450 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3451 }
3452 svOld = cl->svals[cloff];
3453 svNew = msm_write( svOld, thr,a,2 );
3454 tl_assert(svNew != SVal_INVALID);
3455 cl->svals[cloff] = svNew;
3456 return;
3457 slowcase: /* misaligned, or must go further down the tree */
3458 stats__cline_16to8splits++;
3459 zsm_apply8___msm_write( thr, a + 0 );
3460 zsm_apply8___msm_write( thr, a + 1 );
3461}
3462
3463/*------------- ZSM accesses: 32 bit apply ------------- */
3464
3465void zsm_apply32___msm_read ( Thr* thr, Addr a ) {
3466 CacheLine* cl;
3467 UWord cloff, tno, toff;
3468 SVal svOld, svNew;
3469 UShort descr;
3470 if (UNLIKELY(!aligned32(a))) goto slowcase;
3471 cl = get_cacheline(a);
3472 cloff = get_cacheline_offset(a);
3473 tno = get_treeno(a);
3474 toff = get_tree_offset(a); /* == 0 or 4 */
3475 descr = cl->descrs[tno];
3476 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
3477 if (valid_value_is_above_me_32(descr, toff)) {
3478 SVal* tree = &cl->svals[tno << 3];
3479 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
3480 } else {
3481 goto slowcase;
3482 }
3483 if (SCE_CACHELINE)
3484 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3485 }
3486 svOld = cl->svals[cloff];
3487 svNew = msm_read( svOld, thr,a,4 );
3488 tl_assert(svNew != SVal_INVALID);
3489 cl->svals[cloff] = svNew;
3490 return;
3491 slowcase: /* misaligned, or must go further down the tree */
3492 stats__cline_32to16splits++;
3493 zsm_apply16___msm_read( thr, a + 0 );
3494 zsm_apply16___msm_read( thr, a + 2 );
3495}
3496
3497void zsm_apply32___msm_write ( Thr* thr, Addr a ) {
3498 CacheLine* cl;
3499 UWord cloff, tno, toff;
3500 SVal svOld, svNew;
3501 UShort descr;
3502 if (UNLIKELY(!aligned32(a))) goto slowcase;
3503 cl = get_cacheline(a);
3504 cloff = get_cacheline_offset(a);
3505 tno = get_treeno(a);
3506 toff = get_tree_offset(a); /* == 0 or 4 */
3507 descr = cl->descrs[tno];
3508 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
3509 if (valid_value_is_above_me_32(descr, toff)) {
3510 SVal* tree = &cl->svals[tno << 3];
3511 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
3512 } else {
3513 goto slowcase;
3514 }
3515 if (SCE_CACHELINE)
3516 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3517 }
3518 svOld = cl->svals[cloff];
3519 svNew = msm_write( svOld, thr,a,4 );
3520 tl_assert(svNew != SVal_INVALID);
3521 cl->svals[cloff] = svNew;
3522 return;
3523 slowcase: /* misaligned, or must go further down the tree */
3524 stats__cline_32to16splits++;
3525 zsm_apply16___msm_write( thr, a + 0 );
3526 zsm_apply16___msm_write( thr, a + 2 );
3527}
3528
3529/*------------- ZSM accesses: 64 bit apply ------------- */
3530
3531void zsm_apply64___msm_read ( Thr* thr, Addr a ) {
3532 CacheLine* cl;
3533 UWord cloff, tno, toff;
3534 SVal svOld, svNew;
3535 UShort descr;
3536 stats__cline_read64s++;
3537 if (UNLIKELY(!aligned64(a))) goto slowcase;
3538 cl = get_cacheline(a);
3539 cloff = get_cacheline_offset(a);
3540 tno = get_treeno(a);
3541 toff = get_tree_offset(a); /* == 0, unused */
3542 descr = cl->descrs[tno];
3543 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
3544 goto slowcase;
3545 }
3546 svOld = cl->svals[cloff];
3547 svNew = msm_read( svOld, thr,a,8 );
3548 tl_assert(svNew != SVal_INVALID);
3549 cl->svals[cloff] = svNew;
3550 return;
3551 slowcase: /* misaligned, or must go further down the tree */
3552 stats__cline_64to32splits++;
3553 zsm_apply32___msm_read( thr, a + 0 );
3554 zsm_apply32___msm_read( thr, a + 4 );
3555}
3556
3557void zsm_apply64___msm_write ( Thr* thr, Addr a ) {
3558 CacheLine* cl;
3559 UWord cloff, tno, toff;
3560 SVal svOld, svNew;
3561 UShort descr;
3562 stats__cline_read64s++;
3563 if (UNLIKELY(!aligned64(a))) goto slowcase;
3564 cl = get_cacheline(a);
3565 cloff = get_cacheline_offset(a);
3566 tno = get_treeno(a);
3567 toff = get_tree_offset(a); /* == 0, unused */
3568 descr = cl->descrs[tno];
3569 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
3570 goto slowcase;
3571 }
3572 svOld = cl->svals[cloff];
3573 svNew = msm_write( svOld, thr,a,8 );
3574 tl_assert(svNew != SVal_INVALID);
3575 cl->svals[cloff] = svNew;
3576 return;
3577 slowcase: /* misaligned, or must go further down the tree */
3578 stats__cline_64to32splits++;
3579 zsm_apply32___msm_write( thr, a + 0 );
3580 zsm_apply32___msm_write( thr, a + 4 );
3581}
3582
3583/*--------------- ZSM accesses: 8 bit write --------------- */
3584
3585static
3586void zsm_write8 ( Addr a, SVal svNew ) {
3587 CacheLine* cl;
3588 UWord cloff, tno, toff;
3589 UShort descr;
3590 stats__cline_set8s++;
3591 cl = get_cacheline(a);
3592 cloff = get_cacheline_offset(a);
3593 tno = get_treeno(a);
3594 toff = get_tree_offset(a); /* == 0 .. 7 */
3595 descr = cl->descrs[tno];
3596 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3597 SVal* tree = &cl->svals[tno << 3];
3598 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3599 if (SCE_CACHELINE)
3600 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3601 }
3602 tl_assert(svNew != SVal_INVALID);
3603 cl->svals[cloff] = svNew;
3604}
3605
3606/*--------------- ZSM accesses: 16 bit write --------------- */
3607
3608static
3609void zsm_write16 ( Addr a, SVal svNew ) {
3610 CacheLine* cl;
3611 UWord cloff, tno, toff;
3612 UShort descr;
3613 stats__cline_set16s++;
3614 if (UNLIKELY(!aligned16(a))) goto slowcase;
3615 cl = get_cacheline(a);
3616 cloff = get_cacheline_offset(a);
3617 tno = get_treeno(a);
3618 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
3619 descr = cl->descrs[tno];
3620 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
3621 if (valid_value_is_below_me_16(descr, toff)) {
3622 /* Writing at this level. Need to fix up 'descr'. */
3623 cl->descrs[tno] = pullup_descr_to_16(descr, toff);
3624 /* At this point, the tree does not match cl->descr[tno] any
3625 more. The assignments below will fix it up. */
3626 } else {
3627 /* We can't indiscriminately write on the w16 node as in the
3628 w64 case, as that might make the node inconsistent with
3629 its parent. So first, pull down to this level. */
3630 SVal* tree = &cl->svals[tno << 3];
3631 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
3632 if (SCE_CACHELINE)
3633 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3634 }
3635 }
3636 tl_assert(svNew != SVal_INVALID);
3637 cl->svals[cloff + 0] = svNew;
3638 cl->svals[cloff + 1] = SVal_INVALID;
3639 return;
3640 slowcase: /* misaligned */
3641 stats__cline_16to8splits++;
3642 zsm_write8( a + 0, svNew );
3643 zsm_write8( a + 1, svNew );
3644}
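
/* Worked example for zsm_write16 (illustrative values only).  If the
   8-byte group is currently one 64-bit leaf (TREE_DESCR_64 set) and
   we write 16 bits at toff == 2, the 16-bit leaf bit for toff is
   clear and the valid value lives above us, so pulldown_to_16 splits
   the tree just far enough that a 16-bit leaf covers toff; the two
   assignments then put svNew and SVal_INVALID into that leaf.
   Conversely, if the group had already been split into two 8-bit
   leaves at offsets 2 and 3, the valid values live below us, and
   pullup_descr_to_16 merely re-marks that pair as a single 16-bit
   leaf before the same two assignments make the tree match the new
   descriptor. */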
3645
3646/*--------------- ZSM accesses: 32 bit write --------------- */
3647
3648static
3649void zsm_write32 ( Addr a, SVal svNew ) {
3650 CacheLine* cl;
3651 UWord cloff, tno, toff;
3652 UShort descr;
3653 stats__cline_set32s++;
3654 if (UNLIKELY(!aligned32(a))) goto slowcase;
3655 cl = get_cacheline(a);
3656 cloff = get_cacheline_offset(a);
3657 tno = get_treeno(a);
3658 toff = get_tree_offset(a); /* == 0 or 4 */
3659 descr = cl->descrs[tno];
3660 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
3661 if (valid_value_is_above_me_32(descr, toff)) {
3662 /* We can't indiscriminately write on the w32 node as in the
3663 w64 case, as that might make the node inconsistent with
3664 its parent. So first, pull down to this level. */
3665 SVal* tree = &cl->svals[tno << 3];
3666 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
3667 if (SCE_CACHELINE)
3668 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3669 } else {
3670 /* Writing at this level. Need to fix up 'descr'. */
3671 cl->descrs[tno] = pullup_descr_to_32(descr, toff);
3672         /* At this point, the tree does not match cl->descrs[tno] any
3673 more. The assignments below will fix it up. */
3674 }
3675 }
3676 tl_assert(svNew != SVal_INVALID);
3677 cl->svals[cloff + 0] = svNew;
3678 cl->svals[cloff + 1] = SVal_INVALID;
3679 cl->svals[cloff + 2] = SVal_INVALID;
3680 cl->svals[cloff + 3] = SVal_INVALID;
3681 return;
3682 slowcase: /* misaligned */
3683 stats__cline_32to16splits++;
3684 zsm_write16( a + 0, svNew );
3685 zsm_write16( a + 2, svNew );
3686}
3687
3688/*--------------- ZSM accesses: 64 bit write --------------- */
3689
3690static
3691void zsm_write64 ( Addr a, SVal svNew ) {
3692 CacheLine* cl;
3693 UWord cloff, tno, toff;
3694 stats__cline_set64s++;
3695 if (UNLIKELY(!aligned64(a))) goto slowcase;
3696 cl = get_cacheline(a);
3697 cloff = get_cacheline_offset(a);
3698 tno = get_treeno(a);
3699 toff = get_tree_offset(a); /* == 0 */
3700 cl->descrs[tno] = TREE_DESCR_64;
3701 tl_assert(svNew != SVal_INVALID);
3702 cl->svals[cloff + 0] = svNew;
3703 cl->svals[cloff + 1] = SVal_INVALID;
3704 cl->svals[cloff + 2] = SVal_INVALID;
3705 cl->svals[cloff + 3] = SVal_INVALID;
3706 cl->svals[cloff + 4] = SVal_INVALID;
3707 cl->svals[cloff + 5] = SVal_INVALID;
3708 cl->svals[cloff + 6] = SVal_INVALID;
3709 cl->svals[cloff + 7] = SVal_INVALID;
3710 return;
3711 slowcase: /* misaligned */
3712 stats__cline_64to32splits++;
3713 zsm_write32( a + 0, svNew );
3714 zsm_write32( a + 4, svNew );
3715}
3716
3717/*------------- ZSM accesses: 8 bit read/copy ------------- */
3718
3719static
3720SVal zsm_read8 ( Addr a ) {
3721 CacheLine* cl;
3722 UWord cloff, tno, toff;
3723 UShort descr;
3724 stats__cline_get8s++;
3725 cl = get_cacheline(a);
3726 cloff = get_cacheline_offset(a);
3727 tno = get_treeno(a);
3728 toff = get_tree_offset(a); /* == 0 .. 7 */
3729 descr = cl->descrs[tno];
3730 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3731 SVal* tree = &cl->svals[tno << 3];
3732 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3733 }
3734 return cl->svals[cloff];
3735}
3736
3737static void zsm_copy8 ( Addr src, Addr dst, Bool uu_normalise ) {
3738 SVal sv;
3739 stats__cline_copy8s++;
3740 sv = zsm_read8( src );
3741 zsm_write8( dst, sv );
3742}
3743
3744/* ------------ Shadow memory range setting ops ------------ */
3745
3746void zsm_apply_range___msm_read ( Thr* thr,
3747 Addr a, SizeT len )
3748{
3749 /* fast track a couple of common cases */
3750 if (len == 4 && aligned32(a)) {
3751 zsm_apply32___msm_read( thr, a );
3752 return;
3753 }
3754 if (len == 8 && aligned64(a)) {
3755 zsm_apply64___msm_read( thr, a );
3756 return;
3757 }
3758
3759 /* be completely general (but as efficient as possible) */
3760 if (len == 0) return;
3761
3762 if (!aligned16(a) && len >= 1) {
3763 zsm_apply8___msm_read( thr, a );
3764 a += 1;
3765 len -= 1;
3766 tl_assert(aligned16(a));
3767 }
3768 if (len == 0) return;
3769
3770 if (!aligned32(a) && len >= 2) {
3771 zsm_apply16___msm_read( thr, a );
3772 a += 2;
3773 len -= 2;
3774 tl_assert(aligned32(a));
3775 }
3776 if (len == 0) return;
3777
3778 if (!aligned64(a) && len >= 4) {
3779 zsm_apply32___msm_read( thr, a );
3780 a += 4;
3781 len -= 4;
3782 tl_assert(aligned64(a));
3783 }
3784 if (len == 0) return;
3785
3786 if (len >= 8) {
3787 tl_assert(aligned64(a));
3788 while (len >= 8) {
3789 zsm_apply64___msm_read( thr, a );
3790 a += 8;
3791 len -= 8;
3792 }
3793 tl_assert(aligned64(a));
3794 }
3795 if (len == 0) return;
3796
3797 if (len >= 4)
3798 tl_assert(aligned32(a));
3799 if (len >= 4) {
3800 zsm_apply32___msm_read( thr, a );
3801 a += 4;
3802 len -= 4;
3803 }
3804 if (len == 0) return;
3805
3806 if (len >= 2)
3807 tl_assert(aligned16(a));
3808 if (len >= 2) {
3809 zsm_apply16___msm_read( thr, a );
3810 a += 2;
3811 len -= 2;
3812 }
3813 if (len == 0) return;
3814
3815 if (len >= 1) {
3816 zsm_apply8___msm_read( thr, a );
3817 a += 1;
3818 len -= 1;
3819 }
3820 tl_assert(len == 0);
3821}
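
/* The control flow above (repeated in zsm_apply_range___msm_write
   and zsm_set_range_SMALL below) is an alignment staircase: peel off
   at most one 1-, 2- and 4-byte access to reach 8-byte alignment,
   service the bulk of the range with aligned 8-byte accesses, then
   peel off at most one 4-, 2- and 1-byte access for the tail.
   Illustrative trace (hypothetical values): for a == 0x1003 and
   len == 13 the calls made are
      zsm_apply8___msm_read  at 0x1003   (now 4-byte aligned)
      zsm_apply32___msm_read at 0x1004   (now 8-byte aligned)
      zsm_apply64___msm_read at 0x1008   (len reaches 0)            */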
3822
3823
3824
3825void zsm_apply_range___msm_write ( Thr* thr,
3826 Addr a, SizeT len )
3827{
3828 /* fast track a couple of common cases */
3829 if (len == 4 && aligned32(a)) {
3830 zsm_apply32___msm_write( thr, a );
3831 return;
3832 }
3833 if (len == 8 && aligned64(a)) {
3834 zsm_apply64___msm_write( thr, a );
3835 return;
3836 }
3837
3838 /* be completely general (but as efficient as possible) */
3839 if (len == 0) return;
3840
3841 if (!aligned16(a) && len >= 1) {
3842 zsm_apply8___msm_write( thr, a );
3843 a += 1;
3844 len -= 1;
3845 tl_assert(aligned16(a));
3846 }
3847 if (len == 0) return;
3848
3849 if (!aligned32(a) && len >= 2) {
3850 zsm_apply16___msm_write( thr, a );
3851 a += 2;
3852 len -= 2;
3853 tl_assert(aligned32(a));
3854 }
3855 if (len == 0) return;
3856
3857 if (!aligned64(a) && len >= 4) {
3858 zsm_apply32___msm_write( thr, a );
3859 a += 4;
3860 len -= 4;
3861 tl_assert(aligned64(a));
3862 }
3863 if (len == 0) return;
3864
3865 if (len >= 8) {
3866 tl_assert(aligned64(a));
3867 while (len >= 8) {
3868 zsm_apply64___msm_write( thr, a );
3869 a += 8;
3870 len -= 8;
3871 }
3872 tl_assert(aligned64(a));
3873 }
3874 if (len == 0) return;
3875
3876 if (len >= 4)
3877 tl_assert(aligned32(a));
3878 if (len >= 4) {
3879 zsm_apply32___msm_write( thr, a );
3880 a += 4;
3881 len -= 4;
3882 }
3883 if (len == 0) return;
3884
3885 if (len >= 2)
3886 tl_assert(aligned16(a));
3887 if (len >= 2) {
3888 zsm_apply16___msm_write( thr, a );
3889 a += 2;
3890 len -= 2;
3891 }
3892 if (len == 0) return;
3893
3894 if (len >= 1) {
3895 zsm_apply8___msm_write( thr, a );
3896 a += 1;
3897 len -= 1;
3898 }
3899 tl_assert(len == 0);
3900}
3901
3902
3903
3904
3905/* Block-copy states (needed for implementing realloc()). */
3906
3907static void zsm_copy_range ( Addr src, Addr dst, SizeT len )
3908{
3909 SizeT i;
3910 if (len == 0)
3911 return;
3912
3913 /* assert for non-overlappingness */
3914 tl_assert(src+len <= dst || dst+len <= src);
3915
3916 /* To be simple, just copy byte by byte. But so as not to wreck
3917 performance for later accesses to dst[0 .. len-1], normalise
3918 destination lines as we finish with them, and also normalise the
3919 line containing the first and last address. */
3920 for (i = 0; i < len; i++) {
3921 Bool normalise
3922 = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
3923 || i == 0 /* first in range */
3924 || i == len-1; /* last in range */
3925 zsm_copy8( src+i, dst+i, normalise );
3926 }
3927}
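
/* Note: zsm_copy8 currently ignores its 'uu_normalise' argument (the
   uu_ prefix marks it as unused), so the normalise flag computed in
   the loop above is advisory only; presumably it is there so that a
   future version could re-pack each destination line as the copy
   finishes with it. */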
3928
3929
3930/* For setting address ranges to a given value. Has considerable
3931 sophistication so as to avoid generating large numbers of pointless
3932 cache loads/writebacks for large ranges. */
3933
3934/* Do small ranges in-cache, in the obvious way. */
3935static
3936void zsm_set_range_SMALL ( Addr a, SizeT len, SVal svNew )
3937{
3938 /* fast track a couple of common cases */
3939 if (len == 4 && aligned32(a)) {
3940 zsm_write32( a, svNew );
3941 return;
3942 }
3943 if (len == 8 && aligned64(a)) {
3944 zsm_write64( a, svNew );
3945 return;
3946 }
3947
3948 /* be completely general (but as efficient as possible) */
3949 if (len == 0) return;
3950
3951 if (!aligned16(a) && len >= 1) {
3952 zsm_write8( a, svNew );
3953 a += 1;
3954 len -= 1;
3955 tl_assert(aligned16(a));
3956 }
3957 if (len == 0) return;
3958
3959 if (!aligned32(a) && len >= 2) {
3960 zsm_write16( a, svNew );
3961 a += 2;
3962 len -= 2;
3963 tl_assert(aligned32(a));
3964 }
3965 if (len == 0) return;
3966
3967 if (!aligned64(a) && len >= 4) {
3968 zsm_write32( a, svNew );
3969 a += 4;
3970 len -= 4;
3971 tl_assert(aligned64(a));
3972 }
3973 if (len == 0) return;
3974
3975 if (len >= 8) {
3976 tl_assert(aligned64(a));
3977 while (len >= 8) {
3978 zsm_write64( a, svNew );
3979 a += 8;
3980 len -= 8;
3981 }
3982 tl_assert(aligned64(a));
3983 }
3984 if (len == 0) return;
3985
3986 if (len >= 4)
3987 tl_assert(aligned32(a));
3988 if (len >= 4) {
3989 zsm_write32( a, svNew );
3990 a += 4;
3991 len -= 4;
3992 }
3993 if (len == 0) return;
3994
3995 if (len >= 2)
3996 tl_assert(aligned16(a));
3997 if (len >= 2) {
3998 zsm_write16( a, svNew );
3999 a += 2;
4000 len -= 2;
4001 }
4002 if (len == 0) return;
4003
4004 if (len >= 1) {
4005 zsm_write8( a, svNew );
4006 a += 1;
4007 len -= 1;
4008 }
4009 tl_assert(len == 0);
4010}
4011
4012
4013/* If we're doing a small range, hand off to zsm_set_range_SMALL. But
4014 for larger ranges, try to operate directly on the out-of-cache
4015 representation, rather than dragging lines into the cache,
4016 overwriting them, and forcing them out. This turns out to be an
4017 important performance optimisation. */
4018
4019static void zsm_set_range ( Addr a, SizeT len, SVal svNew )
4020{
4021 tl_assert(svNew != SVal_INVALID);
4022 stats__cache_make_New_arange += (ULong)len;
4023
4024 if (0 && len > 500)
4025 VG_(printf)("make New ( %#lx, %ld )\n", a, len );
4026
4027 if (0) {
4028 static UWord n_New_in_cache = 0;
4029 static UWord n_New_not_in_cache = 0;
4030 /* tag is 'a' with the in-line offset masked out,
4031 eg a[31]..a[4] 0000 */
4032 Addr tag = a & ~(N_LINE_ARANGE - 1);
4033 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
4034 if (LIKELY(tag == cache_shmem.tags0[wix])) {
4035 n_New_in_cache++;
4036 } else {
4037 n_New_not_in_cache++;
4038 }
4039 if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
4040 VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
4041 n_New_in_cache, n_New_not_in_cache );
4042 }
4043
4044 if (LIKELY(len < 2 * N_LINE_ARANGE)) {
4045 zsm_set_range_SMALL( a, len, svNew );
4046 } else {
4047 Addr before_start = a;
4048 Addr aligned_start = cacheline_ROUNDUP(a);
4049 Addr after_start = cacheline_ROUNDDN(a + len);
4050 UWord before_len = aligned_start - before_start;
4051 UWord aligned_len = after_start - aligned_start;
4052 UWord after_len = a + len - after_start;
4053 tl_assert(before_start <= aligned_start);
4054 tl_assert(aligned_start <= after_start);
4055 tl_assert(before_len < N_LINE_ARANGE);
4056 tl_assert(after_len < N_LINE_ARANGE);
4057 tl_assert(get_cacheline_offset(aligned_start) == 0);
4058 if (get_cacheline_offset(a) == 0) {
4059 tl_assert(before_len == 0);
4060 tl_assert(a == aligned_start);
4061 }
4062 if (get_cacheline_offset(a+len) == 0) {
4063 tl_assert(after_len == 0);
4064 tl_assert(after_start == a+len);
4065 }
4066 if (before_len > 0) {
4067 zsm_set_range_SMALL( before_start, before_len, svNew );
4068 }
4069 if (after_len > 0) {
4070 zsm_set_range_SMALL( after_start, after_len, svNew );
4071 }
4072 stats__cache_make_New_inZrep += (ULong)aligned_len;
4073
4074 while (1) {
4075 Addr tag;
4076 UWord wix;
4077 if (aligned_start >= after_start)
4078 break;
4079 tl_assert(get_cacheline_offset(aligned_start) == 0);
4080 tag = aligned_start & ~(N_LINE_ARANGE - 1);
4081 wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
4082 if (tag == cache_shmem.tags0[wix]) {
4083 UWord i;
4084 for (i = 0; i < N_LINE_ARANGE / 8; i++)
4085 zsm_write64( aligned_start + i * 8, svNew );
4086 } else {
4087 UWord i;
4088 Word zix;
4089 SecMap* sm;
4090 LineZ* lineZ;
4091 /* This line is not in the cache. Do not force it in; instead
4092 modify it in-place. */
4093 /* find the Z line to write in and rcdec it or the
4094 associated F line. */
4095 find_Z_for_writing( &sm, &zix, tag );
4096 tl_assert(sm);
4097 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
4098 lineZ = &sm->linesZ[zix];
4099 lineZ->dict[0] = svNew;
4100 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
4101 for (i = 0; i < N_LINE_ARANGE/4; i++)
4102 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
4103 rcinc_LineZ(lineZ);
4104 }
4105 aligned_start += N_LINE_ARANGE;
4106 aligned_len -= N_LINE_ARANGE;
4107 }
4108 tl_assert(aligned_start == after_start);
4109 tl_assert(aligned_len == 0);
4110 }
4111}
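
/* Worked example for the large-range path of zsm_set_range
   (illustrative values, assuming the usual N_LINE_ARANGE of 64
   bytes).  For a == 0x1010 and len == 0x200:
      before_start = 0x1010, aligned_start = 0x1040, after_start = 0x1200
      before_len   = 0x30    handled by zsm_set_range_SMALL
      aligned_len  = 0x1C0   7 whole lines: in-cache lines are set
                             with zsm_write64, out-of-cache lines are
                             written directly as a LineZ whose
                             dictionary holds the single value svNew
      after_len    = 0x10    handled by zsm_set_range_SMALL          */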
4112
4113
4114/////////////////////////////////////////////////////////
4115// //
4116// Synchronisation objects //
4117// //
4118/////////////////////////////////////////////////////////
4119
4120// (UInt) `echo "Synchronisation object" | md5sum`
4121#define SO_MAGIC 0x56b3c5b0U
4122
4123struct _SO {
4124 VtsID viR; /* r-clock of sender */
4125 VtsID viW; /* w-clock of sender */
4126 UInt magic;
4127};
4128
4129static SO* SO__Alloc ( void ) {
4130 SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
4131 so->viR = VtsID_INVALID;
4132 so->viW = VtsID_INVALID;
4133 so->magic = SO_MAGIC;
4134 return so;
4135}
4136static void SO__Dealloc ( SO* so ) {
4137 tl_assert(so);
4138 tl_assert(so->magic == SO_MAGIC);
4139 if (so->viR == VtsID_INVALID) {
4140 tl_assert(so->viW == VtsID_INVALID);
4141 } else {
4142 tl_assert(so->viW != VtsID_INVALID);
4143 VtsID__rcdec(so->viR);
4144 VtsID__rcdec(so->viW);
4145 }
4146 so->magic = 0;
4147 HG_(free)( so );
4148}
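
/* Lifecycle note: an SO starts with both clocks VtsID_INVALID and
   only acquires (reference-counted) clocks on its first
   libhb_so_send; later sends rcdec the old pair before installing or
   joining in a new one, and SO__Dealloc drops the final pair.  Hence
   the invariant relied on throughout: viR and viW are either both
   VtsID_INVALID or both valid. */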
4149
4150
4151/////////////////////////////////////////////////////////
4152// //
4153// Top Level API //
4154// //
4155/////////////////////////////////////////////////////////
4156
4157static void show_thread_state ( HChar* str, Thr* t )
4158{
4159 if (1) return;
4160 if (t->viR == t->viW) {
4161 VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
4162 VtsID__pp( t->viR );
4163 VG_(printf)("%s","\n");
4164 } else {
4165 VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
4166 VtsID__pp( t->viR );
4167 VG_(printf)(" viW %u==", t->viW);
4168 VtsID__pp( t->viW );
4169 VG_(printf)("%s","\n");
4170 }
4171}
4172
4173
4174Thr* libhb_init (
4175 void (*get_stacktrace)( Thr*, Addr*, UWord ),
sewardjd52392d2008-11-08 20:36:26 +00004176 ExeContext* (*get_EC)( Thr* )
sewardjf98e1c02008-10-25 16:22:41 +00004177 )
4178{
4179 Thr* thr;
4180 VtsID vi;
4181 tl_assert(get_stacktrace);
sewardjf98e1c02008-10-25 16:22:41 +00004182 tl_assert(get_EC);
4183 main_get_stacktrace = get_stacktrace;
sewardjf98e1c02008-10-25 16:22:41 +00004184 main_get_EC = get_EC;
4185
4186 // No need to initialise hg_wordfm.
4187 // No need to initialise hg_wordset.
4188
4189 vts_set_init();
4190 vts_tab_init();
4191 event_map_init();
4192 VtsID__invalidate_caches();
4193
4194 // initialise shadow memory
4195 zsm_init( SVal__rcinc, SVal__rcdec );
4196
4197 thr = Thr__new();
4198 vi = VtsID__mk_Singleton( thr, 1 );
4199 thr->viR = vi;
4200 thr->viW = vi;
4201 VtsID__rcinc(thr->viR);
4202 VtsID__rcinc(thr->viW);
4203
4204 show_thread_state(" root", thr);
4205 return thr;
4206}
4207
4208Thr* libhb_create ( Thr* parent )
4209{
4210 /* The child's VTSs are copies of the parent's VTSs, but ticked at
4211 the child's index. Since the child's index is guaranteed
4212 unique, it has never been seen before, so the implicit value
4213 before the tick is zero and after that is one. */
4214 Thr* child = Thr__new();
4215
4216 child->viR = VtsID__tick( parent->viR, child );
4217 child->viW = VtsID__tick( parent->viW, child );
4218 VtsID__rcinc(child->viR);
4219 VtsID__rcinc(child->viW);
4220
4221 tl_assert(VtsID__indexAt( child->viR, child ) == 1);
4222 tl_assert(VtsID__indexAt( child->viW, child ) == 1);
4223
4224 /* and the parent has to move along too */
4225 VtsID__rcdec(parent->viR);
4226 VtsID__rcdec(parent->viW);
4227 parent->viR = VtsID__tick( parent->viR, parent );
4228 parent->viW = VtsID__tick( parent->viW, parent );
4229 VtsID__rcinc(parent->viR);
4230 VtsID__rcinc(parent->viW);
4231
4232 show_thread_state(" child", child);
4233 show_thread_state("parent", parent);
4234
4235 return child;
4236}
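
/* Vector-clock sketch for libhb_create (illustrative values only).
   If the parent's clocks are currently { parent:3 }, then
      child->viR = child->viW = tick( { parent:3 }, child )
                              = { parent:3, child:1 }
   and the parent's own clocks tick on to { parent:4 }.  The two
   indexAt assertions above therefore hold, and neither thread's new
   clock dominates the other's, so subsequent accesses by parent and
   child stay unordered until some SO send/receive pairs them up. */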
4237
4238/* Shut down the library, and print stats (in fact that's _all_
4239   this is for). */
4240void libhb_shutdown ( Bool show_stats )
4241{
4242 if (show_stats) {
4243 VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
4244 VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
4245 stats__secmaps_allocd,
4246 stats__secmap_ga_space_covered);
4247 VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
4248 stats__secmap_linesZ_allocd,
4249 stats__secmap_linesZ_bytes);
4250 VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
4251 stats__secmap_linesF_allocd,
4252 stats__secmap_linesF_bytes);
4253 VG_(printf)(" secmaps: %'10lu iterator steppings\n",
4254 stats__secmap_iterator_steppings);
4255 VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
4256 stats__secmaps_search, stats__secmaps_search_slow);
4257
4258 VG_(printf)("%s","\n");
4259 VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
4260 stats__cache_totrefs, stats__cache_totmisses );
4261 VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
4262 stats__cache_Z_fetches, stats__cache_F_fetches );
4263 VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
4264 stats__cache_Z_wbacks, stats__cache_F_wbacks );
4265 VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n",
4266 stats__cache_invals, stats__cache_flushes );
4267 VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
4268 stats__cache_make_New_arange,
4269 stats__cache_make_New_inZrep);
4270
4271 VG_(printf)("%s","\n");
4272 VG_(printf)(" cline: %'10lu normalises\n",
4273 stats__cline_normalises );
4274 VG_(printf)(" cline: rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4275 stats__cline_read64s,
4276 stats__cline_read32s,
4277 stats__cline_read16s,
4278 stats__cline_read8s );
4279 VG_(printf)(" cline: wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4280 stats__cline_write64s,
4281 stats__cline_write32s,
4282 stats__cline_write16s,
4283 stats__cline_write8s );
4284 VG_(printf)(" cline: sets 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4285 stats__cline_set64s,
4286 stats__cline_set32s,
4287 stats__cline_set16s,
4288 stats__cline_set8s );
4289 VG_(printf)(" cline: get1s %'lu, copy1s %'lu\n",
4290 stats__cline_get8s, stats__cline_copy8s );
4291 VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
4292 stats__cline_64to32splits,
4293 stats__cline_32to16splits,
4294 stats__cline_16to8splits );
4295 VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
4296 stats__cline_64to32pulldown,
4297 stats__cline_32to16pulldown,
4298 stats__cline_16to8pulldown );
4299 if (0)
4300        VG_(printf)(" cline: sizeof(LineZ) %ld, covers %ld bytes of arange\n",
4301 (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);
4302
4303 VG_(printf)("%s","\n");
4304
4305 VG_(printf)(" libhb: %'13llu msm_read (%'llu changed)\n",
4306 stats__msm_read, stats__msm_read_change);
4307 VG_(printf)(" libhb: %'13llu msm_write (%'llu changed)\n",
4308 stats__msm_write, stats__msm_write_change);
4309 VG_(printf)(" libhb: %'13llu getOrd queries (%'llu misses)\n",
4310 stats__getOrdering_queries, stats__getOrdering_misses);
4311 VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
4312 stats__join2_queries, stats__join2_misses);
4313
4314 VG_(printf)("%s","\n");
4315 VG_(printf)(
4316 " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
4317 VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
4318 );
4319 VG_(printf)( " libhb: %lu entries in vts_set\n",
4320 VG_(sizeFM)( vts_set ) );
4321
4322 VG_(printf)("%s","\n");
4323 VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
4324 stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
4325 stats__ctxt_rcdec2,
4326 stats__ctxt_rcdec3 );
4327 VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
4328 stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
4329 VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n",
4330 (UWord)N_RCEC_TAB,
4331 stats__ctxt_tab_curr );
4332 VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
4333 stats__ctxt_tab_qs,
4334 stats__ctxt_tab_cmps );
4335#if 0
4336 VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
4337 VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
4338 VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
4339 VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
4340 VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
4341 VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
4342 VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
4343 VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
4344 VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
4345 VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
4346 VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
4347 VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
4348 VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
4349 VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
4350
4351 VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
4352 VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
4353 VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
4354 VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
4355#endif
4356
4357 VG_(printf)("%s","<<< END libhb stats >>>\n");
4358 VG_(printf)("%s","\n");
4359
4360 }
4361}
4362
4363void libhb_async_exit ( Thr* thr )
4364{
4365 /* is there anything we need to do? */
4366}
4367
4368/* Both Segs and SOs point to VTSs. However, there is no sharing, so
4369 a Seg that points at a VTS is its one-and-only owner, and ditto for
4370 a SO that points at a VTS. */
4371
4372SO* libhb_so_alloc ( void )
4373{
4374 return SO__Alloc();
4375}
4376
4377void libhb_so_dealloc ( SO* so )
4378{
4379 tl_assert(so);
4380 tl_assert(so->magic == SO_MAGIC);
4381 SO__Dealloc(so);
4382}
4383
4384/* See comments in libhb.h for details on the meaning of
4385 strong vs weak sends and strong vs weak receives. */
4386void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
4387{
4388 /* Copy the VTSs from 'thr' into the sync object, and then move
4389 the thread along one step. */
4390
4391 tl_assert(so);
4392 tl_assert(so->magic == SO_MAGIC);
4393
4394 /* stay sane .. a thread's read-clock must always lead or be the
4395 same as its write-clock */
4396 { POrd ord = VtsID__getOrdering(thr->viW, thr->viR);
4397 tl_assert(ord == POrd_EQ || ord == POrd_LT);
4398 }
4399
4400 /* since we're overwriting the VtsIDs in the SO, we need to drop
4401 any references made by the previous contents thereof */
4402 if (so->viR == VtsID_INVALID) {
4403 tl_assert(so->viW == VtsID_INVALID);
4404 so->viR = thr->viR;
4405 so->viW = thr->viW;
4406 VtsID__rcinc(so->viR);
4407 VtsID__rcinc(so->viW);
4408 } else {
4409 /* In a strong send, we dump any previous VC in the SO and
4410 install the sending thread's VC instead. For a weak send we
4411 must join2 with what's already there. */
4412 tl_assert(so->viW != VtsID_INVALID);
4413 VtsID__rcdec(so->viR);
4414 VtsID__rcdec(so->viW);
4415 so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
4416 so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
4417 VtsID__rcinc(so->viR);
4418 VtsID__rcinc(so->viW);
4419 }
4420
4421 /* move both parent clocks along */
4422 VtsID__rcdec(thr->viR);
4423 VtsID__rcdec(thr->viW);
4424 thr->viR = VtsID__tick( thr->viR, thr );
4425 thr->viW = VtsID__tick( thr->viW, thr );
4426 VtsID__rcinc(thr->viR);
4427 VtsID__rcinc(thr->viW);
4428 if (strong_send)
4429 show_thread_state("s-send", thr);
4430 else
4431 show_thread_state("w-send", thr);
4432}
4433
4434void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
4435{
4436 tl_assert(so);
4437 tl_assert(so->magic == SO_MAGIC);
4438
4439 if (so->viR != VtsID_INVALID) {
4440 tl_assert(so->viW != VtsID_INVALID);
4441
4442      /* Weak receive (basically, an R-acquisition of an R-W lock).
4443 This advances the read-clock of the receiver, but not the
4444 write-clock. */
4445 VtsID__rcdec(thr->viR);
4446 thr->viR = VtsID__join2( thr->viR, so->viR );
4447 VtsID__rcinc(thr->viR);
4448
4449 /* For a strong receive, we also advance the receiver's write
4450 clock, which means the receive as a whole is essentially
4451         equivalent to a W-acquisition of an R-W lock. */
4452 if (strong_recv) {
4453 VtsID__rcdec(thr->viW);
4454 thr->viW = VtsID__join2( thr->viW, so->viW );
4455 VtsID__rcinc(thr->viW);
4456 }
4457
4458 if (strong_recv)
4459 show_thread_state("s-recv", thr);
4460 else
4461 show_thread_state("w-recv", thr);
4462
4463 } else {
4464 tl_assert(so->viW == VtsID_INVALID);
4465 /* Deal with degenerate case: 'so' has no vts, so there has been
4466 no message posted to it. Just ignore this case. */
4467 show_thread_state("d-recv", thr);
4468 }
4469}
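
/* Message-passing sketch combining libhb_so_send and libhb_so_recv
   (illustrative values only).  Thread A, with clocks { A:5 }, does a
   strong send on SO s: s takes { A:5 } for both viR and viW and A
   ticks on to { A:6 }.  Thread B, with clocks { B:7 }, then does a
   strong recv on s: both of B's clocks become
      join( { B:7 }, { A:5 } ) = { A:5, B:7 },
   so everything A did at timestamps <= 5 now happens-before B's
   subsequent accesses.  A weak send would join A's clocks into
   whatever s already holds rather than overwriting it, and a weak
   recv advances only B's read-clock. */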
4470
4471Bool libhb_so_everSent ( SO* so )
4472{
4473 if (so->viR == VtsID_INVALID) {
4474 tl_assert(so->viW == VtsID_INVALID);
4475 return False;
4476 } else {
4477 tl_assert(so->viW != VtsID_INVALID);
4478 return True;
4479 }
4480}
4481
4482#define XXX1 0 // 0x67a106c
4483#define XXX2 0
4484
4485static Bool TRACEME(Addr a, SizeT szB) {
4486 if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
4487 if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
4488 return False;
4489}
4490static void trace ( Thr* thr, Addr a, SizeT szB, HChar* s ) {
4491 SVal sv = zsm_read8(a);
4492 VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
4493 show_thread_state("", thr);
4494 VG_(printf)("%s","\n");
4495}
4496
4497void libhb_range_new ( Thr* thr, Addr a, SizeT szB )
4498{
4499 SVal sv = SVal__mkC(thr->viW, thr->viW);
4500 tl_assert(is_sane_SVal_C(sv));
4501 if(TRACEME(a,szB))trace(thr,a,szB,"nw-before");
4502 zsm_set_range( a, szB, sv );
4503 if(TRACEME(a,szB))trace(thr,a,szB,"nw-after ");
4504}
4505
4506void libhb_range_noaccess ( Thr* thr, Addr a, SizeT szB )
4507{
4508 if(TRACEME(a,szB))trace(thr,a,szB,"NA-before");
4509 zsm_set_range( a, szB, SVal__mkA() );
4510 if(TRACEME(a,szB))trace(thr,a,szB,"NA-after ");
4511}
4512
4513void* libhb_get_Thr_opaque ( Thr* thr ) {
4514 tl_assert(thr);
4515 return thr->opaque;
4516}
4517
4518void libhb_set_Thr_opaque ( Thr* thr, void* v ) {
4519 tl_assert(thr);
4520 thr->opaque = v;
4521}
4522
4523void libhb_copy_shadow_state ( Addr dst, Addr src, SizeT len )
4524{
4525 zsm_copy_range(dst, src, len);
4526}
4527
4528void libhb_maybe_GC ( void )
4529{
4530 event_map_maybe_GC();
4531 /* If there are still freelist entries available, no need for a
4532 GC. */
4533 if (vts_tab_freelist != VtsID_INVALID)
4534 return;
4535 /* So all the table entries are full, and we're having to expand
4536      the table. But did we hit the threshold point yet? */
4537 if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
4538 return;
4539 vts_tab__do_GC( False/*don't show stats*/ );
4540}
4541
4542
4543/////////////////////////////////////////////////////////////////
4544/////////////////////////////////////////////////////////////////
4545// //
4546// SECTION END main library //
4547// //
4548/////////////////////////////////////////////////////////////////
4549/////////////////////////////////////////////////////////////////
4550
4551/*--------------------------------------------------------------------*/
4552/*--- end libhb_main.c ---*/
4553/*--------------------------------------------------------------------*/