2/*--------------------------------------------------------------------*/
3/*--- LibHB: a library for implementing and checking ---*/
4/*--- the happens-before relationship in concurrent programs. ---*/
5/*--- libhb_main.c ---*/
6/*--------------------------------------------------------------------*/
7
8/*
9 This file is part of LibHB, a library for implementing and checking
10 the happens-before relationship in concurrent programs.
11
12 Copyright (C) 2008-2008 OpenWorks Ltd
13 info@open-works.co.uk
14
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file COPYING.
31*/
32
33#include "pub_tool_basics.h"
34#include "pub_tool_libcassert.h"
35#include "pub_tool_libcbase.h"
36#include "pub_tool_libcprint.h"
37#include "pub_tool_mallocfree.h"
38#include "pub_tool_wordfm.h"
39#include "pub_tool_xarray.h"
40#include "pub_tool_oset.h"
41#include "pub_tool_threadstate.h"
42#include "pub_tool_aspacemgr.h"
43#include "pub_tool_execontext.h"
44#include "pub_tool_errormgr.h"
45#include "pub_tool_options.h" // VG_(clo_verbosity)
46#include "hg_basics.h"
47#include "hg_wordset.h"
48#include "hg_lock_n_thread.h"
49#include "hg_errors.h"
50
51#include "libhb.h"
52
53
54/* Globals needed by other parts of the library. These are set
55 once at startup and then never changed. */
56
57static void (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
58static ExeContext* (*main_get_EC)( Thr* ) = NULL;
59
60/////////////////////////////////////////////////////////////////
61/////////////////////////////////////////////////////////////////
62// //
63// //
64// //
65/////////////////////////////////////////////////////////////////
66/////////////////////////////////////////////////////////////////
67
68
69/////////////////////////////////////////////////////////////////
70/////////////////////////////////////////////////////////////////
71// //
72// SECTION BEGIN compressed shadow memory //
73// //
74/////////////////////////////////////////////////////////////////
75/////////////////////////////////////////////////////////////////
76
77#ifndef __HB_ZSM_H
78#define __HB_ZSM_H
79
80typedef ULong SVal;
81
82/* This value has special significance to the implementation, and callers
83 may not store it in the shadow memory. */
84#define SVal_INVALID (3ULL << 62)
85
86/* This is the default value for shadow memory. Initially the shadow
87 memory contains no accessible areas and so all reads produce this
88 value. TODO: make this caller-definable. */
89#define SVal_NOACCESS (2ULL << 62)
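/* (Numerically, SVal_NOACCESS is 0x8000000000000000 and SVal_INVALID
   is 0xC000000000000000, so the two reserved values are distinguished
   purely by the top two bits of the 64-bit SVal.  Illustrative note,
   not part of the original code.) */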
90
91/* Initialise the library. Once initialised, it will (or may) call
92 rcinc and rcdec in response to all the calls below, in order to
93 allow the user to do reference counting on the SVals stored herein.
94 It is important to understand, however, that due to internal
95 caching, the reference counts are in general inaccurate, and can be
96 both above or below the true reference count for an item. In
97 particular, the library may indicate that the reference count for
98 an item is zero, when in fact it is not.
99
100 To make the reference counting exact and therefore non-pointless,
101 call zsm_flush_cache. Immediately after it returns, the reference
102 counts for all items, as deduced by the caller by observing calls
103 to rcinc and rcdec, will be correct, and so any items with a zero
104 reference count may be freed (or at least considered to be
105 unreferenced by this library).
106*/
107static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );
108
109static void zsm_set_range ( Addr, SizeT, SVal );
110static SVal zsm_read8 ( Addr );
111static void zsm_copy_range ( Addr, Addr, SizeT );
112static void zsm_flush_cache ( void );
113
114#endif /* ! __HB_ZSM_H */
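/* Illustrative usage sketch (not part of the library; my_rcinc and
   my_rcdec are hypothetical caller-supplied callbacks that adjust the
   caller's own per-SVal counters):

      zsm_init( my_rcinc, my_rcdec );   // register RC callbacks
      zsm_set_range( a, len, sv );      // library may call rcinc/rcdec
      ...                               // observed counts may be inexact here
      zsm_flush_cache();                // counts observed so far are now exact
      // any SVal whose observed count is zero may now be reclaimed
*/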
115
116
117/* For the shadow mem cache stuff we may want more intrusive
118 checks. Unfortunately there's no almost-zero-cost way to make them
119 selectable at run time. Hence set the #if 0 to #if 1 and
120 rebuild if you want them. */
121#if 0
122# define SCE_CACHELINE 1 /* do sanity-check CacheLine stuff */
123# define inline __attribute__((noinline))
124 /* probably want to ditch -fomit-frame-pointer too */
125#else
126# define SCE_CACHELINE 0 /* don't sanity-check CacheLine stuff */
127#endif
128
129/* For the SegmentID, SegmentSet and SVal stuff we may want more
130 intrusive checks. Again there's no zero cost way to do this. Set
131 the #if 0 to #if 1 and rebuild if you want them. */
132#if 0
133# define SCE_SVALS 1 /* sanity-check shadow value stuff */
134#else
135# define SCE_SVALS 0
136#endif
137
138
139/* Round a up to the next multiple of N. N must be a power of 2 */
140#define ROUNDUP(a, N) (((a) + (N) - 1) & ~((N)-1))
141/* Round a down to the next multiple of N. N must be a power of 2 */
142#define ROUNDDN(a, N) ((a) & ~((N)-1))
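/* Worked example (illustrative only): ROUNDUP(13, 8) == 16,
   ROUNDUP(16, 8) == 16, ROUNDDN(13, 8) == 8, ROUNDDN(16, 8) == 16. */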
143
144
145
146/* ------ User-supplied RC functions ------ */
147static void(*rcinc)(SVal) = NULL;
148static void(*rcdec)(SVal) = NULL;
149
150
151/* ------ CacheLine ------ */
152
153#define N_LINE_BITS 6 /* must be >= 3 */
154#define N_LINE_ARANGE (1 << N_LINE_BITS)
155#define N_LINE_TREES (N_LINE_ARANGE >> 3)
156
157typedef
158 struct {
159 UShort descrs[N_LINE_TREES];
160 SVal svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
161 }
162 CacheLine;
163
164#define TREE_DESCR_16_0 (1<<0)
165#define TREE_DESCR_32_0 (1<<1)
166#define TREE_DESCR_16_1 (1<<2)
167#define TREE_DESCR_64 (1<<3)
168#define TREE_DESCR_16_2 (1<<4)
169#define TREE_DESCR_32_1 (1<<5)
170#define TREE_DESCR_16_3 (1<<6)
171#define TREE_DESCR_8_0 (1<<7)
172#define TREE_DESCR_8_1 (1<<8)
173#define TREE_DESCR_8_2 (1<<9)
174#define TREE_DESCR_8_3 (1<<10)
175#define TREE_DESCR_8_4 (1<<11)
176#define TREE_DESCR_8_5 (1<<12)
177#define TREE_DESCR_8_6 (1<<13)
178#define TREE_DESCR_8_7 (1<<14)
179#define TREE_DESCR_DTY (1<<15)
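/* Illustrative examples (not part of the original code): a tree whose
   eight leaves carry independent byte-sized values has descriptor
   TREE_DESCR_8_7 | ... | TREE_DESCR_8_0 == 0x7F80, while a tree
   holding a single 64-bit value has descriptor TREE_DESCR_64 == 0x0008. */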
180
181typedef
182 struct {
183 SVal dict[4]; /* can represent up to 4 diff values in the line */
184 UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
185 dict indexes */
186 /* if dict[0] == SVal_INVALID then dict[1] is the index of the
187 LineF to use, and dict[2..] are also SVal_INVALID. */
188 }
189 LineZ; /* compressed rep for a cache line */
190
191typedef
192 struct {
193 Bool inUse;
194 SVal w64s[N_LINE_ARANGE];
195 }
196 LineF; /* full rep for a cache line */
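/* Illustrative example (not part of the original code): if all
   N_LINE_ARANGE (64) shadow values of a line are equal to some value
   A, the line compresses to a LineZ with dict == {A, SVal_INVALID,
   SVal_INVALID, SVal_INVALID} and every 2-bit index in ix2s == 0.
   A line containing five or more distinct values cannot be encoded
   with a 4-entry dict and is stored in a LineF instead, with the
   owning LineZ's dict[0] set to SVal_INVALID and dict[1] holding the
   index of that LineF. */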
197
198/* Shadow memory.
199 Primary map is a WordFM Addr SecMap*.
200 SecMaps cover some page-size-ish section of address space and hold
201 a compressed representation.
202 CacheLine-sized chunks of SecMaps are copied into a Cache, being
203 decompressed when moved into the cache and recompressed on the
204 way out. Because of this, the cache must operate as a writeback
205 cache, not a writethrough one.
206
207 Each SecMap must hold a power-of-2 number of CacheLines. Hence
208 N_SECMAP_BITS must be >= N_LINE_BITS.
209*/
210#define N_SECMAP_BITS 13
211#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)
212
213// # CacheLines held by a SecMap
214#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
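/* With the values above (N_SECMAP_BITS == 13, N_LINE_BITS == 6), each
   SecMap covers 8192 bytes of address space and holds 128 LineZs
   (illustrative arithmetic, not part of the original code). */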
215
216/* The data in the SecMap is held in the array of LineZs. Each LineZ
217 either carries the required data directly, in a compressed
218 representation, or it holds (in .dict[0]) an index to the LineF in
219 .linesF that holds the full representation.
220
221 Currently-unused LineF's have their .inUse bit set to zero.
222 Since each in-use LineF is referred to by exactly one LineZ,
223 the number of .linesZ[] that refer to .linesF should equal
224 the number of .linesF[] that have .inUse == True.
225
226 RC obligations: the RCs presented to the user include exactly
227 the values in:
228 * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
229 * F reps that are in use (.inUse == True)
230
231 Hence the following actions at the following transitions are required:
232
233 F rep: .inUse==True -> .inUse==False -- rcdec_LineF
234 F rep: .inUse==False -> .inUse==True -- rcinc_LineF
235 Z rep: .dict[0] from other to SVal_INVALID -- rcdec_LineZ
236 Z rep: .dict[0] from SVal_INVALID to other -- rcinc_LineZ
237*/
238typedef
239 struct {
240 UInt magic;
241 LineZ linesZ[N_SECMAP_ZLINES];
242 LineF* linesF;
243 UInt linesF_size;
244 }
245 SecMap;
246
247#define SecMap_MAGIC 0x571e58cbU
248
249static inline Bool is_sane_SecMap ( SecMap* sm ) {
250 return sm != NULL && sm->magic == SecMap_MAGIC;
251}
252
253/* ------ Cache ------ */
254
255#define N_WAY_BITS 16
256#define N_WAY_NENT (1 << N_WAY_BITS)
257
258/* Each tag is the address of the associated CacheLine, rounded down
259 to a CacheLine address boundary. The CacheLine size must be a
260 power of 2 and must be 8 or more. Hence an easy way to initialise
261 the cache so that it is empty is to set all the tag values to any
262 value that is not a multiple of 8, eg 1. This means all queries in
263 the cache initially miss. It does, however, require us to detect,
264 and not write back, any line with a bogus tag. */
265typedef
266 struct {
267 CacheLine lyns0[N_WAY_NENT];
268 Addr tags0[N_WAY_NENT];
269 }
270 Cache;
271
272static inline Bool is_valid_scache_tag ( Addr tag ) {
273 /* a valid tag should be naturally aligned to the start of
274 a CacheLine. */
275 return 0 == (tag & (N_LINE_ARANGE - 1));
276}
277
278
279/* --------- Primary data structures --------- */
280
281/* Shadow memory primary map */
282static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
283static Cache cache_shmem;
284
285
286static UWord stats__secmaps_search = 0; // # SM finds
287static UWord stats__secmaps_search_slow = 0; // # SM lookupFMs
288static UWord stats__secmaps_allocd = 0; // # SecMaps issued
289static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
290static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
291static UWord stats__secmap_linesZ_bytes = 0; // .. using this much storage
292static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
293static UWord stats__secmap_linesF_bytes = 0; // .. using this much storage
294static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
295static UWord stats__cache_Z_fetches = 0; // # Z lines fetched
296static UWord stats__cache_Z_wbacks = 0; // # Z lines written back
297static UWord stats__cache_F_fetches = 0; // # F lines fetched
298static UWord stats__cache_F_wbacks = 0; // # F lines written back
299static UWord stats__cache_invals = 0; // # cache invals
300static UWord stats__cache_flushes = 0; // # cache flushes
301static UWord stats__cache_totrefs = 0; // # total accesses
302static UWord stats__cache_totmisses = 0; // # misses
303static ULong stats__cache_make_New_arange = 0; // total arange made New
304static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
305static UWord stats__cline_normalises = 0; // # calls to cacheline_normalise
306static UWord stats__cline_read64s = 0; // # calls to s_m_read64
307static UWord stats__cline_read32s = 0; // # calls to s_m_read32
308static UWord stats__cline_read16s = 0; // # calls to s_m_read16
309static UWord stats__cline_read8s = 0; // # calls to s_m_read8
310static UWord stats__cline_write64s = 0; // # calls to s_m_write64
311static UWord stats__cline_write32s = 0; // # calls to s_m_write32
312static UWord stats__cline_write16s = 0; // # calls to s_m_write16
313static UWord stats__cline_write8s = 0; // # calls to s_m_write8
314static UWord stats__cline_set64s = 0; // # calls to s_m_set64
315static UWord stats__cline_set32s = 0; // # calls to s_m_set32
316static UWord stats__cline_set16s = 0; // # calls to s_m_set16
317static UWord stats__cline_set8s = 0; // # calls to s_m_set8
318static UWord stats__cline_get8s = 0; // # calls to s_m_get8
319static UWord stats__cline_copy8s = 0; // # calls to s_m_copy8
320static UWord stats__cline_64to32splits = 0; // # 64-bit accesses split
321static UWord stats__cline_32to16splits = 0; // # 32-bit accesses split
322static UWord stats__cline_16to8splits = 0; // # 16-bit accesses split
323static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
324static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
325static UWord stats__cline_16to8pulldown = 0; // # calls to pulldown_to_8
326
327static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
328 return a & ~(N_SECMAP_ARANGE - 1);
329}
330static inline UWord shmem__get_SecMap_offset ( Addr a ) {
331 return a & (N_SECMAP_ARANGE - 1);
332}
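/* Worked example (illustrative only): with N_SECMAP_ARANGE == 0x2000,
   shmem__round_to_SecMap_base(0x12345) == 0x12000 and
   shmem__get_SecMap_offset(0x12345) == 0x345. */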
333
334
335/*----------------------------------------------------------------*/
336/*--- map_shmem :: WordFM Addr SecMap ---*/
337/*--- shadow memory (low level handlers) (shmem__* fns) ---*/
338/*----------------------------------------------------------------*/
339
340/*--------------- SecMap allocation --------------- */
341
342static HChar* shmem__bigchunk_next = NULL;
343static HChar* shmem__bigchunk_end1 = NULL;
344
345static void* shmem__bigchunk_alloc ( SizeT n )
346{
347 const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
348 tl_assert(n > 0);
349 n = VG_ROUNDUP(n, 16);
350 tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
351 tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
352 <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
353 if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
354 if (0)
355 VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
356 (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
357 shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
358 if (shmem__bigchunk_next == NULL)
359 VG_(out_of_memory_NORETURN)(
360 "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
361 shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
362 }
363 tl_assert(shmem__bigchunk_next);
364 tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
365 tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
366 shmem__bigchunk_next += n;
367 return shmem__bigchunk_next - n;
368}
369
370static SecMap* shmem__alloc_SecMap ( void )
371{
372 Word i, j;
373 SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
374 if (0) VG_(printf)("alloc_SecMap %p\n",sm);
375 tl_assert(sm);
376 sm->magic = SecMap_MAGIC;
377 for (i = 0; i < N_SECMAP_ZLINES; i++) {
378 sm->linesZ[i].dict[0] = SVal_NOACCESS;
379 sm->linesZ[i].dict[1] = SVal_INVALID;
380 sm->linesZ[i].dict[2] = SVal_INVALID;
381 sm->linesZ[i].dict[3] = SVal_INVALID;
382 for (j = 0; j < N_LINE_ARANGE/4; j++)
383 sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
384 }
385 sm->linesF = NULL;
386 sm->linesF_size = 0;
387 stats__secmaps_allocd++;
388 stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
389 stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
390 stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
391 return sm;
392}
393
394typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
395static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
396
397static SecMap* shmem__find_SecMap ( Addr ga )
398{
399 SecMap* sm = NULL;
400 Addr gaKey = shmem__round_to_SecMap_base(ga);
401 // Cache
402 stats__secmaps_search++;
403 if (LIKELY(gaKey == smCache[0].gaKey))
404 return smCache[0].sm;
405 if (LIKELY(gaKey == smCache[1].gaKey)) {
406 SMCacheEnt tmp = smCache[0];
407 smCache[0] = smCache[1];
408 smCache[1] = tmp;
409 return smCache[0].sm;
410 }
411 if (gaKey == smCache[2].gaKey) {
412 SMCacheEnt tmp = smCache[1];
413 smCache[1] = smCache[2];
414 smCache[2] = tmp;
415 return smCache[1].sm;
416 }
417 // end Cache
418 stats__secmaps_search_slow++;
419 if (VG_(lookupFM)( map_shmem,
420 NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
421 tl_assert(sm != NULL);
422 smCache[2] = smCache[1];
423 smCache[1] = smCache[0];
424 smCache[0].gaKey = gaKey;
425 smCache[0].sm = sm;
426 } else {
427 tl_assert(sm == NULL);
428 }
429 return sm;
430}
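/* Descriptive note (not part of the original code): smCache above is
   a tiny 3-entry cache of recent (gaKey, SecMap*) pairs. A hit on
   entry 1 or 2 swaps that entry one place towards the front, and a
   miss that is satisfied by the WordFM shifts the existing entries
   down and installs the new pair at entry 0, so frequently used
   SecMaps migrate towards slot 0. */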
431
432static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
433{
434 SecMap* sm = shmem__find_SecMap ( ga );
435 if (LIKELY(sm)) {
436 return sm;
437 } else {
438 /* create a new one */
439 Addr gaKey = shmem__round_to_SecMap_base(ga);
440 sm = shmem__alloc_SecMap();
441 tl_assert(sm);
442 VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
443 return sm;
444 }
445}
446
447
448/* ------------ LineF and LineZ related ------------ */
449
450static void rcinc_LineF ( LineF* lineF ) {
451 UWord i;
452 tl_assert(lineF->inUse);
453 for (i = 0; i < N_LINE_ARANGE; i++)
454 rcinc(lineF->w64s[i]);
455}
456
457static void rcdec_LineF ( LineF* lineF ) {
458 UWord i;
459 tl_assert(lineF->inUse);
460 for (i = 0; i < N_LINE_ARANGE; i++)
461 rcdec(lineF->w64s[i]);
462}
463
464static void rcinc_LineZ ( LineZ* lineZ ) {
465 tl_assert(lineZ->dict[0] != SVal_INVALID);
466 rcinc(lineZ->dict[0]);
467 if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
468 if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
469 if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
470}
471
472static void rcdec_LineZ ( LineZ* lineZ ) {
473 tl_assert(lineZ->dict[0] != SVal_INVALID);
474 rcdec(lineZ->dict[0]);
475 if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
476 if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
477 if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
478}
479
480inline
481static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
482 Word bix, shft, mask, prep;
483 tl_assert(ix >= 0);
484 bix = ix >> 2;
485 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
486 mask = 3 << shft;
487 prep = b2 << shft;
488 arr[bix] = (arr[bix] & ~mask) | prep;
489}
490
491inline
492static UWord read_twobit_array ( UChar* arr, UWord ix ) {
493 Word bix, shft;
494 tl_assert(ix >= 0);
495 bix = ix >> 2;
496 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
497 return (arr[bix] >> shft) & 3;
498}
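/* Worked example (illustrative only): write_twobit_array(arr, 5, 3)
   updates bits [3:2] of arr[1] (bix == 5 >> 2 == 1, shft == 2), and a
   subsequent read_twobit_array(arr, 5) returns 3. */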
499
500/* Given address 'tag', find either the Z or F line containing relevant
501 data, so it can be read into the cache.
502*/
503static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
504 /*OUT*/LineF** fp, Addr tag ) {
505 LineZ* lineZ;
506 LineF* lineF;
507 UWord zix;
508 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
509 UWord smoff = shmem__get_SecMap_offset(tag);
510 /* since smoff is derived from a valid tag, it should be
511 cacheline-aligned. */
512 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
513 zix = smoff >> N_LINE_BITS;
514 tl_assert(zix < N_SECMAP_ZLINES);
515 lineZ = &sm->linesZ[zix];
516 lineF = NULL;
517 if (lineZ->dict[0] == SVal_INVALID) {
518 UInt fix = (UInt)lineZ->dict[1];
519 tl_assert(sm->linesF);
520 tl_assert(sm->linesF_size > 0);
521 tl_assert(fix >= 0 && fix < sm->linesF_size);
522 lineF = &sm->linesF[fix];
523 tl_assert(lineF->inUse);
524 lineZ = NULL;
525 }
526 *zp = lineZ;
527 *fp = lineF;
528}
529
530/* Given address 'tag', return the relevant SecMap and the index of
531 the LineZ within it, in the expectation that the line is to be
532 overwritten. Regardless of whether 'tag' is currently associated
533 with a Z or F representation, rcdec the current representation,
534 in recognition of the fact that its contents are just about
535 to be overwritten. */
536static __attribute__((noinline))
537void find_Z_for_writing ( /*OUT*/SecMap** smp,
538 /*OUT*/Word* zixp,
539 Addr tag ) {
540 LineZ* lineZ;
541 LineF* lineF;
542 UWord zix;
543 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
544 UWord smoff = shmem__get_SecMap_offset(tag);
545 /* since smoff is derived from a valid tag, it should be
546 cacheline-aligned. */
547 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
548 zix = smoff >> N_LINE_BITS;
549 tl_assert(zix < N_SECMAP_ZLINES);
550 lineZ = &sm->linesZ[zix];
551 lineF = NULL;
552 /* re RCs, we are freeing up this LineZ/LineF so that new data can
553 be parked in it. Hence have to rcdec it accordingly. */
554 /* If lineZ has an associated lineF, free it up. */
555 if (lineZ->dict[0] == SVal_INVALID) {
556 UInt fix = (UInt)lineZ->dict[1];
557 tl_assert(sm->linesF);
558 tl_assert(sm->linesF_size > 0);
559 tl_assert(fix >= 0 && fix < sm->linesF_size);
560 lineF = &sm->linesF[fix];
561 tl_assert(lineF->inUse);
562 rcdec_LineF(lineF);
563 lineF->inUse = False;
564 } else {
565 rcdec_LineZ(lineZ);
566 }
567 *smp = sm;
568 *zixp = zix;
569}
570
571static __attribute__((noinline))
572void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
573 UInt i, new_size;
574 LineF* nyu;
575
576 if (sm->linesF) {
577 tl_assert(sm->linesF_size > 0);
578 } else {
579 tl_assert(sm->linesF_size == 0);
580 }
581
582 if (sm->linesF) {
583 for (i = 0; i < sm->linesF_size; i++) {
584 if (!sm->linesF[i].inUse) {
585 *fixp = (Word)i;
586 return;
587 }
588 }
589 }
590
591 /* No free F line found. Expand existing array and try again. */
592 new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
593 nyu = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
594 new_size * sizeof(LineF) );
595 tl_assert(nyu);
596
597 stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
598 stats__secmap_linesF_bytes += (new_size - sm->linesF_size)
599 * sizeof(LineF);
600
601 if (0)
602 VG_(printf)("SM %p: expand F array from %d to %d\n",
603 sm, (Int)sm->linesF_size, new_size);
604
605 for (i = 0; i < new_size; i++)
606 nyu[i].inUse = False;
607
608 if (sm->linesF) {
609 for (i = 0; i < sm->linesF_size; i++) {
610 tl_assert(sm->linesF[i].inUse);
611 nyu[i] = sm->linesF[i];
612 }
613 VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
614 HG_(free)(sm->linesF);
615 }
616
617 sm->linesF = nyu;
618 sm->linesF_size = new_size;
619
620 for (i = 0; i < sm->linesF_size; i++) {
621 if (!sm->linesF[i].inUse) {
622 *fixp = (Word)i;
623 return;
624 }
625 }
626
627 /*NOTREACHED*/
628 tl_assert(0);
629}
630
631
632/* ------------ CacheLine and implicit-tree related ------------ */
633
634__attribute__((unused))
635static void pp_CacheLine ( CacheLine* cl ) {
636 Word i;
637 if (!cl) {
638 VG_(printf)("%s","pp_CacheLine(NULL)\n");
639 return;
640 }
641 for (i = 0; i < N_LINE_TREES; i++)
642 VG_(printf)(" descr: %04lx\n", (UWord)cl->descrs[i]);
643 for (i = 0; i < N_LINE_ARANGE; i++)
644 VG_(printf)(" sval: %08lx\n", (UWord)cl->svals[i]);
645}
646
647static UChar descr_to_validbits ( UShort descr )
648{
649 /* a.k.a Party Time for gcc's constant folder */
650# define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
651 b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0) \
652 ( (UShort) ( ( (b8_7) << 14) | ( (b8_6) << 13) | \
653 ( (b8_5) << 12) | ( (b8_4) << 11) | \
654 ( (b8_3) << 10) | ( (b8_2) << 9) | \
655 ( (b8_1) << 8) | ( (b8_0) << 7) | \
656 ( (b16_3) << 6) | ( (b32_1) << 5) | \
657 ( (b16_2) << 4) | ( (b64) << 3) | \
658 ( (b16_1) << 2) | ( (b32_0) << 1) | \
659 ( (b16_0) << 0) ) )
660
661# define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
662 ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
663 ( (bit5) << 5) | ( (bit4) << 4) | \
664 ( (bit3) << 3) | ( (bit2) << 2) | \
665 ( (bit1) << 1) | ( (bit0) << 0) ) )
666
667 /* these should all get folded out at compile time */
668 tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
669 tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
670 tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
671 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
672 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
673 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
674 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
675 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
676 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);
677
678 switch (descr) {
679 /*
680 +--------------------------------- TREE_DESCR_8_7
681 | +------------------- TREE_DESCR_8_0
682 | | +---------------- TREE_DESCR_16_3
683 | | | +-------------- TREE_DESCR_32_1
684 | | | | +------------ TREE_DESCR_16_2
685 | | | | | +--------- TREE_DESCR_64
686 | | | | | | +------ TREE_DESCR_16_1
687 | | | | | | | +---- TREE_DESCR_32_0
688 | | | | | | | | +-- TREE_DESCR_16_0
689 | | | | | | | | |
690 | | | | | | | | | GRANULARITY, 7 -> 0 */
691 case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */
692 return BYTE(1,1,1,1,1,1,1,1);
693 case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */
694 return BYTE(1,1,0,1,1,1,1,1);
695 case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */
696 return BYTE(0,1,1,1,1,1,1,1);
697 case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */
698 return BYTE(0,1,0,1,1,1,1,1);
699
700 case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */
701 return BYTE(1,1,1,1,1,1,0,1);
702 case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */
703 return BYTE(1,1,0,1,1,1,0,1);
704 case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */
705 return BYTE(0,1,1,1,1,1,0,1);
706 case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */
707 return BYTE(0,1,0,1,1,1,0,1);
708
709 case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */
710 return BYTE(1,1,1,1,0,1,1,1);
711 case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */
712 return BYTE(1,1,0,1,0,1,1,1);
713 case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */
714 return BYTE(0,1,1,1,0,1,1,1);
715 case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */
716 return BYTE(0,1,0,1,0,1,1,1);
717
718 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */
719 return BYTE(1,1,1,1,0,1,0,1);
720 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */
721 return BYTE(1,1,0,1,0,1,0,1);
722 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */
723 return BYTE(0,1,1,1,0,1,0,1);
724 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */
725 return BYTE(0,1,0,1,0,1,0,1);
726
727 case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */
728 return BYTE(0,0,0,1,1,1,1,1);
729 case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */
730 return BYTE(0,0,0,1,1,1,0,1);
731 case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */
732 return BYTE(0,0,0,1,0,1,1,1);
733 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */
734 return BYTE(0,0,0,1,0,1,0,1);
735
736 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */
737 return BYTE(1,1,1,1,0,0,0,1);
738 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */
739 return BYTE(1,1,0,1,0,0,0,1);
740 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */
741 return BYTE(0,1,1,1,0,0,0,1);
742 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */
743 return BYTE(0,1,0,1,0,0,0,1);
744
745 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
746 return BYTE(0,0,0,1,0,0,0,1);
747
748 case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
749 return BYTE(0,0,0,0,0,0,0,1);
750
751 default: return BYTE(0,0,0,0,0,0,0,0);
752 /* INVALID - any valid descr produces at least one
753 valid bit in tree[0..7]*/
754 }
755 /* NOTREACHED*/
756 tl_assert(0);
757
758# undef DESCR
759# undef BYTE
760}
761
762__attribute__((unused))
763static Bool is_sane_Descr ( UShort descr ) {
764 return descr_to_validbits(descr) != 0;
765}
766
767static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
768 VG_(sprintf)(dst,
769 "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
770 (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
771 (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
772 (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
773 (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
774 (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
775 (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
776 (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
777 (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
778 (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
779 (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
780 (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
781 (Int)((descr & TREE_DESCR_64) ? 1 : 0),
782 (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
783 (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
784 (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
785 );
786}
787static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
788 VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
789 (Int)((byte & 128) ? 1 : 0),
790 (Int)((byte & 64) ? 1 : 0),
791 (Int)((byte & 32) ? 1 : 0),
792 (Int)((byte & 16) ? 1 : 0),
793 (Int)((byte & 8) ? 1 : 0),
794 (Int)((byte & 4) ? 1 : 0),
795 (Int)((byte & 2) ? 1 : 0),
796 (Int)((byte & 1) ? 1 : 0)
797 );
798}
799
800static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
801 Word i;
802 UChar validbits = descr_to_validbits(descr);
803 HChar buf[128], buf2[128];
804 if (validbits == 0)
805 goto bad;
806 for (i = 0; i < 8; i++) {
807 if (validbits & (1<<i)) {
808 if (tree[i] == SVal_INVALID)
809 goto bad;
810 } else {
811 if (tree[i] != SVal_INVALID)
812 goto bad;
813 }
814 }
815 return True;
816 bad:
817 sprintf_Descr( buf, descr );
818 sprintf_Byte( buf2, validbits );
819 VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
820 VG_(printf)(" validbits 0x%02lx %s\n", (UWord)validbits, buf2);
821 VG_(printf)(" descr 0x%04lx %s\n", (UWord)descr, buf);
822 for (i = 0; i < 8; i++)
823 VG_(printf)(" [%ld] 0x%016llx\n", i, tree[i]);
824 VG_(printf)("%s","}\n");
825 return 0;
826}
827
828static Bool is_sane_CacheLine ( CacheLine* cl )
829{
830 Word tno, cloff;
831
832 if (!cl) goto bad;
833
834 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
835 UShort descr = cl->descrs[tno];
836 SVal* tree = &cl->svals[cloff];
837 if (!is_sane_Descr_and_Tree(descr, tree))
838 goto bad;
839 }
840 tl_assert(cloff == N_LINE_ARANGE);
841 return True;
842 bad:
843 pp_CacheLine(cl);
844 return False;
845}
846
847static UShort normalise_tree ( /*MOD*/SVal* tree )
848{
849 UShort descr;
850 /* pre: incoming tree[0..7] does not have any invalid shvals, in
851 particular no zeroes. */
852 if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
853 || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
854 || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
855 || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
856 tl_assert(0);
857
858 descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
859 | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
860 | TREE_DESCR_8_1 | TREE_DESCR_8_0;
861 /* build 16-bit layer */
862 if (tree[1] == tree[0]) {
863 tree[1] = SVal_INVALID;
864 descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
865 descr |= TREE_DESCR_16_0;
866 }
867 if (tree[3] == tree[2]) {
868 tree[3] = SVal_INVALID;
869 descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
870 descr |= TREE_DESCR_16_1;
871 }
872 if (tree[5] == tree[4]) {
873 tree[5] = SVal_INVALID;
874 descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
875 descr |= TREE_DESCR_16_2;
876 }
877 if (tree[7] == tree[6]) {
878 tree[7] = SVal_INVALID;
879 descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
880 descr |= TREE_DESCR_16_3;
881 }
882 /* build 32-bit layer */
883 if (tree[2] == tree[0]
884 && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
885 tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
886 descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
887 descr |= TREE_DESCR_32_0;
888 }
889 if (tree[6] == tree[4]
890 && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
891 tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
892 descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
893 descr |= TREE_DESCR_32_1;
894 }
895 /* build 64-bit layer */
896 if (tree[4] == tree[0]
897 && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
898 tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
899 descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
900 descr |= TREE_DESCR_64;
901 }
902 return descr;
903}
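/* Illustrative example (not part of the original code): if on entry
   tree[0..7] all hold the same value, the 16-bit, 32-bit and 64-bit
   layers collapse in turn, tree[1..7] are set to SVal_INVALID, and
   the returned descriptor is just TREE_DESCR_64. */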
904
905/* This takes a cacheline where all the data is at the leaves
906 (w8[..]) and builds a correctly normalised tree. */
907static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
908{
909 Word tno, cloff;
910 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
911 SVal* tree = &cl->svals[cloff];
912 cl->descrs[tno] = normalise_tree( tree );
913 }
914 tl_assert(cloff == N_LINE_ARANGE);
915 if (SCE_CACHELINE)
916 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
917 stats__cline_normalises++;
918}
919
920
921typedef struct { UChar count; SVal sval; } CountedSVal;
922
923static
924void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
925 /*OUT*/Word* dstUsedP,
926 Word nDst, CacheLine* src )
927{
928 Word tno, cloff, dstUsed;
929
930 tl_assert(nDst == N_LINE_ARANGE);
931 dstUsed = 0;
932
933 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
934 UShort descr = src->descrs[tno];
935 SVal* tree = &src->svals[cloff];
936
937 /* sequentialise the tree described by (descr,tree). */
938# define PUT(_n,_v) \
939 do { dst[dstUsed ].count = (_n); \
940 dst[dstUsed++].sval = (_v); \
941 } while (0)
942
943 /* byte 0 */
944 if (descr & TREE_DESCR_64) PUT(8, tree[0]); else
945 if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
946 if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
947 if (descr & TREE_DESCR_8_0) PUT(1, tree[0]);
948 /* byte 1 */
949 if (descr & TREE_DESCR_8_1) PUT(1, tree[1]);
950 /* byte 2 */
951 if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
952 if (descr & TREE_DESCR_8_2) PUT(1, tree[2]);
953 /* byte 3 */
954 if (descr & TREE_DESCR_8_3) PUT(1, tree[3]);
955 /* byte 4 */
956 if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
957 if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
958 if (descr & TREE_DESCR_8_4) PUT(1, tree[4]);
959 /* byte 5 */
960 if (descr & TREE_DESCR_8_5) PUT(1, tree[5]);
961 /* byte 6 */
962 if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
963 if (descr & TREE_DESCR_8_6) PUT(1, tree[6]);
964 /* byte 7 */
965 if (descr & TREE_DESCR_8_7) PUT(1, tree[7]);
966
967# undef PUT
968 /* END sequentialise the tree described by (descr,tree). */
969
970 }
971 tl_assert(cloff == N_LINE_ARANGE);
972 tl_assert(dstUsed <= nDst);
973
974 *dstUsedP = dstUsed;
975}
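/* Illustrative example (not part of the original code): a line in
   which every 8-byte tree is described by TREE_DESCR_64 sequentialises
   to 8 CountedSVals, each with count == 8, together covering all
   N_LINE_ARANGE (64) bytes; a fully split line (all TREE_DESCR_8_*
   bits set) produces 64 entries with count == 1. */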
976
977/* Write the cacheline 'wix' to backing store. Where it ends up
978 is determined by its tag field. */
979static __attribute__((noinline)) void cacheline_wback ( UWord wix )
980{
981 Word i, j, k, m;
982 Addr tag;
983 SecMap* sm;
984 CacheLine* cl;
985 LineZ* lineZ;
986 LineF* lineF;
987 Word zix, fix, csvalsUsed;
988 CountedSVal csvals[N_LINE_ARANGE];
989 SVal sv;
990
991 if (0)
992 VG_(printf)("scache wback line %d\n", (Int)wix);
993
994 tl_assert(wix >= 0 && wix < N_WAY_NENT);
995
996 tag = cache_shmem.tags0[wix];
997 cl = &cache_shmem.lyns0[wix];
998
999 /* The cache line may have been invalidated; if so, ignore it. */
1000 if (!is_valid_scache_tag(tag))
1001 return;
1002
1003 /* Where are we going to put it? */
1004 sm = NULL;
1005 lineZ = NULL;
1006 lineF = NULL;
1007 zix = fix = -1;
1008
1009 /* find the Z line to write in and rcdec it or the associated F
1010 line. */
1011 find_Z_for_writing( &sm, &zix, tag );
1012
1013 tl_assert(sm);
1014 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
1015 lineZ = &sm->linesZ[zix];
1016
1017 /* Generate the data to be stored */
1018 if (SCE_CACHELINE)
1019 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1020
1021 csvalsUsed = -1;
1022 sequentialise_CacheLine( csvals, &csvalsUsed,
1023 N_LINE_ARANGE, cl );
1024 tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
1025 if (0) VG_(printf)("%lu ", csvalsUsed);
1026
1027 lineZ->dict[0] = lineZ->dict[1]
1028 = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1029
1030 /* i indexes actual shadow values, k is cursor in csvals */
1031 i = 0;
1032 for (k = 0; k < csvalsUsed; k++) {
1033
1034 sv = csvals[k].sval;
1035 if (SCE_SVALS)
1036 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1037 /* do we already have it? */
1038 if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
1039 if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
1040 if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
1041 if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
1042 /* no. look for a free slot. */
1043 if (SCE_SVALS)
1044 tl_assert(sv != SVal_INVALID);
1045 if (lineZ->dict[0]
1046 == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
1047 if (lineZ->dict[1]
1048 == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
1049 if (lineZ->dict[2]
1050 == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
1051 if (lineZ->dict[3]
1052 == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
1053 break; /* we'll have to use the f rep */
1054 dict_ok:
1055 m = csvals[k].count;
1056 if (m == 8) {
1057 write_twobit_array( lineZ->ix2s, i+0, j );
1058 write_twobit_array( lineZ->ix2s, i+1, j );
1059 write_twobit_array( lineZ->ix2s, i+2, j );
1060 write_twobit_array( lineZ->ix2s, i+3, j );
1061 write_twobit_array( lineZ->ix2s, i+4, j );
1062 write_twobit_array( lineZ->ix2s, i+5, j );
1063 write_twobit_array( lineZ->ix2s, i+6, j );
1064 write_twobit_array( lineZ->ix2s, i+7, j );
1065 i += 8;
1066 }
1067 else if (m == 4) {
1068 write_twobit_array( lineZ->ix2s, i+0, j );
1069 write_twobit_array( lineZ->ix2s, i+1, j );
1070 write_twobit_array( lineZ->ix2s, i+2, j );
1071 write_twobit_array( lineZ->ix2s, i+3, j );
1072 i += 4;
1073 }
1074 else if (m == 1) {
1075 write_twobit_array( lineZ->ix2s, i+0, j );
1076 i += 1;
1077 }
1078 else if (m == 2) {
1079 write_twobit_array( lineZ->ix2s, i+0, j );
1080 write_twobit_array( lineZ->ix2s, i+1, j );
1081 i += 2;
1082 }
1083 else {
1084 tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
1085 }
1086
1087 }
1088
1089 if (LIKELY(i == N_LINE_ARANGE)) {
1090 /* Construction of the compressed representation was
1091 successful. */
1092 rcinc_LineZ(lineZ);
1093 stats__cache_Z_wbacks++;
1094 } else {
1095 /* Cannot use the compressed(z) representation. Use the full(f)
1096 rep instead. */
1097 tl_assert(i >= 0 && i < N_LINE_ARANGE);
1098 alloc_F_for_writing( sm, &fix );
1099 tl_assert(sm->linesF);
1100 tl_assert(sm->linesF_size > 0);
1101 tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
1102 lineF = &sm->linesF[fix];
1103 tl_assert(!lineF->inUse);
1104 lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1105 lineZ->dict[1] = (SVal)fix;
1106 lineF->inUse = True;
1107 i = 0;
1108 for (k = 0; k < csvalsUsed; k++) {
1109 if (SCE_SVALS)
1110 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1111 sv = csvals[k].sval;
1112 if (SCE_SVALS)
1113 tl_assert(sv != SVal_INVALID);
1114 for (m = csvals[k].count; m > 0; m--) {
1115 lineF->w64s[i] = sv;
1116 i++;
1117 }
1118 }
1119 tl_assert(i == N_LINE_ARANGE);
1120 rcinc_LineF(lineF);
1121 stats__cache_F_wbacks++;
1122 }
1123
1124 //if (anyShared)
1125 // sm->mbHasShared = True;
1126
1127 /* mb_tidy_one_cacheline(); */
1128}
1129
1130/* Fetch the cacheline 'wix' from the backing store. The tag
1131 associated with 'wix' is assumed to have already been filled in;
1132 hence that is used to determine where in the backing store to read
1133 from. */
1134static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
1135{
1136 Word i;
1137 Addr tag;
1138 CacheLine* cl;
1139 LineZ* lineZ;
1140 LineF* lineF;
1141
1142 if (0)
1143 VG_(printf)("scache fetch line %d\n", (Int)wix);
1144
1145 tl_assert(wix >= 0 && wix < N_WAY_NENT);
1146
1147 tag = cache_shmem.tags0[wix];
1148 cl = &cache_shmem.lyns0[wix];
1149
1150 /* reject nonsense requests */
1151 tl_assert(is_valid_scache_tag(tag));
1152
1153 lineZ = NULL;
1154 lineF = NULL;
1155 find_ZF_for_reading( &lineZ, &lineF, tag );
1156 tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
1157
1158 /* expand the data into the bottom layer of the tree, then get
1159 cacheline_normalise to build the descriptor array. */
1160 if (lineF) {
1161 tl_assert(lineF->inUse);
1162 for (i = 0; i < N_LINE_ARANGE; i++) {
1163 cl->svals[i] = lineF->w64s[i];
1164 }
1165 stats__cache_F_fetches++;
1166 } else {
1167 for (i = 0; i < N_LINE_ARANGE; i++) {
1168 SVal sv;
1169 UWord ix = read_twobit_array( lineZ->ix2s, i );
1170 /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
1171 sv = lineZ->dict[ix];
1172 tl_assert(sv != SVal_INVALID);
1173 cl->svals[i] = sv;
1174 }
1175 stats__cache_Z_fetches++;
1176 }
1177 normalise_CacheLine( cl );
1178}
1179
1180static void shmem__invalidate_scache ( void ) {
1181 Word wix;
1182 if (0) VG_(printf)("%s","scache inval\n");
1183 tl_assert(!is_valid_scache_tag(1));
1184 for (wix = 0; wix < N_WAY_NENT; wix++) {
1185 cache_shmem.tags0[wix] = 1/*INVALID*/;
1186 }
1187 stats__cache_invals++;
1188}
1189
1190static void shmem__flush_and_invalidate_scache ( void ) {
1191 Word wix;
1192 Addr tag;
1193 if (0) VG_(printf)("%s","scache flush and invalidate\n");
1194 tl_assert(!is_valid_scache_tag(1));
1195 for (wix = 0; wix < N_WAY_NENT; wix++) {
1196 tag = cache_shmem.tags0[wix];
1197 if (tag == 1/*INVALID*/) {
1198 /* already invalid; nothing to do */
1199 } else {
1200 tl_assert(is_valid_scache_tag(tag));
1201 cacheline_wback( wix );
1202 }
1203 cache_shmem.tags0[wix] = 1/*INVALID*/;
1204 }
1205 stats__cache_flushes++;
1206 stats__cache_invals++;
1207}
1208
1209
1210static inline Bool aligned16 ( Addr a ) {
1211 return 0 == (a & 1);
1212}
1213static inline Bool aligned32 ( Addr a ) {
1214 return 0 == (a & 3);
1215}
1216static inline Bool aligned64 ( Addr a ) {
1217 return 0 == (a & 7);
1218}
1219static inline UWord get_cacheline_offset ( Addr a ) {
1220 return (UWord)(a & (N_LINE_ARANGE - 1));
1221}
1222static inline Addr cacheline_ROUNDUP ( Addr a ) {
1223 return ROUNDUP(a, N_LINE_ARANGE);
1224}
1225static inline Addr cacheline_ROUNDDN ( Addr a ) {
1226 return ROUNDDN(a, N_LINE_ARANGE);
1227}
1228static inline UWord get_treeno ( Addr a ) {
1229 return get_cacheline_offset(a) >> 3;
1230}
1231static inline UWord get_tree_offset ( Addr a ) {
1232 return a & 7;
1233}
1234
1235static __attribute__((noinline))
1236 CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
1237static inline CacheLine* get_cacheline ( Addr a )
1238{
1239 /* tag is 'a' with the in-line offset masked out,
1240 eg a[31]..a[6] 000000 */
1241 Addr tag = a & ~(N_LINE_ARANGE - 1);
1242 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1243 stats__cache_totrefs++;
1244 if (LIKELY(tag == cache_shmem.tags0[wix])) {
1245 return &cache_shmem.lyns0[wix];
1246 } else {
1247 return get_cacheline_MISS( a );
1248 }
1249}
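/* Worked example (illustrative only): with N_LINE_BITS == 6 and
   N_WAY_BITS == 16, an access at a == 0x1234 uses
   tag == (0x1234 & ~63) == 0x1200 and way index
   wix == ((0x1234 >> 6) & 0xFFFF) == 0x48. */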
1250
1251static __attribute__((noinline))
1252 CacheLine* get_cacheline_MISS ( Addr a )
1253{
1254 /* tag is 'a' with the in-line offset masked out,
1255 eg a[31]..a[6] 000000 */
1256
1257 CacheLine* cl;
1258 Addr* tag_old_p;
1259 Addr tag = a & ~(N_LINE_ARANGE - 1);
1260 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1261
1262 tl_assert(tag != cache_shmem.tags0[wix]);
1263
1264 /* Dump the old line into the backing store. */
1265 stats__cache_totmisses++;
1266
1267 cl = &cache_shmem.lyns0[wix];
1268 tag_old_p = &cache_shmem.tags0[wix];
1269
1270 if (is_valid_scache_tag( *tag_old_p )) {
1271 /* EXPENSIVE and REDUNDANT: callee does it */
1272 if (SCE_CACHELINE)
1273 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1274 cacheline_wback( wix );
1275 }
1276 /* and reload the new one */
1277 *tag_old_p = tag;
1278 cacheline_fetch( wix );
1279 if (SCE_CACHELINE)
1280 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1281 return cl;
1282}
1283
1284static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1285 stats__cline_64to32pulldown++;
1286 switch (toff) {
1287 case 0: case 4:
1288 tl_assert(descr & TREE_DESCR_64);
1289 tree[4] = tree[0];
1290 descr &= ~TREE_DESCR_64;
1291 descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
1292 break;
1293 default:
1294 tl_assert(0);
1295 }
1296 return descr;
1297}
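/* Illustrative example (not part of the original code): calling
   pulldown_to_32(tree, 4, descr) on a tree whose descriptor is
   TREE_DESCR_64 copies tree[0] into tree[4], clears TREE_DESCR_64 and
   sets TREE_DESCR_32_1 | TREE_DESCR_32_0, i.e. the single 64-bit
   value is split into two identical 32-bit halves. */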
1298
1299static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1300 stats__cline_32to16pulldown++;
1301 switch (toff) {
1302 case 0: case 2:
1303 if (!(descr & TREE_DESCR_32_0)) {
1304 descr = pulldown_to_32(tree, 0, descr);
1305 }
1306 tl_assert(descr & TREE_DESCR_32_0);
1307 tree[2] = tree[0];
1308 descr &= ~TREE_DESCR_32_0;
1309 descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
1310 break;
1311 case 4: case 6:
1312 if (!(descr & TREE_DESCR_32_1)) {
1313 descr = pulldown_to_32(tree, 4, descr);
1314 }
1315 tl_assert(descr & TREE_DESCR_32_1);
1316 tree[6] = tree[4];
1317 descr &= ~TREE_DESCR_32_1;
1318 descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
1319 break;
1320 default:
1321 tl_assert(0);
1322 }
1323 return descr;
1324}
1325
1326static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1327 stats__cline_16to8pulldown++;
1328 switch (toff) {
1329 case 0: case 1:
1330 if (!(descr & TREE_DESCR_16_0)) {
1331 descr = pulldown_to_16(tree, 0, descr);
1332 }
1333 tl_assert(descr & TREE_DESCR_16_0);
1334 tree[1] = tree[0];
1335 descr &= ~TREE_DESCR_16_0;
1336 descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
1337 break;
1338 case 2: case 3:
1339 if (!(descr & TREE_DESCR_16_1)) {
1340 descr = pulldown_to_16(tree, 2, descr);
1341 }
1342 tl_assert(descr & TREE_DESCR_16_1);
1343 tree[3] = tree[2];
1344 descr &= ~TREE_DESCR_16_1;
1345 descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
1346 break;
1347 case 4: case 5:
1348 if (!(descr & TREE_DESCR_16_2)) {
1349 descr = pulldown_to_16(tree, 4, descr);
1350 }
1351 tl_assert(descr & TREE_DESCR_16_2);
1352 tree[5] = tree[4];
1353 descr &= ~TREE_DESCR_16_2;
1354 descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
1355 break;
1356 case 6: case 7:
1357 if (!(descr & TREE_DESCR_16_3)) {
1358 descr = pulldown_to_16(tree, 6, descr);
1359 }
1360 tl_assert(descr & TREE_DESCR_16_3);
1361 tree[7] = tree[6];
1362 descr &= ~TREE_DESCR_16_3;
1363 descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
1364 break;
1365 default:
1366 tl_assert(0);
1367 }
1368 return descr;
1369}
1370
1371
1372static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
1373 UShort mask;
1374 switch (toff) {
1375 case 0:
1376 mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
1377 tl_assert( (descr & mask) == mask );
1378 descr &= ~mask;
1379 descr |= TREE_DESCR_16_0;
1380 break;
1381 case 2:
1382 mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
1383 tl_assert( (descr & mask) == mask );
1384 descr &= ~mask;
1385 descr |= TREE_DESCR_16_1;
1386 break;
1387 case 4:
1388 mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
1389 tl_assert( (descr & mask) == mask );
1390 descr &= ~mask;
1391 descr |= TREE_DESCR_16_2;
1392 break;
1393 case 6:
1394 mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
1395 tl_assert( (descr & mask) == mask );
1396 descr &= ~mask;
1397 descr |= TREE_DESCR_16_3;
1398 break;
1399 default:
1400 tl_assert(0);
1401 }
1402 return descr;
1403}
1404
1405static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
1406 UShort mask;
1407 switch (toff) {
1408 case 0:
1409 if (!(descr & TREE_DESCR_16_0))
1410 descr = pullup_descr_to_16(descr, 0);
1411 if (!(descr & TREE_DESCR_16_1))
1412 descr = pullup_descr_to_16(descr, 2);
1413 mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
1414 tl_assert( (descr & mask) == mask );
1415 descr &= ~mask;
1416 descr |= TREE_DESCR_32_0;
1417 break;
1418 case 4:
1419 if (!(descr & TREE_DESCR_16_2))
1420 descr = pullup_descr_to_16(descr, 4);
1421 if (!(descr & TREE_DESCR_16_3))
1422 descr = pullup_descr_to_16(descr, 6);
1423 mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
1424 tl_assert( (descr & mask) == mask );
1425 descr &= ~mask;
1426 descr |= TREE_DESCR_32_1;
1427 break;
1428 default:
1429 tl_assert(0);
1430 }
1431 return descr;
1432}
1433
1434static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
1435 switch (toff) {
1436 case 0: case 4:
1437 return 0 != (descr & TREE_DESCR_64);
1438 default:
1439 tl_assert(0);
1440 }
1441}
1442
1443static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
1444 switch (toff) {
1445 case 0:
1446 return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
1447 case 2:
1448 return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
1449 case 4:
1450 return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
1451 case 6:
1452 return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
1453 default:
1454 tl_assert(0);
1455 }
1456}
1457
1458/* ------------ Cache management ------------ */
1459
1460static void zsm_flush_cache ( void )
1461{
1462 shmem__flush_and_invalidate_scache();
1463}
1464
1465
1466static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
1467{
1468 tl_assert( sizeof(UWord) == sizeof(Addr) );
1469
1470 rcinc = p_rcinc;
1471 rcdec = p_rcdec;
1472
1473 tl_assert(map_shmem == NULL);
1474 map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
1475 HG_(free),
1476 NULL/*unboxed UWord cmp*/);
1477 tl_assert(map_shmem != NULL);
1478 shmem__invalidate_scache();
1479
1480 /* a SecMap must contain an integral number of CacheLines */
1481 tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
1482 /* also ... a CacheLine holds an integral number of trees */
1483 tl_assert(0 == (N_LINE_ARANGE % 8));
1484}
1485
1486/////////////////////////////////////////////////////////////////
1487/////////////////////////////////////////////////////////////////
1488// //
1489// SECTION END compressed shadow memory //
1490// //
1491/////////////////////////////////////////////////////////////////
1492/////////////////////////////////////////////////////////////////
1493
1494
1495
1496/////////////////////////////////////////////////////////////////
1497/////////////////////////////////////////////////////////////////
1498// //
1499// SECTION BEGIN vts primitives //
1500// //
1501/////////////////////////////////////////////////////////////////
1502/////////////////////////////////////////////////////////////////
1503
1504#ifndef __HB_VTS_H
1505#define __HB_VTS_H
1506
1507/* VtsIDs can't exceed 30 bits, since they have to be packed into the
1508 lowest 30 bits of an SVal. */
1509typedef UInt VtsID;
1510#define VtsID_INVALID 0xFFFFFFFF
1511
1512/* A VTS contains .ts, its vector clock, and also .id, a field to hold
1513 a backlink for the caller's convenience. Since we have no idea
1514 what to set that to in the library, it always gets set to
1515 VtsID_INVALID. */
1516typedef
1517 struct {
1518 VtsID id;
1519 XArray* ts; /* XArray* ScalarTS(abstract) */
1520 }
1521 VTS;
1522
1523
1524/* Create a new, empty VTS. */
1525VTS* VTS__new ( void );
1526
1527/* Delete this VTS in its entirety. */
1528void VTS__delete ( VTS* vts );
1529
1530/* Create a new singleton VTS. */
1531VTS* VTS__singleton ( Thr* thr, ULong tym );
1532
1533/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
1534 not modified. */
1535VTS* VTS__tick ( Thr* me, VTS* vts );
1536
1537/* Return a new VTS constructed as the join (max) of the 2 args.
1538 Neither arg is modified. */
1539VTS* VTS__join ( VTS* a, VTS* b );
1540
1541/* Compute the partial ordering relation of the two args. */
1542typedef
1543 enum { POrd_EQ=4, POrd_LT, POrd_GT, POrd_UN }
1544 POrd;
1545
1546POrd VTS__cmp ( VTS* a, VTS* b );
1547
1548/* Compute an arbitrary structural (total) ordering on the two args,
1549 based on their VCs, so they can be looked up in a table, tree, etc.
1550 Returns -1, 0 or 1. */
1551Word VTS__cmp_structural ( VTS* a, VTS* b );
1552
1553/* Debugging only. Display the given VTS in the buffer. */
1554void VTS__show ( HChar* buf, Int nBuf, VTS* vts );
1555
1556/* Debugging only. Return vts[index], so to speak. */
1557ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
1558
1559#endif /* ! __HB_VTS_H */
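/* Illustrative example (T1 and T2 are hypothetical threads; not part
   of the original code):
     VTS__singleton(T1, 1)                  gives { T1:1 }
     VTS__tick(T1, { T1:1, T2:3 })          gives { T1:2, T2:3 }
     VTS__join({ T1:2 }, { T1:1, T2:3 })    gives { T1:2, T2:3 }
     VTS__cmp({ T1:1 }, { T1:1, T2:3 })     is POrd_LT
   since threads missing from a VTS behave as implicit zeroes. */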
1560
1561
1562/*--------------- to do with Vector Timestamps ---------------*/
1563
1564/* Scalar Timestamp */
1565typedef
1566 struct {
1567 Thr* thr;
1568 ULong tym;
1569 }
1570 ScalarTS;
1571
1572
1573static Bool is_sane_VTS ( VTS* vts )
1574{
1575 UWord i, n;
1576 ScalarTS *st1, *st2;
1577 if (!vts) return False;
1578 if (!vts->ts) return False;
1579 n = VG_(sizeXA)( vts->ts );
1580 if (n >= 2) {
1581 for (i = 0; i < n-1; i++) {
1582 st1 = VG_(indexXA)( vts->ts, i );
1583 st2 = VG_(indexXA)( vts->ts, i+1 );
1584 if (st1->thr >= st2->thr)
1585 return False;
1586 if (st1->tym == 0 || st2->tym == 0)
1587 return False;
1588 }
1589 }
1590 return True;
1591}
1592
1593
1594/* Create a new, empty VTS.
1595*/
1596VTS* VTS__new ( void )
1597{
1598 VTS* vts;
1599 vts = HG_(zalloc)( "libhb.VTS__new.1", sizeof(VTS) );
1600 tl_assert(vts);
1601 vts->id = VtsID_INVALID;
1602 vts->ts = VG_(newXA)( HG_(zalloc), "libhb.VTS__new.2",
1603 HG_(free), sizeof(ScalarTS) );
1604 tl_assert(vts->ts);
1605 return vts;
1606}
1607
1608
1609/* Delete this VTS in its entirety.
1610*/
1611void VTS__delete ( VTS* vts )
1612{
1613 tl_assert(vts);
1614 tl_assert(vts->ts);
1615 VG_(deleteXA)( vts->ts );
1616 HG_(free)(vts);
1617}
1618
1619
1620/* Create a new singleton VTS.
1621*/
1622VTS* VTS__singleton ( Thr* thr, ULong tym ) {
1623 ScalarTS st;
1624 VTS* vts;
1625 tl_assert(thr);
1626 tl_assert(tym >= 1);
1627 vts = VTS__new();
1628 st.thr = thr;
1629 st.tym = tym;
1630 VG_(addToXA)( vts->ts, &st );
1631 return vts;
1632}
1633
1634
1635/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
1636 not modified.
1637*/
1638VTS* VTS__tick ( Thr* me, VTS* vts )
1639{
1640 ScalarTS* here = NULL;
1641 ScalarTS tmp;
1642 VTS* res;
1643 Word i, n;
1644 tl_assert(me);
1645 tl_assert(is_sane_VTS(vts));
1646 //if (0) VG_(printf)("tick vts thrno %ld szin %d\n",
1647 // (Word)me->errmsg_index, (Int)VG_(sizeXA)(vts) );
1648 res = VTS__new();
1649 n = VG_(sizeXA)( vts->ts );
1650
1651 /* main loop doesn't handle zero-entry case correctly, so
1652 special-case it. */
1653 if (n == 0) {
1654 tmp.thr = me;
1655 tmp.tym = 1;
1656 VG_(addToXA)( res->ts, &tmp );
1657 tl_assert(is_sane_VTS(res));
1658 return res;
1659 }
1660
1661 for (i = 0; i < n; i++) {
1662 here = VG_(indexXA)( vts->ts, i );
1663 if (me < here->thr) {
1664 /* We just went past 'me', without seeing it. */
1665 tmp.thr = me;
1666 tmp.tym = 1;
1667 VG_(addToXA)( res->ts, &tmp );
1668 tmp = *here;
1669 VG_(addToXA)( res->ts, &tmp );
1670 i++;
1671 break;
1672 }
1673 else if (me == here->thr) {
1674 tmp = *here;
1675 tmp.tym++;
1676 VG_(addToXA)( res->ts, &tmp );
1677 i++;
1678 break;
1679 }
1680 else /* me > here->thr */ {
1681 tmp = *here;
1682 VG_(addToXA)( res->ts, &tmp );
1683 }
1684 }
1685 tl_assert(i >= 0 && i <= n);
1686 if (i == n && here && here->thr < me) {
1687 tmp.thr = me;
1688 tmp.tym = 1;
1689 VG_(addToXA)( res->ts, &tmp );
1690 } else {
1691 for (/*keepgoing*/; i < n; i++) {
1692 here = VG_(indexXA)( vts->ts, i );
1693 tmp = *here;
1694 VG_(addToXA)( res->ts, &tmp );
1695 }
1696 }
1697 tl_assert(is_sane_VTS(res));
1698 //if (0) VG_(printf)("tick vts thrno %ld szou %d\n",
1699 // (Word)me->errmsg_index, (Int)VG_(sizeXA)(res) );
1700 return res;
1701}
1702
1703
1704/* Return a new VTS constructed as the join (max) of the 2 args.
1705 Neither arg is modified.
1706*/
1707VTS* VTS__join ( VTS* a, VTS* b )
1708{
1709 Word ia, ib, useda, usedb;
1710 ULong tyma, tymb, tymMax;
1711 Thr* thr;
1712 VTS* res;
1713 ScalarTS *tmpa, *tmpb;
1714
1715 tl_assert(a && a->ts);
1716 tl_assert(b && b->ts);
1717 useda = VG_(sizeXA)( a->ts );
1718 usedb = VG_(sizeXA)( b->ts );
1719
1720 res = VTS__new();
1721 ia = ib = 0;
1722
1723 while (1) {
1724
1725 /* This logic is to enumerate triples (thr, tyma, tymb) drawn
1726 from a and b in order, where thr is the next Thr*
1727 occurring in either a or b, and tyma/b are the relevant
1728 scalar timestamps, taking into account implicit zeroes. */
1729 tl_assert(ia >= 0 && ia <= useda);
1730 tl_assert(ib >= 0 && ib <= usedb);
1731 tmpa = tmpb = NULL;
1732
1733 if (ia == useda && ib == usedb) {
1734 /* both empty - done */
1735 break;
1736 }
1737 else
1738 if (ia == useda && ib != usedb) {
1739 /* a empty, use up b */
1740 tmpb = VG_(indexXA)( b->ts, ib );
1741 thr = tmpb->thr;
1742 tyma = 0;
1743 tymb = tmpb->tym;
1744 ib++;
1745 }
1746 else
1747 if (ia != useda && ib == usedb) {
1748 /* b empty, use up a */
1749 tmpa = VG_(indexXA)( a->ts, ia );
1750 thr = tmpa->thr;
1751 tyma = tmpa->tym;
1752 tymb = 0;
1753 ia++;
1754 }
1755 else {
1756 /* both not empty; extract lowest-Thr*'d triple */
1757 tmpa = VG_(indexXA)( a->ts, ia );
1758 tmpb = VG_(indexXA)( b->ts, ib );
1759 if (tmpa->thr < tmpb->thr) {
1760 /* a has the lowest unconsidered Thr* */
1761 thr = tmpa->thr;
1762 tyma = tmpa->tym;
1763 tymb = 0;
1764 ia++;
1765 }
1766 else
1767 if (tmpa->thr > tmpb->thr) {
1768 /* b has the lowest unconsidered Thr* */
1769 thr = tmpb->thr;
1770 tyma = 0;
1771 tymb = tmpb->tym;
1772 ib++;
1773 } else {
1774 /* they both next mention the same Thr* */
1775 tl_assert(tmpa->thr == tmpb->thr);
1776 thr = tmpa->thr; /* == tmpb->thr */
1777 tyma = tmpa->tym;
1778 tymb = tmpb->tym;
1779 ia++;
1780 ib++;
1781 }
1782 }
1783
1784 /* having laboriously determined (thr, tyma, tymb), do something
1785 useful with it. */
1786 tymMax = tyma > tymb ? tyma : tymb;
1787 if (tymMax > 0) {
1788 ScalarTS st;
1789 st.thr = thr;
1790 st.tym = tymMax;
1791 VG_(addToXA)( res->ts, &st );
1792 }
1793
1794 }
1795
1796 tl_assert(is_sane_VTS( res ));
1797
1798 return res;
1799}
1800
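/* Illustrative example (not part of the library): the join is a
   per-thread maximum, with a thread missing from one argument treated
   as having timestamp zero.  Assuming pointer order T1 < T2 < T3,

      VTS__join( [T1:4, T3:7], [T1:2, T2:5] )  ==  [T1:4, T2:5, T3:7]

   Entries whose maximum would be zero are not stored at all. */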
1801
1802/* Compute the partial ordering relation of the two args.
1803*/
1804POrd VTS__cmp ( VTS* a, VTS* b )
1805{
1806 Word ia, ib, useda, usedb;
1807 ULong tyma, tymb;
1808 Thr* thr;
1809 ScalarTS *tmpa, *tmpb;
1810
1811 Bool all_leq = True;
1812 Bool all_geq = True;
1813
1814 tl_assert(a && a->ts);
1815 tl_assert(b && b->ts);
1816 useda = VG_(sizeXA)( a->ts );
1817 usedb = VG_(sizeXA)( b->ts );
1818
1819 ia = ib = 0;
1820
1821 while (1) {
1822
1823 /* This logic is to enumerate triples (thr, tyma, tymb) drawn
1824 from a and b in order, where thr is the next Thr*
1825 occurring in either a or b, and tyma/b are the relevant
1826 scalar timestamps, taking into account implicit zeroes. */
1827 tl_assert(ia >= 0 && ia <= useda);
1828 tl_assert(ib >= 0 && ib <= usedb);
1829 tmpa = tmpb = NULL;
1830
1831 if (ia == useda && ib == usedb) {
1832 /* both empty - done */
1833 break;
1834 }
1835 else
1836 if (ia == useda && ib != usedb) {
1837 /* a empty, use up b */
1838 tmpb = VG_(indexXA)( b->ts, ib );
1839 thr = tmpb->thr;
1840 tyma = 0;
1841 tymb = tmpb->tym;
1842 ib++;
1843 }
1844 else
1845 if (ia != useda && ib == usedb) {
1846 /* b empty, use up a */
1847 tmpa = VG_(indexXA)( a->ts, ia );
1848 thr = tmpa->thr;
1849 tyma = tmpa->tym;
1850 tymb = 0;
1851 ia++;
1852 }
1853 else {
1854 /* both not empty; extract lowest-Thr*'d triple */
1855 tmpa = VG_(indexXA)( a->ts, ia );
1856 tmpb = VG_(indexXA)( b->ts, ib );
1857 if (tmpa->thr < tmpb->thr) {
1858 /* a has the lowest unconsidered Thr* */
1859 thr = tmpa->thr;
1860 tyma = tmpa->tym;
1861 tymb = 0;
1862 ia++;
1863 }
1864 else
1865 if (tmpa->thr > tmpb->thr) {
1866 /* b has the lowest unconsidered Thr* */
1867 thr = tmpb->thr;
1868 tyma = 0;
1869 tymb = tmpb->tym;
1870 ib++;
1871 } else {
1872 /* they both next mention the same Thr* */
1873 tl_assert(tmpa->thr == tmpb->thr);
1874 thr = tmpa->thr; /* == tmpb->thr */
1875 tyma = tmpa->tym;
1876 tymb = tmpb->tym;
1877 ia++;
1878 ib++;
1879 }
1880 }
1881
1882 /* having laboriously determined (thr, tyma, tymb), do something
1883 useful with it. */
1884 if (tyma < tymb)
1885 all_geq = False;
1886 if (tyma > tymb)
1887 all_leq = False;
1888 }
1889
1890 if (all_leq && all_geq)
1891 return POrd_EQ;
1892 /* now we know they aren't equal, so either all_leq or all_geq or
1893 both are false. */
1894 if (all_leq)
1895 return POrd_LT;
1896 if (all_geq)
1897 return POrd_GT;
1898 /* hmm, neither all_geq or all_leq. This means unordered. */
1899 return POrd_UN;
1900}
1901
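/* Illustrative example (not part of the library): with pointer order
   T1 < T2,

      VTS__cmp( [T1:1, T2:2], [T1:1, T2:3] )  ==  POrd_LT
      VTS__cmp( [T1:1, T2:3], [T1:1, T2:2] )  ==  POrd_GT
      VTS__cmp( [T1:2],       [T2:2]       )  ==  POrd_UN

   since in the last case each argument is ahead of the other for one
   thread (the thread missing from a vector counts as zero). */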
1902
1903/* Compute an arbitrary structural (total) ordering on the two args,
1904 based on their VCs, so they can be looked up in a table, tree, etc.
1905 Returns -1, 0 or 1. (really just 'deriving Ord' :-)
1906*/
1907Word VTS__cmp_structural ( VTS* a, VTS* b )
1908{
1909 /* We just need to generate an arbitrary total ordering based on
1910 a->ts and b->ts. Preferably do it in a way which comes across likely
1911 differences relatively quickly. */
1912 Word i, useda, usedb;
1913 ScalarTS *tmpa, *tmpb;
1914
1915 tl_assert(a && a->ts);
1916 tl_assert(b && b->ts);
1917 useda = VG_(sizeXA)( a->ts );
1918 usedb = VG_(sizeXA)( b->ts );
1919
1920 if (useda < usedb) return -1;
1921 if (useda > usedb) return 1;
1922
1923 /* Same length vectors, so let's step through them together. */
1924 tl_assert(useda == usedb);
1925 for (i = 0; i < useda; i++) {
1926 tmpa = VG_(indexXA)( a->ts, i );
1927 tmpb = VG_(indexXA)( b->ts, i );
1928 if (tmpa->tym < tmpb->tym) return -1;
1929 if (tmpa->tym > tmpb->tym) return 1;
1930 if (tmpa->thr < tmpb->thr) return -1;
1931 if (tmpa->thr > tmpb->thr) return 1;
1932 }
1933
1934 /* They're identical. */
1935 return 0;
1936}
1937
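/* Illustrative example (not part of the library): the structural
   ordering compares lengths first, then entries pairwise, so

      VTS__cmp_structural( [T1:1, T2:9], [T3:5] )  >   0   (longer vector)
      VTS__cmp_structural( [T1:3],       [T1:3] )  ==  0   (identical)

   It is unrelated to the happens-before partial order above; it only
   exists so that VTSs can be used as keys in the vts_set WordFM below. */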
1938
1939/* Debugging only. Display the given VTS in the buffer.
1940*/
1941void VTS__show ( HChar* buf, Int nBuf, VTS* vts ) {
1942 ScalarTS* st;
1943 HChar unit[64];
1944 Word i, n;
1945 Int avail = nBuf;
1946 tl_assert(vts && vts->ts);
1947 tl_assert(nBuf > 16);
1948 buf[0] = '[';
1949 buf[1] = 0;
1950 n = VG_(sizeXA)( vts->ts );
1951 for (i = 0; i < n; i++) {
1952 tl_assert(avail >= 40);
1953 st = VG_(indexXA)( vts->ts, i );
1954 VG_(memset)(unit, 0, sizeof(unit));
1955 VG_(sprintf)(unit, i < n-1 ? "%p:%lld " : "%p:%lld",
1956 st->thr, st->tym);
1957 if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
1958 VG_(strcat)(buf, " ...]");
1959 buf[nBuf-1] = 0;
1960 return;
1961 }
1962 VG_(strcat)(buf, unit);
1963 avail -= VG_(strlen)(unit);
1964 }
1965 VG_(strcat)(buf, "]");
1966 buf[nBuf-1] = 0;
1967}
1968
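/* Illustrative example (not part of the library): for a two-entry VTS
   the rendered form looks something like

      "[0x5A3F000:1 0x5A3F440:3]"

   (each entry is the Thr* address, then its scalar timestamp), with
   " ...]" appended instead if the buffer runs short.  The addresses
   shown here are made up. */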
1969
1970/* Debugging only. Return vts[index], so to speak.
1971*/
1972ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) {
1973 UWord i, n;
1974 tl_assert(vts && vts->ts);
1975 n = VG_(sizeXA)( vts->ts );
1976 for (i = 0; i < n; i++) {
1977 ScalarTS* st = VG_(indexXA)( vts->ts, i );
1978 if (st->thr == idx)
1979 return st->tym;
1980 }
1981 return 0;
1982}
1983
1984
1985/////////////////////////////////////////////////////////////////
1986/////////////////////////////////////////////////////////////////
1987// //
1988// SECTION END vts primitives //
1989// //
1990/////////////////////////////////////////////////////////////////
1991/////////////////////////////////////////////////////////////////
1992
1993
1994
1995/////////////////////////////////////////////////////////////////
1996/////////////////////////////////////////////////////////////////
1997// //
1998// SECTION BEGIN main library //
1999// //
2000/////////////////////////////////////////////////////////////////
2001/////////////////////////////////////////////////////////////////
2002
2003
2004/////////////////////////////////////////////////////////
2005// //
2006// VTS set //
2007// //
2008/////////////////////////////////////////////////////////
2009
2010static WordFM* /* VTS* void void */ vts_set = NULL;
2011
2012static void vts_set_init ( void )
2013{
2014 tl_assert(!vts_set);
2015 vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
2016 HG_(free),
2017 (Word(*)(UWord,UWord))VTS__cmp_structural );
2018 tl_assert(vts_set);
2019}
2020
2021/* Given a newly made VTS, look in vts_set to see if we already have
2022 an identical one. If yes, free up this one and return instead a
2023 pointer to the existing one. If no, add this one to the set and
2024 return the same pointer. Caller differentiates the two cases by
2025 comparing returned pointer with the supplied one (although that
2026 does require that the supplied VTS is not already in the set).
2027*/
2028static VTS* vts_set__find_and_dealloc__or_add ( VTS* cand )
2029{
2030 UWord keyW, valW;
2031 /* lookup cand (by value) */
2032 if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
2033 /* found it */
2034 tl_assert(valW == 0);
2035 /* if this fails, cand (by ref) was already present (!) */
2036 tl_assert(keyW != (UWord)cand);
2037 VTS__delete(cand);
2038 return (VTS*)keyW;
2039 } else {
2040 /* not present. Add and return pointer to same. */
2041 VG_(addToFM)( vts_set, (UWord)cand, 0/*val is unused*/ );
2042 return cand;
2043 }
2044}
2045
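/* Illustrative sketch of the intended usage pattern (not a definitive
   API example; 'a' and 'b' stand for hypothetical VTS* values):

      VTS* fresh  = VTS__join( a, b );
      VTS* unique = vts_set__find_and_dealloc__or_add( fresh );
      if (unique != fresh) {
         // an identical VTS was already interned; 'fresh' has been
         // deleted by the call and must not be touched again
      }

   Either way, 'unique' is the canonical copy to hang on to. */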
2046
2047/////////////////////////////////////////////////////////
2048// //
2049// VTS table //
2050// //
2051/////////////////////////////////////////////////////////
2052
2053static void VtsID__invalidate_caches ( void ); /* fwds */
2054
2055/* A type to hold VTS table entries. Invariants:
2056 If .vts == NULL, then this entry is not in use, so:
2057 - .rc == 0
2058 - this entry is on the freelist (unfortunately, does not imply
2059        any constraints on value for .freelink)
2060 If .vts != NULL, then this entry is in use:
2061 - .vts is findable in vts_set
2062 - .vts->id == this entry number
2063 - no specific value for .rc (even 0 is OK)
2064    - this entry is not on freelist, so .freelink == VtsID_INVALID
2065*/
2066typedef
2067 struct {
2068 VTS* vts; /* vts, in vts_set */
2069 UWord rc; /* reference count - enough for entire aspace */
2070 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
2071 }
2072 VtsTE;
2073
2074/* The VTS table. */
2075static XArray* /* of VtsTE */ vts_tab = NULL;
2076
2077/* An index into the VTS table, indicating the start of the list of
2078 free (available for use) entries. If the list is empty, this is
2079 VtsID_INVALID. */
2080static VtsID vts_tab_freelist = VtsID_INVALID;
2081
2082/* Do a GC of vts_tab when the freelist becomes empty AND the size of
2083 vts_tab equals or exceeds this size. After GC, the value here is
2084 set appropriately so as to check for the next GC point. */
2085static Word vts_next_GC_at = 1000;
2086
2087static void vts_tab_init ( void )
2088{
2089 vts_tab
2090 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2091 HG_(free), sizeof(VtsTE) );
2092 vts_tab_freelist
2093 = VtsID_INVALID;
2094 tl_assert(vts_tab);
2095}
2096
2097/* Add ii to the free list, checking that it looks out-of-use. */
2098static void add_to_free_list ( VtsID ii )
2099{
2100 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2101 tl_assert(ie->vts == NULL);
2102 tl_assert(ie->rc == 0);
2103 tl_assert(ie->freelink == VtsID_INVALID);
2104 ie->freelink = vts_tab_freelist;
2105 vts_tab_freelist = ii;
2106}
2107
2108/* Get an entry from the free list. This will return VtsID_INVALID if
2109 the free list is empty. */
2110static VtsID get_from_free_list ( void )
2111{
2112 VtsID ii;
2113 VtsTE* ie;
2114 if (vts_tab_freelist == VtsID_INVALID)
2115 return VtsID_INVALID;
2116 ii = vts_tab_freelist;
2117 ie = VG_(indexXA)( vts_tab, ii );
2118 tl_assert(ie->vts == NULL);
2119 tl_assert(ie->rc == 0);
2120 vts_tab_freelist = ie->freelink;
2121 return ii;
2122}
2123
2124/* Produce a new VtsID that can be used, either by getting it from
2125 the freelist, or, if that is empty, by expanding vts_tab. */
2126static VtsID get_new_VtsID ( void )
2127{
2128 VtsID ii;
2129 VtsTE te;
2130 ii = get_from_free_list();
2131 if (ii != VtsID_INVALID)
2132 return ii;
2133 te.vts = NULL;
2134 te.rc = 0;
2135 te.freelink = VtsID_INVALID;
2136 ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2137 return ii;
2138}
2139
2140
2141/* Indirect callback from lib_zsm. */
2142static void VtsID__rcinc ( VtsID ii )
2143{
2144 VtsTE* ie;
2145 /* VG_(indexXA) does a range check for us */
2146 ie = VG_(indexXA)( vts_tab, ii );
2147 tl_assert(ie->vts); /* else it's not in use */
2148 tl_assert(ie->rc < ~0UL); /* else we can't continue */
2149 tl_assert(ie->vts->id == ii);
2150 ie->rc++;
2151}
2152
2153/* Indirect callback from lib_zsm. */
2154static void VtsID__rcdec ( VtsID ii )
2155{
2156 VtsTE* ie;
2157 /* VG_(indexXA) does a range check for us */
2158 ie = VG_(indexXA)( vts_tab, ii );
2159 tl_assert(ie->vts); /* else it's not in use */
2160 tl_assert(ie->rc > 0); /* else RC snafu */
2161 tl_assert(ie->vts->id == ii);
2162 ie->rc--;
2163}
2164
2165
2166/* Look up 'cand' in our collection of VTSs. If present, deallocate
2167 it and return the VtsID for the pre-existing version. If not
2168 present, add it to both vts_tab and vts_set, allocate a fresh VtsID
2169 for it, and return that. */
2170static VtsID vts_tab__find_and_dealloc__or_add ( VTS* cand )
2171{
2172 VTS* auld;
2173 tl_assert(cand->id == VtsID_INVALID);
2174 auld = vts_set__find_and_dealloc__or_add(cand);
2175 if (auld != cand) {
2176 /* We already have an Aulde one. Use that. */
2177 VtsTE* ie;
2178 tl_assert(auld->id != VtsID_INVALID);
2179 ie = VG_(indexXA)( vts_tab, auld->id );
2180 tl_assert(ie->vts == auld);
2181 return auld->id;
2182 } else {
2183 VtsID ii = get_new_VtsID();
2184 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2185 ie->vts = cand;
2186 ie->rc = 0;
2187 ie->freelink = VtsID_INVALID;
2188 cand->id = ii;
2189 return ii;
2190 }
2191}
2192
2193
2194static void show_vts_stats ( HChar* caller )
2195{
2196 UWord nSet, nTab, nLive;
2197 ULong totrc;
2198 UWord n, i;
2199 nSet = VG_(sizeFM)( vts_set );
2200 nTab = VG_(sizeXA)( vts_tab );
2201 totrc = 0;
2202 nLive = 0;
2203 n = VG_(sizeXA)( vts_tab );
2204 for (i = 0; i < n; i++) {
2205 VtsTE* ie = VG_(indexXA)( vts_tab, i );
2206 if (ie->vts) {
2207 nLive++;
2208 totrc += (ULong)ie->rc;
2209 } else {
2210 tl_assert(ie->rc == 0);
2211 }
2212 }
2213 VG_(printf)(" show_vts_stats %s\n", caller);
2214 VG_(printf)(" vts_tab size %4lu\n", nTab);
2215 VG_(printf)(" vts_tab live %4lu\n", nLive);
2216 VG_(printf)(" vts_set size %4lu\n", nSet);
2217 VG_(printf)(" total rc %4llu\n", totrc);
2218}
2219
2220/* NOT TO BE CALLED FROM WITHIN libzsm. */
sewardj8fd92d32008-11-20 23:17:01 +00002221__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00002222static void vts_tab__do_GC ( Bool show_stats )
2223{
2224 UWord i, nTab, nLive, nFreed;
2225
2226 /* check this is actually necessary. */
2227 tl_assert(vts_tab_freelist == VtsID_INVALID);
2228
2229 /* empty the caches for partial order checks and binary joins. We
2230 could do better and prune out the entries to be deleted, but it
2231 ain't worth the hassle. */
2232 VtsID__invalidate_caches();
2233
2234 /* First, make the reference counts up to date. */
2235 zsm_flush_cache();
2236
2237 nTab = VG_(sizeXA)( vts_tab );
2238
2239 if (show_stats) {
2240 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
2241 show_vts_stats("before GC");
2242 }
2243
2244 /* Now we can inspect the entire vts_tab. Any entries
2245 with zero .rc fields are now no longer in use and can be
2246      put on the free list, removed from vts_set, and deleted. */
2247 nFreed = 0;
2248 for (i = 0; i < nTab; i++) {
2249 Bool present;
2250 UWord oldK = 0, oldV = 0;
2251 VtsTE* te = VG_(indexXA)( vts_tab, i );
2252 if (te->vts == NULL) {
2253 tl_assert(te->rc == 0);
2254 continue; /* already on the free list (presumably) */
2255 }
2256 if (te->rc > 0)
2257 continue; /* in use */
2258 /* Ok, we got one we can free. */
2259 tl_assert(te->vts->id == i);
2260 /* first, remove it from vts_set. */
2261 present = VG_(delFromFM)( vts_set,
2262 &oldK, &oldV, (UWord)te->vts );
2263 tl_assert(present); /* else it isn't in vts_set ?! */
2264 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2265 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
2266 /* now free the VTS itself */
2267 VTS__delete(te->vts);
2268 te->vts = NULL;
2269 /* and finally put this entry on the free list */
2270 tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
2271 add_to_free_list( i );
2272 nFreed++;
2273 }
2274
2275 /* Now figure out when the next GC should be. We'll allow the
2276 number of VTSs to double before GCing again. Except of course
2277 that since we can't (or, at least, don't) shrink vts_tab, we
2278      can't set the threshold value smaller than it. */
2279 tl_assert(nFreed <= nTab);
2280 nLive = nTab - nFreed;
2281 tl_assert(nLive >= 0 && nLive <= nTab);
2282 vts_next_GC_at = 2 * nLive;
2283 if (vts_next_GC_at < nTab)
2284 vts_next_GC_at = nTab;
2285
2286 if (show_stats) {
2287 show_vts_stats("after GC");
2288 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
2289 }
2290
sewardjd024ae52008-11-09 20:47:57 +00002291 if (VG_(clo_verbosity) > 1) {
sewardjf98e1c02008-10-25 16:22:41 +00002292 static UInt ctr = 0;
2293 tl_assert(nTab > 0);
sewardjd024ae52008-11-09 20:47:57 +00002294 VG_(message)(Vg_DebugMsg,
2295 "libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)",
sewardjf98e1c02008-10-25 16:22:41 +00002296 ctr++, nTab, nLive, (100ULL * nLive) / nTab);
2297 }
2298}
2299
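/* Worked example of the threshold computation above (illustrative
   only): if nTab == 10000 and the GC freed 9000 entries, then
   nLive == 1000 and 2 * nLive == 2000, which is below nTab, so
   vts_next_GC_at is clamped up to 10000.  If instead only 1000 were
   freed, nLive == 9000 and the next GC is deferred until the table
   reaches 18000 entries. */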
2300
2301/////////////////////////////////////////////////////////
2302// //
2303// Vts IDs //
2304// //
2305/////////////////////////////////////////////////////////
2306
2307//////////////////////////
2308static ULong stats__getOrdering_queries = 0;
2309static ULong stats__getOrdering_misses = 0;
2310static ULong stats__join2_queries = 0;
2311static ULong stats__join2_misses = 0;
2312
2313static inline UInt ROL32 ( UInt w, Int n ) {
2314 w = (w << n) | (w >> (32-n));
2315 return w;
2316}
2317static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
2318 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
2319 return hash % nTab;
2320}
2321
2322#define N_GETORDERING_CACHE 1023
2323static
2324 struct { VtsID vi1; VtsID vi2; POrd ord; }
2325 getOrdering_cache[N_GETORDERING_CACHE];
2326
2327#define N_JOIN2_CACHE 1023
2328static
2329 struct { VtsID vi1; VtsID vi2; VtsID res; }
2330 join2_cache[N_JOIN2_CACHE];
2331
2332static void VtsID__invalidate_caches ( void ) {
2333 Int i;
2334 for (i = 0; i < N_GETORDERING_CACHE; i++) {
2335 getOrdering_cache[i].vi1 = VtsID_INVALID;
2336 getOrdering_cache[i].vi2 = VtsID_INVALID;
2337 getOrdering_cache[i].ord = 0; /* an invalid POrd value */
2338 }
2339 for (i = 0; i < N_JOIN2_CACHE; i++) {
2340 join2_cache[i].vi1 = VtsID_INVALID;
2341 join2_cache[i].vi2 = VtsID_INVALID;
2342 join2_cache[i].res = VtsID_INVALID;
2343 }
2344}
2345//////////////////////////
2346
sewardjd52392d2008-11-08 20:36:26 +00002347//static Bool VtsID__is_valid ( VtsID vi ) {
2348// VtsTE* ve;
2349// if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
2350// return False;
2351// ve = VG_(indexXA)( vts_tab, vi );
2352// if (!ve->vts)
2353// return False;
2354// tl_assert(ve->vts->id == vi);
2355// return True;
2356//}
sewardjf98e1c02008-10-25 16:22:41 +00002357
2358static VTS* VtsID__to_VTS ( VtsID vi ) {
2359 VtsTE* te = VG_(indexXA)( vts_tab, vi );
2360 tl_assert(te->vts);
2361 return te->vts;
2362}
2363
2364static void VtsID__pp ( VtsID vi ) {
2365 HChar buf[100];
2366 VTS* vts = VtsID__to_VTS(vi);
2367 VTS__show( buf, sizeof(buf)-1, vts );
2368 buf[sizeof(buf)-1] = 0;
2369 VG_(printf)("%s", buf);
2370}
2371
2372/* compute partial ordering relation of vi1 and vi2. */
2373__attribute__((noinline))
2374static POrd VtsID__getOrdering_WRK ( VtsID vi1, VtsID vi2 ) {
2375 UInt hash;
2376 POrd ord;
2377 VTS *v1, *v2;
2378 //if (vi1 == vi2) return POrd_EQ;
2379 tl_assert(vi1 != vi2);
2380 ////++
2381 stats__getOrdering_queries++;
2382 hash = hash_VtsIDs(vi1, vi2, N_GETORDERING_CACHE);
2383 if (getOrdering_cache[hash].vi1 == vi1
2384 && getOrdering_cache[hash].vi2 == vi2)
2385 return getOrdering_cache[hash].ord;
2386 stats__getOrdering_misses++;
2387 ////--
2388 v1 = VtsID__to_VTS(vi1);
2389 v2 = VtsID__to_VTS(vi2);
2390 ord = VTS__cmp( v1, v2 );
2391 ////++
2392 getOrdering_cache[hash].vi1 = vi1;
2393 getOrdering_cache[hash].vi2 = vi2;
2394 getOrdering_cache[hash].ord = ord;
2395 ////--
2396 return ord;
2397}
2398static inline POrd VtsID__getOrdering ( VtsID vi1, VtsID vi2 ) {
2399 return vi1 == vi2 ? POrd_EQ : VtsID__getOrdering_WRK(vi1, vi2);
2400}
2401
2402/* compute binary join */
2403__attribute__((noinline))
2404static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
2405 UInt hash;
2406 VtsID res;
2407 VTS *vts1, *vts2, *nyu;
2408 //if (vi1 == vi2) return vi1;
2409 tl_assert(vi1 != vi2);
2410 ////++
2411 stats__join2_queries++;
2412 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
2413 if (join2_cache[hash].vi1 == vi1
2414 && join2_cache[hash].vi2 == vi2)
2415 return join2_cache[hash].res;
2416 stats__join2_misses++;
2417 ////--
2418 vts1 = VtsID__to_VTS(vi1);
2419 vts2 = VtsID__to_VTS(vi2);
2420 nyu = VTS__join(vts1,vts2);
2421 res = vts_tab__find_and_dealloc__or_add(nyu);
2422 ////++
2423 join2_cache[hash].vi1 = vi1;
2424 join2_cache[hash].vi2 = vi2;
2425 join2_cache[hash].res = res;
2426 ////--
2427 return res;
2428}
2429static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
2430 return vi1 == vi2 ? vi1 : VtsID__join2_WRK(vi1, vi2);
2431}
2432
2433/* create a singleton VTS, namely [thr:1] */
2434static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
2435 VTS* nyu = VTS__singleton(thr,tym);
2436 return vts_tab__find_and_dealloc__or_add(nyu);
2437}
2438
2439/* tick operation, creates value 1 if specified index is absent */
2440static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
2441 VTS* vts = VtsID__to_VTS(vi);
2442 VTS* nyu = VTS__tick(idx,vts);
2443 return vts_tab__find_and_dealloc__or_add(nyu);
2444}
2445
2446/* index into a VTS (only for assertions) */
2447static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
2448 VTS* vts = VtsID__to_VTS(vi);
2449 return VTS__indexAt_SLOW( vts, idx );
2450}
2451
2452
2453/////////////////////////////////////////////////////////
2454// //
2455// Threads //
2456// //
2457/////////////////////////////////////////////////////////
2458
2459struct _Thr {
2460 /* Current VTSs for this thread. They change as we go along. viR
2461 is the VTS to be used for reads, viW for writes. Usually they
2462 are the same, but can differ when we deal with reader-writer
2463 locks. It is always the case that VtsID__getOrdering(viW,viR)
2464 == POrd_LT or POrdEQ -- that is, viW must be the same, or
2465      == POrd_LT or POrd_EQ -- that is, viW must be the same as, or
2466 VtsID viR;
2467 VtsID viW;
2468 /* opaque (to us) data we hold on behalf of the library's user. */
2469 void* opaque;
2470};
2471
2472static Thr* Thr__new ( void ) {
2473 Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
2474 thr->viR = VtsID_INVALID;
2475 thr->viW = VtsID_INVALID;
2476 return thr;
2477}
2478
2479
2480/////////////////////////////////////////////////////////
2481// //
2482// Shadow Values //
2483// //
2484/////////////////////////////////////////////////////////
2485
2486// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
2487// hb_zsm.h. We have to do everything else here.
2488
2489/* SVal is 64 bit unsigned int.
2490
2491 <---------30---------> <---------30--------->
2492 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
2493 01 X--------------------X XX X--------------------X E(rror)
2494 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
2495 11 X--------------------X XX X--------------------X I: SVal_INVALID
2496*/
2497#define SVAL_TAGMASK (3ULL << 62)
2498
2499static inline Bool SVal__isC ( SVal s ) {
2500 return (0ULL << 62) == (s & SVAL_TAGMASK);
2501}
2502static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
2503 //tl_assert(VtsID__is_valid(rmini));
2504 //tl_assert(VtsID__is_valid(wmini));
2505 return (((ULong)rmini) << 32) | ((ULong)wmini);
2506}
2507static inline VtsID SVal__unC_Rmin ( SVal s ) {
2508 tl_assert(SVal__isC(s));
2509 return (VtsID)(s >> 32);
2510}
2511static inline VtsID SVal__unC_Wmin ( SVal s ) {
2512 tl_assert(SVal__isC(s));
2513 return (VtsID)(s & 0xFFFFFFFFULL);
2514}
2515
2516static Bool SVal__isE ( SVal s ) {
2517 return (1ULL << 62) == (s & SVAL_TAGMASK);
2518}
2519static SVal SVal__mkE ( void ) {
2520 return 1ULL << 62;
2521}
2522
2523static Bool SVal__isA ( SVal s ) {
2524 return (2ULL << 62) == (s & SVAL_TAGMASK);
2525}
2526static SVal SVal__mkA ( void ) {
2527 return 2ULL << 62;
2528}
2529
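/* Illustrative example (not part of the library): for the 'C' encoding
   the two 32-bit VtsIDs are simply packed side by side, and the 00 tag
   falls out naturally because valid VtsIDs fit in 30 bits:

      SVal s = SVal__mkC( 5, 7 );   // s == 0x0000000500000007ULL
      SVal__isC(s)                  // True (top two bits are 00)
      SVal__unC_Rmin(s)             // 5
      SVal__unC_Wmin(s)             // 7
*/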
2530/* Direct callback from lib_zsm. */
2531static void SVal__rcinc ( SVal s ) {
2532 if (SVal__isC(s)) {
2533 VtsID__rcinc( SVal__unC_Rmin(s) );
2534 VtsID__rcinc( SVal__unC_Wmin(s) );
2535 }
2536}
2537
2538/* Direct callback from lib_zsm. */
2539static void SVal__rcdec ( SVal s ) {
2540 if (SVal__isC(s)) {
2541 VtsID__rcdec( SVal__unC_Rmin(s) );
2542 VtsID__rcdec( SVal__unC_Wmin(s) );
2543 }
2544}
2545
2546
2547/////////////////////////////////////////////////////////
2548// //
2549// Change-event map2 //
2550// //
2551/////////////////////////////////////////////////////////
2552
2553#define EVENT_MAP_GC_AT (1 * 1000 * 1000)
2554#define EVENT_MAP_GC_DISCARD_FRACTION 0.5
2555
2556/* This is in two parts:
2557
2558 1. An OSet of RCECs. This is a set of reference-counted stack
2559 traces. When the reference count of a stack trace becomes zero,
2560 it is removed from the set and freed up. The intent is to have
2561 a set of stack traces which can be referred to from (2), but to
2562 only represent each one once. The set is indexed/searched by
2563 ordering on the stack trace vectors.
2564
2565 2. An OSet of OldRefs. These store information about each old ref
2566 that we need to record. It is indexed by address of the
2567 location for which the information is recorded. For LRU
2568 purposes, each OldRef also contains a generation number,
2569 indicating when it was most recently accessed.
2570
2571 The important part of an OldRef is, however, its accs[] array.
2572 This is an array of N_OLDREF_ACCS pairs of Thr and a RCEC. This
2573 allows us to collect the last access-traceback by up to
2574 N_OLDREF_ACCS different threads for this location. The accs[]
2575      array is an MTF (move-to-front) array. If a pair falls off the end, that's too
2576 bad -- we will lose info about that thread's access to this
2577 location.
2578
2579 When this OSet becomes too big, we can throw away the entries
2580 whose generation numbers are below some threshold; hence doing
2581 approximate LRU discarding. For each discarded OldRef we must
2582      of course decrement the reference count on all the RCECs it
2583 refers to, in order that entries from (1) eventually get
2584 discarded too.
2585*/
2586
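/* Illustrative example of the accs[] move-to-front behaviour described
   above, with N_OLDREF_ACCS == 3 (threads and contexts are made up):

      accs == [ (T2,c2), (T1,c1), (T3,c3) ]

   - T3 accesses again: its pair is swapped one slot forward and its
     context replaced, giving [ (T2,c2), (T3,c3'), (T1,c1) ], with c3's
     refcount decremented and the new context c3' incremented.
   - A previously unseen thread T4 accesses: everything shuffles down
     one slot, the last pair (now T1's) is discarded and its RCEC rc
     decremented, giving [ (T4,c4), (T2,c2), (T3,c3') ]. */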
2587
2588static UWord stats__ctxt_rcdec1 = 0;
2589static UWord stats__ctxt_rcdec2 = 0;
2590static UWord stats__ctxt_rcdec3 = 0;
2591static UWord stats__ctxt_rcdec_calls = 0;
2592static UWord stats__ctxt_rcdec_discards = 0;
2593static UWord stats__ctxt_rcdec1_eq = 0;
2594
2595static UWord stats__ctxt_tab_curr = 0;
2596static UWord stats__ctxt_tab_max = 0;
2597
2598static UWord stats__ctxt_tab_qs = 0;
2599static UWord stats__ctxt_tab_cmps = 0;
2600
2601
2602///////////////////////////////////////////////////////
2603//// Part (1): An OSet of RCECs
2604///
2605
2606#define N_FRAMES 8
2607
2608// (UInt) `echo "Reference Counted Execution Context" | md5sum`
2609#define RCEC_MAGIC 0xab88abb2UL
2610
2611//#define N_RCEC_TAB 98317 /* prime */
2612#define N_RCEC_TAB 196613 /* prime */
2613
2614typedef
2615 struct _RCEC {
2616 struct _RCEC* next;
2617 UWord magic;
2618 UWord rc;
2619 UWord rcX; /* used for crosschecking */
2620 UWord frames[1 + N_FRAMES]; /* first word is hash of all the rest */
2621 }
2622 RCEC;
2623
2624static RCEC** contextTab = NULL; /* hash table of RCEC*s */
2625
2626
2627/* Gives an arbitrary total order on RCEC .frames fields */
2628static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
2629 Word i;
2630 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
2631 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
2632 if (ec1->frames[0] < ec2->frames[0]) return -1;
2633 if (ec1->frames[0] > ec2->frames[0]) return 1;
2634 for (i = 1; i < 1 + N_FRAMES; i++) {
2635 if (ec1->frames[i] < ec2->frames[i]) return -1;
2636 if (ec1->frames[i] > ec2->frames[i]) return 1;
2637 }
2638 return 0;
2639}
2640
2641
2642/* Dec the ref of this RCEC. */
2643static void ctxt__rcdec ( RCEC* ec )
2644{
2645 stats__ctxt_rcdec_calls++;
2646 tl_assert(ec && ec->magic == RCEC_MAGIC);
2647 tl_assert(ec->rc > 0);
2648 ec->rc--;
2649}
2650
2651static void ctxt__rcinc ( RCEC* ec )
2652{
2653 tl_assert(ec && ec->magic == RCEC_MAGIC);
2654 ec->rc++;
2655}
2656
2657
2658/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
2659 move it one step closer the the front of the list, so as to make
2660   move it one step closer to the front of the list, so as to make
2661static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
2662{
2663 RCEC *ec0, *ec1, *ec2;
2664 if (ec == *headp)
2665 tl_assert(0); /* already at head of list */
2666 tl_assert(ec != NULL);
2667 ec0 = *headp;
2668 ec1 = NULL;
2669 ec2 = NULL;
2670 while (True) {
2671 if (ec0 == NULL || ec0 == ec) break;
2672 ec2 = ec1;
2673 ec1 = ec0;
2674 ec0 = ec0->next;
2675 }
2676 tl_assert(ec0 == ec);
2677 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
2678 RCEC* tmp;
2679 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
2680 predecessor. Swap ec0 and ec1, that is, move ec0 one step
2681 closer to the start of the list. */
2682 tl_assert(ec2->next == ec1);
2683 tl_assert(ec1->next == ec0);
2684 tmp = ec0->next;
2685 ec2->next = ec0;
2686 ec0->next = ec1;
2687 ec1->next = tmp;
2688 }
2689 else
2690 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
2691 /* it's second in the list. */
2692 tl_assert(*headp == ec1);
2693 tl_assert(ec1->next == ec0);
2694 ec1->next = ec0->next;
2695 ec0->next = ec1;
2696 *headp = ec0;
2697 }
2698}
2699
2700
2701/* Find the given RCEC in the tree, and return a pointer to it. Or,
2702 if not present, add the given one to the tree (by making a copy of
2703 it, so the caller can immediately deallocate the original) and
2704 return a pointer to the copy. The caller can safely have 'example'
2705 on its stack, since we will always return a pointer to a copy of
2706 it, not to the original. Note that the inserted node will have .rc
2707   of zero and so the caller must immediately increment it. */
2708__attribute__((noinline))
2709static RCEC* ctxt__find_or_add ( RCEC* example )
2710{
2711 UWord hent;
2712 RCEC* copy;
2713 tl_assert(example && example->magic == RCEC_MAGIC);
2714 tl_assert(example->rc == 0);
2715
2716 /* Search the hash table to see if we already have it. */
2717 stats__ctxt_tab_qs++;
2718 hent = example->frames[0] % N_RCEC_TAB;
2719 copy = contextTab[hent];
2720 while (1) {
2721 if (!copy) break;
2722 tl_assert(copy->magic == RCEC_MAGIC);
2723 stats__ctxt_tab_cmps++;
2724 if (0 == RCEC__cmp_by_frames(copy, example)) break;
2725 copy = copy->next;
2726 }
2727
2728 if (copy) {
2729 tl_assert(copy != example);
2730 /* optimisation: if it's not at the head of its list, move 1
2731 step fwds, to make future searches cheaper */
2732 if (copy != contextTab[hent]) {
2733 move_RCEC_one_step_forward( &contextTab[hent], copy );
2734 }
2735 } else {
2736 copy = HG_(zalloc)( "libhb.cfoa.1", sizeof(RCEC) );
2737 tl_assert(copy != example);
2738 *copy = *example;
2739 copy->next = contextTab[hent];
2740 contextTab[hent] = copy;
2741 stats__ctxt_tab_curr++;
2742 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
2743 stats__ctxt_tab_max = stats__ctxt_tab_curr;
2744 }
2745 return copy;
2746}
2747
2748static inline UWord ROLW ( UWord w, Int n )
2749{
2750 Int bpw = 8 * sizeof(UWord);
2751 w = (w << n) | (w >> (bpw-n));
2752 return w;
2753}
2754
2755__attribute__((noinline))
2756static RCEC* get_RCEC ( Thr* thr )
2757{
2758 UWord hash, i;
2759 RCEC example;
2760 example.magic = RCEC_MAGIC;
2761 example.rc = 0;
2762 example.rcX = 0;
2763 main_get_stacktrace( thr, &example.frames[1], N_FRAMES );
2764 hash = 0;
2765 for (i = 1; i < 1 + N_FRAMES; i++) {
2766 hash ^= example.frames[i];
2767 hash = ROLW(hash, 19);
2768 }
2769 example.frames[0] = hash;
2770 return ctxt__find_or_add( &example );
2771}
2772
2773///////////////////////////////////////////////////////
2774//// Part (2): An OSet of OldRefs, that refer to (1)
2775///
2776
2777// (UInt) `echo "Old Reference Information" | md5sum`
2778#define OldRef_MAGIC 0x30b1f075UL
2779
2780typedef struct { Thr* thr; RCEC* rcec; } Thr_n_RCEC;
2781
2782#define N_OLDREF_ACCS 3
2783
2784typedef
2785 struct {
2786 Addr ea;
2787 UWord magic;
2788 UWord gen; /* when most recently accessed */
2789 /* unused slots in this array have .thr == NULL */
2790 Thr_n_RCEC accs[N_OLDREF_ACCS];
2791 }
2792 OldRef;
2793
sewardjf98e1c02008-10-25 16:22:41 +00002794static OSet* oldrefTree = NULL; /* OSet* of OldRef */
2795static UWord oldrefGen = 0; /* current LRU generation # */
2796static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
2797static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */
2798
2799static void event_map_bind ( Addr a, Thr* thr )
2800{
2801 OldRef key, *ref;
2802 RCEC* here;
2803 Word i, j;
2804
2805 key.ea = a;
2806 key.magic = OldRef_MAGIC;
2807
2808 ref = VG_(OSetGen_Lookup)( oldrefTree, &key );
2809
2810 if (ref) {
2811
2812 /* We already have a record for this address. We now need to
2813 see if we have a stack trace pertaining to this thread's
2814 access. */
2815 tl_assert(ref->magic == OldRef_MAGIC);
2816
2817 tl_assert(thr);
2818 for (i = 0; i < N_OLDREF_ACCS; i++) {
2819 if (ref->accs[i].thr == thr)
2820 break;
2821 }
2822
2823 if (i < N_OLDREF_ACCS) {
2824 /* thread 'thr' has an entry at index 'i'. Update it. */
2825 if (i > 0) {
2826 Thr_n_RCEC tmp = ref->accs[i-1];
2827 ref->accs[i-1] = ref->accs[i];
2828 ref->accs[i] = tmp;
2829 i--;
2830 }
2831 here = get_RCEC( thr );
2832 if (here == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
2833 ctxt__rcinc( here );
2834 stats__ctxt_rcdec1++;
2835 ctxt__rcdec( ref->accs[i].rcec );
2836 ref->accs[i].rcec = here;
2837 tl_assert(ref->accs[i].thr == thr);
2838 } else {
2839 here = get_RCEC( thr );
2840 ctxt__rcinc( here );
2841 /* No entry for this thread. Shuffle all of them down one
2842 slot, and put the new entry at the start of the array. */
2843 if (ref->accs[N_OLDREF_ACCS-1].thr) {
2844 /* the last slot is in use. We must dec the rc on the
2845 associated rcec. */
2846 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
2847 stats__ctxt_rcdec2++;
2848 ctxt__rcdec(ref->accs[N_OLDREF_ACCS-1].rcec);
2849 } else {
2850 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
2851 }
2852 for (j = N_OLDREF_ACCS-1; j >= 1; j--)
2853 ref->accs[j] = ref->accs[j-1];
2854 ref->accs[0].thr = thr;
2855 ref->accs[0].rcec = here;
2856 tl_assert(thr); /* thr==NULL is used to signify an empty slot,
2857 so we can't add a NULL thr. */
2858 }
2859
2860 ref->gen = oldrefGen;
2861 tl_assert(ref->ea == a);
2862
2863 } else {
2864
2865 /* We don't have a record for this address. Create a new one. */
2866 if (oldrefTreeN >= oldrefGenIncAt) {
2867 oldrefGen++;
2868 oldrefGenIncAt = oldrefTreeN + 50000;
2869 if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
2870 oldrefGen, oldrefTreeN );
2871 }
2872 here = get_RCEC( thr );
2873 ctxt__rcinc(here);
2874 ref = VG_(OSetGen_AllocNode)( oldrefTree, sizeof(OldRef) );
2875 ref->magic = OldRef_MAGIC;
2876 ref->gen = oldrefGen;
2877 ref->ea = a;
2878 ref->accs[0].rcec = here;
2879 ref->accs[0].thr = thr;
2880 tl_assert(thr); /* thr==NULL is used to signify an empty slot,
2881 so we can't add a NULL thr. */
2882 for (j = 1; j < N_OLDREF_ACCS; j++) {
2883 ref->accs[j].thr = NULL;
2884 ref->accs[j].rcec = NULL;
2885 }
2886 VG_(OSetGen_Insert)( oldrefTree, ref );
2887 oldrefTreeN++;
2888
2889 }
2890}
2891
2892
2893static
sewardjd52392d2008-11-08 20:36:26 +00002894Bool event_map_lookup ( /*OUT*/ExeContext** resEC,
sewardjf98e1c02008-10-25 16:22:41 +00002895 /*OUT*/Thr** resThr,
2896 Thr* thr_acc, Addr a )
2897{
2898 Word i;
2899 OldRef key, *ref;
2900
2901 tl_assert(thr_acc);
2902
2903 key.ea = a;
2904 key.magic = OldRef_MAGIC;
2905
2906 ref = VG_(OSetGen_Lookup)( oldrefTree, &key );
2907 if (ref) {
2908 tl_assert(ref->magic == OldRef_MAGIC);
2909 tl_assert(ref->accs[0].thr); /* first slot must always be used */
2910
2911 for (i = 0; i < N_OLDREF_ACCS; i++) {
2912 if (ref->accs[i].thr != NULL
2913 && ref->accs[i].thr != thr_acc)
2914 break;
2915 }
2916 /* If we didn't find an entry for some thread other than
2917 thr_acc, just return the entry for thread 0. It'll look
2918 pretty stupid to the user though. */
2919 if (i == N_OLDREF_ACCS)
2920 i = 0;
2921
2922 tl_assert(i >= 0 && i < N_OLDREF_ACCS);
2923 tl_assert(ref->accs[i].thr);
2924 tl_assert(ref->accs[i].rcec);
2925 tl_assert(ref->accs[i].rcec->magic == RCEC_MAGIC);
2926
sewardjd52392d2008-11-08 20:36:26 +00002927 *resEC = VG_(make_ExeContext_from_StackTrace)(
2928 &ref->accs[i].rcec->frames[1], N_FRAMES
2929 );
sewardjf98e1c02008-10-25 16:22:41 +00002930 *resThr = ref->accs[i].thr;
2931 return True;
2932 } else {
2933 return False;
2934 }
2935}
2936
2937static void event_map_init ( void )
2938{
2939 Word i;
2940 tl_assert(!contextTab);
2941 contextTab = HG_(zalloc)( "libhb.event_map_init.1 (context table)",
2942 N_RCEC_TAB * sizeof(RCEC*) );
2943 tl_assert(contextTab);
2944 for (i = 0; i < N_RCEC_TAB; i++)
2945 contextTab[i] = NULL;
2946
2947 tl_assert(!oldrefTree);
2948 tl_assert(offsetof(OldRef,ea) == 0); /* prereq for unboxed cmps */
2949 oldrefTree = VG_(OSetGen_Create)(
2950 offsetof(OldRef,ea), /* == 0 */
2951 NULL, /* use unboxed cmp on OldRefs */
2952 HG_(zalloc), "libhb.event_map_init.2 (oldref tree)",
2953 HG_(free)
2954 );
2955 tl_assert(oldrefTree);
2956
2957 oldrefGen = 0;
2958 oldrefGenIncAt = 0;
2959 oldrefTreeN = 0;
2960}
2961
2962static void event_map__check_reference_counts ( Bool before )
2963{
2964 RCEC* rcec;
2965 OldRef* oldref;
2966 Word i;
2967 UWord nEnts = 0;
2968
2969 /* Set the 'check' reference counts to zero. Also, optionally
2970 check that the real reference counts are non-zero. We allow
2971 these to fall to zero before a GC, but the GC must get rid of
2972 all those that are zero, hence none should be zero after a
2973 GC. */
2974 for (i = 0; i < N_RCEC_TAB; i++) {
2975 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
2976 nEnts++;
2977 tl_assert(rcec);
2978 tl_assert(rcec->magic == RCEC_MAGIC);
2979 if (!before)
2980 tl_assert(rcec->rc > 0);
2981 rcec->rcX = 0;
2982 }
2983 }
2984
2985 /* check that the stats are sane */
2986 tl_assert(nEnts == stats__ctxt_tab_curr);
2987 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
2988
2989 /* visit all the referencing points, inc check ref counts */
2990 VG_(OSetGen_ResetIter)( oldrefTree );
2991 while ( (oldref = VG_(OSetGen_Next)( oldrefTree )) ) {
2992 tl_assert(oldref->magic == OldRef_MAGIC);
2993 for (i = 0; i < N_OLDREF_ACCS; i++) {
2994 if (oldref->accs[i].thr) {
2995 tl_assert(oldref->accs[i].rcec);
2996 tl_assert(oldref->accs[i].rcec->magic == RCEC_MAGIC);
2997 oldref->accs[i].rcec->rcX++;
2998 } else {
2999 tl_assert(!oldref->accs[i].rcec);
3000 }
3001 }
3002 }
3003
3004 /* compare check ref counts with actual */
3005 for (i = 0; i < N_RCEC_TAB; i++) {
3006 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
3007 tl_assert(rcec->rc == rcec->rcX);
3008 }
3009 }
3010}
3011
sewardj8fd92d32008-11-20 23:17:01 +00003012__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00003013static void event_map_maybe_GC ( void )
3014{
3015 OldRef* oldref;
3016 UWord keyW, valW, retained, maxGen;
sewardjf98e1c02008-10-25 16:22:41 +00003017 XArray* refs2del;
3018 Word i, j, n2del;
3019
sewardj8fd92d32008-11-20 23:17:01 +00003020 UWord* genMap = NULL;
3021 UWord genMap_min = 0;
3022 UWord genMap_size = 0;
3023
sewardjf98e1c02008-10-25 16:22:41 +00003024 if (LIKELY(oldrefTreeN < EVENT_MAP_GC_AT))
3025 return;
3026
3027 if (0)
3028 VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
3029
3030 /* Check our counting is sane */
3031 tl_assert(oldrefTreeN == (UWord) VG_(OSetGen_Size)( oldrefTree ));
3032
3033 /* Check the reference counts */
3034 event_map__check_reference_counts( True/*before*/ );
3035
sewardj8fd92d32008-11-20 23:17:01 +00003036 /* Compute the distribution of generation values in the ref tree.
3037 There are likely only to be a few different generation numbers
3038 in the whole tree, but we don't know what they are. Hence use a
3039 dynamically resized array of counters. The array is genMap[0
3040 .. genMap_size-1], where genMap[0] is the count for the
3041 generation number genMap_min, genMap[1] is the count for
3042 genMap_min+1, etc. If a new number is seen outside the range
3043 [genMap_min .. genMap_min + genMap_size - 1] then the array is
3044 copied into a larger array, and genMap_min and genMap_size are
3045 adjusted accordingly. */
3046
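   /* Worked example (illustrative only): if the generations seen while
      iterating are 7, 5, 5 and 3, the array ends up as

         genMap_min == 3, genMap_size == 5,
         genMap == { 1, 0, 2, 0, 1 }   // counts for gens 3,4,5,6,7

      having been reallocated and shifted as new minima/maxima appear. */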
sewardjf98e1c02008-10-25 16:22:41 +00003047 /* genMap :: generation-number -> count-of-nodes-with-that-number */
sewardjf98e1c02008-10-25 16:22:41 +00003048
3049 VG_(OSetGen_ResetIter)( oldrefTree );
3050 while ( (oldref = VG_(OSetGen_Next)( oldrefTree )) ) {
sewardj8fd92d32008-11-20 23:17:01 +00003051
3052 UWord ea;
sewardjf98e1c02008-10-25 16:22:41 +00003053 UWord key = oldref->gen;
sewardj8fd92d32008-11-20 23:17:01 +00003054
3055 /* BEGIN find 'ea', which is the index in genMap holding the
3056 count for generation number 'key'. */
3057 if (UNLIKELY(genMap == NULL)) {
3058 /* deal with the first key to be seen, so that the following
3059 cases don't need to handle the complexity of a NULL count
3060 array. */
3061 genMap_min = key;
3062 genMap_size = 1;
3063 genMap = HG_(zalloc)( "libhb.emmG.1a",
3064 genMap_size * sizeof(UWord) );
3065 ea = 0;
3066 if (0) VG_(printf)("(%lu) case 1 [%lu .. %lu]\n",
3067 key, genMap_min, genMap_min+genMap_size- 1 );
sewardjf98e1c02008-10-25 16:22:41 +00003068 }
sewardj8fd92d32008-11-20 23:17:01 +00003069 else
3070 if (LIKELY(key >= genMap_min && key < genMap_min + genMap_size)) {
3071 /* this is the expected (almost-always-happens) case: 'key'
3072 is already mapped in the array. */
3073 ea = key - genMap_min;
3074 }
3075 else
3076 if (key < genMap_min) {
3077 /* 'key' appears before the start of the current array.
3078 Extend the current array by allocating a larger one and
3079 copying the current one to the upper end of it. */
3080 Word more;
3081 UWord* map2;
3082 more = genMap_min - key;
3083 tl_assert(more > 0);
3084 map2 = HG_(zalloc)( "libhb.emmG.1b",
3085 (genMap_size + more) * sizeof(UWord) );
3086 VG_(memcpy)( &map2[more], genMap, genMap_size * sizeof(UWord) );
3087 HG_(free)( genMap );
3088 genMap = map2;
3089 genMap_size += more;
3090 genMap_min -= more;
3091 ea = 0;
3092 tl_assert(genMap_min == key);
3093 if (0) VG_(printf)("(%lu) case 2 [%lu .. %lu]\n",
3094 key, genMap_min, genMap_min+genMap_size- 1 );
3095 }
3096 else {
3097 /* 'key' appears after the end of the current array. Extend
3098 the current array by allocating a larger one and copying
3099 the current one to the lower end of it. */
3100 Word more;
3101 UWord* map2;
3102 tl_assert(key >= genMap_min + genMap_size);
3103 more = key - (genMap_min + genMap_size) + 1;
3104 tl_assert(more > 0);
3105 map2 = HG_(zalloc)( "libhb.emmG.1c",
3106 (genMap_size + more) * sizeof(UWord) );
3107 VG_(memcpy)( &map2[0], genMap, genMap_size * sizeof(UWord) );
3108 HG_(free)( genMap );
3109 genMap = map2;
3110 genMap_size += more;
3111         ea = genMap_size - 1;
3112 tl_assert(genMap_min + genMap_size - 1 == key);
3113 if (0) VG_(printf)("(%lu) case 3 [%lu .. %lu]\n",
3114 key, genMap_min, genMap_min+genMap_size- 1 );
3115 }
3116 /* END find 'ea' from 'key' */
3117
3118 tl_assert(ea >= 0 && ea < genMap_size);
3119      /* and the whole point of this elaborate computation of 'ea' is .. */
3120 genMap[ea]++;
sewardjf98e1c02008-10-25 16:22:41 +00003121 }
3122
sewardj8fd92d32008-11-20 23:17:01 +00003123 tl_assert(genMap);
3124 tl_assert(genMap_size > 0);
sewardjf98e1c02008-10-25 16:22:41 +00003125
sewardj8fd92d32008-11-20 23:17:01 +00003126 /* Sanity check what we just computed */
3127 { UWord sum = 0;
3128 for (i = 0; i < genMap_size; i++) {
3129 if (0) VG_(printf)(" xxx: gen %ld has %lu\n",
3130 i + genMap_min, genMap[i] );
3131 sum += genMap[i];
3132 }
3133 tl_assert(sum == oldrefTreeN);
3134 }
3135
3136 /* Figure out how many generations to throw away */
sewardjf98e1c02008-10-25 16:22:41 +00003137 retained = oldrefTreeN;
3138 maxGen = 0;
sewardj8fd92d32008-11-20 23:17:01 +00003139
3140 for (i = 0; i < genMap_size; i++) {
3141 keyW = i + genMap_min;
3142 valW = genMap[i];
sewardjf98e1c02008-10-25 16:22:41 +00003143 tl_assert(keyW > 0); /* can't allow a generation # 0 */
3144 if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW );
3145 tl_assert(keyW >= maxGen);
3146 tl_assert(retained >= valW);
3147 if (retained - valW
3148 > (UWord)(EVENT_MAP_GC_AT * EVENT_MAP_GC_DISCARD_FRACTION)) {
3149 retained -= valW;
3150 maxGen = keyW;
3151 } else {
3152 break;
3153 }
3154 }
sewardjf98e1c02008-10-25 16:22:41 +00003155
sewardj8fd92d32008-11-20 23:17:01 +00003156 HG_(free)(genMap);
sewardjf98e1c02008-10-25 16:22:41 +00003157
sewardj9b1f0fd2008-11-18 23:40:00 +00003158 tl_assert(retained >= 0 && retained <= oldrefTreeN);
sewardjf98e1c02008-10-25 16:22:41 +00003159
3160 /* Now make up a big list of the oldrefTree entries we want to
3161 delete. We can't simultaneously traverse the tree and delete
3162 stuff from it, so first we need to copy them off somewhere
3163 else. (sigh) */
sewardj8fd92d32008-11-20 23:17:01 +00003164 refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2",
sewardjf98e1c02008-10-25 16:22:41 +00003165 HG_(free), sizeof(OldRef*) );
3166
sewardj9b1f0fd2008-11-18 23:40:00 +00003167 if (retained < oldrefTreeN) {
3168
3169 /* This is the normal (expected) case. We discard any ref whose
3170 generation number <= maxGen. */
3171 VG_(OSetGen_ResetIter)( oldrefTree );
3172 while ( (oldref = VG_(OSetGen_Next)( oldrefTree )) ) {
3173 tl_assert(oldref->magic == OldRef_MAGIC);
3174 if (oldref->gen <= maxGen) {
3175 VG_(addToXA)( refs2del, &oldref );
3176 }
sewardjf98e1c02008-10-25 16:22:41 +00003177 }
sewardj9b1f0fd2008-11-18 23:40:00 +00003178 if (VG_(clo_verbosity) > 1) {
3179 VG_(message)(Vg_DebugMsg,
3180 "libhb: EvM GC: delete generations %lu and below, "
3181 "retaining %lu entries",
3182 maxGen, retained );
3183 }
3184
3185 } else {
3186
3187 static UInt rand_seed = 0; /* leave as static */
3188
3189 /* Degenerate case: there's only one generation in the entire
3190 tree, so we need to have some other way of deciding which
3191 refs to throw away. Just throw out half of them randomly. */
3192 tl_assert(retained == oldrefTreeN);
3193 VG_(OSetGen_ResetIter)( oldrefTree );
3194 while ( (oldref = VG_(OSetGen_Next)( oldrefTree )) ) {
3195 UInt n;
3196 tl_assert(oldref->magic == OldRef_MAGIC);
3197 n = VG_(random)( &rand_seed );
3198 if ((n & 0xFFF) < 0x800) {
3199 VG_(addToXA)( refs2del, &oldref );
3200 retained--;
3201 }
3202 }
3203 if (VG_(clo_verbosity) > 1) {
3204 VG_(message)(Vg_DebugMsg,
3205 "libhb: EvM GC: randomly delete half the entries, "
3206 "retaining %lu entries",
3207 retained );
3208 }
3209
sewardjf98e1c02008-10-25 16:22:41 +00003210 }
3211
3212 n2del = VG_(sizeXA)( refs2del );
3213 tl_assert(n2del == (Word)(oldrefTreeN - retained));
3214
3215 if (0) VG_(printf)("%s","deleting entries\n");
3216 for (i = 0; i < n2del; i++) {
3217 void* nd;
3218 OldRef* ref = *(OldRef**)VG_(indexXA)( refs2del, i );
3219 tl_assert(ref);
3220 tl_assert(ref->magic == OldRef_MAGIC);
3221 for (j = 0; j < N_OLDREF_ACCS; j++) {
3222 if (ref->accs[j].rcec) {
3223 tl_assert(ref->accs[j].thr);
3224 stats__ctxt_rcdec3++;
3225 ctxt__rcdec( ref->accs[j].rcec );
3226 } else {
3227 tl_assert(!ref->accs[j].thr);
3228 }
3229 }
3230 nd = VG_(OSetGen_Remove)( oldrefTree, ref );
3231 VG_(OSetGen_FreeNode)( oldrefTree, nd );
3232 }
3233
3234 VG_(deleteXA)( refs2del );
3235
3236 tl_assert( VG_(OSetGen_Size)( oldrefTree ) == retained );
3237
3238 oldrefTreeN = retained;
3239 oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
3240
3241 /* Throw away all RCECs with zero reference counts */
3242 for (i = 0; i < N_RCEC_TAB; i++) {
3243 RCEC** pp = &contextTab[i];
3244 RCEC* p = *pp;
3245 while (p) {
3246 if (p->rc == 0) {
3247 *pp = p->next;
3248 HG_(free)(p);
3249 p = *pp;
3250 tl_assert(stats__ctxt_tab_curr > 0);
3251 stats__ctxt_tab_curr--;
3252 } else {
3253 pp = &p->next;
3254 p = p->next;
3255 }
3256 }
3257 }
3258
3259 /* Check the reference counts */
3260 event_map__check_reference_counts( False/*after*/ );
3261
3262 //if (0)
3263 //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
3264 // VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
3265
3266}
3267
3268
3269/////////////////////////////////////////////////////////
3270// //
3271// Core MSM //
3272// //
3273/////////////////////////////////////////////////////////
3274
sewardjb0e009d2008-11-19 16:35:15 +00003275/* Logic in msm_read/msm_write updated/verified after re-analysis,
3276 19 Nov 08. */
3277
sewardjf98e1c02008-10-25 16:22:41 +00003278#define MSM_CONFACC 1
3279
sewardjf98e1c02008-10-25 16:22:41 +00003280#define MSM_CHECK 0
3281
sewardjb0e009d2008-11-19 16:35:15 +00003282/* 19 Nov 08: it seems that MSM_RACE2ERR == 1 is a bad idea. When
3283 nonzero, the effect is that when a race is detected for a location,
3284 that location is put into a special 'error' state and no further
3285 checking of it is done until it returns to a 'normal' state, which
3286 requires it to be deallocated and reallocated.
3287
3288 This is a bad idea, because of the interaction with suppressions.
3289 Suppose there is a race on the location, but the error is
3290 suppressed. The location now is marked as in-error. Now any
3291 subsequent race -- including ones we want to see -- will never be
3292 detected until the location is deallocated and reallocated.
3293
3294   Hence set MSM_RACE2ERR to zero.  This causes raced-on locations to
3295 remain in the normal 'C' (constrained) state, but places on them
3296 the constraint that the next accesses happen-after both the
3297 existing constraint and the relevant vector clock of the thread
3298 doing the racing access.
3299*/
3300#define MSM_RACE2ERR 0
3301
sewardjf98e1c02008-10-25 16:22:41 +00003302static ULong stats__msm_read = 0;
3303static ULong stats__msm_read_change = 0;
3304static ULong stats__msm_write = 0;
3305static ULong stats__msm_write_change = 0;
3306
3307__attribute__((noinline))
3308static void record_race_info ( Thr* acc_thr,
3309 Addr acc_addr, SizeT szB, Bool isWrite,
3310 SVal svOld, SVal svNew )
3311{
3312 Bool found;
3313 Thr* thrp = NULL;
sewardjd52392d2008-11-08 20:36:26 +00003314 ExeContext* where = NULL;
3315 ExeContext* wherep = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00003316 where = main_get_EC( acc_thr );
3317 found = event_map_lookup( &wherep, &thrp, acc_thr, acc_addr );
3318 if (found) {
3319 tl_assert(wherep);
3320 tl_assert(thrp);
3321 tl_assert(thrp->opaque);
3322 tl_assert(acc_thr->opaque);
3323 HG_(record_error_Race)( acc_thr->opaque, acc_addr,
3324 isWrite, szB, NULL/*mb_lastlock*/,
3325 wherep, thrp->opaque );
3326 } else {
3327 tl_assert(!wherep);
3328 tl_assert(!thrp);
3329 tl_assert(acc_thr->opaque);
3330 HG_(record_error_Race)( acc_thr->opaque, acc_addr,
3331 isWrite, szB, NULL/*mb_lastlock*/,
3332 NULL, NULL );
3333 }
3334}
3335
3336static Bool is_sane_SVal_C ( SVal sv ) {
3337 POrd ord;
3338 if (!SVal__isC(sv)) return True;
3339 ord = VtsID__getOrdering( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
3340 if (ord == POrd_EQ || ord == POrd_LT) return True;
3341 return False;
3342}
3343
3344
3345/* Compute new state following a read */
3346static inline SVal msm_read ( SVal svOld,
3347 /* The following are only needed for
3348 creating error reports. */
3349 Thr* acc_thr,
3350 Addr acc_addr, SizeT szB )
3351{
3352 SVal svNew = SVal_INVALID;
3353 stats__msm_read++;
3354
3355 /* Redundant sanity check on the constraints */
3356 if (MSM_CHECK) {
3357 tl_assert(is_sane_SVal_C(svOld));
3358 }
3359
3360 if (SVal__isC(svOld)) {
3361 POrd ord;
3362 VtsID tviR = acc_thr->viR;
3363 VtsID tviW = acc_thr->viW;
3364 VtsID rmini = SVal__unC_Rmin(svOld);
3365 VtsID wmini = SVal__unC_Wmin(svOld);
3366
3367 ord = VtsID__getOrdering(rmini,tviR);
3368 if (ord == POrd_EQ || ord == POrd_LT) {
3369 /* no race */
3370 /* Note: RWLOCK subtlety: use tviW, not tviR */
3371 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
3372 goto out;
3373 } else {
sewardjb0e009d2008-11-19 16:35:15 +00003374 /* assert on sanity of constraints. */
3375 POrd ordxx = VtsID__getOrdering(rmini,wmini);
3376 tl_assert(ordxx == POrd_EQ || ordxx == POrd_LT);
sewardjf98e1c02008-10-25 16:22:41 +00003377 svNew = MSM_RACE2ERR
3378 ? SVal__mkE()
sewardj3b0c4d72008-11-20 11:20:50 +00003379#if 0
3380 //std
sewardjb0e009d2008-11-19 16:35:15 +00003381 : SVal__mkC( VtsID__join2(wmini,tviR),
3382 VtsID__join2(wmini,tviW) );
sewardj3b0c4d72008-11-20 11:20:50 +00003383#else
3384 // relaxed
3385 : SVal__mkC( VtsID__join2(rmini,tviR),
3386 VtsID__join2(wmini,tviW) );
3387#endif
sewardjf98e1c02008-10-25 16:22:41 +00003388 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
3389 svOld, svNew );
3390 goto out;
3391 }
3392 }
3393 if (SVal__isA(svOld)) {
3394 /* reading no-access memory (sigh); leave unchanged */
3395 /* check for no pollution */
3396 tl_assert(svOld == SVal_NOACCESS);
3397 svNew = SVal_NOACCESS;
3398 goto out;
3399 }
3400 if (SVal__isE(svOld)) {
3401 /* no race, location is already "in error" */
3402 svNew = SVal__mkE();
3403 goto out;
3404 }
3405 VG_(printf)("msm_read: bad svOld: 0x%016llx\n", svOld);
3406 tl_assert(0);
3407
3408 out:
3409 if (MSM_CHECK) {
3410 tl_assert(is_sane_SVal_C(svNew));
3411 }
3412 tl_assert(svNew != SVal_INVALID);
3413 if (svNew != svOld) {
3414 if (MSM_CONFACC && SVal__isC(svOld) && SVal__isC(svNew)) {
3415 event_map_bind( acc_addr, acc_thr );
3416 stats__msm_read_change++;
3417 }
3418 }
3419 return svNew;
3420}
3421
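/* Worked example of the read rule (illustrative only; thread names are
   made up).  Suppose the location's constraint is C(Rmin,Wmin) with
   Rmin == Wmin == [T1:3], and thread T2 reads with
   viR == viW == [T1:5, T2:1].

   - Rmin <= viR, so there is no race; the new state is
     C( Rmin, join(Wmin, viW) ) == C( [T1:3], [T1:5, T2:1] ).

   - If instead T2's clocks were just [T2:1], Rmin and viR would be
     unordered, a race is reported, and (with MSM_RACE2ERR == 0) the
     location stays in the C state with both components joined with
     T2's clocks, so later genuinely-racy accesses can still be
     detected. */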
3422
3423/* Compute new state following a write */
3424static inline SVal msm_write ( SVal svOld,
3425 /* The following are only needed for
3426 creating error reports. */
3427 Thr* acc_thr,
3428 Addr acc_addr, SizeT szB )
3429{
3430 SVal svNew = SVal_INVALID;
3431 stats__msm_write++;
3432
3433 /* Redundant sanity check on the constraints */
3434 if (MSM_CHECK) {
3435 tl_assert(is_sane_SVal_C(svOld));
3436 }
3437
3438 if (SVal__isC(svOld)) {
3439 POrd ord;
3440 VtsID tviW = acc_thr->viW;
3441 VtsID wmini = SVal__unC_Wmin(svOld);
3442
3443 ord = VtsID__getOrdering(wmini,tviW);
3444 if (ord == POrd_EQ || ord == POrd_LT) {
3445 /* no race */
3446 svNew = SVal__mkC( tviW, tviW );
3447 goto out;
3448 } else {
sewardjb0e009d2008-11-19 16:35:15 +00003449 VtsID tviR = acc_thr->viR;
sewardjf98e1c02008-10-25 16:22:41 +00003450 VtsID rmini = SVal__unC_Rmin(svOld);
sewardjb0e009d2008-11-19 16:35:15 +00003451 /* assert on sanity of constraints. */
3452 POrd ordxx = VtsID__getOrdering(rmini,wmini);
3453 tl_assert(ordxx == POrd_EQ || ordxx == POrd_LT);
sewardjf98e1c02008-10-25 16:22:41 +00003454 svNew = MSM_RACE2ERR
3455 ? SVal__mkE()
sewardj3b0c4d72008-11-20 11:20:50 +00003456#if 0
3457 // std
sewardjb0e009d2008-11-19 16:35:15 +00003458 : SVal__mkC( VtsID__join2(wmini,tviR),
sewardjf98e1c02008-10-25 16:22:41 +00003459 VtsID__join2(wmini,tviW) );
sewardj3b0c4d72008-11-20 11:20:50 +00003460#else
3461 // relaxed
3462 : SVal__mkC( VtsID__join2(rmini,tviR),
3463 VtsID__join2(wmini,tviW) );
3464#endif
sewardjf98e1c02008-10-25 16:22:41 +00003465 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
3466 svOld, svNew );
3467 goto out;
3468 }
3469 }
3470 if (SVal__isA(svOld)) {
3471 /* writing no-access memory (sigh); leave unchanged */
3472 /* check for no pollution */
3473 tl_assert(svOld == SVal_NOACCESS);
3474 svNew = SVal_NOACCESS;
3475 goto out;
3476 }
3477 if (SVal__isE(svOld)) {
3478 /* no race, location is already "in error" */
3479 svNew = SVal__mkE();
3480 goto out;
3481 }
3482 VG_(printf)("msm_write: bad svOld: 0x%016llx\n", svOld);
3483 tl_assert(0);
3484
3485 out:
3486 if (MSM_CHECK) {
3487 tl_assert(is_sane_SVal_C(svNew));
3488 }
3489 tl_assert(svNew != SVal_INVALID);
3490 if (svNew != svOld) {
3491 if (MSM_CONFACC && SVal__isC(svOld) && SVal__isC(svNew)) {
3492 event_map_bind( acc_addr, acc_thr );
3493 stats__msm_write_change++;
3494 }
3495 }
3496 return svNew;
3497}
3498
3499
3500/////////////////////////////////////////////////////////
3501// //
3502// Apply core MSM to specific memory locations //
3503// //
3504/////////////////////////////////////////////////////////
3505
3506/*------------- ZSM accesses: 8 bit apply ------------- */
3507
3508void zsm_apply8___msm_read ( Thr* thr, Addr a ) {
3509 CacheLine* cl;
3510 UWord cloff, tno, toff;
3511 SVal svOld, svNew;
3512 UShort descr;
3513 stats__cline_read8s++;
3514 cl = get_cacheline(a);
3515 cloff = get_cacheline_offset(a);
3516 tno = get_treeno(a);
3517 toff = get_tree_offset(a); /* == 0 .. 7 */
3518 descr = cl->descrs[tno];
3519 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3520 SVal* tree = &cl->svals[tno << 3];
3521 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3522 if (SCE_CACHELINE)
3523 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3524 }
3525 svOld = cl->svals[cloff];
3526 svNew = msm_read( svOld, thr,a,1 );
3527 tl_assert(svNew != SVal_INVALID);
3528 cl->svals[cloff] = svNew;
3529}
3530
3531void zsm_apply8___msm_write ( Thr* thr, Addr a ) {
3532 CacheLine* cl;
3533 UWord cloff, tno, toff;
3534 SVal svOld, svNew;
3535 UShort descr;
3536 stats__cline_read8s++;
3537 cl = get_cacheline(a);
3538 cloff = get_cacheline_offset(a);
3539 tno = get_treeno(a);
3540 toff = get_tree_offset(a); /* == 0 .. 7 */
3541 descr = cl->descrs[tno];
3542 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3543 SVal* tree = &cl->svals[tno << 3];
3544 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3545 if (SCE_CACHELINE)
3546 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3547 }
3548 svOld = cl->svals[cloff];
3549 svNew = msm_write( svOld, thr,a,1 );
3550 tl_assert(svNew != SVal_INVALID);
3551 cl->svals[cloff] = svNew;
3552}
3553
3554/*------------- ZSM accesses: 16 bit apply ------------- */
3555
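/* For 16-bit and wider accesses there are two sub-cases when the
   descriptor shows no node of the required width at this position:
   if the tree already holds finer-grained values below this position
   (valid_value_is_below_me_16), the access cannot be served at this
   level and is split into two narrower applications; otherwise the
   wider value above is pulled down to this granularity and handled
   in place. */
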
3556void zsm_apply16___msm_read ( Thr* thr, Addr a ) {
3557 CacheLine* cl;
3558 UWord cloff, tno, toff;
3559 SVal svOld, svNew;
3560 UShort descr;
3561 stats__cline_read16s++;
3562 if (UNLIKELY(!aligned16(a))) goto slowcase;
3563 cl = get_cacheline(a);
3564 cloff = get_cacheline_offset(a);
3565 tno = get_treeno(a);
3566 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
3567 descr = cl->descrs[tno];
3568 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
3569 if (valid_value_is_below_me_16(descr, toff)) {
3570 goto slowcase;
3571 } else {
3572 SVal* tree = &cl->svals[tno << 3];
3573 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
3574 }
3575 if (SCE_CACHELINE)
3576 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3577 }
3578 svOld = cl->svals[cloff];
3579 svNew = msm_read( svOld, thr,a,2 );
3580 tl_assert(svNew != SVal_INVALID);
3581 cl->svals[cloff] = svNew;
3582 return;
3583 slowcase: /* misaligned, or must go further down the tree */
3584 stats__cline_16to8splits++;
3585 zsm_apply8___msm_read( thr, a + 0 );
3586 zsm_apply8___msm_read( thr, a + 1 );
3587}
3588
3589void zsm_apply16___msm_write ( Thr* thr, Addr a ) {
3590 CacheLine* cl;
3591 UWord cloff, tno, toff;
3592 SVal svOld, svNew;
3593 UShort descr;
   stats__cline_write16s++;
3595 if (UNLIKELY(!aligned16(a))) goto slowcase;
3596 cl = get_cacheline(a);
3597 cloff = get_cacheline_offset(a);
3598 tno = get_treeno(a);
3599 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
3600 descr = cl->descrs[tno];
3601 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
3602 if (valid_value_is_below_me_16(descr, toff)) {
3603 goto slowcase;
3604 } else {
3605 SVal* tree = &cl->svals[tno << 3];
3606 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
3607 }
3608 if (SCE_CACHELINE)
3609 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3610 }
3611 svOld = cl->svals[cloff];
3612 svNew = msm_write( svOld, thr,a,2 );
3613 tl_assert(svNew != SVal_INVALID);
3614 cl->svals[cloff] = svNew;
3615 return;
3616 slowcase: /* misaligned, or must go further down the tree */
3617 stats__cline_16to8splits++;
3618 zsm_apply8___msm_write( thr, a + 0 );
3619 zsm_apply8___msm_write( thr, a + 1 );
3620}
3621
3622/*------------- ZSM accesses: 32 bit apply ------------- */
3623
3624void zsm_apply32___msm_read ( Thr* thr, Addr a ) {
3625 CacheLine* cl;
3626 UWord cloff, tno, toff;
3627 SVal svOld, svNew;
   UShort descr;
   stats__cline_read32s++;
3629 if (UNLIKELY(!aligned32(a))) goto slowcase;
3630 cl = get_cacheline(a);
3631 cloff = get_cacheline_offset(a);
3632 tno = get_treeno(a);
3633 toff = get_tree_offset(a); /* == 0 or 4 */
3634 descr = cl->descrs[tno];
3635 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
3636 if (valid_value_is_above_me_32(descr, toff)) {
3637 SVal* tree = &cl->svals[tno << 3];
3638 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
3639 } else {
3640 goto slowcase;
3641 }
3642 if (SCE_CACHELINE)
3643 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3644 }
3645 svOld = cl->svals[cloff];
3646 svNew = msm_read( svOld, thr,a,4 );
3647 tl_assert(svNew != SVal_INVALID);
3648 cl->svals[cloff] = svNew;
3649 return;
3650 slowcase: /* misaligned, or must go further down the tree */
3651 stats__cline_32to16splits++;
3652 zsm_apply16___msm_read( thr, a + 0 );
3653 zsm_apply16___msm_read( thr, a + 2 );
3654}
3655
3656void zsm_apply32___msm_write ( Thr* thr, Addr a ) {
3657 CacheLine* cl;
3658 UWord cloff, tno, toff;
3659 SVal svOld, svNew;
   UShort descr;
   stats__cline_write32s++;
3661 if (UNLIKELY(!aligned32(a))) goto slowcase;
3662 cl = get_cacheline(a);
3663 cloff = get_cacheline_offset(a);
3664 tno = get_treeno(a);
3665 toff = get_tree_offset(a); /* == 0 or 4 */
3666 descr = cl->descrs[tno];
3667 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
3668 if (valid_value_is_above_me_32(descr, toff)) {
3669 SVal* tree = &cl->svals[tno << 3];
3670 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
3671 } else {
3672 goto slowcase;
3673 }
3674 if (SCE_CACHELINE)
3675 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3676 }
3677 svOld = cl->svals[cloff];
3678 svNew = msm_write( svOld, thr,a,4 );
3679 tl_assert(svNew != SVal_INVALID);
3680 cl->svals[cloff] = svNew;
3681 return;
3682 slowcase: /* misaligned, or must go further down the tree */
3683 stats__cline_32to16splits++;
3684 zsm_apply16___msm_write( thr, a + 0 );
3685 zsm_apply16___msm_write( thr, a + 2 );
3686}
3687
3688/*------------- ZSM accesses: 64 bit apply ------------- */
3689
3690void zsm_apply64___msm_read ( Thr* thr, Addr a ) {
3691 CacheLine* cl;
3692 UWord cloff, tno, toff;
3693 SVal svOld, svNew;
3694 UShort descr;
3695 stats__cline_read64s++;
3696 if (UNLIKELY(!aligned64(a))) goto slowcase;
3697 cl = get_cacheline(a);
3698 cloff = get_cacheline_offset(a);
3699 tno = get_treeno(a);
3700 toff = get_tree_offset(a); /* == 0, unused */
3701 descr = cl->descrs[tno];
3702 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
3703 goto slowcase;
3704 }
3705 svOld = cl->svals[cloff];
3706 svNew = msm_read( svOld, thr,a,8 );
3707 tl_assert(svNew != SVal_INVALID);
3708 cl->svals[cloff] = svNew;
3709 return;
3710 slowcase: /* misaligned, or must go further down the tree */
3711 stats__cline_64to32splits++;
3712 zsm_apply32___msm_read( thr, a + 0 );
3713 zsm_apply32___msm_read( thr, a + 4 );
3714}
3715
3716void zsm_apply64___msm_write ( Thr* thr, Addr a ) {
3717 CacheLine* cl;
3718 UWord cloff, tno, toff;
3719 SVal svOld, svNew;
3720 UShort descr;
   stats__cline_write64s++;
3722 if (UNLIKELY(!aligned64(a))) goto slowcase;
3723 cl = get_cacheline(a);
3724 cloff = get_cacheline_offset(a);
3725 tno = get_treeno(a);
3726 toff = get_tree_offset(a); /* == 0, unused */
3727 descr = cl->descrs[tno];
3728 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
3729 goto slowcase;
3730 }
3731 svOld = cl->svals[cloff];
3732 svNew = msm_write( svOld, thr,a,8 );
3733 tl_assert(svNew != SVal_INVALID);
3734 cl->svals[cloff] = svNew;
3735 return;
3736 slowcase: /* misaligned, or must go further down the tree */
3737 stats__cline_64to32splits++;
3738 zsm_apply32___msm_write( thr, a + 0 );
3739 zsm_apply32___msm_write( thr, a + 4 );
3740}
3741
3742/*--------------- ZSM accesses: 8 bit write --------------- */
3743
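/* The zsm_writeN functions below differ from the zsm_applyN___msm_*
   functions above: they do not run the race-detection state machine
   at all, but simply install a given shadow value, fixing up the tree
   descriptors as needed.  They are used by the range-setting and
   copying operations further down (zsm_copy8, zsm_set_range_SMALL,
   zsm_set_range). */
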
3744static
3745void zsm_write8 ( Addr a, SVal svNew ) {
3746 CacheLine* cl;
3747 UWord cloff, tno, toff;
3748 UShort descr;
3749 stats__cline_set8s++;
3750 cl = get_cacheline(a);
3751 cloff = get_cacheline_offset(a);
3752 tno = get_treeno(a);
3753 toff = get_tree_offset(a); /* == 0 .. 7 */
3754 descr = cl->descrs[tno];
3755 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3756 SVal* tree = &cl->svals[tno << 3];
3757 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3758 if (SCE_CACHELINE)
3759 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3760 }
3761 tl_assert(svNew != SVal_INVALID);
3762 cl->svals[cloff] = svNew;
3763}
3764
3765/*--------------- ZSM accesses: 16 bit write --------------- */
3766
3767static
3768void zsm_write16 ( Addr a, SVal svNew ) {
3769 CacheLine* cl;
3770 UWord cloff, tno, toff;
3771 UShort descr;
3772 stats__cline_set16s++;
3773 if (UNLIKELY(!aligned16(a))) goto slowcase;
3774 cl = get_cacheline(a);
3775 cloff = get_cacheline_offset(a);
3776 tno = get_treeno(a);
3777 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
3778 descr = cl->descrs[tno];
3779 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
3780 if (valid_value_is_below_me_16(descr, toff)) {
3781 /* Writing at this level. Need to fix up 'descr'. */
3782 cl->descrs[tno] = pullup_descr_to_16(descr, toff);
         /* At this point, the tree does not match cl->descrs[tno] any
            more.  The assignments below will fix it up. */
3785 } else {
3786 /* We can't indiscriminately write on the w16 node as in the
3787 w64 case, as that might make the node inconsistent with
3788 its parent. So first, pull down to this level. */
3789 SVal* tree = &cl->svals[tno << 3];
3790 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
3791 if (SCE_CACHELINE)
3792 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3793 }
3794 }
3795 tl_assert(svNew != SVal_INVALID);
3796 cl->svals[cloff + 0] = svNew;
3797 cl->svals[cloff + 1] = SVal_INVALID;
3798 return;
3799 slowcase: /* misaligned */
3800 stats__cline_16to8splits++;
3801 zsm_write8( a + 0, svNew );
3802 zsm_write8( a + 1, svNew );
3803}
3804
3805/*--------------- ZSM accesses: 32 bit write --------------- */
3806
3807static
3808void zsm_write32 ( Addr a, SVal svNew ) {
3809 CacheLine* cl;
3810 UWord cloff, tno, toff;
3811 UShort descr;
3812 stats__cline_set32s++;
3813 if (UNLIKELY(!aligned32(a))) goto slowcase;
3814 cl = get_cacheline(a);
3815 cloff = get_cacheline_offset(a);
3816 tno = get_treeno(a);
3817 toff = get_tree_offset(a); /* == 0 or 4 */
3818 descr = cl->descrs[tno];
3819 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
3820 if (valid_value_is_above_me_32(descr, toff)) {
3821 /* We can't indiscriminately write on the w32 node as in the
3822 w64 case, as that might make the node inconsistent with
3823 its parent. So first, pull down to this level. */
3824 SVal* tree = &cl->svals[tno << 3];
3825 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
3826 if (SCE_CACHELINE)
3827 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3828 } else {
3829 /* Writing at this level. Need to fix up 'descr'. */
3830 cl->descrs[tno] = pullup_descr_to_32(descr, toff);
         /* At this point, the tree does not match cl->descrs[tno] any
            more.  The assignments below will fix it up. */
3833 }
3834 }
3835 tl_assert(svNew != SVal_INVALID);
3836 cl->svals[cloff + 0] = svNew;
3837 cl->svals[cloff + 1] = SVal_INVALID;
3838 cl->svals[cloff + 2] = SVal_INVALID;
3839 cl->svals[cloff + 3] = SVal_INVALID;
3840 return;
3841 slowcase: /* misaligned */
3842 stats__cline_32to16splits++;
3843 zsm_write16( a + 0, svNew );
3844 zsm_write16( a + 2, svNew );
3845}
3846
3847/*--------------- ZSM accesses: 64 bit write --------------- */
3848
3849static
3850void zsm_write64 ( Addr a, SVal svNew ) {
3851 CacheLine* cl;
3852 UWord cloff, tno, toff;
3853 stats__cline_set64s++;
3854 if (UNLIKELY(!aligned64(a))) goto slowcase;
3855 cl = get_cacheline(a);
3856 cloff = get_cacheline_offset(a);
3857 tno = get_treeno(a);
3858 toff = get_tree_offset(a); /* == 0 */
3859 cl->descrs[tno] = TREE_DESCR_64;
3860 tl_assert(svNew != SVal_INVALID);
3861 cl->svals[cloff + 0] = svNew;
3862 cl->svals[cloff + 1] = SVal_INVALID;
3863 cl->svals[cloff + 2] = SVal_INVALID;
3864 cl->svals[cloff + 3] = SVal_INVALID;
3865 cl->svals[cloff + 4] = SVal_INVALID;
3866 cl->svals[cloff + 5] = SVal_INVALID;
3867 cl->svals[cloff + 6] = SVal_INVALID;
3868 cl->svals[cloff + 7] = SVal_INVALID;
3869 return;
3870 slowcase: /* misaligned */
3871 stats__cline_64to32splits++;
3872 zsm_write32( a + 0, svNew );
3873 zsm_write32( a + 4, svNew );
3874}
3875
3876/*------------- ZSM accesses: 8 bit read/copy ------------- */
3877
3878static
3879SVal zsm_read8 ( Addr a ) {
3880 CacheLine* cl;
3881 UWord cloff, tno, toff;
3882 UShort descr;
3883 stats__cline_get8s++;
3884 cl = get_cacheline(a);
3885 cloff = get_cacheline_offset(a);
3886 tno = get_treeno(a);
3887 toff = get_tree_offset(a); /* == 0 .. 7 */
3888 descr = cl->descrs[tno];
3889 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3890 SVal* tree = &cl->svals[tno << 3];
3891 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3892 }
3893 return cl->svals[cloff];
3894}
3895
static void zsm_copy8 ( Addr src, Addr dst, Bool uu_normalise ) {
   /* 'uu_normalise' is currently unused (hence the uu_ prefix); the
      copy is simply a read of src's shadow value followed by a write
      of it to dst. */
   SVal sv;
   stats__cline_copy8s++;
   sv = zsm_read8( src );
   zsm_write8( dst, sv );
}
3902
3903/* ------------ Shadow memory range setting ops ------------ */
3904
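/* The two range-apply functions below decompose an arbitrary (a, len)
   range into naturally aligned pieces: at most one leading 1-, 2- and
   4-byte access to reach 8-byte alignment, then 8-byte accesses for
   the bulk, then at most one trailing 4-, 2- and 1-byte access.
   Illustrative example: a = 0x1003, len = 13 is handled as one byte
   at 0x1003, four bytes at 0x1004 and eight bytes at 0x1008. */
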
3905void zsm_apply_range___msm_read ( Thr* thr,
3906 Addr a, SizeT len )
3907{
3908 /* fast track a couple of common cases */
3909 if (len == 4 && aligned32(a)) {
3910 zsm_apply32___msm_read( thr, a );
3911 return;
3912 }
3913 if (len == 8 && aligned64(a)) {
3914 zsm_apply64___msm_read( thr, a );
3915 return;
3916 }
3917
3918 /* be completely general (but as efficient as possible) */
3919 if (len == 0) return;
3920
3921 if (!aligned16(a) && len >= 1) {
3922 zsm_apply8___msm_read( thr, a );
3923 a += 1;
3924 len -= 1;
3925 tl_assert(aligned16(a));
3926 }
3927 if (len == 0) return;
3928
3929 if (!aligned32(a) && len >= 2) {
3930 zsm_apply16___msm_read( thr, a );
3931 a += 2;
3932 len -= 2;
3933 tl_assert(aligned32(a));
3934 }
3935 if (len == 0) return;
3936
3937 if (!aligned64(a) && len >= 4) {
3938 zsm_apply32___msm_read( thr, a );
3939 a += 4;
3940 len -= 4;
3941 tl_assert(aligned64(a));
3942 }
3943 if (len == 0) return;
3944
3945 if (len >= 8) {
3946 tl_assert(aligned64(a));
3947 while (len >= 8) {
3948 zsm_apply64___msm_read( thr, a );
3949 a += 8;
3950 len -= 8;
3951 }
3952 tl_assert(aligned64(a));
3953 }
3954 if (len == 0) return;
3955
3956 if (len >= 4)
3957 tl_assert(aligned32(a));
3958 if (len >= 4) {
3959 zsm_apply32___msm_read( thr, a );
3960 a += 4;
3961 len -= 4;
3962 }
3963 if (len == 0) return;
3964
3965 if (len >= 2)
3966 tl_assert(aligned16(a));
3967 if (len >= 2) {
3968 zsm_apply16___msm_read( thr, a );
3969 a += 2;
3970 len -= 2;
3971 }
3972 if (len == 0) return;
3973
3974 if (len >= 1) {
3975 zsm_apply8___msm_read( thr, a );
3976 a += 1;
3977 len -= 1;
3978 }
3979 tl_assert(len == 0);
3980}
3981
3982
3983
3984void zsm_apply_range___msm_write ( Thr* thr,
3985 Addr a, SizeT len )
3986{
3987 /* fast track a couple of common cases */
3988 if (len == 4 && aligned32(a)) {
3989 zsm_apply32___msm_write( thr, a );
3990 return;
3991 }
3992 if (len == 8 && aligned64(a)) {
3993 zsm_apply64___msm_write( thr, a );
3994 return;
3995 }
3996
3997 /* be completely general (but as efficient as possible) */
3998 if (len == 0) return;
3999
4000 if (!aligned16(a) && len >= 1) {
4001 zsm_apply8___msm_write( thr, a );
4002 a += 1;
4003 len -= 1;
4004 tl_assert(aligned16(a));
4005 }
4006 if (len == 0) return;
4007
4008 if (!aligned32(a) && len >= 2) {
4009 zsm_apply16___msm_write( thr, a );
4010 a += 2;
4011 len -= 2;
4012 tl_assert(aligned32(a));
4013 }
4014 if (len == 0) return;
4015
4016 if (!aligned64(a) && len >= 4) {
4017 zsm_apply32___msm_write( thr, a );
4018 a += 4;
4019 len -= 4;
4020 tl_assert(aligned64(a));
4021 }
4022 if (len == 0) return;
4023
4024 if (len >= 8) {
4025 tl_assert(aligned64(a));
4026 while (len >= 8) {
4027 zsm_apply64___msm_write( thr, a );
4028 a += 8;
4029 len -= 8;
4030 }
4031 tl_assert(aligned64(a));
4032 }
4033 if (len == 0) return;
4034
4035 if (len >= 4)
4036 tl_assert(aligned32(a));
4037 if (len >= 4) {
4038 zsm_apply32___msm_write( thr, a );
4039 a += 4;
4040 len -= 4;
4041 }
4042 if (len == 0) return;
4043
4044 if (len >= 2)
4045 tl_assert(aligned16(a));
4046 if (len >= 2) {
4047 zsm_apply16___msm_write( thr, a );
4048 a += 2;
4049 len -= 2;
4050 }
4051 if (len == 0) return;
4052
4053 if (len >= 1) {
4054 zsm_apply8___msm_write( thr, a );
4055 a += 1;
4056 len -= 1;
4057 }
4058 tl_assert(len == 0);
4059}
4060
4061
4062
4063
4064/* Block-copy states (needed for implementing realloc()). */
4065
4066static void zsm_copy_range ( Addr src, Addr dst, SizeT len )
4067{
4068 SizeT i;
4069 if (len == 0)
4070 return;
4071
4072 /* assert for non-overlappingness */
4073 tl_assert(src+len <= dst || dst+len <= src);
4074
   /* To be simple, just copy byte by byte.  But so as not to wreck
      performance for later accesses to dst[0 .. len-1], normalise
      destination lines as we finish with them, and also normalise the
      lines containing the first and last addresses of the range. */
4079 for (i = 0; i < len; i++) {
4080 Bool normalise
4081 = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
4082 || i == 0 /* first in range */
4083 || i == len-1; /* last in range */
4084 zsm_copy8( src+i, dst+i, normalise );
4085 }
4086}
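
/* Note that the copy proceeds one byte at a time via zsm_read8 and
   zsm_write8, so overlapping ranges are not supported (hence the
   assertion above); a realloc() implementation is expected to copy
   into a freshly allocated, disjoint area. */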
4087
4088
4089/* For setting address ranges to a given value. Has considerable
4090 sophistication so as to avoid generating large numbers of pointless
4091 cache loads/writebacks for large ranges. */
4092
4093/* Do small ranges in-cache, in the obvious way. */
4094static
4095void zsm_set_range_SMALL ( Addr a, SizeT len, SVal svNew )
4096{
4097 /* fast track a couple of common cases */
4098 if (len == 4 && aligned32(a)) {
4099 zsm_write32( a, svNew );
4100 return;
4101 }
4102 if (len == 8 && aligned64(a)) {
4103 zsm_write64( a, svNew );
4104 return;
4105 }
4106
4107 /* be completely general (but as efficient as possible) */
4108 if (len == 0) return;
4109
4110 if (!aligned16(a) && len >= 1) {
4111 zsm_write8( a, svNew );
4112 a += 1;
4113 len -= 1;
4114 tl_assert(aligned16(a));
4115 }
4116 if (len == 0) return;
4117
4118 if (!aligned32(a) && len >= 2) {
4119 zsm_write16( a, svNew );
4120 a += 2;
4121 len -= 2;
4122 tl_assert(aligned32(a));
4123 }
4124 if (len == 0) return;
4125
4126 if (!aligned64(a) && len >= 4) {
4127 zsm_write32( a, svNew );
4128 a += 4;
4129 len -= 4;
4130 tl_assert(aligned64(a));
4131 }
4132 if (len == 0) return;
4133
4134 if (len >= 8) {
4135 tl_assert(aligned64(a));
4136 while (len >= 8) {
4137 zsm_write64( a, svNew );
4138 a += 8;
4139 len -= 8;
4140 }
4141 tl_assert(aligned64(a));
4142 }
4143 if (len == 0) return;
4144
4145 if (len >= 4)
4146 tl_assert(aligned32(a));
4147 if (len >= 4) {
4148 zsm_write32( a, svNew );
4149 a += 4;
4150 len -= 4;
4151 }
4152 if (len == 0) return;
4153
4154 if (len >= 2)
4155 tl_assert(aligned16(a));
4156 if (len >= 2) {
4157 zsm_write16( a, svNew );
4158 a += 2;
4159 len -= 2;
4160 }
4161 if (len == 0) return;
4162
4163 if (len >= 1) {
4164 zsm_write8( a, svNew );
4165 a += 1;
4166 len -= 1;
4167 }
4168 tl_assert(len == 0);
4169}
4170
4171
4172/* If we're doing a small range, hand off to zsm_set_range_SMALL. But
4173 for larger ranges, try to operate directly on the out-of-cache
4174 representation, rather than dragging lines into the cache,
4175 overwriting them, and forcing them out. This turns out to be an
4176 important performance optimisation. */
4177
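/* Sketch of the decomposition used below: the range is split into an
   unaligned prefix, a run of whole cache lines, and an unaligned
   suffix.  Prefix and suffix go through zsm_set_range_SMALL (and
   hence through the cache).  Each whole line is either overwritten in
   the cache, if it happens to be resident, or written directly into
   its compressed LineZ representation in the secondary map as a
   one-entry dictionary, without pulling the line into the cache. */
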
4178static void zsm_set_range ( Addr a, SizeT len, SVal svNew )
4179{
4180 tl_assert(svNew != SVal_INVALID);
4181 stats__cache_make_New_arange += (ULong)len;
4182
4183 if (0 && len > 500)
4184 VG_(printf)("make New ( %#lx, %ld )\n", a, len );
4185
4186 if (0) {
4187 static UWord n_New_in_cache = 0;
4188 static UWord n_New_not_in_cache = 0;
4189 /* tag is 'a' with the in-line offset masked out,
4190 eg a[31]..a[4] 0000 */
4191 Addr tag = a & ~(N_LINE_ARANGE - 1);
4192 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
4193 if (LIKELY(tag == cache_shmem.tags0[wix])) {
4194 n_New_in_cache++;
4195 } else {
4196 n_New_not_in_cache++;
4197 }
4198 if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
4199 VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
4200 n_New_in_cache, n_New_not_in_cache );
4201 }
4202
4203 if (LIKELY(len < 2 * N_LINE_ARANGE)) {
4204 zsm_set_range_SMALL( a, len, svNew );
4205 } else {
4206 Addr before_start = a;
4207 Addr aligned_start = cacheline_ROUNDUP(a);
4208 Addr after_start = cacheline_ROUNDDN(a + len);
4209 UWord before_len = aligned_start - before_start;
4210 UWord aligned_len = after_start - aligned_start;
4211 UWord after_len = a + len - after_start;
4212 tl_assert(before_start <= aligned_start);
4213 tl_assert(aligned_start <= after_start);
4214 tl_assert(before_len < N_LINE_ARANGE);
4215 tl_assert(after_len < N_LINE_ARANGE);
4216 tl_assert(get_cacheline_offset(aligned_start) == 0);
4217 if (get_cacheline_offset(a) == 0) {
4218 tl_assert(before_len == 0);
4219 tl_assert(a == aligned_start);
4220 }
4221 if (get_cacheline_offset(a+len) == 0) {
4222 tl_assert(after_len == 0);
4223 tl_assert(after_start == a+len);
4224 }
4225 if (before_len > 0) {
4226 zsm_set_range_SMALL( before_start, before_len, svNew );
4227 }
4228 if (after_len > 0) {
4229 zsm_set_range_SMALL( after_start, after_len, svNew );
4230 }
4231 stats__cache_make_New_inZrep += (ULong)aligned_len;
4232
4233 while (1) {
4234 Addr tag;
4235 UWord wix;
4236 if (aligned_start >= after_start)
4237 break;
4238 tl_assert(get_cacheline_offset(aligned_start) == 0);
4239 tag = aligned_start & ~(N_LINE_ARANGE - 1);
4240 wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
4241 if (tag == cache_shmem.tags0[wix]) {
4242 UWord i;
4243 for (i = 0; i < N_LINE_ARANGE / 8; i++)
4244 zsm_write64( aligned_start + i * 8, svNew );
4245 } else {
4246 UWord i;
4247 Word zix;
4248 SecMap* sm;
4249 LineZ* lineZ;
4250 /* This line is not in the cache. Do not force it in; instead
4251 modify it in-place. */
4252 /* find the Z line to write in and rcdec it or the
4253 associated F line. */
4254 find_Z_for_writing( &sm, &zix, tag );
4255 tl_assert(sm);
4256 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
4257 lineZ = &sm->linesZ[zix];
4258 lineZ->dict[0] = svNew;
4259 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
4260 for (i = 0; i < N_LINE_ARANGE/4; i++)
4261 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
4262 rcinc_LineZ(lineZ);
4263 }
4264 aligned_start += N_LINE_ARANGE;
4265 aligned_len -= N_LINE_ARANGE;
4266 }
4267 tl_assert(aligned_start == after_start);
4268 tl_assert(aligned_len == 0);
4269 }
4270}
4271
4272
4273/////////////////////////////////////////////////////////
4274// //
4275// Synchronisation objects //
4276// //
4277/////////////////////////////////////////////////////////
4278
4279// (UInt) `echo "Synchronisation object" | md5sum`
4280#define SO_MAGIC 0x56b3c5b0U
4281
4282struct _SO {
4283 VtsID viR; /* r-clock of sender */
4284 VtsID viW; /* w-clock of sender */
4285 UInt magic;
4286};
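
/* Invariant: viR and viW are either both VtsID_INVALID, meaning no
   message has ever been posted to this SO, or both valid, in which
   case the SO holds a reference on each.  Read- and write-clocks are
   kept separately so that reader/writer style synchronisation can
   advance them independently. */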
4287
4288static SO* SO__Alloc ( void ) {
4289 SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
4290 so->viR = VtsID_INVALID;
4291 so->viW = VtsID_INVALID;
4292 so->magic = SO_MAGIC;
4293 return so;
4294}
4295static void SO__Dealloc ( SO* so ) {
4296 tl_assert(so);
4297 tl_assert(so->magic == SO_MAGIC);
4298 if (so->viR == VtsID_INVALID) {
4299 tl_assert(so->viW == VtsID_INVALID);
4300 } else {
4301 tl_assert(so->viW != VtsID_INVALID);
4302 VtsID__rcdec(so->viR);
4303 VtsID__rcdec(so->viW);
4304 }
4305 so->magic = 0;
4306 HG_(free)( so );
4307}
4308
4309
4310/////////////////////////////////////////////////////////
4311// //
4312// Top Level API //
4313// //
4314/////////////////////////////////////////////////////////
4315
4316static void show_thread_state ( HChar* str, Thr* t )
4317{
   if (1) return; /* disabled; change to 0 to get the diagnostics */
4319 if (t->viR == t->viW) {
4320 VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
4321 VtsID__pp( t->viR );
4322 VG_(printf)("%s","\n");
4323 } else {
4324 VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
4325 VtsID__pp( t->viR );
4326 VG_(printf)(" viW %u==", t->viW);
4327 VtsID__pp( t->viW );
4328 VG_(printf)("%s","\n");
4329 }
4330}
4331
4332
4333Thr* libhb_init (
4334 void (*get_stacktrace)( Thr*, Addr*, UWord ),
                 ExeContext* (*get_EC)( Thr* )
               )
{
   Thr* thr;
   VtsID vi;
   tl_assert(get_stacktrace);
   tl_assert(get_EC);
   main_get_stacktrace = get_stacktrace;
   main_get_EC = get_EC;
4344
4345 // No need to initialise hg_wordfm.
4346 // No need to initialise hg_wordset.
4347
4348 vts_set_init();
4349 vts_tab_init();
4350 event_map_init();
4351 VtsID__invalidate_caches();
4352
4353 // initialise shadow memory
4354 zsm_init( SVal__rcinc, SVal__rcdec );
4355
4356 thr = Thr__new();
4357 vi = VtsID__mk_Singleton( thr, 1 );
4358 thr->viR = vi;
4359 thr->viW = vi;
4360 VtsID__rcinc(thr->viR);
4361 VtsID__rcinc(thr->viW);
4362
4363 show_thread_state(" root", thr);
4364 return thr;
4365}
4366
4367Thr* libhb_create ( Thr* parent )
4368{
4369 /* The child's VTSs are copies of the parent's VTSs, but ticked at
4370 the child's index. Since the child's index is guaranteed
4371 unique, it has never been seen before, so the implicit value
4372 before the tick is zero and after that is one. */
4373 Thr* child = Thr__new();
4374
4375 child->viR = VtsID__tick( parent->viR, child );
4376 child->viW = VtsID__tick( parent->viW, child );
4377 VtsID__rcinc(child->viR);
4378 VtsID__rcinc(child->viW);
4379
4380 tl_assert(VtsID__indexAt( child->viR, child ) == 1);
4381 tl_assert(VtsID__indexAt( child->viW, child ) == 1);
4382
4383 /* and the parent has to move along too */
4384 VtsID__rcdec(parent->viR);
4385 VtsID__rcdec(parent->viW);
4386 parent->viR = VtsID__tick( parent->viR, parent );
4387 parent->viW = VtsID__tick( parent->viW, parent );
4388 VtsID__rcinc(parent->viR);
4389 VtsID__rcinc(parent->viW);
4390
4391 show_thread_state(" child", child);
4392 show_thread_state("parent", parent);
4393
4394 return child;
4395}
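
/* Illustrative example: if the parent's clocks are the singleton
   { parent:3 }, then after libhb_create the child's clocks are
   { parent:3, child:1 } and the parent's have advanced to
   { parent:4 }.  Hence everything the parent did before the create
   happens-before everything the child subsequently does. */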
4396
/* Shut down the library, and print stats (in fact that's _all_
   this is for). */
4399void libhb_shutdown ( Bool show_stats )
4400{
4401 if (show_stats) {
4402 VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
4403 VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
4404 stats__secmaps_allocd,
4405 stats__secmap_ga_space_covered);
4406 VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
4407 stats__secmap_linesZ_allocd,
4408 stats__secmap_linesZ_bytes);
4409 VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
4410 stats__secmap_linesF_allocd,
4411 stats__secmap_linesF_bytes);
4412 VG_(printf)(" secmaps: %'10lu iterator steppings\n",
4413 stats__secmap_iterator_steppings);
4414 VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
4415 stats__secmaps_search, stats__secmaps_search_slow);
4416
4417 VG_(printf)("%s","\n");
4418 VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
4419 stats__cache_totrefs, stats__cache_totmisses );
4420 VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
4421 stats__cache_Z_fetches, stats__cache_F_fetches );
4422 VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
4423 stats__cache_Z_wbacks, stats__cache_F_wbacks );
4424 VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n",
4425 stats__cache_invals, stats__cache_flushes );
4426 VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
4427 stats__cache_make_New_arange,
4428 stats__cache_make_New_inZrep);
4429
4430 VG_(printf)("%s","\n");
4431 VG_(printf)(" cline: %'10lu normalises\n",
4432 stats__cline_normalises );
4433 VG_(printf)(" cline: rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4434 stats__cline_read64s,
4435 stats__cline_read32s,
4436 stats__cline_read16s,
4437 stats__cline_read8s );
4438 VG_(printf)(" cline: wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4439 stats__cline_write64s,
4440 stats__cline_write32s,
4441 stats__cline_write16s,
4442 stats__cline_write8s );
4443 VG_(printf)(" cline: sets 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4444 stats__cline_set64s,
4445 stats__cline_set32s,
4446 stats__cline_set16s,
4447 stats__cline_set8s );
4448 VG_(printf)(" cline: get1s %'lu, copy1s %'lu\n",
4449 stats__cline_get8s, stats__cline_copy8s );
4450 VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
4451 stats__cline_64to32splits,
4452 stats__cline_32to16splits,
4453 stats__cline_16to8splits );
4454 VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
4455 stats__cline_64to32pulldown,
4456 stats__cline_32to16pulldown,
4457 stats__cline_16to8pulldown );
4458 if (0)
         VG_(printf)(" cline: sizeof(LineZ) %ld, covers %ld bytes of arange\n",
                     (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);
4461
4462 VG_(printf)("%s","\n");
4463
4464 VG_(printf)(" libhb: %'13llu msm_read (%'llu changed)\n",
4465 stats__msm_read, stats__msm_read_change);
4466 VG_(printf)(" libhb: %'13llu msm_write (%'llu changed)\n",
4467 stats__msm_write, stats__msm_write_change);
4468 VG_(printf)(" libhb: %'13llu getOrd queries (%'llu misses)\n",
4469 stats__getOrdering_queries, stats__getOrdering_misses);
4470 VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
4471 stats__join2_queries, stats__join2_misses);
4472
4473 VG_(printf)("%s","\n");
4474 VG_(printf)(
4475 " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
4476 VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
4477 );
4478 VG_(printf)( " libhb: %lu entries in vts_set\n",
4479 VG_(sizeFM)( vts_set ) );
4480
4481 VG_(printf)("%s","\n");
4482 VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
4483 stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
4484 stats__ctxt_rcdec2,
4485 stats__ctxt_rcdec3 );
4486 VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
4487 stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
4488 VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n",
4489 (UWord)N_RCEC_TAB,
4490 stats__ctxt_tab_curr );
4491 VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
4492 stats__ctxt_tab_qs,
4493 stats__ctxt_tab_cmps );
4494#if 0
4495 VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
4496 VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
4497 VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
4498 VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
4499 VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
4500 VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
4501 VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
4502 VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
4503 VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
4504 VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
4505 VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
4506 VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
4507 VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
4508 VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
4509
4510 VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
4511 VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
4512 VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
4513 VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
4514#endif
4515
4516 VG_(printf)("%s","<<< END libhb stats >>>\n");
4517 VG_(printf)("%s","\n");
4518
4519 }
4520}
4521
4522void libhb_async_exit ( Thr* thr )
4523{
4524 /* is there anything we need to do? */
4525}
4526
4527/* Both Segs and SOs point to VTSs. However, there is no sharing, so
4528 a Seg that points at a VTS is its one-and-only owner, and ditto for
4529 a SO that points at a VTS. */
4530
4531SO* libhb_so_alloc ( void )
4532{
4533 return SO__Alloc();
4534}
4535
4536void libhb_so_dealloc ( SO* so )
4537{
4538 tl_assert(so);
4539 tl_assert(so->magic == SO_MAGIC);
4540 SO__Dealloc(so);
4541}
4542
4543/* See comments in libhb.h for details on the meaning of
4544 strong vs weak sends and strong vs weak receives. */
4545void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
4546{
4547 /* Copy the VTSs from 'thr' into the sync object, and then move
4548 the thread along one step. */
4549
4550 tl_assert(so);
4551 tl_assert(so->magic == SO_MAGIC);
4552
4553 /* stay sane .. a thread's read-clock must always lead or be the
4554 same as its write-clock */
4555 { POrd ord = VtsID__getOrdering(thr->viW, thr->viR);
4556 tl_assert(ord == POrd_EQ || ord == POrd_LT);
4557 }
4558
4559 /* since we're overwriting the VtsIDs in the SO, we need to drop
4560 any references made by the previous contents thereof */
4561 if (so->viR == VtsID_INVALID) {
4562 tl_assert(so->viW == VtsID_INVALID);
4563 so->viR = thr->viR;
4564 so->viW = thr->viW;
4565 VtsID__rcinc(so->viR);
4566 VtsID__rcinc(so->viW);
4567 } else {
4568 /* In a strong send, we dump any previous VC in the SO and
4569 install the sending thread's VC instead. For a weak send we
4570 must join2 with what's already there. */
4571 tl_assert(so->viW != VtsID_INVALID);
4572 VtsID__rcdec(so->viR);
4573 VtsID__rcdec(so->viW);
4574 so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
4575 so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
4576 VtsID__rcinc(so->viR);
4577 VtsID__rcinc(so->viW);
4578 }
4579
4580 /* move both parent clocks along */
4581 VtsID__rcdec(thr->viR);
4582 VtsID__rcdec(thr->viW);
4583 thr->viR = VtsID__tick( thr->viR, thr );
4584 thr->viW = VtsID__tick( thr->viW, thr );
4585 VtsID__rcinc(thr->viR);
4586 VtsID__rcinc(thr->viW);
4587 if (strong_send)
4588 show_thread_state("s-send", thr);
4589 else
4590 show_thread_state("w-send", thr);
4591}
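
/* Illustrative example: suppose the SO already holds { T1:5 } and
   thread T2, whose clock is { T2:7 }, performs a send.  A strong send
   overwrites, leaving the SO with { T2:7 }; a weak send joins,
   leaving { T1:5, T2:7 }.  In both cases T2's own clock then ticks on
   to { T2:8 }, so the send itself is ordered before any later receive
   from this SO. */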
4592
4593void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
4594{
4595 tl_assert(so);
4596 tl_assert(so->magic == SO_MAGIC);
4597
4598 if (so->viR != VtsID_INVALID) {
4599 tl_assert(so->viW != VtsID_INVALID);
4600
4601 /* Weak receive (basically, an R-acquisition of a R-W lock).
4602 This advances the read-clock of the receiver, but not the
4603 write-clock. */
4604 VtsID__rcdec(thr->viR);
4605 thr->viR = VtsID__join2( thr->viR, so->viR );
4606 VtsID__rcinc(thr->viR);
4607
4608 /* For a strong receive, we also advance the receiver's write
4609 clock, which means the receive as a whole is essentially
4610 equivalent to a W-acquisition of a R-W lock. */
4611 if (strong_recv) {
4612 VtsID__rcdec(thr->viW);
4613 thr->viW = VtsID__join2( thr->viW, so->viW );
4614 VtsID__rcinc(thr->viW);
4615 }
4616
4617 if (strong_recv)
4618 show_thread_state("s-recv", thr);
4619 else
4620 show_thread_state("w-recv", thr);
4621
4622 } else {
4623 tl_assert(so->viW == VtsID_INVALID);
4624 /* Deal with degenerate case: 'so' has no vts, so there has been
4625 no message posted to it. Just ignore this case. */
4626 show_thread_state("d-recv", thr);
4627 }
4628}
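
/* A minimal, hypothetical usage sketch (client code, not part of the
   library; 'thrA' and 'thrB' are assumed to have been obtained from
   libhb_init/libhb_create): posting on an SO in one thread and
   receiving from it in another creates a happens-before edge from the
   send to the receive. */
#if 0
   {
      SO* so = libhb_so_alloc();
      /* in thread thrA: */
      libhb_so_send( thrA, so, True/*strong_send*/ );
      /* later, in thread thrB: */
      libhb_so_recv( thrB, so, True/*strong_recv*/ );
      /* accesses made by thrA before the send are now ordered before
         accesses made by thrB after the receive */
      libhb_so_dealloc( so );
   }
#endif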
4629
4630Bool libhb_so_everSent ( SO* so )
4631{
4632 if (so->viR == VtsID_INVALID) {
4633 tl_assert(so->viW == VtsID_INVALID);
4634 return False;
4635 } else {
4636 tl_assert(so->viW != VtsID_INVALID);
4637 return True;
4638 }
4639}
4640
/* Debugging aid: set XXX1 and/or XXX2 to a non-zero address to trace
   all range operations which touch that address. */
#define XXX1 0 // 0x67a106c
#define XXX2 0
4643
4644static Bool TRACEME(Addr a, SizeT szB) {
4645 if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
4646 if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
4647 return False;
4648}
4649static void trace ( Thr* thr, Addr a, SizeT szB, HChar* s ) {
4650 SVal sv = zsm_read8(a);
4651 VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
4652 show_thread_state("", thr);
4653 VG_(printf)("%s","\n");
4654}
4655
4656void libhb_range_new ( Thr* thr, Addr a, SizeT szB )
4657{
4658 SVal sv = SVal__mkC(thr->viW, thr->viW);
4659 tl_assert(is_sane_SVal_C(sv));
4660 if(TRACEME(a,szB))trace(thr,a,szB,"nw-before");
4661 zsm_set_range( a, szB, sv );
4662 if(TRACEME(a,szB))trace(thr,a,szB,"nw-after ");
4663}
4664
4665void libhb_range_noaccess ( Thr* thr, Addr a, SizeT szB )
4666{
4667 if(TRACEME(a,szB))trace(thr,a,szB,"NA-before");
4668 zsm_set_range( a, szB, SVal__mkA() );
4669 if(TRACEME(a,szB))trace(thr,a,szB,"NA-after ");
4670}
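
/* Note: freshly allocated ("new") memory is given the constraint pair
   (viW, viW) of the allocating thread, as if that thread had just
   written it, whereas no-access memory is set to the special "A"
   value, which the state machine leaves unchanged on subsequent
   accesses (see msm_write above). */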
4671
4672void* libhb_get_Thr_opaque ( Thr* thr ) {
4673 tl_assert(thr);
4674 return thr->opaque;
4675}
4676
4677void libhb_set_Thr_opaque ( Thr* thr, void* v ) {
4678 tl_assert(thr);
4679 thr->opaque = v;
4680}
4681
4682void libhb_copy_shadow_state ( Addr dst, Addr src, SizeT len )
4683{
4684 zsm_copy_range(dst, src, len);
4685}
4686
4687void libhb_maybe_GC ( void )
4688{
4689 event_map_maybe_GC();
4690 /* If there are still freelist entries available, no need for a
4691 GC. */
4692 if (vts_tab_freelist != VtsID_INVALID)
4693 return;
   /* So all the table entries are full, and we're having to expand
      the table.  But did we hit the threshold point yet? */
4696 if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
4697 return;
4698 vts_tab__do_GC( False/*don't show stats*/ );
4699}
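
/* So a GC of the VTS table is attempted only when both conditions
   hold: the free list is exhausted and the table has grown past the
   current vts_next_GC_at threshold.  event_map_maybe_GC, by contrast,
   gets the chance to collect on every call. */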
4700
4701
4702/////////////////////////////////////////////////////////////////
4703/////////////////////////////////////////////////////////////////
4704// //
4705// SECTION END main library //
4706// //
4707/////////////////////////////////////////////////////////////////
4708/////////////////////////////////////////////////////////////////
4709
4710/*--------------------------------------------------------------------*/
4711/*--- end libhb_main.c ---*/
4712/*--------------------------------------------------------------------*/