1
2/*--------------------------------------------------------------------*/
3/*--- LibHB: a library for implementing and checking ---*/
4/*--- the happens-before relationship in concurrent programs. ---*/
5/*--- libhb_main.c ---*/
6/*--------------------------------------------------------------------*/
7
8/*
9 This file is part of LibHB, a library for implementing and checking
10 the happens-before relationship in concurrent programs.
11
12 Copyright (C) 2008-2008 OpenWorks Ltd
13 info@open-works.co.uk
14
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file COPYING.
31*/
32
33#include "pub_tool_basics.h"
34#include "pub_tool_libcassert.h"
35#include "pub_tool_libcbase.h"
36#include "pub_tool_libcprint.h"
37#include "pub_tool_mallocfree.h"
38#include "pub_tool_wordfm.h"
39#include "pub_tool_xarray.h"
40#include "pub_tool_oset.h"
41#include "pub_tool_threadstate.h"
42#include "pub_tool_aspacemgr.h"
43#include "pub_tool_execontext.h"
44#include "pub_tool_errormgr.h"
 45#include "pub_tool_options.h"  // VG_(clo_verbosity)
 46#include "hg_basics.h"
47#include "hg_wordset.h"
48#include "hg_lock_n_thread.h"
49#include "hg_errors.h"
50
51#include "libhb.h"
52
53
 54/* Globals needed by other parts of the library.  These are set
 55   once at startup and then never changed. */
 56
57static void (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
 58static ExeContext* (*main_get_EC)( Thr* ) = NULL;
 59
60/////////////////////////////////////////////////////////////////
61/////////////////////////////////////////////////////////////////
62// //
63// //
64// //
65/////////////////////////////////////////////////////////////////
66/////////////////////////////////////////////////////////////////
67
68
69/////////////////////////////////////////////////////////////////
70/////////////////////////////////////////////////////////////////
71// //
72// SECTION BEGIN compressed shadow memory //
73// //
74/////////////////////////////////////////////////////////////////
75/////////////////////////////////////////////////////////////////
76
77#ifndef __HB_ZSM_H
78#define __HB_ZSM_H
79
80typedef ULong SVal;
81
82/* This value has special significance to the implementation, and callers
83 may not store it in the shadow memory. */
84#define SVal_INVALID (3ULL << 62)
85
86/* This is the default value for shadow memory. Initially the shadow
87 memory contains no accessible areas and so all reads produce this
 88 value. TODO: make this caller-definable. */
89#define SVal_NOACCESS (2ULL << 62)
90
91/* Initialise the library. Once initialised, it will (or may) call
92 rcinc and rcdec in response to all the calls below, in order to
93 allow the user to do reference counting on the SVals stored herein.
94 It is important to understand, however, that due to internal
95 caching, the reference counts are in general inaccurate, and can be
96 both above or below the true reference count for an item. In
97 particular, the library may indicate that the reference count for
98 an item is zero, when in fact it is not.
99
100 To make the reference counting exact and therefore non-pointless,
101 call zsm_flush_cache. Immediately after it returns, the reference
102 counts for all items, as deduced by the caller by observing calls
103 to rcinc and rcdec, will be correct, and so any items with a zero
104 reference count may be freed (or at least considered to be
105 unreferenced by this library).
106*/
107static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );
108
109static void zsm_set_range ( Addr, SizeT, SVal );
110static SVal zsm_read8 ( Addr );
111static void zsm_copy_range ( Addr, Addr, SizeT );
112static void zsm_flush_cache ( void );
113
114#endif /* ! __HB_ZSM_H */
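
/* Illustrative sketch (not part of the library): one way a caller
   might drive the rcinc/rcdec contract described above.  The names
   my_rcinc, my_rcdec and my_count_for are hypothetical.

      static void my_rcinc ( SVal sv ) { my_count_for(sv)++; }
      static void my_rcdec ( SVal sv ) { my_count_for(sv)--; }

      zsm_init( my_rcinc, my_rcdec );
      ... lots of zsm_set_range / zsm_read8 / zsm_copy_range ...
      zsm_flush_cache();
      // Only now are the observed counts exact; any SVal whose
      // observed count is zero is unreferenced by this library.
*/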
115
116
117/* For the shadow mem cache stuff we may want more intrusive
118 checks. Unfortunately there's no almost-zero-cost way to make them
119 selectable at run time. Hence set the #if 0 to #if 1 and
120 rebuild if you want them. */
121#if 0
122# define SCE_CACHELINE 1 /* do sanity-check CacheLine stuff */
123# define inline __attribute__((noinline))
124 /* probably want to ditch -fomit-frame-pointer too */
125#else
126# define SCE_CACHELINE 0 /* don't sanity-check CacheLine stuff */
127#endif
128
129/* For the SegmentID, SegmentSet and SVal stuff we may want more
130 intrusive checks. Again there's no zero cost way to do this. Set
131 the #if 0 to #if 1 and rebuild if you want them. */
132#if 0
133# define SCE_SVALS 1 /* sanity-check shadow value stuff */
134#else
135# define SCE_SVALS 0
136#endif
137
138
139/* Round a up to the next multiple of N. N must be a power of 2 */
140#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N)-1))
141/* Round a down to the next multiple of N. N must be a power of 2 */
142#define ROUNDDN(a, N)   ((a) & ~((N)-1))
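
/* Worked example: ROUNDUP(13, 8) == (13+7) & ~7 == 16, and
   ROUNDDN(13, 8) == 13 & ~7 == 8.  For already-aligned values both
   macros are the identity: ROUNDUP(16, 8) == ROUNDDN(16, 8) == 16. */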
143
144
145
146/* ------ User-supplied RC functions ------ */
147static void(*rcinc)(SVal) = NULL;
148static void(*rcdec)(SVal) = NULL;
149
150
151/* ------ CacheLine ------ */
152
153#define N_LINE_BITS 6 /* must be >= 3 */
154#define N_LINE_ARANGE (1 << N_LINE_BITS)
155#define N_LINE_TREES (N_LINE_ARANGE >> 3)
156
157typedef
158 struct {
159 UShort descrs[N_LINE_TREES];
160 SVal svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
161 }
162 CacheLine;
163
164#define TREE_DESCR_16_0 (1<<0)
165#define TREE_DESCR_32_0 (1<<1)
166#define TREE_DESCR_16_1 (1<<2)
167#define TREE_DESCR_64 (1<<3)
168#define TREE_DESCR_16_2 (1<<4)
169#define TREE_DESCR_32_1 (1<<5)
170#define TREE_DESCR_16_3 (1<<6)
171#define TREE_DESCR_8_0 (1<<7)
172#define TREE_DESCR_8_1 (1<<8)
173#define TREE_DESCR_8_2 (1<<9)
174#define TREE_DESCR_8_3 (1<<10)
175#define TREE_DESCR_8_4 (1<<11)
176#define TREE_DESCR_8_5 (1<<12)
177#define TREE_DESCR_8_6 (1<<13)
178#define TREE_DESCR_8_7 (1<<14)
179#define TREE_DESCR_DTY (1<<15)
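
/* Example of how these bits are used (see descr_to_validbits below):
   a tree whose 8-byte range holds a single 64-bit shadow value has
   descr == TREE_DESCR_64, with only tree[0] valid; a tree holding one
   32-bit value in bytes 0..3 plus four 8-bit values in bytes 4..7 has
   descr == TREE_DESCR_32_0 | TREE_DESCR_8_4 | TREE_DESCR_8_5
            | TREE_DESCR_8_6 | TREE_DESCR_8_7,
   with tree[0] and tree[4..7] valid. */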
180
181typedef
182 struct {
183 SVal dict[4]; /* can represent up to 4 diff values in the line */
184 UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
185 dict indexes */
186 /* if dict[0] == SVal_INVALID then dict[1] is the index of the
187 LineF to use, and dict[2..] are also SVal_INVALID. */
188 }
189 LineZ; /* compressed rep for a cache line */
190
191typedef
192 struct {
193 Bool inUse;
194 SVal w64s[N_LINE_ARANGE];
195 }
196 LineF; /* full rep for a cache line */
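
/* Worked example of the compressed (Z) representation: a 64-byte line
   in which bytes 0..31 all have per-byte shadow value A and bytes
   32..63 all have value B compresses to
      dict == { A, B, SVal_INVALID, SVal_INVALID }
      ix2s == 32 two-bit entries of 0 followed by 32 entries of 1,
              packed 4 per byte, i.e. ix2s[0..7] == 0x00 and
              ix2s[8..15] == 0x55.
   A line needing more than 4 distinct values cannot be represented as
   a LineZ and falls back to a LineF (see cacheline_wback below). */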
197
198/* Shadow memory.
199 Primary map is a WordFM Addr SecMap*.
200 SecMaps cover some page-size-ish section of address space and hold
201 a compressed representation.
202 CacheLine-sized chunks of SecMaps are copied into a Cache, being
203 decompressed when moved into the cache and recompressed on the
204 way out. Because of this, the cache must operate as a writeback
205 cache, not a writethrough one.
206
207 Each SecMap must hold a power-of-2 number of CacheLines. Hence
 208 N_SECMAP_BITS must be >= N_LINE_BITS.
209*/
210#define N_SECMAP_BITS 13
211#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)
212
213// # CacheLines held by a SecMap
214#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
215
216/* The data in the SecMap is held in the array of LineZs. Each LineZ
217 either carries the required data directly, in a compressed
218 representation, or it holds (in .dict[0]) an index to the LineF in
219 .linesF that holds the full representation.
220
221 Currently-unused LineF's have their .inUse bit set to zero.
 222 Since each in-use LineF is referred to by exactly one LineZ,
223 the number of .linesZ[] that refer to .linesF should equal
224 the number of .linesF[] that have .inUse == True.
225
226 RC obligations: the RCs presented to the user include exactly
227 the values in:
228 * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
229 * F reps that are in use (.inUse == True)
230
231 Hence the following actions at the following transitions are required:
232
233 F rep: .inUse==True -> .inUse==False -- rcdec_LineF
234 F rep: .inUse==False -> .inUse==True -- rcinc_LineF
235 Z rep: .dict[0] from other to SVal_INVALID -- rcdec_LineZ
236 Z rep: .dict[0] from SVal_INVALID to other -- rcinc_LineZ
237*/
238typedef
239 struct {
240 UInt magic;
241 LineZ linesZ[N_SECMAP_ZLINES];
242 LineF* linesF;
243 UInt linesF_size;
244 }
245 SecMap;
246
247#define SecMap_MAGIC 0x571e58cbU
248
249static inline Bool is_sane_SecMap ( SecMap* sm ) {
250 return sm != NULL && sm->magic == SecMap_MAGIC;
251}
252
253/* ------ Cache ------ */
254
255#define N_WAY_BITS 16
256#define N_WAY_NENT (1 << N_WAY_BITS)
257
258/* Each tag is the address of the associated CacheLine, rounded down
259 to a CacheLine address boundary. A CacheLine size must be a power
260 of 2 and must be 8 or more. Hence an easy way to initialise the
261 cache so it is empty is to set all the tag values to any value % 8
262 != 0, eg 1. This means all queries in the cache initially miss.
 263 It does however require us to detect, and not write back, any line
264 with a bogus tag. */
265typedef
266 struct {
267 CacheLine lyns0[N_WAY_NENT];
268 Addr tags0[N_WAY_NENT];
269 }
270 Cache;
271
272static inline Bool is_valid_scache_tag ( Addr tag ) {
273 /* a valid tag should be naturally aligned to the start of
274 a CacheLine. */
275 return 0 == (tag & (N_LINE_ARANGE - 1));
276}
277
278
279/* --------- Primary data structures --------- */
280
281/* Shadow memory primary map */
282static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
283static Cache cache_shmem;
284
285
286static UWord stats__secmaps_search = 0; // # SM finds
287static UWord stats__secmaps_search_slow = 0; // # SM lookupFMs
288static UWord stats__secmaps_allocd = 0; // # SecMaps issued
289static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
290static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
291static UWord stats__secmap_linesZ_bytes = 0; // .. using this much storage
292static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
293static UWord stats__secmap_linesF_bytes = 0; // .. using this much storage
294static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
295static UWord stats__cache_Z_fetches = 0; // # Z lines fetched
296static UWord stats__cache_Z_wbacks = 0; // # Z lines written back
297static UWord stats__cache_F_fetches = 0; // # F lines fetched
298static UWord stats__cache_F_wbacks = 0; // # F lines written back
299static UWord stats__cache_invals = 0; // # cache invals
300static UWord stats__cache_flushes = 0; // # cache flushes
301static UWord stats__cache_totrefs = 0; // # total accesses
302static UWord stats__cache_totmisses = 0; // # misses
303static ULong stats__cache_make_New_arange = 0; // total arange made New
304static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
305static UWord stats__cline_normalises = 0; // # calls to cacheline_normalise
306static UWord stats__cline_read64s = 0; // # calls to s_m_read64
307static UWord stats__cline_read32s = 0; // # calls to s_m_read32
308static UWord stats__cline_read16s = 0; // # calls to s_m_read16
309static UWord stats__cline_read8s = 0; // # calls to s_m_read8
310static UWord stats__cline_write64s = 0; // # calls to s_m_write64
311static UWord stats__cline_write32s = 0; // # calls to s_m_write32
312static UWord stats__cline_write16s = 0; // # calls to s_m_write16
313static UWord stats__cline_write8s = 0; // # calls to s_m_write8
314static UWord stats__cline_set64s = 0; // # calls to s_m_set64
315static UWord stats__cline_set32s = 0; // # calls to s_m_set32
316static UWord stats__cline_set16s = 0; // # calls to s_m_set16
317static UWord stats__cline_set8s = 0; // # calls to s_m_set8
318static UWord stats__cline_get8s = 0; // # calls to s_m_get8
319static UWord stats__cline_copy8s = 0; // # calls to s_m_copy8
320static UWord stats__cline_64to32splits = 0; // # 64-bit accesses split
321static UWord stats__cline_32to16splits = 0; // # 32-bit accesses split
322static UWord stats__cline_16to8splits = 0; // # 16-bit accesses split
323static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
324static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
325static UWord stats__cline_16to8pulldown = 0; // # calls to pulldown_to_8
326
327static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
328 return a & ~(N_SECMAP_ARANGE - 1);
329}
330static inline UWord shmem__get_SecMap_offset ( Addr a ) {
331 return a & (N_SECMAP_ARANGE - 1);
332}
333
334
335/*----------------------------------------------------------------*/
336/*--- map_shmem :: WordFM Addr SecMap ---*/
337/*--- shadow memory (low level handlers) (shmem__* fns) ---*/
338/*----------------------------------------------------------------*/
339
340/*--------------- SecMap allocation --------------- */
341
342static HChar* shmem__bigchunk_next = NULL;
343static HChar* shmem__bigchunk_end1 = NULL;
344
345static void* shmem__bigchunk_alloc ( SizeT n )
346{
347 const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
348 tl_assert(n > 0);
349 n = VG_ROUNDUP(n, 16);
350 tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
351 tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
352 <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
353 if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
354 if (0)
355 VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
356 (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
357 shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
358 if (shmem__bigchunk_next == NULL)
359 VG_(out_of_memory_NORETURN)(
360 "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
361 shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
362 }
363 tl_assert(shmem__bigchunk_next);
364 tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
365 tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
366 shmem__bigchunk_next += n;
367 return shmem__bigchunk_next - n;
368}
369
370static SecMap* shmem__alloc_SecMap ( void )
371{
372 Word i, j;
373 SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
374 if (0) VG_(printf)("alloc_SecMap %p\n",sm);
375 tl_assert(sm);
376 sm->magic = SecMap_MAGIC;
377 for (i = 0; i < N_SECMAP_ZLINES; i++) {
378 sm->linesZ[i].dict[0] = SVal_NOACCESS;
379 sm->linesZ[i].dict[1] = SVal_INVALID;
380 sm->linesZ[i].dict[2] = SVal_INVALID;
381 sm->linesZ[i].dict[3] = SVal_INVALID;
382 for (j = 0; j < N_LINE_ARANGE/4; j++)
383 sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
384 }
385 sm->linesF = NULL;
386 sm->linesF_size = 0;
387 stats__secmaps_allocd++;
388 stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
389 stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
390 stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
391 return sm;
392}
393
394typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
395static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
396
397static SecMap* shmem__find_SecMap ( Addr ga )
398{
399 SecMap* sm = NULL;
400 Addr gaKey = shmem__round_to_SecMap_base(ga);
401 // Cache
402 stats__secmaps_search++;
403 if (LIKELY(gaKey == smCache[0].gaKey))
404 return smCache[0].sm;
405 if (LIKELY(gaKey == smCache[1].gaKey)) {
406 SMCacheEnt tmp = smCache[0];
407 smCache[0] = smCache[1];
408 smCache[1] = tmp;
409 return smCache[0].sm;
410 }
411 if (gaKey == smCache[2].gaKey) {
412 SMCacheEnt tmp = smCache[1];
413 smCache[1] = smCache[2];
414 smCache[2] = tmp;
415 return smCache[1].sm;
416 }
417 // end Cache
418 stats__secmaps_search_slow++;
419 if (VG_(lookupFM)( map_shmem,
420 NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
421 tl_assert(sm != NULL);
422 smCache[2] = smCache[1];
423 smCache[1] = smCache[0];
424 smCache[0].gaKey = gaKey;
425 smCache[0].sm = sm;
426 } else {
427 tl_assert(sm == NULL);
428 }
429 return sm;
430}
431
432static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
433{
434 SecMap* sm = shmem__find_SecMap ( ga );
435 if (LIKELY(sm)) {
436 return sm;
437 } else {
438 /* create a new one */
439 Addr gaKey = shmem__round_to_SecMap_base(ga);
440 sm = shmem__alloc_SecMap();
441 tl_assert(sm);
442 VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
443 return sm;
444 }
445}
446
447
448/* ------------ LineF and LineZ related ------------ */
449
450static void rcinc_LineF ( LineF* lineF ) {
451 UWord i;
452 tl_assert(lineF->inUse);
453 for (i = 0; i < N_LINE_ARANGE; i++)
454 rcinc(lineF->w64s[i]);
455}
456
457static void rcdec_LineF ( LineF* lineF ) {
458 UWord i;
459 tl_assert(lineF->inUse);
460 for (i = 0; i < N_LINE_ARANGE; i++)
461 rcdec(lineF->w64s[i]);
462}
463
464static void rcinc_LineZ ( LineZ* lineZ ) {
465 tl_assert(lineZ->dict[0] != SVal_INVALID);
466 rcinc(lineZ->dict[0]);
467 if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
468 if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
469 if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
470}
471
472static void rcdec_LineZ ( LineZ* lineZ ) {
473 tl_assert(lineZ->dict[0] != SVal_INVALID);
474 rcdec(lineZ->dict[0]);
475 if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
476 if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
477 if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
478}
479
480inline
481static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
482 Word bix, shft, mask, prep;
483 tl_assert(ix >= 0);
484 bix = ix >> 2;
485 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
486 mask = 3 << shft;
487 prep = b2 << shft;
488 arr[bix] = (arr[bix] & ~mask) | prep;
489}
490
491inline
492static UWord read_twobit_array ( UChar* arr, UWord ix ) {
493 Word bix, shft;
494 tl_assert(ix >= 0);
495 bix = ix >> 2;
496 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
497 return (arr[bix] >> shft) & 3;
498}
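
/* Worked example: write_twobit_array(arr, 5, 2) computes bix == 1 and
   shft == 2, so it sets bits 3:2 of arr[1] to the value 2 while
   leaving the other three 2-bit fields of arr[1] untouched;
   read_twobit_array(arr, 5) then returns (arr[1] >> 2) & 3 == 2. */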
499
500/* Given address 'tag', find either the Z or F line containing relevant
501 data, so it can be read into the cache.
502*/
503static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
504 /*OUT*/LineF** fp, Addr tag ) {
505 LineZ* lineZ;
506 LineF* lineF;
507 UWord zix;
508 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
509 UWord smoff = shmem__get_SecMap_offset(tag);
510 /* since smoff is derived from a valid tag, it should be
511 cacheline-aligned. */
512 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
513 zix = smoff >> N_LINE_BITS;
514 tl_assert(zix < N_SECMAP_ZLINES);
515 lineZ = &sm->linesZ[zix];
516 lineF = NULL;
517 if (lineZ->dict[0] == SVal_INVALID) {
518 UInt fix = (UInt)lineZ->dict[1];
519 tl_assert(sm->linesF);
520 tl_assert(sm->linesF_size > 0);
521 tl_assert(fix >= 0 && fix < sm->linesF_size);
522 lineF = &sm->linesF[fix];
523 tl_assert(lineF->inUse);
524 lineZ = NULL;
525 }
526 *zp = lineZ;
527 *fp = lineF;
528}
529
530/* Given address 'tag', return the relevant SecMap and the index of
531 the LineZ within it, in the expectation that the line is to be
532 overwritten. Regardless of whether 'tag' is currently associated
 533 with a Z or F representation, an rcdec is done on the current
534 representation, in recognition of the fact that the contents are
535 just about to be overwritten. */
536static __attribute__((noinline))
537void find_Z_for_writing ( /*OUT*/SecMap** smp,
538 /*OUT*/Word* zixp,
539 Addr tag ) {
540 LineZ* lineZ;
541 LineF* lineF;
542 UWord zix;
543 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
544 UWord smoff = shmem__get_SecMap_offset(tag);
545 /* since smoff is derived from a valid tag, it should be
546 cacheline-aligned. */
547 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
548 zix = smoff >> N_LINE_BITS;
549 tl_assert(zix < N_SECMAP_ZLINES);
550 lineZ = &sm->linesZ[zix];
551 lineF = NULL;
552 /* re RCs, we are freeing up this LineZ/LineF so that new data can
553 be parked in it. Hence have to rcdec it accordingly. */
554 /* If lineZ has an associated lineF, free it up. */
555 if (lineZ->dict[0] == SVal_INVALID) {
556 UInt fix = (UInt)lineZ->dict[1];
557 tl_assert(sm->linesF);
558 tl_assert(sm->linesF_size > 0);
559 tl_assert(fix >= 0 && fix < sm->linesF_size);
560 lineF = &sm->linesF[fix];
561 tl_assert(lineF->inUse);
562 rcdec_LineF(lineF);
563 lineF->inUse = False;
564 } else {
565 rcdec_LineZ(lineZ);
566 }
567 *smp = sm;
568 *zixp = zix;
569}
570
571static __attribute__((noinline))
572void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
573 UInt i, new_size;
574 LineF* nyu;
575
576 if (sm->linesF) {
577 tl_assert(sm->linesF_size > 0);
578 } else {
579 tl_assert(sm->linesF_size == 0);
580 }
581
582 if (sm->linesF) {
583 for (i = 0; i < sm->linesF_size; i++) {
584 if (!sm->linesF[i].inUse) {
585 *fixp = (Word)i;
586 return;
587 }
588 }
589 }
590
591 /* No free F line found. Expand existing array and try again. */
592 new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
593 nyu = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
594 new_size * sizeof(LineF) );
595 tl_assert(nyu);
596
597 stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
598 stats__secmap_linesF_bytes += (new_size - sm->linesF_size)
599 * sizeof(LineF);
600
601 if (0)
602 VG_(printf)("SM %p: expand F array from %d to %d\n",
603 sm, (Int)sm->linesF_size, new_size);
604
605 for (i = 0; i < new_size; i++)
606 nyu[i].inUse = False;
607
608 if (sm->linesF) {
609 for (i = 0; i < sm->linesF_size; i++) {
610 tl_assert(sm->linesF[i].inUse);
611 nyu[i] = sm->linesF[i];
612 }
613 VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
614 HG_(free)(sm->linesF);
615 }
616
617 sm->linesF = nyu;
618 sm->linesF_size = new_size;
619
620 for (i = 0; i < sm->linesF_size; i++) {
621 if (!sm->linesF[i].inUse) {
622 *fixp = (Word)i;
623 return;
624 }
625 }
626
627 /*NOTREACHED*/
628 tl_assert(0);
629}
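
/* Note on the growth policy above: linesF_size follows the sequence
   0 -> 1 -> 2 -> 4 -> 8 -> ..., doubling on each expansion.  At the
   point of expansion every existing LineF is necessarily in use
   (otherwise a free slot would already have been returned), which is
   why the copy loop may assert sm->linesF[i].inUse. */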
630
631
632/* ------------ CacheLine and implicit-tree related ------------ */
633
634__attribute__((unused))
635static void pp_CacheLine ( CacheLine* cl ) {
636 Word i;
637 if (!cl) {
638 VG_(printf)("%s","pp_CacheLine(NULL)\n");
639 return;
640 }
641 for (i = 0; i < N_LINE_TREES; i++)
642 VG_(printf)(" descr: %04lx\n", (UWord)cl->descrs[i]);
643 for (i = 0; i < N_LINE_ARANGE; i++)
644 VG_(printf)(" sval: %08lx\n", (UWord)cl->svals[i]);
645}
646
647static UChar descr_to_validbits ( UShort descr )
648{
649 /* a.k.a Party Time for gcc's constant folder */
650# define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
651 b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0) \
652 ( (UShort) ( ( (b8_7) << 14) | ( (b8_6) << 13) | \
653 ( (b8_5) << 12) | ( (b8_4) << 11) | \
654 ( (b8_3) << 10) | ( (b8_2) << 9) | \
655 ( (b8_1) << 8) | ( (b8_0) << 7) | \
656 ( (b16_3) << 6) | ( (b32_1) << 5) | \
657 ( (b16_2) << 4) | ( (b64) << 3) | \
658 ( (b16_1) << 2) | ( (b32_0) << 1) | \
659 ( (b16_0) << 0) ) )
660
661# define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
662 ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
663 ( (bit5) << 5) | ( (bit4) << 4) | \
664 ( (bit3) << 3) | ( (bit2) << 2) | \
665 ( (bit1) << 1) | ( (bit0) << 0) ) )
666
667 /* these should all get folded out at compile time */
668 tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
669 tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
670 tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
671 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
672 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
673 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
674 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
675 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
676 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);
677
678 switch (descr) {
679 /*
680 +--------------------------------- TREE_DESCR_8_7
681 | +------------------- TREE_DESCR_8_0
682 | | +---------------- TREE_DESCR_16_3
683 | | | +-------------- TREE_DESCR_32_1
684 | | | | +------------ TREE_DESCR_16_2
685 | | | | | +--------- TREE_DESCR_64
686 | | | | | | +------ TREE_DESCR_16_1
687 | | | | | | | +---- TREE_DESCR_32_0
688 | | | | | | | | +-- TREE_DESCR_16_0
689 | | | | | | | | |
690 | | | | | | | | | GRANULARITY, 7 -> 0 */
691 case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */
692 return BYTE(1,1,1,1,1,1,1,1);
693 case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */
694 return BYTE(1,1,0,1,1,1,1,1);
695 case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */
696 return BYTE(0,1,1,1,1,1,1,1);
697 case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */
698 return BYTE(0,1,0,1,1,1,1,1);
699
700 case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */
701 return BYTE(1,1,1,1,1,1,0,1);
702 case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */
703 return BYTE(1,1,0,1,1,1,0,1);
704 case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */
705 return BYTE(0,1,1,1,1,1,0,1);
706 case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */
707 return BYTE(0,1,0,1,1,1,0,1);
708
709 case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */
710 return BYTE(1,1,1,1,0,1,1,1);
711 case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */
712 return BYTE(1,1,0,1,0,1,1,1);
713 case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */
714 return BYTE(0,1,1,1,0,1,1,1);
715 case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */
716 return BYTE(0,1,0,1,0,1,1,1);
717
718 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */
719 return BYTE(1,1,1,1,0,1,0,1);
720 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */
721 return BYTE(1,1,0,1,0,1,0,1);
722 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */
723 return BYTE(0,1,1,1,0,1,0,1);
724 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */
725 return BYTE(0,1,0,1,0,1,0,1);
726
727 case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */
728 return BYTE(0,0,0,1,1,1,1,1);
729 case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */
730 return BYTE(0,0,0,1,1,1,0,1);
731 case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */
732 return BYTE(0,0,0,1,0,1,1,1);
733 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */
734 return BYTE(0,0,0,1,0,1,0,1);
735
736 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */
737 return BYTE(1,1,1,1,0,0,0,1);
738 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */
739 return BYTE(1,1,0,1,0,0,0,1);
740 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */
741 return BYTE(0,1,1,1,0,0,0,1);
742 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */
743 return BYTE(0,1,0,1,0,0,0,1);
744
745 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
746 return BYTE(0,0,0,1,0,0,0,1);
747
748 case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
749 return BYTE(0,0,0,0,0,0,0,1);
750
751 default: return BYTE(0,0,0,0,0,0,0,0);
752 /* INVALID - any valid descr produces at least one
753 valid bit in tree[0..7]*/
754 }
755 /* NOTREACHED*/
756 tl_assert(0);
757
758# undef DESCR
759# undef BYTE
760}
761
762__attribute__((unused))
763static Bool is_sane_Descr ( UShort descr ) {
764 return descr_to_validbits(descr) != 0;
765}
766
767static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
768 VG_(sprintf)(dst,
769 "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
770 (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
771 (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
772 (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
773 (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
774 (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
775 (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
776 (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
777 (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
778 (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
779 (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
780 (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
781 (Int)((descr & TREE_DESCR_64) ? 1 : 0),
782 (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
783 (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
784 (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
785 );
786}
787static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
788 VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
789 (Int)((byte & 128) ? 1 : 0),
790 (Int)((byte & 64) ? 1 : 0),
791 (Int)((byte & 32) ? 1 : 0),
792 (Int)((byte & 16) ? 1 : 0),
793 (Int)((byte & 8) ? 1 : 0),
794 (Int)((byte & 4) ? 1 : 0),
795 (Int)((byte & 2) ? 1 : 0),
796 (Int)((byte & 1) ? 1 : 0)
797 );
798}
799
800static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
801 Word i;
802 UChar validbits = descr_to_validbits(descr);
803 HChar buf[128], buf2[128];
804 if (validbits == 0)
805 goto bad;
806 for (i = 0; i < 8; i++) {
807 if (validbits & (1<<i)) {
808 if (tree[i] == SVal_INVALID)
809 goto bad;
810 } else {
811 if (tree[i] != SVal_INVALID)
812 goto bad;
813 }
814 }
815 return True;
816 bad:
817 sprintf_Descr( buf, descr );
818 sprintf_Byte( buf2, validbits );
819 VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
820 VG_(printf)(" validbits 0x%02lx %s\n", (UWord)validbits, buf2);
821 VG_(printf)(" descr 0x%04lx %s\n", (UWord)descr, buf);
822 for (i = 0; i < 8; i++)
823 VG_(printf)(" [%ld] 0x%016llx\n", i, tree[i]);
824 VG_(printf)("%s","}\n");
 825 return False;
826}
827
828static Bool is_sane_CacheLine ( CacheLine* cl )
829{
830 Word tno, cloff;
831
832 if (!cl) goto bad;
833
834 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
835 UShort descr = cl->descrs[tno];
836 SVal* tree = &cl->svals[cloff];
837 if (!is_sane_Descr_and_Tree(descr, tree))
838 goto bad;
839 }
840 tl_assert(cloff == N_LINE_ARANGE);
841 return True;
842 bad:
843 pp_CacheLine(cl);
844 return False;
845}
846
847static UShort normalise_tree ( /*MOD*/SVal* tree )
848{
849 UShort descr;
850 /* pre: incoming tree[0..7] does not have any invalid shvals, in
851 particular no zeroes. */
852 if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
853 || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
854 || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
855 || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
856 tl_assert(0);
857
858 descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
859 | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
860 | TREE_DESCR_8_1 | TREE_DESCR_8_0;
861 /* build 16-bit layer */
862 if (tree[1] == tree[0]) {
863 tree[1] = SVal_INVALID;
864 descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
865 descr |= TREE_DESCR_16_0;
866 }
867 if (tree[3] == tree[2]) {
868 tree[3] = SVal_INVALID;
869 descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
870 descr |= TREE_DESCR_16_1;
871 }
872 if (tree[5] == tree[4]) {
873 tree[5] = SVal_INVALID;
874 descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
875 descr |= TREE_DESCR_16_2;
876 }
877 if (tree[7] == tree[6]) {
878 tree[7] = SVal_INVALID;
879 descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
880 descr |= TREE_DESCR_16_3;
881 }
882 /* build 32-bit layer */
883 if (tree[2] == tree[0]
884 && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
885 tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
886 descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
887 descr |= TREE_DESCR_32_0;
888 }
889 if (tree[6] == tree[4]
890 && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
891 tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
892 descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
893 descr |= TREE_DESCR_32_1;
894 }
895 /* build 64-bit layer */
896 if (tree[4] == tree[0]
897 && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
898 tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
899 descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
900 descr |= TREE_DESCR_64;
901 }
902 return descr;
903}
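
/* Worked example of normalise_tree: if all eight leaves tree[0..7]
   hold the same (valid) SVal, the 16-bit, 32-bit and 64-bit merge
   steps all fire, and the result is descr == TREE_DESCR_64 with
   tree[0] holding the value and tree[1..7] == SVal_INVALID.  If only
   tree[0] == tree[1], the result keeps six 8-bit leaves:
   descr == TREE_DESCR_16_0 | TREE_DESCR_8_2 | ... | TREE_DESCR_8_7. */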
904
905/* This takes a cacheline where all the data is at the leaves
 906 (svals[..]) and builds a correctly normalised tree. */
907static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
908{
909 Word tno, cloff;
910 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
911 SVal* tree = &cl->svals[cloff];
912 cl->descrs[tno] = normalise_tree( tree );
913 }
914 tl_assert(cloff == N_LINE_ARANGE);
915 if (SCE_CACHELINE)
916 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
917 stats__cline_normalises++;
918}
919
920
921typedef struct { UChar count; SVal sval; } CountedSVal;
922
923static
924void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
925 /*OUT*/Word* dstUsedP,
926 Word nDst, CacheLine* src )
927{
928 Word tno, cloff, dstUsed;
929
930 tl_assert(nDst == N_LINE_ARANGE);
931 dstUsed = 0;
932
933 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
934 UShort descr = src->descrs[tno];
935 SVal* tree = &src->svals[cloff];
936
937 /* sequentialise the tree described by (descr,tree). */
938# define PUT(_n,_v) \
939 do { dst[dstUsed ].count = (_n); \
940 dst[dstUsed++].sval = (_v); \
941 } while (0)
942
943 /* byte 0 */
944 if (descr & TREE_DESCR_64) PUT(8, tree[0]); else
945 if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
946 if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
947 if (descr & TREE_DESCR_8_0) PUT(1, tree[0]);
948 /* byte 1 */
949 if (descr & TREE_DESCR_8_1) PUT(1, tree[1]);
950 /* byte 2 */
951 if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
952 if (descr & TREE_DESCR_8_2) PUT(1, tree[2]);
953 /* byte 3 */
954 if (descr & TREE_DESCR_8_3) PUT(1, tree[3]);
955 /* byte 4 */
956 if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
957 if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
958 if (descr & TREE_DESCR_8_4) PUT(1, tree[4]);
959 /* byte 5 */
960 if (descr & TREE_DESCR_8_5) PUT(1, tree[5]);
961 /* byte 6 */
962 if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
963 if (descr & TREE_DESCR_8_6) PUT(1, tree[6]);
964 /* byte 7 */
965 if (descr & TREE_DESCR_8_7) PUT(1, tree[7]);
966
967# undef PUT
968 /* END sequentialise the tree described by (descr,tree). */
969
970 }
971 tl_assert(cloff == N_LINE_ARANGE);
972 tl_assert(dstUsed <= nDst);
973
974 *dstUsedP = dstUsed;
975}
976
977/* Write the cacheline 'wix' to backing store. Where it ends up
978 is determined by its tag field. */
979static __attribute__((noinline)) void cacheline_wback ( UWord wix )
980{
981 Word i, j, k, m;
982 Addr tag;
983 SecMap* sm;
984 CacheLine* cl;
985 LineZ* lineZ;
986 LineF* lineF;
987 Word zix, fix, csvalsUsed;
988 CountedSVal csvals[N_LINE_ARANGE];
989 SVal sv;
990
991 if (0)
992 VG_(printf)("scache wback line %d\n", (Int)wix);
993
994 tl_assert(wix >= 0 && wix < N_WAY_NENT);
995
996 tag = cache_shmem.tags0[wix];
997 cl = &cache_shmem.lyns0[wix];
998
999 /* The cache line may have been invalidated; if so, ignore it. */
1000 if (!is_valid_scache_tag(tag))
1001 return;
1002
1003 /* Where are we going to put it? */
1004 sm = NULL;
1005 lineZ = NULL;
1006 lineF = NULL;
1007 zix = fix = -1;
1008
1009 /* find the Z line to write in and rcdec it or the associated F
1010 line. */
1011 find_Z_for_writing( &sm, &zix, tag );
1012
1013 tl_assert(sm);
1014 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
1015 lineZ = &sm->linesZ[zix];
1016
1017 /* Generate the data to be stored */
1018 if (SCE_CACHELINE)
1019 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1020
1021 csvalsUsed = -1;
1022 sequentialise_CacheLine( csvals, &csvalsUsed,
1023 N_LINE_ARANGE, cl );
1024 tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
1025 if (0) VG_(printf)("%lu ", csvalsUsed);
1026
1027 lineZ->dict[0] = lineZ->dict[1]
1028 = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1029
1030 /* i indexes actual shadow values, k is cursor in csvals */
1031 i = 0;
1032 for (k = 0; k < csvalsUsed; k++) {
1033
1034 sv = csvals[k].sval;
1035 if (SCE_SVALS)
1036 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1037 /* do we already have it? */
1038 if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
1039 if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
1040 if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
1041 if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
1042 /* no. look for a free slot. */
1043 if (SCE_SVALS)
1044 tl_assert(sv != SVal_INVALID);
1045 if (lineZ->dict[0]
1046 == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
1047 if (lineZ->dict[1]
1048 == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
1049 if (lineZ->dict[2]
1050 == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
1051 if (lineZ->dict[3]
1052 == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
1053 break; /* we'll have to use the f rep */
1054 dict_ok:
1055 m = csvals[k].count;
1056 if (m == 8) {
1057 write_twobit_array( lineZ->ix2s, i+0, j );
1058 write_twobit_array( lineZ->ix2s, i+1, j );
1059 write_twobit_array( lineZ->ix2s, i+2, j );
1060 write_twobit_array( lineZ->ix2s, i+3, j );
1061 write_twobit_array( lineZ->ix2s, i+4, j );
1062 write_twobit_array( lineZ->ix2s, i+5, j );
1063 write_twobit_array( lineZ->ix2s, i+6, j );
1064 write_twobit_array( lineZ->ix2s, i+7, j );
1065 i += 8;
1066 }
1067 else if (m == 4) {
1068 write_twobit_array( lineZ->ix2s, i+0, j );
1069 write_twobit_array( lineZ->ix2s, i+1, j );
1070 write_twobit_array( lineZ->ix2s, i+2, j );
1071 write_twobit_array( lineZ->ix2s, i+3, j );
1072 i += 4;
1073 }
1074 else if (m == 1) {
1075 write_twobit_array( lineZ->ix2s, i+0, j );
1076 i += 1;
1077 }
1078 else if (m == 2) {
1079 write_twobit_array( lineZ->ix2s, i+0, j );
1080 write_twobit_array( lineZ->ix2s, i+1, j );
1081 i += 2;
1082 }
1083 else {
1084 tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
1085 }
1086
1087 }
1088
1089 if (LIKELY(i == N_LINE_ARANGE)) {
1090 /* Construction of the compressed representation was
1091 successful. */
1092 rcinc_LineZ(lineZ);
1093 stats__cache_Z_wbacks++;
1094 } else {
1095 /* Cannot use the compressed(z) representation. Use the full(f)
1096 rep instead. */
1097 tl_assert(i >= 0 && i < N_LINE_ARANGE);
1098 alloc_F_for_writing( sm, &fix );
1099 tl_assert(sm->linesF);
1100 tl_assert(sm->linesF_size > 0);
1101 tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
1102 lineF = &sm->linesF[fix];
1103 tl_assert(!lineF->inUse);
1104 lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1105 lineZ->dict[1] = (SVal)fix;
1106 lineF->inUse = True;
1107 i = 0;
1108 for (k = 0; k < csvalsUsed; k++) {
1109 if (SCE_SVALS)
1110 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1111 sv = csvals[k].sval;
1112 if (SCE_SVALS)
1113 tl_assert(sv != SVal_INVALID);
1114 for (m = csvals[k].count; m > 0; m--) {
1115 lineF->w64s[i] = sv;
1116 i++;
1117 }
1118 }
1119 tl_assert(i == N_LINE_ARANGE);
1120 rcinc_LineF(lineF);
1121 stats__cache_F_wbacks++;
1122 }
1123
1124 //if (anyShared)
1125 // sm->mbHasShared = True;
1126
1127 /* mb_tidy_one_cacheline(); */
1128}
1129
1130/* Fetch the cacheline 'wix' from the backing store. The tag
1131 associated with 'wix' is assumed to have already been filled in;
1132 hence that is used to determine where in the backing store to read
1133 from. */
1134static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
1135{
1136 Word i;
1137 Addr tag;
1138 CacheLine* cl;
1139 LineZ* lineZ;
1140 LineF* lineF;
1141
1142 if (0)
1143 VG_(printf)("scache fetch line %d\n", (Int)wix);
1144
1145 tl_assert(wix >= 0 && wix < N_WAY_NENT);
1146
1147 tag = cache_shmem.tags0[wix];
1148 cl = &cache_shmem.lyns0[wix];
1149
1150 /* reject nonsense requests */
1151 tl_assert(is_valid_scache_tag(tag));
1152
1153 lineZ = NULL;
1154 lineF = NULL;
1155 find_ZF_for_reading( &lineZ, &lineF, tag );
1156 tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
1157
1158 /* expand the data into the bottom layer of the tree, then get
1159 cacheline_normalise to build the descriptor array. */
1160 if (lineF) {
1161 tl_assert(lineF->inUse);
1162 for (i = 0; i < N_LINE_ARANGE; i++) {
1163 cl->svals[i] = lineF->w64s[i];
1164 }
1165 stats__cache_F_fetches++;
1166 } else {
1167 for (i = 0; i < N_LINE_ARANGE; i++) {
1168 SVal sv;
1169 UWord ix = read_twobit_array( lineZ->ix2s, i );
1170 /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
1171 sv = lineZ->dict[ix];
1172 tl_assert(sv != SVal_INVALID);
1173 cl->svals[i] = sv;
1174 }
1175 stats__cache_Z_fetches++;
1176 }
1177 normalise_CacheLine( cl );
1178}
1179
1180static void shmem__invalidate_scache ( void ) {
1181 Word wix;
1182 if (0) VG_(printf)("%s","scache inval\n");
1183 tl_assert(!is_valid_scache_tag(1));
1184 for (wix = 0; wix < N_WAY_NENT; wix++) {
1185 cache_shmem.tags0[wix] = 1/*INVALID*/;
1186 }
1187 stats__cache_invals++;
1188}
1189
1190static void shmem__flush_and_invalidate_scache ( void ) {
1191 Word wix;
1192 Addr tag;
1193 if (0) VG_(printf)("%s","scache flush and invalidate\n");
1194 tl_assert(!is_valid_scache_tag(1));
1195 for (wix = 0; wix < N_WAY_NENT; wix++) {
1196 tag = cache_shmem.tags0[wix];
1197 if (tag == 1/*INVALID*/) {
1198 /* already invalid; nothing to do */
1199 } else {
1200 tl_assert(is_valid_scache_tag(tag));
1201 cacheline_wback( wix );
1202 }
1203 cache_shmem.tags0[wix] = 1/*INVALID*/;
1204 }
1205 stats__cache_flushes++;
1206 stats__cache_invals++;
1207}
1208
1209
1210static inline Bool aligned16 ( Addr a ) {
1211 return 0 == (a & 1);
1212}
1213static inline Bool aligned32 ( Addr a ) {
1214 return 0 == (a & 3);
1215}
1216static inline Bool aligned64 ( Addr a ) {
1217 return 0 == (a & 7);
1218}
1219static inline UWord get_cacheline_offset ( Addr a ) {
1220 return (UWord)(a & (N_LINE_ARANGE - 1));
1221}
1222static inline Addr cacheline_ROUNDUP ( Addr a ) {
1223 return ROUNDUP(a, N_LINE_ARANGE);
1224}
1225static inline Addr cacheline_ROUNDDN ( Addr a ) {
1226 return ROUNDDN(a, N_LINE_ARANGE);
1227}
1228static inline UWord get_treeno ( Addr a ) {
1229 return get_cacheline_offset(a) >> 3;
1230}
1231static inline UWord get_tree_offset ( Addr a ) {
1232 return a & 7;
1233}
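
/* Worked example of the address decomposition done by these helpers,
   using the values of N_LINE_BITS (6) and N_SECMAP_BITS (13) defined
   above.  For a == 0x12345:
      shmem__round_to_SecMap_base(a) == 0x12000
      shmem__get_SecMap_offset(a)    == 0x345
      get_cacheline_offset(a)        == 0x05   (a & 63)
      get_treeno(a)                  == 0      (0x05 >> 3)
      get_tree_offset(a)             == 5      (a & 7)
   and in get_cacheline below, tag == 0x12340 and
   wix == (a >> 6) & (N_WAY_NENT-1) == 0x48D. */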
1234
1235static __attribute__((noinline))
1236 CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
1237static inline CacheLine* get_cacheline ( Addr a )
1238{
1239 /* tag is 'a' with the in-line offset masked out,
 1240      eg a[31]..a[6] 000000 */
1241 Addr tag = a & ~(N_LINE_ARANGE - 1);
1242 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1243 stats__cache_totrefs++;
1244 if (LIKELY(tag == cache_shmem.tags0[wix])) {
1245 return &cache_shmem.lyns0[wix];
1246 } else {
1247 return get_cacheline_MISS( a );
1248 }
1249}
1250
1251static __attribute__((noinline))
1252 CacheLine* get_cacheline_MISS ( Addr a )
1253{
1254 /* tag is 'a' with the in-line offset masked out,
 1255      eg a[31]..a[6] 000000 */
1256
1257 CacheLine* cl;
1258 Addr* tag_old_p;
1259 Addr tag = a & ~(N_LINE_ARANGE - 1);
1260 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1261
1262 tl_assert(tag != cache_shmem.tags0[wix]);
1263
1264 /* Dump the old line into the backing store. */
1265 stats__cache_totmisses++;
1266
1267 cl = &cache_shmem.lyns0[wix];
1268 tag_old_p = &cache_shmem.tags0[wix];
1269
1270 if (is_valid_scache_tag( *tag_old_p )) {
1271 /* EXPENSIVE and REDUNDANT: callee does it */
1272 if (SCE_CACHELINE)
1273 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1274 cacheline_wback( wix );
1275 }
1276 /* and reload the new one */
1277 *tag_old_p = tag;
1278 cacheline_fetch( wix );
1279 if (SCE_CACHELINE)
1280 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1281 return cl;
1282}
1283
1284static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1285 stats__cline_64to32pulldown++;
1286 switch (toff) {
1287 case 0: case 4:
1288 tl_assert(descr & TREE_DESCR_64);
1289 tree[4] = tree[0];
1290 descr &= ~TREE_DESCR_64;
1291 descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
1292 break;
1293 default:
1294 tl_assert(0);
1295 }
1296 return descr;
1297}
1298
1299static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1300 stats__cline_32to16pulldown++;
1301 switch (toff) {
1302 case 0: case 2:
1303 if (!(descr & TREE_DESCR_32_0)) {
1304 descr = pulldown_to_32(tree, 0, descr);
1305 }
1306 tl_assert(descr & TREE_DESCR_32_0);
1307 tree[2] = tree[0];
1308 descr &= ~TREE_DESCR_32_0;
1309 descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
1310 break;
1311 case 4: case 6:
1312 if (!(descr & TREE_DESCR_32_1)) {
1313 descr = pulldown_to_32(tree, 4, descr);
1314 }
1315 tl_assert(descr & TREE_DESCR_32_1);
1316 tree[6] = tree[4];
1317 descr &= ~TREE_DESCR_32_1;
1318 descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
1319 break;
1320 default:
1321 tl_assert(0);
1322 }
1323 return descr;
1324}
1325
1326static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1327 stats__cline_16to8pulldown++;
1328 switch (toff) {
1329 case 0: case 1:
1330 if (!(descr & TREE_DESCR_16_0)) {
1331 descr = pulldown_to_16(tree, 0, descr);
1332 }
1333 tl_assert(descr & TREE_DESCR_16_0);
1334 tree[1] = tree[0];
1335 descr &= ~TREE_DESCR_16_0;
1336 descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
1337 break;
1338 case 2: case 3:
1339 if (!(descr & TREE_DESCR_16_1)) {
1340 descr = pulldown_to_16(tree, 2, descr);
1341 }
1342 tl_assert(descr & TREE_DESCR_16_1);
1343 tree[3] = tree[2];
1344 descr &= ~TREE_DESCR_16_1;
1345 descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
1346 break;
1347 case 4: case 5:
1348 if (!(descr & TREE_DESCR_16_2)) {
1349 descr = pulldown_to_16(tree, 4, descr);
1350 }
1351 tl_assert(descr & TREE_DESCR_16_2);
1352 tree[5] = tree[4];
1353 descr &= ~TREE_DESCR_16_2;
1354 descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
1355 break;
1356 case 6: case 7:
1357 if (!(descr & TREE_DESCR_16_3)) {
1358 descr = pulldown_to_16(tree, 6, descr);
1359 }
1360 tl_assert(descr & TREE_DESCR_16_3);
1361 tree[7] = tree[6];
1362 descr &= ~TREE_DESCR_16_3;
1363 descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
1364 break;
1365 default:
1366 tl_assert(0);
1367 }
1368 return descr;
1369}
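
/* Worked example of the pulldown chain: suppose a tree currently holds
   one 64-bit value (descr == TREE_DESCR_64) and an 8-bit access lands
   at tree offset 5.  pulldown_to_8(tree, 5, descr) first calls
   pulldown_to_16(tree, 4, ..), which in turn calls
   pulldown_to_32(tree, 4, ..).  The net effect is that copies of the
   value propagate from tree[0] into tree[4], tree[6] and tree[5], and
   the descriptor becomes
      TREE_DESCR_32_0 | TREE_DESCR_16_3 | TREE_DESCR_8_5 | TREE_DESCR_8_4,
   after which the caller can update tree[5] on its own. */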
1370
1371
1372static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
1373 UShort mask;
1374 switch (toff) {
1375 case 0:
1376 mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
1377 tl_assert( (descr & mask) == mask );
1378 descr &= ~mask;
1379 descr |= TREE_DESCR_16_0;
1380 break;
1381 case 2:
1382 mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
1383 tl_assert( (descr & mask) == mask );
1384 descr &= ~mask;
1385 descr |= TREE_DESCR_16_1;
1386 break;
1387 case 4:
1388 mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
1389 tl_assert( (descr & mask) == mask );
1390 descr &= ~mask;
1391 descr |= TREE_DESCR_16_2;
1392 break;
1393 case 6:
1394 mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
1395 tl_assert( (descr & mask) == mask );
1396 descr &= ~mask;
1397 descr |= TREE_DESCR_16_3;
1398 break;
1399 default:
1400 tl_assert(0);
1401 }
1402 return descr;
1403}
1404
1405static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
1406 UShort mask;
1407 switch (toff) {
1408 case 0:
1409 if (!(descr & TREE_DESCR_16_0))
1410 descr = pullup_descr_to_16(descr, 0);
1411 if (!(descr & TREE_DESCR_16_1))
1412 descr = pullup_descr_to_16(descr, 2);
1413 mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
1414 tl_assert( (descr & mask) == mask );
1415 descr &= ~mask;
1416 descr |= TREE_DESCR_32_0;
1417 break;
1418 case 4:
1419 if (!(descr & TREE_DESCR_16_2))
1420 descr = pullup_descr_to_16(descr, 4);
1421 if (!(descr & TREE_DESCR_16_3))
1422 descr = pullup_descr_to_16(descr, 6);
1423 mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
1424 tl_assert( (descr & mask) == mask );
1425 descr &= ~mask;
1426 descr |= TREE_DESCR_32_1;
1427 break;
1428 default:
1429 tl_assert(0);
1430 }
1431 return descr;
1432}
1433
1434static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
1435 switch (toff) {
1436 case 0: case 4:
1437 return 0 != (descr & TREE_DESCR_64);
1438 default:
1439 tl_assert(0);
1440 }
1441}
1442
1443static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
1444 switch (toff) {
1445 case 0:
1446 return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
1447 case 2:
1448 return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
1449 case 4:
1450 return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
1451 case 6:
1452 return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
1453 default:
1454 tl_assert(0);
1455 }
1456}
1457
1458/* ------------ Cache management ------------ */
1459
1460static void zsm_flush_cache ( void )
1461{
1462 shmem__flush_and_invalidate_scache();
1463}
1464
1465
1466static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
1467{
1468 tl_assert( sizeof(UWord) == sizeof(Addr) );
1469
1470 rcinc = p_rcinc;
1471 rcdec = p_rcdec;
1472
1473 tl_assert(map_shmem == NULL);
1474 map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
1475 HG_(free),
1476 NULL/*unboxed UWord cmp*/);
1477 tl_assert(map_shmem != NULL);
1478 shmem__invalidate_scache();
1479
1480 /* a SecMap must contain an integral number of CacheLines */
1481 tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
1482 /* also ... a CacheLine holds an integral number of trees */
1483 tl_assert(0 == (N_LINE_ARANGE % 8));
1484}
1485
1486/////////////////////////////////////////////////////////////////
1487/////////////////////////////////////////////////////////////////
1488// //
1489// SECTION END compressed shadow memory //
1490// //
1491/////////////////////////////////////////////////////////////////
1492/////////////////////////////////////////////////////////////////
1493
1494
1495
1496/////////////////////////////////////////////////////////////////
1497/////////////////////////////////////////////////////////////////
1498// //
1499// SECTION BEGIN vts primitives //
1500// //
1501/////////////////////////////////////////////////////////////////
1502/////////////////////////////////////////////////////////////////
1503
1504#ifndef __HB_VTS_H
1505#define __HB_VTS_H
1506
1507/* VtsIDs can't exceed 30 bits, since they have to be packed into the
1508 lowest 30 bits of an SVal. */
1509typedef UInt VtsID;
1510#define VtsID_INVALID 0xFFFFFFFF
1511
1512/* A VTS contains .ts, its vector clock, and also .id, a field to hold
1513 a backlink for the caller's convenience. Since we have no idea
1514 what to set that to in the library, it always gets set to
1515 VtsID_INVALID. */
1516typedef
1517 struct {
1518 VtsID id;
1519 XArray* ts; /* XArray* ScalarTS(abstract) */
1520 }
1521 VTS;
1522
1523
1524/* Create a new, empty VTS. */
1525VTS* VTS__new ( void );
1526
1527/* Delete this VTS in its entirety. */
1528void VTS__delete ( VTS* vts );
1529
1530/* Create a new singleton VTS. */
1531VTS* VTS__singleton ( Thr* thr, ULong tym );
1532
1533/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
1534 not modified. */
1535VTS* VTS__tick ( Thr* me, VTS* vts );
1536
1537/* Return a new VTS constructed as the join (max) of the 2 args.
1538 Neither arg is modified. */
1539VTS* VTS__join ( VTS* a, VTS* b );
1540
1541/* Compute the partial ordering relation of the two args. */
1542typedef
1543 enum { POrd_EQ=4, POrd_LT, POrd_GT, POrd_UN }
1544 POrd;
1545
1546POrd VTS__cmp ( VTS* a, VTS* b );
1547
1548/* Compute an arbitrary structural (total) ordering on the two args,
1549 based on their VCs, so they can be looked up in a table, tree, etc.
1550 Returns -1, 0 or 1. */
1551Word VTS__cmp_structural ( VTS* a, VTS* b );
1552
1553/* Debugging only. Display the given VTS in the buffer. */
1554void VTS__show ( HChar* buf, Int nBuf, VTS* vts );
1555
1556/* Debugging only. Return vts[index], so to speak. */
1557ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
1558
1559#endif /* ! __HB_VTS_H */
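
/* Worked examples of the VTS operations declared above, writing a VTS
   as { thread : scalar-timestamp, ... } and assuming T1 < T2 as Thr*
   addresses (entries are kept sorted by Thr*):

     VTS__singleton(T1, 1)                 == { T1:1 }
     VTS__tick(T1, { T1:3, T2:5 })         == { T1:4, T2:5 }
     VTS__join({ T1:3 }, { T1:1, T2:5 })   == { T1:3, T2:5 }
     VTS__cmp({ T1:1 }, { T1:1, T2:5 })    == POrd_LT
     VTS__cmp({ T1:3 }, { T1:1, T2:5 })    == POrd_UN

   Missing entries behave as zero, which is why the last comparison is
   unordered: T1 is larger on the left, T2 is larger on the right. */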
1560
1561
1562/*--------------- to do with Vector Timestamps ---------------*/
1563
1564/* Scalar Timestamp */
1565typedef
1566 struct {
1567 Thr* thr;
1568 ULong tym;
1569 }
1570 ScalarTS;
1571
1572
1573static Bool is_sane_VTS ( VTS* vts )
1574{
1575 UWord i, n;
1576 ScalarTS *st1, *st2;
1577 if (!vts) return False;
1578 if (!vts->ts) return False;
1579 n = VG_(sizeXA)( vts->ts );
1580 if (n >= 2) {
1581 for (i = 0; i < n-1; i++) {
1582 st1 = VG_(indexXA)( vts->ts, i );
1583 st2 = VG_(indexXA)( vts->ts, i+1 );
1584 if (st1->thr >= st2->thr)
1585 return False;
1586 if (st1->tym == 0 || st2->tym == 0)
1587 return False;
1588 }
1589 }
1590 return True;
1591}
1592
1593
1594/* Create a new, empty VTS.
1595*/
1596VTS* VTS__new ( void )
1597{
1598 VTS* vts;
1599 vts = HG_(zalloc)( "libhb.VTS__new.1", sizeof(VTS) );
1600 tl_assert(vts);
1601 vts->id = VtsID_INVALID;
1602 vts->ts = VG_(newXA)( HG_(zalloc), "libhb.VTS__new.2",
1603 HG_(free), sizeof(ScalarTS) );
1604 tl_assert(vts->ts);
1605 return vts;
1606}
1607
1608
1609/* Delete this VTS in its entirety.
1610*/
1611void VTS__delete ( VTS* vts )
1612{
1613 tl_assert(vts);
1614 tl_assert(vts->ts);
1615 VG_(deleteXA)( vts->ts );
1616 HG_(free)(vts);
1617}
1618
1619
1620/* Create a new singleton VTS.
1621*/
1622VTS* VTS__singleton ( Thr* thr, ULong tym ) {
1623 ScalarTS st;
1624 VTS* vts;
1625 tl_assert(thr);
1626 tl_assert(tym >= 1);
1627 vts = VTS__new();
1628 st.thr = thr;
1629 st.tym = tym;
1630 VG_(addToXA)( vts->ts, &st );
1631 return vts;
1632}
1633
1634
1635/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
1636 not modified.
1637*/
1638VTS* VTS__tick ( Thr* me, VTS* vts )
1639{
1640 ScalarTS* here = NULL;
1641 ScalarTS tmp;
1642 VTS* res;
1643 Word i, n;
1644 tl_assert(me);
1645 tl_assert(is_sane_VTS(vts));
1646 //if (0) VG_(printf)("tick vts thrno %ld szin %d\n",
1647 // (Word)me->errmsg_index, (Int)VG_(sizeXA)(vts) );
1648 res = VTS__new();
1649 n = VG_(sizeXA)( vts->ts );
1650
1651 /* main loop doesn't handle zero-entry case correctly, so
1652 special-case it. */
1653 if (n == 0) {
1654 tmp.thr = me;
1655 tmp.tym = 1;
1656 VG_(addToXA)( res->ts, &tmp );
1657 tl_assert(is_sane_VTS(res));
1658 return res;
1659 }
1660
1661 for (i = 0; i < n; i++) {
1662 here = VG_(indexXA)( vts->ts, i );
1663 if (me < here->thr) {
1664 /* We just went past 'me', without seeing it. */
1665 tmp.thr = me;
1666 tmp.tym = 1;
1667 VG_(addToXA)( res->ts, &tmp );
1668 tmp = *here;
1669 VG_(addToXA)( res->ts, &tmp );
1670 i++;
1671 break;
1672 }
1673 else if (me == here->thr) {
1674 tmp = *here;
1675 tmp.tym++;
1676 VG_(addToXA)( res->ts, &tmp );
1677 i++;
1678 break;
1679 }
1680 else /* me > here->thr */ {
1681 tmp = *here;
1682 VG_(addToXA)( res->ts, &tmp );
1683 }
1684 }
1685 tl_assert(i >= 0 && i <= n);
1686 if (i == n && here && here->thr < me) {
1687 tmp.thr = me;
1688 tmp.tym = 1;
1689 VG_(addToXA)( res->ts, &tmp );
1690 } else {
1691 for (/*keepgoing*/; i < n; i++) {
1692 here = VG_(indexXA)( vts->ts, i );
1693 tmp = *here;
1694 VG_(addToXA)( res->ts, &tmp );
1695 }
1696 }
1697 tl_assert(is_sane_VTS(res));
1698 //if (0) VG_(printf)("tick vts thrno %ld szou %d\n",
1699 // (Word)me->errmsg_index, (Int)VG_(sizeXA)(res) );
1700 return res;
1701}
1702
1703
1704/* Return a new VTS constructed as the join (max) of the 2 args.
1705 Neither arg is modified.
1706*/
1707VTS* VTS__join ( VTS* a, VTS* b )
1708{
1709 Word ia, ib, useda, usedb;
1710 ULong tyma, tymb, tymMax;
1711 Thr* thr;
1712 VTS* res;
1713 ScalarTS *tmpa, *tmpb;
1714
1715 tl_assert(a && a->ts);
1716 tl_assert(b && b->ts);
1717 useda = VG_(sizeXA)( a->ts );
1718 usedb = VG_(sizeXA)( b->ts );
1719
1720 res = VTS__new();
1721 ia = ib = 0;
1722
1723 while (1) {
1724
1725 /* This logic is to enumerate triples (thr, tyma, tymb) drawn
1726 from a and b in order, where thr is the next Thr*
1727 occurring in either a or b, and tyma/b are the relevant
1728 scalar timestamps, taking into account implicit zeroes. */
1729 tl_assert(ia >= 0 && ia <= useda);
1730 tl_assert(ib >= 0 && ib <= usedb);
1731 tmpa = tmpb = NULL;
1732
1733 if (ia == useda && ib == usedb) {
1734 /* both empty - done */
1735 break;
1736 }
1737 else
1738 if (ia == useda && ib != usedb) {
1739 /* a empty, use up b */
1740 tmpb = VG_(indexXA)( b->ts, ib );
1741 thr = tmpb->thr;
1742 tyma = 0;
1743 tymb = tmpb->tym;
1744 ib++;
1745 }
1746 else
1747 if (ia != useda && ib == usedb) {
1748 /* b empty, use up a */
1749 tmpa = VG_(indexXA)( a->ts, ia );
1750 thr = tmpa->thr;
1751 tyma = tmpa->tym;
1752 tymb = 0;
1753 ia++;
1754 }
1755 else {
1756 /* both not empty; extract lowest-Thr*'d triple */
1757 tmpa = VG_(indexXA)( a->ts, ia );
1758 tmpb = VG_(indexXA)( b->ts, ib );
1759 if (tmpa->thr < tmpb->thr) {
1760 /* a has the lowest unconsidered Thr* */
1761 thr = tmpa->thr;
1762 tyma = tmpa->tym;
1763 tymb = 0;
1764 ia++;
1765 }
1766 else
1767 if (tmpa->thr > tmpb->thr) {
1768 /* b has the lowest unconsidered Thr* */
1769 thr = tmpb->thr;
1770 tyma = 0;
1771 tymb = tmpb->tym;
1772 ib++;
1773 } else {
1774 /* they both next mention the same Thr* */
1775 tl_assert(tmpa->thr == tmpb->thr);
1776 thr = tmpa->thr; /* == tmpb->thr */
1777 tyma = tmpa->tym;
1778 tymb = tmpb->tym;
1779 ia++;
1780 ib++;
1781 }
1782 }
1783
1784 /* having laboriously determined (thr, tyma, tymb), do something
1785 useful with it. */
1786 tymMax = tyma > tymb ? tyma : tymb;
1787 if (tymMax > 0) {
1788 ScalarTS st;
1789 st.thr = thr;
1790 st.tym = tymMax;
1791 VG_(addToXA)( res->ts, &st );
1792 }
1793
1794 }
1795
1796 tl_assert(is_sane_VTS( res ));
1797
1798 return res;
1799}
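/* Illustrative note (not part of the original code): a worked example
   of the join, for two hypothetical threads thr1 and thr2.  Threads
   missing from one argument contribute an implicit timestamp of zero,
   and the result takes the per-thread maximum:

      a = [ thr1:3 ],  b = [ thr1:1 thr2:4 ]
      VTS__join(a,b) = [ thr1:3 thr2:4 ]
*/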
1800
1801
1802/* Compute the partial ordering relation of the two args.
1803*/
1804POrd VTS__cmp ( VTS* a, VTS* b )
1805{
1806 Word ia, ib, useda, usedb;
1807 ULong tyma, tymb;
1808 Thr* thr;
1809 ScalarTS *tmpa, *tmpb;
1810
1811 Bool all_leq = True;
1812 Bool all_geq = True;
1813
1814 tl_assert(a && a->ts);
1815 tl_assert(b && b->ts);
1816 useda = VG_(sizeXA)( a->ts );
1817 usedb = VG_(sizeXA)( b->ts );
1818
1819 ia = ib = 0;
1820
1821 while (1) {
1822
1823 /* This logic is to enumerate triples (thr, tyma, tymb) drawn
1824 from a and b in order, where thr is the next Thr*
1825 occurring in either a or b, and tyma/b are the relevant
1826 scalar timestamps, taking into account implicit zeroes. */
1827 tl_assert(ia >= 0 && ia <= useda);
1828 tl_assert(ib >= 0 && ib <= usedb);
1829 tmpa = tmpb = NULL;
1830
1831 if (ia == useda && ib == usedb) {
1832 /* both empty - done */
1833 break;
1834 }
1835 else
1836 if (ia == useda && ib != usedb) {
1837 /* a empty, use up b */
1838 tmpb = VG_(indexXA)( b->ts, ib );
1839 thr = tmpb->thr;
1840 tyma = 0;
1841 tymb = tmpb->tym;
1842 ib++;
1843 }
1844 else
1845 if (ia != useda && ib == usedb) {
1846 /* b empty, use up a */
1847 tmpa = VG_(indexXA)( a->ts, ia );
1848 thr = tmpa->thr;
1849 tyma = tmpa->tym;
1850 tymb = 0;
1851 ia++;
1852 }
1853 else {
1854 /* both not empty; extract lowest-Thr*'d triple */
1855 tmpa = VG_(indexXA)( a->ts, ia );
1856 tmpb = VG_(indexXA)( b->ts, ib );
1857 if (tmpa->thr < tmpb->thr) {
1858 /* a has the lowest unconsidered Thr* */
1859 thr = tmpa->thr;
1860 tyma = tmpa->tym;
1861 tymb = 0;
1862 ia++;
1863 }
1864 else
1865 if (tmpa->thr > tmpb->thr) {
1866 /* b has the lowest unconsidered Thr* */
1867 thr = tmpb->thr;
1868 tyma = 0;
1869 tymb = tmpb->tym;
1870 ib++;
1871 } else {
1872 /* they both next mention the same Thr* */
1873 tl_assert(tmpa->thr == tmpb->thr);
1874 thr = tmpa->thr; /* == tmpb->thr */
1875 tyma = tmpa->tym;
1876 tymb = tmpb->tym;
1877 ia++;
1878 ib++;
1879 }
1880 }
1881
1882 /* having laboriously determined (thr, tyma, tymb), do something
1883 useful with it. */
1884 if (tyma < tymb)
1885 all_geq = False;
1886 if (tyma > tymb)
1887 all_leq = False;
1888 }
1889
1890 if (all_leq && all_geq)
1891 return POrd_EQ;
1892 /* now we know they aren't equal, so either all_leq or all_geq or
1893 both are false. */
1894 if (all_leq)
1895 return POrd_LT;
1896 if (all_geq)
1897 return POrd_GT;
1898   /* hmm, neither all_geq nor all_leq.  This means unordered. */
1899 return POrd_UN;
1900}
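/* Illustrative note (not part of the original code): worked examples
   of the partial order, for two hypothetical threads thr1 and thr2
   (missing entries again count as zero):

      [ thr1:1 thr2:2 ]  vs  [ thr1:2 thr2:2 ]   ->  POrd_LT
      [ thr1:2 thr2:2 ]  vs  [ thr1:2 thr2:2 ]   ->  POrd_EQ
      [ thr1:2 ]         vs  [ thr2:3 ]          ->  POrd_UN
*/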
1901
1902
1903/* Compute an arbitrary structural (total) ordering on the two args,
1904 based on their VCs, so they can be looked up in a table, tree, etc.
1905 Returns -1, 0 or 1. (really just 'deriving Ord' :-)
1906*/
1907Word VTS__cmp_structural ( VTS* a, VTS* b )
1908{
1909 /* We just need to generate an arbitrary total ordering based on
1910      a->ts and b->ts.  Preferably do it in a way which encounters likely
1911      differences relatively quickly. */
1912 Word i, useda, usedb;
1913 ScalarTS *tmpa, *tmpb;
1914
1915 tl_assert(a && a->ts);
1916 tl_assert(b && b->ts);
1917 useda = VG_(sizeXA)( a->ts );
1918 usedb = VG_(sizeXA)( b->ts );
1919
1920 if (useda < usedb) return -1;
1921 if (useda > usedb) return 1;
1922
1923 /* Same length vectors, so let's step through them together. */
1924 tl_assert(useda == usedb);
1925 for (i = 0; i < useda; i++) {
1926 tmpa = VG_(indexXA)( a->ts, i );
1927 tmpb = VG_(indexXA)( b->ts, i );
1928 if (tmpa->tym < tmpb->tym) return -1;
1929 if (tmpa->tym > tmpb->tym) return 1;
1930 if (tmpa->thr < tmpb->thr) return -1;
1931 if (tmpa->thr > tmpb->thr) return 1;
1932 }
1933
1934 /* They're identical. */
1935 return 0;
1936}
1937
1938
1939/* Debugging only. Display the given VTS in the buffer.
1940*/
1941void VTS__show ( HChar* buf, Int nBuf, VTS* vts ) {
1942 ScalarTS* st;
1943 HChar unit[64];
1944 Word i, n;
1945 Int avail = nBuf;
1946 tl_assert(vts && vts->ts);
1947 tl_assert(nBuf > 16);
1948 buf[0] = '[';
1949 buf[1] = 0;
1950 n = VG_(sizeXA)( vts->ts );
1951 for (i = 0; i < n; i++) {
1952 tl_assert(avail >= 40);
1953 st = VG_(indexXA)( vts->ts, i );
1954 VG_(memset)(unit, 0, sizeof(unit));
1955 VG_(sprintf)(unit, i < n-1 ? "%p:%lld " : "%p:%lld",
1956 st->thr, st->tym);
1957 if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
1958 VG_(strcat)(buf, " ...]");
1959 buf[nBuf-1] = 0;
1960 return;
1961 }
1962 VG_(strcat)(buf, unit);
1963 avail -= VG_(strlen)(unit);
1964 }
1965 VG_(strcat)(buf, "]");
1966 buf[nBuf-1] = 0;
1967}
1968
1969
1970/* Debugging only. Return vts[index], so to speak.
1971*/
1972ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) {
1973 UWord i, n;
1974 tl_assert(vts && vts->ts);
1975 n = VG_(sizeXA)( vts->ts );
1976 for (i = 0; i < n; i++) {
1977 ScalarTS* st = VG_(indexXA)( vts->ts, i );
1978 if (st->thr == idx)
1979 return st->tym;
1980 }
1981 return 0;
1982}
1983
1984
1985/////////////////////////////////////////////////////////////////
1986/////////////////////////////////////////////////////////////////
1987// //
1988// SECTION END vts primitives //
1989// //
1990/////////////////////////////////////////////////////////////////
1991/////////////////////////////////////////////////////////////////
1992
1993
1994
1995/////////////////////////////////////////////////////////////////
1996/////////////////////////////////////////////////////////////////
1997// //
1998// SECTION BEGIN main library //
1999// //
2000/////////////////////////////////////////////////////////////////
2001/////////////////////////////////////////////////////////////////
2002
2003
2004/////////////////////////////////////////////////////////
2005// //
2006// VTS set //
2007// //
2008/////////////////////////////////////////////////////////
2009
2010static WordFM* /* VTS* void void */ vts_set = NULL;
2011
2012static void vts_set_init ( void )
2013{
2014 tl_assert(!vts_set);
2015 vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
2016 HG_(free),
2017 (Word(*)(UWord,UWord))VTS__cmp_structural );
2018 tl_assert(vts_set);
2019}
2020
2021/* Given a newly made VTS, look in vts_set to see if we already have
2022 an identical one. If yes, free up this one and return instead a
2023 pointer to the existing one. If no, add this one to the set and
2024 return the same pointer. Caller differentiates the two cases by
2025 comparing returned pointer with the supplied one (although that
2026 does require that the supplied VTS is not already in the set).
2027*/
2028static VTS* vts_set__find_and_dealloc__or_add ( VTS* cand )
2029{
2030 UWord keyW, valW;
2031 /* lookup cand (by value) */
2032 if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
2033 /* found it */
2034 tl_assert(valW == 0);
2035 /* if this fails, cand (by ref) was already present (!) */
2036 tl_assert(keyW != (UWord)cand);
2037 VTS__delete(cand);
2038 return (VTS*)keyW;
2039 } else {
2040 /* not present. Add and return pointer to same. */
2041 VG_(addToFM)( vts_set, (UWord)cand, 0/*val is unused*/ );
2042 return cand;
2043 }
2044}
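/* Illustrative sketch only (not part of the original code): the
   interning pattern the function above is designed for.  'a' and 'b'
   are hypothetical VTS* values already known to the library. */
#if 0
   VTS* fresh = VTS__join( a, b );   /* freshly allocated candidate */
   VTS* uniq  = vts_set__find_and_dealloc__or_add( fresh );
   if (uniq != fresh) {
      /* 'fresh' duplicated an existing VTS; it has already been
         deallocated, and 'uniq' points at the canonical copy. */
   } else {
      /* 'fresh' was new; it is now owned by (and findable in) vts_set. */
   }
#endif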
2045
2046
2047/////////////////////////////////////////////////////////
2048// //
2049// VTS table //
2050// //
2051/////////////////////////////////////////////////////////
2052
2053static void VtsID__invalidate_caches ( void ); /* fwds */
2054
2055/* A type to hold VTS table entries. Invariants:
2056 If .vts == NULL, then this entry is not in use, so:
2057 - .rc == 0
2058       - this entry is on the freelist (unfortunately, does not imply
2059         any constraints on value for .freelink)
2060 If .vts != NULL, then this entry is in use:
2061 - .vts is findable in vts_set
2062 - .vts->id == this entry number
2063 - no specific value for .rc (even 0 is OK)
2064       - this entry is not on the freelist, so .freelink == VtsID_INVALID
2065*/
2066typedef
2067 struct {
2068 VTS* vts; /* vts, in vts_set */
2069 UWord rc; /* reference count - enough for entire aspace */
2070 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
2071 }
2072 VtsTE;
2073
2074/* The VTS table. */
2075static XArray* /* of VtsTE */ vts_tab = NULL;
2076
2077/* An index into the VTS table, indicating the start of the list of
2078 free (available for use) entries. If the list is empty, this is
2079 VtsID_INVALID. */
2080static VtsID vts_tab_freelist = VtsID_INVALID;
2081
2082/* Do a GC of vts_tab when the freelist becomes empty AND the size of
2083 vts_tab equals or exceeds this size. After GC, the value here is
2084 set appropriately so as to check for the next GC point. */
2085static Word vts_next_GC_at = 1000;
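/* Illustrative sketch only (not part of the original code): the GC
   trigger that this threshold is intended to implement, as it might
   be checked by a (hypothetical) caller before handing out a new
   VtsID. */
#if 0
   if (vts_tab_freelist == VtsID_INVALID
       && VG_(sizeXA)( vts_tab ) >= vts_next_GC_at) {
      vts_tab__do_GC( False/*show_stats*/ );
   }
#endif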
2086
2087static void vts_tab_init ( void )
2088{
2089 vts_tab
2090 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2091 HG_(free), sizeof(VtsTE) );
2092 vts_tab_freelist
2093 = VtsID_INVALID;
2094 tl_assert(vts_tab);
2095}
2096
2097/* Add ii to the free list, checking that it looks out-of-use. */
2098static void add_to_free_list ( VtsID ii )
2099{
2100 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2101 tl_assert(ie->vts == NULL);
2102 tl_assert(ie->rc == 0);
2103 tl_assert(ie->freelink == VtsID_INVALID);
2104 ie->freelink = vts_tab_freelist;
2105 vts_tab_freelist = ii;
2106}
2107
2108/* Get an entry from the free list. This will return VtsID_INVALID if
2109 the free list is empty. */
2110static VtsID get_from_free_list ( void )
2111{
2112 VtsID ii;
2113 VtsTE* ie;
2114 if (vts_tab_freelist == VtsID_INVALID)
2115 return VtsID_INVALID;
2116 ii = vts_tab_freelist;
2117 ie = VG_(indexXA)( vts_tab, ii );
2118 tl_assert(ie->vts == NULL);
2119 tl_assert(ie->rc == 0);
2120 vts_tab_freelist = ie->freelink;
2121 return ii;
2122}
2123
2124/* Produce a new VtsID that can be used, either by getting it from
2125 the freelist, or, if that is empty, by expanding vts_tab. */
2126static VtsID get_new_VtsID ( void )
2127{
2128 VtsID ii;
2129 VtsTE te;
2130 ii = get_from_free_list();
2131 if (ii != VtsID_INVALID)
2132 return ii;
2133 te.vts = NULL;
2134 te.rc = 0;
2135 te.freelink = VtsID_INVALID;
2136 ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2137 return ii;
2138}
2139
2140
2141/* Indirect callback from lib_zsm. */
2142static void VtsID__rcinc ( VtsID ii )
2143{
2144 VtsTE* ie;
2145 /* VG_(indexXA) does a range check for us */
2146 ie = VG_(indexXA)( vts_tab, ii );
2147 tl_assert(ie->vts); /* else it's not in use */
2148 tl_assert(ie->rc < ~0UL); /* else we can't continue */
2149 tl_assert(ie->vts->id == ii);
2150 ie->rc++;
2151}
2152
2153/* Indirect callback from lib_zsm. */
2154static void VtsID__rcdec ( VtsID ii )
2155{
2156 VtsTE* ie;
2157 /* VG_(indexXA) does a range check for us */
2158 ie = VG_(indexXA)( vts_tab, ii );
2159 tl_assert(ie->vts); /* else it's not in use */
2160 tl_assert(ie->rc > 0); /* else RC snafu */
2161 tl_assert(ie->vts->id == ii);
2162 ie->rc--;
2163}
2164
2165
2166/* Look up 'cand' in our collection of VTSs. If present, deallocate
2167 it and return the VtsID for the pre-existing version. If not
2168 present, add it to both vts_tab and vts_set, allocate a fresh VtsID
2169 for it, and return that. */
2170static VtsID vts_tab__find_and_dealloc__or_add ( VTS* cand )
2171{
2172 VTS* auld;
2173 tl_assert(cand->id == VtsID_INVALID);
2174 auld = vts_set__find_and_dealloc__or_add(cand);
2175 if (auld != cand) {
2176 /* We already have an Aulde one. Use that. */
2177 VtsTE* ie;
2178 tl_assert(auld->id != VtsID_INVALID);
2179 ie = VG_(indexXA)( vts_tab, auld->id );
2180 tl_assert(ie->vts == auld);
2181 return auld->id;
2182 } else {
2183 VtsID ii = get_new_VtsID();
2184 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2185 ie->vts = cand;
2186 ie->rc = 0;
2187 ie->freelink = VtsID_INVALID;
2188 cand->id = ii;
2189 return ii;
2190 }
2191}
2192
2193
2194static void show_vts_stats ( HChar* caller )
2195{
2196 UWord nSet, nTab, nLive;
2197 ULong totrc;
2198 UWord n, i;
2199 nSet = VG_(sizeFM)( vts_set );
2200 nTab = VG_(sizeXA)( vts_tab );
2201 totrc = 0;
2202 nLive = 0;
2203 n = VG_(sizeXA)( vts_tab );
2204 for (i = 0; i < n; i++) {
2205 VtsTE* ie = VG_(indexXA)( vts_tab, i );
2206 if (ie->vts) {
2207 nLive++;
2208 totrc += (ULong)ie->rc;
2209 } else {
2210 tl_assert(ie->rc == 0);
2211 }
2212 }
2213 VG_(printf)(" show_vts_stats %s\n", caller);
2214 VG_(printf)(" vts_tab size %4lu\n", nTab);
2215 VG_(printf)(" vts_tab live %4lu\n", nLive);
2216 VG_(printf)(" vts_set size %4lu\n", nSet);
2217 VG_(printf)(" total rc %4llu\n", totrc);
2218}
2219
2220/* NOT TO BE CALLED FROM WITHIN libzsm. */
sewardj8fd92d32008-11-20 23:17:01 +00002221__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00002222static void vts_tab__do_GC ( Bool show_stats )
2223{
2224 UWord i, nTab, nLive, nFreed;
2225
2226 /* check this is actually necessary. */
2227 tl_assert(vts_tab_freelist == VtsID_INVALID);
2228
2229 /* empty the caches for partial order checks and binary joins. We
2230 could do better and prune out the entries to be deleted, but it
2231 ain't worth the hassle. */
2232 VtsID__invalidate_caches();
2233
2234 /* First, make the reference counts up to date. */
2235 zsm_flush_cache();
2236
2237 nTab = VG_(sizeXA)( vts_tab );
2238
2239 if (show_stats) {
2240 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
2241 show_vts_stats("before GC");
2242 }
2243
2244 /* Now we can inspect the entire vts_tab. Any entries
2245      with zero .rc fields are now no longer in use and can be put on
2246      the free list, removed from vts_set, and deleted.
2247 nFreed = 0;
2248 for (i = 0; i < nTab; i++) {
2249 Bool present;
2250 UWord oldK = 0, oldV = 0;
2251 VtsTE* te = VG_(indexXA)( vts_tab, i );
2252 if (te->vts == NULL) {
2253 tl_assert(te->rc == 0);
2254 continue; /* already on the free list (presumably) */
2255 }
2256 if (te->rc > 0)
2257 continue; /* in use */
2258 /* Ok, we got one we can free. */
2259 tl_assert(te->vts->id == i);
2260 /* first, remove it from vts_set. */
2261 present = VG_(delFromFM)( vts_set,
2262 &oldK, &oldV, (UWord)te->vts );
2263 tl_assert(present); /* else it isn't in vts_set ?! */
2264 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2265 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
2266 /* now free the VTS itself */
2267 VTS__delete(te->vts);
2268 te->vts = NULL;
2269 /* and finally put this entry on the free list */
2270 tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
2271 add_to_free_list( i );
2272 nFreed++;
2273 }
2274
2275 /* Now figure out when the next GC should be. We'll allow the
2276 number of VTSs to double before GCing again. Except of course
2277 that since we can't (or, at least, don't) shrink vts_tab, we
2278      can't set the threshold value smaller than it. */
2279 tl_assert(nFreed <= nTab);
2280 nLive = nTab - nFreed;
2281 tl_assert(nLive >= 0 && nLive <= nTab);
2282 vts_next_GC_at = 2 * nLive;
2283 if (vts_next_GC_at < nTab)
2284 vts_next_GC_at = nTab;
2285
2286 if (show_stats) {
2287 show_vts_stats("after GC");
2288 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
2289 }
2290
sewardjd024ae52008-11-09 20:47:57 +00002291 if (VG_(clo_verbosity) > 1) {
sewardjf98e1c02008-10-25 16:22:41 +00002292 static UInt ctr = 0;
2293 tl_assert(nTab > 0);
sewardjd024ae52008-11-09 20:47:57 +00002294 VG_(message)(Vg_DebugMsg,
2295 "libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)",
sewardjf98e1c02008-10-25 16:22:41 +00002296 ctr++, nTab, nLive, (100ULL * nLive) / nTab);
2297 }
2298}
2299
2300
2301/////////////////////////////////////////////////////////
2302// //
2303// Vts IDs //
2304// //
2305/////////////////////////////////////////////////////////
2306
2307//////////////////////////
2308static ULong stats__getOrdering_queries = 0;
2309static ULong stats__getOrdering_misses = 0;
2310static ULong stats__join2_queries = 0;
2311static ULong stats__join2_misses = 0;
2312
2313static inline UInt ROL32 ( UInt w, Int n ) {
2314 w = (w << n) | (w >> (32-n));
2315 return w;
2316}
2317static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
2318 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
2319 return hash % nTab;
2320}
2321
2322#define N_GETORDERING_CACHE 1023
2323static
2324 struct { VtsID vi1; VtsID vi2; POrd ord; }
2325 getOrdering_cache[N_GETORDERING_CACHE];
2326
2327#define N_JOIN2_CACHE 1023
2328static
2329 struct { VtsID vi1; VtsID vi2; VtsID res; }
2330 join2_cache[N_JOIN2_CACHE];
2331
2332static void VtsID__invalidate_caches ( void ) {
2333 Int i;
2334 for (i = 0; i < N_GETORDERING_CACHE; i++) {
2335 getOrdering_cache[i].vi1 = VtsID_INVALID;
2336 getOrdering_cache[i].vi2 = VtsID_INVALID;
2337 getOrdering_cache[i].ord = 0; /* an invalid POrd value */
2338 }
2339 for (i = 0; i < N_JOIN2_CACHE; i++) {
2340 join2_cache[i].vi1 = VtsID_INVALID;
2341 join2_cache[i].vi2 = VtsID_INVALID;
2342 join2_cache[i].res = VtsID_INVALID;
2343 }
2344}
2345//////////////////////////
2346
sewardjd52392d2008-11-08 20:36:26 +00002347//static Bool VtsID__is_valid ( VtsID vi ) {
2348// VtsTE* ve;
2349// if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
2350// return False;
2351// ve = VG_(indexXA)( vts_tab, vi );
2352// if (!ve->vts)
2353// return False;
2354// tl_assert(ve->vts->id == vi);
2355// return True;
2356//}
sewardjf98e1c02008-10-25 16:22:41 +00002357
2358static VTS* VtsID__to_VTS ( VtsID vi ) {
2359 VtsTE* te = VG_(indexXA)( vts_tab, vi );
2360 tl_assert(te->vts);
2361 return te->vts;
2362}
2363
2364static void VtsID__pp ( VtsID vi ) {
2365 HChar buf[100];
2366 VTS* vts = VtsID__to_VTS(vi);
2367 VTS__show( buf, sizeof(buf)-1, vts );
2368 buf[sizeof(buf)-1] = 0;
2369 VG_(printf)("%s", buf);
2370}
2371
2372/* compute partial ordering relation of vi1 and vi2. */
2373__attribute__((noinline))
2374static POrd VtsID__getOrdering_WRK ( VtsID vi1, VtsID vi2 ) {
2375 UInt hash;
2376 POrd ord;
2377 VTS *v1, *v2;
2378 //if (vi1 == vi2) return POrd_EQ;
2379 tl_assert(vi1 != vi2);
2380 ////++
2381 stats__getOrdering_queries++;
2382 hash = hash_VtsIDs(vi1, vi2, N_GETORDERING_CACHE);
2383 if (getOrdering_cache[hash].vi1 == vi1
2384 && getOrdering_cache[hash].vi2 == vi2)
2385 return getOrdering_cache[hash].ord;
2386 stats__getOrdering_misses++;
2387 ////--
2388 v1 = VtsID__to_VTS(vi1);
2389 v2 = VtsID__to_VTS(vi2);
2390 ord = VTS__cmp( v1, v2 );
2391 ////++
2392 getOrdering_cache[hash].vi1 = vi1;
2393 getOrdering_cache[hash].vi2 = vi2;
2394 getOrdering_cache[hash].ord = ord;
2395 ////--
2396 return ord;
2397}
2398static inline POrd VtsID__getOrdering ( VtsID vi1, VtsID vi2 ) {
2399 return vi1 == vi2 ? POrd_EQ : VtsID__getOrdering_WRK(vi1, vi2);
2400}
2401
2402/* compute binary join */
2403__attribute__((noinline))
2404static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
2405 UInt hash;
2406 VtsID res;
2407 VTS *vts1, *vts2, *nyu;
2408 //if (vi1 == vi2) return vi1;
2409 tl_assert(vi1 != vi2);
2410 ////++
2411 stats__join2_queries++;
2412 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
2413 if (join2_cache[hash].vi1 == vi1
2414 && join2_cache[hash].vi2 == vi2)
2415 return join2_cache[hash].res;
2416 stats__join2_misses++;
2417 ////--
2418 vts1 = VtsID__to_VTS(vi1);
2419 vts2 = VtsID__to_VTS(vi2);
2420 nyu = VTS__join(vts1,vts2);
2421 res = vts_tab__find_and_dealloc__or_add(nyu);
2422 ////++
2423 join2_cache[hash].vi1 = vi1;
2424 join2_cache[hash].vi2 = vi2;
2425 join2_cache[hash].res = res;
2426 ////--
2427 return res;
2428}
2429static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
2430 return vi1 == vi2 ? vi1 : VtsID__join2_WRK(vi1, vi2);
2431}
2432
2433/* create a singleton VTS, namely [thr:tym] */
2434static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
2435 VTS* nyu = VTS__singleton(thr,tym);
2436 return vts_tab__find_and_dealloc__or_add(nyu);
2437}
2438
2439/* tick operation, creates value 1 if specified index is absent */
2440static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
2441 VTS* vts = VtsID__to_VTS(vi);
2442 VTS* nyu = VTS__tick(idx,vts);
2443 return vts_tab__find_and_dealloc__or_add(nyu);
2444}
2445
2446/* index into a VTS (only for assertions) */
2447static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
2448 VTS* vts = VtsID__to_VTS(vi);
2449 return VTS__indexAt_SLOW( vts, idx );
2450}
2451
2452
2453/////////////////////////////////////////////////////////
2454// //
2455// Threads //
2456// //
2457/////////////////////////////////////////////////////////
2458
2459struct _Thr {
2460 /* Current VTSs for this thread. They change as we go along. viR
2461 is the VTS to be used for reads, viW for writes. Usually they
2462 are the same, but can differ when we deal with reader-writer
2463 locks. It is always the case that VtsID__getOrdering(viW,viR)
2464      == POrd_LT or POrd_EQ -- that is, viW must be the same as, or
2465      lag behind, viR. */
2466 VtsID viR;
2467 VtsID viW;
2468 /* opaque (to us) data we hold on behalf of the library's user. */
2469 void* opaque;
2470};
2471
2472static Thr* Thr__new ( void ) {
2473 Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
2474 thr->viR = VtsID_INVALID;
2475 thr->viW = VtsID_INVALID;
2476 return thr;
2477}
2478
2479
2480/////////////////////////////////////////////////////////
2481// //
2482// Shadow Values //
2483// //
2484/////////////////////////////////////////////////////////
2485
2486// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
2487// hb_zsm.h. We have to do everything else here.
2488
2489/* SVal is 64 bit unsigned int.
2490
2491 <---------30---------> <---------30--------->
2492 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
2493 01 X--------------------X XX X--------------------X E(rror)
2494 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
2495 11 X--------------------X XX X--------------------X I: SVal_INVALID
2496*/
2497#define SVAL_TAGMASK (3ULL << 62)
2498
2499static inline Bool SVal__isC ( SVal s ) {
2500 return (0ULL << 62) == (s & SVAL_TAGMASK);
2501}
2502static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
2503 //tl_assert(VtsID__is_valid(rmini));
2504 //tl_assert(VtsID__is_valid(wmini));
2505 return (((ULong)rmini) << 32) | ((ULong)wmini);
2506}
2507static inline VtsID SVal__unC_Rmin ( SVal s ) {
2508 tl_assert(SVal__isC(s));
2509 return (VtsID)(s >> 32);
2510}
2511static inline VtsID SVal__unC_Wmin ( SVal s ) {
2512 tl_assert(SVal__isC(s));
2513 return (VtsID)(s & 0xFFFFFFFFULL);
2514}
2515
2516static Bool SVal__isE ( SVal s ) {
2517 return (1ULL << 62) == (s & SVAL_TAGMASK);
2518}
2519static SVal SVal__mkE ( void ) {
2520 return 1ULL << 62;
2521}
2522
2523static Bool SVal__isA ( SVal s ) {
2524 return (2ULL << 62) == (s & SVAL_TAGMASK);
2525}
2526static SVal SVal__mkA ( void ) {
2527 return 2ULL << 62;
2528}
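/* Illustrative sketch only (not part of the original code): how a
   constrained ("C") value round-trips through the helpers above.
   'rmini' and 'wmini' are hypothetical VtsIDs; per the layout diagram,
   valid VtsIDs have their top two bits clear, so the packed 64-bit
   value carries the 00 tag. */
#if 0
   SVal s = SVal__mkC( rmini, wmini );
   tl_assert( SVal__isC(s) );
   tl_assert( SVal__unC_Rmin(s) == rmini );
   tl_assert( SVal__unC_Wmin(s) == wmini );
#endif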
2529
2530/* Direct callback from lib_zsm. */
2531static void SVal__rcinc ( SVal s ) {
2532 if (SVal__isC(s)) {
2533 VtsID__rcinc( SVal__unC_Rmin(s) );
2534 VtsID__rcinc( SVal__unC_Wmin(s) );
2535 }
2536}
2537
2538/* Direct callback from lib_zsm. */
2539static void SVal__rcdec ( SVal s ) {
2540 if (SVal__isC(s)) {
2541 VtsID__rcdec( SVal__unC_Rmin(s) );
2542 VtsID__rcdec( SVal__unC_Wmin(s) );
2543 }
2544}
2545
2546
2547/////////////////////////////////////////////////////////
2548// //
sewardjd86e3a22008-12-03 11:39:37 +00002549// A simple group (memory) allocator //
2550// //
2551/////////////////////////////////////////////////////////
2552
2553//////////////// BEGIN general group allocator
2554typedef
2555 struct {
2556 UWord elemSzB; /* element size */
2557 UWord nPerGroup; /* # elems per group */
2558 void* (*alloc)(HChar*, SizeT); /* group allocator */
2559 HChar* cc; /* group allocator's cc */
2560 void (*free)(void*); /* group allocator's free-er (unused) */
2561 /* XArray of void* (pointers to groups). The groups themselves.
2562 Each element is a pointer to a block of size (elemSzB *
2563 nPerGroup) bytes. */
2564 XArray* groups;
2565 /* next free element. Is a pointer to an element in one of the
2566 groups pointed to by .groups. */
2567 void* nextFree;
2568 }
2569 GroupAlloc;
2570
2571static void init_GroupAlloc ( /*MOD*/GroupAlloc* ga,
2572 UWord elemSzB,
2573 UWord nPerGroup,
2574 void* (*alloc)(HChar*, SizeT),
2575 HChar* cc,
2576 void (*free)(void*) )
2577{
2578 tl_assert(0 == (elemSzB % sizeof(UWord)));
2579 tl_assert(elemSzB >= sizeof(UWord));
2580 tl_assert(nPerGroup >= 100); /* let's say */
2581 tl_assert(alloc);
2582 tl_assert(cc);
2583 tl_assert(free);
2584 tl_assert(ga);
2585 VG_(memset)(ga, 0, sizeof(*ga));
2586 ga->elemSzB = elemSzB;
2587 ga->nPerGroup = nPerGroup;
2588 ga->groups = NULL;
2589 ga->alloc = alloc;
2590 ga->cc = cc;
2591 ga->free = free;
2592 ga->groups = VG_(newXA)( alloc, cc, free, sizeof(void*) );
2593 ga->nextFree = NULL;
2594 tl_assert(ga->groups);
2595}
2596
2597/* The freelist is empty. Allocate a new group and put all the new
2598 elements in it onto the freelist. */
2599__attribute__((noinline))
2600static void gal_add_new_group ( GroupAlloc* ga )
2601{
2602 Word i;
2603 UWord* group;
2604 tl_assert(ga);
2605 tl_assert(ga->nextFree == NULL);
2606 group = ga->alloc( ga->cc, ga->elemSzB * ga->nPerGroup );
2607 tl_assert(group);
2608 /* extend the freelist through the new group. Place the freelist
2609 pointer in the first word of each element. That's why the
2610 element size must be at least one word. */
2611 for (i = ga->nPerGroup-1; i >= 0; i--) {
2612 UChar* elemC = ((UChar*)group) + i * ga->elemSzB;
2613 UWord* elem = (UWord*)elemC;
2614 tl_assert(0 == (((UWord)elem) % sizeof(UWord)));
2615 *elem = (UWord)ga->nextFree;
2616 ga->nextFree = elem;
2617 }
2618 /* and add to our collection of groups */
2619 VG_(addToXA)( ga->groups, &group );
2620}
2621
2622inline static void* gal_Alloc ( GroupAlloc* ga )
2623{
2624 UWord* elem;
2625 if (UNLIKELY(ga->nextFree == NULL)) {
2626 gal_add_new_group(ga);
2627 }
2628 elem = ga->nextFree;
2629 ga->nextFree = (void*)*elem;
2630 *elem = 0; /* unnecessary, but just to be on the safe side */
2631 return elem;
2632}
2633
2634inline static void* gal_Alloc_w_size_check ( GroupAlloc* ga, SizeT n )
2635{
2636 tl_assert(n == ga->elemSzB);
2637 return gal_Alloc( ga );
2638}
2639
2640inline static void gal_Free ( GroupAlloc* ga, void* p )
2641{
2642 UWord* elem = (UWord*)p;
2643 *elem = (UWord)ga->nextFree;
2644 ga->nextFree = elem;
2645}
2646//////////////// END general group allocator
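/* Illustrative sketch only (not part of the original code): typical
   use of the group allocator for a hypothetical fixed-size element
   type 'Elem', mirroring the way the RCEC and OldRef allocators below
   are set up.  The cost-centre string is made up for the example. */
#if 0
   typedef struct { UWord w1; UWord w2; } Elem;
   static GroupAlloc elem_group_allocator;

   /* once, at startup */
   init_GroupAlloc( &elem_group_allocator,
                    sizeof(Elem), 1000/*elems per group*/,
                    HG_(zalloc), "libhb.example.1", HG_(free) );

   /* thereafter */
   Elem* e = gal_Alloc( &elem_group_allocator );
   /* ... use e ... */
   gal_Free( &elem_group_allocator, e );   /* back onto the freelist;
                                              whole groups are never
                                              returned to HG_(free) */
#endif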
2647
2648
2649/////////////////////////////////////////////////////////
2650// //
sewardjf98e1c02008-10-25 16:22:41 +00002651// Change-event map2 //
2652// //
2653/////////////////////////////////////////////////////////
2654
2655#define EVENT_MAP_GC_AT (1 * 1000 * 1000)
2656#define EVENT_MAP_GC_DISCARD_FRACTION 0.5
2657
2658/* This is in two parts:
2659
2660 1. An OSet of RCECs. This is a set of reference-counted stack
2661 traces. When the reference count of a stack trace becomes zero,
2662 it is removed from the set and freed up. The intent is to have
2663 a set of stack traces which can be referred to from (2), but to
2664 only represent each one once. The set is indexed/searched by
2665 ordering on the stack trace vectors.
2666
2667 2. An OSet of OldRefs. These store information about each old ref
2668 that we need to record. It is indexed by address of the
2669 location for which the information is recorded. For LRU
2670 purposes, each OldRef also contains a generation number,
2671 indicating when it was most recently accessed.
2672
2673 The important part of an OldRef is, however, its accs[] array.
2674 This is an array of N_OLDREF_ACCS pairs of Thr and a RCEC. This
2675 allows us to collect the last access-traceback by up to
2676 N_OLDREF_ACCS different threads for this location. The accs[]
2677      array is an MTF (move-to-front) array.  If a pair falls off
2678      the end, that's too bad -- we will lose info about that
2679      thread's access to this location.
2680
2681 When this OSet becomes too big, we can throw away the entries
2682 whose generation numbers are below some threshold; hence doing
2683 approximate LRU discarding. For each discarded OldRef we must
2684      of course decrement the reference count on all the RCECs it
2685 refers to, in order that entries from (1) eventually get
2686 discarded too.
2687*/
2688
2689
2690static UWord stats__ctxt_rcdec1 = 0;
2691static UWord stats__ctxt_rcdec2 = 0;
2692static UWord stats__ctxt_rcdec3 = 0;
2693static UWord stats__ctxt_rcdec_calls = 0;
2694static UWord stats__ctxt_rcdec_discards = 0;
2695static UWord stats__ctxt_rcdec1_eq = 0;
2696
2697static UWord stats__ctxt_tab_curr = 0;
2698static UWord stats__ctxt_tab_max = 0;
2699
2700static UWord stats__ctxt_tab_qs = 0;
2701static UWord stats__ctxt_tab_cmps = 0;
2702
2703
2704///////////////////////////////////////////////////////
2705//// Part (1): An OSet of RCECs
2706///
2707
2708#define N_FRAMES 8
2709
2710// (UInt) `echo "Reference Counted Execution Context" | md5sum`
2711#define RCEC_MAGIC 0xab88abb2UL
2712
2713//#define N_RCEC_TAB 98317 /* prime */
2714#define N_RCEC_TAB 196613 /* prime */
2715
2716typedef
2717 struct _RCEC {
sewardjd86e3a22008-12-03 11:39:37 +00002718 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00002719 struct _RCEC* next;
sewardjf98e1c02008-10-25 16:22:41 +00002720 UWord rc;
2721 UWord rcX; /* used for crosschecking */
2722 UWord frames[1 + N_FRAMES]; /* first word is hash of all the rest */
2723 }
2724 RCEC;
2725
2726static RCEC** contextTab = NULL; /* hash table of RCEC*s */
2727
2728
2729/* Gives an arbitrary total order on RCEC .frames fields */
2730static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
2731 Word i;
2732 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
2733 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
2734 if (ec1->frames[0] < ec2->frames[0]) return -1;
2735 if (ec1->frames[0] > ec2->frames[0]) return 1;
2736 for (i = 1; i < 1 + N_FRAMES; i++) {
2737 if (ec1->frames[i] < ec2->frames[i]) return -1;
2738 if (ec1->frames[i] > ec2->frames[i]) return 1;
2739 }
2740 return 0;
2741}
2742
2743
2744/* Dec the ref of this RCEC. */
2745static void ctxt__rcdec ( RCEC* ec )
2746{
2747 stats__ctxt_rcdec_calls++;
2748 tl_assert(ec && ec->magic == RCEC_MAGIC);
2749 tl_assert(ec->rc > 0);
2750 ec->rc--;
2751}
2752
2753static void ctxt__rcinc ( RCEC* ec )
2754{
2755 tl_assert(ec && ec->magic == RCEC_MAGIC);
2756 ec->rc++;
2757}
2758
2759
sewardjd86e3a22008-12-03 11:39:37 +00002760//////////// BEGIN RCEC group allocator
2761static GroupAlloc rcec_group_allocator;
2762
2763static RCEC* alloc_RCEC ( void ) {
2764 return gal_Alloc ( &rcec_group_allocator );
2765}
2766
2767static void free_RCEC ( RCEC* rcec ) {
2768 tl_assert(rcec->magic == RCEC_MAGIC);
2769 gal_Free( &rcec_group_allocator, rcec );
2770}
2771//////////// END RCEC group allocator
2772
2773
sewardjf98e1c02008-10-25 16:22:41 +00002774/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
2775 move it one step closer the the front of the list, so as to make
2776   move it one step closer to the front of the list, so as to make
2777static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
2778{
2779 RCEC *ec0, *ec1, *ec2;
2780 if (ec == *headp)
2781 tl_assert(0); /* already at head of list */
2782 tl_assert(ec != NULL);
2783 ec0 = *headp;
2784 ec1 = NULL;
2785 ec2 = NULL;
2786 while (True) {
2787 if (ec0 == NULL || ec0 == ec) break;
2788 ec2 = ec1;
2789 ec1 = ec0;
2790 ec0 = ec0->next;
2791 }
2792 tl_assert(ec0 == ec);
2793 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
2794 RCEC* tmp;
2795 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
2796 predecessor. Swap ec0 and ec1, that is, move ec0 one step
2797 closer to the start of the list. */
2798 tl_assert(ec2->next == ec1);
2799 tl_assert(ec1->next == ec0);
2800 tmp = ec0->next;
2801 ec2->next = ec0;
2802 ec0->next = ec1;
2803 ec1->next = tmp;
2804 }
2805 else
2806 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
2807 /* it's second in the list. */
2808 tl_assert(*headp == ec1);
2809 tl_assert(ec1->next == ec0);
2810 ec1->next = ec0->next;
2811 ec0->next = ec1;
2812 *headp = ec0;
2813 }
2814}
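/* Illustrative note (not part of the original code): given a hash
   chain  head -> A -> B -> C -> D,  calling the function above on C
   yields  head -> A -> C -> B -> D,  and calling it on B (the second
   element) yields  head -> B -> A -> C -> D.  Repeated lookups thus
   migrate frequently-found RCECs gradually towards the front of
   their chain. */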
2815
2816
2817/* Find the given RCEC in the tree, and return a pointer to it. Or,
2818 if not present, add the given one to the tree (by making a copy of
2819 it, so the caller can immediately deallocate the original) and
2820 return a pointer to the copy. The caller can safely have 'example'
2821 on its stack, since we will always return a pointer to a copy of
2822 it, not to the original. Note that the inserted node will have .rc
2823   of zero and so the caller must immediately increment it. */
2824__attribute__((noinline))
2825static RCEC* ctxt__find_or_add ( RCEC* example )
2826{
2827 UWord hent;
2828 RCEC* copy;
2829 tl_assert(example && example->magic == RCEC_MAGIC);
2830 tl_assert(example->rc == 0);
2831
2832 /* Search the hash table to see if we already have it. */
2833 stats__ctxt_tab_qs++;
2834 hent = example->frames[0] % N_RCEC_TAB;
2835 copy = contextTab[hent];
2836 while (1) {
2837 if (!copy) break;
2838 tl_assert(copy->magic == RCEC_MAGIC);
2839 stats__ctxt_tab_cmps++;
2840 if (0 == RCEC__cmp_by_frames(copy, example)) break;
2841 copy = copy->next;
2842 }
2843
2844 if (copy) {
2845 tl_assert(copy != example);
2846 /* optimisation: if it's not at the head of its list, move 1
2847 step fwds, to make future searches cheaper */
2848 if (copy != contextTab[hent]) {
2849 move_RCEC_one_step_forward( &contextTab[hent], copy );
2850 }
2851 } else {
sewardjd86e3a22008-12-03 11:39:37 +00002852 copy = alloc_RCEC();
sewardjf98e1c02008-10-25 16:22:41 +00002853 tl_assert(copy != example);
2854 *copy = *example;
2855 copy->next = contextTab[hent];
2856 contextTab[hent] = copy;
2857 stats__ctxt_tab_curr++;
2858 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
2859 stats__ctxt_tab_max = stats__ctxt_tab_curr;
2860 }
2861 return copy;
2862}
2863
2864static inline UWord ROLW ( UWord w, Int n )
2865{
2866 Int bpw = 8 * sizeof(UWord);
2867 w = (w << n) | (w >> (bpw-n));
2868 return w;
2869}
2870
2871__attribute__((noinline))
2872static RCEC* get_RCEC ( Thr* thr )
2873{
2874 UWord hash, i;
2875 RCEC example;
2876 example.magic = RCEC_MAGIC;
2877 example.rc = 0;
2878 example.rcX = 0;
2879 main_get_stacktrace( thr, &example.frames[1], N_FRAMES );
2880 hash = 0;
2881 for (i = 1; i < 1 + N_FRAMES; i++) {
2882 hash ^= example.frames[i];
2883 hash = ROLW(hash, 19);
2884 }
2885 example.frames[0] = hash;
2886 return ctxt__find_or_add( &example );
2887}
2888
2889///////////////////////////////////////////////////////
sewardjd86e3a22008-12-03 11:39:37 +00002890//// Part (2): A WordFM guest-addr -> OldRef, which refers to (1)
sewardjf98e1c02008-10-25 16:22:41 +00002891///
2892
2893// (UInt) `echo "Old Reference Information" | md5sum`
2894#define OldRef_MAGIC 0x30b1f075UL
2895
2896typedef struct { Thr* thr; RCEC* rcec; } Thr_n_RCEC;
2897
2898#define N_OLDREF_ACCS 3
2899
2900typedef
2901 struct {
sewardjd86e3a22008-12-03 11:39:37 +00002902 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00002903 UWord gen; /* when most recently accessed */
sewardjd86e3a22008-12-03 11:39:37 +00002904 /* or free list when not in use */
sewardjf98e1c02008-10-25 16:22:41 +00002905 /* unused slots in this array have .thr == NULL */
2906 Thr_n_RCEC accs[N_OLDREF_ACCS];
2907 }
2908 OldRef;
2909
sewardjd86e3a22008-12-03 11:39:37 +00002910
2911//////////// BEGIN OldRef group allocator
2912static GroupAlloc oldref_group_allocator;
2913
2914static OldRef* alloc_OldRef ( void ) {
2915 return gal_Alloc ( &oldref_group_allocator );
2916}
2917
2918static void free_OldRef ( OldRef* r ) {
2919 tl_assert(r->magic == OldRef_MAGIC);
2920 gal_Free( &oldref_group_allocator, r );
2921}
2922//////////// END OldRef group allocator
2923
2924//////////// BEGIN oldrefTree node group allocator
2925static GroupAlloc oldrefnd_group_allocator;
2926static void* oldrefnd_first_alloc = NULL;
2927
2928static void* alloc_OldRef_nd ( HChar* cc, SizeT n ) {
2929 if (UNLIKELY(oldrefnd_first_alloc == NULL)) {
2930 oldrefnd_first_alloc = HG_(zalloc)( "libhb.alloc_OldRef_nd.1", n );
2931 return oldrefnd_first_alloc;
2932 } else {
2933 return gal_Alloc_w_size_check ( &oldrefnd_group_allocator, n );
2934 }
2935}
2936
2937static void free_OldRef_nd ( void* p ) {
2938 if (UNLIKELY(p == oldrefnd_first_alloc)) {
2939 HG_(free)( oldrefnd_first_alloc );
2940 oldrefnd_first_alloc = NULL;
2941 } else {
2942 gal_Free( &oldrefnd_group_allocator, p );
2943 }
2944}
2945//////////// END oldrefTree node group allocator
2946
2947static WordFM* oldrefTree = NULL; /* WordFM* Addr OldRef* */
2948static UWord oldrefGen = 0; /* current LRU generation # */
2949static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
2950static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */
sewardjf98e1c02008-10-25 16:22:41 +00002951
2952static void event_map_bind ( Addr a, Thr* thr )
2953{
sewardjd86e3a22008-12-03 11:39:37 +00002954 OldRef* ref;
2955 RCEC* here;
2956 Word i, j;
2957 UWord keyW, valW;
2958 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00002959
sewardjd86e3a22008-12-03 11:39:37 +00002960 b = VG_(lookupFM)( oldrefTree, &keyW, &valW, a );
sewardjf98e1c02008-10-25 16:22:41 +00002961
sewardjd86e3a22008-12-03 11:39:37 +00002962 if (b) {
sewardjf98e1c02008-10-25 16:22:41 +00002963
2964 /* We already have a record for this address. We now need to
2965 see if we have a stack trace pertaining to this thread's
2966 access. */
sewardjd86e3a22008-12-03 11:39:37 +00002967 tl_assert(keyW == a);
2968 ref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00002969 tl_assert(ref->magic == OldRef_MAGIC);
2970
2971 tl_assert(thr);
2972 for (i = 0; i < N_OLDREF_ACCS; i++) {
2973 if (ref->accs[i].thr == thr)
2974 break;
2975 }
2976
2977 if (i < N_OLDREF_ACCS) {
2978 /* thread 'thr' has an entry at index 'i'. Update it. */
2979 if (i > 0) {
2980 Thr_n_RCEC tmp = ref->accs[i-1];
2981 ref->accs[i-1] = ref->accs[i];
2982 ref->accs[i] = tmp;
2983 i--;
2984 }
2985 here = get_RCEC( thr );
2986 if (here == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
2987 ctxt__rcinc( here );
2988 stats__ctxt_rcdec1++;
2989 ctxt__rcdec( ref->accs[i].rcec );
2990 ref->accs[i].rcec = here;
2991 tl_assert(ref->accs[i].thr == thr);
2992 } else {
2993 here = get_RCEC( thr );
2994 ctxt__rcinc( here );
2995 /* No entry for this thread. Shuffle all of them down one
2996 slot, and put the new entry at the start of the array. */
2997 if (ref->accs[N_OLDREF_ACCS-1].thr) {
2998 /* the last slot is in use. We must dec the rc on the
2999 associated rcec. */
3000 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
3001 stats__ctxt_rcdec2++;
3002 ctxt__rcdec(ref->accs[N_OLDREF_ACCS-1].rcec);
3003 } else {
3004 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
3005 }
3006 for (j = N_OLDREF_ACCS-1; j >= 1; j--)
3007 ref->accs[j] = ref->accs[j-1];
3008 ref->accs[0].thr = thr;
3009 ref->accs[0].rcec = here;
3010 tl_assert(thr); /* thr==NULL is used to signify an empty slot,
3011 so we can't add a NULL thr. */
3012 }
3013
3014 ref->gen = oldrefGen;
sewardjf98e1c02008-10-25 16:22:41 +00003015
3016 } else {
3017
3018 /* We don't have a record for this address. Create a new one. */
3019 if (oldrefTreeN >= oldrefGenIncAt) {
3020 oldrefGen++;
3021 oldrefGenIncAt = oldrefTreeN + 50000;
3022 if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
3023 oldrefGen, oldrefTreeN );
3024 }
3025 here = get_RCEC( thr );
3026 ctxt__rcinc(here);
sewardjd86e3a22008-12-03 11:39:37 +00003027
3028
3029 ref = alloc_OldRef();
sewardjf98e1c02008-10-25 16:22:41 +00003030 ref->magic = OldRef_MAGIC;
3031 ref->gen = oldrefGen;
sewardjf98e1c02008-10-25 16:22:41 +00003032 ref->accs[0].rcec = here;
3033 ref->accs[0].thr = thr;
3034 tl_assert(thr); /* thr==NULL is used to signify an empty slot,
3035 so we can't add a NULL thr. */
3036 for (j = 1; j < N_OLDREF_ACCS; j++) {
3037 ref->accs[j].thr = NULL;
3038 ref->accs[j].rcec = NULL;
3039 }
sewardjd86e3a22008-12-03 11:39:37 +00003040 VG_(addToFM)( oldrefTree, a, (UWord)ref );
sewardjf98e1c02008-10-25 16:22:41 +00003041 oldrefTreeN++;
3042
3043 }
3044}
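/* Illustrative note (not part of the original code): a worked example
   of the accs[] policy above, with N_OLDREF_ACCS == 3, hypothetical
   threads T1..T4 and ctxN denoting the RCEC captured for an access.
   Starting from
        [ (T1,ctx1) (T2,ctx2) (T3,ctx3) ]
   an access by T3 swaps its pair one slot towards the front and
   refreshes its RCEC, giving
        [ (T1,ctx1) (T3,ctx3') (T2,ctx2) ]
   whereas an access by a previously unseen thread T4 shifts everything
   down one slot, drops the last pair (decrementing its RCEC's
   refcount) and installs the new pair at the front:
        [ (T4,ctx4) (T1,ctx1) (T3,ctx3') ]
*/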
3045
3046
3047static
sewardjd52392d2008-11-08 20:36:26 +00003048Bool event_map_lookup ( /*OUT*/ExeContext** resEC,
sewardjf98e1c02008-10-25 16:22:41 +00003049 /*OUT*/Thr** resThr,
3050 Thr* thr_acc, Addr a )
3051{
sewardjd86e3a22008-12-03 11:39:37 +00003052 Word i;
3053 OldRef* ref;
3054 UWord keyW, valW;
3055 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00003056
sewardjd86e3a22008-12-03 11:39:37 +00003057 tl_assert(thr_acc);
sewardjf98e1c02008-10-25 16:22:41 +00003058
sewardjd86e3a22008-12-03 11:39:37 +00003059 b = VG_(lookupFM)( oldrefTree, &keyW, &valW, a );
3060 if (b) {
3061 ref = (OldRef*)valW;
3062 tl_assert(keyW == a);
sewardjf98e1c02008-10-25 16:22:41 +00003063 tl_assert(ref->magic == OldRef_MAGIC);
3064 tl_assert(ref->accs[0].thr); /* first slot must always be used */
3065
3066 for (i = 0; i < N_OLDREF_ACCS; i++) {
3067 if (ref->accs[i].thr != NULL
3068 && ref->accs[i].thr != thr_acc)
3069 break;
3070 }
3071 /* If we didn't find an entry for some thread other than
3072      thr_acc, just return the entry at index 0.  It'll look
3073 pretty stupid to the user though. */
3074 if (i == N_OLDREF_ACCS)
3075 i = 0;
3076
3077 tl_assert(i >= 0 && i < N_OLDREF_ACCS);
3078 tl_assert(ref->accs[i].thr);
3079 tl_assert(ref->accs[i].rcec);
3080 tl_assert(ref->accs[i].rcec->magic == RCEC_MAGIC);
3081
sewardjd52392d2008-11-08 20:36:26 +00003082 *resEC = VG_(make_ExeContext_from_StackTrace)(
3083 &ref->accs[i].rcec->frames[1], N_FRAMES
3084 );
sewardjf98e1c02008-10-25 16:22:41 +00003085 *resThr = ref->accs[i].thr;
3086 return True;
3087 } else {
3088 return False;
3089 }
3090}
3091
3092static void event_map_init ( void )
3093{
3094 Word i;
sewardjd86e3a22008-12-03 11:39:37 +00003095
3096 /* Context (RCEC) group allocator */
3097 init_GroupAlloc ( &rcec_group_allocator,
3098 sizeof(RCEC),
3099 1000 /* RCECs per group */,
3100 HG_(zalloc),
3101 "libhb.event_map_init.1 (RCEC groups)",
3102 HG_(free) );
3103
3104 /* Context table */
sewardjf98e1c02008-10-25 16:22:41 +00003105 tl_assert(!contextTab);
sewardjd86e3a22008-12-03 11:39:37 +00003106 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
sewardjf98e1c02008-10-25 16:22:41 +00003107 N_RCEC_TAB * sizeof(RCEC*) );
3108 tl_assert(contextTab);
3109 for (i = 0; i < N_RCEC_TAB; i++)
3110 contextTab[i] = NULL;
3111
sewardjd86e3a22008-12-03 11:39:37 +00003112 /* Oldref group allocator */
3113 init_GroupAlloc ( &oldref_group_allocator,
3114 sizeof(OldRef),
3115 1000 /* OldRefs per group */,
3116 HG_(zalloc),
3117 "libhb.event_map_init.3 (OldRef groups)",
3118 HG_(free) );
3119
3120 /* Oldref node group allocator */
3121 init_GroupAlloc ( &oldrefnd_group_allocator,
3122 VG_(getNodeSizeFM)(),
3123                     1000 /* tree nodes per group */,
3124 HG_(zalloc),
3125 "libhb.event_map_init.3 (OldRef tree node groups)",
3126 HG_(free) );
3127
3128 /* Oldref tree */
sewardjf98e1c02008-10-25 16:22:41 +00003129 tl_assert(!oldrefTree);
sewardjd86e3a22008-12-03 11:39:37 +00003130 oldrefTree = VG_(newFM)(
3131 alloc_OldRef_nd,
3132 "libhb.event_map_init.4 (oldref tree)",
3133 free_OldRef_nd,
3134 NULL /* use unboxed cmp */
sewardjf98e1c02008-10-25 16:22:41 +00003135 );
3136 tl_assert(oldrefTree);
3137
3138 oldrefGen = 0;
3139 oldrefGenIncAt = 0;
3140 oldrefTreeN = 0;
3141}
3142
3143static void event_map__check_reference_counts ( Bool before )
3144{
3145 RCEC* rcec;
3146 OldRef* oldref;
3147 Word i;
3148 UWord nEnts = 0;
sewardjd86e3a22008-12-03 11:39:37 +00003149 UWord keyW, valW;
sewardjf98e1c02008-10-25 16:22:41 +00003150
3151 /* Set the 'check' reference counts to zero. Also, optionally
3152 check that the real reference counts are non-zero. We allow
3153 these to fall to zero before a GC, but the GC must get rid of
3154 all those that are zero, hence none should be zero after a
3155 GC. */
3156 for (i = 0; i < N_RCEC_TAB; i++) {
3157 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
3158 nEnts++;
3159 tl_assert(rcec);
3160 tl_assert(rcec->magic == RCEC_MAGIC);
3161 if (!before)
3162 tl_assert(rcec->rc > 0);
3163 rcec->rcX = 0;
3164 }
3165 }
3166
3167 /* check that the stats are sane */
3168 tl_assert(nEnts == stats__ctxt_tab_curr);
3169 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
3170
3171 /* visit all the referencing points, inc check ref counts */
sewardjd86e3a22008-12-03 11:39:37 +00003172 VG_(initIterFM)( oldrefTree );
3173 while (VG_(nextIterFM)( oldrefTree, &keyW, &valW )) {
3174 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00003175 tl_assert(oldref->magic == OldRef_MAGIC);
3176 for (i = 0; i < N_OLDREF_ACCS; i++) {
3177 if (oldref->accs[i].thr) {
3178 tl_assert(oldref->accs[i].rcec);
3179 tl_assert(oldref->accs[i].rcec->magic == RCEC_MAGIC);
3180 oldref->accs[i].rcec->rcX++;
3181 } else {
3182 tl_assert(!oldref->accs[i].rcec);
3183 }
3184 }
3185 }
3186
3187 /* compare check ref counts with actual */
3188 for (i = 0; i < N_RCEC_TAB; i++) {
3189 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
3190 tl_assert(rcec->rc == rcec->rcX);
3191 }
3192 }
3193}
3194
sewardj8fd92d32008-11-20 23:17:01 +00003195__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00003196static void event_map_maybe_GC ( void )
3197{
3198 OldRef* oldref;
3199 UWord keyW, valW, retained, maxGen;
sewardjf98e1c02008-10-25 16:22:41 +00003200 XArray* refs2del;
3201 Word i, j, n2del;
3202
sewardj8fd92d32008-11-20 23:17:01 +00003203 UWord* genMap = NULL;
3204 UWord genMap_min = 0;
3205 UWord genMap_size = 0;
3206
sewardjf98e1c02008-10-25 16:22:41 +00003207 if (LIKELY(oldrefTreeN < EVENT_MAP_GC_AT))
3208 return;
3209
3210 if (0)
3211 VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
3212
3213 /* Check our counting is sane */
sewardjd86e3a22008-12-03 11:39:37 +00003214 tl_assert(oldrefTreeN == VG_(sizeFM)( oldrefTree ));
sewardjf98e1c02008-10-25 16:22:41 +00003215
3216 /* Check the reference counts */
3217 event_map__check_reference_counts( True/*before*/ );
3218
sewardj8fd92d32008-11-20 23:17:01 +00003219 /* Compute the distribution of generation values in the ref tree.
3220 There are likely only to be a few different generation numbers
3221 in the whole tree, but we don't know what they are. Hence use a
3222 dynamically resized array of counters. The array is genMap[0
3223 .. genMap_size-1], where genMap[0] is the count for the
3224 generation number genMap_min, genMap[1] is the count for
3225 genMap_min+1, etc. If a new number is seen outside the range
3226 [genMap_min .. genMap_min + genMap_size - 1] then the array is
3227 copied into a larger array, and genMap_min and genMap_size are
3228 adjusted accordingly. */
3229
sewardjf98e1c02008-10-25 16:22:41 +00003230 /* genMap :: generation-number -> count-of-nodes-with-that-number */
sewardjf98e1c02008-10-25 16:22:41 +00003231
sewardjd86e3a22008-12-03 11:39:37 +00003232 VG_(initIterFM)( oldrefTree );
3233 while ( VG_(nextIterFM)( oldrefTree, &keyW, &valW )) {
sewardj8fd92d32008-11-20 23:17:01 +00003234
sewardjd86e3a22008-12-03 11:39:37 +00003235 UWord ea, key;
3236 oldref = (OldRef*)valW;
3237 key = oldref->gen;
sewardj8fd92d32008-11-20 23:17:01 +00003238
3239 /* BEGIN find 'ea', which is the index in genMap holding the
3240 count for generation number 'key'. */
3241 if (UNLIKELY(genMap == NULL)) {
3242 /* deal with the first key to be seen, so that the following
3243 cases don't need to handle the complexity of a NULL count
3244 array. */
3245 genMap_min = key;
3246 genMap_size = 1;
3247 genMap = HG_(zalloc)( "libhb.emmG.1a",
3248 genMap_size * sizeof(UWord) );
3249 ea = 0;
3250 if (0) VG_(printf)("(%lu) case 1 [%lu .. %lu]\n",
3251 key, genMap_min, genMap_min+genMap_size- 1 );
sewardjf98e1c02008-10-25 16:22:41 +00003252 }
sewardj8fd92d32008-11-20 23:17:01 +00003253 else
3254 if (LIKELY(key >= genMap_min && key < genMap_min + genMap_size)) {
3255 /* this is the expected (almost-always-happens) case: 'key'
3256 is already mapped in the array. */
3257 ea = key - genMap_min;
3258 }
3259 else
3260 if (key < genMap_min) {
3261 /* 'key' appears before the start of the current array.
3262 Extend the current array by allocating a larger one and
3263 copying the current one to the upper end of it. */
3264 Word more;
3265 UWord* map2;
3266 more = genMap_min - key;
3267 tl_assert(more > 0);
3268 map2 = HG_(zalloc)( "libhb.emmG.1b",
3269 (genMap_size + more) * sizeof(UWord) );
3270 VG_(memcpy)( &map2[more], genMap, genMap_size * sizeof(UWord) );
3271 HG_(free)( genMap );
3272 genMap = map2;
3273 genMap_size += more;
3274 genMap_min -= more;
3275 ea = 0;
3276 tl_assert(genMap_min == key);
3277 if (0) VG_(printf)("(%lu) case 2 [%lu .. %lu]\n",
3278 key, genMap_min, genMap_min+genMap_size- 1 );
3279 }
3280 else {
3281 /* 'key' appears after the end of the current array. Extend
3282 the current array by allocating a larger one and copying
3283 the current one to the lower end of it. */
3284 Word more;
3285 UWord* map2;
3286 tl_assert(key >= genMap_min + genMap_size);
3287 more = key - (genMap_min + genMap_size) + 1;
3288 tl_assert(more > 0);
3289 map2 = HG_(zalloc)( "libhb.emmG.1c",
3290 (genMap_size + more) * sizeof(UWord) );
3291 VG_(memcpy)( &map2[0], genMap, genMap_size * sizeof(UWord) );
3292 HG_(free)( genMap );
3293 genMap = map2;
3294 genMap_size += more;
3295         ea = genMap_size - 1;
3296 tl_assert(genMap_min + genMap_size - 1 == key);
3297 if (0) VG_(printf)("(%lu) case 3 [%lu .. %lu]\n",
3298 key, genMap_min, genMap_min+genMap_size- 1 );
3299 }
3300 /* END find 'ea' from 'key' */
3301
3302 tl_assert(ea >= 0 && ea < genMap_size);
sewardjd86e3a22008-12-03 11:39:37 +00003303 /* and the whole point of this elaborate computation of 'ea' is .. */
sewardj8fd92d32008-11-20 23:17:01 +00003304 genMap[ea]++;
sewardjf98e1c02008-10-25 16:22:41 +00003305 }
3306
sewardj8fd92d32008-11-20 23:17:01 +00003307 tl_assert(genMap);
3308 tl_assert(genMap_size > 0);
sewardjf98e1c02008-10-25 16:22:41 +00003309
sewardj8fd92d32008-11-20 23:17:01 +00003310 /* Sanity check what we just computed */
3311 { UWord sum = 0;
3312 for (i = 0; i < genMap_size; i++) {
3313 if (0) VG_(printf)(" xxx: gen %ld has %lu\n",
3314 i + genMap_min, genMap[i] );
3315 sum += genMap[i];
3316 }
3317 tl_assert(sum == oldrefTreeN);
3318 }
3319
3320 /* Figure out how many generations to throw away */
sewardjf98e1c02008-10-25 16:22:41 +00003321 retained = oldrefTreeN;
3322 maxGen = 0;
sewardj8fd92d32008-11-20 23:17:01 +00003323
3324 for (i = 0; i < genMap_size; i++) {
3325 keyW = i + genMap_min;
3326 valW = genMap[i];
sewardjf98e1c02008-10-25 16:22:41 +00003327 tl_assert(keyW > 0); /* can't allow a generation # 0 */
3328 if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW );
3329 tl_assert(keyW >= maxGen);
3330 tl_assert(retained >= valW);
3331 if (retained - valW
3332 > (UWord)(EVENT_MAP_GC_AT * EVENT_MAP_GC_DISCARD_FRACTION)) {
3333 retained -= valW;
3334 maxGen = keyW;
3335 } else {
3336 break;
3337 }
3338 }
sewardjf98e1c02008-10-25 16:22:41 +00003339
sewardj8fd92d32008-11-20 23:17:01 +00003340 HG_(free)(genMap);
sewardjf98e1c02008-10-25 16:22:41 +00003341
sewardj9b1f0fd2008-11-18 23:40:00 +00003342 tl_assert(retained >= 0 && retained <= oldrefTreeN);
sewardjf98e1c02008-10-25 16:22:41 +00003343
3344 /* Now make up a big list of the oldrefTree entries we want to
3345 delete. We can't simultaneously traverse the tree and delete
3346 stuff from it, so first we need to copy them off somewhere
3347 else. (sigh) */
sewardj8fd92d32008-11-20 23:17:01 +00003348 refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2",
sewardjd86e3a22008-12-03 11:39:37 +00003349 HG_(free), sizeof(Addr) );
sewardjf98e1c02008-10-25 16:22:41 +00003350
sewardj9b1f0fd2008-11-18 23:40:00 +00003351 if (retained < oldrefTreeN) {
3352
3353 /* This is the normal (expected) case. We discard any ref whose
3354 generation number <= maxGen. */
sewardjd86e3a22008-12-03 11:39:37 +00003355 VG_(initIterFM)( oldrefTree );
3356 while (VG_(nextIterFM)( oldrefTree, &keyW, &valW )) {
3357 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00003358 tl_assert(oldref->magic == OldRef_MAGIC);
3359 if (oldref->gen <= maxGen) {
sewardjd86e3a22008-12-03 11:39:37 +00003360 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00003361 }
sewardjf98e1c02008-10-25 16:22:41 +00003362 }
sewardj9b1f0fd2008-11-18 23:40:00 +00003363 if (VG_(clo_verbosity) > 1) {
3364 VG_(message)(Vg_DebugMsg,
3365 "libhb: EvM GC: delete generations %lu and below, "
3366 "retaining %lu entries",
3367 maxGen, retained );
3368 }
3369
3370 } else {
3371
3372 static UInt rand_seed = 0; /* leave as static */
3373
3374 /* Degenerate case: there's only one generation in the entire
3375 tree, so we need to have some other way of deciding which
3376 refs to throw away. Just throw out half of them randomly. */
3377 tl_assert(retained == oldrefTreeN);
sewardjd86e3a22008-12-03 11:39:37 +00003378 VG_(initIterFM)( oldrefTree );
3379 while (VG_(nextIterFM)( oldrefTree, &keyW, &valW )) {
sewardj9b1f0fd2008-11-18 23:40:00 +00003380 UInt n;
sewardjd86e3a22008-12-03 11:39:37 +00003381 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00003382 tl_assert(oldref->magic == OldRef_MAGIC);
3383 n = VG_(random)( &rand_seed );
3384 if ((n & 0xFFF) < 0x800) {
sewardjd86e3a22008-12-03 11:39:37 +00003385 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00003386 retained--;
3387 }
3388 }
3389 if (VG_(clo_verbosity) > 1) {
3390 VG_(message)(Vg_DebugMsg,
3391 "libhb: EvM GC: randomly delete half the entries, "
3392 "retaining %lu entries",
3393 retained );
3394 }
3395
sewardjf98e1c02008-10-25 16:22:41 +00003396 }
3397
3398 n2del = VG_(sizeXA)( refs2del );
3399 tl_assert(n2del == (Word)(oldrefTreeN - retained));
3400
3401 if (0) VG_(printf)("%s","deleting entries\n");
3402 for (i = 0; i < n2del; i++) {
sewardjd86e3a22008-12-03 11:39:37 +00003403 Bool b;
3404 Addr ga2del = *(Addr*)VG_(indexXA)( refs2del, i );
3405 b = VG_(delFromFM)( oldrefTree, &keyW, &valW, ga2del );
3406 tl_assert(b);
3407 tl_assert(keyW == ga2del);
3408 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00003409 for (j = 0; j < N_OLDREF_ACCS; j++) {
sewardjd86e3a22008-12-03 11:39:37 +00003410 if (oldref->accs[j].rcec) {
3411 tl_assert(oldref->accs[j].thr);
sewardjf98e1c02008-10-25 16:22:41 +00003412 stats__ctxt_rcdec3++;
sewardjd86e3a22008-12-03 11:39:37 +00003413 ctxt__rcdec( oldref->accs[j].rcec );
sewardjf98e1c02008-10-25 16:22:41 +00003414 } else {
sewardjd86e3a22008-12-03 11:39:37 +00003415 tl_assert(!oldref->accs[j].thr);
sewardjf98e1c02008-10-25 16:22:41 +00003416 }
3417 }
sewardjd86e3a22008-12-03 11:39:37 +00003418
3419 free_OldRef( oldref );
sewardjf98e1c02008-10-25 16:22:41 +00003420 }
3421
3422 VG_(deleteXA)( refs2del );
3423
sewardjd86e3a22008-12-03 11:39:37 +00003424 tl_assert( VG_(sizeFM)( oldrefTree ) == retained );
sewardjf98e1c02008-10-25 16:22:41 +00003425
3426 oldrefTreeN = retained;
3427 oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
3428
3429 /* Throw away all RCECs with zero reference counts */
3430 for (i = 0; i < N_RCEC_TAB; i++) {
3431 RCEC** pp = &contextTab[i];
3432 RCEC* p = *pp;
3433 while (p) {
3434 if (p->rc == 0) {
3435 *pp = p->next;
sewardjd86e3a22008-12-03 11:39:37 +00003436 free_RCEC(p);
sewardjf98e1c02008-10-25 16:22:41 +00003437 p = *pp;
3438 tl_assert(stats__ctxt_tab_curr > 0);
3439 stats__ctxt_tab_curr--;
3440 } else {
3441 pp = &p->next;
3442 p = p->next;
3443 }
3444 }
3445 }
3446
3447 /* Check the reference counts */
3448 event_map__check_reference_counts( False/*after*/ );
3449
3450 //if (0)
3451 //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
3452 // VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
3453
3454}
3455
3456
3457/////////////////////////////////////////////////////////
3458// //
3459// Core MSM //
3460// //
3461/////////////////////////////////////////////////////////
3462
sewardjb0e009d2008-11-19 16:35:15 +00003463/* Logic in msm_read/msm_write updated/verified after re-analysis,
3464 19 Nov 08. */
3465
sewardjf98e1c02008-10-25 16:22:41 +00003466#define MSM_CONFACC 1
3467
sewardjf98e1c02008-10-25 16:22:41 +00003468#define MSM_CHECK 0
3469
sewardjb0e009d2008-11-19 16:35:15 +00003470/* 19 Nov 08: it seems that MSM_RACE2ERR == 1 is a bad idea. When
3471 nonzero, the effect is that when a race is detected for a location,
3472 that location is put into a special 'error' state and no further
3473 checking of it is done until it returns to a 'normal' state, which
3474 requires it to be deallocated and reallocated.
3475
3476 This is a bad idea, because of the interaction with suppressions.
3477 Suppose there is a race on the location, but the error is
3478 suppressed. The location now is marked as in-error. Now any
3479 subsequent race -- including ones we want to see -- will never be
3480 detected until the location is deallocated and reallocated.
3481
3482   Hence set MSM_RACE2ERR to zero.  This causes raced-on locations to
3483 remain in the normal 'C' (constrained) state, but places on them
3484 the constraint that the next accesses happen-after both the
3485 existing constraint and the relevant vector clock of the thread
3486 doing the racing access.
3487*/
3488#define MSM_RACE2ERR 0
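/* Illustrative sketch (clock values invented, not part of the library):
   how the two policies differ for a location raced on by threads T1
   and T2.

     constraint before T2's racing read:  Rmin = Wmin = {T1:3, T2:0}
     T2's clocks at the access:           viR  = viW  = {T1:0, T2:5}

   Neither VTS dominates the other, so a race is reported.  With
   MSM_RACE2ERR == 1 the location would go to the special E state and
   later races on it would stay invisible until the memory is
   reallocated.  With MSM_RACE2ERR == 0 (the setting used here) it
   stays constrained, with the relaxed update
       Rmin' = join(Rmin, viR) = {T1:3, T2:5}
       Wmin' = join(Wmin, viW) = {T1:3, T2:5}
   so subsequent racing accesses can still be detected. */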
3489
sewardjf98e1c02008-10-25 16:22:41 +00003490static ULong stats__msm_read = 0;
3491static ULong stats__msm_read_change = 0;
3492static ULong stats__msm_write = 0;
3493static ULong stats__msm_write_change = 0;
3494
3495__attribute__((noinline))
3496static void record_race_info ( Thr* acc_thr,
3497 Addr acc_addr, SizeT szB, Bool isWrite,
3498 SVal svOld, SVal svNew )
3499{
3500 Bool found;
3501 Thr* thrp = NULL;
sewardjd52392d2008-11-08 20:36:26 +00003502 ExeContext* where = NULL;
3503 ExeContext* wherep = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00003504 where = main_get_EC( acc_thr );
3505 found = event_map_lookup( &wherep, &thrp, acc_thr, acc_addr );
3506 if (found) {
3507 tl_assert(wherep);
3508 tl_assert(thrp);
3509 tl_assert(thrp->opaque);
3510 tl_assert(acc_thr->opaque);
3511 HG_(record_error_Race)( acc_thr->opaque, acc_addr,
3512 isWrite, szB, NULL/*mb_lastlock*/,
3513 wherep, thrp->opaque );
3514 } else {
3515 tl_assert(!wherep);
3516 tl_assert(!thrp);
3517 tl_assert(acc_thr->opaque);
3518 HG_(record_error_Race)( acc_thr->opaque, acc_addr,
3519 isWrite, szB, NULL/*mb_lastlock*/,
3520 NULL, NULL );
3521 }
3522}
3523
3524static Bool is_sane_SVal_C ( SVal sv ) {
3525 POrd ord;
3526 if (!SVal__isC(sv)) return True;
3527 ord = VtsID__getOrdering( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
3528 if (ord == POrd_EQ || ord == POrd_LT) return True;
3529 return False;
3530}
3531
3532
3533/* Compute new state following a read */
3534static inline SVal msm_read ( SVal svOld,
3535 /* The following are only needed for
3536 creating error reports. */
3537 Thr* acc_thr,
3538 Addr acc_addr, SizeT szB )
3539{
3540 SVal svNew = SVal_INVALID;
3541 stats__msm_read++;
3542
3543 /* Redundant sanity check on the constraints */
3544 if (MSM_CHECK) {
3545 tl_assert(is_sane_SVal_C(svOld));
3546 }
3547
3548 if (SVal__isC(svOld)) {
3549 POrd ord;
3550 VtsID tviR = acc_thr->viR;
3551 VtsID tviW = acc_thr->viW;
3552 VtsID rmini = SVal__unC_Rmin(svOld);
3553 VtsID wmini = SVal__unC_Wmin(svOld);
3554
3555 ord = VtsID__getOrdering(rmini,tviR);
3556 if (ord == POrd_EQ || ord == POrd_LT) {
3557 /* no race */
3558 /* Note: RWLOCK subtlety: use tviW, not tviR */
3559 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
3560 goto out;
3561 } else {
sewardjb0e009d2008-11-19 16:35:15 +00003562 /* assert on sanity of constraints. */
3563 POrd ordxx = VtsID__getOrdering(rmini,wmini);
3564 tl_assert(ordxx == POrd_EQ || ordxx == POrd_LT);
sewardjf98e1c02008-10-25 16:22:41 +00003565 svNew = MSM_RACE2ERR
3566 ? SVal__mkE()
sewardj3b0c4d72008-11-20 11:20:50 +00003567#if 0
3568 //std
sewardjb0e009d2008-11-19 16:35:15 +00003569 : SVal__mkC( VtsID__join2(wmini,tviR),
3570 VtsID__join2(wmini,tviW) );
sewardj3b0c4d72008-11-20 11:20:50 +00003571#else
3572 // relaxed
3573 : SVal__mkC( VtsID__join2(rmini,tviR),
3574 VtsID__join2(wmini,tviW) );
3575#endif
sewardjf98e1c02008-10-25 16:22:41 +00003576 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
3577 svOld, svNew );
3578 goto out;
3579 }
3580 }
3581 if (SVal__isA(svOld)) {
3582 /* reading no-access memory (sigh); leave unchanged */
3583 /* check for no pollution */
3584 tl_assert(svOld == SVal_NOACCESS);
3585 svNew = SVal_NOACCESS;
3586 goto out;
3587 }
3588 if (SVal__isE(svOld)) {
3589 /* no race, location is already "in error" */
3590 svNew = SVal__mkE();
3591 goto out;
3592 }
3593 VG_(printf)("msm_read: bad svOld: 0x%016llx\n", svOld);
3594 tl_assert(0);
3595
3596 out:
3597 if (MSM_CHECK) {
3598 tl_assert(is_sane_SVal_C(svNew));
3599 }
3600 tl_assert(svNew != SVal_INVALID);
3601 if (svNew != svOld) {
3602 if (MSM_CONFACC && SVal__isC(svOld) && SVal__isC(svNew)) {
3603 event_map_bind( acc_addr, acc_thr );
3604 stats__msm_read_change++;
3605 }
3606 }
3607 return svNew;
3608}
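/* Example pass through msm_read, with invented clock values, for a
   location already in the C state:
     constraint   Rmin = {T1:2},        Wmin = {T1:2}
     reader T2    viR  = {T1:2, T2:7},  viW  = {T1:2, T2:7}
   Rmin <= viR holds, so there is no race.  Note the RWLOCK subtlety
   flagged above: the new constraint is C( Rmin, join(Wmin, viW) )
   = C( {T1:2}, {T1:2, T2:7} ), i.e. the read-minimum is left alone and
   only the write-minimum is advanced, using the reader's *write*
   clock. */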
3609
3610
3611/* Compute new state following a write */
3612static inline SVal msm_write ( SVal svOld,
3613 /* The following are only needed for
3614 creating error reports. */
3615 Thr* acc_thr,
3616 Addr acc_addr, SizeT szB )
3617{
3618 SVal svNew = SVal_INVALID;
3619 stats__msm_write++;
3620
3621 /* Redundant sanity check on the constraints */
3622 if (MSM_CHECK) {
3623 tl_assert(is_sane_SVal_C(svOld));
3624 }
3625
3626 if (SVal__isC(svOld)) {
3627 POrd ord;
3628 VtsID tviW = acc_thr->viW;
3629 VtsID wmini = SVal__unC_Wmin(svOld);
3630
3631 ord = VtsID__getOrdering(wmini,tviW);
3632 if (ord == POrd_EQ || ord == POrd_LT) {
3633 /* no race */
3634 svNew = SVal__mkC( tviW, tviW );
3635 goto out;
3636 } else {
sewardjb0e009d2008-11-19 16:35:15 +00003637 VtsID tviR = acc_thr->viR;
sewardjf98e1c02008-10-25 16:22:41 +00003638 VtsID rmini = SVal__unC_Rmin(svOld);
sewardjb0e009d2008-11-19 16:35:15 +00003639 /* assert on sanity of constraints. */
3640 POrd ordxx = VtsID__getOrdering(rmini,wmini);
3641 tl_assert(ordxx == POrd_EQ || ordxx == POrd_LT);
sewardjf98e1c02008-10-25 16:22:41 +00003642 svNew = MSM_RACE2ERR
3643 ? SVal__mkE()
sewardj3b0c4d72008-11-20 11:20:50 +00003644#if 0
3645 // std
sewardjb0e009d2008-11-19 16:35:15 +00003646 : SVal__mkC( VtsID__join2(wmini,tviR),
sewardjf98e1c02008-10-25 16:22:41 +00003647 VtsID__join2(wmini,tviW) );
sewardj3b0c4d72008-11-20 11:20:50 +00003648#else
3649 // relaxed
3650 : SVal__mkC( VtsID__join2(rmini,tviR),
3651 VtsID__join2(wmini,tviW) );
3652#endif
sewardjf98e1c02008-10-25 16:22:41 +00003653 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
3654 svOld, svNew );
3655 goto out;
3656 }
3657 }
3658 if (SVal__isA(svOld)) {
3659 /* writing no-access memory (sigh); leave unchanged */
3660 /* check for no pollution */
3661 tl_assert(svOld == SVal_NOACCESS);
3662 svNew = SVal_NOACCESS;
3663 goto out;
3664 }
3665 if (SVal__isE(svOld)) {
3666 /* no race, location is already "in error" */
3667 svNew = SVal__mkE();
3668 goto out;
3669 }
3670 VG_(printf)("msm_write: bad svOld: 0x%016llx\n", svOld);
3671 tl_assert(0);
3672
3673 out:
3674 if (MSM_CHECK) {
3675 tl_assert(is_sane_SVal_C(svNew));
3676 }
3677 tl_assert(svNew != SVal_INVALID);
3678 if (svNew != svOld) {
3679 if (MSM_CONFACC && SVal__isC(svOld) && SVal__isC(svNew)) {
3680 event_map_bind( acc_addr, acc_thr );
3681 stats__msm_write_change++;
3682 }
3683 }
3684 return svNew;
3685}
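/* Corresponding example for msm_write (values again invented): if the
   location's write-minimum is Wmin = {T1:4} and the writing thread's
   write clock is viW = {T1:4, T2:9}, then Wmin <= viW, there is no
   race, and the new constraint is simply C(viW, viW); a non-racing
   write resets both minima to the writer's write clock. */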
3686
3687
3688/////////////////////////////////////////////////////////
3689// //
3690// Apply core MSM to specific memory locations //
3691// //
3692/////////////////////////////////////////////////////////
3693
3694/*------------- ZSM accesses: 8 bit apply ------------- */
3695
3696void zsm_apply8___msm_read ( Thr* thr, Addr a ) {
3697 CacheLine* cl;
3698 UWord cloff, tno, toff;
3699 SVal svOld, svNew;
3700 UShort descr;
3701 stats__cline_read8s++;
3702 cl = get_cacheline(a);
3703 cloff = get_cacheline_offset(a);
3704 tno = get_treeno(a);
3705 toff = get_tree_offset(a); /* == 0 .. 7 */
3706 descr = cl->descrs[tno];
3707 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3708 SVal* tree = &cl->svals[tno << 3];
3709 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3710 if (SCE_CACHELINE)
3711 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3712 }
3713 svOld = cl->svals[cloff];
3714 svNew = msm_read( svOld, thr,a,1 );
3715 tl_assert(svNew != SVal_INVALID);
3716 cl->svals[cloff] = svNew;
3717}
3718
3719void zsm_apply8___msm_write ( Thr* thr, Addr a ) {
3720 CacheLine* cl;
3721 UWord cloff, tno, toff;
3722 SVal svOld, svNew;
3723 UShort descr;
3724   stats__cline_write8s++;
3725 cl = get_cacheline(a);
3726 cloff = get_cacheline_offset(a);
3727 tno = get_treeno(a);
3728 toff = get_tree_offset(a); /* == 0 .. 7 */
3729 descr = cl->descrs[tno];
3730 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3731 SVal* tree = &cl->svals[tno << 3];
3732 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3733 if (SCE_CACHELINE)
3734 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3735 }
3736 svOld = cl->svals[cloff];
3737 svNew = msm_write( svOld, thr,a,1 );
3738 tl_assert(svNew != SVal_INVALID);
3739 cl->svals[cloff] = svNew;
3740}
3741
3742/*------------- ZSM accesses: 16 bit apply ------------- */
3743
3744void zsm_apply16___msm_read ( Thr* thr, Addr a ) {
3745 CacheLine* cl;
3746 UWord cloff, tno, toff;
3747 SVal svOld, svNew;
3748 UShort descr;
3749 stats__cline_read16s++;
3750 if (UNLIKELY(!aligned16(a))) goto slowcase;
3751 cl = get_cacheline(a);
3752 cloff = get_cacheline_offset(a);
3753 tno = get_treeno(a);
3754 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
3755 descr = cl->descrs[tno];
3756 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
3757 if (valid_value_is_below_me_16(descr, toff)) {
3758 goto slowcase;
3759 } else {
3760 SVal* tree = &cl->svals[tno << 3];
3761 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
3762 }
3763 if (SCE_CACHELINE)
3764 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3765 }
3766 svOld = cl->svals[cloff];
3767 svNew = msm_read( svOld, thr,a,2 );
3768 tl_assert(svNew != SVal_INVALID);
3769 cl->svals[cloff] = svNew;
3770 return;
3771 slowcase: /* misaligned, or must go further down the tree */
3772 stats__cline_16to8splits++;
3773 zsm_apply8___msm_read( thr, a + 0 );
3774 zsm_apply8___msm_read( thr, a + 1 );
3775}
3776
3777void zsm_apply16___msm_write ( Thr* thr, Addr a ) {
3778 CacheLine* cl;
3779 UWord cloff, tno, toff;
3780 SVal svOld, svNew;
3781 UShort descr;
3782   stats__cline_write16s++;
3783 if (UNLIKELY(!aligned16(a))) goto slowcase;
3784 cl = get_cacheline(a);
3785 cloff = get_cacheline_offset(a);
3786 tno = get_treeno(a);
3787 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
3788 descr = cl->descrs[tno];
3789 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
3790 if (valid_value_is_below_me_16(descr, toff)) {
3791 goto slowcase;
3792 } else {
3793 SVal* tree = &cl->svals[tno << 3];
3794 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
3795 }
3796 if (SCE_CACHELINE)
3797 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3798 }
3799 svOld = cl->svals[cloff];
3800 svNew = msm_write( svOld, thr,a,2 );
3801 tl_assert(svNew != SVal_INVALID);
3802 cl->svals[cloff] = svNew;
3803 return;
3804 slowcase: /* misaligned, or must go further down the tree */
3805 stats__cline_16to8splits++;
3806 zsm_apply8___msm_write( thr, a + 0 );
3807 zsm_apply8___msm_write( thr, a + 1 );
3808}
3809
3810/*------------- ZSM accesses: 32 bit apply ------------- */
3811
3812void zsm_apply32___msm_read ( Thr* thr, Addr a ) {
3813 CacheLine* cl;
3814 UWord cloff, tno, toff;
3815 SVal svOld, svNew;
3816 UShort descr;
3817 if (UNLIKELY(!aligned32(a))) goto slowcase;
3818 cl = get_cacheline(a);
3819 cloff = get_cacheline_offset(a);
3820 tno = get_treeno(a);
3821 toff = get_tree_offset(a); /* == 0 or 4 */
3822 descr = cl->descrs[tno];
3823 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
3824 if (valid_value_is_above_me_32(descr, toff)) {
3825 SVal* tree = &cl->svals[tno << 3];
3826 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
3827 } else {
3828 goto slowcase;
3829 }
3830 if (SCE_CACHELINE)
3831 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3832 }
3833 svOld = cl->svals[cloff];
3834 svNew = msm_read( svOld, thr,a,4 );
3835 tl_assert(svNew != SVal_INVALID);
3836 cl->svals[cloff] = svNew;
3837 return;
3838 slowcase: /* misaligned, or must go further down the tree */
3839 stats__cline_32to16splits++;
3840 zsm_apply16___msm_read( thr, a + 0 );
3841 zsm_apply16___msm_read( thr, a + 2 );
3842}
3843
3844void zsm_apply32___msm_write ( Thr* thr, Addr a ) {
3845 CacheLine* cl;
3846 UWord cloff, tno, toff;
3847 SVal svOld, svNew;
3848 UShort descr;
3849 if (UNLIKELY(!aligned32(a))) goto slowcase;
3850 cl = get_cacheline(a);
3851 cloff = get_cacheline_offset(a);
3852 tno = get_treeno(a);
3853 toff = get_tree_offset(a); /* == 0 or 4 */
3854 descr = cl->descrs[tno];
3855 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
3856 if (valid_value_is_above_me_32(descr, toff)) {
3857 SVal* tree = &cl->svals[tno << 3];
3858 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
3859 } else {
3860 goto slowcase;
3861 }
3862 if (SCE_CACHELINE)
3863 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3864 }
3865 svOld = cl->svals[cloff];
3866 svNew = msm_write( svOld, thr,a,4 );
3867 tl_assert(svNew != SVal_INVALID);
3868 cl->svals[cloff] = svNew;
3869 return;
3870 slowcase: /* misaligned, or must go further down the tree */
3871 stats__cline_32to16splits++;
3872 zsm_apply16___msm_write( thr, a + 0 );
3873 zsm_apply16___msm_write( thr, a + 2 );
3874}
3875
3876/*------------- ZSM accesses: 64 bit apply ------------- */
3877
3878void zsm_apply64___msm_read ( Thr* thr, Addr a ) {
3879 CacheLine* cl;
3880 UWord cloff, tno, toff;
3881 SVal svOld, svNew;
3882 UShort descr;
3883 stats__cline_read64s++;
3884 if (UNLIKELY(!aligned64(a))) goto slowcase;
3885 cl = get_cacheline(a);
3886 cloff = get_cacheline_offset(a);
3887 tno = get_treeno(a);
3888 toff = get_tree_offset(a); /* == 0, unused */
3889 descr = cl->descrs[tno];
3890 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
3891 goto slowcase;
3892 }
3893 svOld = cl->svals[cloff];
3894 svNew = msm_read( svOld, thr,a,8 );
3895 tl_assert(svNew != SVal_INVALID);
3896 cl->svals[cloff] = svNew;
3897 return;
3898 slowcase: /* misaligned, or must go further down the tree */
3899 stats__cline_64to32splits++;
3900 zsm_apply32___msm_read( thr, a + 0 );
3901 zsm_apply32___msm_read( thr, a + 4 );
3902}
3903
3904void zsm_apply64___msm_write ( Thr* thr, Addr a ) {
3905 CacheLine* cl;
3906 UWord cloff, tno, toff;
3907 SVal svOld, svNew;
3908 UShort descr;
3909   stats__cline_write64s++;
3910 if (UNLIKELY(!aligned64(a))) goto slowcase;
3911 cl = get_cacheline(a);
3912 cloff = get_cacheline_offset(a);
3913 tno = get_treeno(a);
3914 toff = get_tree_offset(a); /* == 0, unused */
3915 descr = cl->descrs[tno];
3916 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
3917 goto slowcase;
3918 }
3919 svOld = cl->svals[cloff];
3920 svNew = msm_write( svOld, thr,a,8 );
3921 tl_assert(svNew != SVal_INVALID);
3922 cl->svals[cloff] = svNew;
3923 return;
3924 slowcase: /* misaligned, or must go further down the tree */
3925 stats__cline_64to32splits++;
3926 zsm_apply32___msm_write( thr, a + 0 );
3927 zsm_apply32___msm_write( thr, a + 4 );
3928}
3929
3930/*--------------- ZSM accesses: 8 bit write --------------- */
3931
3932static
3933void zsm_write8 ( Addr a, SVal svNew ) {
3934 CacheLine* cl;
3935 UWord cloff, tno, toff;
3936 UShort descr;
3937 stats__cline_set8s++;
3938 cl = get_cacheline(a);
3939 cloff = get_cacheline_offset(a);
3940 tno = get_treeno(a);
3941 toff = get_tree_offset(a); /* == 0 .. 7 */
3942 descr = cl->descrs[tno];
3943 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3944 SVal* tree = &cl->svals[tno << 3];
3945 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3946 if (SCE_CACHELINE)
3947 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3948 }
3949 tl_assert(svNew != SVal_INVALID);
3950 cl->svals[cloff] = svNew;
3951}
3952
3953/*--------------- ZSM accesses: 16 bit write --------------- */
3954
3955static
3956void zsm_write16 ( Addr a, SVal svNew ) {
3957 CacheLine* cl;
3958 UWord cloff, tno, toff;
3959 UShort descr;
3960 stats__cline_set16s++;
3961 if (UNLIKELY(!aligned16(a))) goto slowcase;
3962 cl = get_cacheline(a);
3963 cloff = get_cacheline_offset(a);
3964 tno = get_treeno(a);
3965 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
3966 descr = cl->descrs[tno];
3967 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
3968 if (valid_value_is_below_me_16(descr, toff)) {
3969 /* Writing at this level. Need to fix up 'descr'. */
3970 cl->descrs[tno] = pullup_descr_to_16(descr, toff);
3971 /* At this point, the tree does not match cl->descr[tno] any
3972 more. The assignments below will fix it up. */
3973 } else {
3974 /* We can't indiscriminately write on the w16 node as in the
3975 w64 case, as that might make the node inconsistent with
3976 its parent. So first, pull down to this level. */
3977 SVal* tree = &cl->svals[tno << 3];
3978 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
3979 if (SCE_CACHELINE)
3980 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3981 }
3982 }
3983 tl_assert(svNew != SVal_INVALID);
3984 cl->svals[cloff + 0] = svNew;
3985 cl->svals[cloff + 1] = SVal_INVALID;
3986 return;
3987 slowcase: /* misaligned */
3988 stats__cline_16to8splits++;
3989 zsm_write8( a + 0, svNew );
3990 zsm_write8( a + 1, svNew );
3991}
3992
3993/*--------------- ZSM accesses: 32 bit write --------------- */
3994
3995static
3996void zsm_write32 ( Addr a, SVal svNew ) {
3997 CacheLine* cl;
3998 UWord cloff, tno, toff;
3999 UShort descr;
4000 stats__cline_set32s++;
4001 if (UNLIKELY(!aligned32(a))) goto slowcase;
4002 cl = get_cacheline(a);
4003 cloff = get_cacheline_offset(a);
4004 tno = get_treeno(a);
4005 toff = get_tree_offset(a); /* == 0 or 4 */
4006 descr = cl->descrs[tno];
4007 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
4008 if (valid_value_is_above_me_32(descr, toff)) {
4009 /* We can't indiscriminately write on the w32 node as in the
4010 w64 case, as that might make the node inconsistent with
4011 its parent. So first, pull down to this level. */
4012 SVal* tree = &cl->svals[tno << 3];
4013 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
4014 if (SCE_CACHELINE)
4015 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4016 } else {
4017 /* Writing at this level. Need to fix up 'descr'. */
4018 cl->descrs[tno] = pullup_descr_to_32(descr, toff);
4019 /* At this point, the tree does not match cl->descr[tno] any
4020 more. The assignments below will fix it up. */
4021 }
4022 }
4023 tl_assert(svNew != SVal_INVALID);
4024 cl->svals[cloff + 0] = svNew;
4025 cl->svals[cloff + 1] = SVal_INVALID;
4026 cl->svals[cloff + 2] = SVal_INVALID;
4027 cl->svals[cloff + 3] = SVal_INVALID;
4028 return;
4029 slowcase: /* misaligned */
4030 stats__cline_32to16splits++;
4031 zsm_write16( a + 0, svNew );
4032 zsm_write16( a + 2, svNew );
4033}
4034
4035/*--------------- ZSM accesses: 64 bit write --------------- */
4036
4037static
4038void zsm_write64 ( Addr a, SVal svNew ) {
4039 CacheLine* cl;
4040 UWord cloff, tno, toff;
4041 stats__cline_set64s++;
4042 if (UNLIKELY(!aligned64(a))) goto slowcase;
4043 cl = get_cacheline(a);
4044 cloff = get_cacheline_offset(a);
4045 tno = get_treeno(a);
4046 toff = get_tree_offset(a); /* == 0 */
4047 cl->descrs[tno] = TREE_DESCR_64;
4048 tl_assert(svNew != SVal_INVALID);
4049 cl->svals[cloff + 0] = svNew;
4050 cl->svals[cloff + 1] = SVal_INVALID;
4051 cl->svals[cloff + 2] = SVal_INVALID;
4052 cl->svals[cloff + 3] = SVal_INVALID;
4053 cl->svals[cloff + 4] = SVal_INVALID;
4054 cl->svals[cloff + 5] = SVal_INVALID;
4055 cl->svals[cloff + 6] = SVal_INVALID;
4056 cl->svals[cloff + 7] = SVal_INVALID;
4057 return;
4058 slowcase: /* misaligned */
4059 stats__cline_64to32splits++;
4060 zsm_write32( a + 0, svNew );
4061 zsm_write32( a + 4, svNew );
4062}
4063
4064/*------------- ZSM accesses: 8 bit read/copy ------------- */
4065
4066static
4067SVal zsm_read8 ( Addr a ) {
4068 CacheLine* cl;
4069 UWord cloff, tno, toff;
4070 UShort descr;
4071 stats__cline_get8s++;
4072 cl = get_cacheline(a);
4073 cloff = get_cacheline_offset(a);
4074 tno = get_treeno(a);
4075 toff = get_tree_offset(a); /* == 0 .. 7 */
4076 descr = cl->descrs[tno];
4077 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
4078 SVal* tree = &cl->svals[tno << 3];
4079 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
4080 }
4081 return cl->svals[cloff];
4082}
4083
4084static void zsm_copy8 ( Addr src, Addr dst, Bool uu_normalise ) {
4085 SVal sv;
4086 stats__cline_copy8s++;
4087 sv = zsm_read8( src );
4088 zsm_write8( dst, sv );
4089}
4090
4091/* ------------ Shadow memory range setting ops ------------ */
4092
4093void zsm_apply_range___msm_read ( Thr* thr,
4094 Addr a, SizeT len )
4095{
4096 /* fast track a couple of common cases */
4097 if (len == 4 && aligned32(a)) {
4098 zsm_apply32___msm_read( thr, a );
4099 return;
4100 }
4101 if (len == 8 && aligned64(a)) {
4102 zsm_apply64___msm_read( thr, a );
4103 return;
4104 }
4105
4106 /* be completely general (but as efficient as possible) */
4107 if (len == 0) return;
4108
4109 if (!aligned16(a) && len >= 1) {
4110 zsm_apply8___msm_read( thr, a );
4111 a += 1;
4112 len -= 1;
4113 tl_assert(aligned16(a));
4114 }
4115 if (len == 0) return;
4116
4117 if (!aligned32(a) && len >= 2) {
4118 zsm_apply16___msm_read( thr, a );
4119 a += 2;
4120 len -= 2;
4121 tl_assert(aligned32(a));
4122 }
4123 if (len == 0) return;
4124
4125 if (!aligned64(a) && len >= 4) {
4126 zsm_apply32___msm_read( thr, a );
4127 a += 4;
4128 len -= 4;
4129 tl_assert(aligned64(a));
4130 }
4131 if (len == 0) return;
4132
4133 if (len >= 8) {
4134 tl_assert(aligned64(a));
4135 while (len >= 8) {
4136 zsm_apply64___msm_read( thr, a );
4137 a += 8;
4138 len -= 8;
4139 }
4140 tl_assert(aligned64(a));
4141 }
4142 if (len == 0) return;
4143
4144 if (len >= 4)
4145 tl_assert(aligned32(a));
4146 if (len >= 4) {
4147 zsm_apply32___msm_read( thr, a );
4148 a += 4;
4149 len -= 4;
4150 }
4151 if (len == 0) return;
4152
4153 if (len >= 2)
4154 tl_assert(aligned16(a));
4155 if (len >= 2) {
4156 zsm_apply16___msm_read( thr, a );
4157 a += 2;
4158 len -= 2;
4159 }
4160 if (len == 0) return;
4161
4162 if (len >= 1) {
4163 zsm_apply8___msm_read( thr, a );
4164 a += 1;
4165 len -= 1;
4166 }
4167 tl_assert(len == 0);
4168}
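/* Worked example of the alignment ladder above (addresses invented):
   zsm_apply_range___msm_read(thr, 0x5001, 15) is carried out as
       zsm_apply8___msm_read (thr, 0x5001)   -- now 2-aligned
       zsm_apply16___msm_read(thr, 0x5002)   -- now 4-aligned
       zsm_apply32___msm_read(thr, 0x5004)   -- now 8-aligned
       zsm_apply64___msm_read(thr, 0x5008)   -- final 8 bytes
   covering 1 + 2 + 4 + 8 == 15 bytes with every access naturally
   aligned. */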
4169
4170
4171
4172void zsm_apply_range___msm_write ( Thr* thr,
4173 Addr a, SizeT len )
4174{
4175 /* fast track a couple of common cases */
4176 if (len == 4 && aligned32(a)) {
4177 zsm_apply32___msm_write( thr, a );
4178 return;
4179 }
4180 if (len == 8 && aligned64(a)) {
4181 zsm_apply64___msm_write( thr, a );
4182 return;
4183 }
4184
4185 /* be completely general (but as efficient as possible) */
4186 if (len == 0) return;
4187
4188 if (!aligned16(a) && len >= 1) {
4189 zsm_apply8___msm_write( thr, a );
4190 a += 1;
4191 len -= 1;
4192 tl_assert(aligned16(a));
4193 }
4194 if (len == 0) return;
4195
4196 if (!aligned32(a) && len >= 2) {
4197 zsm_apply16___msm_write( thr, a );
4198 a += 2;
4199 len -= 2;
4200 tl_assert(aligned32(a));
4201 }
4202 if (len == 0) return;
4203
4204 if (!aligned64(a) && len >= 4) {
4205 zsm_apply32___msm_write( thr, a );
4206 a += 4;
4207 len -= 4;
4208 tl_assert(aligned64(a));
4209 }
4210 if (len == 0) return;
4211
4212 if (len >= 8) {
4213 tl_assert(aligned64(a));
4214 while (len >= 8) {
4215 zsm_apply64___msm_write( thr, a );
4216 a += 8;
4217 len -= 8;
4218 }
4219 tl_assert(aligned64(a));
4220 }
4221 if (len == 0) return;
4222
4223 if (len >= 4)
4224 tl_assert(aligned32(a));
4225 if (len >= 4) {
4226 zsm_apply32___msm_write( thr, a );
4227 a += 4;
4228 len -= 4;
4229 }
4230 if (len == 0) return;
4231
4232 if (len >= 2)
4233 tl_assert(aligned16(a));
4234 if (len >= 2) {
4235 zsm_apply16___msm_write( thr, a );
4236 a += 2;
4237 len -= 2;
4238 }
4239 if (len == 0) return;
4240
4241 if (len >= 1) {
4242 zsm_apply8___msm_write( thr, a );
4243 a += 1;
4244 len -= 1;
4245 }
4246 tl_assert(len == 0);
4247}
4248
4249
4250
4251
4252/* Block-copy states (needed for implementing realloc()). */
4253
4254static void zsm_copy_range ( Addr src, Addr dst, SizeT len )
4255{
4256 SizeT i;
4257 if (len == 0)
4258 return;
4259
4260 /* assert for non-overlappingness */
4261 tl_assert(src+len <= dst || dst+len <= src);
4262
4263 /* To be simple, just copy byte by byte. But so as not to wreck
4264 performance for later accesses to dst[0 .. len-1], normalise
4265 destination lines as we finish with them, and also normalise the
4266 line containing the first and last address. */
4267 for (i = 0; i < len; i++) {
4268 Bool normalise
4269 = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
4270 || i == 0 /* first in range */
4271 || i == len-1; /* last in range */
4272 zsm_copy8( src+i, dst+i, normalise );
4273 }
4274}
4275
4276
4277/* For setting address ranges to a given value. Has considerable
4278 sophistication so as to avoid generating large numbers of pointless
4279 cache loads/writebacks for large ranges. */
4280
4281/* Do small ranges in-cache, in the obvious way. */
4282static
4283void zsm_set_range_SMALL ( Addr a, SizeT len, SVal svNew )
4284{
4285 /* fast track a couple of common cases */
4286 if (len == 4 && aligned32(a)) {
4287 zsm_write32( a, svNew );
4288 return;
4289 }
4290 if (len == 8 && aligned64(a)) {
4291 zsm_write64( a, svNew );
4292 return;
4293 }
4294
4295 /* be completely general (but as efficient as possible) */
4296 if (len == 0) return;
4297
4298 if (!aligned16(a) && len >= 1) {
4299 zsm_write8( a, svNew );
4300 a += 1;
4301 len -= 1;
4302 tl_assert(aligned16(a));
4303 }
4304 if (len == 0) return;
4305
4306 if (!aligned32(a) && len >= 2) {
4307 zsm_write16( a, svNew );
4308 a += 2;
4309 len -= 2;
4310 tl_assert(aligned32(a));
4311 }
4312 if (len == 0) return;
4313
4314 if (!aligned64(a) && len >= 4) {
4315 zsm_write32( a, svNew );
4316 a += 4;
4317 len -= 4;
4318 tl_assert(aligned64(a));
4319 }
4320 if (len == 0) return;
4321
4322 if (len >= 8) {
4323 tl_assert(aligned64(a));
4324 while (len >= 8) {
4325 zsm_write64( a, svNew );
4326 a += 8;
4327 len -= 8;
4328 }
4329 tl_assert(aligned64(a));
4330 }
4331 if (len == 0) return;
4332
4333 if (len >= 4)
4334 tl_assert(aligned32(a));
4335 if (len >= 4) {
4336 zsm_write32( a, svNew );
4337 a += 4;
4338 len -= 4;
4339 }
4340 if (len == 0) return;
4341
4342 if (len >= 2)
4343 tl_assert(aligned16(a));
4344 if (len >= 2) {
4345 zsm_write16( a, svNew );
4346 a += 2;
4347 len -= 2;
4348 }
4349 if (len == 0) return;
4350
4351 if (len >= 1) {
4352 zsm_write8( a, svNew );
4353 a += 1;
4354 len -= 1;
4355 }
4356 tl_assert(len == 0);
4357}
4358
4359
4360/* If we're doing a small range, hand off to zsm_set_range_SMALL. But
4361 for larger ranges, try to operate directly on the out-of-cache
4362 representation, rather than dragging lines into the cache,
4363 overwriting them, and forcing them out. This turns out to be an
4364 important performance optimisation. */
4365
4366static void zsm_set_range ( Addr a, SizeT len, SVal svNew )
4367{
4368 tl_assert(svNew != SVal_INVALID);
4369 stats__cache_make_New_arange += (ULong)len;
4370
4371 if (0 && len > 500)
4372 VG_(printf)("make New ( %#lx, %ld )\n", a, len );
4373
4374 if (0) {
4375 static UWord n_New_in_cache = 0;
4376 static UWord n_New_not_in_cache = 0;
4377 /* tag is 'a' with the in-line offset masked out,
4378 eg a[31]..a[4] 0000 */
4379 Addr tag = a & ~(N_LINE_ARANGE - 1);
4380 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
4381 if (LIKELY(tag == cache_shmem.tags0[wix])) {
4382 n_New_in_cache++;
4383 } else {
4384 n_New_not_in_cache++;
4385 }
4386 if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
4387 VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
4388 n_New_in_cache, n_New_not_in_cache );
4389 }
4390
4391 if (LIKELY(len < 2 * N_LINE_ARANGE)) {
4392 zsm_set_range_SMALL( a, len, svNew );
4393 } else {
4394 Addr before_start = a;
4395 Addr aligned_start = cacheline_ROUNDUP(a);
4396 Addr after_start = cacheline_ROUNDDN(a + len);
4397 UWord before_len = aligned_start - before_start;
4398 UWord aligned_len = after_start - aligned_start;
4399 UWord after_len = a + len - after_start;
4400 tl_assert(before_start <= aligned_start);
4401 tl_assert(aligned_start <= after_start);
4402 tl_assert(before_len < N_LINE_ARANGE);
4403 tl_assert(after_len < N_LINE_ARANGE);
4404 tl_assert(get_cacheline_offset(aligned_start) == 0);
4405 if (get_cacheline_offset(a) == 0) {
4406 tl_assert(before_len == 0);
4407 tl_assert(a == aligned_start);
4408 }
4409 if (get_cacheline_offset(a+len) == 0) {
4410 tl_assert(after_len == 0);
4411 tl_assert(after_start == a+len);
4412 }
4413 if (before_len > 0) {
4414 zsm_set_range_SMALL( before_start, before_len, svNew );
4415 }
4416 if (after_len > 0) {
4417 zsm_set_range_SMALL( after_start, after_len, svNew );
4418 }
4419 stats__cache_make_New_inZrep += (ULong)aligned_len;
4420
4421 while (1) {
4422 Addr tag;
4423 UWord wix;
4424 if (aligned_start >= after_start)
4425 break;
4426 tl_assert(get_cacheline_offset(aligned_start) == 0);
4427 tag = aligned_start & ~(N_LINE_ARANGE - 1);
4428 wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
4429 if (tag == cache_shmem.tags0[wix]) {
4430 UWord i;
4431 for (i = 0; i < N_LINE_ARANGE / 8; i++)
4432 zsm_write64( aligned_start + i * 8, svNew );
4433 } else {
4434 UWord i;
4435 Word zix;
4436 SecMap* sm;
4437 LineZ* lineZ;
4438 /* This line is not in the cache. Do not force it in; instead
4439 modify it in-place. */
4440 /* find the Z line to write in and rcdec it or the
4441 associated F line. */
4442 find_Z_for_writing( &sm, &zix, tag );
4443 tl_assert(sm);
4444 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
4445 lineZ = &sm->linesZ[zix];
4446 lineZ->dict[0] = svNew;
4447 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
4448 for (i = 0; i < N_LINE_ARANGE/4; i++)
4449 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
4450 rcinc_LineZ(lineZ);
4451 }
4452 aligned_start += N_LINE_ARANGE;
4453 aligned_len -= N_LINE_ARANGE;
4454 }
4455 tl_assert(aligned_start == after_start);
4456 tl_assert(aligned_len == 0);
4457 }
4458}
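/* Worked example for the splitting logic above, assuming
   N_LINE_ARANGE == 64 (the actual value is defined earlier in this
   file; the addresses here are invented): zsm_set_range(0x1030, 389, sv)
   is handled as
       before   [0x1030, 0x1040)   16 bytes via zsm_set_range_SMALL
       aligned  [0x1040, 0x1180)   5 whole lines, written in-cache if
                                   resident, otherwise directly into the
                                   Z representation via find_Z_for_writing
       after    [0x1180, 0x11B5)   53 bytes via zsm_set_range_SMALL
   and 16 + 5*64 + 53 == 389. */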
4459
4460
4461/////////////////////////////////////////////////////////
4462// //
4463// Synchronisation objects //
4464// //
4465/////////////////////////////////////////////////////////
4466
4467// (UInt) `echo "Synchronisation object" | md5sum`
4468#define SO_MAGIC 0x56b3c5b0U
4469
4470struct _SO {
4471 VtsID viR; /* r-clock of sender */
4472 VtsID viW; /* w-clock of sender */
4473 UInt magic;
4474};
4475
4476static SO* SO__Alloc ( void ) {
4477 SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
4478 so->viR = VtsID_INVALID;
4479 so->viW = VtsID_INVALID;
4480 so->magic = SO_MAGIC;
4481 return so;
4482}
4483static void SO__Dealloc ( SO* so ) {
4484 tl_assert(so);
4485 tl_assert(so->magic == SO_MAGIC);
4486 if (so->viR == VtsID_INVALID) {
4487 tl_assert(so->viW == VtsID_INVALID);
4488 } else {
4489 tl_assert(so->viW != VtsID_INVALID);
4490 VtsID__rcdec(so->viR);
4491 VtsID__rcdec(so->viW);
4492 }
4493 so->magic = 0;
4494 HG_(free)( so );
4495}
4496
4497
4498/////////////////////////////////////////////////////////
4499// //
4500// Top Level API //
4501// //
4502/////////////////////////////////////////////////////////
4503
4504static void show_thread_state ( HChar* str, Thr* t )
4505{
4506 if (1) return;
4507 if (t->viR == t->viW) {
4508 VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
4509 VtsID__pp( t->viR );
4510 VG_(printf)("%s","\n");
4511 } else {
4512 VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
4513 VtsID__pp( t->viR );
4514 VG_(printf)(" viW %u==", t->viW);
4515 VtsID__pp( t->viW );
4516 VG_(printf)("%s","\n");
4517 }
4518}
4519
4520
4521Thr* libhb_init (
4522 void (*get_stacktrace)( Thr*, Addr*, UWord ),
sewardjd52392d2008-11-08 20:36:26 +00004523 ExeContext* (*get_EC)( Thr* )
sewardjf98e1c02008-10-25 16:22:41 +00004524 )
4525{
4526 Thr* thr;
4527 VtsID vi;
4528 tl_assert(get_stacktrace);
sewardjf98e1c02008-10-25 16:22:41 +00004529 tl_assert(get_EC);
4530 main_get_stacktrace = get_stacktrace;
sewardjf98e1c02008-10-25 16:22:41 +00004531 main_get_EC = get_EC;
4532
4533 // No need to initialise hg_wordfm.
4534 // No need to initialise hg_wordset.
4535
4536 vts_set_init();
4537 vts_tab_init();
4538 event_map_init();
4539 VtsID__invalidate_caches();
4540
4541 // initialise shadow memory
4542 zsm_init( SVal__rcinc, SVal__rcdec );
4543
4544 thr = Thr__new();
4545 vi = VtsID__mk_Singleton( thr, 1 );
4546 thr->viR = vi;
4547 thr->viW = vi;
4548 VtsID__rcinc(thr->viR);
4549 VtsID__rcinc(thr->viW);
4550
4551 show_thread_state(" root", thr);
4552 return thr;
4553}
4554
4555Thr* libhb_create ( Thr* parent )
4556{
4557 /* The child's VTSs are copies of the parent's VTSs, but ticked at
4558 the child's index. Since the child's index is guaranteed
4559 unique, it has never been seen before, so the implicit value
4560 before the tick is zero and after that is one. */
4561 Thr* child = Thr__new();
4562
4563 child->viR = VtsID__tick( parent->viR, child );
4564 child->viW = VtsID__tick( parent->viW, child );
4565 VtsID__rcinc(child->viR);
4566 VtsID__rcinc(child->viW);
4567
4568 tl_assert(VtsID__indexAt( child->viR, child ) == 1);
4569 tl_assert(VtsID__indexAt( child->viW, child ) == 1);
4570
4571 /* and the parent has to move along too */
4572 VtsID__rcdec(parent->viR);
4573 VtsID__rcdec(parent->viW);
4574 parent->viR = VtsID__tick( parent->viR, parent );
4575 parent->viW = VtsID__tick( parent->viW, parent );
4576 VtsID__rcinc(parent->viR);
4577 VtsID__rcinc(parent->viW);
4578
4579 show_thread_state(" child", child);
4580 show_thread_state("parent", parent);
4581
4582 return child;
4583}
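/* Small worked example of the ticking above (clock values invented):
   if the parent P currently has viR = viW = {P:5}, then after
   libhb_create the child C has viR = viW = {P:5, C:1} and the parent
   has moved on to {P:6}.  Everything P did before the create therefore
   happens-before all of C's accesses, while P's subsequent accesses
   stay unordered with C's until some other synchronisation
   intervenes. */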
4584
4585/* Shut down the library, and print stats (in fact that's _all_
4586   this is for). */
4587void libhb_shutdown ( Bool show_stats )
4588{
4589 if (show_stats) {
4590 VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
4591 VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
4592 stats__secmaps_allocd,
4593 stats__secmap_ga_space_covered);
4594 VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
4595 stats__secmap_linesZ_allocd,
4596 stats__secmap_linesZ_bytes);
4597 VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
4598 stats__secmap_linesF_allocd,
4599 stats__secmap_linesF_bytes);
4600 VG_(printf)(" secmaps: %'10lu iterator steppings\n",
4601 stats__secmap_iterator_steppings);
4602 VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
4603 stats__secmaps_search, stats__secmaps_search_slow);
4604
4605 VG_(printf)("%s","\n");
4606 VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
4607 stats__cache_totrefs, stats__cache_totmisses );
4608 VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
4609 stats__cache_Z_fetches, stats__cache_F_fetches );
4610 VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
4611 stats__cache_Z_wbacks, stats__cache_F_wbacks );
4612 VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n",
4613 stats__cache_invals, stats__cache_flushes );
4614 VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
4615 stats__cache_make_New_arange,
4616 stats__cache_make_New_inZrep);
4617
4618 VG_(printf)("%s","\n");
4619 VG_(printf)(" cline: %'10lu normalises\n",
4620 stats__cline_normalises );
4621 VG_(printf)(" cline: rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4622 stats__cline_read64s,
4623 stats__cline_read32s,
4624 stats__cline_read16s,
4625 stats__cline_read8s );
4626 VG_(printf)(" cline: wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4627 stats__cline_write64s,
4628 stats__cline_write32s,
4629 stats__cline_write16s,
4630 stats__cline_write8s );
4631 VG_(printf)(" cline: sets 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4632 stats__cline_set64s,
4633 stats__cline_set32s,
4634 stats__cline_set16s,
4635 stats__cline_set8s );
4636 VG_(printf)(" cline: get1s %'lu, copy1s %'lu\n",
4637 stats__cline_get8s, stats__cline_copy8s );
4638 VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
4639 stats__cline_64to32splits,
4640 stats__cline_32to16splits,
4641 stats__cline_16to8splits );
4642 VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
4643 stats__cline_64to32pulldown,
4644 stats__cline_32to16pulldown,
4645 stats__cline_16to8pulldown );
4646 if (0)
4647 VG_(printf)(" cline: sizeof(CacheLineZ) %ld, covers %ld bytes of arange\n",
4648 (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);
4649
4650 VG_(printf)("%s","\n");
4651
4652 VG_(printf)(" libhb: %'13llu msm_read (%'llu changed)\n",
4653 stats__msm_read, stats__msm_read_change);
4654 VG_(printf)(" libhb: %'13llu msm_write (%'llu changed)\n",
4655 stats__msm_write, stats__msm_write_change);
4656 VG_(printf)(" libhb: %'13llu getOrd queries (%'llu misses)\n",
4657 stats__getOrdering_queries, stats__getOrdering_misses);
4658 VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
4659 stats__join2_queries, stats__join2_misses);
4660
4661 VG_(printf)("%s","\n");
4662 VG_(printf)(
4663 " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
4664 VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
4665 );
4666 VG_(printf)( " libhb: %lu entries in vts_set\n",
4667 VG_(sizeFM)( vts_set ) );
4668
4669 VG_(printf)("%s","\n");
4670 VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
4671 stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
4672 stats__ctxt_rcdec2,
4673 stats__ctxt_rcdec3 );
4674 VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
4675 stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
4676 VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n",
4677 (UWord)N_RCEC_TAB,
4678 stats__ctxt_tab_curr );
4679 VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
4680 stats__ctxt_tab_qs,
4681 stats__ctxt_tab_cmps );
4682#if 0
4683 VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
4684 VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
4685 VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
4686 VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
4687 VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
4688 VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
4689 VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
4690 VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
4691 VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
4692 VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
4693 VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
4694 VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
4695 VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
4696 VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
4697
4698 VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
4699 VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
4700 VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
4701 VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
4702#endif
4703
4704 VG_(printf)("%s","<<< END libhb stats >>>\n");
4705 VG_(printf)("%s","\n");
4706
4707 }
4708}
4709
4710void libhb_async_exit ( Thr* thr )
4711{
4712 /* is there anything we need to do? */
4713}
4714
4715/* Both Segs and SOs point to VTSs. However, there is no sharing, so
4716 a Seg that points at a VTS is its one-and-only owner, and ditto for
4717 a SO that points at a VTS. */
4718
4719SO* libhb_so_alloc ( void )
4720{
4721 return SO__Alloc();
4722}
4723
4724void libhb_so_dealloc ( SO* so )
4725{
4726 tl_assert(so);
4727 tl_assert(so->magic == SO_MAGIC);
4728 SO__Dealloc(so);
4729}
4730
4731/* See comments in libhb.h for details on the meaning of
4732 strong vs weak sends and strong vs weak receives. */
4733void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
4734{
4735 /* Copy the VTSs from 'thr' into the sync object, and then move
4736 the thread along one step. */
4737
4738 tl_assert(so);
4739 tl_assert(so->magic == SO_MAGIC);
4740
4741 /* stay sane .. a thread's read-clock must always lead or be the
4742 same as its write-clock */
4743 { POrd ord = VtsID__getOrdering(thr->viW, thr->viR);
4744 tl_assert(ord == POrd_EQ || ord == POrd_LT);
4745 }
4746
4747 /* since we're overwriting the VtsIDs in the SO, we need to drop
4748 any references made by the previous contents thereof */
4749 if (so->viR == VtsID_INVALID) {
4750 tl_assert(so->viW == VtsID_INVALID);
4751 so->viR = thr->viR;
4752 so->viW = thr->viW;
4753 VtsID__rcinc(so->viR);
4754 VtsID__rcinc(so->viW);
4755 } else {
4756 /* In a strong send, we dump any previous VC in the SO and
4757 install the sending thread's VC instead. For a weak send we
4758 must join2 with what's already there. */
4759 tl_assert(so->viW != VtsID_INVALID);
4760 VtsID__rcdec(so->viR);
4761 VtsID__rcdec(so->viW);
4762 so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
4763 so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
4764 VtsID__rcinc(so->viR);
4765 VtsID__rcinc(so->viW);
4766 }
4767
4768 /* move both parent clocks along */
4769 VtsID__rcdec(thr->viR);
4770 VtsID__rcdec(thr->viW);
4771 thr->viR = VtsID__tick( thr->viR, thr );
4772 thr->viW = VtsID__tick( thr->viW, thr );
4773 VtsID__rcinc(thr->viR);
4774 VtsID__rcinc(thr->viW);
4775 if (strong_send)
4776 show_thread_state("s-send", thr);
4777 else
4778 show_thread_state("w-send", thr);
4779}
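/* Strong vs weak send, with invented clocks: suppose the SO already
   holds viR = viW = {T1:3} and thread T2 sends with viR = viW = {T2:8}.
   A strong send overwrites, leaving the SO at {T2:8}; a weak send
   joins, leaving it at {T1:3, T2:8}.  In both cases T2 then ticks its
   own clocks on to {T2:9}. */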
4780
4781void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
4782{
4783 tl_assert(so);
4784 tl_assert(so->magic == SO_MAGIC);
4785
4786 if (so->viR != VtsID_INVALID) {
4787 tl_assert(so->viW != VtsID_INVALID);
4788
4789 /* Weak receive (basically, an R-acquisition of a R-W lock).
4790 This advances the read-clock of the receiver, but not the
4791 write-clock. */
4792 VtsID__rcdec(thr->viR);
4793 thr->viR = VtsID__join2( thr->viR, so->viR );
4794 VtsID__rcinc(thr->viR);
4795
4796 /* For a strong receive, we also advance the receiver's write
4797 clock, which means the receive as a whole is essentially
4798 equivalent to a W-acquisition of a R-W lock. */
4799 if (strong_recv) {
4800 VtsID__rcdec(thr->viW);
4801 thr->viW = VtsID__join2( thr->viW, so->viW );
4802 VtsID__rcinc(thr->viW);
4803 }
4804
4805 if (strong_recv)
4806 show_thread_state("s-recv", thr);
4807 else
4808 show_thread_state("w-recv", thr);
4809
4810 } else {
4811 tl_assert(so->viW == VtsID_INVALID);
4812 /* Deal with degenerate case: 'so' has no vts, so there has been
4813 no message posted to it. Just ignore this case. */
4814 show_thread_state("d-recv", thr);
4815 }
4816}
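/* Hedged usage sketch (not part of the library): one plausible way a
   client tool could model a mutex handover with the two primitives
   above.  The helper so_for_mutex and the mutex-to-SO mapping are
   assumptions made purely for illustration. */
#if 0
static SO* so_for_mutex ( void* mutex );   /* hypothetical lookup */

static void on_mutex_unlock ( Thr* thr, void* mutex )
{
   /* Publish the unlocking thread's clocks into the mutex's SO.
      Strong send: discard whatever the SO held before. */
   libhb_so_send( thr, so_for_mutex(mutex), True/*strong_send*/ );
}

static void on_mutex_lock ( Thr* thr, void* mutex )
{
   /* Acquire: join the SO's clocks into both of the locker's clocks,
      so everything the previous holder did happens-before the locker's
      subsequent accesses. */
   libhb_so_recv( thr, so_for_mutex(mutex), True/*strong_recv*/ );
}
#endif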
4817
4818Bool libhb_so_everSent ( SO* so )
4819{
4820 if (so->viR == VtsID_INVALID) {
4821 tl_assert(so->viW == VtsID_INVALID);
4822 return False;
4823 } else {
4824 tl_assert(so->viW != VtsID_INVALID);
4825 return True;
4826 }
4827}
4828
4829#define XXX1 0 // 0x67a106c
4830#define XXX2 0
4831
4832static Bool TRACEME(Addr a, SizeT szB) {
4833 if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
4834 if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
4835 return False;
4836}
4837static void trace ( Thr* thr, Addr a, SizeT szB, HChar* s ) {
4838 SVal sv = zsm_read8(a);
4839 VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
4840 show_thread_state("", thr);
4841 VG_(printf)("%s","\n");
4842}
4843
4844void libhb_range_new ( Thr* thr, Addr a, SizeT szB )
4845{
4846 SVal sv = SVal__mkC(thr->viW, thr->viW);
4847 tl_assert(is_sane_SVal_C(sv));
4848 if(TRACEME(a,szB))trace(thr,a,szB,"nw-before");
4849 zsm_set_range( a, szB, sv );
4850 if(TRACEME(a,szB))trace(thr,a,szB,"nw-after ");
4851}
4852
4853void libhb_range_noaccess ( Thr* thr, Addr a, SizeT szB )
4854{
4855 if(TRACEME(a,szB))trace(thr,a,szB,"NA-before");
4856 zsm_set_range( a, szB, SVal__mkA() );
4857 if(TRACEME(a,szB))trace(thr,a,szB,"NA-after ");
4858}
4859
4860void* libhb_get_Thr_opaque ( Thr* thr ) {
4861 tl_assert(thr);
4862 return thr->opaque;
4863}
4864
4865void libhb_set_Thr_opaque ( Thr* thr, void* v ) {
4866 tl_assert(thr);
4867 thr->opaque = v;
4868}
4869
4870void libhb_copy_shadow_state ( Addr dst, Addr src, SizeT len )
4871{
4872 zsm_copy_range(dst, src, len);
4873}
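/* Hedged sketch of the intended use (the variable names are
   hypothetical): a tool handling a growing realloc() from old_sz to
   new_sz bytes at a new, non-overlapping address might do

       libhb_copy_shadow_state( new_payload, old_payload, old_sz );
       libhb_range_new( thr, new_payload + old_sz, new_sz - old_sz );
       libhb_range_noaccess( thr, old_payload, old_sz );

   i.e. carry the old per-byte states across, mark the extension as
   newly allocated, and retire the old block. */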
4874
4875void libhb_maybe_GC ( void )
4876{
4877 event_map_maybe_GC();
4878 /* If there are still freelist entries available, no need for a
4879 GC. */
4880 if (vts_tab_freelist != VtsID_INVALID)
4881 return;
4882 /* So all the table entries are full, and we're having to expand
4883      the table.  But did we hit the threshold point yet? */
4884 if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
4885 return;
4886 vts_tab__do_GC( False/*don't show stats*/ );
4887}
4888
4889
4890/////////////////////////////////////////////////////////////////
4891/////////////////////////////////////////////////////////////////
4892// //
4893// SECTION END main library //
4894// //
4895/////////////////////////////////////////////////////////////////
4896/////////////////////////////////////////////////////////////////
4897
4898/*--------------------------------------------------------------------*/
4899/*--- end libhb_main.c ---*/
4900/*--------------------------------------------------------------------*/