   1
2/*--------------------------------------------------------------------*/
3/*--- LibHB: a library for implementing and checking ---*/
4/*--- the happens-before relationship in concurrent programs. ---*/
5/*--- libhb_main.c ---*/
6/*--------------------------------------------------------------------*/
7
8/*
9 This file is part of LibHB, a library for implementing and checking
10 the happens-before relationship in concurrent programs.
11
12 Copyright (C) 2008-2008 OpenWorks Ltd
13 info@open-works.co.uk
14
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file COPYING.
31*/
32
33#include "pub_tool_basics.h"
34#include "pub_tool_libcassert.h"
35#include "pub_tool_libcbase.h"
36#include "pub_tool_libcprint.h"
37#include "pub_tool_mallocfree.h"
38#include "pub_tool_wordfm.h"
  39#include "pub_tool_sparsewa.h"
  40#include "pub_tool_xarray.h"
41#include "pub_tool_oset.h"
42#include "pub_tool_threadstate.h"
43#include "pub_tool_aspacemgr.h"
44#include "pub_tool_execontext.h"
45#include "pub_tool_errormgr.h"
  46#include "pub_tool_options.h"        // VG_(clo_verbosity)
  47#include "hg_basics.h"
48#include "hg_wordset.h"
49#include "hg_lock_n_thread.h"
50#include "hg_errors.h"
51
52#include "libhb.h"
53
54
  55/* Globals needed by other parts of the library.  These are set
  56   once at startup and then never changed. */
  57
58static void (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
  59static ExeContext* (*main_get_EC)( Thr* ) = NULL;
  60
61/////////////////////////////////////////////////////////////////
62/////////////////////////////////////////////////////////////////
63// //
64// //
65// //
66/////////////////////////////////////////////////////////////////
67/////////////////////////////////////////////////////////////////
68
69
70/////////////////////////////////////////////////////////////////
71/////////////////////////////////////////////////////////////////
72// //
73// SECTION BEGIN compressed shadow memory //
74// //
75/////////////////////////////////////////////////////////////////
76/////////////////////////////////////////////////////////////////
77
78#ifndef __HB_ZSM_H
79#define __HB_ZSM_H
80
81typedef ULong SVal;
82
83/* This value has special significance to the implementation, and callers
84 may not store it in the shadow memory. */
85#define SVal_INVALID (3ULL << 62)
86
87/* This is the default value for shadow memory. Initially the shadow
88 memory contains no accessible areas and so all reads produce this
  89   value.  TODO: make this caller-definable. */
90#define SVal_NOACCESS (2ULL << 62)
91
92/* Initialise the library. Once initialised, it will (or may) call
93 rcinc and rcdec in response to all the calls below, in order to
94 allow the user to do reference counting on the SVals stored herein.
95 It is important to understand, however, that due to internal
96 caching, the reference counts are in general inaccurate, and can be
  97   either above or below the true reference count for an item.  In
98 particular, the library may indicate that the reference count for
99 an item is zero, when in fact it is not.
100
101 To make the reference counting exact and therefore non-pointless,
102 call zsm_flush_cache. Immediately after it returns, the reference
103 counts for all items, as deduced by the caller by observing calls
104 to rcinc and rcdec, will be correct, and so any items with a zero
105 reference count may be freed (or at least considered to be
106 unreferenced by this library).
107*/
108static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );
109
110static void zsm_set_range ( Addr, SizeT, SVal );
111static SVal zsm_read8 ( Addr );
112static void zsm_copy_range ( Addr, Addr, SizeT );
113static void zsm_flush_cache ( void );
114
115#endif /* ! __HB_ZSM_H */
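/* Illustrative sketch only (not part of the library): how a caller
   might wire up zsm_init with reference-counting callbacks and use
   zsm_flush_cache to make the observed counts exact before freeing
   unreferenced items.  All example_* names below are hypothetical. */
#if 0
static UWord example_refcounts[1000];            /* hypothetical count table */
static UWord example_index_of ( SVal sv ) {      /* hypothetical hash of an SVal */
   return (UWord)sv % 1000;
}
static void example_rcinc ( SVal sv ) { example_refcounts[example_index_of(sv)]++; }
static void example_rcdec ( SVal sv ) { example_refcounts[example_index_of(sv)]--; }

static void example_setup_and_gc ( void )
{
   zsm_init( example_rcinc, example_rcdec );
   /* ... run for a while; counts may be temporarily inaccurate due to
      internal caching ... */
   zsm_flush_cache();
   /* Now the counts deduced from rcinc/rcdec calls are exact, so any
      SVal whose count is zero is genuinely unreferenced by the shadow
      memory and may be reclaimed by the caller. */
}
#endif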
116
117
118/* For the shadow mem cache stuff we may want more intrusive
119 checks. Unfortunately there's no almost-zero-cost way to make them
120 selectable at run time. Hence set the #if 0 to #if 1 and
121 rebuild if you want them. */
122#if 0
123# define SCE_CACHELINE 1 /* do sanity-check CacheLine stuff */
124# define inline __attribute__((noinline))
125 /* probably want to ditch -fomit-frame-pointer too */
126#else
127# define SCE_CACHELINE 0 /* don't sanity-check CacheLine stuff */
128#endif
129
130/* For the SegmentID, SegmentSet and SVal stuff we may want more
131 intrusive checks. Again there's no zero cost way to do this. Set
132 the #if 0 to #if 1 and rebuild if you want them. */
133#if 0
134# define SCE_SVALS 1 /* sanity-check shadow value stuff */
135#else
136# define SCE_SVALS 0
137#endif
138
139
140/* Round a up to the next multiple of N. N must be a power of 2 */
 141#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N)-1))
 142/* Round a down to the next multiple of N.  N must be a power of 2 */
 143#define ROUNDDN(a, N)   ((a) & ~((N)-1))
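/* Worked example (illustrative): with N = N_LINE_ARANGE (64),
   ROUNDUP(0x1234, 64) == 0x1240 and ROUNDDN(0x1234, 64) == 0x1200. */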
144
145
146
147/* ------ User-supplied RC functions ------ */
148static void(*rcinc)(SVal) = NULL;
149static void(*rcdec)(SVal) = NULL;
150
151
152/* ------ CacheLine ------ */
153
154#define N_LINE_BITS 6 /* must be >= 3 */
155#define N_LINE_ARANGE (1 << N_LINE_BITS)
156#define N_LINE_TREES (N_LINE_ARANGE >> 3)
157
158typedef
159 struct {
160 UShort descrs[N_LINE_TREES];
161 SVal svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
162 }
163 CacheLine;
164
165#define TREE_DESCR_16_0 (1<<0)
166#define TREE_DESCR_32_0 (1<<1)
167#define TREE_DESCR_16_1 (1<<2)
168#define TREE_DESCR_64 (1<<3)
169#define TREE_DESCR_16_2 (1<<4)
170#define TREE_DESCR_32_1 (1<<5)
171#define TREE_DESCR_16_3 (1<<6)
172#define TREE_DESCR_8_0 (1<<7)
173#define TREE_DESCR_8_1 (1<<8)
174#define TREE_DESCR_8_2 (1<<9)
175#define TREE_DESCR_8_3 (1<<10)
176#define TREE_DESCR_8_4 (1<<11)
177#define TREE_DESCR_8_5 (1<<12)
178#define TREE_DESCR_8_6 (1<<13)
179#define TREE_DESCR_8_7 (1<<14)
180#define TREE_DESCR_DTY (1<<15)
181
182typedef
183 struct {
184 SVal dict[4]; /* can represent up to 4 diff values in the line */
185 UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
186 dict indexes */
187 /* if dict[0] == SVal_INVALID then dict[1] is the index of the
188 LineF to use, and dict[2..] are also SVal_INVALID. */
189 }
190 LineZ; /* compressed rep for a cache line */
191
192typedef
193 struct {
194 Bool inUse;
195 SVal w64s[N_LINE_ARANGE];
196 }
197 LineF; /* full rep for a cache line */
198
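/* Worked example (illustrative): if a 64-byte line contains only two
   distinct shadow values A and B, its LineZ stores dict = {A, B,
   SVal_INVALID, SVal_INVALID} and each of the 64 bytes is described by
   a 2-bit dictionary index (0 for A, 1 for B) packed into ix2s[16].
   Only a line needing more than 4 distinct values spills to a LineF. */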
199/* Shadow memory.
200 Primary map is a WordFM Addr SecMap*.
201 SecMaps cover some page-size-ish section of address space and hold
202 a compressed representation.
203 CacheLine-sized chunks of SecMaps are copied into a Cache, being
204 decompressed when moved into the cache and recompressed on the
205 way out. Because of this, the cache must operate as a writeback
206 cache, not a writethrough one.
207
208 Each SecMap must hold a power-of-2 number of CacheLines. Hence
 209   N_SECMAP_BITS must be >= N_LINE_BITS.
210*/
211#define N_SECMAP_BITS 13
212#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)
213
214// # CacheLines held by a SecMap
215#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
216
217/* The data in the SecMap is held in the array of LineZs. Each LineZ
218 either carries the required data directly, in a compressed
219 representation, or it holds (in .dict[0]) an index to the LineF in
220 .linesF that holds the full representation.
221
222 Currently-unused LineF's have their .inUse bit set to zero.
 223   Since each in-use LineF is referred to by exactly one LineZ,
224 the number of .linesZ[] that refer to .linesF should equal
225 the number of .linesF[] that have .inUse == True.
226
227 RC obligations: the RCs presented to the user include exactly
228 the values in:
229 * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
230 * F reps that are in use (.inUse == True)
231
232 Hence the following actions at the following transitions are required:
233
234 F rep: .inUse==True -> .inUse==False -- rcdec_LineF
235 F rep: .inUse==False -> .inUse==True -- rcinc_LineF
236 Z rep: .dict[0] from other to SVal_INVALID -- rcdec_LineZ
237 Z rep: .dict[0] from SVal_INVALID to other -- rcinc_LineZ
238*/
239typedef
240 struct {
241 UInt magic;
242 LineZ linesZ[N_SECMAP_ZLINES];
243 LineF* linesF;
244 UInt linesF_size;
245 }
246 SecMap;
247
248#define SecMap_MAGIC 0x571e58cbU
249
250static inline Bool is_sane_SecMap ( SecMap* sm ) {
251 return sm != NULL && sm->magic == SecMap_MAGIC;
252}
253
254/* ------ Cache ------ */
255
256#define N_WAY_BITS 16
257#define N_WAY_NENT (1 << N_WAY_BITS)
258
259/* Each tag is the address of the associated CacheLine, rounded down
260 to a CacheLine address boundary. A CacheLine size must be a power
261 of 2 and must be 8 or more. Hence an easy way to initialise the
262 cache so it is empty is to set all the tag values to any value % 8
263 != 0, eg 1. This means all queries in the cache initially miss.
 264   It does however require us to detect, and not write back, any line
265 with a bogus tag. */
266typedef
267 struct {
268 CacheLine lyns0[N_WAY_NENT];
269 Addr tags0[N_WAY_NENT];
270 }
271 Cache;
272
273static inline Bool is_valid_scache_tag ( Addr tag ) {
274 /* a valid tag should be naturally aligned to the start of
275 a CacheLine. */
276 return 0 == (tag & (N_LINE_ARANGE - 1));
277}
278
279
280/* --------- Primary data structures --------- */
281
282/* Shadow memory primary map */
283static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
284static Cache cache_shmem;
285
286
287static UWord stats__secmaps_search = 0; // # SM finds
288static UWord stats__secmaps_search_slow = 0; // # SM lookupFMs
289static UWord stats__secmaps_allocd = 0; // # SecMaps issued
290static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
291static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
292static UWord stats__secmap_linesZ_bytes = 0; // .. using this much storage
293static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
294static UWord stats__secmap_linesF_bytes = 0; // .. using this much storage
295static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
296static UWord stats__cache_Z_fetches = 0; // # Z lines fetched
297static UWord stats__cache_Z_wbacks = 0; // # Z lines written back
298static UWord stats__cache_F_fetches = 0; // # F lines fetched
299static UWord stats__cache_F_wbacks = 0; // # F lines written back
300static UWord stats__cache_invals = 0; // # cache invals
301static UWord stats__cache_flushes = 0; // # cache flushes
302static UWord stats__cache_totrefs = 0; // # total accesses
303static UWord stats__cache_totmisses = 0; // # misses
304static ULong stats__cache_make_New_arange = 0; // total arange made New
305static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
306static UWord stats__cline_normalises = 0; // # calls to cacheline_normalise
307static UWord stats__cline_read64s = 0; // # calls to s_m_read64
308static UWord stats__cline_read32s = 0; // # calls to s_m_read32
309static UWord stats__cline_read16s = 0; // # calls to s_m_read16
310static UWord stats__cline_read8s = 0; // # calls to s_m_read8
311static UWord stats__cline_write64s = 0; // # calls to s_m_write64
312static UWord stats__cline_write32s = 0; // # calls to s_m_write32
313static UWord stats__cline_write16s = 0; // # calls to s_m_write16
314static UWord stats__cline_write8s = 0; // # calls to s_m_write8
315static UWord stats__cline_set64s = 0; // # calls to s_m_set64
316static UWord stats__cline_set32s = 0; // # calls to s_m_set32
317static UWord stats__cline_set16s = 0; // # calls to s_m_set16
318static UWord stats__cline_set8s = 0; // # calls to s_m_set8
319static UWord stats__cline_get8s = 0; // # calls to s_m_get8
320static UWord stats__cline_copy8s = 0; // # calls to s_m_copy8
321static UWord stats__cline_64to32splits = 0; // # 64-bit accesses split
322static UWord stats__cline_32to16splits = 0; // # 32-bit accesses split
323static UWord stats__cline_16to8splits = 0; // # 16-bit accesses split
324static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
325static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
326static UWord stats__cline_16to8pulldown = 0; // # calls to pulldown_to_8
327
328static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
329 return a & ~(N_SECMAP_ARANGE - 1);
330}
331static inline UWord shmem__get_SecMap_offset ( Addr a ) {
332 return a & (N_SECMAP_ARANGE - 1);
333}
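/* Worked example (illustrative): with N_SECMAP_BITS == 13 a SecMap
   covers 0x2000 bytes, so for a == 0x403F25 the SecMap base is
   0x402000 and the offset within the SecMap is 0x1F25. */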
334
335
336/*----------------------------------------------------------------*/
337/*--- map_shmem :: WordFM Addr SecMap ---*/
338/*--- shadow memory (low level handlers) (shmem__* fns) ---*/
339/*----------------------------------------------------------------*/
340
341/*--------------- SecMap allocation --------------- */
342
343static HChar* shmem__bigchunk_next = NULL;
344static HChar* shmem__bigchunk_end1 = NULL;
345
346static void* shmem__bigchunk_alloc ( SizeT n )
347{
348 const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
349 tl_assert(n > 0);
350 n = VG_ROUNDUP(n, 16);
351 tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
352 tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
353 <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
354 if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
355 if (0)
356 VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
357 (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
358 shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
359 if (shmem__bigchunk_next == NULL)
360 VG_(out_of_memory_NORETURN)(
361 "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
362 shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
363 }
364 tl_assert(shmem__bigchunk_next);
365 tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
366 tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
367 shmem__bigchunk_next += n;
368 return shmem__bigchunk_next - n;
369}
370
371static SecMap* shmem__alloc_SecMap ( void )
372{
373 Word i, j;
374 SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
375 if (0) VG_(printf)("alloc_SecMap %p\n",sm);
376 tl_assert(sm);
377 sm->magic = SecMap_MAGIC;
378 for (i = 0; i < N_SECMAP_ZLINES; i++) {
379 sm->linesZ[i].dict[0] = SVal_NOACCESS;
380 sm->linesZ[i].dict[1] = SVal_INVALID;
381 sm->linesZ[i].dict[2] = SVal_INVALID;
382 sm->linesZ[i].dict[3] = SVal_INVALID;
383 for (j = 0; j < N_LINE_ARANGE/4; j++)
384 sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
385 }
386 sm->linesF = NULL;
387 sm->linesF_size = 0;
388 stats__secmaps_allocd++;
389 stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
390 stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
391 stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
392 return sm;
393}
394
395typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
396static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
397
398static SecMap* shmem__find_SecMap ( Addr ga )
399{
400 SecMap* sm = NULL;
401 Addr gaKey = shmem__round_to_SecMap_base(ga);
402 // Cache
403 stats__secmaps_search++;
404 if (LIKELY(gaKey == smCache[0].gaKey))
405 return smCache[0].sm;
406 if (LIKELY(gaKey == smCache[1].gaKey)) {
407 SMCacheEnt tmp = smCache[0];
408 smCache[0] = smCache[1];
409 smCache[1] = tmp;
410 return smCache[0].sm;
411 }
412 if (gaKey == smCache[2].gaKey) {
413 SMCacheEnt tmp = smCache[1];
414 smCache[1] = smCache[2];
415 smCache[2] = tmp;
416 return smCache[1].sm;
417 }
418 // end Cache
419 stats__secmaps_search_slow++;
420 if (VG_(lookupFM)( map_shmem,
421 NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
422 tl_assert(sm != NULL);
423 smCache[2] = smCache[1];
424 smCache[1] = smCache[0];
425 smCache[0].gaKey = gaKey;
426 smCache[0].sm = sm;
427 } else {
428 tl_assert(sm == NULL);
429 }
430 return sm;
431}
432
433static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
434{
435 SecMap* sm = shmem__find_SecMap ( ga );
436 if (LIKELY(sm)) {
437 return sm;
438 } else {
439 /* create a new one */
440 Addr gaKey = shmem__round_to_SecMap_base(ga);
441 sm = shmem__alloc_SecMap();
442 tl_assert(sm);
443 VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
444 return sm;
445 }
446}
447
448
449/* ------------ LineF and LineZ related ------------ */
450
451static void rcinc_LineF ( LineF* lineF ) {
452 UWord i;
453 tl_assert(lineF->inUse);
454 for (i = 0; i < N_LINE_ARANGE; i++)
455 rcinc(lineF->w64s[i]);
456}
457
458static void rcdec_LineF ( LineF* lineF ) {
459 UWord i;
460 tl_assert(lineF->inUse);
461 for (i = 0; i < N_LINE_ARANGE; i++)
462 rcdec(lineF->w64s[i]);
463}
464
465static void rcinc_LineZ ( LineZ* lineZ ) {
466 tl_assert(lineZ->dict[0] != SVal_INVALID);
467 rcinc(lineZ->dict[0]);
468 if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
469 if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
470 if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
471}
472
473static void rcdec_LineZ ( LineZ* lineZ ) {
474 tl_assert(lineZ->dict[0] != SVal_INVALID);
475 rcdec(lineZ->dict[0]);
476 if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
477 if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
478 if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
479}
480
481inline
482static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
483 Word bix, shft, mask, prep;
484 tl_assert(ix >= 0);
485 bix = ix >> 2;
486 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
487 mask = 3 << shft;
488 prep = b2 << shft;
489 arr[bix] = (arr[bix] & ~mask) | prep;
490}
491
492inline
493static UWord read_twobit_array ( UChar* arr, UWord ix ) {
494 Word bix, shft;
495 tl_assert(ix >= 0);
496 bix = ix >> 2;
497 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
498 return (arr[bix] >> shft) & 3;
499}
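/* Illustrative sketch only (guarded out, never called): round-tripping
   the 2-bit packing used for LineZ.ix2s, to show that
   write_twobit_array and read_twobit_array are inverses. */
#if 0
static void example_twobit_roundtrip ( void )
{
   UChar arr[N_LINE_ARANGE/4];
   UWord i;
   VG_(memset)(arr, 0, sizeof(arr));
   for (i = 0; i < N_LINE_ARANGE; i++)
      write_twobit_array( arr, i, i & 3 );   /* store pattern 0,1,2,3,... */
   for (i = 0; i < N_LINE_ARANGE; i++)
      tl_assert( read_twobit_array( arr, i ) == (i & 3) );
}
#endif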
500
501/* Given address 'tag', find either the Z or F line containing relevant
502 data, so it can be read into the cache.
503*/
504static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
505 /*OUT*/LineF** fp, Addr tag ) {
506 LineZ* lineZ;
507 LineF* lineF;
508 UWord zix;
509 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
510 UWord smoff = shmem__get_SecMap_offset(tag);
511 /* since smoff is derived from a valid tag, it should be
512 cacheline-aligned. */
513 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
514 zix = smoff >> N_LINE_BITS;
515 tl_assert(zix < N_SECMAP_ZLINES);
516 lineZ = &sm->linesZ[zix];
517 lineF = NULL;
518 if (lineZ->dict[0] == SVal_INVALID) {
519 UInt fix = (UInt)lineZ->dict[1];
520 tl_assert(sm->linesF);
521 tl_assert(sm->linesF_size > 0);
522 tl_assert(fix >= 0 && fix < sm->linesF_size);
523 lineF = &sm->linesF[fix];
524 tl_assert(lineF->inUse);
525 lineZ = NULL;
526 }
527 *zp = lineZ;
528 *fp = lineF;
529}
530
531/* Given address 'tag', return the relevant SecMap and the index of
532 the LineZ within it, in the expectation that the line is to be
533 overwritten. Regardless of whether 'tag' is currently associated
 534   with a Z or F representation, do an rcdec on the current
535 representation, in recognition of the fact that the contents are
536 just about to be overwritten. */
537static __attribute__((noinline))
538void find_Z_for_writing ( /*OUT*/SecMap** smp,
539 /*OUT*/Word* zixp,
540 Addr tag ) {
541 LineZ* lineZ;
542 LineF* lineF;
543 UWord zix;
544 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
545 UWord smoff = shmem__get_SecMap_offset(tag);
546 /* since smoff is derived from a valid tag, it should be
547 cacheline-aligned. */
548 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
549 zix = smoff >> N_LINE_BITS;
550 tl_assert(zix < N_SECMAP_ZLINES);
551 lineZ = &sm->linesZ[zix];
552 lineF = NULL;
553 /* re RCs, we are freeing up this LineZ/LineF so that new data can
554 be parked in it. Hence have to rcdec it accordingly. */
555 /* If lineZ has an associated lineF, free it up. */
556 if (lineZ->dict[0] == SVal_INVALID) {
557 UInt fix = (UInt)lineZ->dict[1];
558 tl_assert(sm->linesF);
559 tl_assert(sm->linesF_size > 0);
560 tl_assert(fix >= 0 && fix < sm->linesF_size);
561 lineF = &sm->linesF[fix];
562 tl_assert(lineF->inUse);
563 rcdec_LineF(lineF);
564 lineF->inUse = False;
565 } else {
566 rcdec_LineZ(lineZ);
567 }
568 *smp = sm;
569 *zixp = zix;
570}
571
572static __attribute__((noinline))
573void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
574 UInt i, new_size;
575 LineF* nyu;
576
577 if (sm->linesF) {
578 tl_assert(sm->linesF_size > 0);
579 } else {
580 tl_assert(sm->linesF_size == 0);
581 }
582
583 if (sm->linesF) {
584 for (i = 0; i < sm->linesF_size; i++) {
585 if (!sm->linesF[i].inUse) {
586 *fixp = (Word)i;
587 return;
588 }
589 }
590 }
591
592 /* No free F line found. Expand existing array and try again. */
593 new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
594 nyu = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
595 new_size * sizeof(LineF) );
596 tl_assert(nyu);
597
598 stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
599 stats__secmap_linesF_bytes += (new_size - sm->linesF_size)
600 * sizeof(LineF);
601
602 if (0)
603 VG_(printf)("SM %p: expand F array from %d to %d\n",
604 sm, (Int)sm->linesF_size, new_size);
605
606 for (i = 0; i < new_size; i++)
607 nyu[i].inUse = False;
608
609 if (sm->linesF) {
610 for (i = 0; i < sm->linesF_size; i++) {
611 tl_assert(sm->linesF[i].inUse);
612 nyu[i] = sm->linesF[i];
613 }
614 VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
615 HG_(free)(sm->linesF);
616 }
617
618 sm->linesF = nyu;
619 sm->linesF_size = new_size;
620
621 for (i = 0; i < sm->linesF_size; i++) {
622 if (!sm->linesF[i].inUse) {
623 *fixp = (Word)i;
624 return;
625 }
626 }
627
628 /*NOTREACHED*/
629 tl_assert(0);
630}
631
632
633/* ------------ CacheLine and implicit-tree related ------------ */
634
635__attribute__((unused))
636static void pp_CacheLine ( CacheLine* cl ) {
637 Word i;
638 if (!cl) {
639 VG_(printf)("%s","pp_CacheLine(NULL)\n");
640 return;
641 }
642 for (i = 0; i < N_LINE_TREES; i++)
643 VG_(printf)(" descr: %04lx\n", (UWord)cl->descrs[i]);
644 for (i = 0; i < N_LINE_ARANGE; i++)
645 VG_(printf)(" sval: %08lx\n", (UWord)cl->svals[i]);
646}
647
648static UChar descr_to_validbits ( UShort descr )
649{
650 /* a.k.a Party Time for gcc's constant folder */
651# define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
652 b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0) \
653 ( (UShort) ( ( (b8_7) << 14) | ( (b8_6) << 13) | \
654 ( (b8_5) << 12) | ( (b8_4) << 11) | \
655 ( (b8_3) << 10) | ( (b8_2) << 9) | \
656 ( (b8_1) << 8) | ( (b8_0) << 7) | \
657 ( (b16_3) << 6) | ( (b32_1) << 5) | \
658 ( (b16_2) << 4) | ( (b64) << 3) | \
659 ( (b16_1) << 2) | ( (b32_0) << 1) | \
660 ( (b16_0) << 0) ) )
661
662# define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
663 ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
664 ( (bit5) << 5) | ( (bit4) << 4) | \
665 ( (bit3) << 3) | ( (bit2) << 2) | \
666 ( (bit1) << 1) | ( (bit0) << 0) ) )
667
668 /* these should all get folded out at compile time */
669 tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
670 tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
671 tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
672 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
673 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
674 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
675 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
676 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
677 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);
678
679 switch (descr) {
680 /*
681 +--------------------------------- TREE_DESCR_8_7
682 | +------------------- TREE_DESCR_8_0
683 | | +---------------- TREE_DESCR_16_3
684 | | | +-------------- TREE_DESCR_32_1
685 | | | | +------------ TREE_DESCR_16_2
686 | | | | | +--------- TREE_DESCR_64
687 | | | | | | +------ TREE_DESCR_16_1
688 | | | | | | | +---- TREE_DESCR_32_0
689 | | | | | | | | +-- TREE_DESCR_16_0
690 | | | | | | | | |
691 | | | | | | | | | GRANULARITY, 7 -> 0 */
692 case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */
693 return BYTE(1,1,1,1,1,1,1,1);
694 case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */
695 return BYTE(1,1,0,1,1,1,1,1);
696 case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */
697 return BYTE(0,1,1,1,1,1,1,1);
698 case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */
699 return BYTE(0,1,0,1,1,1,1,1);
700
701 case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */
702 return BYTE(1,1,1,1,1,1,0,1);
703 case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */
704 return BYTE(1,1,0,1,1,1,0,1);
705 case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */
706 return BYTE(0,1,1,1,1,1,0,1);
707 case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */
708 return BYTE(0,1,0,1,1,1,0,1);
709
710 case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */
711 return BYTE(1,1,1,1,0,1,1,1);
712 case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */
713 return BYTE(1,1,0,1,0,1,1,1);
714 case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */
715 return BYTE(0,1,1,1,0,1,1,1);
716 case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */
717 return BYTE(0,1,0,1,0,1,1,1);
718
719 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */
720 return BYTE(1,1,1,1,0,1,0,1);
721 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */
722 return BYTE(1,1,0,1,0,1,0,1);
723 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */
724 return BYTE(0,1,1,1,0,1,0,1);
725 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */
726 return BYTE(0,1,0,1,0,1,0,1);
727
728 case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */
729 return BYTE(0,0,0,1,1,1,1,1);
730 case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */
731 return BYTE(0,0,0,1,1,1,0,1);
732 case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */
733 return BYTE(0,0,0,1,0,1,1,1);
734 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */
735 return BYTE(0,0,0,1,0,1,0,1);
736
737 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */
738 return BYTE(1,1,1,1,0,0,0,1);
739 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */
740 return BYTE(1,1,0,1,0,0,0,1);
741 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */
742 return BYTE(0,1,1,1,0,0,0,1);
743 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */
744 return BYTE(0,1,0,1,0,0,0,1);
745
746 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
747 return BYTE(0,0,0,1,0,0,0,1);
748
749 case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
750 return BYTE(0,0,0,0,0,0,0,1);
751
752 default: return BYTE(0,0,0,0,0,0,0,0);
753 /* INVALID - any valid descr produces at least one
754 valid bit in tree[0..7]*/
755 }
756 /* NOTREACHED*/
757 tl_assert(0);
758
759# undef DESCR
760# undef BYTE
761}
762
763__attribute__((unused))
764static Bool is_sane_Descr ( UShort descr ) {
765 return descr_to_validbits(descr) != 0;
766}
767
768static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
769 VG_(sprintf)(dst,
770 "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
771 (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
772 (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
773 (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
774 (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
775 (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
776 (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
777 (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
778 (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
779 (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
780 (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
781 (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
782 (Int)((descr & TREE_DESCR_64) ? 1 : 0),
783 (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
784 (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
785 (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
786 );
787}
788static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
789 VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
790 (Int)((byte & 128) ? 1 : 0),
791 (Int)((byte & 64) ? 1 : 0),
792 (Int)((byte & 32) ? 1 : 0),
793 (Int)((byte & 16) ? 1 : 0),
794 (Int)((byte & 8) ? 1 : 0),
795 (Int)((byte & 4) ? 1 : 0),
796 (Int)((byte & 2) ? 1 : 0),
797 (Int)((byte & 1) ? 1 : 0)
798 );
799}
800
801static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
802 Word i;
803 UChar validbits = descr_to_validbits(descr);
804 HChar buf[128], buf2[128];
805 if (validbits == 0)
806 goto bad;
807 for (i = 0; i < 8; i++) {
808 if (validbits & (1<<i)) {
809 if (tree[i] == SVal_INVALID)
810 goto bad;
811 } else {
812 if (tree[i] != SVal_INVALID)
813 goto bad;
814 }
815 }
816 return True;
817 bad:
818 sprintf_Descr( buf, descr );
819 sprintf_Byte( buf2, validbits );
820 VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
821 VG_(printf)(" validbits 0x%02lx %s\n", (UWord)validbits, buf2);
822 VG_(printf)(" descr 0x%04lx %s\n", (UWord)descr, buf);
823 for (i = 0; i < 8; i++)
824 VG_(printf)(" [%ld] 0x%016llx\n", i, tree[i]);
825 VG_(printf)("%s","}\n");
 826   return False;
827}
828
829static Bool is_sane_CacheLine ( CacheLine* cl )
830{
831 Word tno, cloff;
832
833 if (!cl) goto bad;
834
835 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
836 UShort descr = cl->descrs[tno];
837 SVal* tree = &cl->svals[cloff];
838 if (!is_sane_Descr_and_Tree(descr, tree))
839 goto bad;
840 }
841 tl_assert(cloff == N_LINE_ARANGE);
842 return True;
843 bad:
844 pp_CacheLine(cl);
845 return False;
846}
847
848static UShort normalise_tree ( /*MOD*/SVal* tree )
849{
850 UShort descr;
851 /* pre: incoming tree[0..7] does not have any invalid shvals, in
852 particular no zeroes. */
853 if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
854 || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
855 || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
856 || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
857 tl_assert(0);
858
859 descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
860 | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
861 | TREE_DESCR_8_1 | TREE_DESCR_8_0;
862 /* build 16-bit layer */
863 if (tree[1] == tree[0]) {
864 tree[1] = SVal_INVALID;
865 descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
866 descr |= TREE_DESCR_16_0;
867 }
868 if (tree[3] == tree[2]) {
869 tree[3] = SVal_INVALID;
870 descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
871 descr |= TREE_DESCR_16_1;
872 }
873 if (tree[5] == tree[4]) {
874 tree[5] = SVal_INVALID;
875 descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
876 descr |= TREE_DESCR_16_2;
877 }
878 if (tree[7] == tree[6]) {
879 tree[7] = SVal_INVALID;
880 descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
881 descr |= TREE_DESCR_16_3;
882 }
883 /* build 32-bit layer */
884 if (tree[2] == tree[0]
885 && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
886 tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
887 descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
888 descr |= TREE_DESCR_32_0;
889 }
890 if (tree[6] == tree[4]
891 && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
892 tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
893 descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
894 descr |= TREE_DESCR_32_1;
895 }
896 /* build 64-bit layer */
897 if (tree[4] == tree[0]
898 && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
899 tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
900 descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
901 descr |= TREE_DESCR_64;
902 }
903 return descr;
904}
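/* Worked example (illustrative): if all eight leaves of a tree hold
   the same SVal, normalise_tree collapses 8 -> 16 -> 32 -> 64 and
   returns just TREE_DESCR_64, with tree[1..7] set to SVal_INVALID.
   If only tree[0] == tree[1], it returns
   TREE_DESCR_16_0 | TREE_DESCR_8_2 | TREE_DESCR_8_3 | TREE_DESCR_8_4
   | TREE_DESCR_8_5 | TREE_DESCR_8_6 | TREE_DESCR_8_7. */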
905
906/* This takes a cacheline where all the data is at the leaves
907 (w8[..]) and builds a correctly normalised tree. */
908static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
909{
910 Word tno, cloff;
911 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
912 SVal* tree = &cl->svals[cloff];
913 cl->descrs[tno] = normalise_tree( tree );
914 }
915 tl_assert(cloff == N_LINE_ARANGE);
916 if (SCE_CACHELINE)
917 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
918 stats__cline_normalises++;
919}
920
921
922typedef struct { UChar count; SVal sval; } CountedSVal;
923
924static
925void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
926 /*OUT*/Word* dstUsedP,
927 Word nDst, CacheLine* src )
928{
929 Word tno, cloff, dstUsed;
930
931 tl_assert(nDst == N_LINE_ARANGE);
932 dstUsed = 0;
933
934 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
935 UShort descr = src->descrs[tno];
936 SVal* tree = &src->svals[cloff];
937
938 /* sequentialise the tree described by (descr,tree). */
939# define PUT(_n,_v) \
940 do { dst[dstUsed ].count = (_n); \
941 dst[dstUsed++].sval = (_v); \
942 } while (0)
943
944 /* byte 0 */
945 if (descr & TREE_DESCR_64) PUT(8, tree[0]); else
946 if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
947 if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
948 if (descr & TREE_DESCR_8_0) PUT(1, tree[0]);
949 /* byte 1 */
950 if (descr & TREE_DESCR_8_1) PUT(1, tree[1]);
951 /* byte 2 */
952 if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
953 if (descr & TREE_DESCR_8_2) PUT(1, tree[2]);
954 /* byte 3 */
955 if (descr & TREE_DESCR_8_3) PUT(1, tree[3]);
956 /* byte 4 */
957 if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
958 if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
959 if (descr & TREE_DESCR_8_4) PUT(1, tree[4]);
960 /* byte 5 */
961 if (descr & TREE_DESCR_8_5) PUT(1, tree[5]);
962 /* byte 6 */
963 if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
964 if (descr & TREE_DESCR_8_6) PUT(1, tree[6]);
965 /* byte 7 */
966 if (descr & TREE_DESCR_8_7) PUT(1, tree[7]);
967
968# undef PUT
969 /* END sequentialise the tree described by (descr,tree). */
970
971 }
972 tl_assert(cloff == N_LINE_ARANGE);
973 tl_assert(dstUsed <= nDst);
974
975 *dstUsedP = dstUsed;
976}
977
978/* Write the cacheline 'wix' to backing store. Where it ends up
979 is determined by its tag field. */
980static __attribute__((noinline)) void cacheline_wback ( UWord wix )
981{
982 Word i, j, k, m;
983 Addr tag;
984 SecMap* sm;
985 CacheLine* cl;
986 LineZ* lineZ;
987 LineF* lineF;
988 Word zix, fix, csvalsUsed;
989 CountedSVal csvals[N_LINE_ARANGE];
990 SVal sv;
991
992 if (0)
993 VG_(printf)("scache wback line %d\n", (Int)wix);
994
995 tl_assert(wix >= 0 && wix < N_WAY_NENT);
996
997 tag = cache_shmem.tags0[wix];
998 cl = &cache_shmem.lyns0[wix];
999
1000 /* The cache line may have been invalidated; if so, ignore it. */
1001 if (!is_valid_scache_tag(tag))
1002 return;
1003
1004 /* Where are we going to put it? */
1005 sm = NULL;
1006 lineZ = NULL;
1007 lineF = NULL;
1008 zix = fix = -1;
1009
1010 /* find the Z line to write in and rcdec it or the associated F
1011 line. */
1012 find_Z_for_writing( &sm, &zix, tag );
1013
1014 tl_assert(sm);
1015 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
1016 lineZ = &sm->linesZ[zix];
1017
1018 /* Generate the data to be stored */
1019 if (SCE_CACHELINE)
1020 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1021
1022 csvalsUsed = -1;
1023 sequentialise_CacheLine( csvals, &csvalsUsed,
1024 N_LINE_ARANGE, cl );
1025 tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
1026 if (0) VG_(printf)("%lu ", csvalsUsed);
1027
1028 lineZ->dict[0] = lineZ->dict[1]
1029 = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1030
1031 /* i indexes actual shadow values, k is cursor in csvals */
1032 i = 0;
1033 for (k = 0; k < csvalsUsed; k++) {
1034
1035 sv = csvals[k].sval;
1036 if (SCE_SVALS)
1037 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1038 /* do we already have it? */
1039 if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
1040 if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
1041 if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
1042 if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
1043 /* no. look for a free slot. */
1044 if (SCE_SVALS)
1045 tl_assert(sv != SVal_INVALID);
1046 if (lineZ->dict[0]
1047 == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
1048 if (lineZ->dict[1]
1049 == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
1050 if (lineZ->dict[2]
1051 == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
1052 if (lineZ->dict[3]
1053 == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
1054 break; /* we'll have to use the f rep */
1055 dict_ok:
1056 m = csvals[k].count;
1057 if (m == 8) {
1058 write_twobit_array( lineZ->ix2s, i+0, j );
1059 write_twobit_array( lineZ->ix2s, i+1, j );
1060 write_twobit_array( lineZ->ix2s, i+2, j );
1061 write_twobit_array( lineZ->ix2s, i+3, j );
1062 write_twobit_array( lineZ->ix2s, i+4, j );
1063 write_twobit_array( lineZ->ix2s, i+5, j );
1064 write_twobit_array( lineZ->ix2s, i+6, j );
1065 write_twobit_array( lineZ->ix2s, i+7, j );
1066 i += 8;
1067 }
1068 else if (m == 4) {
1069 write_twobit_array( lineZ->ix2s, i+0, j );
1070 write_twobit_array( lineZ->ix2s, i+1, j );
1071 write_twobit_array( lineZ->ix2s, i+2, j );
1072 write_twobit_array( lineZ->ix2s, i+3, j );
1073 i += 4;
1074 }
1075 else if (m == 1) {
1076 write_twobit_array( lineZ->ix2s, i+0, j );
1077 i += 1;
1078 }
1079 else if (m == 2) {
1080 write_twobit_array( lineZ->ix2s, i+0, j );
1081 write_twobit_array( lineZ->ix2s, i+1, j );
1082 i += 2;
1083 }
1084 else {
1085 tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
1086 }
1087
1088 }
1089
1090 if (LIKELY(i == N_LINE_ARANGE)) {
1091 /* Construction of the compressed representation was
1092 successful. */
1093 rcinc_LineZ(lineZ);
1094 stats__cache_Z_wbacks++;
1095 } else {
1096 /* Cannot use the compressed(z) representation. Use the full(f)
1097 rep instead. */
1098 tl_assert(i >= 0 && i < N_LINE_ARANGE);
1099 alloc_F_for_writing( sm, &fix );
1100 tl_assert(sm->linesF);
1101 tl_assert(sm->linesF_size > 0);
1102 tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
1103 lineF = &sm->linesF[fix];
1104 tl_assert(!lineF->inUse);
1105 lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1106 lineZ->dict[1] = (SVal)fix;
1107 lineF->inUse = True;
1108 i = 0;
1109 for (k = 0; k < csvalsUsed; k++) {
1110 if (SCE_SVALS)
1111 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1112 sv = csvals[k].sval;
1113 if (SCE_SVALS)
1114 tl_assert(sv != SVal_INVALID);
1115 for (m = csvals[k].count; m > 0; m--) {
1116 lineF->w64s[i] = sv;
1117 i++;
1118 }
1119 }
1120 tl_assert(i == N_LINE_ARANGE);
1121 rcinc_LineF(lineF);
1122 stats__cache_F_wbacks++;
1123 }
1124
1125 //if (anyShared)
1126 // sm->mbHasShared = True;
1127
1128 /* mb_tidy_one_cacheline(); */
1129}
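/* Worked example (illustrative): a line whose 64 bytes hold at most 4
   distinct SVals is written back in the compressed (Z) form; a line
   with 5 or more distinct SVals overflows the 4-entry dictionary and
   is written back in the full (F) form instead, which is what bumps
   stats__cache_F_wbacks rather than stats__cache_Z_wbacks. */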
1130
1131/* Fetch the cacheline 'wix' from the backing store. The tag
1132 associated with 'wix' is assumed to have already been filled in;
1133 hence that is used to determine where in the backing store to read
1134 from. */
1135static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
1136{
1137 Word i;
1138 Addr tag;
1139 CacheLine* cl;
1140 LineZ* lineZ;
1141 LineF* lineF;
1142
1143 if (0)
1144 VG_(printf)("scache fetch line %d\n", (Int)wix);
1145
1146 tl_assert(wix >= 0 && wix < N_WAY_NENT);
1147
1148 tag = cache_shmem.tags0[wix];
1149 cl = &cache_shmem.lyns0[wix];
1150
1151 /* reject nonsense requests */
1152 tl_assert(is_valid_scache_tag(tag));
1153
1154 lineZ = NULL;
1155 lineF = NULL;
1156 find_ZF_for_reading( &lineZ, &lineF, tag );
1157 tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
1158
1159 /* expand the data into the bottom layer of the tree, then get
1160 cacheline_normalise to build the descriptor array. */
1161 if (lineF) {
1162 tl_assert(lineF->inUse);
1163 for (i = 0; i < N_LINE_ARANGE; i++) {
1164 cl->svals[i] = lineF->w64s[i];
1165 }
1166 stats__cache_F_fetches++;
1167 } else {
1168 for (i = 0; i < N_LINE_ARANGE; i++) {
1169 SVal sv;
1170 UWord ix = read_twobit_array( lineZ->ix2s, i );
1171 /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
1172 sv = lineZ->dict[ix];
1173 tl_assert(sv != SVal_INVALID);
1174 cl->svals[i] = sv;
1175 }
1176 stats__cache_Z_fetches++;
1177 }
1178 normalise_CacheLine( cl );
1179}
1180
1181static void shmem__invalidate_scache ( void ) {
1182 Word wix;
1183 if (0) VG_(printf)("%s","scache inval\n");
1184 tl_assert(!is_valid_scache_tag(1));
1185 for (wix = 0; wix < N_WAY_NENT; wix++) {
1186 cache_shmem.tags0[wix] = 1/*INVALID*/;
1187 }
1188 stats__cache_invals++;
1189}
1190
1191static void shmem__flush_and_invalidate_scache ( void ) {
1192 Word wix;
1193 Addr tag;
1194 if (0) VG_(printf)("%s","scache flush and invalidate\n");
1195 tl_assert(!is_valid_scache_tag(1));
1196 for (wix = 0; wix < N_WAY_NENT; wix++) {
1197 tag = cache_shmem.tags0[wix];
1198 if (tag == 1/*INVALID*/) {
1199 /* already invalid; nothing to do */
1200 } else {
1201 tl_assert(is_valid_scache_tag(tag));
1202 cacheline_wback( wix );
1203 }
1204 cache_shmem.tags0[wix] = 1/*INVALID*/;
1205 }
1206 stats__cache_flushes++;
1207 stats__cache_invals++;
1208}
1209
1210
1211static inline Bool aligned16 ( Addr a ) {
1212 return 0 == (a & 1);
1213}
1214static inline Bool aligned32 ( Addr a ) {
1215 return 0 == (a & 3);
1216}
1217static inline Bool aligned64 ( Addr a ) {
1218 return 0 == (a & 7);
1219}
1220static inline UWord get_cacheline_offset ( Addr a ) {
1221 return (UWord)(a & (N_LINE_ARANGE - 1));
1222}
1223static inline Addr cacheline_ROUNDUP ( Addr a ) {
1224 return ROUNDUP(a, N_LINE_ARANGE);
1225}
1226static inline Addr cacheline_ROUNDDN ( Addr a ) {
1227 return ROUNDDN(a, N_LINE_ARANGE);
1228}
1229static inline UWord get_treeno ( Addr a ) {
1230 return get_cacheline_offset(a) >> 3;
1231}
1232static inline UWord get_tree_offset ( Addr a ) {
1233 return a & 7;
1234}
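/* Worked example (illustrative): for a == 0x40F6CB, the cacheline tag
   is 0x40F6C0, the offset within the line is 0x0B, the tree number is
   1 (bytes 8..15 of the line) and the offset within that tree is 3. */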
1235
1236static __attribute__((noinline))
1237 CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
1238static inline CacheLine* get_cacheline ( Addr a )
1239{
1240 /* tag is 'a' with the in-line offset masked out,
1241      eg a[31]..a[6] 000000 */
1242 Addr tag = a & ~(N_LINE_ARANGE - 1);
1243 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1244 stats__cache_totrefs++;
1245 if (LIKELY(tag == cache_shmem.tags0[wix])) {
1246 return &cache_shmem.lyns0[wix];
1247 } else {
1248 return get_cacheline_MISS( a );
1249 }
1250}
1251
1252static __attribute__((noinline))
1253 CacheLine* get_cacheline_MISS ( Addr a )
1254{
1255 /* tag is 'a' with the in-line offset masked out,
1256      eg a[31]..a[6] 000000 */
1257
1258 CacheLine* cl;
1259 Addr* tag_old_p;
1260 Addr tag = a & ~(N_LINE_ARANGE - 1);
1261 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1262
1263 tl_assert(tag != cache_shmem.tags0[wix]);
1264
1265 /* Dump the old line into the backing store. */
1266 stats__cache_totmisses++;
1267
1268 cl = &cache_shmem.lyns0[wix];
1269 tag_old_p = &cache_shmem.tags0[wix];
1270
1271 if (is_valid_scache_tag( *tag_old_p )) {
1272 /* EXPENSIVE and REDUNDANT: callee does it */
1273 if (SCE_CACHELINE)
1274 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1275 cacheline_wback( wix );
1276 }
1277 /* and reload the new one */
1278 *tag_old_p = tag;
1279 cacheline_fetch( wix );
1280 if (SCE_CACHELINE)
1281 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1282 return cl;
1283}
1284
1285static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1286 stats__cline_64to32pulldown++;
1287 switch (toff) {
1288 case 0: case 4:
1289 tl_assert(descr & TREE_DESCR_64);
1290 tree[4] = tree[0];
1291 descr &= ~TREE_DESCR_64;
1292 descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
1293 break;
1294 default:
1295 tl_assert(0);
1296 }
1297 return descr;
1298}
1299
1300static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1301 stats__cline_32to16pulldown++;
1302 switch (toff) {
1303 case 0: case 2:
1304 if (!(descr & TREE_DESCR_32_0)) {
1305 descr = pulldown_to_32(tree, 0, descr);
1306 }
1307 tl_assert(descr & TREE_DESCR_32_0);
1308 tree[2] = tree[0];
1309 descr &= ~TREE_DESCR_32_0;
1310 descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
1311 break;
1312 case 4: case 6:
1313 if (!(descr & TREE_DESCR_32_1)) {
1314 descr = pulldown_to_32(tree, 4, descr);
1315 }
1316 tl_assert(descr & TREE_DESCR_32_1);
1317 tree[6] = tree[4];
1318 descr &= ~TREE_DESCR_32_1;
1319 descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
1320 break;
1321 default:
1322 tl_assert(0);
1323 }
1324 return descr;
1325}
1326
1327static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1328 stats__cline_16to8pulldown++;
1329 switch (toff) {
1330 case 0: case 1:
1331 if (!(descr & TREE_DESCR_16_0)) {
1332 descr = pulldown_to_16(tree, 0, descr);
1333 }
1334 tl_assert(descr & TREE_DESCR_16_0);
1335 tree[1] = tree[0];
1336 descr &= ~TREE_DESCR_16_0;
1337 descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
1338 break;
1339 case 2: case 3:
1340 if (!(descr & TREE_DESCR_16_1)) {
1341 descr = pulldown_to_16(tree, 2, descr);
1342 }
1343 tl_assert(descr & TREE_DESCR_16_1);
1344 tree[3] = tree[2];
1345 descr &= ~TREE_DESCR_16_1;
1346 descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
1347 break;
1348 case 4: case 5:
1349 if (!(descr & TREE_DESCR_16_2)) {
1350 descr = pulldown_to_16(tree, 4, descr);
1351 }
1352 tl_assert(descr & TREE_DESCR_16_2);
1353 tree[5] = tree[4];
1354 descr &= ~TREE_DESCR_16_2;
1355 descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
1356 break;
1357 case 6: case 7:
1358 if (!(descr & TREE_DESCR_16_3)) {
1359 descr = pulldown_to_16(tree, 6, descr);
1360 }
1361 tl_assert(descr & TREE_DESCR_16_3);
1362 tree[7] = tree[6];
1363 descr &= ~TREE_DESCR_16_3;
1364 descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
1365 break;
1366 default:
1367 tl_assert(0);
1368 }
1369 return descr;
1370}
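/* Worked example (illustrative): with a tree described by
   TREE_DESCR_64 (a single 64-bit value in tree[0]), an 8-bit access to
   byte 5 does pulldown_to_8(tree,5,descr), which recursively pulls
   down through the 32- and 16-bit layers: tree[4] = tree[0], then
   tree[6] = tree[4], then tree[5] = tree[4], and the resulting
   descriptor is
   TREE_DESCR_32_0 | TREE_DESCR_16_3 | TREE_DESCR_8_5 | TREE_DESCR_8_4. */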
1371
1372
1373static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
1374 UShort mask;
1375 switch (toff) {
1376 case 0:
1377 mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
1378 tl_assert( (descr & mask) == mask );
1379 descr &= ~mask;
1380 descr |= TREE_DESCR_16_0;
1381 break;
1382 case 2:
1383 mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
1384 tl_assert( (descr & mask) == mask );
1385 descr &= ~mask;
1386 descr |= TREE_DESCR_16_1;
1387 break;
1388 case 4:
1389 mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
1390 tl_assert( (descr & mask) == mask );
1391 descr &= ~mask;
1392 descr |= TREE_DESCR_16_2;
1393 break;
1394 case 6:
1395 mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
1396 tl_assert( (descr & mask) == mask );
1397 descr &= ~mask;
1398 descr |= TREE_DESCR_16_3;
1399 break;
1400 default:
1401 tl_assert(0);
1402 }
1403 return descr;
1404}
1405
1406static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
1407 UShort mask;
1408 switch (toff) {
1409 case 0:
1410 if (!(descr & TREE_DESCR_16_0))
1411 descr = pullup_descr_to_16(descr, 0);
1412 if (!(descr & TREE_DESCR_16_1))
1413 descr = pullup_descr_to_16(descr, 2);
1414 mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
1415 tl_assert( (descr & mask) == mask );
1416 descr &= ~mask;
1417 descr |= TREE_DESCR_32_0;
1418 break;
1419 case 4:
1420 if (!(descr & TREE_DESCR_16_2))
1421 descr = pullup_descr_to_16(descr, 4);
1422 if (!(descr & TREE_DESCR_16_3))
1423 descr = pullup_descr_to_16(descr, 6);
1424 mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
1425 tl_assert( (descr & mask) == mask );
1426 descr &= ~mask;
1427 descr |= TREE_DESCR_32_1;
1428 break;
1429 default:
1430 tl_assert(0);
1431 }
1432 return descr;
1433}
1434
1435static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
1436 switch (toff) {
1437 case 0: case 4:
1438 return 0 != (descr & TREE_DESCR_64);
1439 default:
1440 tl_assert(0);
1441 }
1442}
1443
1444static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
1445 switch (toff) {
1446 case 0:
1447 return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
1448 case 2:
1449 return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
1450 case 4:
1451 return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
1452 case 6:
1453 return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
1454 default:
1455 tl_assert(0);
1456 }
1457}
1458
1459/* ------------ Cache management ------------ */
1460
1461static void zsm_flush_cache ( void )
1462{
1463 shmem__flush_and_invalidate_scache();
1464}
1465
1466
1467static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
1468{
1469 tl_assert( sizeof(UWord) == sizeof(Addr) );
1470
1471 rcinc = p_rcinc;
1472 rcdec = p_rcdec;
1473
1474 tl_assert(map_shmem == NULL);
1475 map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
1476 HG_(free),
1477 NULL/*unboxed UWord cmp*/);
1478 tl_assert(map_shmem != NULL);
1479 shmem__invalidate_scache();
1480
1481 /* a SecMap must contain an integral number of CacheLines */
1482 tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
1483 /* also ... a CacheLine holds an integral number of trees */
1484 tl_assert(0 == (N_LINE_ARANGE % 8));
1485}
1486
1487/////////////////////////////////////////////////////////////////
1488/////////////////////////////////////////////////////////////////
1489// //
1490// SECTION END compressed shadow memory //
1491// //
1492/////////////////////////////////////////////////////////////////
1493/////////////////////////////////////////////////////////////////
1494
1495
1496
1497/////////////////////////////////////////////////////////////////
1498/////////////////////////////////////////////////////////////////
1499// //
1500// SECTION BEGIN vts primitives //
1501// //
1502/////////////////////////////////////////////////////////////////
1503/////////////////////////////////////////////////////////////////
1504
1505#ifndef __HB_VTS_H
1506#define __HB_VTS_H
1507
1508/* VtsIDs can't exceed 30 bits, since they have to be packed into the
1509 lowest 30 bits of an SVal. */
1510typedef UInt VtsID;
1511#define VtsID_INVALID 0xFFFFFFFF
1512
1513/* A VTS contains .ts, its vector clock, and also .id, a field to hold
1514 a backlink for the caller's convenience. Since we have no idea
1515 what to set that to in the library, it always gets set to
1516 VtsID_INVALID. */
1517typedef
1518 struct {
1519 VtsID id;
1520 XArray* ts; /* XArray* ScalarTS(abstract) */
1521 }
1522 VTS;
1523
1524
1525/* Create a new, empty VTS. */
1526VTS* VTS__new ( void );
1527
1528/* Delete this VTS in its entirety. */
1529void VTS__delete ( VTS* vts );
1530
1531/* Create a new singleton VTS. */
1532VTS* VTS__singleton ( Thr* thr, ULong tym );
1533
1534/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
1535 not modified. */
1536VTS* VTS__tick ( Thr* me, VTS* vts );
1537
1538/* Return a new VTS constructed as the join (max) of the 2 args.
1539 Neither arg is modified. */
1540VTS* VTS__join ( VTS* a, VTS* b );
1541
1542/* Compute the partial ordering relation of the two args. */
1543typedef
1544 enum { POrd_EQ=4, POrd_LT, POrd_GT, POrd_UN }
1545 POrd;
1546
1547POrd VTS__cmp ( VTS* a, VTS* b );
1548
1549/* Compute an arbitrary structural (total) ordering on the two args,
1550 based on their VCs, so they can be looked up in a table, tree, etc.
1551 Returns -1, 0 or 1. */
1552Word VTS__cmp_structural ( VTS* a, VTS* b );
1553
1554/* Debugging only. Display the given VTS in the buffer. */
1555void VTS__show ( HChar* buf, Int nBuf, VTS* vts );
1556
1557/* Debugging only. Return vts[index], so to speak. */
1558ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
1559
1560#endif /* ! __HB_VTS_H */
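/* Illustrative sketch only (guarded out): how the VTS primitives above
   compose.  The Thr* values are assumed to be supplied by the caller;
   the sequence mirrors what happens around a happens-before edge. */
#if 0
static void example_vts_usage ( Thr* thrA, Thr* thrB )
{
   VTS *a, *b, *j, *t;
   a = VTS__singleton( thrA, 1 );            /* {A:1} */
   b = VTS__singleton( thrB, 1 );            /* {B:1} */
   j = VTS__join( a, b );                    /* {A:1, B:1} */
   t = VTS__tick( thrB, j );                 /* {A:1, B:2}; j unchanged */
   tl_assert( VTS__cmp( j, t ) == POrd_LT ); /* j is ordered before t */
   tl_assert( VTS__cmp( a, b ) == POrd_UN ); /* unrelated singletons */
   VTS__delete(a); VTS__delete(b); VTS__delete(j); VTS__delete(t);
}
#endif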
1561
1562
1563/*--------------- to do with Vector Timestamps ---------------*/
1564
1565/* Scalar Timestamp */
1566typedef
1567 struct {
1568 Thr* thr;
1569 ULong tym;
1570 }
1571 ScalarTS;
1572
1573
1574static Bool is_sane_VTS ( VTS* vts )
1575{
1576 UWord i, n;
1577 ScalarTS *st1, *st2;
1578 if (!vts) return False;
1579 if (!vts->ts) return False;
1580 n = VG_(sizeXA)( vts->ts );
1581 if (n >= 2) {
1582 for (i = 0; i < n-1; i++) {
1583 st1 = VG_(indexXA)( vts->ts, i );
1584 st2 = VG_(indexXA)( vts->ts, i+1 );
1585 if (st1->thr >= st2->thr)
1586 return False;
1587 if (st1->tym == 0 || st2->tym == 0)
1588 return False;
1589 }
1590 }
1591 return True;
1592}
1593
1594
1595/* Create a new, empty VTS.
1596*/
1597VTS* VTS__new ( void )
1598{
1599 VTS* vts;
1600 vts = HG_(zalloc)( "libhb.VTS__new.1", sizeof(VTS) );
1601 tl_assert(vts);
1602 vts->id = VtsID_INVALID;
1603 vts->ts = VG_(newXA)( HG_(zalloc), "libhb.VTS__new.2",
1604 HG_(free), sizeof(ScalarTS) );
1605 tl_assert(vts->ts);
1606 return vts;
1607}
1608
1609
1610/* Delete this VTS in its entirety.
1611*/
1612void VTS__delete ( VTS* vts )
1613{
1614 tl_assert(vts);
1615 tl_assert(vts->ts);
1616 VG_(deleteXA)( vts->ts );
1617 HG_(free)(vts);
1618}
1619
1620
1621/* Create a new singleton VTS.
1622*/
1623VTS* VTS__singleton ( Thr* thr, ULong tym ) {
1624 ScalarTS st;
1625 VTS* vts;
1626 tl_assert(thr);
1627 tl_assert(tym >= 1);
1628 vts = VTS__new();
1629 st.thr = thr;
1630 st.tym = tym;
1631 VG_(addToXA)( vts->ts, &st );
1632 return vts;
1633}
1634
1635
1636/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
1637 not modified.
1638*/
1639VTS* VTS__tick ( Thr* me, VTS* vts )
1640{
1641 ScalarTS* here = NULL;
1642 ScalarTS tmp;
1643 VTS* res;
1644 Word i, n;
1645 tl_assert(me);
1646 tl_assert(is_sane_VTS(vts));
1647 //if (0) VG_(printf)("tick vts thrno %ld szin %d\n",
1648 // (Word)me->errmsg_index, (Int)VG_(sizeXA)(vts) );
1649 res = VTS__new();
1650 n = VG_(sizeXA)( vts->ts );
1651
1652 /* main loop doesn't handle zero-entry case correctly, so
1653 special-case it. */
1654 if (n == 0) {
1655 tmp.thr = me;
1656 tmp.tym = 1;
1657 VG_(addToXA)( res->ts, &tmp );
1658 tl_assert(is_sane_VTS(res));
1659 return res;
1660 }
1661
1662 for (i = 0; i < n; i++) {
1663 here = VG_(indexXA)( vts->ts, i );
1664 if (me < here->thr) {
1665 /* We just went past 'me', without seeing it. */
1666 tmp.thr = me;
1667 tmp.tym = 1;
1668 VG_(addToXA)( res->ts, &tmp );
1669 tmp = *here;
1670 VG_(addToXA)( res->ts, &tmp );
1671 i++;
1672 break;
1673 }
1674 else if (me == here->thr) {
1675 tmp = *here;
1676 tmp.tym++;
1677 VG_(addToXA)( res->ts, &tmp );
1678 i++;
1679 break;
1680 }
1681 else /* me > here->thr */ {
1682 tmp = *here;
1683 VG_(addToXA)( res->ts, &tmp );
1684 }
1685 }
1686 tl_assert(i >= 0 && i <= n);
1687 if (i == n && here && here->thr < me) {
1688 tmp.thr = me;
1689 tmp.tym = 1;
1690 VG_(addToXA)( res->ts, &tmp );
1691 } else {
1692 for (/*keepgoing*/; i < n; i++) {
1693 here = VG_(indexXA)( vts->ts, i );
1694 tmp = *here;
1695 VG_(addToXA)( res->ts, &tmp );
1696 }
1697 }
1698 tl_assert(is_sane_VTS(res));
1699 //if (0) VG_(printf)("tick vts thrno %ld szou %d\n",
1700 // (Word)me->errmsg_index, (Int)VG_(sizeXA)(res) );
1701 return res;
1702}
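/* Worked example (illustrative): ticking thread T2 in the VTS {T1:3}
   yields {T1:3, T2:1} (T2 gains its first timestamp), while ticking T1
   in the same VTS yields {T1:4}. */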
1703
1704
1705/* Return a new VTS constructed as the join (max) of the 2 args.
1706 Neither arg is modified.
1707*/
1708VTS* VTS__join ( VTS* a, VTS* b )
1709{
1710 Word ia, ib, useda, usedb;
1711 ULong tyma, tymb, tymMax;
1712 Thr* thr;
1713 VTS* res;
1714 ScalarTS *tmpa, *tmpb;
1715
1716 tl_assert(a && a->ts);
1717 tl_assert(b && b->ts);
1718 useda = VG_(sizeXA)( a->ts );
1719 usedb = VG_(sizeXA)( b->ts );
1720
1721 res = VTS__new();
1722 ia = ib = 0;
1723
1724 while (1) {
1725
1726 /* This logic is to enumerate triples (thr, tyma, tymb) drawn
1727 from a and b in order, where thr is the next Thr*
1728 occurring in either a or b, and tyma/b are the relevant
1729 scalar timestamps, taking into account implicit zeroes. */
1730 tl_assert(ia >= 0 && ia <= useda);
1731 tl_assert(ib >= 0 && ib <= usedb);
1732 tmpa = tmpb = NULL;
1733
1734 if (ia == useda && ib == usedb) {
1735 /* both empty - done */
1736 break;
1737 }
1738 else
1739 if (ia == useda && ib != usedb) {
1740 /* a empty, use up b */
1741 tmpb = VG_(indexXA)( b->ts, ib );
1742 thr = tmpb->thr;
1743 tyma = 0;
1744 tymb = tmpb->tym;
1745 ib++;
1746 }
1747 else
1748 if (ia != useda && ib == usedb) {
1749 /* b empty, use up a */
1750 tmpa = VG_(indexXA)( a->ts, ia );
1751 thr = tmpa->thr;
1752 tyma = tmpa->tym;
1753 tymb = 0;
1754 ia++;
1755 }
1756 else {
1757 /* both not empty; extract lowest-Thr*'d triple */
1758 tmpa = VG_(indexXA)( a->ts, ia );
1759 tmpb = VG_(indexXA)( b->ts, ib );
1760 if (tmpa->thr < tmpb->thr) {
1761 /* a has the lowest unconsidered Thr* */
1762 thr = tmpa->thr;
1763 tyma = tmpa->tym;
1764 tymb = 0;
1765 ia++;
1766 }
1767 else
1768 if (tmpa->thr > tmpb->thr) {
1769 /* b has the lowest unconsidered Thr* */
1770 thr = tmpb->thr;
1771 tyma = 0;
1772 tymb = tmpb->tym;
1773 ib++;
1774 } else {
1775 /* they both next mention the same Thr* */
1776 tl_assert(tmpa->thr == tmpb->thr);
1777 thr = tmpa->thr; /* == tmpb->thr */
1778 tyma = tmpa->tym;
1779 tymb = tmpb->tym;
1780 ia++;
1781 ib++;
1782 }
1783 }
1784
1785 /* having laboriously determined (thr, tyma, tymb), do something
1786 useful with it. */
1787 tymMax = tyma > tymb ? tyma : tymb;
1788 if (tymMax > 0) {
1789 ScalarTS st;
1790 st.thr = thr;
1791 st.tym = tymMax;
1792 VG_(addToXA)( res->ts, &st );
1793 }
1794
1795 }
1796
1797 tl_assert(is_sane_VTS( res ));
1798
1799 return res;
1800}
1801
1802
1803/* Compute the partial ordering relation of the two args.
1804*/
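/* The result is POrd_EQ if the two vectors are pointwise equal,
   POrd_LT if a <= b pointwise (but not equal), POrd_GT if a >= b
   pointwise (but not equal), and POrd_UN otherwise, with missing
   entries again treated as zero.  For example, [T1:1, T2:2] vs
   [T1:1, T2:3] is POrd_LT, whereas [T1:2] vs [T2:2] is POrd_UN. */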
1805POrd VTS__cmp ( VTS* a, VTS* b )
1806{
1807 Word ia, ib, useda, usedb;
1808 ULong tyma, tymb;
1809 Thr* thr;
1810 ScalarTS *tmpa, *tmpb;
1811
1812 Bool all_leq = True;
1813 Bool all_geq = True;
1814
1815 tl_assert(a && a->ts);
1816 tl_assert(b && b->ts);
1817 useda = VG_(sizeXA)( a->ts );
1818 usedb = VG_(sizeXA)( b->ts );
1819
1820 ia = ib = 0;
1821
1822 while (1) {
1823
1824 /* This logic is to enumerate triples (thr, tyma, tymb) drawn
1825 from a and b in order, where thr is the next Thr*
1826 occurring in either a or b, and tyma/b are the relevant
1827 scalar timestamps, taking into account implicit zeroes. */
1828 tl_assert(ia >= 0 && ia <= useda);
1829 tl_assert(ib >= 0 && ib <= usedb);
1830 tmpa = tmpb = NULL;
1831
1832 if (ia == useda && ib == usedb) {
1833 /* both empty - done */
1834 break;
1835 }
1836 else
1837 if (ia == useda && ib != usedb) {
1838 /* a empty, use up b */
1839 tmpb = VG_(indexXA)( b->ts, ib );
1840 thr = tmpb->thr;
1841 tyma = 0;
1842 tymb = tmpb->tym;
1843 ib++;
1844 }
1845 else
1846 if (ia != useda && ib == usedb) {
1847 /* b empty, use up a */
1848 tmpa = VG_(indexXA)( a->ts, ia );
1849 thr = tmpa->thr;
1850 tyma = tmpa->tym;
1851 tymb = 0;
1852 ia++;
1853 }
1854 else {
1855 /* both not empty; extract lowest-Thr*'d triple */
1856 tmpa = VG_(indexXA)( a->ts, ia );
1857 tmpb = VG_(indexXA)( b->ts, ib );
1858 if (tmpa->thr < tmpb->thr) {
1859 /* a has the lowest unconsidered Thr* */
1860 thr = tmpa->thr;
1861 tyma = tmpa->tym;
1862 tymb = 0;
1863 ia++;
1864 }
1865 else
1866 if (tmpa->thr > tmpb->thr) {
1867 /* b has the lowest unconsidered Thr* */
1868 thr = tmpb->thr;
1869 tyma = 0;
1870 tymb = tmpb->tym;
1871 ib++;
1872 } else {
1873 /* they both next mention the same Thr* */
1874 tl_assert(tmpa->thr == tmpb->thr);
1875 thr = tmpa->thr; /* == tmpb->thr */
1876 tyma = tmpa->tym;
1877 tymb = tmpb->tym;
1878 ia++;
1879 ib++;
1880 }
1881 }
1882
1883 /* having laboriously determined (thr, tyma, tymb), do something
1884 useful with it. */
1885 if (tyma < tymb)
1886 all_geq = False;
1887 if (tyma > tymb)
1888 all_leq = False;
1889 }
1890
1891 if (all_leq && all_geq)
1892 return POrd_EQ;
1893 /* now we know they aren't equal, so either all_leq or all_geq or
1894 both are false. */
1895 if (all_leq)
1896 return POrd_LT;
1897 if (all_geq)
1898 return POrd_GT;
1899 /* hmm, neither all_geq or all_leq. This means unordered. */
1900 return POrd_UN;
1901}
1902
1903
1904/* Compute an arbitrary structural (total) ordering on the two args,
1905 based on their VCs, so they can be looked up in a table, tree, etc.
1906 Returns -1, 0 or 1. (really just 'deriving Ord' :-)
1907*/
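/* (This is the comparator installed on vts_set below, so that VTSs are
   interned by value rather than by pointer.) */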
1908Word VTS__cmp_structural ( VTS* a, VTS* b )
1909{
1910 /* We just need to generate an arbitrary total ordering based on
1911 a->ts and b->ts. Preferably do it in a way which comes across likely
1912 differences relatively quickly. */
1913 Word i, useda, usedb;
1914 ScalarTS *tmpa, *tmpb;
1915
1916 tl_assert(a && a->ts);
1917 tl_assert(b && b->ts);
1918 useda = VG_(sizeXA)( a->ts );
1919 usedb = VG_(sizeXA)( b->ts );
1920
1921 if (useda < usedb) return -1;
1922 if (useda > usedb) return 1;
1923
1924 /* Same length vectors, so let's step through them together. */
1925 tl_assert(useda == usedb);
1926 for (i = 0; i < useda; i++) {
1927 tmpa = VG_(indexXA)( a->ts, i );
1928 tmpb = VG_(indexXA)( b->ts, i );
1929 if (tmpa->tym < tmpb->tym) return -1;
1930 if (tmpa->tym > tmpb->tym) return 1;
1931 if (tmpa->thr < tmpb->thr) return -1;
1932 if (tmpa->thr > tmpb->thr) return 1;
1933 }
1934
1935 /* They're identical. */
1936 return 0;
1937}
1938
1939
1940/* Debugging only. Display the given VTS in the buffer.
1941*/
1942void VTS__show ( HChar* buf, Int nBuf, VTS* vts ) {
1943 ScalarTS* st;
1944 HChar unit[64];
1945 Word i, n;
1946 Int avail = nBuf;
1947 tl_assert(vts && vts->ts);
1948 tl_assert(nBuf > 16);
1949 buf[0] = '[';
1950 buf[1] = 0;
1951 n = VG_(sizeXA)( vts->ts );
1952 for (i = 0; i < n; i++) {
1953 tl_assert(avail >= 40);
1954 st = VG_(indexXA)( vts->ts, i );
1955 VG_(memset)(unit, 0, sizeof(unit));
1956 VG_(sprintf)(unit, i < n-1 ? "%p:%lld " : "%p:%lld",
1957 st->thr, st->tym);
1958 if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
1959 VG_(strcat)(buf, " ...]");
1960 buf[nBuf-1] = 0;
1961 return;
1962 }
1963 VG_(strcat)(buf, unit);
1964 avail -= VG_(strlen)(unit);
1965 }
1966 VG_(strcat)(buf, "]");
1967 buf[nBuf-1] = 0;
1968}
1969
1970
1971/* Debugging only. Return vts[index], so to speak.
1972*/
1973ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) {
1974 UWord i, n;
1975 tl_assert(vts && vts->ts);
1976 n = VG_(sizeXA)( vts->ts );
1977 for (i = 0; i < n; i++) {
1978 ScalarTS* st = VG_(indexXA)( vts->ts, i );
1979 if (st->thr == idx)
1980 return st->tym;
1981 }
1982 return 0;
1983}
1984
1985
1986/////////////////////////////////////////////////////////////////
1987/////////////////////////////////////////////////////////////////
1988// //
1989// SECTION END vts primitives //
1990// //
1991/////////////////////////////////////////////////////////////////
1992/////////////////////////////////////////////////////////////////
1993
1994
1995
1996/////////////////////////////////////////////////////////////////
1997/////////////////////////////////////////////////////////////////
1998// //
1999// SECTION BEGIN main library //
2000// //
2001/////////////////////////////////////////////////////////////////
2002/////////////////////////////////////////////////////////////////
2003
2004
2005/////////////////////////////////////////////////////////
2006// //
2007// VTS set //
2008// //
2009/////////////////////////////////////////////////////////
2010
2011static WordFM* /* VTS* void void */ vts_set = NULL;
2012
2013static void vts_set_init ( void )
2014{
2015 tl_assert(!vts_set);
2016 vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
2017 HG_(free),
2018 (Word(*)(UWord,UWord))VTS__cmp_structural );
2019 tl_assert(vts_set);
2020}
2021
2022/* Given a newly made VTS, look in vts_set to see if we already have
2023 an identical one. If yes, free up this one and return instead a
2024 pointer to the existing one. If no, add this one to the set and
2025 return the same pointer. Caller differentiates the two cases by
2026 comparing returned pointer with the supplied one (although that
2027 does require that the supplied VTS is not already in the set).
2028*/
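/* Typical use -- see vts_tab__find_and_dealloc__or_add below: hand in
   a freshly built VTS (e.g. from VTS__join or VTS__tick), then compare
   the returned pointer with the argument to see whether the new VTS
   was retained or discarded in favour of an existing copy. */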
2029static VTS* vts_set__find_and_dealloc__or_add ( VTS* cand )
2030{
2031 UWord keyW, valW;
2032 /* lookup cand (by value) */
2033 if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
2034 /* found it */
2035 tl_assert(valW == 0);
2036 /* if this fails, cand (by ref) was already present (!) */
2037 tl_assert(keyW != (UWord)cand);
2038 VTS__delete(cand);
2039 return (VTS*)keyW;
2040 } else {
2041 /* not present. Add and return pointer to same. */
2042 VG_(addToFM)( vts_set, (UWord)cand, 0/*val is unused*/ );
2043 return cand;
2044 }
2045}
2046
2047
2048/////////////////////////////////////////////////////////
2049// //
2050// VTS table //
2051// //
2052/////////////////////////////////////////////////////////
2053
2054static void VtsID__invalidate_caches ( void ); /* fwds */
2055
2056/* A type to hold VTS table entries. Invariants:
2057 If .vts == NULL, then this entry is not in use, so:
2058 - .rc == 0
2059 - this entry is on the freelist (unfortunately, does not imply
2060      any constraints on the value of .freelink)
2061 If .vts != NULL, then this entry is in use:
2062 - .vts is findable in vts_set
2063 - .vts->id == this entry number
2064 - no specific value for .rc (even 0 is OK)
2065    - this entry is not on the freelist, so .freelink == VtsID_INVALID
2066*/
2067typedef
2068 struct {
2069 VTS* vts; /* vts, in vts_set */
2070 UWord rc; /* reference count - enough for entire aspace */
2071 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
2072 }
2073 VtsTE;
2074
2075/* The VTS table. */
2076static XArray* /* of VtsTE */ vts_tab = NULL;
2077
2078/* An index into the VTS table, indicating the start of the list of
2079 free (available for use) entries. If the list is empty, this is
2080 VtsID_INVALID. */
2081static VtsID vts_tab_freelist = VtsID_INVALID;
2082
2083/* Do a GC of vts_tab when the freelist becomes empty AND the size of
2084 vts_tab equals or exceeds this size. After GC, the value here is
2085 set appropriately so as to check for the next GC point. */
2086static Word vts_next_GC_at = 1000;
2087
2088static void vts_tab_init ( void )
2089{
2090 vts_tab
2091 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2092 HG_(free), sizeof(VtsTE) );
2093 vts_tab_freelist
2094 = VtsID_INVALID;
2095 tl_assert(vts_tab);
2096}
2097
2098/* Add ii to the free list, checking that it looks out-of-use. */
2099static void add_to_free_list ( VtsID ii )
2100{
2101 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2102 tl_assert(ie->vts == NULL);
2103 tl_assert(ie->rc == 0);
2104 tl_assert(ie->freelink == VtsID_INVALID);
2105 ie->freelink = vts_tab_freelist;
2106 vts_tab_freelist = ii;
2107}
2108
2109/* Get an entry from the free list. This will return VtsID_INVALID if
2110 the free list is empty. */
2111static VtsID get_from_free_list ( void )
2112{
2113 VtsID ii;
2114 VtsTE* ie;
2115 if (vts_tab_freelist == VtsID_INVALID)
2116 return VtsID_INVALID;
2117 ii = vts_tab_freelist;
2118 ie = VG_(indexXA)( vts_tab, ii );
2119 tl_assert(ie->vts == NULL);
2120 tl_assert(ie->rc == 0);
2121 vts_tab_freelist = ie->freelink;
2122 return ii;
2123}
2124
2125/* Produce a new VtsID that can be used, either by getting it from
2126 the freelist, or, if that is empty, by expanding vts_tab. */
2127static VtsID get_new_VtsID ( void )
2128{
2129 VtsID ii;
2130 VtsTE te;
2131 ii = get_from_free_list();
2132 if (ii != VtsID_INVALID)
2133 return ii;
2134 te.vts = NULL;
2135 te.rc = 0;
2136 te.freelink = VtsID_INVALID;
2137 ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2138 return ii;
2139}
2140
2141
2142/* Indirect callback from lib_zsm. */
2143static void VtsID__rcinc ( VtsID ii )
2144{
2145 VtsTE* ie;
2146 /* VG_(indexXA) does a range check for us */
2147 ie = VG_(indexXA)( vts_tab, ii );
2148 tl_assert(ie->vts); /* else it's not in use */
2149 tl_assert(ie->rc < ~0UL); /* else we can't continue */
2150 tl_assert(ie->vts->id == ii);
2151 ie->rc++;
2152}
2153
2154/* Indirect callback from lib_zsm. */
2155static void VtsID__rcdec ( VtsID ii )
2156{
2157 VtsTE* ie;
2158 /* VG_(indexXA) does a range check for us */
2159 ie = VG_(indexXA)( vts_tab, ii );
2160 tl_assert(ie->vts); /* else it's not in use */
2161 tl_assert(ie->rc > 0); /* else RC snafu */
2162 tl_assert(ie->vts->id == ii);
2163 ie->rc--;
2164}
2165
2166
2167/* Look up 'cand' in our collection of VTSs. If present, deallocate
2168 it and return the VtsID for the pre-existing version. If not
2169 present, add it to both vts_tab and vts_set, allocate a fresh VtsID
2170 for it, and return that. */
2171static VtsID vts_tab__find_and_dealloc__or_add ( VTS* cand )
2172{
2173 VTS* auld;
2174 tl_assert(cand->id == VtsID_INVALID);
2175 auld = vts_set__find_and_dealloc__or_add(cand);
2176 if (auld != cand) {
2177 /* We already have an Aulde one. Use that. */
2178 VtsTE* ie;
2179 tl_assert(auld->id != VtsID_INVALID);
2180 ie = VG_(indexXA)( vts_tab, auld->id );
2181 tl_assert(ie->vts == auld);
2182 return auld->id;
2183 } else {
2184 VtsID ii = get_new_VtsID();
2185 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2186 ie->vts = cand;
2187 ie->rc = 0;
2188 ie->freelink = VtsID_INVALID;
2189 cand->id = ii;
2190 return ii;
2191 }
2192}
2193
2194
2195static void show_vts_stats ( HChar* caller )
2196{
2197 UWord nSet, nTab, nLive;
2198 ULong totrc;
2199 UWord n, i;
2200 nSet = VG_(sizeFM)( vts_set );
2201 nTab = VG_(sizeXA)( vts_tab );
2202 totrc = 0;
2203 nLive = 0;
2204 n = VG_(sizeXA)( vts_tab );
2205 for (i = 0; i < n; i++) {
2206 VtsTE* ie = VG_(indexXA)( vts_tab, i );
2207 if (ie->vts) {
2208 nLive++;
2209 totrc += (ULong)ie->rc;
2210 } else {
2211 tl_assert(ie->rc == 0);
2212 }
2213 }
2214 VG_(printf)(" show_vts_stats %s\n", caller);
2215 VG_(printf)(" vts_tab size %4lu\n", nTab);
2216 VG_(printf)(" vts_tab live %4lu\n", nLive);
2217 VG_(printf)(" vts_set size %4lu\n", nSet);
2218 VG_(printf)(" total rc %4llu\n", totrc);
2219}
2220
2221/* NOT TO BE CALLED FROM WITHIN libzsm. */
sewardj8fd92d32008-11-20 23:17:01 +00002222__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00002223static void vts_tab__do_GC ( Bool show_stats )
2224{
2225 UWord i, nTab, nLive, nFreed;
2226
2227 /* check this is actually necessary. */
2228 tl_assert(vts_tab_freelist == VtsID_INVALID);
2229
2230 /* empty the caches for partial order checks and binary joins. We
2231 could do better and prune out the entries to be deleted, but it
2232 ain't worth the hassle. */
2233 VtsID__invalidate_caches();
2234
2235 /* First, make the reference counts up to date. */
2236 zsm_flush_cache();
2237
2238 nTab = VG_(sizeXA)( vts_tab );
2239
2240 if (show_stats) {
2241 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
2242 show_vts_stats("before GC");
2243 }
2244
2245 /* Now we can inspect the entire vts_tab. Any entries
2246 with zero .rc fields are now no longer in use and can be
2247      put back on the free list, removed from vts_set, and deleted. */
2248 nFreed = 0;
2249 for (i = 0; i < nTab; i++) {
2250 Bool present;
2251 UWord oldK = 0, oldV = 0;
2252 VtsTE* te = VG_(indexXA)( vts_tab, i );
2253 if (te->vts == NULL) {
2254 tl_assert(te->rc == 0);
2255 continue; /* already on the free list (presumably) */
2256 }
2257 if (te->rc > 0)
2258 continue; /* in use */
2259 /* Ok, we got one we can free. */
2260 tl_assert(te->vts->id == i);
2261 /* first, remove it from vts_set. */
2262 present = VG_(delFromFM)( vts_set,
2263 &oldK, &oldV, (UWord)te->vts );
2264 tl_assert(present); /* else it isn't in vts_set ?! */
2265 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2266 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
2267 /* now free the VTS itself */
2268 VTS__delete(te->vts);
2269 te->vts = NULL;
2270 /* and finally put this entry on the free list */
2271 tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
2272 add_to_free_list( i );
2273 nFreed++;
2274 }
2275
2276 /* Now figure out when the next GC should be. We'll allow the
2277 number of VTSs to double before GCing again. Except of course
2278 that since we can't (or, at least, don't) shrink vts_tab, we
2279      can't set the threshold value smaller than it. */
2280 tl_assert(nFreed <= nTab);
2281 nLive = nTab - nFreed;
2282 tl_assert(nLive >= 0 && nLive <= nTab);
2283 vts_next_GC_at = 2 * nLive;
2284 if (vts_next_GC_at < nTab)
2285 vts_next_GC_at = nTab;
2286
2287 if (show_stats) {
2288 show_vts_stats("after GC");
2289 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
2290 }
2291
sewardjd024ae52008-11-09 20:47:57 +00002292 if (VG_(clo_verbosity) > 1) {
sewardjf98e1c02008-10-25 16:22:41 +00002293 static UInt ctr = 0;
2294 tl_assert(nTab > 0);
sewardjd024ae52008-11-09 20:47:57 +00002295 VG_(message)(Vg_DebugMsg,
2296 "libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)",
sewardjf98e1c02008-10-25 16:22:41 +00002297 ctr++, nTab, nLive, (100ULL * nLive) / nTab);
2298 }
2299}
2300
2301
2302/////////////////////////////////////////////////////////
2303// //
2304// Vts IDs //
2305// //
2306/////////////////////////////////////////////////////////
2307
2308//////////////////////////
2309static ULong stats__getOrdering_queries = 0;
2310static ULong stats__getOrdering_misses = 0;
2311static ULong stats__join2_queries = 0;
2312static ULong stats__join2_misses = 0;
2313
2314static inline UInt ROL32 ( UInt w, Int n ) {
2315 w = (w << n) | (w >> (32-n));
2316 return w;
2317}
2318static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
2319 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
2320 return hash % nTab;
2321}
2322
2323#define N_GETORDERING_CACHE 1023
2324static
2325 struct { VtsID vi1; VtsID vi2; POrd ord; }
2326 getOrdering_cache[N_GETORDERING_CACHE];
2327
2328#define N_JOIN2_CACHE 1023
2329static
2330 struct { VtsID vi1; VtsID vi2; VtsID res; }
2331 join2_cache[N_JOIN2_CACHE];
2332
2333static void VtsID__invalidate_caches ( void ) {
2334 Int i;
2335 for (i = 0; i < N_GETORDERING_CACHE; i++) {
2336 getOrdering_cache[i].vi1 = VtsID_INVALID;
2337 getOrdering_cache[i].vi2 = VtsID_INVALID;
2338 getOrdering_cache[i].ord = 0; /* an invalid POrd value */
2339 }
2340 for (i = 0; i < N_JOIN2_CACHE; i++) {
2341 join2_cache[i].vi1 = VtsID_INVALID;
2342 join2_cache[i].vi2 = VtsID_INVALID;
2343 join2_cache[i].res = VtsID_INVALID;
2344 }
2345}
2346//////////////////////////
2347
sewardjd52392d2008-11-08 20:36:26 +00002348//static Bool VtsID__is_valid ( VtsID vi ) {
2349// VtsTE* ve;
2350// if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
2351// return False;
2352// ve = VG_(indexXA)( vts_tab, vi );
2353// if (!ve->vts)
2354// return False;
2355// tl_assert(ve->vts->id == vi);
2356// return True;
2357//}
sewardjf98e1c02008-10-25 16:22:41 +00002358
2359static VTS* VtsID__to_VTS ( VtsID vi ) {
2360 VtsTE* te = VG_(indexXA)( vts_tab, vi );
2361 tl_assert(te->vts);
2362 return te->vts;
2363}
2364
2365static void VtsID__pp ( VtsID vi ) {
2366 HChar buf[100];
2367 VTS* vts = VtsID__to_VTS(vi);
2368 VTS__show( buf, sizeof(buf)-1, vts );
2369 buf[sizeof(buf)-1] = 0;
2370 VG_(printf)("%s", buf);
2371}
2372
2373/* compute partial ordering relation of vi1 and vi2. */
2374__attribute__((noinline))
2375static POrd VtsID__getOrdering_WRK ( VtsID vi1, VtsID vi2 ) {
2376 UInt hash;
2377 POrd ord;
2378 VTS *v1, *v2;
2379 //if (vi1 == vi2) return POrd_EQ;
2380 tl_assert(vi1 != vi2);
2381 ////++
2382 stats__getOrdering_queries++;
2383 hash = hash_VtsIDs(vi1, vi2, N_GETORDERING_CACHE);
2384 if (getOrdering_cache[hash].vi1 == vi1
2385 && getOrdering_cache[hash].vi2 == vi2)
2386 return getOrdering_cache[hash].ord;
2387 stats__getOrdering_misses++;
2388 ////--
2389 v1 = VtsID__to_VTS(vi1);
2390 v2 = VtsID__to_VTS(vi2);
2391 ord = VTS__cmp( v1, v2 );
2392 ////++
2393 getOrdering_cache[hash].vi1 = vi1;
2394 getOrdering_cache[hash].vi2 = vi2;
2395 getOrdering_cache[hash].ord = ord;
2396 ////--
2397 return ord;
2398}
2399static inline POrd VtsID__getOrdering ( VtsID vi1, VtsID vi2 ) {
2400 return vi1 == vi2 ? POrd_EQ : VtsID__getOrdering_WRK(vi1, vi2);
2401}
2402
2403/* compute binary join */
2404__attribute__((noinline))
2405static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
2406 UInt hash;
2407 VtsID res;
2408 VTS *vts1, *vts2, *nyu;
2409 //if (vi1 == vi2) return vi1;
2410 tl_assert(vi1 != vi2);
2411 ////++
2412 stats__join2_queries++;
2413 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
2414 if (join2_cache[hash].vi1 == vi1
2415 && join2_cache[hash].vi2 == vi2)
2416 return join2_cache[hash].res;
2417 stats__join2_misses++;
2418 ////--
2419 vts1 = VtsID__to_VTS(vi1);
2420 vts2 = VtsID__to_VTS(vi2);
2421 nyu = VTS__join(vts1,vts2);
2422 res = vts_tab__find_and_dealloc__or_add(nyu);
2423 ////++
2424 join2_cache[hash].vi1 = vi1;
2425 join2_cache[hash].vi2 = vi2;
2426 join2_cache[hash].res = res;
2427 ////--
2428 return res;
2429}
2430static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
2431 return vi1 == vi2 ? vi1 : VtsID__join2_WRK(vi1, vi2);
2432}
2433
2434/* create a singleton VTS, namely [thr:1] */
2435static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
2436 VTS* nyu = VTS__singleton(thr,tym);
2437 return vts_tab__find_and_dealloc__or_add(nyu);
2438}
2439
2440/* tick operation, creates value 1 if specified index is absent */
2441static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
2442 VTS* vts = VtsID__to_VTS(vi);
2443 VTS* nyu = VTS__tick(idx,vts);
2444 return vts_tab__find_and_dealloc__or_add(nyu);
2445}
2446
2447/* index into a VTS (only for assertions) */
2448static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
2449 VTS* vts = VtsID__to_VTS(vi);
2450 return VTS__indexAt_SLOW( vts, idx );
2451}
2452
2453
2454/////////////////////////////////////////////////////////
2455// //
2456// Threads //
2457// //
2458/////////////////////////////////////////////////////////
2459
2460struct _Thr {
2461 /* Current VTSs for this thread. They change as we go along. viR
2462 is the VTS to be used for reads, viW for writes. Usually they
2463 are the same, but can differ when we deal with reader-writer
2464 locks. It is always the case that VtsID__getOrdering(viW,viR)
2465      == POrd_LT or POrd_EQ -- that is, viW must be the same as, or
2466      lag behind, viR. */
2467 VtsID viR;
2468 VtsID viW;
2469 /* opaque (to us) data we hold on behalf of the library's user. */
2470 void* opaque;
2471};
2472
2473static Thr* Thr__new ( void ) {
2474 Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
2475 thr->viR = VtsID_INVALID;
2476 thr->viW = VtsID_INVALID;
2477 return thr;
2478}
2479
2480
2481/////////////////////////////////////////////////////////
2482// //
2483// Shadow Values //
2484// //
2485/////////////////////////////////////////////////////////
2486
2487// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
2488// hb_zsm.h. We have to do everything else here.
2489
2490/* SVal is 64 bit unsigned int.
2491
2492 <---------30---------> <---------30--------->
2493 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
2494 01 X--------------------X XX X--------------------X E(rror)
2495 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
2496 11 X--------------------X XX X--------------------X I: SVal_INVALID
2497*/
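/* So, for a C value, SVal__mkC below simply places the Rmin VtsID in
   bits 63..32 and the Wmin VtsID in bits 31..0; provided VtsIDs fit in
   30 bits, as the layout above assumes, the tag bits come out as 00.
   For example, SVal__mkC(0x5, 0x7) == 0x0000000500000007ULL. */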
2498#define SVAL_TAGMASK (3ULL << 62)
2499
2500static inline Bool SVal__isC ( SVal s ) {
2501 return (0ULL << 62) == (s & SVAL_TAGMASK);
2502}
2503static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
2504 //tl_assert(VtsID__is_valid(rmini));
2505 //tl_assert(VtsID__is_valid(wmini));
2506 return (((ULong)rmini) << 32) | ((ULong)wmini);
2507}
2508static inline VtsID SVal__unC_Rmin ( SVal s ) {
2509 tl_assert(SVal__isC(s));
2510 return (VtsID)(s >> 32);
2511}
2512static inline VtsID SVal__unC_Wmin ( SVal s ) {
2513 tl_assert(SVal__isC(s));
2514 return (VtsID)(s & 0xFFFFFFFFULL);
2515}
2516
2517static Bool SVal__isE ( SVal s ) {
2518 return (1ULL << 62) == (s & SVAL_TAGMASK);
2519}
2520static SVal SVal__mkE ( void ) {
2521 return 1ULL << 62;
2522}
2523
2524static Bool SVal__isA ( SVal s ) {
2525 return (2ULL << 62) == (s & SVAL_TAGMASK);
2526}
2527static SVal SVal__mkA ( void ) {
2528 return 2ULL << 62;
2529}
2530
2531/* Direct callback from lib_zsm. */
2532static void SVal__rcinc ( SVal s ) {
2533 if (SVal__isC(s)) {
2534 VtsID__rcinc( SVal__unC_Rmin(s) );
2535 VtsID__rcinc( SVal__unC_Wmin(s) );
2536 }
2537}
2538
2539/* Direct callback from lib_zsm. */
2540static void SVal__rcdec ( SVal s ) {
2541 if (SVal__isC(s)) {
2542 VtsID__rcdec( SVal__unC_Rmin(s) );
2543 VtsID__rcdec( SVal__unC_Wmin(s) );
2544 }
2545}
2546
2547
2548/////////////////////////////////////////////////////////
2549// //
sewardjd86e3a22008-12-03 11:39:37 +00002550// A simple group (memory) allocator //
2551// //
2552/////////////////////////////////////////////////////////
2553
2554//////////////// BEGIN general group allocator
2555typedef
2556 struct {
2557 UWord elemSzB; /* element size */
2558 UWord nPerGroup; /* # elems per group */
2559 void* (*alloc)(HChar*, SizeT); /* group allocator */
2560 HChar* cc; /* group allocator's cc */
2561 void (*free)(void*); /* group allocator's free-er (unused) */
2562 /* XArray of void* (pointers to groups). The groups themselves.
2563 Each element is a pointer to a block of size (elemSzB *
2564 nPerGroup) bytes. */
2565 XArray* groups;
2566 /* next free element. Is a pointer to an element in one of the
2567 groups pointed to by .groups. */
2568 void* nextFree;
2569 }
2570 GroupAlloc;
2571
2572static void init_GroupAlloc ( /*MOD*/GroupAlloc* ga,
2573 UWord elemSzB,
2574 UWord nPerGroup,
2575 void* (*alloc)(HChar*, SizeT),
2576 HChar* cc,
2577 void (*free)(void*) )
2578{
2579 tl_assert(0 == (elemSzB % sizeof(UWord)));
2580 tl_assert(elemSzB >= sizeof(UWord));
2581 tl_assert(nPerGroup >= 100); /* let's say */
2582 tl_assert(alloc);
2583 tl_assert(cc);
2584 tl_assert(free);
2585 tl_assert(ga);
2586 VG_(memset)(ga, 0, sizeof(*ga));
2587 ga->elemSzB = elemSzB;
2588 ga->nPerGroup = nPerGroup;
2589 ga->groups = NULL;
2590 ga->alloc = alloc;
2591 ga->cc = cc;
2592 ga->free = free;
2593 ga->groups = VG_(newXA)( alloc, cc, free, sizeof(void*) );
2594 ga->nextFree = NULL;
2595 tl_assert(ga->groups);
2596}
2597
2598/* The freelist is empty. Allocate a new group and put all the new
2599 elements in it onto the freelist. */
2600__attribute__((noinline))
2601static void gal_add_new_group ( GroupAlloc* ga )
2602{
2603 Word i;
2604 UWord* group;
2605 tl_assert(ga);
2606 tl_assert(ga->nextFree == NULL);
2607 group = ga->alloc( ga->cc, ga->elemSzB * ga->nPerGroup );
2608 tl_assert(group);
2609 /* extend the freelist through the new group. Place the freelist
2610 pointer in the first word of each element. That's why the
2611 element size must be at least one word. */
2612 for (i = ga->nPerGroup-1; i >= 0; i--) {
2613 UChar* elemC = ((UChar*)group) + i * ga->elemSzB;
2614 UWord* elem = (UWord*)elemC;
2615 tl_assert(0 == (((UWord)elem) % sizeof(UWord)));
2616 *elem = (UWord)ga->nextFree;
2617 ga->nextFree = elem;
2618 }
2619 /* and add to our collection of groups */
2620 VG_(addToXA)( ga->groups, &group );
2621}
2622
2623inline static void* gal_Alloc ( GroupAlloc* ga )
2624{
2625 UWord* elem;
2626 if (UNLIKELY(ga->nextFree == NULL)) {
2627 gal_add_new_group(ga);
2628 }
2629 elem = ga->nextFree;
2630 ga->nextFree = (void*)*elem;
2631 *elem = 0; /* unnecessary, but just to be on the safe side */
2632 return elem;
2633}
2634
2635inline static void* gal_Alloc_w_size_check ( GroupAlloc* ga, SizeT n )
2636{
2637 tl_assert(n == ga->elemSzB);
2638 return gal_Alloc( ga );
2639}
2640
2641inline static void gal_Free ( GroupAlloc* ga, void* p )
2642{
2643 UWord* elem = (UWord*)p;
2644 *elem = (UWord)ga->nextFree;
2645 ga->nextFree = elem;
2646}
2647//////////////// END general group allocator
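/* Usage pattern (as per the RCEC and OldRef allocators further down):
   call init_GroupAlloc once with a fixed element size, then use
   gal_Alloc and gal_Free for individual elements.  Free elements are
   chained through their first word, hence the requirement above that
   elemSzB be at least one UWord. */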
2648
2649
2650/////////////////////////////////////////////////////////
2651// //
sewardjf98e1c02008-10-25 16:22:41 +00002652// Change-event map2 //
2653// //
2654/////////////////////////////////////////////////////////
2655
2656#define EVENT_MAP_GC_AT (1 * 1000 * 1000)
2657#define EVENT_MAP_GC_DISCARD_FRACTION 0.5
2658
2659/* This is in two parts:
2660
2661 1. An OSet of RCECs. This is a set of reference-counted stack
2662 traces. When the reference count of a stack trace becomes zero,
2663 it is removed from the set and freed up. The intent is to have
2664 a set of stack traces which can be referred to from (2), but to
2665 only represent each one once. The set is indexed/searched by
2666 ordering on the stack trace vectors.
2667
2668 2. An OSet of OldRefs. These store information about each old ref
2669 that we need to record. It is indexed by address of the
2670 location for which the information is recorded. For LRU
2671 purposes, each OldRef also contains a generation number,
2672 indicating when it was most recently accessed.
2673
2674 The important part of an OldRef is, however, its accs[] array.
2675 This is an array of N_OLDREF_ACCS pairs of Thr and a RCEC. This
2676 allows us to collect the last access-traceback by up to
2677 N_OLDREF_ACCS different threads for this location. The accs[]
2678 array is a MTF-array. If a pair falls off the end, that's too
2679 bad -- we will lose info about that thread's access to this
2680 location.
2681
2682 When this OSet becomes too big, we can throw away the entries
2683 whose generation numbers are below some threshold; hence doing
2684 approximate LRU discarding. For each discarded OldRef we must
2685      of course decrement the reference count on all the RCECs it
2686 refers to, in order that entries from (1) eventually get
2687 discarded too.
2688*/
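/* By way of example: if threads T1, T2, T3 and then T4 access the same
   location in turn, its OldRef ends up holding the last-access contexts
   for (most recent first) T4, T3 and T2; T1's pair has fallen off the
   end of the 3-entry accs[] array and the reference count of its RCEC
   has been decremented accordingly. */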
2689
2690
2691static UWord stats__ctxt_rcdec1 = 0;
2692static UWord stats__ctxt_rcdec2 = 0;
2693static UWord stats__ctxt_rcdec3 = 0;
2694static UWord stats__ctxt_rcdec_calls = 0;
2695static UWord stats__ctxt_rcdec_discards = 0;
2696static UWord stats__ctxt_rcdec1_eq = 0;
2697
2698static UWord stats__ctxt_tab_curr = 0;
2699static UWord stats__ctxt_tab_max = 0;
2700
2701static UWord stats__ctxt_tab_qs = 0;
2702static UWord stats__ctxt_tab_cmps = 0;
2703
2704
2705///////////////////////////////////////////////////////
2706//// Part (1): An OSet of RCECs
2707///
2708
2709#define N_FRAMES 8
2710
2711// (UInt) `echo "Reference Counted Execution Context" | md5sum`
2712#define RCEC_MAGIC 0xab88abb2UL
2713
2714//#define N_RCEC_TAB 98317 /* prime */
2715#define N_RCEC_TAB 196613 /* prime */
2716
2717typedef
2718 struct _RCEC {
sewardjd86e3a22008-12-03 11:39:37 +00002719 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00002720 struct _RCEC* next;
sewardjf98e1c02008-10-25 16:22:41 +00002721 UWord rc;
2722 UWord rcX; /* used for crosschecking */
2723 UWord frames[1 + N_FRAMES]; /* first word is hash of all the rest */
2724 }
2725 RCEC;
2726
2727static RCEC** contextTab = NULL; /* hash table of RCEC*s */
2728
2729
2730/* Gives an arbitrary total order on RCEC .frames fields */
2731static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
2732 Word i;
2733 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
2734 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
2735 if (ec1->frames[0] < ec2->frames[0]) return -1;
2736 if (ec1->frames[0] > ec2->frames[0]) return 1;
2737 for (i = 1; i < 1 + N_FRAMES; i++) {
2738 if (ec1->frames[i] < ec2->frames[i]) return -1;
2739 if (ec1->frames[i] > ec2->frames[i]) return 1;
2740 }
2741 return 0;
2742}
2743
2744
2745/* Dec the ref of this RCEC. */
2746static void ctxt__rcdec ( RCEC* ec )
2747{
2748 stats__ctxt_rcdec_calls++;
2749 tl_assert(ec && ec->magic == RCEC_MAGIC);
2750 tl_assert(ec->rc > 0);
2751 ec->rc--;
2752}
2753
2754static void ctxt__rcinc ( RCEC* ec )
2755{
2756 tl_assert(ec && ec->magic == RCEC_MAGIC);
2757 ec->rc++;
2758}
2759
2760
sewardjd86e3a22008-12-03 11:39:37 +00002761//////////// BEGIN RCEC group allocator
2762static GroupAlloc rcec_group_allocator;
2763
2764static RCEC* alloc_RCEC ( void ) {
2765 return gal_Alloc ( &rcec_group_allocator );
2766}
2767
2768static void free_RCEC ( RCEC* rcec ) {
2769 tl_assert(rcec->magic == RCEC_MAGIC);
2770 gal_Free( &rcec_group_allocator, rcec );
2771}
2772//////////// END RCEC group allocator
2773
2774
sewardjf98e1c02008-10-25 16:22:41 +00002775/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
2776   move it one step closer to the front of the list, so as to make
2777 subsequent searches for it cheaper. */
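/* For example, with the chain  head -> A -> B -> C -> D,  moving C
   gives  head -> A -> C -> B -> D,  and moving B (second in the list)
   gives  head -> B -> A -> C -> D. */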
2778static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
2779{
2780 RCEC *ec0, *ec1, *ec2;
2781 if (ec == *headp)
2782 tl_assert(0); /* already at head of list */
2783 tl_assert(ec != NULL);
2784 ec0 = *headp;
2785 ec1 = NULL;
2786 ec2 = NULL;
2787 while (True) {
2788 if (ec0 == NULL || ec0 == ec) break;
2789 ec2 = ec1;
2790 ec1 = ec0;
2791 ec0 = ec0->next;
2792 }
2793 tl_assert(ec0 == ec);
2794 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
2795 RCEC* tmp;
2796 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
2797 predecessor. Swap ec0 and ec1, that is, move ec0 one step
2798 closer to the start of the list. */
2799 tl_assert(ec2->next == ec1);
2800 tl_assert(ec1->next == ec0);
2801 tmp = ec0->next;
2802 ec2->next = ec0;
2803 ec0->next = ec1;
2804 ec1->next = tmp;
2805 }
2806 else
2807 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
2808 /* it's second in the list. */
2809 tl_assert(*headp == ec1);
2810 tl_assert(ec1->next == ec0);
2811 ec1->next = ec0->next;
2812 ec0->next = ec1;
2813 *headp = ec0;
2814 }
2815}
2816
2817
2818/* Find the given RCEC in the tree, and return a pointer to it. Or,
2819 if not present, add the given one to the tree (by making a copy of
2820 it, so the caller can immediately deallocate the original) and
2821 return a pointer to the copy. The caller can safely have 'example'
2822 on its stack, since we will always return a pointer to a copy of
2823 it, not to the original. Note that the inserted node will have .rc
2824   of zero and so the caller must immediately increment it. */
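/* The intended call sequence -- see get_RCEC and event_map_bind below
   -- is: fill in a stack-allocated RCEC with .rc == 0, pass it here,
   and ctxt__rcinc the returned pointer when storing it into an OldRef
   slot. */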
2825__attribute__((noinline))
2826static RCEC* ctxt__find_or_add ( RCEC* example )
2827{
2828 UWord hent;
2829 RCEC* copy;
2830 tl_assert(example && example->magic == RCEC_MAGIC);
2831 tl_assert(example->rc == 0);
2832
2833 /* Search the hash table to see if we already have it. */
2834 stats__ctxt_tab_qs++;
2835 hent = example->frames[0] % N_RCEC_TAB;
2836 copy = contextTab[hent];
2837 while (1) {
2838 if (!copy) break;
2839 tl_assert(copy->magic == RCEC_MAGIC);
2840 stats__ctxt_tab_cmps++;
2841 if (0 == RCEC__cmp_by_frames(copy, example)) break;
2842 copy = copy->next;
2843 }
2844
2845 if (copy) {
2846 tl_assert(copy != example);
2847 /* optimisation: if it's not at the head of its list, move 1
2848 step fwds, to make future searches cheaper */
2849 if (copy != contextTab[hent]) {
2850 move_RCEC_one_step_forward( &contextTab[hent], copy );
2851 }
2852 } else {
sewardjd86e3a22008-12-03 11:39:37 +00002853 copy = alloc_RCEC();
sewardjf98e1c02008-10-25 16:22:41 +00002854 tl_assert(copy != example);
2855 *copy = *example;
2856 copy->next = contextTab[hent];
2857 contextTab[hent] = copy;
2858 stats__ctxt_tab_curr++;
2859 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
2860 stats__ctxt_tab_max = stats__ctxt_tab_curr;
2861 }
2862 return copy;
2863}
2864
2865static inline UWord ROLW ( UWord w, Int n )
2866{
2867 Int bpw = 8 * sizeof(UWord);
2868 w = (w << n) | (w >> (bpw-n));
2869 return w;
2870}
2871
2872__attribute__((noinline))
2873static RCEC* get_RCEC ( Thr* thr )
2874{
2875 UWord hash, i;
2876 RCEC example;
2877 example.magic = RCEC_MAGIC;
2878 example.rc = 0;
2879 example.rcX = 0;
2880 main_get_stacktrace( thr, &example.frames[1], N_FRAMES );
2881 hash = 0;
2882 for (i = 1; i < 1 + N_FRAMES; i++) {
2883 hash ^= example.frames[i];
2884 hash = ROLW(hash, 19);
2885 }
2886 example.frames[0] = hash;
2887 return ctxt__find_or_add( &example );
2888}
2889
2890///////////////////////////////////////////////////////
sewardjbc307e52008-12-06 22:10:54 +00002891//// Part (2):
2892/// A SparseWA guest-addr -> OldRef, that refers to (1)
sewardjf98e1c02008-10-25 16:22:41 +00002893///
2894
2895// (UInt) `echo "Old Reference Information" | md5sum`
2896#define OldRef_MAGIC 0x30b1f075UL
2897
2898typedef struct { Thr* thr; RCEC* rcec; } Thr_n_RCEC;
2899
2900#define N_OLDREF_ACCS 3
2901
2902typedef
2903 struct {
sewardjd86e3a22008-12-03 11:39:37 +00002904 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00002905 UWord gen; /* when most recently accessed */
sewardjd86e3a22008-12-03 11:39:37 +00002906 /* or free list when not in use */
sewardjf98e1c02008-10-25 16:22:41 +00002907 /* unused slots in this array have .thr == NULL */
2908 Thr_n_RCEC accs[N_OLDREF_ACCS];
2909 }
2910 OldRef;
2911
sewardjd86e3a22008-12-03 11:39:37 +00002912
2913//////////// BEGIN OldRef group allocator
2914static GroupAlloc oldref_group_allocator;
2915
2916static OldRef* alloc_OldRef ( void ) {
2917 return gal_Alloc ( &oldref_group_allocator );
2918}
2919
2920static void free_OldRef ( OldRef* r ) {
2921 tl_assert(r->magic == OldRef_MAGIC);
2922 gal_Free( &oldref_group_allocator, r );
2923}
2924//////////// END OldRef group allocator
2925
sewardjd86e3a22008-12-03 11:39:37 +00002926
sewardjbc307e52008-12-06 22:10:54 +00002927static SparseWA* oldrefTree = NULL; /* SparseWA* OldRef* */
2928static UWord oldrefGen = 0; /* current LRU generation # */
2929static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
2930static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */
sewardjf98e1c02008-10-25 16:22:41 +00002931
2932static void event_map_bind ( Addr a, Thr* thr )
2933{
sewardjd86e3a22008-12-03 11:39:37 +00002934 OldRef* ref;
2935 RCEC* here;
2936 Word i, j;
2937 UWord keyW, valW;
2938 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00002939
sewardjbc307e52008-12-06 22:10:54 +00002940 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, a );
sewardjf98e1c02008-10-25 16:22:41 +00002941
sewardjd86e3a22008-12-03 11:39:37 +00002942 if (b) {
sewardjf98e1c02008-10-25 16:22:41 +00002943
2944 /* We already have a record for this address. We now need to
2945 see if we have a stack trace pertaining to this thread's
2946 access. */
sewardjd86e3a22008-12-03 11:39:37 +00002947 tl_assert(keyW == a);
2948 ref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00002949 tl_assert(ref->magic == OldRef_MAGIC);
2950
2951 tl_assert(thr);
2952 for (i = 0; i < N_OLDREF_ACCS; i++) {
2953 if (ref->accs[i].thr == thr)
2954 break;
2955 }
2956
2957 if (i < N_OLDREF_ACCS) {
2958 /* thread 'thr' has an entry at index 'i'. Update it. */
2959 if (i > 0) {
2960 Thr_n_RCEC tmp = ref->accs[i-1];
2961 ref->accs[i-1] = ref->accs[i];
2962 ref->accs[i] = tmp;
2963 i--;
2964 }
2965 here = get_RCEC( thr );
2966 if (here == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
2967 ctxt__rcinc( here );
2968 stats__ctxt_rcdec1++;
2969 ctxt__rcdec( ref->accs[i].rcec );
2970 ref->accs[i].rcec = here;
2971 tl_assert(ref->accs[i].thr == thr);
2972 } else {
2973 here = get_RCEC( thr );
2974 ctxt__rcinc( here );
2975 /* No entry for this thread. Shuffle all of them down one
2976 slot, and put the new entry at the start of the array. */
2977 if (ref->accs[N_OLDREF_ACCS-1].thr) {
2978 /* the last slot is in use. We must dec the rc on the
2979 associated rcec. */
2980 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
2981 stats__ctxt_rcdec2++;
2982 ctxt__rcdec(ref->accs[N_OLDREF_ACCS-1].rcec);
2983 } else {
2984 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
2985 }
2986 for (j = N_OLDREF_ACCS-1; j >= 1; j--)
2987 ref->accs[j] = ref->accs[j-1];
2988 ref->accs[0].thr = thr;
2989 ref->accs[0].rcec = here;
2990 tl_assert(thr); /* thr==NULL is used to signify an empty slot,
2991 so we can't add a NULL thr. */
2992 }
2993
2994 ref->gen = oldrefGen;
sewardjf98e1c02008-10-25 16:22:41 +00002995
2996 } else {
2997
2998 /* We don't have a record for this address. Create a new one. */
2999 if (oldrefTreeN >= oldrefGenIncAt) {
3000 oldrefGen++;
3001 oldrefGenIncAt = oldrefTreeN + 50000;
3002 if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
3003 oldrefGen, oldrefTreeN );
3004 }
3005 here = get_RCEC( thr );
3006 ctxt__rcinc(here);
sewardjd86e3a22008-12-03 11:39:37 +00003007
3008
3009 ref = alloc_OldRef();
sewardjf98e1c02008-10-25 16:22:41 +00003010 ref->magic = OldRef_MAGIC;
3011 ref->gen = oldrefGen;
sewardjf98e1c02008-10-25 16:22:41 +00003012 ref->accs[0].rcec = here;
3013 ref->accs[0].thr = thr;
3014 tl_assert(thr); /* thr==NULL is used to signify an empty slot,
3015 so we can't add a NULL thr. */
3016 for (j = 1; j < N_OLDREF_ACCS; j++) {
3017 ref->accs[j].thr = NULL;
3018 ref->accs[j].rcec = NULL;
3019 }
sewardjbc307e52008-12-06 22:10:54 +00003020 VG_(addToSWA)( oldrefTree, a, (UWord)ref );
sewardjf98e1c02008-10-25 16:22:41 +00003021 oldrefTreeN++;
3022
3023 }
3024}
3025
3026
3027static
sewardjd52392d2008-11-08 20:36:26 +00003028Bool event_map_lookup ( /*OUT*/ExeContext** resEC,
sewardjf98e1c02008-10-25 16:22:41 +00003029 /*OUT*/Thr** resThr,
3030 Thr* thr_acc, Addr a )
3031{
sewardjd86e3a22008-12-03 11:39:37 +00003032 Word i;
3033 OldRef* ref;
3034 UWord keyW, valW;
3035 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00003036
sewardjd86e3a22008-12-03 11:39:37 +00003037 tl_assert(thr_acc);
sewardjf98e1c02008-10-25 16:22:41 +00003038
sewardjbc307e52008-12-06 22:10:54 +00003039 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, a );
sewardjd86e3a22008-12-03 11:39:37 +00003040 if (b) {
3041 ref = (OldRef*)valW;
3042 tl_assert(keyW == a);
sewardjf98e1c02008-10-25 16:22:41 +00003043 tl_assert(ref->magic == OldRef_MAGIC);
3044 tl_assert(ref->accs[0].thr); /* first slot must always be used */
3045
3046 for (i = 0; i < N_OLDREF_ACCS; i++) {
3047 if (ref->accs[i].thr != NULL
3048 && ref->accs[i].thr != thr_acc)
3049 break;
3050 }
3051 /* If we didn't find an entry for some thread other than
3052 thr_acc, just return the entry for thread 0. It'll look
3053 pretty stupid to the user though. */
3054 if (i == N_OLDREF_ACCS)
3055 i = 0;
3056
3057 tl_assert(i >= 0 && i < N_OLDREF_ACCS);
3058 tl_assert(ref->accs[i].thr);
3059 tl_assert(ref->accs[i].rcec);
3060 tl_assert(ref->accs[i].rcec->magic == RCEC_MAGIC);
3061
sewardjd52392d2008-11-08 20:36:26 +00003062 *resEC = VG_(make_ExeContext_from_StackTrace)(
3063 &ref->accs[i].rcec->frames[1], N_FRAMES
3064 );
sewardjf98e1c02008-10-25 16:22:41 +00003065 *resThr = ref->accs[i].thr;
3066 return True;
3067 } else {
3068 return False;
3069 }
3070}
3071
3072static void event_map_init ( void )
3073{
3074 Word i;
sewardjd86e3a22008-12-03 11:39:37 +00003075
3076 /* Context (RCEC) group allocator */
3077 init_GroupAlloc ( &rcec_group_allocator,
3078 sizeof(RCEC),
3079 1000 /* RCECs per group */,
3080 HG_(zalloc),
3081 "libhb.event_map_init.1 (RCEC groups)",
3082 HG_(free) );
3083
3084 /* Context table */
sewardjf98e1c02008-10-25 16:22:41 +00003085 tl_assert(!contextTab);
sewardjd86e3a22008-12-03 11:39:37 +00003086 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
sewardjf98e1c02008-10-25 16:22:41 +00003087 N_RCEC_TAB * sizeof(RCEC*) );
3088 tl_assert(contextTab);
3089 for (i = 0; i < N_RCEC_TAB; i++)
3090 contextTab[i] = NULL;
3091
sewardjd86e3a22008-12-03 11:39:37 +00003092 /* Oldref group allocator */
3093 init_GroupAlloc ( &oldref_group_allocator,
3094 sizeof(OldRef),
3095 1000 /* OldRefs per group */,
3096 HG_(zalloc),
3097 "libhb.event_map_init.3 (OldRef groups)",
3098 HG_(free) );
3099
sewardjd86e3a22008-12-03 11:39:37 +00003100 /* Oldref tree */
sewardjf98e1c02008-10-25 16:22:41 +00003101 tl_assert(!oldrefTree);
sewardjbc307e52008-12-06 22:10:54 +00003102 oldrefTree = VG_(newSWA)(
3103 HG_(zalloc),
sewardjd86e3a22008-12-03 11:39:37 +00003104 "libhb.event_map_init.4 (oldref tree)",
sewardjbc307e52008-12-06 22:10:54 +00003105 HG_(free)
sewardjf98e1c02008-10-25 16:22:41 +00003106 );
3107 tl_assert(oldrefTree);
3108
3109 oldrefGen = 0;
3110 oldrefGenIncAt = 0;
3111 oldrefTreeN = 0;
3112}
3113
3114static void event_map__check_reference_counts ( Bool before )
3115{
3116 RCEC* rcec;
3117 OldRef* oldref;
3118 Word i;
3119 UWord nEnts = 0;
sewardjd86e3a22008-12-03 11:39:37 +00003120 UWord keyW, valW;
sewardjf98e1c02008-10-25 16:22:41 +00003121
3122 /* Set the 'check' reference counts to zero. Also, optionally
3123 check that the real reference counts are non-zero. We allow
3124 these to fall to zero before a GC, but the GC must get rid of
3125 all those that are zero, hence none should be zero after a
3126 GC. */
3127 for (i = 0; i < N_RCEC_TAB; i++) {
3128 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
3129 nEnts++;
3130 tl_assert(rcec);
3131 tl_assert(rcec->magic == RCEC_MAGIC);
3132 if (!before)
3133 tl_assert(rcec->rc > 0);
3134 rcec->rcX = 0;
3135 }
3136 }
3137
3138 /* check that the stats are sane */
3139 tl_assert(nEnts == stats__ctxt_tab_curr);
3140 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
3141
3142 /* visit all the referencing points, inc check ref counts */
sewardjbc307e52008-12-06 22:10:54 +00003143 VG_(initIterSWA)( oldrefTree );
3144 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00003145 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00003146 tl_assert(oldref->magic == OldRef_MAGIC);
3147 for (i = 0; i < N_OLDREF_ACCS; i++) {
3148 if (oldref->accs[i].thr) {
3149 tl_assert(oldref->accs[i].rcec);
3150 tl_assert(oldref->accs[i].rcec->magic == RCEC_MAGIC);
3151 oldref->accs[i].rcec->rcX++;
3152 } else {
3153 tl_assert(!oldref->accs[i].rcec);
3154 }
3155 }
3156 }
3157
3158 /* compare check ref counts with actual */
3159 for (i = 0; i < N_RCEC_TAB; i++) {
3160 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
3161 tl_assert(rcec->rc == rcec->rcX);
3162 }
3163 }
3164}
3165
sewardj8fd92d32008-11-20 23:17:01 +00003166__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00003167static void event_map_maybe_GC ( void )
3168{
3169 OldRef* oldref;
3170 UWord keyW, valW, retained, maxGen;
sewardjf98e1c02008-10-25 16:22:41 +00003171 XArray* refs2del;
3172 Word i, j, n2del;
3173
sewardj8fd92d32008-11-20 23:17:01 +00003174 UWord* genMap = NULL;
3175 UWord genMap_min = 0;
3176 UWord genMap_size = 0;
3177
sewardjf98e1c02008-10-25 16:22:41 +00003178 if (LIKELY(oldrefTreeN < EVENT_MAP_GC_AT))
3179 return;
3180
3181 if (0)
3182 VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
3183
3184 /* Check our counting is sane */
sewardjbc307e52008-12-06 22:10:54 +00003185#warning Fixme1
3186 //tl_assert(oldrefTreeN == VG_(sizeFM)( oldrefTree ));
sewardjf98e1c02008-10-25 16:22:41 +00003187
3188 /* Check the reference counts */
3189 event_map__check_reference_counts( True/*before*/ );
3190
sewardj8fd92d32008-11-20 23:17:01 +00003191 /* Compute the distribution of generation values in the ref tree.
3192 There are likely only to be a few different generation numbers
3193 in the whole tree, but we don't know what they are. Hence use a
3194 dynamically resized array of counters. The array is genMap[0
3195 .. genMap_size-1], where genMap[0] is the count for the
3196 generation number genMap_min, genMap[1] is the count for
3197 genMap_min+1, etc. If a new number is seen outside the range
3198 [genMap_min .. genMap_min + genMap_size - 1] then the array is
3199 copied into a larger array, and genMap_min and genMap_size are
3200 adjusted accordingly. */
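/* For example, if the generation numbers encountered are 7, then 5,
   then 9, genMap starts life as a one-element array with genMap_min
   == 7, is then grown at the low end to cover [5 .. 7], and finally
   grown at the high end to cover [5 .. 9]. */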
3201
sewardjf98e1c02008-10-25 16:22:41 +00003202 /* genMap :: generation-number -> count-of-nodes-with-that-number */
sewardjf98e1c02008-10-25 16:22:41 +00003203
sewardjbc307e52008-12-06 22:10:54 +00003204 VG_(initIterSWA)( oldrefTree );
3205 while ( VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardj8fd92d32008-11-20 23:17:01 +00003206
sewardjd86e3a22008-12-03 11:39:37 +00003207 UWord ea, key;
3208 oldref = (OldRef*)valW;
3209 key = oldref->gen;
sewardj8fd92d32008-11-20 23:17:01 +00003210
3211 /* BEGIN find 'ea', which is the index in genMap holding the
3212 count for generation number 'key'. */
3213 if (UNLIKELY(genMap == NULL)) {
3214 /* deal with the first key to be seen, so that the following
3215 cases don't need to handle the complexity of a NULL count
3216 array. */
3217 genMap_min = key;
3218 genMap_size = 1;
3219 genMap = HG_(zalloc)( "libhb.emmG.1a",
3220 genMap_size * sizeof(UWord) );
3221 ea = 0;
3222 if (0) VG_(printf)("(%lu) case 1 [%lu .. %lu]\n",
3223 key, genMap_min, genMap_min+genMap_size- 1 );
sewardjf98e1c02008-10-25 16:22:41 +00003224 }
sewardj8fd92d32008-11-20 23:17:01 +00003225 else
3226 if (LIKELY(key >= genMap_min && key < genMap_min + genMap_size)) {
3227 /* this is the expected (almost-always-happens) case: 'key'
3228 is already mapped in the array. */
3229 ea = key - genMap_min;
3230 }
3231 else
3232 if (key < genMap_min) {
3233 /* 'key' appears before the start of the current array.
3234 Extend the current array by allocating a larger one and
3235 copying the current one to the upper end of it. */
3236 Word more;
3237 UWord* map2;
3238 more = genMap_min - key;
3239 tl_assert(more > 0);
3240 map2 = HG_(zalloc)( "libhb.emmG.1b",
3241 (genMap_size + more) * sizeof(UWord) );
3242 VG_(memcpy)( &map2[more], genMap, genMap_size * sizeof(UWord) );
3243 HG_(free)( genMap );
3244 genMap = map2;
3245 genMap_size += more;
3246 genMap_min -= more;
3247 ea = 0;
3248 tl_assert(genMap_min == key);
3249 if (0) VG_(printf)("(%lu) case 2 [%lu .. %lu]\n",
3250 key, genMap_min, genMap_min+genMap_size- 1 );
3251 }
3252 else {
3253 /* 'key' appears after the end of the current array. Extend
3254 the current array by allocating a larger one and copying
3255 the current one to the lower end of it. */
3256 Word more;
3257 UWord* map2;
3258 tl_assert(key >= genMap_min + genMap_size);
3259 more = key - (genMap_min + genMap_size) + 1;
3260 tl_assert(more > 0);
3261 map2 = HG_(zalloc)( "libhb.emmG.1c",
3262 (genMap_size + more) * sizeof(UWord) );
3263 VG_(memcpy)( &map2[0], genMap, genMap_size * sizeof(UWord) );
3264 HG_(free)( genMap );
3265 genMap = map2;
3266 genMap_size += more;
3267         ea = genMap_size - 1;
3268 tl_assert(genMap_min + genMap_size - 1 == key);
3269 if (0) VG_(printf)("(%lu) case 3 [%lu .. %lu]\n",
3270 key, genMap_min, genMap_min+genMap_size- 1 );
3271 }
3272 /* END find 'ea' from 'key' */
3273
3274 tl_assert(ea >= 0 && ea < genMap_size);
sewardjd86e3a22008-12-03 11:39:37 +00003275 /* and the whole point of this elaborate computation of 'ea' is .. */
sewardj8fd92d32008-11-20 23:17:01 +00003276 genMap[ea]++;
sewardjf98e1c02008-10-25 16:22:41 +00003277 }
3278
sewardj8fd92d32008-11-20 23:17:01 +00003279 tl_assert(genMap);
3280 tl_assert(genMap_size > 0);
sewardjf98e1c02008-10-25 16:22:41 +00003281
sewardj8fd92d32008-11-20 23:17:01 +00003282 /* Sanity check what we just computed */
3283 { UWord sum = 0;
3284 for (i = 0; i < genMap_size; i++) {
3285 if (0) VG_(printf)(" xxx: gen %ld has %lu\n",
3286 i + genMap_min, genMap[i] );
3287 sum += genMap[i];
3288 }
3289 tl_assert(sum == oldrefTreeN);
3290 }
3291
3292 /* Figure out how many generations to throw away */
sewardjf98e1c02008-10-25 16:22:41 +00003293 retained = oldrefTreeN;
3294 maxGen = 0;
sewardj8fd92d32008-11-20 23:17:01 +00003295
3296 for (i = 0; i < genMap_size; i++) {
3297 keyW = i + genMap_min;
3298 valW = genMap[i];
sewardjf98e1c02008-10-25 16:22:41 +00003299 tl_assert(keyW > 0); /* can't allow a generation # 0 */
3300 if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW );
3301 tl_assert(keyW >= maxGen);
3302 tl_assert(retained >= valW);
3303 if (retained - valW
3304 > (UWord)(EVENT_MAP_GC_AT * EVENT_MAP_GC_DISCARD_FRACTION)) {
3305 retained -= valW;
3306 maxGen = keyW;
3307 } else {
3308 break;
3309 }
3310 }
sewardjf98e1c02008-10-25 16:22:41 +00003311
sewardj8fd92d32008-11-20 23:17:01 +00003312 HG_(free)(genMap);
sewardjf98e1c02008-10-25 16:22:41 +00003313
sewardj9b1f0fd2008-11-18 23:40:00 +00003314 tl_assert(retained >= 0 && retained <= oldrefTreeN);
sewardjf98e1c02008-10-25 16:22:41 +00003315
3316 /* Now make up a big list of the oldrefTree entries we want to
3317 delete. We can't simultaneously traverse the tree and delete
3318 stuff from it, so first we need to copy them off somewhere
3319 else. (sigh) */
sewardj8fd92d32008-11-20 23:17:01 +00003320 refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2",
sewardjd86e3a22008-12-03 11:39:37 +00003321 HG_(free), sizeof(Addr) );
sewardjf98e1c02008-10-25 16:22:41 +00003322
sewardj9b1f0fd2008-11-18 23:40:00 +00003323 if (retained < oldrefTreeN) {
3324
3325 /* This is the normal (expected) case. We discard any ref whose
3326 generation number <= maxGen. */
sewardjbc307e52008-12-06 22:10:54 +00003327 VG_(initIterSWA)( oldrefTree );
3328 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00003329 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00003330 tl_assert(oldref->magic == OldRef_MAGIC);
3331 if (oldref->gen <= maxGen) {
sewardjd86e3a22008-12-03 11:39:37 +00003332 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00003333 }
sewardjf98e1c02008-10-25 16:22:41 +00003334 }
sewardj9b1f0fd2008-11-18 23:40:00 +00003335 if (VG_(clo_verbosity) > 1) {
3336 VG_(message)(Vg_DebugMsg,
3337 "libhb: EvM GC: delete generations %lu and below, "
3338 "retaining %lu entries",
3339 maxGen, retained );
3340 }
3341
3342 } else {
3343
3344 static UInt rand_seed = 0; /* leave as static */
3345
3346 /* Degenerate case: there's only one generation in the entire
3347 tree, so we need to have some other way of deciding which
3348 refs to throw away. Just throw out half of them randomly. */
3349 tl_assert(retained == oldrefTreeN);
sewardjbc307e52008-12-06 22:10:54 +00003350 VG_(initIterSWA)( oldrefTree );
3351 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardj9b1f0fd2008-11-18 23:40:00 +00003352 UInt n;
sewardjd86e3a22008-12-03 11:39:37 +00003353 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00003354 tl_assert(oldref->magic == OldRef_MAGIC);
3355 n = VG_(random)( &rand_seed );
3356 if ((n & 0xFFF) < 0x800) {
sewardjd86e3a22008-12-03 11:39:37 +00003357 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00003358 retained--;
3359 }
3360 }
3361 if (VG_(clo_verbosity) > 1) {
3362 VG_(message)(Vg_DebugMsg,
3363 "libhb: EvM GC: randomly delete half the entries, "
3364 "retaining %lu entries",
3365 retained );
3366 }
3367
sewardjf98e1c02008-10-25 16:22:41 +00003368 }
3369
3370 n2del = VG_(sizeXA)( refs2del );
3371 tl_assert(n2del == (Word)(oldrefTreeN - retained));
3372
3373 if (0) VG_(printf)("%s","deleting entries\n");
3374 for (i = 0; i < n2del; i++) {
sewardjd86e3a22008-12-03 11:39:37 +00003375 Bool b;
3376 Addr ga2del = *(Addr*)VG_(indexXA)( refs2del, i );
sewardjbc307e52008-12-06 22:10:54 +00003377 b = VG_(delFromSWA)( oldrefTree, &keyW, &valW, ga2del );
sewardjd86e3a22008-12-03 11:39:37 +00003378 tl_assert(b);
3379 tl_assert(keyW == ga2del);
3380 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00003381 for (j = 0; j < N_OLDREF_ACCS; j++) {
sewardjd86e3a22008-12-03 11:39:37 +00003382 if (oldref->accs[j].rcec) {
3383 tl_assert(oldref->accs[j].thr);
sewardjf98e1c02008-10-25 16:22:41 +00003384 stats__ctxt_rcdec3++;
sewardjd86e3a22008-12-03 11:39:37 +00003385 ctxt__rcdec( oldref->accs[j].rcec );
sewardjf98e1c02008-10-25 16:22:41 +00003386 } else {
sewardjd86e3a22008-12-03 11:39:37 +00003387 tl_assert(!oldref->accs[j].thr);
sewardjf98e1c02008-10-25 16:22:41 +00003388 }
3389 }
sewardjd86e3a22008-12-03 11:39:37 +00003390
3391 free_OldRef( oldref );
sewardjf98e1c02008-10-25 16:22:41 +00003392 }
3393
3394 VG_(deleteXA)( refs2del );
3395
sewardjbc307e52008-12-06 22:10:54 +00003396#warning Fixme2
3397 //tl_assert( VG_(sizeFM)( oldrefTree ) == retained );
sewardjf98e1c02008-10-25 16:22:41 +00003398
3399 oldrefTreeN = retained;
3400 oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
3401
3402 /* Throw away all RCECs with zero reference counts */
3403 for (i = 0; i < N_RCEC_TAB; i++) {
3404 RCEC** pp = &contextTab[i];
3405 RCEC* p = *pp;
3406 while (p) {
3407 if (p->rc == 0) {
3408 *pp = p->next;
sewardjd86e3a22008-12-03 11:39:37 +00003409 free_RCEC(p);
sewardjf98e1c02008-10-25 16:22:41 +00003410 p = *pp;
3411 tl_assert(stats__ctxt_tab_curr > 0);
3412 stats__ctxt_tab_curr--;
3413 } else {
3414 pp = &p->next;
3415 p = p->next;
3416 }
3417 }
3418 }
3419
3420 /* Check the reference counts */
3421 event_map__check_reference_counts( False/*after*/ );
3422
3423 //if (0)
3424 //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
3425 // VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
3426
3427}
3428
3429
3430/////////////////////////////////////////////////////////
3431// //
3432// Core MSM //
3433// //
3434/////////////////////////////////////////////////////////
3435
sewardjb0e009d2008-11-19 16:35:15 +00003436/* Logic in msm_read/msm_write updated/verified after re-analysis,
3437 19 Nov 08. */
3438
sewardjf98e1c02008-10-25 16:22:41 +00003439#define MSM_CONFACC 1
3440
sewardjf98e1c02008-10-25 16:22:41 +00003441#define MSM_CHECK 0
3442
sewardjb0e009d2008-11-19 16:35:15 +00003443/* 19 Nov 08: it seems that MSM_RACE2ERR == 1 is a bad idea. When
3444 nonzero, the effect is that when a race is detected for a location,
3445 that location is put into a special 'error' state and no further
3446 checking of it is done until it returns to a 'normal' state, which
3447 requires it to be deallocated and reallocated.
3448
3449 This is a bad idea, because of the interaction with suppressions.
3450 Suppose there is a race on the location, but the error is
3451 suppressed. The location now is marked as in-error. Now any
3452 subsequent race -- including ones we want to see -- will never be
3453 detected until the location is deallocated and reallocated.
3454
3455   Hence set MSM_RACE2ERR to zero. This causes raced-on locations to
3456 remain in the normal 'C' (constrained) state, but places on them
3457 the constraint that the next accesses happen-after both the
3458 existing constraint and the relevant vector clock of the thread
3459 doing the racing access.
3460*/
3461#define MSM_RACE2ERR 0
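/* Illustrative timeline (a sketch, not produced by the code): with
   MSM_RACE2ERR == 1, a suppressed race at address A goes

      C -> (race, suppressed) -> E -> E -> E ...

   and later, genuinely interesting races at A are never reported
   until A is freed and reallocated.  With MSM_RACE2ERR == 0 the
   location stays constrained:

      C -> (race, suppressed) -> C' -> (new race) -> report

   where C' additionally requires future accesses to happen-after the
   racing access's vector clocks. */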
3462
sewardjf98e1c02008-10-25 16:22:41 +00003463static ULong stats__msm_read = 0;
3464static ULong stats__msm_read_change = 0;
3465static ULong stats__msm_write = 0;
3466static ULong stats__msm_write_change = 0;
3467
3468__attribute__((noinline))
3469static void record_race_info ( Thr* acc_thr,
3470 Addr acc_addr, SizeT szB, Bool isWrite,
3471 SVal svOld, SVal svNew )
3472{
3473 Bool found;
3474 Thr* thrp = NULL;
sewardjd52392d2008-11-08 20:36:26 +00003475 ExeContext* where = NULL;
3476 ExeContext* wherep = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00003477 where = main_get_EC( acc_thr );
3478 found = event_map_lookup( &wherep, &thrp, acc_thr, acc_addr );
3479 if (found) {
3480 tl_assert(wherep);
3481 tl_assert(thrp);
3482 tl_assert(thrp->opaque);
3483 tl_assert(acc_thr->opaque);
3484 HG_(record_error_Race)( acc_thr->opaque, acc_addr,
3485 isWrite, szB, NULL/*mb_lastlock*/,
3486 wherep, thrp->opaque );
3487 } else {
3488 tl_assert(!wherep);
3489 tl_assert(!thrp);
3490 tl_assert(acc_thr->opaque);
3491 HG_(record_error_Race)( acc_thr->opaque, acc_addr,
3492 isWrite, szB, NULL/*mb_lastlock*/,
3493 NULL, NULL );
3494 }
3495}
3496
3497static Bool is_sane_SVal_C ( SVal sv ) {
3498 POrd ord;
3499 if (!SVal__isC(sv)) return True;
3500 ord = VtsID__getOrdering( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
3501 if (ord == POrd_EQ || ord == POrd_LT) return True;
3502 return False;
3503}
3504
3505
3506/* Compute new state following a read */
3507static inline SVal msm_read ( SVal svOld,
3508 /* The following are only needed for
3509 creating error reports. */
3510 Thr* acc_thr,
3511 Addr acc_addr, SizeT szB )
3512{
3513 SVal svNew = SVal_INVALID;
3514 stats__msm_read++;
3515
3516 /* Redundant sanity check on the constraints */
3517 if (MSM_CHECK) {
3518 tl_assert(is_sane_SVal_C(svOld));
3519 }
3520
3521 if (SVal__isC(svOld)) {
3522 POrd ord;
3523 VtsID tviR = acc_thr->viR;
3524 VtsID tviW = acc_thr->viW;
3525 VtsID rmini = SVal__unC_Rmin(svOld);
3526 VtsID wmini = SVal__unC_Wmin(svOld);
3527
3528 ord = VtsID__getOrdering(rmini,tviR);
3529 if (ord == POrd_EQ || ord == POrd_LT) {
3530 /* no race */
3531 /* Note: RWLOCK subtlety: use tviW, not tviR */
3532 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
3533 goto out;
3534 } else {
sewardjb0e009d2008-11-19 16:35:15 +00003535 /* assert on sanity of constraints. */
3536 POrd ordxx = VtsID__getOrdering(rmini,wmini);
3537 tl_assert(ordxx == POrd_EQ || ordxx == POrd_LT);
sewardjf98e1c02008-10-25 16:22:41 +00003538 svNew = MSM_RACE2ERR
3539 ? SVal__mkE()
sewardj3b0c4d72008-11-20 11:20:50 +00003540#if 0
3541 //std
sewardjb0e009d2008-11-19 16:35:15 +00003542 : SVal__mkC( VtsID__join2(wmini,tviR),
3543 VtsID__join2(wmini,tviW) );
sewardj3b0c4d72008-11-20 11:20:50 +00003544#else
3545 // relaxed
3546 : SVal__mkC( VtsID__join2(rmini,tviR),
3547 VtsID__join2(wmini,tviW) );
3548#endif
sewardjf98e1c02008-10-25 16:22:41 +00003549 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
3550 svOld, svNew );
3551 goto out;
3552 }
3553 }
3554 if (SVal__isA(svOld)) {
3555 /* reading no-access memory (sigh); leave unchanged */
3556 /* check for no pollution */
3557 tl_assert(svOld == SVal_NOACCESS);
3558 svNew = SVal_NOACCESS;
3559 goto out;
3560 }
3561 if (SVal__isE(svOld)) {
3562 /* no race, location is already "in error" */
3563 svNew = SVal__mkE();
3564 goto out;
3565 }
3566 VG_(printf)("msm_read: bad svOld: 0x%016llx\n", svOld);
3567 tl_assert(0);
3568
3569 out:
3570 if (MSM_CHECK) {
3571 tl_assert(is_sane_SVal_C(svNew));
3572 }
3573 tl_assert(svNew != SVal_INVALID);
3574 if (svNew != svOld) {
3575 if (MSM_CONFACC && SVal__isC(svOld) && SVal__isC(svNew)) {
3576 event_map_bind( acc_addr, acc_thr );
3577 stats__msm_read_change++;
3578 }
3579 }
3580 return svNew;
3581}
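/* Worked example of the read rule (a sketch with made-up clocks, not
   traced from real data): suppose a location's constraint is
   C(R=[T1:3], W=[T1:3]) and thread T2 with viR = viW = [T1:3,T2:5]
   reads it.  Then rmini = [T1:3] <= tviR, so there is no race, and
   the new state is
      C( rmini, join(wmini, tviW) ) = C( [T1:3], [T1:3,T2:5] ).
   Only the write-min component advances, and it advances with the
   thread's *write* clock (the RWLOCK subtlety noted above).  If
   instead rmini were not <= tviR, a race would be reported and the
   constraint relaxed by joining in the reader's clocks. */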
3582
3583
3584/* Compute new state following a write */
3585static inline SVal msm_write ( SVal svOld,
3586 /* The following are only needed for
3587 creating error reports. */
3588 Thr* acc_thr,
3589 Addr acc_addr, SizeT szB )
3590{
3591 SVal svNew = SVal_INVALID;
3592 stats__msm_write++;
3593
3594 /* Redundant sanity check on the constraints */
3595 if (MSM_CHECK) {
3596 tl_assert(is_sane_SVal_C(svOld));
3597 }
3598
3599 if (SVal__isC(svOld)) {
3600 POrd ord;
3601 VtsID tviW = acc_thr->viW;
3602 VtsID wmini = SVal__unC_Wmin(svOld);
3603
3604 ord = VtsID__getOrdering(wmini,tviW);
3605 if (ord == POrd_EQ || ord == POrd_LT) {
3606 /* no race */
3607 svNew = SVal__mkC( tviW, tviW );
3608 goto out;
3609 } else {
sewardjb0e009d2008-11-19 16:35:15 +00003610 VtsID tviR = acc_thr->viR;
sewardjf98e1c02008-10-25 16:22:41 +00003611 VtsID rmini = SVal__unC_Rmin(svOld);
sewardjb0e009d2008-11-19 16:35:15 +00003612 /* assert on sanity of constraints. */
3613 POrd ordxx = VtsID__getOrdering(rmini,wmini);
3614 tl_assert(ordxx == POrd_EQ || ordxx == POrd_LT);
sewardjf98e1c02008-10-25 16:22:41 +00003615 svNew = MSM_RACE2ERR
3616 ? SVal__mkE()
sewardj3b0c4d72008-11-20 11:20:50 +00003617#if 0
3618 // std
sewardjb0e009d2008-11-19 16:35:15 +00003619 : SVal__mkC( VtsID__join2(wmini,tviR),
sewardjf98e1c02008-10-25 16:22:41 +00003620 VtsID__join2(wmini,tviW) );
sewardj3b0c4d72008-11-20 11:20:50 +00003621#else
3622 // relaxed
3623 : SVal__mkC( VtsID__join2(rmini,tviR),
3624 VtsID__join2(wmini,tviW) );
3625#endif
sewardjf98e1c02008-10-25 16:22:41 +00003626 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
3627 svOld, svNew );
3628 goto out;
3629 }
3630 }
3631 if (SVal__isA(svOld)) {
3632 /* writing no-access memory (sigh); leave unchanged */
3633 /* check for no pollution */
3634 tl_assert(svOld == SVal_NOACCESS);
3635 svNew = SVal_NOACCESS;
3636 goto out;
3637 }
3638 if (SVal__isE(svOld)) {
3639 /* no race, location is already "in error" */
3640 svNew = SVal__mkE();
3641 goto out;
3642 }
3643 VG_(printf)("msm_write: bad svOld: 0x%016llx\n", svOld);
3644 tl_assert(0);
3645
3646 out:
3647 if (MSM_CHECK) {
3648 tl_assert(is_sane_SVal_C(svNew));
3649 }
3650 tl_assert(svNew != SVal_INVALID);
3651 if (svNew != svOld) {
3652 if (MSM_CONFACC && SVal__isC(svOld) && SVal__isC(svNew)) {
3653 event_map_bind( acc_addr, acc_thr );
3654 stats__msm_write_change++;
3655 }
3656 }
3657 return svNew;
3658}
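/* Corresponding sketch for the write rule (made-up clocks): if the
   constraint is C(R=[T1:3], W=[T1:3]) and T2 with viW = [T1:3,T2:5]
   writes, then wmini = [T1:3] <= tviW, so there is no race, and the
   state collapses to C(tviW, tviW) = C([T1:3,T2:5], [T1:3,T2:5]): a
   non-racing write dominates all earlier accesses, so both minima can
   simply be replaced by the writer's write-clock.  Otherwise a race is
   reported and the constraint is relaxed by joining, as in the read
   case. */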
3659
3660
3661/////////////////////////////////////////////////////////
3662// //
3663// Apply core MSM to specific memory locations //
3664// //
3665/////////////////////////////////////////////////////////
3666
3667/*------------- ZSM accesses: 8 bit apply ------------- */
3668
3669void zsm_apply8___msm_read ( Thr* thr, Addr a ) {
3670 CacheLine* cl;
3671 UWord cloff, tno, toff;
3672 SVal svOld, svNew;
3673 UShort descr;
3674 stats__cline_read8s++;
3675 cl = get_cacheline(a);
3676 cloff = get_cacheline_offset(a);
3677 tno = get_treeno(a);
3678 toff = get_tree_offset(a); /* == 0 .. 7 */
3679 descr = cl->descrs[tno];
3680 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3681 SVal* tree = &cl->svals[tno << 3];
3682 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3683 if (SCE_CACHELINE)
3684 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3685 }
3686 svOld = cl->svals[cloff];
3687 svNew = msm_read( svOld, thr,a,1 );
3688 tl_assert(svNew != SVal_INVALID);
3689 cl->svals[cloff] = svNew;
3690}
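/* The pattern used by all the zsm_apply*___msm_* routines in this
   section is the same: consult the tree descriptor for the 8-byte
   tree containing the address, and if the access's size/offset is not
   currently represented as a leaf at that granularity, either pull
   wider values down (pulldown_to_8/16/32) or split the access into
   two narrower ones (the 'slowcase' paths).  For example -- a sketch,
   not traced from real data -- a 2-byte read at offset 2 of a tree
   whose descriptor says "one 64-bit value" first pulls the 64-bit
   SVal down to 16-bit granularity and then applies msm_read to the
   resulting 16-bit leaf. */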
3691
3692void zsm_apply8___msm_write ( Thr* thr, Addr a ) {
3693 CacheLine* cl;
3694 UWord cloff, tno, toff;
3695 SVal svOld, svNew;
3696 UShort descr;
3697 stats__cline_write8s++;
3698 cl = get_cacheline(a);
3699 cloff = get_cacheline_offset(a);
3700 tno = get_treeno(a);
3701 toff = get_tree_offset(a); /* == 0 .. 7 */
3702 descr = cl->descrs[tno];
3703 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3704 SVal* tree = &cl->svals[tno << 3];
3705 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3706 if (SCE_CACHELINE)
3707 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3708 }
3709 svOld = cl->svals[cloff];
3710 svNew = msm_write( svOld, thr,a,1 );
3711 tl_assert(svNew != SVal_INVALID);
3712 cl->svals[cloff] = svNew;
3713}
3714
3715/*------------- ZSM accesses: 16 bit apply ------------- */
3716
3717void zsm_apply16___msm_read ( Thr* thr, Addr a ) {
3718 CacheLine* cl;
3719 UWord cloff, tno, toff;
3720 SVal svOld, svNew;
3721 UShort descr;
3722 stats__cline_read16s++;
3723 if (UNLIKELY(!aligned16(a))) goto slowcase;
3724 cl = get_cacheline(a);
3725 cloff = get_cacheline_offset(a);
3726 tno = get_treeno(a);
3727 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
3728 descr = cl->descrs[tno];
3729 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
3730 if (valid_value_is_below_me_16(descr, toff)) {
3731 goto slowcase;
3732 } else {
3733 SVal* tree = &cl->svals[tno << 3];
3734 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
3735 }
3736 if (SCE_CACHELINE)
3737 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3738 }
3739 svOld = cl->svals[cloff];
3740 svNew = msm_read( svOld, thr,a,2 );
3741 tl_assert(svNew != SVal_INVALID);
3742 cl->svals[cloff] = svNew;
3743 return;
3744 slowcase: /* misaligned, or must go further down the tree */
3745 stats__cline_16to8splits++;
3746 zsm_apply8___msm_read( thr, a + 0 );
3747 zsm_apply8___msm_read( thr, a + 1 );
3748}
3749
3750void zsm_apply16___msm_write ( Thr* thr, Addr a ) {
3751 CacheLine* cl;
3752 UWord cloff, tno, toff;
3753 SVal svOld, svNew;
3754 UShort descr;
3755 stats__cline_write16s++;
3756 if (UNLIKELY(!aligned16(a))) goto slowcase;
3757 cl = get_cacheline(a);
3758 cloff = get_cacheline_offset(a);
3759 tno = get_treeno(a);
3760 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
3761 descr = cl->descrs[tno];
3762 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
3763 if (valid_value_is_below_me_16(descr, toff)) {
3764 goto slowcase;
3765 } else {
3766 SVal* tree = &cl->svals[tno << 3];
3767 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
3768 }
3769 if (SCE_CACHELINE)
3770 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3771 }
3772 svOld = cl->svals[cloff];
3773 svNew = msm_write( svOld, thr,a,2 );
3774 tl_assert(svNew != SVal_INVALID);
3775 cl->svals[cloff] = svNew;
3776 return;
3777 slowcase: /* misaligned, or must go further down the tree */
3778 stats__cline_16to8splits++;
3779 zsm_apply8___msm_write( thr, a + 0 );
3780 zsm_apply8___msm_write( thr, a + 1 );
3781}
3782
3783/*------------- ZSM accesses: 32 bit apply ------------- */
3784
3785void zsm_apply32___msm_read ( Thr* thr, Addr a ) {
3786 CacheLine* cl;
3787 UWord cloff, tno, toff;
3788 SVal svOld, svNew;
3789 UShort descr;
3790 if (UNLIKELY(!aligned32(a))) goto slowcase;
3791 cl = get_cacheline(a);
3792 cloff = get_cacheline_offset(a);
3793 tno = get_treeno(a);
3794 toff = get_tree_offset(a); /* == 0 or 4 */
3795 descr = cl->descrs[tno];
3796 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
3797 if (valid_value_is_above_me_32(descr, toff)) {
3798 SVal* tree = &cl->svals[tno << 3];
3799 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
3800 } else {
3801 goto slowcase;
3802 }
3803 if (SCE_CACHELINE)
3804 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3805 }
3806 svOld = cl->svals[cloff];
3807 svNew = msm_read( svOld, thr,a,4 );
3808 tl_assert(svNew != SVal_INVALID);
3809 cl->svals[cloff] = svNew;
3810 return;
3811 slowcase: /* misaligned, or must go further down the tree */
3812 stats__cline_32to16splits++;
3813 zsm_apply16___msm_read( thr, a + 0 );
3814 zsm_apply16___msm_read( thr, a + 2 );
3815}
3816
3817void zsm_apply32___msm_write ( Thr* thr, Addr a ) {
3818 CacheLine* cl;
3819 UWord cloff, tno, toff;
3820 SVal svOld, svNew;
3821 UShort descr;
3822 if (UNLIKELY(!aligned32(a))) goto slowcase;
3823 cl = get_cacheline(a);
3824 cloff = get_cacheline_offset(a);
3825 tno = get_treeno(a);
3826 toff = get_tree_offset(a); /* == 0 or 4 */
3827 descr = cl->descrs[tno];
3828 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
3829 if (valid_value_is_above_me_32(descr, toff)) {
3830 SVal* tree = &cl->svals[tno << 3];
3831 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
3832 } else {
3833 goto slowcase;
3834 }
3835 if (SCE_CACHELINE)
3836 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3837 }
3838 svOld = cl->svals[cloff];
3839 svNew = msm_write( svOld, thr,a,4 );
3840 tl_assert(svNew != SVal_INVALID);
3841 cl->svals[cloff] = svNew;
3842 return;
3843 slowcase: /* misaligned, or must go further down the tree */
3844 stats__cline_32to16splits++;
3845 zsm_apply16___msm_write( thr, a + 0 );
3846 zsm_apply16___msm_write( thr, a + 2 );
3847}
3848
3849/*------------- ZSM accesses: 64 bit apply ------------- */
3850
3851void zsm_apply64___msm_read ( Thr* thr, Addr a ) {
3852 CacheLine* cl;
3853 UWord cloff, tno, toff;
3854 SVal svOld, svNew;
3855 UShort descr;
3856 stats__cline_read64s++;
3857 if (UNLIKELY(!aligned64(a))) goto slowcase;
3858 cl = get_cacheline(a);
3859 cloff = get_cacheline_offset(a);
3860 tno = get_treeno(a);
3861 toff = get_tree_offset(a); /* == 0, unused */
3862 descr = cl->descrs[tno];
3863 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
3864 goto slowcase;
3865 }
3866 svOld = cl->svals[cloff];
3867 svNew = msm_read( svOld, thr,a,8 );
3868 tl_assert(svNew != SVal_INVALID);
3869 cl->svals[cloff] = svNew;
3870 return;
3871 slowcase: /* misaligned, or must go further down the tree */
3872 stats__cline_64to32splits++;
3873 zsm_apply32___msm_read( thr, a + 0 );
3874 zsm_apply32___msm_read( thr, a + 4 );
3875}
3876
3877void zsm_apply64___msm_write ( Thr* thr, Addr a ) {
3878 CacheLine* cl;
3879 UWord cloff, tno, toff;
3880 SVal svOld, svNew;
3881 UShort descr;
3882 stats__cline_write64s++;
3883 if (UNLIKELY(!aligned64(a))) goto slowcase;
3884 cl = get_cacheline(a);
3885 cloff = get_cacheline_offset(a);
3886 tno = get_treeno(a);
3887 toff = get_tree_offset(a); /* == 0, unused */
3888 descr = cl->descrs[tno];
3889 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
3890 goto slowcase;
3891 }
3892 svOld = cl->svals[cloff];
3893 svNew = msm_write( svOld, thr,a,8 );
3894 tl_assert(svNew != SVal_INVALID);
3895 cl->svals[cloff] = svNew;
3896 return;
3897 slowcase: /* misaligned, or must go further down the tree */
3898 stats__cline_64to32splits++;
3899 zsm_apply32___msm_write( thr, a + 0 );
3900 zsm_apply32___msm_write( thr, a + 4 );
3901}
3902
3903/*--------------- ZSM accesses: 8 bit write --------------- */
3904
3905static
3906void zsm_write8 ( Addr a, SVal svNew ) {
3907 CacheLine* cl;
3908 UWord cloff, tno, toff;
3909 UShort descr;
3910 stats__cline_set8s++;
3911 cl = get_cacheline(a);
3912 cloff = get_cacheline_offset(a);
3913 tno = get_treeno(a);
3914 toff = get_tree_offset(a); /* == 0 .. 7 */
3915 descr = cl->descrs[tno];
3916 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3917 SVal* tree = &cl->svals[tno << 3];
3918 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3919 if (SCE_CACHELINE)
3920 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3921 }
3922 tl_assert(svNew != SVal_INVALID);
3923 cl->svals[cloff] = svNew;
3924}
3925
3926/*--------------- ZSM accesses: 16 bit write --------------- */
3927
3928static
3929void zsm_write16 ( Addr a, SVal svNew ) {
3930 CacheLine* cl;
3931 UWord cloff, tno, toff;
3932 UShort descr;
3933 stats__cline_set16s++;
3934 if (UNLIKELY(!aligned16(a))) goto slowcase;
3935 cl = get_cacheline(a);
3936 cloff = get_cacheline_offset(a);
3937 tno = get_treeno(a);
3938 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
3939 descr = cl->descrs[tno];
3940 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
3941 if (valid_value_is_below_me_16(descr, toff)) {
3942 /* Writing at this level. Need to fix up 'descr'. */
3943 cl->descrs[tno] = pullup_descr_to_16(descr, toff);
3944 /* At this point, the tree does not match cl->descrs[tno] any
3945 more. The assignments below will fix it up. */
3946 } else {
3947 /* We can't indiscriminately write on the w16 node as in the
3948 w64 case, as that might make the node inconsistent with
3949 its parent. So first, pull down to this level. */
3950 SVal* tree = &cl->svals[tno << 3];
3951 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
3952 if (SCE_CACHELINE)
3953 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3954 }
3955 }
3956 tl_assert(svNew != SVal_INVALID);
3957 cl->svals[cloff + 0] = svNew;
3958 cl->svals[cloff + 1] = SVal_INVALID;
3959 return;
3960 slowcase: /* misaligned */
3961 stats__cline_16to8splits++;
3962 zsm_write8( a + 0, svNew );
3963 zsm_write8( a + 1, svNew );
3964}
3965
3966/*--------------- ZSM accesses: 32 bit write --------------- */
3967
3968static
3969void zsm_write32 ( Addr a, SVal svNew ) {
3970 CacheLine* cl;
3971 UWord cloff, tno, toff;
3972 UShort descr;
3973 stats__cline_set32s++;
3974 if (UNLIKELY(!aligned32(a))) goto slowcase;
3975 cl = get_cacheline(a);
3976 cloff = get_cacheline_offset(a);
3977 tno = get_treeno(a);
3978 toff = get_tree_offset(a); /* == 0 or 4 */
3979 descr = cl->descrs[tno];
3980 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
3981 if (valid_value_is_above_me_32(descr, toff)) {
3982 /* We can't indiscriminately write on the w32 node as in the
3983 w64 case, as that might make the node inconsistent with
3984 its parent. So first, pull down to this level. */
3985 SVal* tree = &cl->svals[tno << 3];
3986 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
3987 if (SCE_CACHELINE)
3988 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3989 } else {
3990 /* Writing at this level. Need to fix up 'descr'. */
3991 cl->descrs[tno] = pullup_descr_to_32(descr, toff);
3992 /* At this point, the tree does not match cl->descrs[tno] any
3993 more. The assignments below will fix it up. */
3994 }
3995 }
3996 tl_assert(svNew != SVal_INVALID);
3997 cl->svals[cloff + 0] = svNew;
3998 cl->svals[cloff + 1] = SVal_INVALID;
3999 cl->svals[cloff + 2] = SVal_INVALID;
4000 cl->svals[cloff + 3] = SVal_INVALID;
4001 return;
4002 slowcase: /* misaligned */
4003 stats__cline_32to16splits++;
4004 zsm_write16( a + 0, svNew );
4005 zsm_write16( a + 2, svNew );
4006}
4007
4008/*--------------- ZSM accesses: 64 bit write --------------- */
4009
4010static
4011void zsm_write64 ( Addr a, SVal svNew ) {
4012 CacheLine* cl;
4013 UWord cloff, tno, toff;
4014 stats__cline_set64s++;
4015 if (UNLIKELY(!aligned64(a))) goto slowcase;
4016 cl = get_cacheline(a);
4017 cloff = get_cacheline_offset(a);
4018 tno = get_treeno(a);
4019 toff = get_tree_offset(a); /* == 0 */
4020 cl->descrs[tno] = TREE_DESCR_64;
4021 tl_assert(svNew != SVal_INVALID);
4022 cl->svals[cloff + 0] = svNew;
4023 cl->svals[cloff + 1] = SVal_INVALID;
4024 cl->svals[cloff + 2] = SVal_INVALID;
4025 cl->svals[cloff + 3] = SVal_INVALID;
4026 cl->svals[cloff + 4] = SVal_INVALID;
4027 cl->svals[cloff + 5] = SVal_INVALID;
4028 cl->svals[cloff + 6] = SVal_INVALID;
4029 cl->svals[cloff + 7] = SVal_INVALID;
4030 return;
4031 slowcase: /* misaligned */
4032 stats__cline_64to32splits++;
4033 zsm_write32( a + 0, svNew );
4034 zsm_write32( a + 4, svNew );
4035}
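/* In zsm_write16/32/64 above, the narrower slots covered by a wide
   write are set to SVal_INVALID rather than left alone.  The apparent
   intent is that once the descriptor says "one 16/32/64-bit value
   lives here", the slots underneath it are dead, and poisoning them
   means code that reads them without first pulling the tree back down
   trips an assertion instead of silently using stale shadow values. */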
4036
4037/*------------- ZSM accesses: 8 bit read/copy ------------- */
4038
4039static
4040SVal zsm_read8 ( Addr a ) {
4041 CacheLine* cl;
4042 UWord cloff, tno, toff;
4043 UShort descr;
4044 stats__cline_get8s++;
4045 cl = get_cacheline(a);
4046 cloff = get_cacheline_offset(a);
4047 tno = get_treeno(a);
4048 toff = get_tree_offset(a); /* == 0 .. 7 */
4049 descr = cl->descrs[tno];
4050 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
4051 SVal* tree = &cl->svals[tno << 3];
4052 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
4053 }
4054 return cl->svals[cloff];
4055}
4056
4057static void zsm_copy8 ( Addr src, Addr dst, Bool uu_normalise ) {
4058 SVal sv;
4059 stats__cline_copy8s++;
4060 sv = zsm_read8( src );
4061 zsm_write8( dst, sv );
4062}
4063
4064/* ------------ Shadow memory range setting ops ------------ */
4065
4066void zsm_apply_range___msm_read ( Thr* thr,
4067 Addr a, SizeT len )
4068{
4069 /* fast track a couple of common cases */
4070 if (len == 4 && aligned32(a)) {
4071 zsm_apply32___msm_read( thr, a );
4072 return;
4073 }
4074 if (len == 8 && aligned64(a)) {
4075 zsm_apply64___msm_read( thr, a );
4076 return;
4077 }
4078
4079 /* be completely general (but as efficient as possible) */
4080 if (len == 0) return;
4081
4082 if (!aligned16(a) && len >= 1) {
4083 zsm_apply8___msm_read( thr, a );
4084 a += 1;
4085 len -= 1;
4086 tl_assert(aligned16(a));
4087 }
4088 if (len == 0) return;
4089
4090 if (!aligned32(a) && len >= 2) {
4091 zsm_apply16___msm_read( thr, a );
4092 a += 2;
4093 len -= 2;
4094 tl_assert(aligned32(a));
4095 }
4096 if (len == 0) return;
4097
4098 if (!aligned64(a) && len >= 4) {
4099 zsm_apply32___msm_read( thr, a );
4100 a += 4;
4101 len -= 4;
4102 tl_assert(aligned64(a));
4103 }
4104 if (len == 0) return;
4105
4106 if (len >= 8) {
4107 tl_assert(aligned64(a));
4108 while (len >= 8) {
4109 zsm_apply64___msm_read( thr, a );
4110 a += 8;
4111 len -= 8;
4112 }
4113 tl_assert(aligned64(a));
4114 }
4115 if (len == 0) return;
4116
4117 if (len >= 4)
4118 tl_assert(aligned32(a));
4119 if (len >= 4) {
4120 zsm_apply32___msm_read( thr, a );
4121 a += 4;
4122 len -= 4;
4123 }
4124 if (len == 0) return;
4125
4126 if (len >= 2)
4127 tl_assert(aligned16(a));
4128 if (len >= 2) {
4129 zsm_apply16___msm_read( thr, a );
4130 a += 2;
4131 len -= 2;
4132 }
4133 if (len == 0) return;
4134
4135 if (len >= 1) {
4136 zsm_apply8___msm_read( thr, a );
4137 a += 1;
4138 len -= 1;
4139 }
4140 tl_assert(len == 0);
4141}
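/* Example decomposition (a sketch): a 7-byte read starting at
   a == 0x1001 proceeds as
      zsm_apply8___msm_read ( 0x1001 )   -- 1 byte, reach 16-bit alignment
      zsm_apply16___msm_read( 0x1002 )   -- 2 bytes, reach 32-bit alignment
      zsm_apply32___msm_read( 0x1004 )   -- 4 bytes, nothing left
   i.e. narrow leading accesses to reach alignment, then the widest
   aligned accesses possible, then narrow trailing ones. */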
4142
4143
4144
4145void zsm_apply_range___msm_write ( Thr* thr,
4146 Addr a, SizeT len )
4147{
4148 /* fast track a couple of common cases */
4149 if (len == 4 && aligned32(a)) {
4150 zsm_apply32___msm_write( thr, a );
4151 return;
4152 }
4153 if (len == 8 && aligned64(a)) {
4154 zsm_apply64___msm_write( thr, a );
4155 return;
4156 }
4157
4158 /* be completely general (but as efficient as possible) */
4159 if (len == 0) return;
4160
4161 if (!aligned16(a) && len >= 1) {
4162 zsm_apply8___msm_write( thr, a );
4163 a += 1;
4164 len -= 1;
4165 tl_assert(aligned16(a));
4166 }
4167 if (len == 0) return;
4168
4169 if (!aligned32(a) && len >= 2) {
4170 zsm_apply16___msm_write( thr, a );
4171 a += 2;
4172 len -= 2;
4173 tl_assert(aligned32(a));
4174 }
4175 if (len == 0) return;
4176
4177 if (!aligned64(a) && len >= 4) {
4178 zsm_apply32___msm_write( thr, a );
4179 a += 4;
4180 len -= 4;
4181 tl_assert(aligned64(a));
4182 }
4183 if (len == 0) return;
4184
4185 if (len >= 8) {
4186 tl_assert(aligned64(a));
4187 while (len >= 8) {
4188 zsm_apply64___msm_write( thr, a );
4189 a += 8;
4190 len -= 8;
4191 }
4192 tl_assert(aligned64(a));
4193 }
4194 if (len == 0) return;
4195
4196 if (len >= 4)
4197 tl_assert(aligned32(a));
4198 if (len >= 4) {
4199 zsm_apply32___msm_write( thr, a );
4200 a += 4;
4201 len -= 4;
4202 }
4203 if (len == 0) return;
4204
4205 if (len >= 2)
4206 tl_assert(aligned16(a));
4207 if (len >= 2) {
4208 zsm_apply16___msm_write( thr, a );
4209 a += 2;
4210 len -= 2;
4211 }
4212 if (len == 0) return;
4213
4214 if (len >= 1) {
4215 zsm_apply8___msm_write( thr, a );
4216 a += 1;
4217 len -= 1;
4218 }
4219 tl_assert(len == 0);
4220}
4221
4222
4223
4224
4225/* Block-copy states (needed for implementing realloc()). */
4226
4227static void zsm_copy_range ( Addr src, Addr dst, SizeT len )
4228{
4229 SizeT i;
4230 if (len == 0)
4231 return;
4232
4233 /* assert for non-overlappingness */
4234 tl_assert(src+len <= dst || dst+len <= src);
4235
4236 /* To be simple, just copy byte by byte. But so as not to wreck
4237 performance for later accesses to dst[0 .. len-1], normalise
4238 destination lines as we finish with them, and also normalise the
4239 line containing the first and last address. */
4240 for (i = 0; i < len; i++) {
4241 Bool normalise
4242 = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
4243 || i == 0 /* first in range */
4244 || i == len-1; /* last in range */
4245 zsm_copy8( src+i, dst+i, normalise );
4246 }
4247}
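/* Note: zsm_copy8 currently ignores its 'uu_normalise' argument (the
   uu_ prefix marks it unused), so the normalisation described above is
   advisory only: the hint is computed and passed down, but it has no
   effect until zsm_copy8 acts on it. */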
4248
4249
4250/* For setting address ranges to a given value. Has considerable
4251 sophistication so as to avoid generating large numbers of pointless
4252 cache loads/writebacks for large ranges. */
4253
4254/* Do small ranges in-cache, in the obvious way. */
4255static
4256void zsm_set_range_SMALL ( Addr a, SizeT len, SVal svNew )
4257{
4258 /* fast track a couple of common cases */
4259 if (len == 4 && aligned32(a)) {
4260 zsm_write32( a, svNew );
4261 return;
4262 }
4263 if (len == 8 && aligned64(a)) {
4264 zsm_write64( a, svNew );
4265 return;
4266 }
4267
4268 /* be completely general (but as efficient as possible) */
4269 if (len == 0) return;
4270
4271 if (!aligned16(a) && len >= 1) {
4272 zsm_write8( a, svNew );
4273 a += 1;
4274 len -= 1;
4275 tl_assert(aligned16(a));
4276 }
4277 if (len == 0) return;
4278
4279 if (!aligned32(a) && len >= 2) {
4280 zsm_write16( a, svNew );
4281 a += 2;
4282 len -= 2;
4283 tl_assert(aligned32(a));
4284 }
4285 if (len == 0) return;
4286
4287 if (!aligned64(a) && len >= 4) {
4288 zsm_write32( a, svNew );
4289 a += 4;
4290 len -= 4;
4291 tl_assert(aligned64(a));
4292 }
4293 if (len == 0) return;
4294
4295 if (len >= 8) {
4296 tl_assert(aligned64(a));
4297 while (len >= 8) {
4298 zsm_write64( a, svNew );
4299 a += 8;
4300 len -= 8;
4301 }
4302 tl_assert(aligned64(a));
4303 }
4304 if (len == 0) return;
4305
4306 if (len >= 4)
4307 tl_assert(aligned32(a));
4308 if (len >= 4) {
4309 zsm_write32( a, svNew );
4310 a += 4;
4311 len -= 4;
4312 }
4313 if (len == 0) return;
4314
4315 if (len >= 2)
4316 tl_assert(aligned16(a));
4317 if (len >= 2) {
4318 zsm_write16( a, svNew );
4319 a += 2;
4320 len -= 2;
4321 }
4322 if (len == 0) return;
4323
4324 if (len >= 1) {
4325 zsm_write8( a, svNew );
4326 a += 1;
4327 len -= 1;
4328 }
4329 tl_assert(len == 0);
4330}
4331
4332
4333/* If we're doing a small range, hand off to zsm_set_range_SMALL. But
4334 for larger ranges, try to operate directly on the out-of-cache
4335 representation, rather than dragging lines into the cache,
4336 overwriting them, and forcing them out. This turns out to be an
4337 important performance optimisation. */
4338
4339static void zsm_set_range ( Addr a, SizeT len, SVal svNew )
4340{
4341 tl_assert(svNew != SVal_INVALID);
4342 stats__cache_make_New_arange += (ULong)len;
4343
4344 if (0 && len > 500)
4345 VG_(printf)("make New ( %#lx, %ld )\n", a, len );
4346
4347 if (0) {
4348 static UWord n_New_in_cache = 0;
4349 static UWord n_New_not_in_cache = 0;
4350 /* tag is 'a' with the in-line offset masked out,
4351 eg a[31]..a[4] 0000 */
4352 Addr tag = a & ~(N_LINE_ARANGE - 1);
4353 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
4354 if (LIKELY(tag == cache_shmem.tags0[wix])) {
4355 n_New_in_cache++;
4356 } else {
4357 n_New_not_in_cache++;
4358 }
4359 if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
4360 VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
4361 n_New_in_cache, n_New_not_in_cache );
4362 }
4363
4364 if (LIKELY(len < 2 * N_LINE_ARANGE)) {
4365 zsm_set_range_SMALL( a, len, svNew );
4366 } else {
4367 Addr before_start = a;
4368 Addr aligned_start = cacheline_ROUNDUP(a);
4369 Addr after_start = cacheline_ROUNDDN(a + len);
4370 UWord before_len = aligned_start - before_start;
4371 UWord aligned_len = after_start - aligned_start;
4372 UWord after_len = a + len - after_start;
4373 tl_assert(before_start <= aligned_start);
4374 tl_assert(aligned_start <= after_start);
4375 tl_assert(before_len < N_LINE_ARANGE);
4376 tl_assert(after_len < N_LINE_ARANGE);
4377 tl_assert(get_cacheline_offset(aligned_start) == 0);
4378 if (get_cacheline_offset(a) == 0) {
4379 tl_assert(before_len == 0);
4380 tl_assert(a == aligned_start);
4381 }
4382 if (get_cacheline_offset(a+len) == 0) {
4383 tl_assert(after_len == 0);
4384 tl_assert(after_start == a+len);
4385 }
4386 if (before_len > 0) {
4387 zsm_set_range_SMALL( before_start, before_len, svNew );
4388 }
4389 if (after_len > 0) {
4390 zsm_set_range_SMALL( after_start, after_len, svNew );
4391 }
4392 stats__cache_make_New_inZrep += (ULong)aligned_len;
4393
4394 while (1) {
4395 Addr tag;
4396 UWord wix;
4397 if (aligned_start >= after_start)
4398 break;
4399 tl_assert(get_cacheline_offset(aligned_start) == 0);
4400 tag = aligned_start & ~(N_LINE_ARANGE - 1);
4401 wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
4402 if (tag == cache_shmem.tags0[wix]) {
4403 UWord i;
4404 for (i = 0; i < N_LINE_ARANGE / 8; i++)
4405 zsm_write64( aligned_start + i * 8, svNew );
4406 } else {
4407 UWord i;
4408 Word zix;
4409 SecMap* sm;
4410 LineZ* lineZ;
4411 /* This line is not in the cache. Do not force it in; instead
4412 modify it in-place. */
4413 /* find the Z line to write in and rcdec it or the
4414 associated F line. */
4415 find_Z_for_writing( &sm, &zix, tag );
4416 tl_assert(sm);
4417 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
4418 lineZ = &sm->linesZ[zix];
4419 lineZ->dict[0] = svNew;
4420 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
4421 for (i = 0; i < N_LINE_ARANGE/4; i++)
4422 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
4423 rcinc_LineZ(lineZ);
4424 }
4425 aligned_start += N_LINE_ARANGE;
4426 aligned_len -= N_LINE_ARANGE;
4427 }
4428 tl_assert(aligned_start == after_start);
4429 tl_assert(aligned_len == 0);
4430 }
4431}
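/* Sketch of the large-range path (made-up numbers, assuming
   N_LINE_ARANGE == 64): zsm_set_range( 0x1010, 0x150, sv ) splits into
      zsm_set_range_SMALL( 0x1010, 0x30, sv )    -- up to the line boundary
      four whole lines 0x1040 .. 0x113F, each written either with
        zsm_write64 (if that line happens to be in the cache) or
        directly as a one-entry Z line in the out-of-cache SecMap
      zsm_set_range_SMALL( 0x1140, 0x20, sv )    -- the tail
   so large ranges mostly bypass the cache instead of dragging every
   line through it. */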
4432
4433
4434/////////////////////////////////////////////////////////
4435// //
4436// Synchronisation objects //
4437// //
4438/////////////////////////////////////////////////////////
4439
4440// (UInt) `echo "Synchronisation object" | md5sum`
4441#define SO_MAGIC 0x56b3c5b0U
4442
4443struct _SO {
4444 VtsID viR; /* r-clock of sender */
4445 VtsID viW; /* w-clock of sender */
4446 UInt magic;
4447};
4448
4449static SO* SO__Alloc ( void ) {
4450 SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
4451 so->viR = VtsID_INVALID;
4452 so->viW = VtsID_INVALID;
4453 so->magic = SO_MAGIC;
4454 return so;
4455}
4456static void SO__Dealloc ( SO* so ) {
4457 tl_assert(so);
4458 tl_assert(so->magic == SO_MAGIC);
4459 if (so->viR == VtsID_INVALID) {
4460 tl_assert(so->viW == VtsID_INVALID);
4461 } else {
4462 tl_assert(so->viW != VtsID_INVALID);
4463 VtsID__rcdec(so->viR);
4464 VtsID__rcdec(so->viW);
4465 }
4466 so->magic = 0;
4467 HG_(free)( so );
4468}
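/* Invariant: an SO either holds no clocks at all (both fields equal
   VtsID_INVALID, meaning nothing has ever been sent through it) or
   holds a matched pair of valid read/write clocks.  The assertions in
   SO__Dealloc and in libhb_so_send/recv below rely on this. */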
4469
4470
4471/////////////////////////////////////////////////////////
4472// //
4473// Top Level API //
4474// //
4475/////////////////////////////////////////////////////////
4476
4477static void show_thread_state ( HChar* str, Thr* t )
4478{
4479 if (1) return;
4480 if (t->viR == t->viW) {
4481 VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
4482 VtsID__pp( t->viR );
4483 VG_(printf)("%s","\n");
4484 } else {
4485 VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
4486 VtsID__pp( t->viR );
4487 VG_(printf)(" viW %u==", t->viW);
4488 VtsID__pp( t->viW );
4489 VG_(printf)("%s","\n");
4490 }
4491}
4492
4493
4494Thr* libhb_init (
4495 void (*get_stacktrace)( Thr*, Addr*, UWord ),
sewardjd52392d2008-11-08 20:36:26 +00004496 ExeContext* (*get_EC)( Thr* )
sewardjf98e1c02008-10-25 16:22:41 +00004497 )
4498{
4499 Thr* thr;
4500 VtsID vi;
4501 tl_assert(get_stacktrace);
sewardjf98e1c02008-10-25 16:22:41 +00004502 tl_assert(get_EC);
4503 main_get_stacktrace = get_stacktrace;
sewardjf98e1c02008-10-25 16:22:41 +00004504 main_get_EC = get_EC;
4505
4506 // No need to initialise hg_wordfm.
4507 // No need to initialise hg_wordset.
4508
4509 vts_set_init();
4510 vts_tab_init();
4511 event_map_init();
4512 VtsID__invalidate_caches();
4513
4514 // initialise shadow memory
4515 zsm_init( SVal__rcinc, SVal__rcdec );
4516
4517 thr = Thr__new();
4518 vi = VtsID__mk_Singleton( thr, 1 );
4519 thr->viR = vi;
4520 thr->viW = vi;
4521 VtsID__rcinc(thr->viR);
4522 VtsID__rcinc(thr->viW);
4523
4524 show_thread_state(" root", thr);
4525 return thr;
4526}
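/* Hypothetical client-side sketch (not part of this library; the
   names my_get_stacktrace and my_get_EC are invented for
   illustration).  A tool built on libhb supplies two callbacks and
   keeps the returned root thread:

      Thr* root = libhb_init( my_get_stacktrace, my_get_EC );

   where my_get_stacktrace(Thr*, Addr*, UWord) fills in a stack trace
   for the given thread and my_get_EC(Thr*) returns its current
   ExeContext.  'root' then stands for the already-running main
   thread. */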
4527
4528Thr* libhb_create ( Thr* parent )
4529{
4530 /* The child's VTSs are copies of the parent's VTSs, but ticked at
4531 the child's index. Since the child's index is guaranteed
4532 unique, it has never been seen before, so the implicit value
4533 before the tick is zero and after that is one. */
4534 Thr* child = Thr__new();
4535
4536 child->viR = VtsID__tick( parent->viR, child );
4537 child->viW = VtsID__tick( parent->viW, child );
4538 VtsID__rcinc(child->viR);
4539 VtsID__rcinc(child->viW);
4540
4541 tl_assert(VtsID__indexAt( child->viR, child ) == 1);
4542 tl_assert(VtsID__indexAt( child->viW, child ) == 1);
4543
4544 /* and the parent has to move along too */
4545 VtsID__rcdec(parent->viR);
4546 VtsID__rcdec(parent->viW);
4547 parent->viR = VtsID__tick( parent->viR, parent );
4548 parent->viW = VtsID__tick( parent->viW, parent );
4549 VtsID__rcinc(parent->viR);
4550 VtsID__rcinc(parent->viW);
4551
4552 show_thread_state(" child", child);
4553 show_thread_state("parent", parent);
4554
4555 return child;
4556}
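/* Concrete (made-up) example: if the parent's clocks are
   viR = viW = [P:4] when libhb_create is called, then afterwards
      child:   viR = viW = [P:4, C:1]
      parent:  viR = viW = [P:5]
   so the child's first logical timestamp is 1, and the parent has
   moved past the fork point: accesses the parent made before the fork
   happen-before everything the child does. */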
4557
4558/* Shut down the library, and print stats (in fact that's _all_
4559 this is for). */
4560void libhb_shutdown ( Bool show_stats )
4561{
4562 if (show_stats) {
4563 VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
4564 VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
4565 stats__secmaps_allocd,
4566 stats__secmap_ga_space_covered);
4567 VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
4568 stats__secmap_linesZ_allocd,
4569 stats__secmap_linesZ_bytes);
4570 VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
4571 stats__secmap_linesF_allocd,
4572 stats__secmap_linesF_bytes);
4573 VG_(printf)(" secmaps: %'10lu iterator steppings\n",
4574 stats__secmap_iterator_steppings);
4575 VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
4576 stats__secmaps_search, stats__secmaps_search_slow);
4577
4578 VG_(printf)("%s","\n");
4579 VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
4580 stats__cache_totrefs, stats__cache_totmisses );
4581 VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
4582 stats__cache_Z_fetches, stats__cache_F_fetches );
4583 VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
4584 stats__cache_Z_wbacks, stats__cache_F_wbacks );
4585 VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n",
4586 stats__cache_invals, stats__cache_flushes );
4587 VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
4588 stats__cache_make_New_arange,
4589 stats__cache_make_New_inZrep);
4590
4591 VG_(printf)("%s","\n");
4592 VG_(printf)(" cline: %'10lu normalises\n",
4593 stats__cline_normalises );
4594 VG_(printf)(" cline: rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4595 stats__cline_read64s,
4596 stats__cline_read32s,
4597 stats__cline_read16s,
4598 stats__cline_read8s );
4599 VG_(printf)(" cline: wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4600 stats__cline_write64s,
4601 stats__cline_write32s,
4602 stats__cline_write16s,
4603 stats__cline_write8s );
4604 VG_(printf)(" cline: sets 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4605 stats__cline_set64s,
4606 stats__cline_set32s,
4607 stats__cline_set16s,
4608 stats__cline_set8s );
4609 VG_(printf)(" cline: get1s %'lu, copy1s %'lu\n",
4610 stats__cline_get8s, stats__cline_copy8s );
4611 VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
4612 stats__cline_64to32splits,
4613 stats__cline_32to16splits,
4614 stats__cline_16to8splits );
4615 VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
4616 stats__cline_64to32pulldown,
4617 stats__cline_32to16pulldown,
4618 stats__cline_16to8pulldown );
4619 if (0)
4620 VG_(printf)(" cline: sizeof(CacheLineZ) %ld, covers %ld bytes of arange\n",
4621 (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);
4622
4623 VG_(printf)("%s","\n");
4624
4625 VG_(printf)(" libhb: %'13llu msm_read (%'llu changed)\n",
4626 stats__msm_read, stats__msm_read_change);
4627 VG_(printf)(" libhb: %'13llu msm_write (%'llu changed)\n",
4628 stats__msm_write, stats__msm_write_change);
4629 VG_(printf)(" libhb: %'13llu getOrd queries (%'llu misses)\n",
4630 stats__getOrdering_queries, stats__getOrdering_misses);
4631 VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
4632 stats__join2_queries, stats__join2_misses);
4633
4634 VG_(printf)("%s","\n");
4635 VG_(printf)(
4636 " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
4637 VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
4638 );
4639 VG_(printf)( " libhb: %lu entries in vts_set\n",
4640 VG_(sizeFM)( vts_set ) );
4641
4642 VG_(printf)("%s","\n");
4643 VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
4644 stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
4645 stats__ctxt_rcdec2,
4646 stats__ctxt_rcdec3 );
4647 VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
4648 stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
4649 VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n",
4650 (UWord)N_RCEC_TAB,
4651 stats__ctxt_tab_curr );
4652 VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
4653 stats__ctxt_tab_qs,
4654 stats__ctxt_tab_cmps );
4655#if 0
4656 VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
4657 VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
4658 VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
4659 VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
4660 VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
4661 VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
4662 VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
4663 VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
4664 VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
4665 VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
4666 VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
4667 VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
4668 VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
4669 VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
4670
4671 VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
4672 VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
4673 VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
4674 VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
4675#endif
4676
4677 VG_(printf)("%s","<<< END libhb stats >>>\n");
4678 VG_(printf)("%s","\n");
4679
4680 }
4681}
4682
4683void libhb_async_exit ( Thr* thr )
4684{
4685 /* is there anything we need to do? */
4686}
4687
4688/* Both Segs and SOs point to VTSs. However, there is no sharing, so
4689 a Seg that points at a VTS is its one-and-only owner, and ditto for
4690 a SO that points at a VTS. */
4691
4692SO* libhb_so_alloc ( void )
4693{
4694 return SO__Alloc();
4695}
4696
4697void libhb_so_dealloc ( SO* so )
4698{
4699 tl_assert(so);
4700 tl_assert(so->magic == SO_MAGIC);
4701 SO__Dealloc(so);
4702}
4703
4704/* See comments in libhb.h for details on the meaning of
4705 strong vs weak sends and strong vs weak receives. */
4706void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
4707{
4708 /* Copy the VTSs from 'thr' into the sync object, and then move
4709 the thread along one step. */
4710
4711 tl_assert(so);
4712 tl_assert(so->magic == SO_MAGIC);
4713
4714 /* stay sane .. a thread's read-clock must always lead or be the
4715 same as its write-clock */
4716 { POrd ord = VtsID__getOrdering(thr->viW, thr->viR);
4717 tl_assert(ord == POrd_EQ || ord == POrd_LT);
4718 }
4719
4720 /* since we're overwriting the VtsIDs in the SO, we need to drop
4721 any references made by the previous contents thereof */
4722 if (so->viR == VtsID_INVALID) {
4723 tl_assert(so->viW == VtsID_INVALID);
4724 so->viR = thr->viR;
4725 so->viW = thr->viW;
4726 VtsID__rcinc(so->viR);
4727 VtsID__rcinc(so->viW);
4728 } else {
4729 /* In a strong send, we dump any previous VC in the SO and
4730 install the sending thread's VC instead. For a weak send we
4731 must join2 with what's already there. */
4732 tl_assert(so->viW != VtsID_INVALID);
4733 VtsID__rcdec(so->viR);
4734 VtsID__rcdec(so->viW);
4735 so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
4736 so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
4737 VtsID__rcinc(so->viR);
4738 VtsID__rcinc(so->viW);
4739 }
4740
4741 /* move both parent clocks along */
4742 VtsID__rcdec(thr->viR);
4743 VtsID__rcdec(thr->viW);
4744 thr->viR = VtsID__tick( thr->viR, thr );
4745 thr->viW = VtsID__tick( thr->viW, thr );
4746 VtsID__rcinc(thr->viR);
4747 VtsID__rcinc(thr->viW);
4748 if (strong_send)
4749 show_thread_state("s-send", thr);
4750 else
4751 show_thread_state("w-send", thr);
4752}
4753
4754void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
4755{
4756 tl_assert(so);
4757 tl_assert(so->magic == SO_MAGIC);
4758
4759 if (so->viR != VtsID_INVALID) {
4760 tl_assert(so->viW != VtsID_INVALID);
4761
4762 /* Weak receive (basically, an R-acquisition of a R-W lock).
4763 This advances the read-clock of the receiver, but not the
4764 write-clock. */
4765 VtsID__rcdec(thr->viR);
4766 thr->viR = VtsID__join2( thr->viR, so->viR );
4767 VtsID__rcinc(thr->viR);
4768
4769 /* For a strong receive, we also advance the receiver's write
4770 clock, which means the receive as a whole is essentially
4771 equivalent to a W-acquisition of a R-W lock. */
4772 if (strong_recv) {
4773 VtsID__rcdec(thr->viW);
4774 thr->viW = VtsID__join2( thr->viW, so->viW );
4775 VtsID__rcinc(thr->viW);
4776 }
4777
4778 if (strong_recv)
4779 show_thread_state("s-recv", thr);
4780 else
4781 show_thread_state("w-recv", thr);
4782
4783 } else {
4784 tl_assert(so->viW == VtsID_INVALID);
4785 /* Deal with degenerate case: 'so' has no vts, so there has been
4786 no message posted to it. Just ignore this case. */
4787 show_thread_state("d-recv", thr);
4788 }
4789}
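/* Sketch of the intended use (clock values invented): to model a
   message send from thread A to thread B through some synchronising
   object 'so', the client calls

      libhb_so_send( A, so, True );    strong send: so takes A's clocks
      libhb_so_recv( B, so, True );    strong recv: B joins them in

   after which everything A did before the send happens-before
   everything B does after the receive.  A weak send instead joins A's
   clocks into whatever the SO already holds (useful when several
   signallers feed one object), and a weak receive advances only the
   receiver's read-clock, like acquiring a reader-writer lock for
   reading. */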
4790
4791Bool libhb_so_everSent ( SO* so )
4792{
4793 if (so->viR == VtsID_INVALID) {
4794 tl_assert(so->viW == VtsID_INVALID);
4795 return False;
4796 } else {
4797 tl_assert(so->viW != VtsID_INVALID);
4798 return True;
4799 }
4800}
4801
4802#define XXX1 0 // 0x67a106c
4803#define XXX2 0
4804
4805static Bool TRACEME(Addr a, SizeT szB) {
4806 if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
4807 if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
4808 return False;
4809}
4810static void trace ( Thr* thr, Addr a, SizeT szB, HChar* s ) {
4811 SVal sv = zsm_read8(a);
4812 VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
4813 show_thread_state("", thr);
4814 VG_(printf)("%s","\n");
4815}
4816
4817void libhb_range_new ( Thr* thr, Addr a, SizeT szB )
4818{
4819 SVal sv = SVal__mkC(thr->viW, thr->viW);
4820 tl_assert(is_sane_SVal_C(sv));
4821 if(TRACEME(a,szB))trace(thr,a,szB,"nw-before");
4822 zsm_set_range( a, szB, sv );
4823 if(TRACEME(a,szB))trace(thr,a,szB,"nw-after ");
4824}
4825
4826void libhb_range_noaccess ( Thr* thr, Addr a, SizeT szB )
4827{
4828 if(TRACEME(a,szB))trace(thr,a,szB,"NA-before");
4829 zsm_set_range( a, szB, SVal__mkA() );
4830 if(TRACEME(a,szB))trace(thr,a,szB,"NA-after ");
4831}
4832
4833void* libhb_get_Thr_opaque ( Thr* thr ) {
4834 tl_assert(thr);
4835 return thr->opaque;
4836}
4837
4838void libhb_set_Thr_opaque ( Thr* thr, void* v ) {
4839 tl_assert(thr);
4840 thr->opaque = v;
4841}
4842
4843void libhb_copy_shadow_state ( Addr dst, Addr src, SizeT len )
4844{
4845 zsm_copy_range(dst, src, len);
4846}
4847
4848void libhb_maybe_GC ( void )
4849{
4850 event_map_maybe_GC();
4851 /* If there are still freelist entries available, no need for a
4852 GC. */
4853 if (vts_tab_freelist != VtsID_INVALID)
4854 return;
4855 /* So all the table entries are full, and we're having to expand
4856 the table. But did we hit the threshold point yet? */
4857 if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
4858 return;
4859 vts_tab__do_GC( False/*don't show stats*/ );
4860}
4861
4862
4863/////////////////////////////////////////////////////////////////
4864/////////////////////////////////////////////////////////////////
4865// //
4866// SECTION END main library //
4867// //
4868/////////////////////////////////////////////////////////////////
4869/////////////////////////////////////////////////////////////////
4870
4871/*--------------------------------------------------------------------*/
4872/*--- end libhb_main.c ---*/
4873/*--------------------------------------------------------------------*/