
/*--------------------------------------------------------------------*/
/*--- LibHB: a library for implementing and checking               ---*/
/*--- the happens-before relationship in concurrent programs.      ---*/
/*---                                                 libhb_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of LibHB, a library for implementing and checking
   the happens-before relationship in concurrent programs.

   Copyright (C) 2008-2009 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_sparsewa.h"
#include "pub_tool_xarray.h"
#include "pub_tool_oset.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
#include "pub_tool_options.h"        // VG_(clo_verbosity)
#include "hg_basics.h"
#include "hg_wordset.h"
#include "hg_lock_n_thread.h"
#include "hg_errors.h"

#include "libhb.h"


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//  Debugging #defines                                         //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Check the sanity of shadow values in the core memory state
   machine.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_MSM 1
#else
#  define CHECK_MSM 0
#endif


/* Check sanity (reference counts, etc) in the conflicting access
   machinery.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_CEM 1
#else
#  define CHECK_CEM 0
#endif


/* Check sanity in the compressed shadow memory machinery,
   particularly in its caching innards.  Unfortunately there's no
   almost-zero-cost way to make them selectable at run time.  Hence
   set the #if 0 to #if 1 and rebuild if you want them. */
#if 0
#  define CHECK_ZSM 1  /* do sanity-check CacheLine stuff */
#  define inline __attribute__((noinline))
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define CHECK_ZSM 0  /* don't sanity-check CacheLine stuff */
#endif


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//  Forward declarations                                       //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* fwds for
   Globals needed by other parts of the library.  These are set
   once at startup and then never changed. */
static void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
static ExeContext* (*main_get_EC)( Thr* ) = NULL;



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//  SECTION BEGIN compressed shadow memory                     //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_ZSM_H
#define __HB_ZSM_H

typedef  ULong  SVal;

/* This value has special significance to the implementation, and callers
   may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)

/* This is the default value for shadow memory.  Initially the shadow
   memory contains no accessible areas and so all reads produce this
   value.  TODO: make this caller-defineable. */
#define SVal_NOACCESS (2ULL << 62)

/* Initialise the library.  Once initialised, it will (or may) call
   rcinc and rcdec in response to all the calls below, in order to
   allow the user to do reference counting on the SVals stored herein.
   It is important to understand, however, that due to internal
   caching, the reference counts are in general inaccurate, and can be
   both above or below the true reference count for an item.  In
   particular, the library may indicate that the reference count for
   an item is zero, when in fact it is not.

   To make the reference counting exact and therefore non-pointless,
   call zsm_flush_cache.  Immediately after it returns, the reference
   counts for all items, as deduced by the caller by observing calls
   to rcinc and rcdec, will be correct, and so any items with a zero
   reference count may be freed (or at least considered to be
   unreferenced by this library).
*/
static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );

static void zsm_set_range   ( Addr, SizeT, SVal );
static SVal zsm_read8       ( Addr );
static void zsm_copy_range  ( Addr, Addr, SizeT );
static void zsm_flush_cache ( void );

#endif /* ! __HB_ZSM_H */


/* Round a up to the next multiple of N.  N must be a power of 2 */
#define ROUNDUP(a, N)   ((a + N - 1) & ~(N-1))
/* Round a down to the next multiple of N.  N must be a power of 2 */
#define ROUNDDN(a, N)   ((a) & ~(N-1))
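/* For example, with N == 8:  ROUNDUP(13,8) == 16, ROUNDUP(16,8) == 16,
   and ROUNDDN(13,8) == 8.  Both macros rely on N being a power of 2,
   so that ~(N-1) is a mask which clears the low-order bits. */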


/* ------ User-supplied RC functions ------ */
static void(*rcinc)(SVal) = NULL;
static void(*rcdec)(SVal) = NULL;


/* ------ CacheLine ------ */

#define N_LINE_BITS      6 /* must be >= 3 */
#define N_LINE_ARANGE    (1 << N_LINE_BITS)
#define N_LINE_TREES     (N_LINE_ARANGE >> 3)

typedef
   struct {
      UShort descrs[N_LINE_TREES];
      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
   }
   CacheLine;

#define TREE_DESCR_16_0 (1<<0)
#define TREE_DESCR_32_0 (1<<1)
#define TREE_DESCR_16_1 (1<<2)
#define TREE_DESCR_64   (1<<3)
#define TREE_DESCR_16_2 (1<<4)
#define TREE_DESCR_32_1 (1<<5)
#define TREE_DESCR_16_3 (1<<6)
#define TREE_DESCR_8_0  (1<<7)
#define TREE_DESCR_8_1  (1<<8)
#define TREE_DESCR_8_2  (1<<9)
#define TREE_DESCR_8_3  (1<<10)
#define TREE_DESCR_8_4  (1<<11)
#define TREE_DESCR_8_5  (1<<12)
#define TREE_DESCR_8_6  (1<<13)
#define TREE_DESCR_8_7  (1<<14)
#define TREE_DESCR_DTY  (1<<15)
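
/* Each 8-byte tree within a CacheLine is summarised by one 16-bit
   descriptor built from the bits above.  A set bit says that the tree
   holds a valid SVal at that granularity and offset.  For example, a
   tree describing one 32-bit access at offset 0, one 16-bit access at
   offset 4 and two 8-bit accesses at offsets 6 and 7 has descriptor
   TREE_DESCR_32_0 | TREE_DESCR_16_2 | TREE_DESCR_8_6 | TREE_DESCR_8_7
   (see descr_to_validbits below for the full set of valid layouts). */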

typedef
   struct {
      SVal  dict[4]; /* can represent up to 4 diff values in the line */
      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
                                      dict indexes */
      /* if dict[0] == SVal_INVALID then dict[1] is the index of the
         LineF to use, and dict[2..] are also SVal_INVALID. */
   }
   LineZ; /* compressed rep for a cache line */

typedef
   struct {
      Bool inUse;
      SVal w64s[N_LINE_ARANGE];
   }
   LineF; /* full rep for a cache line */
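
/* Worked example of the compressed (Z) representation: a cache line
   covers N_LINE_ARANGE == 64 bytes, so .ix2s holds 64 two-bit indexes
   packed into 16 bytes.  If the line's 64 shadow values use at most 4
   distinct SVals, those go in .dict[0..3] and each byte's 2-bit index
   selects one of them -- 32 + 16 bytes instead of the 64 * 8 bytes
   needed by the full (F) representation.  Lines needing more than 4
   distinct values fall back to a LineF (see cacheline_wback below). */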

/* Shadow memory.
   Primary map is a WordFM Addr SecMap*.
   SecMaps cover some page-size-ish section of address space and hold
     a compressed representation.
   CacheLine-sized chunks of SecMaps are copied into a Cache, being
   decompressed when moved into the cache and recompressed on the
   way out.  Because of this, the cache must operate as a writeback
   cache, not a writethrough one.

   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
   N_SECMAP_BITS must be >= N_LINE_BITS.
*/
#define N_SECMAP_BITS   13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)

/* The data in the SecMap is held in the array of LineZs.  Each LineZ
   either carries the required data directly, in a compressed
   representation, or it holds (in .dict[1]) an index to the LineF in
   .linesF that holds the full representation, with .dict[0] set to
   SVal_INVALID to mark that case.

   Currently-unused LineF's have their .inUse bit set to zero.
   Since each in-use LineF is referred to by exactly one LineZ,
   the number of .linesZ[] that refer to .linesF should equal
   the number of .linesF[] that have .inUse == True.

   RC obligations: the RCs presented to the user include exactly
   the values in:
     * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
     * F reps that are in use (.inUse == True)

   Hence the following actions at the following transitions are required:

   F rep: .inUse==True  -> .inUse==False        -- rcdec_LineF
   F rep: .inUse==False -> .inUse==True         -- rcinc_LineF
   Z rep: .dict[0] from other to SVal_INVALID   -- rcdec_LineZ
   Z rep: .dict[0] from SVal_INVALID to other   -- rcinc_LineZ
*/
typedef
   struct {
      UInt   magic;
      LineZ  linesZ[N_SECMAP_ZLINES];
      LineF* linesF;
      UInt   linesF_size;
   }
   SecMap;

#define SecMap_MAGIC   0x571e58cbU

static inline Bool is_sane_SecMap ( SecMap* sm ) {
   return sm != NULL && sm->magic == SecMap_MAGIC;
}

/* ------ Cache ------ */

#define N_WAY_BITS 16
#define N_WAY_NENT (1 << N_WAY_BITS)

/* Each tag is the address of the associated CacheLine, rounded down
   to a CacheLine address boundary.  A CacheLine size must be a power
   of 2 and must be 8 or more.  Hence an easy way to initialise the
   cache so it is empty is to set all the tag values to any value % 8
   != 0, eg 1.  This means all queries in the cache initially miss.
   It does however require us to detect, and not write back, any line
   with a bogus tag. */
typedef
   struct {
      CacheLine lyns0[N_WAY_NENT];
      Addr      tags0[N_WAY_NENT];
   }
   Cache;

static inline Bool is_valid_scache_tag ( Addr tag ) {
   /* a valid tag should be naturally aligned to the start of
      a CacheLine. */
   return 0 == (tag & (N_LINE_ARANGE - 1));
}
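
/* The cache is effectively direct-mapped: N_WAY_NENT == 65536 lines
   of 64 bytes each, so it can hold shadow state for up to 4MB of
   address space at once.  A line's slot is chosen from address bits
   [N_LINE_BITS .. N_LINE_BITS+N_WAY_BITS-1]; see get_cacheline
   below. */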


/* --------- Primary data structures --------- */

/* Shadow memory primary map */
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
static Cache   cache_shmem;


static UWord stats__secmaps_search       = 0; // # SM finds
static UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
static UWord stats__secmap_linesF_bytes  = 0; // .. using this much storage
static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
static UWord stats__cache_F_fetches      = 0; // # F lines fetched
static UWord stats__cache_F_wbacks       = 0; // # F lines written back
static UWord stats__cache_invals         = 0; // # cache invals
static UWord stats__cache_flushes        = 0; // # cache flushes
static UWord stats__cache_totrefs        = 0; // # total accesses
static UWord stats__cache_totmisses      = 0; // # misses
static ULong stats__cache_make_New_arange = 0; // total arange made New
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
static UWord stats__cline_normalises     = 0; // # calls to cacheline_normalise
static UWord stats__cline_read64s        = 0; // # calls to s_m_read64
static UWord stats__cline_read32s        = 0; // # calls to s_m_read32
static UWord stats__cline_read16s        = 0; // # calls to s_m_read16
static UWord stats__cline_read8s         = 0; // # calls to s_m_read8
static UWord stats__cline_write64s       = 0; // # calls to s_m_write64
static UWord stats__cline_write32s       = 0; // # calls to s_m_write32
static UWord stats__cline_write16s       = 0; // # calls to s_m_write16
static UWord stats__cline_write8s        = 0; // # calls to s_m_write8
static UWord stats__cline_set64s         = 0; // # calls to s_m_set64
static UWord stats__cline_set32s         = 0; // # calls to s_m_set32
static UWord stats__cline_set16s         = 0; // # calls to s_m_set16
static UWord stats__cline_set8s          = 0; // # calls to s_m_set8
static UWord stats__cline_get8s          = 0; // # calls to s_m_get8
static UWord stats__cline_copy8s         = 0; // # calls to s_m_copy8
static UWord stats__cline_64to32splits   = 0; // # 64-bit accesses split
static UWord stats__cline_32to16splits   = 0; // # 32-bit accesses split
static UWord stats__cline_16to8splits    = 0; // # 16-bit accesses split
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8

static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
   return a & ~(N_SECMAP_ARANGE - 1);
}
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
   return a & (N_SECMAP_ARANGE - 1);
}
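
/* With N_SECMAP_BITS == 13, each SecMap covers 8192 bytes of address
   space and hence holds 8192/64 == 128 LineZs.  For an address a, the
   SecMap base is a with the low 13 bits cleared, and the offset
   within the SecMap is just those low 13 bits. */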


/*----------------------------------------------------------------*/
/*--- map_shmem :: WordFM Addr SecMap                          ---*/
/*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
/*----------------------------------------------------------------*/

/*--------------- SecMap allocation --------------- */

static HChar* shmem__bigchunk_next = NULL;
static HChar* shmem__bigchunk_end1 = NULL;

static void* shmem__bigchunk_alloc ( SizeT n )
{
   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
   tl_assert(n > 0);
   n = VG_ROUNDUP(n, 16);
   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
      if (0)
      VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
                  (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
      if (shmem__bigchunk_next == NULL)
         VG_(out_of_memory_NORETURN)(
            "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
   }
   tl_assert(shmem__bigchunk_next);
   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
   shmem__bigchunk_next += n;
   return shmem__bigchunk_next - n;
}

static SecMap* shmem__alloc_SecMap ( void )
{
   Word    i, j;
   SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
   if (0) VG_(printf)("alloc_SecMap %p\n",sm);
   tl_assert(sm);
   sm->magic = SecMap_MAGIC;
   for (i = 0; i < N_SECMAP_ZLINES; i++) {
      sm->linesZ[i].dict[0] = SVal_NOACCESS;
      sm->linesZ[i].dict[1] = SVal_INVALID;
      sm->linesZ[i].dict[2] = SVal_INVALID;
      sm->linesZ[i].dict[3] = SVal_INVALID;
      for (j = 0; j < N_LINE_ARANGE/4; j++)
         sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
   }
   sm->linesF      = NULL;
   sm->linesF_size = 0;
   stats__secmaps_allocd++;
   stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
   stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
   stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
   return sm;
}

typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };

static SecMap* shmem__find_SecMap ( Addr ga )
{
   SecMap* sm    = NULL;
   Addr    gaKey = shmem__round_to_SecMap_base(ga);
   // Cache
   stats__secmaps_search++;
   if (LIKELY(gaKey == smCache[0].gaKey))
      return smCache[0].sm;
   if (LIKELY(gaKey == smCache[1].gaKey)) {
      SMCacheEnt tmp = smCache[0];
      smCache[0] = smCache[1];
      smCache[1] = tmp;
      return smCache[0].sm;
   }
   if (gaKey == smCache[2].gaKey) {
      SMCacheEnt tmp = smCache[1];
      smCache[1] = smCache[2];
      smCache[2] = tmp;
      return smCache[1].sm;
   }
   // end Cache
   stats__secmaps_search_slow++;
   if (VG_(lookupFM)( map_shmem,
                      NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
      tl_assert(sm != NULL);
      smCache[2] = smCache[1];
      smCache[1] = smCache[0];
      smCache[0].gaKey = gaKey;
      smCache[0].sm    = sm;
   } else {
      tl_assert(sm == NULL);
   }
   return sm;
}
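
/* smCache is a tiny 3-entry move-to-front cache in front of the
   WordFM lookup: a hit on entry 1 or 2 swaps it one slot towards the
   front, and a slow-path lookup from the WordFM installs its result
   at the front.  The initial gaKey of 1 can never match, since real
   keys are SecMap-base aligned. */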

static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
{
   SecMap* sm = shmem__find_SecMap ( ga );
   if (LIKELY(sm)) {
      return sm;
   } else {
      /* create a new one */
      Addr gaKey = shmem__round_to_SecMap_base(ga);
      sm = shmem__alloc_SecMap();
      tl_assert(sm);
      VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
      return sm;
   }
}


/* ------------ LineF and LineZ related ------------ */

static void rcinc_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcinc(lineF->w64s[i]);
}

static void rcdec_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcdec(lineF->w64s[i]);
}

static void rcinc_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcinc(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
}

static void rcdec_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcdec(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
}

inline
static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
   Word bix, shft, mask, prep;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   mask = 3 << shft;
   prep = b2 << shft;
   arr[bix] = (arr[bix] & ~mask) | prep;
}

inline
static UWord read_twobit_array ( UChar* arr, UWord ix ) {
   Word bix, shft;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   return (arr[bix] >> shft) & 3;
}
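
/* Example: for ix == 5 the index lives in arr[1] (5 >> 2 == 1) at bit
   offset 2 (2 * (5 & 3) == 2), so write_twobit_array(arr, 5, 3) sets
   bits 3:2 of arr[1] and read_twobit_array(arr, 5) reads them back. */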

/* Given address 'tag', find either the Z or F line containing relevant
   data, so it can be read into the cache.
*/
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
                                  /*OUT*/LineF** fp, Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      lineZ = NULL;
   }
   *zp = lineZ;
   *fp = lineF;
}

/* Given address 'tag', return the relevant SecMap and the index of
   the LineZ within it, in the expectation that the line is to be
   overwritten.  Regardless of whether 'tag' is currently associated
   with a Z or F representation, rcdec the current representation, in
   recognition of the fact that the contents are just about to be
   overwritten. */
static __attribute__((noinline))
void find_Z_for_writing ( /*OUT*/SecMap** smp,
                          /*OUT*/Word* zixp,
                          Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   /* re RCs, we are freeing up this LineZ/LineF so that new data can
      be parked in it.  Hence have to rcdec it accordingly. */
   /* If lineZ has an associated lineF, free it up. */
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      rcdec_LineF(lineF);
      lineF->inUse = False;
   } else {
      rcdec_LineZ(lineZ);
   }
   *smp  = sm;
   *zixp = zix;
}

static __attribute__((noinline))
void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
   UInt   i, new_size;
   LineF* nyu;

   if (sm->linesF) {
      tl_assert(sm->linesF_size > 0);
   } else {
      tl_assert(sm->linesF_size == 0);
   }

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         if (!sm->linesF[i].inUse) {
            *fixp = (Word)i;
            return;
         }
      }
   }

   /* No free F line found.  Expand existing array and try again. */
   new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
   nyu      = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
                           new_size * sizeof(LineF) );
   tl_assert(nyu);

   stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
   stats__secmap_linesF_bytes  += (new_size - sm->linesF_size)
                                  * sizeof(LineF);

   if (0)
   VG_(printf)("SM %p: expand F array from %d to %d\n",
               sm, (Int)sm->linesF_size, new_size);

   for (i = 0; i < new_size; i++)
      nyu[i].inUse = False;

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         tl_assert(sm->linesF[i].inUse);
         nyu[i] = sm->linesF[i];
      }
      VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
      HG_(free)(sm->linesF);
   }

   sm->linesF      = nyu;
   sm->linesF_size = new_size;

   for (i = 0; i < sm->linesF_size; i++) {
      if (!sm->linesF[i].inUse) {
         *fixp = (Word)i;
         return;
      }
   }

   /*NOTREACHED*/
   tl_assert(0);
}


/* ------------ CacheLine and implicit-tree related ------------ */

__attribute__((unused))
static void pp_CacheLine ( CacheLine* cl ) {
   Word i;
   if (!cl) {
      VG_(printf)("%s","pp_CacheLine(NULL)\n");
      return;
   }
   for (i = 0; i < N_LINE_TREES; i++)
      VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
   for (i = 0; i < N_LINE_ARANGE; i++)
      VG_(printf)("    sval: %08lx\n", (UWord)cl->svals[i]);
}

static UChar descr_to_validbits ( UShort descr )
{
   /* a.k.a Party Time for gcc's constant folder */
#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
                b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0)  \
             ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
                          ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
                          ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
                          ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
                          ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
                          ( (b16_2) << 4)  | ( (b64)   << 3)  | \
                          ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
                          ( (b16_0) << 0) ) )

#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
             ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
                         ( (bit5) << 5) | ( (bit4) << 4) | \
                         ( (bit3) << 3) | ( (bit2) << 2) | \
                         ( (bit1) << 1) | ( (bit0) << 0) ) )

   /* these should all get folded out at compile time */
   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);

   switch (descr) {
   /*
              +--------------------------------- TREE_DESCR_8_7
              |             +------------------- TREE_DESCR_8_0
              |             |  +---------------- TREE_DESCR_16_3
              |             |  | +-------------- TREE_DESCR_32_1
              |             |  | | +------------ TREE_DESCR_16_2
              |             |  | | |  +--------- TREE_DESCR_64
              |             |  | | |  |  +------ TREE_DESCR_16_1
              |             |  | | |  |  | +---- TREE_DESCR_32_0
              |             |  | | |  |  | | +-- TREE_DESCR_16_0
              |             |  | | |  |  | | |
              |             |  | | |  |  | | |   GRANULARITY, 7 -> 0 */
   case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */
                                                 return BYTE(1,1,1,1,1,1,1,1);
   case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */
                                                 return BYTE(1,1,0,1,1,1,1,1);
   case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */
                                                 return BYTE(0,1,1,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */
                                                 return BYTE(0,1,0,1,1,1,1,1);

   case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */
                                                 return BYTE(1,1,1,1,1,1,0,1);
   case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */
                                                 return BYTE(1,1,0,1,1,1,0,1);
   case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */
                                                 return BYTE(0,1,1,1,1,1,0,1);
   case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */
                                                 return BYTE(0,1,0,1,1,1,0,1);

   case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */
                                                 return BYTE(1,1,1,1,0,1,1,1);
   case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */
                                                 return BYTE(1,1,0,1,0,1,1,1);
   case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */
                                                 return BYTE(0,1,1,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */
                                                 return BYTE(0,1,0,1,0,1,1,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */
                                                 return BYTE(1,1,1,1,0,1,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */
                                                 return BYTE(1,1,0,1,0,1,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */
                                                 return BYTE(0,1,1,1,0,1,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */
                                                 return BYTE(0,1,0,1,0,1,0,1);

   case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */
                                                 return BYTE(0,0,0,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */
                                                 return BYTE(0,0,0,1,1,1,0,1);
   case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */
                                                 return BYTE(0,0,0,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */
                                                 return BYTE(0,0,0,1,0,1,0,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */
                                                 return BYTE(1,1,1,1,0,0,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */
                                                 return BYTE(1,1,0,1,0,0,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */
                                                 return BYTE(0,1,1,1,0,0,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */
                                                 return BYTE(0,1,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
                                                 return BYTE(0,0,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
                                                 return BYTE(0,0,0,0,0,0,0,1);

   default: return BYTE(0,0,0,0,0,0,0,0);
            /* INVALID - any valid descr produces at least one
               valid bit in tree[0..7]*/
   }
   /* NOTREACHED*/
   tl_assert(0);

#  undef DESCR
#  undef BYTE
}

__attribute__((unused))
static Bool is_sane_Descr ( UShort descr ) {
   return descr_to_validbits(descr) != 0;
}

static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
   VG_(sprintf)(dst,
                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
                (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
   );
}
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
                (Int)((byte & 128) ? 1 : 0),
                (Int)((byte &  64) ? 1 : 0),
                (Int)((byte &  32) ? 1 : 0),
                (Int)((byte &  16) ? 1 : 0),
                (Int)((byte &   8) ? 1 : 0),
                (Int)((byte &   4) ? 1 : 0),
                (Int)((byte &   2) ? 1 : 0),
                (Int)((byte &   1) ? 1 : 0)
   );
}

static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
   Word  i;
   UChar validbits = descr_to_validbits(descr);
   HChar buf[128], buf2[128];
   if (validbits == 0)
      goto bad;
   for (i = 0; i < 8; i++) {
      if (validbits & (1<<i)) {
         if (tree[i] == SVal_INVALID)
            goto bad;
      } else {
         if (tree[i] != SVal_INVALID)
            goto bad;
      }
   }
   return True;
  bad:
   sprintf_Descr( buf, descr );
   sprintf_Byte( buf2, validbits );
   VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
   VG_(printf)("   validbits 0x%02lx    %s\n", (UWord)validbits, buf2);
   VG_(printf)("       descr 0x%04lx  %s\n", (UWord)descr, buf);
   for (i = 0; i < 8; i++)
      VG_(printf)("   [%ld] 0x%016llx\n", i, tree[i]);
   VG_(printf)("%s","}\n");
   return 0;
}

static Bool is_sane_CacheLine ( CacheLine* cl )
{
   Word tno, cloff;

   if (!cl) goto bad;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = cl->descrs[tno];
      SVal*  tree  = &cl->svals[cloff];
      if (!is_sane_Descr_and_Tree(descr, tree))
         goto bad;
   }
   tl_assert(cloff == N_LINE_ARANGE);
   return True;
  bad:
   pp_CacheLine(cl);
   return False;
}

static UShort normalise_tree ( /*MOD*/SVal* tree )
{
   UShort descr;
   /* pre: incoming tree[0..7] does not have any invalid shvals, in
      particular no zeroes. */
   if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
                || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
                || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
                || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
      tl_assert(0);

   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   /* build 16-bit layer */
   if (tree[1] == tree[0]) {
      tree[1] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
      descr |= TREE_DESCR_16_0;
   }
   if (tree[3] == tree[2]) {
      tree[3] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
      descr |= TREE_DESCR_16_1;
   }
   if (tree[5] == tree[4]) {
      tree[5] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
      descr |= TREE_DESCR_16_2;
   }
   if (tree[7] == tree[6]) {
      tree[7] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
      descr |= TREE_DESCR_16_3;
   }
   /* build 32-bit layer */
   if (tree[2] == tree[0]
       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
      tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
      descr |= TREE_DESCR_32_0;
   }
   if (tree[6] == tree[4]
       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
      tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
      descr |= TREE_DESCR_32_1;
   }
   /* build 64-bit layer */
   if (tree[4] == tree[0]
       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
      tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
      descr |= TREE_DESCR_64;
   }
   return descr;
}
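
/* Example: if all eight leaves of a tree hold the same SVal,
   normalise_tree collapses the 16-bit, then 32-bit, then 64-bit
   layers in turn, leaving the value only in tree[0], tree[1..7] set
   to SVal_INVALID, and a descriptor of exactly TREE_DESCR_64. */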

/* This takes a cacheline where all the data is at the leaves
   (w8[..]) and builds a correctly normalised tree. */
static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
{
   Word tno, cloff;
   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      SVal* tree = &cl->svals[cloff];
      cl->descrs[tno] = normalise_tree( tree );
   }
   tl_assert(cloff == N_LINE_ARANGE);
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   stats__cline_normalises++;
}


typedef struct { UChar count; SVal sval; } CountedSVal;

static
void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
                               /*OUT*/Word* dstUsedP,
                               Word nDst, CacheLine* src )
{
   Word  tno, cloff, dstUsed;

   tl_assert(nDst == N_LINE_ARANGE);
   dstUsed = 0;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = src->descrs[tno];
      SVal*  tree  = &src->svals[cloff];

      /* sequentialise the tree described by (descr,tree). */
#     define PUT(_n,_v)                                \
         do { dst[dstUsed  ].count = (_n);             \
              dst[dstUsed++].sval  = (_v);             \
         } while (0)

      /* byte 0 */
      if (descr & TREE_DESCR_64)   PUT(8, tree[0]); else
      if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
      if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
      if (descr & TREE_DESCR_8_0)  PUT(1, tree[0]);
      /* byte 1 */
      if (descr & TREE_DESCR_8_1)  PUT(1, tree[1]);
      /* byte 2 */
      if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
      if (descr & TREE_DESCR_8_2)  PUT(1, tree[2]);
      /* byte 3 */
      if (descr & TREE_DESCR_8_3)  PUT(1, tree[3]);
      /* byte 4 */
      if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
      if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
      if (descr & TREE_DESCR_8_4)  PUT(1, tree[4]);
      /* byte 5 */
      if (descr & TREE_DESCR_8_5)  PUT(1, tree[5]);
      /* byte 6 */
      if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
      if (descr & TREE_DESCR_8_6)  PUT(1, tree[6]);
      /* byte 7 */
      if (descr & TREE_DESCR_8_7)  PUT(1, tree[7]);

#     undef PUT
      /* END sequentialise the tree described by (descr,tree). */

   }
   tl_assert(cloff == N_LINE_ARANGE);
   tl_assert(dstUsed <= nDst);

   *dstUsedP = dstUsed;
}
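
/* The output is a run-length encoding of the line, in address order.
   For example, a line whose 8 trees are all fully collapsed (each
   carrying one 64-bit value) sequentialises to just 8 entries, each
   with .count == 8, whereas a line of 64 distinct byte values
   produces 64 entries with .count == 1. */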

/* Write the cacheline 'wix' to backing store.  Where it ends up
   is determined by its tag field. */
static __attribute__((noinline)) void cacheline_wback ( UWord wix )
{
   Word        i, j, k, m;
   Addr        tag;
   SecMap*     sm;
   CacheLine*  cl;
   LineZ*      lineZ;
   LineF*      lineF;
   Word        zix, fix, csvalsUsed;
   CountedSVal csvals[N_LINE_ARANGE];
   SVal        sv;

   if (0)
   VG_(printf)("scache wback line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag =  cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* The cache line may have been invalidated; if so, ignore it. */
   if (!is_valid_scache_tag(tag))
      return;

   /* Where are we going to put it? */
   sm    = NULL;
   lineZ = NULL;
   lineF = NULL;
   zix = fix = -1;

   /* find the Z line to write in and rcdec it or the associated F
      line. */
   find_Z_for_writing( &sm, &zix, tag );

   tl_assert(sm);
   tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];

   /* Generate the data to be stored */
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */

   csvalsUsed = -1;
   sequentialise_CacheLine( csvals, &csvalsUsed,
                            N_LINE_ARANGE, cl );
   tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
   if (0) VG_(printf)("%lu ", csvalsUsed);

   lineZ->dict[0] = lineZ->dict[1]
                  = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;

   /* i indexes actual shadow values, k is cursor in csvals */
   i = 0;
   for (k = 0; k < csvalsUsed; k++) {

      sv = csvals[k].sval;
      if (CHECK_ZSM)
         tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
      /* do we already have it? */
      if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
      if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
      if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
      if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
      /* no.  look for a free slot. */
      if (CHECK_ZSM)
         tl_assert(sv != SVal_INVALID);
      if (lineZ->dict[0]
          == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
      if (lineZ->dict[1]
          == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
      if (lineZ->dict[2]
          == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
      if (lineZ->dict[3]
          == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
      break; /* we'll have to use the f rep */
     dict_ok:
      m = csvals[k].count;
      if (m == 8) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         write_twobit_array( lineZ->ix2s, i+4, j );
         write_twobit_array( lineZ->ix2s, i+5, j );
         write_twobit_array( lineZ->ix2s, i+6, j );
         write_twobit_array( lineZ->ix2s, i+7, j );
         i += 8;
      }
      else if (m == 4) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         i += 4;
      }
      else if (m == 1) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         i += 1;
      }
      else if (m == 2) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         i += 2;
      }
      else {
         tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
      }

   }

   if (LIKELY(i == N_LINE_ARANGE)) {
      /* Construction of the compressed representation was
         successful. */
      rcinc_LineZ(lineZ);
      stats__cache_Z_wbacks++;
   } else {
      /* Cannot use the compressed(z) representation.  Use the full(f)
         rep instead. */
      tl_assert(i >= 0 && i < N_LINE_ARANGE);
      alloc_F_for_writing( sm, &fix );
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(!lineF->inUse);
      lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
      lineZ->dict[1] = (SVal)fix;
      lineF->inUse = True;
      i = 0;
      for (k = 0; k < csvalsUsed; k++) {
         if (CHECK_ZSM)
            tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
         sv = csvals[k].sval;
         if (CHECK_ZSM)
            tl_assert(sv != SVal_INVALID);
         for (m = csvals[k].count; m > 0; m--) {
            lineF->w64s[i] = sv;
            i++;
         }
      }
      tl_assert(i == N_LINE_ARANGE);
      rcinc_LineF(lineF);
      stats__cache_F_wbacks++;
   }
}

/* Fetch the cacheline 'wix' from the backing store.  The tag
   associated with 'wix' is assumed to have already been filled in;
   hence that is used to determine where in the backing store to read
   from. */
static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
{
   Word       i;
   Addr       tag;
   CacheLine* cl;
   LineZ*     lineZ;
   LineF*     lineF;

   if (0)
   VG_(printf)("scache fetch line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag =  cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* reject nonsense requests */
   tl_assert(is_valid_scache_tag(tag));

   lineZ = NULL;
   lineF = NULL;
   find_ZF_for_reading( &lineZ, &lineF, tag );
   tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );

   /* expand the data into the bottom layer of the tree, then get
      cacheline_normalise to build the descriptor array. */
   if (lineF) {
      tl_assert(lineF->inUse);
      for (i = 0; i < N_LINE_ARANGE; i++) {
         cl->svals[i] = lineF->w64s[i];
      }
      stats__cache_F_fetches++;
   } else {
      for (i = 0; i < N_LINE_ARANGE; i++) {
         SVal sv;
         UWord ix = read_twobit_array( lineZ->ix2s, i );
         /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
         sv = lineZ->dict[ix];
         tl_assert(sv != SVal_INVALID);
         cl->svals[i] = sv;
      }
      stats__cache_Z_fetches++;
   }
   normalise_CacheLine( cl );
}

static void shmem__invalidate_scache ( void ) {
   Word wix;
   if (0) VG_(printf)("%s","scache inval\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_invals++;
}

static void shmem__flush_and_invalidate_scache ( void ) {
   Word wix;
   Addr tag;
   if (0) VG_(printf)("%s","scache flush and invalidate\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      tag = cache_shmem.tags0[wix];
      if (tag == 1/*INVALID*/) {
         /* already invalid; nothing to do */
      } else {
         tl_assert(is_valid_scache_tag(tag));
         cacheline_wback( wix );
      }
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_flushes++;
   stats__cache_invals++;
}


static inline Bool aligned16 ( Addr a ) {
   return 0 == (a & 1);
}
static inline Bool aligned32 ( Addr a ) {
   return 0 == (a & 3);
}
static inline Bool aligned64 ( Addr a ) {
   return 0 == (a & 7);
}
static inline UWord get_cacheline_offset ( Addr a ) {
   return (UWord)(a & (N_LINE_ARANGE - 1));
}
static inline Addr cacheline_ROUNDUP ( Addr a ) {
   return ROUNDUP(a, N_LINE_ARANGE);
}
static inline Addr cacheline_ROUNDDN ( Addr a ) {
   return ROUNDDN(a, N_LINE_ARANGE);
}
static inline UWord get_treeno ( Addr a ) {
   return get_cacheline_offset(a) >> 3;
}
static inline UWord get_tree_offset ( Addr a ) {
   return a & 7;
}
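
/* Example of the address decomposition used here: for an address a,
   get_cacheline_offset(a) is its byte offset within the 64-byte line,
   get_treeno(a) selects which of the line's 8 trees covers it
   (offset >> 3), and get_tree_offset(a) is its byte position 0..7
   within that tree. */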

static __attribute__((noinline))
       CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
static inline CacheLine* get_cacheline ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */
   Addr tag = a & ~(N_LINE_ARANGE - 1);
   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
   stats__cache_totrefs++;
   if (LIKELY(tag == cache_shmem.tags0[wix])) {
      return &cache_shmem.lyns0[wix];
   } else {
      return get_cacheline_MISS( a );
   }
}

static __attribute__((noinline))
       CacheLine* get_cacheline_MISS ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */

   CacheLine* cl;
   Addr* tag_old_p;
   Addr tag = a & ~(N_LINE_ARANGE - 1);
   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);

   tl_assert(tag != cache_shmem.tags0[wix]);

   /* Dump the old line into the backing store. */
   stats__cache_totmisses++;

   cl        = &cache_shmem.lyns0[wix];
   tag_old_p = &cache_shmem.tags0[wix];

   if (is_valid_scache_tag( *tag_old_p )) {
      /* EXPENSIVE and REDUNDANT: callee does it */
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      cacheline_wback( wix );
   }
   /* and reload the new one */
   *tag_old_p = tag;
   cacheline_fetch( wix );
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   return cl;
}

static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_64to32pulldown++;
   switch (toff) {
      case 0: case 4:
         tl_assert(descr & TREE_DESCR_64);
         tree[4] = tree[0];
         descr &= ~TREE_DESCR_64;
         descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_32to16pulldown++;
   switch (toff) {
      case 0: case 2:
         if (!(descr & TREE_DESCR_32_0)) {
            descr = pulldown_to_32(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_32_0);
         tree[2] = tree[0];
         descr &= ~TREE_DESCR_32_0;
         descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
         break;
      case 4: case 6:
         if (!(descr & TREE_DESCR_32_1)) {
            descr = pulldown_to_32(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_32_1);
         tree[6] = tree[4];
         descr &= ~TREE_DESCR_32_1;
         descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_16to8pulldown++;
   switch (toff) {
      case 0: case 1:
         if (!(descr & TREE_DESCR_16_0)) {
            descr = pulldown_to_16(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_16_0);
         tree[1] = tree[0];
         descr &= ~TREE_DESCR_16_0;
         descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
         break;
      case 2: case 3:
         if (!(descr & TREE_DESCR_16_1)) {
            descr = pulldown_to_16(tree, 2, descr);
         }
         tl_assert(descr & TREE_DESCR_16_1);
         tree[3] = tree[2];
         descr &= ~TREE_DESCR_16_1;
         descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
         break;
      case 4: case 5:
         if (!(descr & TREE_DESCR_16_2)) {
            descr = pulldown_to_16(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_16_2);
         tree[5] = tree[4];
         descr &= ~TREE_DESCR_16_2;
         descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
         break;
      case 6: case 7:
         if (!(descr & TREE_DESCR_16_3)) {
            descr = pulldown_to_16(tree, 6, descr);
         }
         tl_assert(descr & TREE_DESCR_16_3);
         tree[7] = tree[6];
         descr &= ~TREE_DESCR_16_3;
         descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}
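
/* The pulldown functions split a coarse node so that a finer-grained
   access can be serviced.  For example, starting from a fully
   collapsed tree (TREE_DESCR_64) and needing byte 5,
   pulldown_to_8(tree, 5, descr) first splits 64 -> 32+32, then
   32 -> 16+16, then 16 -> 8+8, copying the value downwards each time,
   so that tree[5] becomes individually addressable. */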


static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_0;
         break;
      case 2:
         mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_1;
         break;
      case 4:
         mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_2;
         break;
      case 6:
         mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_3;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         if (!(descr & TREE_DESCR_16_0))
            descr = pullup_descr_to_16(descr, 0);
         if (!(descr & TREE_DESCR_16_1))
            descr = pullup_descr_to_16(descr, 2);
         mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_0;
         break;
      case 4:
         if (!(descr & TREE_DESCR_16_2))
            descr = pullup_descr_to_16(descr, 4);
         if (!(descr & TREE_DESCR_16_3))
            descr = pullup_descr_to_16(descr, 6);
         mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_1;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0: case 4:
         return 0 != (descr & TREE_DESCR_64);
      default:
         tl_assert(0);
   }
}

static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0:
         return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
      case 2:
         return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
      case 4:
         return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
      case 6:
         return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
      default:
         tl_assert(0);
   }
}

/* ------------ Cache management ------------ */

static void zsm_flush_cache ( void )
{
   shmem__flush_and_invalidate_scache();
}


static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
{
   tl_assert( sizeof(UWord) == sizeof(Addr) );

   rcinc = p_rcinc;
   rcdec = p_rcdec;

   tl_assert(map_shmem == NULL);
   map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
                           HG_(free),
                           NULL/*unboxed UWord cmp*/);
   tl_assert(map_shmem != NULL);
   shmem__invalidate_scache();

   /* a SecMap must contain an integral number of CacheLines */
   tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
   /* also ... a CacheLine holds an integral number of trees */
   tl_assert(0 == (N_LINE_ARANGE % 8));
}

/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//  SECTION END compressed shadow memory                       //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//  SECTION BEGIN vts primitives                               //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_VTS_H
#define __HB_VTS_H

/* VtsIDs can't exceed 30 bits, since they have to be packed into the
   lowest 30 bits of an SVal. */
typedef  UInt  VtsID;
#define VtsID_INVALID 0xFFFFFFFF

/* A VTS contains .ts, its vector clock, and also .id, a field to hold
   a backlink for the caller's convenience.  Since we have no idea
   what to set that to in the library, it always gets set to
   VtsID_INVALID. */
typedef
   struct {
      VtsID   id;
      XArray* ts; /* XArray* ScalarTS(abstract) */
   }
   VTS;


/* Create a new, empty VTS. */
VTS* VTS__new ( void );

/* Delete this VTS in its entirety. */
void VTS__delete ( VTS* vts );

/* Create a new singleton VTS. */
VTS* VTS__singleton ( Thr* thr, ULong tym );

/* Return a new VTS in which vts[me]++, so to speak.  'vts' itself is
   not modified. */
VTS* VTS__tick ( Thr* me, VTS* vts );

/* Return a new VTS constructed as the join (max) of the 2 args.
   Neither arg is modified. */
VTS* VTS__join ( VTS* a, VTS* b );

/* Compute the partial ordering relation of the two args. */
typedef
   enum { POrd_EQ=4, POrd_LT, POrd_GT, POrd_UN }
   POrd;

POrd VTS__cmp ( VTS* a, VTS* b );

/* Compute an arbitrary structural (total) ordering on the two args,
   based on their VCs, so they can be looked up in a table, tree, etc.
   Returns -1, 0 or 1. */
Word VTS__cmp_structural ( VTS* a, VTS* b );

/* Debugging only.  Display the given VTS in the buffer. */
void VTS__show ( HChar* buf, Int nBuf, VTS* vts );

/* Debugging only.  Return vts[index], so to speak. */
ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );

#endif /* ! __HB_VTS_H */


/*--------------- to do with Vector Timestamps ---------------*/

/* Scalar Timestamp */
typedef
   struct {
      Thr*    thr;
      ULong   tym;
   }
   ScalarTS;


static Bool is_sane_VTS ( VTS* vts )
{
   UWord     i, n;
   ScalarTS  *st1, *st2;
   if (!vts) return False;
   if (!vts->ts) return False;
   n = VG_(sizeXA)( vts->ts );
   if (n >= 2) {
      for (i = 0; i < n-1; i++) {
         st1 = VG_(indexXA)( vts->ts, i );
         st2 = VG_(indexXA)( vts->ts, i+1 );
         if (st1->thr >= st2->thr)
            return False;
         if (st1->tym == 0 || st2->tym == 0)
            return False;
      }
   }
   return True;
}
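
/* So a sane VTS is a sequence of (Thr*, timestamp) pairs, strictly
   sorted by Thr* (compared simply as addresses), with no zero
   timestamps; threads not mentioned are implicitly at timestamp 0.
   For example { (A,3), (C,1) } says A is at time 3, C at time 1, and
   every other thread at time 0 (assuming A < C as pointers). */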


/* Create a new, empty VTS.
*/
VTS* VTS__new ( void )
{
   VTS* vts;
   vts = HG_(zalloc)( "libhb.VTS__new.1", sizeof(VTS) );
   tl_assert(vts);
   vts->id = VtsID_INVALID;
   vts->ts = VG_(newXA)( HG_(zalloc), "libhb.VTS__new.2",
                         HG_(free), sizeof(ScalarTS) );
   tl_assert(vts->ts);
   return vts;
}


/* Delete this VTS in its entirety.
*/
void VTS__delete ( VTS* vts )
{
   tl_assert(vts);
   tl_assert(vts->ts);
   VG_(deleteXA)( vts->ts );
   HG_(free)(vts);
}


/* Create a new singleton VTS.
*/
VTS* VTS__singleton ( Thr* thr, ULong tym ) {
   ScalarTS st;
   VTS*     vts;
   tl_assert(thr);
   tl_assert(tym >= 1);
   vts = VTS__new();
   st.thr = thr;
   st.tym = tym;
   VG_(addToXA)( vts->ts, &st );
   return vts;
}


/* Return a new VTS in which vts[me]++, so to speak.  'vts' itself is
   not modified.
*/
VTS* VTS__tick ( Thr* me, VTS* vts )
{
   ScalarTS* here = NULL;
   ScalarTS  tmp;
   VTS*      res;
   Word      i, n;
   tl_assert(me);
   tl_assert(is_sane_VTS(vts));
   //if (0) VG_(printf)("tick vts thrno %ld szin %d\n",
   //                   (Word)me->errmsg_index, (Int)VG_(sizeXA)(vts) );
   res = VTS__new();
   n = VG_(sizeXA)( vts->ts );

   /* main loop doesn't handle zero-entry case correctly, so
      special-case it. */
   if (n == 0) {
      tmp.thr = me;
      tmp.tym = 1;
      VG_(addToXA)( res->ts, &tmp );
      tl_assert(is_sane_VTS(res));
      return res;
   }

   for (i = 0; i < n; i++) {
      here = VG_(indexXA)( vts->ts, i );
      if (me < here->thr) {
         /* We just went past 'me', without seeing it. */
         tmp.thr = me;
         tmp.tym = 1;
         VG_(addToXA)( res->ts, &tmp );
         tmp = *here;
         VG_(addToXA)( res->ts, &tmp );
         i++;
         break;
      }
      else if (me == here->thr) {
         tmp = *here;
         tmp.tym++;
         VG_(addToXA)( res->ts, &tmp );
         i++;
         break;
      }
      else /* me > here->thr */ {
         tmp = *here;
         VG_(addToXA)( res->ts, &tmp );
      }
   }
   tl_assert(i >= 0 && i <= n);
   if (i == n && here && here->thr < me) {
      tmp.thr = me;
      tmp.tym = 1;
      VG_(addToXA)( res->ts, &tmp );
   } else {
      for (/*keepgoing*/; i < n; i++) {
         here = VG_(indexXA)( vts->ts, i );
         tmp = *here;
         VG_(addToXA)( res->ts, &tmp );
      }
   }
   tl_assert(is_sane_VTS(res));
   //if (0) VG_(printf)("tick vts thrno %ld szou %d\n",
   //                   (Word)me->errmsg_index, (Int)VG_(sizeXA)(res) );
   return res;
}
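
/* Example: ticking thread B in { (A,3), (C,5) } (with A < B < C as
   pointers) yields a fresh VTS { (A,3), (B,1), (C,5) }: B was
   implicitly at 0, so a new entry with timestamp 1 is inserted in
   order.  Ticking A instead would yield { (A,4), (C,5) }. */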
1716
1717
1718/* Return a new VTS constructed as the join (max) of the 2 args.
1719 Neither arg is modified.
1720*/
1721VTS* VTS__join ( VTS* a, VTS* b )
1722{
1723 Word ia, ib, useda, usedb;
1724 ULong tyma, tymb, tymMax;
1725 Thr* thr;
1726 VTS* res;
sewardjf98e1c02008-10-25 16:22:41 +00001727
1728 tl_assert(a && a->ts);
1729 tl_assert(b && b->ts);
1730 useda = VG_(sizeXA)( a->ts );
1731 usedb = VG_(sizeXA)( b->ts );
1732
1733 res = VTS__new();
1734 ia = ib = 0;
1735
1736 while (1) {
1737
1738 /* This logic is to enumerate triples (thr, tyma, tymb) drawn
1739 from a and b in order, where thr is the next Thr*
1740 occurring in either a or b, and tyma/b are the relevant
1741 scalar timestamps, taking into account implicit zeroes. */
1742 tl_assert(ia >= 0 && ia <= useda);
1743 tl_assert(ib >= 0 && ib <= usedb);
sewardjf98e1c02008-10-25 16:22:41 +00001744
njn4c245e52009-03-15 23:25:38 +00001745 if (ia == useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00001746 /* both empty - done */
1747 break;
njn4c245e52009-03-15 23:25:38 +00001748
1749 } else if (ia == useda && ib != usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00001750 /* a empty, use up b */
njn4c245e52009-03-15 23:25:38 +00001751 ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
sewardjf98e1c02008-10-25 16:22:41 +00001752 thr = tmpb->thr;
1753 tyma = 0;
1754 tymb = tmpb->tym;
1755 ib++;
njn4c245e52009-03-15 23:25:38 +00001756
1757 } else if (ia != useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00001758 /* b empty, use up a */
njn4c245e52009-03-15 23:25:38 +00001759 ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
sewardjf98e1c02008-10-25 16:22:41 +00001760 thr = tmpa->thr;
1761 tyma = tmpa->tym;
1762 tymb = 0;
1763 ia++;
njn4c245e52009-03-15 23:25:38 +00001764
1765 } else {
sewardjf98e1c02008-10-25 16:22:41 +00001766 /* both not empty; extract lowest-Thr*'d triple */
njn4c245e52009-03-15 23:25:38 +00001767 ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
1768 ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
sewardjf98e1c02008-10-25 16:22:41 +00001769 if (tmpa->thr < tmpb->thr) {
1770 /* a has the lowest unconsidered Thr* */
1771 thr = tmpa->thr;
1772 tyma = tmpa->tym;
1773 tymb = 0;
1774 ia++;
njn4c245e52009-03-15 23:25:38 +00001775 } else if (tmpa->thr > tmpb->thr) {
sewardjf98e1c02008-10-25 16:22:41 +00001776 /* b has the lowest unconsidered Thr* */
1777 thr = tmpb->thr;
1778 tyma = 0;
1779 tymb = tmpb->tym;
1780 ib++;
1781 } else {
1782 /* they both next mention the same Thr* */
1783 tl_assert(tmpa->thr == tmpb->thr);
1784 thr = tmpa->thr; /* == tmpb->thr */
1785 tyma = tmpa->tym;
1786 tymb = tmpb->tym;
1787 ia++;
1788 ib++;
1789 }
1790 }
1791
1792 /* having laboriously determined (thr, tyma, tymb), do something
1793 useful with it. */
1794 tymMax = tyma > tymb ? tyma : tymb;
1795 if (tymMax > 0) {
1796 ScalarTS st;
1797 st.thr = thr;
1798 st.tym = tymMax;
1799 VG_(addToXA)( res->ts, &st );
1800 }
1801
1802 }
1803
1804 tl_assert(is_sane_VTS( res ));
1805
1806 return res;
1807}
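
/* Illustrative sketch, not part of the library proper: VTS__join takes
   the per-thread maximum of its arguments and, like VTS__tick, leaves
   both inputs untouched. 'thrA'/'thrB' are assumed valid Thr*s and the
   example function is hypothetical. Enable the #if to experiment. */
#if 0
static void example_VTS_join ( Thr* thrA, Thr* thrB )
{
   VTS* va1 = VTS__singleton( thrA, 1 );  /* [thrA:1] */
   VTS* va2 = VTS__tick( thrA, va1 );     /* [thrA:2] */
   VTS* vb  = VTS__singleton( thrB, 5 );  /* [thrB:5] */
   VTS* vj1 = VTS__join( va1, va2 );      /* per-thread max: [thrA:2] */
   VTS* vj2 = VTS__join( va2, vb );       /* merges both threads' entries */
   tl_assert( VG_(sizeXA)( vj1->ts ) == 1 );
   tl_assert( ((ScalarTS*)VG_(indexXA)( vj1->ts, 0 ))->tym == 2 );
   tl_assert( VG_(sizeXA)( vj2->ts ) == 2 );
   VTS__delete( va1 ); VTS__delete( va2 ); VTS__delete( vb );
   VTS__delete( vj1 ); VTS__delete( vj2 );
}
#endif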
1808
1809
1810/* Compute the partial ordering relation of the two args.
1811*/
1812POrd VTS__cmp ( VTS* a, VTS* b )
1813{
1814 Word ia, ib, useda, usedb;
1815 ULong tyma, tymb;
sewardjf98e1c02008-10-25 16:22:41 +00001816
1817 Bool all_leq = True;
1818 Bool all_geq = True;
1819
1820 tl_assert(a && a->ts);
1821 tl_assert(b && b->ts);
1822 useda = VG_(sizeXA)( a->ts );
1823 usedb = VG_(sizeXA)( b->ts );
1824
1825 ia = ib = 0;
1826
1827 while (1) {
1828
njn4c245e52009-03-15 23:25:38 +00001829 /* This logic is to enumerate doubles (tyma, tymb) drawn
1830 from a and b in order, where tyma/b are the relevant
sewardjf98e1c02008-10-25 16:22:41 +00001831 scalar timestamps, taking into account implicit zeroes. */
1832 tl_assert(ia >= 0 && ia <= useda);
1833 tl_assert(ib >= 0 && ib <= usedb);
sewardjf98e1c02008-10-25 16:22:41 +00001834
njn4c245e52009-03-15 23:25:38 +00001835 if (ia == useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00001836 /* both empty - done */
1837 break;
njn4c245e52009-03-15 23:25:38 +00001838
1839 } else if (ia == useda && ib != usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00001840 /* a empty, use up b */
njn4c245e52009-03-15 23:25:38 +00001841 ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
sewardjf98e1c02008-10-25 16:22:41 +00001842 tyma = 0;
1843 tymb = tmpb->tym;
1844 ib++;
njn4c245e52009-03-15 23:25:38 +00001845
1846 } else if (ia != useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00001847 /* b empty, use up a */
njn4c245e52009-03-15 23:25:38 +00001848 ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
sewardjf98e1c02008-10-25 16:22:41 +00001849 tyma = tmpa->tym;
1850 tymb = 0;
1851 ia++;
njn4c245e52009-03-15 23:25:38 +00001852
1853 } else {
sewardjf98e1c02008-10-25 16:22:41 +00001854 /* both not empty; extract lowest-Thr*'d pair */
njn4c245e52009-03-15 23:25:38 +00001855 ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
1856 ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
sewardjf98e1c02008-10-25 16:22:41 +00001857 if (tmpa->thr < tmpb->thr) {
1858 /* a has the lowest unconsidered Thr* */
sewardjf98e1c02008-10-25 16:22:41 +00001859 tyma = tmpa->tym;
1860 tymb = 0;
1861 ia++;
1862 }
1863 else
1864 if (tmpa->thr > tmpb->thr) {
1865 /* b has the lowest unconsidered Thr* */
sewardjf98e1c02008-10-25 16:22:41 +00001866 tyma = 0;
1867 tymb = tmpb->tym;
1868 ib++;
1869 } else {
1870 /* they both next mention the same Thr* */
1871 tl_assert(tmpa->thr == tmpb->thr);
sewardjf98e1c02008-10-25 16:22:41 +00001872 tyma = tmpa->tym;
1873 tymb = tmpb->tym;
1874 ia++;
1875 ib++;
1876 }
1877 }
1878
njn4c245e52009-03-15 23:25:38 +00001879 /* having laboriously determined (tyma, tymb), do something
sewardjf98e1c02008-10-25 16:22:41 +00001880 useful with it. */
1881 if (tyma < tymb)
1882 all_geq = False;
1883 if (tyma > tymb)
1884 all_leq = False;
1885 }
1886
1887 if (all_leq && all_geq)
1888 return POrd_EQ;
1889 /* now we know they aren't equal, so either all_leq or all_geq or
1890 both are false. */
1891 if (all_leq)
1892 return POrd_LT;
1893 if (all_geq)
1894 return POrd_GT;
1895 /* hmm, neither all_geq or all_leq. This means unordered. */
1896 return POrd_UN;
1897}
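
/* Illustrative sketch, not part of the library proper: this partial
   order is what makes VTSs usable as vector clocks. Singletons for two
   different threads are unordered (concurrent), while a join dominates
   both of its inputs. 'thrA'/'thrB' are assumed valid Thr*s and the
   example function is hypothetical. Enable the #if to experiment. */
#if 0
static void example_VTS_cmp ( Thr* thrA, Thr* thrB )
{
   VTS* va = VTS__singleton( thrA, 1 );
   VTS* vb = VTS__singleton( thrB, 1 );
   VTS* vj = VTS__join( va, vb );
   tl_assert( VTS__cmp( va, vb ) == POrd_UN ); /* concurrent */
   tl_assert( VTS__cmp( va, vj ) == POrd_LT ); /* va happens-before vj */
   tl_assert( VTS__cmp( vj, vb ) == POrd_GT ); /* vj dominates vb */
   tl_assert( VTS__cmp( vj, vj ) == POrd_EQ );
   VTS__delete( va );
   VTS__delete( vb );
   VTS__delete( vj );
}
#endif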
1898
1899
1900/* Compute an arbitrary structural (total) ordering on the two args,
1901 based on their VCs, so they can be looked up in a table, tree, etc.
1902 Returns -1, 0 or 1. (really just 'deriving Ord' :-)
1903*/
1904Word VTS__cmp_structural ( VTS* a, VTS* b )
1905{
1906 /* We just need to generate an arbitrary total ordering based on
1907 a->ts and b->ts. Preferably do it in a way which picks up likely
1908 differences relatively quickly. */
1909 Word i, useda, usedb;
1910 ScalarTS *tmpa, *tmpb;
1911
1912 tl_assert(a && a->ts);
1913 tl_assert(b && b->ts);
1914 useda = VG_(sizeXA)( a->ts );
1915 usedb = VG_(sizeXA)( b->ts );
1916
1917 if (useda < usedb) return -1;
1918 if (useda > usedb) return 1;
1919
1920 /* Same length vectors, so let's step through them together. */
1921 tl_assert(useda == usedb);
1922 for (i = 0; i < useda; i++) {
1923 tmpa = VG_(indexXA)( a->ts, i );
1924 tmpb = VG_(indexXA)( b->ts, i );
1925 if (tmpa->tym < tmpb->tym) return -1;
1926 if (tmpa->tym > tmpb->tym) return 1;
1927 if (tmpa->thr < tmpb->thr) return -1;
1928 if (tmpa->thr > tmpb->thr) return 1;
1929 }
1930
1931 /* They're identical. */
1932 return 0;
1933}
1934
1935
1936/* Debugging only. Display the given VTS in the buffer.
1937*/
1938void VTS__show ( HChar* buf, Int nBuf, VTS* vts ) {
1939 ScalarTS* st;
1940 HChar unit[64];
1941 Word i, n;
1942 Int avail = nBuf;
1943 tl_assert(vts && vts->ts);
1944 tl_assert(nBuf > 16);
1945 buf[0] = '[';
1946 buf[1] = 0;
1947 n = VG_(sizeXA)( vts->ts );
1948 for (i = 0; i < n; i++) {
1949 tl_assert(avail >= 40);
1950 st = VG_(indexXA)( vts->ts, i );
1951 VG_(memset)(unit, 0, sizeof(unit));
1952 VG_(sprintf)(unit, i < n-1 ? "%p:%lld " : "%p:%lld",
1953 st->thr, st->tym);
1954 if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
1955 VG_(strcat)(buf, " ...]");
1956 buf[nBuf-1] = 0;
1957 return;
1958 }
1959 VG_(strcat)(buf, unit);
1960 avail -= VG_(strlen)(unit);
1961 }
1962 VG_(strcat)(buf, "]");
1963 buf[nBuf-1] = 0;
1964}
1965
1966
1967/* Debugging only. Return vts[index], so to speak.
1968*/
1969ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) {
1970 UWord i, n;
1971 tl_assert(vts && vts->ts);
1972 n = VG_(sizeXA)( vts->ts );
1973 for (i = 0; i < n; i++) {
1974 ScalarTS* st = VG_(indexXA)( vts->ts, i );
1975 if (st->thr == idx)
1976 return st->tym;
1977 }
1978 return 0;
1979}
1980
1981
1982/////////////////////////////////////////////////////////////////
1983/////////////////////////////////////////////////////////////////
1984// //
1985// SECTION END vts primitives //
1986// //
1987/////////////////////////////////////////////////////////////////
1988/////////////////////////////////////////////////////////////////
1989
1990
1991
1992/////////////////////////////////////////////////////////////////
1993/////////////////////////////////////////////////////////////////
1994// //
1995// SECTION BEGIN main library //
1996// //
1997/////////////////////////////////////////////////////////////////
1998/////////////////////////////////////////////////////////////////
1999
2000
2001/////////////////////////////////////////////////////////
2002// //
2003// VTS set //
2004// //
2005/////////////////////////////////////////////////////////
2006
2007static WordFM* /* VTS* void void */ vts_set = NULL;
2008
2009static void vts_set_init ( void )
2010{
2011 tl_assert(!vts_set);
2012 vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
2013 HG_(free),
2014 (Word(*)(UWord,UWord))VTS__cmp_structural );
2015 tl_assert(vts_set);
2016}
2017
2018/* Given a newly made VTS, look in vts_set to see if we already have
2019 an identical one. If yes, free up this one and return instead a
2020 pointer to the existing one. If no, add this one to the set and
2021 return the same pointer. Caller differentiates the two cases by
2022 comparing returned pointer with the supplied one (although that
2023 does require that the supplied VTS is not already in the set).
2024*/
2025static VTS* vts_set__find_and_dealloc__or_add ( VTS* cand )
2026{
2027 UWord keyW, valW;
2028 /* lookup cand (by value) */
2029 if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
2030 /* found it */
2031 tl_assert(valW == 0);
2032 /* if this fails, cand (by ref) was already present (!) */
2033 tl_assert(keyW != (UWord)cand);
2034 VTS__delete(cand);
2035 return (VTS*)keyW;
2036 } else {
2037 /* not present. Add and return pointer to same. */
2038 VG_(addToFM)( vts_set, (UWord)cand, 0/*val is unused*/ );
2039 return cand;
2040 }
2041}
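
/* Illustrative sketch, not part of the library proper: interning means
   structurally-equal VTSs collapse to one heap copy; handing in a
   duplicate frees the argument, so it must not be used afterwards.
   Assumes vts_set_init() has run and 'thrA' is a valid Thr*; the
   example function is hypothetical. Enable the #if to experiment. */
#if 0
static void example_vts_set_interning ( Thr* thrA )
{
   VTS* v1 = VTS__singleton( thrA, 7 );
   VTS* v2 = VTS__singleton( thrA, 7 );  /* equal by value, not by ref */
   VTS* k1 = vts_set__find_and_dealloc__or_add( v1 );  /* added: k1 == v1 */
   VTS* k2 = vts_set__find_and_dealloc__or_add( v2 );  /* v2 is freed */
   tl_assert( k1 == v1 );
   tl_assert( k2 == k1 );  /* both name the same canonical copy */
}
#endif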
2042
2043
2044/////////////////////////////////////////////////////////
2045// //
2046// VTS table //
2047// //
2048/////////////////////////////////////////////////////////
2049
2050static void VtsID__invalidate_caches ( void ); /* fwds */
2051
2052/* A type to hold VTS table entries. Invariants:
2053 If .vts == NULL, then this entry is not in use, so:
2054 - .rc == 0
2055 - this entry is on the freelist (unfortunately, does not imply
2056 any constraints on the value of .freelink)
2057 If .vts != NULL, then this entry is in use:
2058 - .vts is findable in vts_set
2059 - .vts->id == this entry number
2060 - no specific value for .rc (even 0 is OK)
2061 - this entry is not on the freelist, so .freelink == VtsID_INVALID
2062*/
2063typedef
2064 struct {
2065 VTS* vts; /* vts, in vts_set */
2066 UWord rc; /* reference count - enough for entire aspace */
2067 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
2068 }
2069 VtsTE;
2070
2071/* The VTS table. */
2072static XArray* /* of VtsTE */ vts_tab = NULL;
2073
2074/* An index into the VTS table, indicating the start of the list of
2075 free (available for use) entries. If the list is empty, this is
2076 VtsID_INVALID. */
2077static VtsID vts_tab_freelist = VtsID_INVALID;
2078
2079/* Do a GC of vts_tab when the freelist becomes empty AND the size of
2080 vts_tab equals or exceeds this size. After GC, the value here is
2081 set appropriately so as to check for the next GC point. */
2082static Word vts_next_GC_at = 1000;
2083
2084static void vts_tab_init ( void )
2085{
2086 vts_tab
2087 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2088 HG_(free), sizeof(VtsTE) );
2089 vts_tab_freelist
2090 = VtsID_INVALID;
2091 tl_assert(vts_tab);
2092}
2093
2094/* Add ii to the free list, checking that it looks out-of-use. */
2095static void add_to_free_list ( VtsID ii )
2096{
2097 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2098 tl_assert(ie->vts == NULL);
2099 tl_assert(ie->rc == 0);
2100 tl_assert(ie->freelink == VtsID_INVALID);
2101 ie->freelink = vts_tab_freelist;
2102 vts_tab_freelist = ii;
2103}
2104
2105/* Get an entry from the free list. This will return VtsID_INVALID if
2106 the free list is empty. */
2107static VtsID get_from_free_list ( void )
2108{
2109 VtsID ii;
2110 VtsTE* ie;
2111 if (vts_tab_freelist == VtsID_INVALID)
2112 return VtsID_INVALID;
2113 ii = vts_tab_freelist;
2114 ie = VG_(indexXA)( vts_tab, ii );
2115 tl_assert(ie->vts == NULL);
2116 tl_assert(ie->rc == 0);
2117 vts_tab_freelist = ie->freelink;
2118 return ii;
2119}
2120
2121/* Produce a new VtsID that can be used, either by getting it from
2122 the freelist, or, if that is empty, by expanding vts_tab. */
2123static VtsID get_new_VtsID ( void )
2124{
2125 VtsID ii;
2126 VtsTE te;
2127 ii = get_from_free_list();
2128 if (ii != VtsID_INVALID)
2129 return ii;
2130 te.vts = NULL;
2131 te.rc = 0;
2132 te.freelink = VtsID_INVALID;
2133 ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2134 return ii;
2135}
2136
2137
2138/* Indirect callback from lib_zsm. */
2139static void VtsID__rcinc ( VtsID ii )
2140{
2141 VtsTE* ie;
2142 /* VG_(indexXA) does a range check for us */
2143 ie = VG_(indexXA)( vts_tab, ii );
2144 tl_assert(ie->vts); /* else it's not in use */
2145 tl_assert(ie->rc < ~0UL); /* else we can't continue */
2146 tl_assert(ie->vts->id == ii);
2147 ie->rc++;
2148}
2149
2150/* Indirect callback from lib_zsm. */
2151static void VtsID__rcdec ( VtsID ii )
2152{
2153 VtsTE* ie;
2154 /* VG_(indexXA) does a range check for us */
2155 ie = VG_(indexXA)( vts_tab, ii );
2156 tl_assert(ie->vts); /* else it's not in use */
2157 tl_assert(ie->rc > 0); /* else RC snafu */
2158 tl_assert(ie->vts->id == ii);
2159 ie->rc--;
2160}
2161
2162
2163/* Look up 'cand' in our collection of VTSs. If present, deallocate
2164 it and return the VtsID for the pre-existing version. If not
2165 present, add it to both vts_tab and vts_set, allocate a fresh VtsID
2166 for it, and return that. */
2167static VtsID vts_tab__find_and_dealloc__or_add ( VTS* cand )
2168{
2169 VTS* auld;
2170 tl_assert(cand->id == VtsID_INVALID);
2171 auld = vts_set__find_and_dealloc__or_add(cand);
2172 if (auld != cand) {
2173 /* We already have an Aulde one. Use that. */
2174 VtsTE* ie;
2175 tl_assert(auld->id != VtsID_INVALID);
2176 ie = VG_(indexXA)( vts_tab, auld->id );
2177 tl_assert(ie->vts == auld);
2178 return auld->id;
2179 } else {
2180 VtsID ii = get_new_VtsID();
2181 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2182 ie->vts = cand;
2183 ie->rc = 0;
2184 ie->freelink = VtsID_INVALID;
2185 cand->id = ii;
2186 return ii;
2187 }
2188}
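
/* Illustrative sketch, not part of the library proper: thanks to the
   interning above, constructing "the same" VTS twice yields the same
   VtsID, which is what allows the fast vi1 == vi2 checks used by the
   cached queries below. Assumes the vts_set/vts_tab machinery is
   initialised and 'thrA' is a valid Thr*; the example function is
   hypothetical. Enable the #if to experiment. */
#if 0
static void example_vts_tab_interning ( Thr* thrA )
{
   VTS*  v1 = VTS__singleton( thrA, 9 );
   VTS*  v2 = VTS__singleton( thrA, 9 );
   VtsID i1 = vts_tab__find_and_dealloc__or_add( v1 );
   VtsID i2 = vts_tab__find_and_dealloc__or_add( v2 );  /* v2 is freed */
   tl_assert( i1 == i2 );
}
#endif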
2189
2190
2191static void show_vts_stats ( HChar* caller )
2192{
2193 UWord nSet, nTab, nLive;
2194 ULong totrc;
2195 UWord n, i;
2196 nSet = VG_(sizeFM)( vts_set );
2197 nTab = VG_(sizeXA)( vts_tab );
2198 totrc = 0;
2199 nLive = 0;
2200 n = VG_(sizeXA)( vts_tab );
2201 for (i = 0; i < n; i++) {
2202 VtsTE* ie = VG_(indexXA)( vts_tab, i );
2203 if (ie->vts) {
2204 nLive++;
2205 totrc += (ULong)ie->rc;
2206 } else {
2207 tl_assert(ie->rc == 0);
2208 }
2209 }
2210 VG_(printf)(" show_vts_stats %s\n", caller);
2211 VG_(printf)(" vts_tab size %4lu\n", nTab);
2212 VG_(printf)(" vts_tab live %4lu\n", nLive);
2213 VG_(printf)(" vts_set size %4lu\n", nSet);
2214 VG_(printf)(" total rc %4llu\n", totrc);
2215}
2216
2217/* NOT TO BE CALLED FROM WITHIN libzsm. */
sewardj8fd92d32008-11-20 23:17:01 +00002218__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00002219static void vts_tab__do_GC ( Bool show_stats )
2220{
2221 UWord i, nTab, nLive, nFreed;
2222
2223 /* check this is actually necessary. */
2224 tl_assert(vts_tab_freelist == VtsID_INVALID);
2225
2226 /* empty the caches for partial order checks and binary joins. We
2227 could do better and prune out the entries to be deleted, but it
2228 ain't worth the hassle. */
2229 VtsID__invalidate_caches();
2230
2231 /* First, make the reference counts up to date. */
2232 zsm_flush_cache();
2233
2234 nTab = VG_(sizeXA)( vts_tab );
2235
2236 if (show_stats) {
2237 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
2238 show_vts_stats("before GC");
2239 }
2240
2241 /* Now we can inspect the entire vts_tab. Any entries
2242 with zero .rc fields are now no longer in use and can be
2243 put on the free list, removed from vts_set, and deleted. */
2244 nFreed = 0;
2245 for (i = 0; i < nTab; i++) {
2246 Bool present;
2247 UWord oldK = 0, oldV = 0;
2248 VtsTE* te = VG_(indexXA)( vts_tab, i );
2249 if (te->vts == NULL) {
2250 tl_assert(te->rc == 0);
2251 continue; /* already on the free list (presumably) */
2252 }
2253 if (te->rc > 0)
2254 continue; /* in use */
2255 /* Ok, we got one we can free. */
2256 tl_assert(te->vts->id == i);
2257 /* first, remove it from vts_set. */
2258 present = VG_(delFromFM)( vts_set,
2259 &oldK, &oldV, (UWord)te->vts );
2260 tl_assert(present); /* else it isn't in vts_set ?! */
2261 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2262 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
2263 /* now free the VTS itself */
2264 VTS__delete(te->vts);
2265 te->vts = NULL;
2266 /* and finally put this entry on the free list */
2267 tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
2268 add_to_free_list( i );
2269 nFreed++;
2270 }
2271
2272 /* Now figure out when the next GC should be. We'll allow the
2273 number of VTSs to double before GCing again. Except of course
2274 that since we can't (or, at least, don't) shrink vts_tab, we
2275 can't set the threshold value smaller than it. */
2276 tl_assert(nFreed <= nTab);
2277 nLive = nTab - nFreed;
2278 tl_assert(nLive >= 0 && nLive <= nTab);
2279 vts_next_GC_at = 2 * nLive;
2280 if (vts_next_GC_at < nTab)
2281 vts_next_GC_at = nTab;
2282
2283 if (show_stats) {
2284 show_vts_stats("after GC");
2285 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
2286 }
2287
sewardjd024ae52008-11-09 20:47:57 +00002288 if (VG_(clo_verbosity) > 1) {
sewardjf98e1c02008-10-25 16:22:41 +00002289 static UInt ctr = 0;
2290 tl_assert(nTab > 0);
sewardjd024ae52008-11-09 20:47:57 +00002291 VG_(message)(Vg_DebugMsg,
2292 "libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)",
sewardj8aa41de2009-01-22 12:24:26 +00002293 ctr++, nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
sewardjf98e1c02008-10-25 16:22:41 +00002294 }
2295}
2296
2297
2298/////////////////////////////////////////////////////////
2299// //
2300// Vts IDs //
2301// //
2302/////////////////////////////////////////////////////////
2303
2304//////////////////////////
2305static ULong stats__getOrdering_queries = 0;
2306static ULong stats__getOrdering_misses = 0;
2307static ULong stats__join2_queries = 0;
2308static ULong stats__join2_misses = 0;
2309
2310static inline UInt ROL32 ( UInt w, Int n ) {
2311 w = (w << n) | (w >> (32-n));
2312 return w;
2313}
2314static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
2315 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
2316 return hash % nTab;
2317}
2318
2319#define N_GETORDERING_CACHE 1023
2320static
2321 struct { VtsID vi1; VtsID vi2; POrd ord; }
2322 getOrdering_cache[N_GETORDERING_CACHE];
2323
2324#define N_JOIN2_CACHE 1023
2325static
2326 struct { VtsID vi1; VtsID vi2; VtsID res; }
2327 join2_cache[N_JOIN2_CACHE];
2328
2329static void VtsID__invalidate_caches ( void ) {
2330 Int i;
2331 for (i = 0; i < N_GETORDERING_CACHE; i++) {
2332 getOrdering_cache[i].vi1 = VtsID_INVALID;
2333 getOrdering_cache[i].vi2 = VtsID_INVALID;
2334 getOrdering_cache[i].ord = 0; /* an invalid POrd value */
2335 }
2336 for (i = 0; i < N_JOIN2_CACHE; i++) {
2337 join2_cache[i].vi1 = VtsID_INVALID;
2338 join2_cache[i].vi2 = VtsID_INVALID;
2339 join2_cache[i].res = VtsID_INVALID;
2340 }
2341}
2342//////////////////////////
2343
sewardjd52392d2008-11-08 20:36:26 +00002344//static Bool VtsID__is_valid ( VtsID vi ) {
2345// VtsTE* ve;
2346// if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
2347// return False;
2348// ve = VG_(indexXA)( vts_tab, vi );
2349// if (!ve->vts)
2350// return False;
2351// tl_assert(ve->vts->id == vi);
2352// return True;
2353//}
sewardjf98e1c02008-10-25 16:22:41 +00002354
2355static VTS* VtsID__to_VTS ( VtsID vi ) {
2356 VtsTE* te = VG_(indexXA)( vts_tab, vi );
2357 tl_assert(te->vts);
2358 return te->vts;
2359}
2360
2361static void VtsID__pp ( VtsID vi ) {
2362 HChar buf[100];
2363 VTS* vts = VtsID__to_VTS(vi);
2364 VTS__show( buf, sizeof(buf)-1, vts );
2365 buf[sizeof(buf)-1] = 0;
2366 VG_(printf)("%s", buf);
2367}
2368
2369/* compute partial ordering relation of vi1 and vi2. */
2370__attribute__((noinline))
2371static POrd VtsID__getOrdering_WRK ( VtsID vi1, VtsID vi2 ) {
2372 UInt hash;
2373 POrd ord;
2374 VTS *v1, *v2;
2375 //if (vi1 == vi2) return POrd_EQ;
2376 tl_assert(vi1 != vi2);
2377 ////++
2378 stats__getOrdering_queries++;
2379 hash = hash_VtsIDs(vi1, vi2, N_GETORDERING_CACHE);
2380 if (getOrdering_cache[hash].vi1 == vi1
2381 && getOrdering_cache[hash].vi2 == vi2)
2382 return getOrdering_cache[hash].ord;
2383 stats__getOrdering_misses++;
2384 ////--
2385 v1 = VtsID__to_VTS(vi1);
2386 v2 = VtsID__to_VTS(vi2);
2387 ord = VTS__cmp( v1, v2 );
2388 ////++
2389 getOrdering_cache[hash].vi1 = vi1;
2390 getOrdering_cache[hash].vi2 = vi2;
2391 getOrdering_cache[hash].ord = ord;
2392 ////--
2393 return ord;
2394}
2395static inline POrd VtsID__getOrdering ( VtsID vi1, VtsID vi2 ) {
2396 return vi1 == vi2 ? POrd_EQ : VtsID__getOrdering_WRK(vi1, vi2);
2397}
2398
2399/* compute binary join */
2400__attribute__((noinline))
2401static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
2402 UInt hash;
2403 VtsID res;
2404 VTS *vts1, *vts2, *nyu;
2405 //if (vi1 == vi2) return vi1;
2406 tl_assert(vi1 != vi2);
2407 ////++
2408 stats__join2_queries++;
2409 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
2410 if (join2_cache[hash].vi1 == vi1
2411 && join2_cache[hash].vi2 == vi2)
2412 return join2_cache[hash].res;
2413 stats__join2_misses++;
2414 ////--
2415 vts1 = VtsID__to_VTS(vi1);
2416 vts2 = VtsID__to_VTS(vi2);
2417 nyu = VTS__join(vts1,vts2);
2418 res = vts_tab__find_and_dealloc__or_add(nyu);
2419 ////++
2420 join2_cache[hash].vi1 = vi1;
2421 join2_cache[hash].vi2 = vi2;
2422 join2_cache[hash].res = res;
2423 ////--
2424 return res;
2425}
2426static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
2427 return vi1 == vi2 ? vi1 : VtsID__join2_WRK(vi1, vi2);
2428}
2429
2430/* create a singleton VTS, namely [thr:1] */
2431static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
2432 VTS* nyu = VTS__singleton(thr,tym);
2433 return vts_tab__find_and_dealloc__or_add(nyu);
2434}
2435
2436/* tick operation, creates value 1 if specified index is absent */
2437static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
2438 VTS* vts = VtsID__to_VTS(vi);
2439 VTS* nyu = VTS__tick(idx,vts);
2440 return vts_tab__find_and_dealloc__or_add(nyu);
2441}
2442
2443/* index into a VTS (only for assertions) */
2444static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
2445 VTS* vts = VtsID__to_VTS(vi);
2446 return VTS__indexAt_SLOW( vts, idx );
2447}
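
/* Illustrative sketch, not part of the library proper: the VtsID layer
   names each distinct VTS with a small integer so that ordering and
   join queries can be memoised in the fixed-size caches above. Assumes
   the VTS machinery is initialised and 'thrA'/'thrB' are valid Thr*s;
   the example function is hypothetical. Enable the #if to experiment. */
#if 0
static void example_VtsID_ops ( Thr* thrA, Thr* thrB )
{
   VtsID a  = VtsID__mk_Singleton( thrA, 1 );  /* [thrA:1] */
   VtsID b  = VtsID__mk_Singleton( thrB, 1 );  /* [thrB:1] */
   VtsID a2 = VtsID__tick( a, thrA );          /* [thrA:2] */
   VtsID j  = VtsID__join2( a2, b );           /* [thrA:2, thrB:1] */
   tl_assert( VtsID__getOrdering( a, a2 ) == POrd_LT );
   tl_assert( VtsID__getOrdering( a, b  ) == POrd_UN );
   tl_assert( VtsID__getOrdering( j, b  ) == POrd_GT );
   tl_assert( VtsID__indexAt( j, thrA ) == 2 );
}
#endif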
2448
2449
2450/////////////////////////////////////////////////////////
2451// //
2452// Threads //
2453// //
2454/////////////////////////////////////////////////////////
2455
2456struct _Thr {
2457 /* Current VTSs for this thread. They change as we go along. viR
2458 is the VTS to be used for reads, viW for writes. Usually they
2459 are the same, but can differ when we deal with reader-writer
2460 locks. It is always the case that VtsID__getOrdering(viW,viR)
2461 == POrd_LT or POrd_EQ -- that is, viW must be the same as,
2462 or lag behind, viR. */
2463 VtsID viR;
2464 VtsID viW;
2465 /* opaque (to us) data we hold on behalf of the library's user. */
2466 void* opaque;
2467};
2468
2469static Thr* Thr__new ( void ) {
2470 Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
2471 thr->viR = VtsID_INVALID;
2472 thr->viW = VtsID_INVALID;
2473 return thr;
2474}
2475
2476
2477/////////////////////////////////////////////////////////
2478// //
2479// Shadow Values //
2480// //
2481/////////////////////////////////////////////////////////
2482
2483// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
2484// hb_zsm.h. We have to do everything else here.
2485
2486/* SVal is 64 bit unsigned int.
2487
2488 <---------30---------> <---------30--------->
2489 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
2490 01 X--------------------X XX X--------------------X E(rror)
2491 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
2492 11 X--------------------X XX X--------------------X I: SVal_INVALID
2493*/
2494#define SVAL_TAGMASK (3ULL << 62)
2495
2496static inline Bool SVal__isC ( SVal s ) {
2497 return (0ULL << 62) == (s & SVAL_TAGMASK);
2498}
2499static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
2500 //tl_assert(VtsID__is_valid(rmini));
2501 //tl_assert(VtsID__is_valid(wmini));
2502 return (((ULong)rmini) << 32) | ((ULong)wmini);
2503}
2504static inline VtsID SVal__unC_Rmin ( SVal s ) {
2505 tl_assert(SVal__isC(s));
2506 return (VtsID)(s >> 32);
2507}
2508static inline VtsID SVal__unC_Wmin ( SVal s ) {
2509 tl_assert(SVal__isC(s));
2510 return (VtsID)(s & 0xFFFFFFFFULL);
2511}
2512
2513static Bool SVal__isE ( SVal s ) {
2514 return (1ULL << 62) == (s & SVAL_TAGMASK);
2515}
2516static SVal SVal__mkE ( void ) {
2517 return 1ULL << 62;
2518}
2519
2520static Bool SVal__isA ( SVal s ) {
2521 return (2ULL << 62) == (s & SVAL_TAGMASK);
2522}
2523static SVal SVal__mkA ( void ) {
2524 return 2ULL << 62;
2525}
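
/* Illustrative sketch, not part of the library proper: a 'C' shadow
   value simply packs the read- and write-VtsIDs into the two halves of
   a 64-bit word, with the tag living in the top two bits, so packing
   and unpacking round-trips for in-range VtsIDs. The numbers below are
   arbitrary example values, not real table indices. */
#if 0
static void example_SVal_roundtrip ( void )
{
   VtsID rmin = 123, wmin = 456;  /* assumed-valid small VtsIDs */
   SVal  sv   = SVal__mkC( rmin, wmin );
   tl_assert( SVal__isC( sv ) );
   tl_assert( !SVal__isA( sv ) && !SVal__isE( sv ) );
   tl_assert( SVal__unC_Rmin( sv ) == rmin );
   tl_assert( SVal__unC_Wmin( sv ) == wmin );
}
#endif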
2526
2527/* Direct callback from lib_zsm. */
2528static void SVal__rcinc ( SVal s ) {
2529 if (SVal__isC(s)) {
2530 VtsID__rcinc( SVal__unC_Rmin(s) );
2531 VtsID__rcinc( SVal__unC_Wmin(s) );
2532 }
2533}
2534
2535/* Direct callback from lib_zsm. */
2536static void SVal__rcdec ( SVal s ) {
2537 if (SVal__isC(s)) {
2538 VtsID__rcdec( SVal__unC_Rmin(s) );
2539 VtsID__rcdec( SVal__unC_Wmin(s) );
2540 }
2541}
2542
2543
2544/////////////////////////////////////////////////////////
2545// //
sewardjd86e3a22008-12-03 11:39:37 +00002546// A simple group (memory) allocator //
2547// //
2548/////////////////////////////////////////////////////////
2549
2550//////////////// BEGIN general group allocator
2551typedef
2552 struct {
2553 UWord elemSzB; /* element size */
2554 UWord nPerGroup; /* # elems per group */
2555 void* (*alloc)(HChar*, SizeT); /* group allocator */
2556 HChar* cc; /* group allocator's cc */
2557 void (*free)(void*); /* group allocator's free-er (unused) */
2558 /* XArray of void* (pointers to groups). The groups themselves.
2559 Each element is a pointer to a block of size (elemSzB *
2560 nPerGroup) bytes. */
2561 XArray* groups;
2562 /* next free element. Is a pointer to an element in one of the
2563 groups pointed to by .groups. */
2564 void* nextFree;
2565 }
2566 GroupAlloc;
2567
2568static void init_GroupAlloc ( /*MOD*/GroupAlloc* ga,
2569 UWord elemSzB,
2570 UWord nPerGroup,
2571 void* (*alloc)(HChar*, SizeT),
2572 HChar* cc,
2573 void (*free)(void*) )
2574{
2575 tl_assert(0 == (elemSzB % sizeof(UWord)));
2576 tl_assert(elemSzB >= sizeof(UWord));
2577 tl_assert(nPerGroup >= 100); /* let's say */
2578 tl_assert(alloc);
2579 tl_assert(cc);
2580 tl_assert(free);
2581 tl_assert(ga);
2582 VG_(memset)(ga, 0, sizeof(*ga));
2583 ga->elemSzB = elemSzB;
2584 ga->nPerGroup = nPerGroup;
2585 ga->groups = NULL;
2586 ga->alloc = alloc;
2587 ga->cc = cc;
2588 ga->free = free;
2589 ga->groups = VG_(newXA)( alloc, cc, free, sizeof(void*) );
2590 ga->nextFree = NULL;
2591 tl_assert(ga->groups);
2592}
2593
2594/* The freelist is empty. Allocate a new group and put all the new
2595 elements in it onto the freelist. */
2596__attribute__((noinline))
2597static void gal_add_new_group ( GroupAlloc* ga )
2598{
2599 Word i;
2600 UWord* group;
2601 tl_assert(ga);
2602 tl_assert(ga->nextFree == NULL);
2603 group = ga->alloc( ga->cc, ga->elemSzB * ga->nPerGroup );
2604 tl_assert(group);
2605 /* extend the freelist through the new group. Place the freelist
2606 pointer in the first word of each element. That's why the
2607 element size must be at least one word. */
2608 for (i = ga->nPerGroup-1; i >= 0; i--) {
2609 UChar* elemC = ((UChar*)group) + i * ga->elemSzB;
2610 UWord* elem = (UWord*)elemC;
2611 tl_assert(0 == (((UWord)elem) % sizeof(UWord)));
2612 *elem = (UWord)ga->nextFree;
2613 ga->nextFree = elem;
2614 }
2615 /* and add to our collection of groups */
2616 VG_(addToXA)( ga->groups, &group );
2617}
2618
2619inline static void* gal_Alloc ( GroupAlloc* ga )
2620{
2621 UWord* elem;
2622 if (UNLIKELY(ga->nextFree == NULL)) {
2623 gal_add_new_group(ga);
2624 }
2625 elem = ga->nextFree;
2626 ga->nextFree = (void*)*elem;
2627 *elem = 0; /* unnecessary, but just to be on the safe side */
2628 return elem;
2629}
2630
2631inline static void* gal_Alloc_w_size_check ( GroupAlloc* ga, SizeT n )
2632{
2633 tl_assert(n == ga->elemSzB);
2634 return gal_Alloc( ga );
2635}
2636
2637inline static void gal_Free ( GroupAlloc* ga, void* p )
2638{
2639 UWord* elem = (UWord*)p;
2640 *elem = (UWord)ga->nextFree;
2641 ga->nextFree = elem;
2642}
2643//////////////// END general group allocator
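
/* Illustrative sketch, not part of the library proper: a client sizes
   the group allocator for one fixed element type and then treats
   gal_Alloc/gal_Free as a free-list-backed malloc/free pair, exactly
   as the RCEC and OldRef pools below do. 'ExampleNode' and the
   function are hypothetical, for this sketch only. */
#if 0
typedef struct { UWord key; UWord val; } ExampleNode;

static GroupAlloc example_node_allocator;

static void example_group_alloc_usage ( void )
{
   ExampleNode* n;
   init_GroupAlloc( &example_node_allocator,
                    sizeof(ExampleNode),
                    1000 /* elements per group */,
                    HG_(zalloc),
                    "libhb.example_group_alloc_usage.1",
                    HG_(free) );
   n = gal_Alloc( &example_node_allocator );
   n->key = 1;
   n->val = 2;
   /* ... use n ... */
   gal_Free( &example_node_allocator, n );  /* back onto the free list */
}
#endif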
2644
2645
2646/////////////////////////////////////////////////////////
2647// //
sewardjf98e1c02008-10-25 16:22:41 +00002648// Change-event map2 //
2649// //
2650/////////////////////////////////////////////////////////
2651
sewardjf98e1c02008-10-25 16:22:41 +00002652#define EVENT_MAP_GC_DISCARD_FRACTION 0.5
2653
2654/* This is in two parts:
2655
2656 1. An OSet of RCECs. This is a set of reference-counted stack
2657 traces. When the reference count of a stack trace becomes zero,
2658 it is removed from the set and freed up. The intent is to have
2659 a set of stack traces which can be referred to from (2), but to
2660 only represent each one once. The set is indexed/searched by
2661 ordering on the stack trace vectors.
2662
sewardj849b0ed2008-12-21 10:43:10 +00002663 2. A SparseWA of OldRefs. These store information about each old
2664 ref that we need to record. It is indexed by address of the
sewardjf98e1c02008-10-25 16:22:41 +00002665 location for which the information is recorded. For LRU
2666 purposes, each OldRef also contains a generation number,
2667 indicating when it was most recently accessed.
2668
2669 The important part of an OldRef is, however, its accs[] array.
sewardj849b0ed2008-12-21 10:43:10 +00002670 This is an array of N_OLDREF_ACCS which binds (thread, R/W,
2671 size) triples to RCECs. This allows us to collect the last
2672 access-traceback by up to N_OLDREF_ACCS different triples for
2673 this location. The accs[] array is a move-to-front (MTF) array. If a binding
2674 falls off the end, that's too bad -- we will lose info about
2675 that triple's access to this location.
sewardjf98e1c02008-10-25 16:22:41 +00002676
sewardj849b0ed2008-12-21 10:43:10 +00002677 When the SparseWA becomes too big, we can throw away the OldRefs
sewardjf98e1c02008-10-25 16:22:41 +00002678 whose generation numbers are below some threshold; hence doing
2679 approximate LRU discarding. For each discarded OldRef we must
2680 of course decrement the reference count on the all RCECs it
2681 refers to, in order that entries from (1) eventually get
2682 discarded too.
sewardj849b0ed2008-12-21 10:43:10 +00002683
2684 A major improvement in reliability of this mechanism would be to
2685 have a dynamically sized OldRef.accs[] array, so no entries ever
2686 fall off the end. In investigations (Dec 08) it appears that a
2687 major cause of the non-availability of conflicting-access traces
2688 in race reports is the fixed size of this array. I
2689 suspect for most OldRefs, only a few entries are used, but for a
2690 minority of cases there is an overflow, leading to info lossage.
2691 Investigations also suggest this is very workload and scheduling
2692 sensitive. Therefore a dynamic sizing would be better.
2693
2694 However, dynamic sizing would defeat the use of a GroupAllocator
2695 for OldRef structures. And that's important for performance. So
2696 it's not straightforward to do.
sewardjf98e1c02008-10-25 16:22:41 +00002697*/
2698
2699
2700static UWord stats__ctxt_rcdec1 = 0;
2701static UWord stats__ctxt_rcdec2 = 0;
2702static UWord stats__ctxt_rcdec3 = 0;
2703static UWord stats__ctxt_rcdec_calls = 0;
2704static UWord stats__ctxt_rcdec_discards = 0;
2705static UWord stats__ctxt_rcdec1_eq = 0;
2706
2707static UWord stats__ctxt_tab_curr = 0;
2708static UWord stats__ctxt_tab_max = 0;
2709
2710static UWord stats__ctxt_tab_qs = 0;
2711static UWord stats__ctxt_tab_cmps = 0;
2712
2713
2714///////////////////////////////////////////////////////
2715//// Part (1): An OSet of RCECs
2716///
2717
2718#define N_FRAMES 8
2719
2720// (UInt) `echo "Reference Counted Execution Context" | md5sum`
2721#define RCEC_MAGIC 0xab88abb2UL
2722
2723//#define N_RCEC_TAB 98317 /* prime */
2724#define N_RCEC_TAB 196613 /* prime */
2725
2726typedef
2727 struct _RCEC {
sewardjd86e3a22008-12-03 11:39:37 +00002728 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00002729 struct _RCEC* next;
sewardjf98e1c02008-10-25 16:22:41 +00002730 UWord rc;
2731 UWord rcX; /* used for crosschecking */
njn6c83d5e2009-05-05 23:46:24 +00002732 UWord frames_hash; /* hash of all the frames */
2733 UWord frames[N_FRAMES];
sewardjf98e1c02008-10-25 16:22:41 +00002734 }
2735 RCEC;
2736
2737static RCEC** contextTab = NULL; /* hash table of RCEC*s */
2738
2739
2740/* Gives an arbitrary total order on RCEC .frames fields */
2741static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
2742 Word i;
2743 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
2744 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
njn6c83d5e2009-05-05 23:46:24 +00002745 if (ec1->frames_hash < ec2->frames_hash) return -1;
2746 if (ec1->frames_hash > ec2->frames_hash) return 1;
2747 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00002748 if (ec1->frames[i] < ec2->frames[i]) return -1;
njn6c83d5e2009-05-05 23:46:24 +00002749 if (ec1->frames[i] > ec2->frames[i]) return 1;
sewardjf98e1c02008-10-25 16:22:41 +00002750 }
2751 return 0;
2752}
2753
2754
2755/* Dec the ref of this RCEC. */
2756static void ctxt__rcdec ( RCEC* ec )
2757{
2758 stats__ctxt_rcdec_calls++;
2759 tl_assert(ec && ec->magic == RCEC_MAGIC);
2760 tl_assert(ec->rc > 0);
2761 ec->rc--;
2762}
2763
2764static void ctxt__rcinc ( RCEC* ec )
2765{
2766 tl_assert(ec && ec->magic == RCEC_MAGIC);
2767 ec->rc++;
2768}
2769
2770
sewardjd86e3a22008-12-03 11:39:37 +00002771//////////// BEGIN RCEC group allocator
2772static GroupAlloc rcec_group_allocator;
2773
2774static RCEC* alloc_RCEC ( void ) {
2775 return gal_Alloc ( &rcec_group_allocator );
2776}
2777
2778static void free_RCEC ( RCEC* rcec ) {
2779 tl_assert(rcec->magic == RCEC_MAGIC);
2780 gal_Free( &rcec_group_allocator, rcec );
2781}
2782//////////// END RCEC group allocator
2783
2784
sewardjf98e1c02008-10-25 16:22:41 +00002785/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
2786 move it one step closer to the front of the list, so as to make
2787 subsequent searches for it cheaper. */
2788static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
2789{
2790 RCEC *ec0, *ec1, *ec2;
2791 if (ec == *headp)
2792 tl_assert(0); /* already at head of list */
2793 tl_assert(ec != NULL);
2794 ec0 = *headp;
2795 ec1 = NULL;
2796 ec2 = NULL;
2797 while (True) {
2798 if (ec0 == NULL || ec0 == ec) break;
2799 ec2 = ec1;
2800 ec1 = ec0;
2801 ec0 = ec0->next;
2802 }
2803 tl_assert(ec0 == ec);
2804 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
2805 RCEC* tmp;
2806 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
2807 predecessor. Swap ec0 and ec1, that is, move ec0 one step
2808 closer to the start of the list. */
2809 tl_assert(ec2->next == ec1);
2810 tl_assert(ec1->next == ec0);
2811 tmp = ec0->next;
2812 ec2->next = ec0;
2813 ec0->next = ec1;
2814 ec1->next = tmp;
2815 }
2816 else
2817 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
2818 /* it's second in the list. */
2819 tl_assert(*headp == ec1);
2820 tl_assert(ec1->next == ec0);
2821 ec1->next = ec0->next;
2822 ec0->next = ec1;
2823 *headp = ec0;
2824 }
2825}
2826
2827
2828/* Find the given RCEC in the tree, and return a pointer to it. Or,
2829 if not present, add the given one to the tree (by making a copy of
2830 it, so the caller can immediately deallocate the original) and
2831 return a pointer to the copy. The caller can safely have 'example'
2832 on its stack, since we will always return a pointer to a copy of
2833 it, not to the original. Note that the inserted node will have .rc
2834 of zero and so the caller must immediately increment it. */
2835__attribute__((noinline))
2836static RCEC* ctxt__find_or_add ( RCEC* example )
2837{
2838 UWord hent;
2839 RCEC* copy;
2840 tl_assert(example && example->magic == RCEC_MAGIC);
2841 tl_assert(example->rc == 0);
2842
2843 /* Search the hash table to see if we already have it. */
2844 stats__ctxt_tab_qs++;
njn6c83d5e2009-05-05 23:46:24 +00002845 hent = example->frames_hash % N_RCEC_TAB;
sewardjf98e1c02008-10-25 16:22:41 +00002846 copy = contextTab[hent];
2847 while (1) {
2848 if (!copy) break;
2849 tl_assert(copy->magic == RCEC_MAGIC);
2850 stats__ctxt_tab_cmps++;
2851 if (0 == RCEC__cmp_by_frames(copy, example)) break;
2852 copy = copy->next;
2853 }
2854
2855 if (copy) {
2856 tl_assert(copy != example);
2857 /* optimisation: if it's not at the head of its list, move 1
2858 step fwds, to make future searches cheaper */
2859 if (copy != contextTab[hent]) {
2860 move_RCEC_one_step_forward( &contextTab[hent], copy );
2861 }
2862 } else {
sewardjd86e3a22008-12-03 11:39:37 +00002863 copy = alloc_RCEC();
sewardjf98e1c02008-10-25 16:22:41 +00002864 tl_assert(copy != example);
2865 *copy = *example;
2866 copy->next = contextTab[hent];
2867 contextTab[hent] = copy;
2868 stats__ctxt_tab_curr++;
2869 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
2870 stats__ctxt_tab_max = stats__ctxt_tab_curr;
2871 }
2872 return copy;
2873}
2874
2875static inline UWord ROLW ( UWord w, Int n )
2876{
2877 Int bpw = 8 * sizeof(UWord);
2878 w = (w << n) | (w >> (bpw-n));
2879 return w;
2880}
2881
2882__attribute__((noinline))
2883static RCEC* get_RCEC ( Thr* thr )
2884{
2885 UWord hash, i;
2886 RCEC example;
2887 example.magic = RCEC_MAGIC;
2888 example.rc = 0;
2889 example.rcX = 0;
njn6c83d5e2009-05-05 23:46:24 +00002890 main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
sewardjf98e1c02008-10-25 16:22:41 +00002891 hash = 0;
njn6c83d5e2009-05-05 23:46:24 +00002892 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00002893 hash ^= example.frames[i];
2894 hash = ROLW(hash, 19);
2895 }
njn6c83d5e2009-05-05 23:46:24 +00002896 example.frames_hash = hash;
sewardjf98e1c02008-10-25 16:22:41 +00002897 return ctxt__find_or_add( &example );
2898}
2899
2900///////////////////////////////////////////////////////
sewardjbc307e52008-12-06 22:10:54 +00002901//// Part (2):
2902/// A SparseWA guest-addr -> OldRef, that refers to (1)
sewardjf98e1c02008-10-25 16:22:41 +00002903///
2904
2905// (UInt) `echo "Old Reference Information" | md5sum`
2906#define OldRef_MAGIC 0x30b1f075UL
2907
sewardjc5ea9962008-12-07 01:41:46 +00002908/* Records an access: a thread and a context. The size
2909 (1,2,4,8) and read-or-writeness are also encoded as
2910 follows: bottom bit of .thr is 1 if write, 0 if read
2911 bottom 2 bits of .rcec are encode size:
2912 00 = 1, 01 = 2, 10 = 4, 11 = 8
2913*/
sewardjf98e1c02008-10-25 16:22:41 +00002914typedef struct { Thr* thr; RCEC* rcec; } Thr_n_RCEC;
2915
sewardj849b0ed2008-12-21 10:43:10 +00002916#define N_OLDREF_ACCS 5
sewardjf98e1c02008-10-25 16:22:41 +00002917
2918typedef
2919 struct {
sewardjd86e3a22008-12-03 11:39:37 +00002920 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00002921 UWord gen; /* when most recently accessed */
sewardjd86e3a22008-12-03 11:39:37 +00002922 /* or free list when not in use */
sewardjf98e1c02008-10-25 16:22:41 +00002923 /* unused slots in this array have .thr == NULL */
2924 Thr_n_RCEC accs[N_OLDREF_ACCS];
2925 }
2926 OldRef;
2927
sewardjd86e3a22008-12-03 11:39:37 +00002928
2929//////////// BEGIN OldRef group allocator
2930static GroupAlloc oldref_group_allocator;
2931
2932static OldRef* alloc_OldRef ( void ) {
2933 return gal_Alloc ( &oldref_group_allocator );
2934}
2935
2936static void free_OldRef ( OldRef* r ) {
2937 tl_assert(r->magic == OldRef_MAGIC);
2938 gal_Free( &oldref_group_allocator, r );
2939}
2940//////////// END OldRef group allocator
2941
sewardjd86e3a22008-12-03 11:39:37 +00002942
sewardjbc307e52008-12-06 22:10:54 +00002943static SparseWA* oldrefTree = NULL; /* SparseWA* OldRef* */
2944static UWord oldrefGen = 0; /* current LRU generation # */
2945static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
2946static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */
sewardjf98e1c02008-10-25 16:22:41 +00002947
sewardjc5ea9962008-12-07 01:41:46 +00002948inline static void* ptr_or_UWord ( void* p, UWord w ) {
2949 return (void*)( ((UWord)p) | ((UWord)w) );
2950}
2951inline static void* ptr_and_UWord ( void* p, UWord w ) {
2952 return (void*)( ((UWord)p) & ((UWord)w) );
2953}
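
/* Illustrative sketch, not part of the library proper: event_map_bind
   (below) steals the two low bits of the .thr and .rcec pointers to
   record writeness and access size, relying on both structures being
   at least word-aligned. The round-trip looks like this; 'thr' and
   'rcec' are assumed to be genuine aligned pointers and the example
   function is hypothetical. Enable the #if to experiment. */
#if 0
static void example_ptr_tag_roundtrip ( Thr* thr, RCEC* rcec )
{
   Thr*  thrT  = ptr_or_UWord( thr,  1 );  /* low bit 1: a write */
   RCEC* rcecT = ptr_or_UWord( rcec, 2 );  /* code 2: a 4-byte access */
   tl_assert( ptr_and_UWord( thrT,  ~3 ) == (void*)thr );
   tl_assert( ptr_and_UWord( rcecT, ~3 ) == (void*)rcec );
   tl_assert( 1 == (UWord)ptr_and_UWord( thrT,  1 ) );  /* writeness bit */
   tl_assert( 2 == (UWord)ptr_and_UWord( rcecT, 3 ) );  /* size code */
}
#endif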
2954
sewardj1669cc72008-12-13 01:20:21 +00002955inline static UInt min_UInt ( UInt a, UInt b ) {
2956 return a < b ? a : b;
2957}
2958
sewardja781be62008-12-08 00:12:28 +00002959/* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
2960 first interval is lower, 1 if the first interval is higher, and 0
2961 if there is any overlap. Redundant paranoia with casting is there
2962 following what looked distinctly like a bug in gcc-4.1.2, in which
2963 some of the comparisons were done signedly instead of
2964 unsignedly. */
2965/* Copied from exp-ptrcheck/sg_main.c */
2966static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
2967 Addr a2, SizeT n2 ) {
2968 UWord a1w = (UWord)a1;
2969 UWord n1w = (UWord)n1;
2970 UWord a2w = (UWord)a2;
2971 UWord n2w = (UWord)n2;
2972 tl_assert(n1w > 0 && n2w > 0);
2973 if (a1w + n1w <= a2w) return -1L;
2974 if (a2w + n2w <= a1w) return 1L;
2975 return 0;
2976}
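
/* Illustrative sketch, not part of the library proper: a quick worked
   example of the interval comparison used by the lookup routine below
   to decide whether a recorded old access overlaps the access being
   asked about. The addresses are arbitrary example values and the
   function is hypothetical. Enable the #if to experiment. */
#if 0
static void example_interval_cmp ( void )
{
   /* [0x1000,0x1004) is entirely below [0x1004,0x1008): -1 */
   tl_assert( cmp_nonempty_intervals( 0x1000, 4, 0x1004, 4 ) == -1 );
   /* [0x1002,0x1006) overlaps [0x1004,0x1008) at 0x1004..0x1005: 0 */
   tl_assert( cmp_nonempty_intervals( 0x1002, 4, 0x1004, 4 ) == 0 );
   /* [0x1008,0x1009) is entirely above [0x1000,0x1008): 1 */
   tl_assert( cmp_nonempty_intervals( 0x1008, 1, 0x1000, 8 ) == 1 );
}
#endif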
2977
sewardjc5ea9962008-12-07 01:41:46 +00002978static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
sewardjf98e1c02008-10-25 16:22:41 +00002979{
sewardjd86e3a22008-12-03 11:39:37 +00002980 OldRef* ref;
sewardjc5ea9962008-12-07 01:41:46 +00002981 RCEC* rcec;
sewardjd86e3a22008-12-03 11:39:37 +00002982 Word i, j;
2983 UWord keyW, valW;
2984 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00002985
sewardjc5ea9962008-12-07 01:41:46 +00002986 rcec = get_RCEC( thr );
2987 ctxt__rcinc(rcec);
2988
2989 /* encode the size and writeness of the transaction in the bottom
2990 two bits of thr and rcec. */
2991 thr = ptr_or_UWord(thr, isW ? 1 : 0);
2992 switch (szB) {
2993 /* This doesn't look particularly branch-predictor friendly. */
2994 case 1: rcec = ptr_or_UWord(rcec, 0); break;
2995 case 2: rcec = ptr_or_UWord(rcec, 1); break;
2996 case 4: rcec = ptr_or_UWord(rcec, 2); break;
2997 case 8: rcec = ptr_or_UWord(rcec, 3); break;
2998 default: tl_assert(0);
2999 }
3000
3001 /* Look in the map to see if we already have this. */
sewardjbc307e52008-12-06 22:10:54 +00003002 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, a );
sewardjf98e1c02008-10-25 16:22:41 +00003003
sewardjd86e3a22008-12-03 11:39:37 +00003004 if (b) {
sewardjf98e1c02008-10-25 16:22:41 +00003005
3006 /* We already have a record for this address. We now need to
sewardj849b0ed2008-12-21 10:43:10 +00003007 see if we have a stack trace pertaining to this (thread, R/W,
3008 size) triple. */
sewardjd86e3a22008-12-03 11:39:37 +00003009 tl_assert(keyW == a);
3010 ref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00003011 tl_assert(ref->magic == OldRef_MAGIC);
3012
3013 tl_assert(thr);
3014 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardj849b0ed2008-12-21 10:43:10 +00003015 if (ref->accs[i].thr != thr)
3016 continue;
3017 /* since .thr encodes both the accessing thread and the
3018 read/writeness, we know now that at least those features
3019 of the access match this entry. So we just need to check
3020 the size indication. Do this by inspecting the lowest 2 bits of
3021 .rcec, which contain the encoded size info. */
3022 if (ptr_and_UWord(ref->accs[i].rcec,3) != ptr_and_UWord(rcec,3))
3023 continue;
3024 /* else we have a match, so stop looking. */
3025 break;
sewardjf98e1c02008-10-25 16:22:41 +00003026 }
3027
3028 if (i < N_OLDREF_ACCS) {
3029 /* thread 'thr' has an entry at index 'i'. Update it. */
3030 if (i > 0) {
3031 Thr_n_RCEC tmp = ref->accs[i-1];
3032 ref->accs[i-1] = ref->accs[i];
3033 ref->accs[i] = tmp;
3034 i--;
3035 }
sewardjc5ea9962008-12-07 01:41:46 +00003036 if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
sewardjf98e1c02008-10-25 16:22:41 +00003037 stats__ctxt_rcdec1++;
sewardjc5ea9962008-12-07 01:41:46 +00003038 ctxt__rcdec( ptr_and_UWord(ref->accs[i].rcec, ~3) );
3039 ref->accs[i].rcec = rcec;
sewardjf98e1c02008-10-25 16:22:41 +00003040 tl_assert(ref->accs[i].thr == thr);
3041 } else {
sewardj849b0ed2008-12-21 10:43:10 +00003042 /* No entry for this (thread, R/W, size) triple. Shuffle all
3043 of them down one slot, and put the new entry at the start
3044 of the array. */
sewardjf98e1c02008-10-25 16:22:41 +00003045 if (ref->accs[N_OLDREF_ACCS-1].thr) {
3046 /* the last slot is in use. We must dec the rc on the
3047 associated rcec. */
3048 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
3049 stats__ctxt_rcdec2++;
sewardj849b0ed2008-12-21 10:43:10 +00003050 if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
3051 VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
sewardjc5ea9962008-12-07 01:41:46 +00003052 ctxt__rcdec( ptr_and_UWord(ref->accs[N_OLDREF_ACCS-1].rcec, ~3) );
sewardjf98e1c02008-10-25 16:22:41 +00003053 } else {
3054 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
3055 }
3056 for (j = N_OLDREF_ACCS-1; j >= 1; j--)
3057 ref->accs[j] = ref->accs[j-1];
3058 ref->accs[0].thr = thr;
sewardjc5ea9962008-12-07 01:41:46 +00003059 ref->accs[0].rcec = rcec;
3060 /* thr==NULL is used to signify an empty slot, so we can't
3061 add a NULL thr. */
3062 tl_assert(ptr_and_UWord(thr, ~3) != 0);
sewardjf98e1c02008-10-25 16:22:41 +00003063 }
3064
3065 ref->gen = oldrefGen;
sewardjf98e1c02008-10-25 16:22:41 +00003066
3067 } else {
3068
3069 /* We don't have a record for this address. Create a new one. */
3070 if (oldrefTreeN >= oldrefGenIncAt) {
3071 oldrefGen++;
3072 oldrefGenIncAt = oldrefTreeN + 50000;
3073 if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
3074 oldrefGen, oldrefTreeN );
3075 }
sewardjd86e3a22008-12-03 11:39:37 +00003076
3077 ref = alloc_OldRef();
sewardjf98e1c02008-10-25 16:22:41 +00003078 ref->magic = OldRef_MAGIC;
3079 ref->gen = oldrefGen;
sewardjc5ea9962008-12-07 01:41:46 +00003080 ref->accs[0].rcec = rcec;
sewardjf98e1c02008-10-25 16:22:41 +00003081 ref->accs[0].thr = thr;
sewardj849b0ed2008-12-21 10:43:10 +00003082 /* thr==NULL is used to signify an empty slot, so we can't add a
3083 NULL thr. */
3084 tl_assert(ptr_and_UWord(thr, ~3) != 0);
sewardjf98e1c02008-10-25 16:22:41 +00003085 for (j = 1; j < N_OLDREF_ACCS; j++) {
3086 ref->accs[j].thr = NULL;
3087 ref->accs[j].rcec = NULL;
3088 }
sewardjbc307e52008-12-06 22:10:54 +00003089 VG_(addToSWA)( oldrefTree, a, (UWord)ref );
sewardjf98e1c02008-10-25 16:22:41 +00003090 oldrefTreeN++;
3091
3092 }
3093}
3094
3095
sewardjc5ea9962008-12-07 01:41:46 +00003096Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
3097 /*OUT*/Thr** resThr,
3098 /*OUT*/SizeT* resSzB,
3099 /*OUT*/Bool* resIsW,
3100 Thr* thr, Addr a, SizeT szB, Bool isW )
sewardjf98e1c02008-10-25 16:22:41 +00003101{
sewardja781be62008-12-08 00:12:28 +00003102 Word i, j;
sewardjd86e3a22008-12-03 11:39:37 +00003103 OldRef* ref;
3104 UWord keyW, valW;
3105 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00003106
sewardjc5ea9962008-12-07 01:41:46 +00003107 Thr* cand_thr;
3108 RCEC* cand_rcec;
3109 Bool cand_isW;
3110 SizeT cand_szB;
sewardja781be62008-12-08 00:12:28 +00003111 Addr cand_a;
3112
3113 Addr toCheck[15];
3114 Int nToCheck = 0;
sewardjc5ea9962008-12-07 01:41:46 +00003115
3116 tl_assert(thr);
3117 tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
sewardjf98e1c02008-10-25 16:22:41 +00003118
sewardja781be62008-12-08 00:12:28 +00003119 toCheck[nToCheck++] = a;
3120 for (i = -7; i < (Word)szB; i++) {
3121 if (i != 0)
3122 toCheck[nToCheck++] = a + i;
3123 }
3124 tl_assert(nToCheck <= 15);
3125
3126 /* Now see if we can find a suitable matching event for
3127 any of the addresses in toCheck[0 .. nToCheck-1]. */
3128 for (j = 0; j < nToCheck; j++) {
3129
3130 cand_a = toCheck[j];
3131 // VG_(printf)("test %ld %p\n", j, cand_a);
3132
3133 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, cand_a );
3134 if (!b)
3135 continue;
3136
sewardjd86e3a22008-12-03 11:39:37 +00003137 ref = (OldRef*)valW;
sewardja781be62008-12-08 00:12:28 +00003138 tl_assert(keyW == cand_a);
sewardjf98e1c02008-10-25 16:22:41 +00003139 tl_assert(ref->magic == OldRef_MAGIC);
3140 tl_assert(ref->accs[0].thr); /* first slot must always be used */
3141
sewardjc5ea9962008-12-07 01:41:46 +00003142 cand_thr = NULL;
3143 cand_rcec = NULL;
3144 cand_isW = False;
3145 cand_szB = 0;
sewardjf98e1c02008-10-25 16:22:41 +00003146
sewardjc5ea9962008-12-07 01:41:46 +00003147 for (i = 0; i < N_OLDREF_ACCS; i++) {
3148 Thr_n_RCEC* cand = &ref->accs[i];
3149 cand_thr = ptr_and_UWord(cand->thr, ~3);
3150 cand_rcec = ptr_and_UWord(cand->rcec, ~3);
3151 /* Decode the writeness from the bottom bit of .thr. */
3152 cand_isW = 1 == (UWord)ptr_and_UWord(cand->thr, 1);
3153 /* Decode the size from the bottom two bits of .rcec. */
3154 switch ((UWord)ptr_and_UWord(cand->rcec, 3)) {
3155 case 0: cand_szB = 1; break;
3156 case 1: cand_szB = 2; break;
3157 case 2: cand_szB = 4; break;
3158 case 3: cand_szB = 8; break;
3159 default: tl_assert(0);
3160 }
3161
3162 if (cand_thr == NULL)
3163 /* This slot isn't in use. Ignore it. */
3164 continue;
3165
3166 if (cand_thr == thr)
3167 /* This is an access by the same thread, but we're only
3168 interested in accesses from other threads. Ignore. */
3169 continue;
3170
3171 if ((!cand_isW) && (!isW))
3172 /* We don't want to report a read racing against another
3173 read; that's stupid. So in this case move on. */
3174 continue;
3175
sewardja781be62008-12-08 00:12:28 +00003176 if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
3177 /* No overlap with the access we're asking about. Ignore. */
3178 continue;
3179
sewardjc5ea9962008-12-07 01:41:46 +00003180 /* We have a match. Stop searching. */
3181 break;
3182 }
3183
3184 tl_assert(i >= 0 && i <= N_OLDREF_ACCS);
3185
sewardja781be62008-12-08 00:12:28 +00003186 if (i < N_OLDREF_ACCS) {
njn3a4b58f2009-05-07 23:08:10 +00003187 Int n, maxNFrames;
sewardja781be62008-12-08 00:12:28 +00003188 /* return with success */
3189 tl_assert(cand_thr);
3190 tl_assert(cand_rcec);
3191 tl_assert(cand_rcec->magic == RCEC_MAGIC);
3192 tl_assert(cand_szB >= 1);
njn3a4b58f2009-05-07 23:08:10 +00003193 /* Count how many non-zero frames we have. */
3194 maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
3195 for (n = 0; n < maxNFrames; n++) {
3196 if (0 == cand_rcec->frames[n]) break;
3197 }
3198 *resEC = VG_(make_ExeContext_from_StackTrace)(cand_rcec->frames, n);
sewardja781be62008-12-08 00:12:28 +00003199 *resThr = cand_thr;
3200 *resSzB = cand_szB;
3201 *resIsW = cand_isW;
3202 return True;
3203 }
sewardjc5ea9962008-12-07 01:41:46 +00003204
sewardja781be62008-12-08 00:12:28 +00003205 /* consider next address in toCheck[] */
3206 } /* for (j = 0; j < nToCheck; j++) */
sewardjf98e1c02008-10-25 16:22:41 +00003207
sewardja781be62008-12-08 00:12:28 +00003208 /* really didn't find anything. */
3209 return False;
sewardjf98e1c02008-10-25 16:22:41 +00003210}
3211
3212static void event_map_init ( void )
3213{
3214 Word i;
sewardjd86e3a22008-12-03 11:39:37 +00003215
3216 /* Context (RCEC) group allocator */
3217 init_GroupAlloc ( &rcec_group_allocator,
3218 sizeof(RCEC),
3219 1000 /* RCECs per group */,
3220 HG_(zalloc),
3221 "libhb.event_map_init.1 (RCEC groups)",
3222 HG_(free) );
3223
3224 /* Context table */
sewardjf98e1c02008-10-25 16:22:41 +00003225 tl_assert(!contextTab);
sewardjd86e3a22008-12-03 11:39:37 +00003226 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
sewardjf98e1c02008-10-25 16:22:41 +00003227 N_RCEC_TAB * sizeof(RCEC*) );
3228 tl_assert(contextTab);
3229 for (i = 0; i < N_RCEC_TAB; i++)
3230 contextTab[i] = NULL;
3231
sewardjd86e3a22008-12-03 11:39:37 +00003232 /* Oldref group allocator */
3233 init_GroupAlloc ( &oldref_group_allocator,
3234 sizeof(OldRef),
3235 1000 /* OldRefs per group */,
3236 HG_(zalloc),
3237 "libhb.event_map_init.3 (OldRef groups)",
3238 HG_(free) );
3239
sewardjd86e3a22008-12-03 11:39:37 +00003240 /* Oldref tree */
sewardjf98e1c02008-10-25 16:22:41 +00003241 tl_assert(!oldrefTree);
sewardjbc307e52008-12-06 22:10:54 +00003242 oldrefTree = VG_(newSWA)(
3243 HG_(zalloc),
sewardjd86e3a22008-12-03 11:39:37 +00003244 "libhb.event_map_init.4 (oldref tree)",
sewardjbc307e52008-12-06 22:10:54 +00003245 HG_(free)
sewardjf98e1c02008-10-25 16:22:41 +00003246 );
3247 tl_assert(oldrefTree);
3248
3249 oldrefGen = 0;
3250 oldrefGenIncAt = 0;
3251 oldrefTreeN = 0;
3252}
3253
3254static void event_map__check_reference_counts ( Bool before )
3255{
3256 RCEC* rcec;
3257 OldRef* oldref;
3258 Word i;
3259 UWord nEnts = 0;
sewardjd86e3a22008-12-03 11:39:37 +00003260 UWord keyW, valW;
sewardjf98e1c02008-10-25 16:22:41 +00003261
3262 /* Set the 'check' reference counts to zero. Also, optionally
3263 check that the real reference counts are non-zero. We allow
3264 these to fall to zero before a GC, but the GC must get rid of
3265 all those that are zero, hence none should be zero after a
3266 GC. */
3267 for (i = 0; i < N_RCEC_TAB; i++) {
3268 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
3269 nEnts++;
3270 tl_assert(rcec);
3271 tl_assert(rcec->magic == RCEC_MAGIC);
3272 if (!before)
3273 tl_assert(rcec->rc > 0);
3274 rcec->rcX = 0;
3275 }
3276 }
3277
3278 /* check that the stats are sane */
3279 tl_assert(nEnts == stats__ctxt_tab_curr);
3280 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
3281
3282 /* visit all the referencing points, inc check ref counts */
sewardjbc307e52008-12-06 22:10:54 +00003283 VG_(initIterSWA)( oldrefTree );
3284 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00003285 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00003286 tl_assert(oldref->magic == OldRef_MAGIC);
3287 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardjc5ea9962008-12-07 01:41:46 +00003288 Thr* aThr = ptr_and_UWord(oldref->accs[i].thr, ~3);
3289 RCEC* aRef = ptr_and_UWord(oldref->accs[i].rcec, ~3);
3290 if (aThr) {
3291 tl_assert(aRef);
3292 tl_assert(aRef->magic == RCEC_MAGIC);
3293 aRef->rcX++;
sewardjf98e1c02008-10-25 16:22:41 +00003294 } else {
sewardjc5ea9962008-12-07 01:41:46 +00003295 tl_assert(!aRef);
sewardjf98e1c02008-10-25 16:22:41 +00003296 }
3297 }
3298 }
3299
3300 /* compare check ref counts with actual */
3301 for (i = 0; i < N_RCEC_TAB; i++) {
3302 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
3303 tl_assert(rcec->rc == rcec->rcX);
3304 }
3305 }
3306}
3307
sewardj8fd92d32008-11-20 23:17:01 +00003308__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00003309static void event_map_maybe_GC ( void )
3310{
3311 OldRef* oldref;
3312 UWord keyW, valW, retained, maxGen;
sewardjf98e1c02008-10-25 16:22:41 +00003313 XArray* refs2del;
3314 Word i, j, n2del;
3315
sewardj8fd92d32008-11-20 23:17:01 +00003316 UWord* genMap = NULL;
3317 UWord genMap_min = 0;
3318 UWord genMap_size = 0;
3319
sewardj849b0ed2008-12-21 10:43:10 +00003320 if (LIKELY(oldrefTreeN < HG_(clo_conflict_cache_size)))
sewardjf98e1c02008-10-25 16:22:41 +00003321 return;
3322
3323 if (0)
3324 VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
3325
sewardj849b0ed2008-12-21 10:43:10 +00003326 /* Check for sane command line params. Limit values must match
3327 those in hg_process_cmd_line_option. */
3328 tl_assert( HG_(clo_conflict_cache_size) >= 10*1000 );
3329 tl_assert( HG_(clo_conflict_cache_size) <= 10*1000*1000 );
3330
sewardj8f5374e2008-12-07 11:40:17 +00003331 /* Check our counting is sane (expensive) */
3332 if (CHECK_CEM)
3333 tl_assert(oldrefTreeN == VG_(sizeSWA)( oldrefTree ));
sewardjf98e1c02008-10-25 16:22:41 +00003334
sewardj8f5374e2008-12-07 11:40:17 +00003335 /* Check the reference counts (expensive) */
3336 if (CHECK_CEM)
3337 event_map__check_reference_counts( True/*before*/ );
sewardjf98e1c02008-10-25 16:22:41 +00003338
sewardj8fd92d32008-11-20 23:17:01 +00003339 /* Compute the distribution of generation values in the ref tree.
3340 There are likely only to be a few different generation numbers
3341 in the whole tree, but we don't know what they are. Hence use a
3342 dynamically resized array of counters. The array is genMap[0
3343 .. genMap_size-1], where genMap[0] is the count for the
3344 generation number genMap_min, genMap[1] is the count for
3345 genMap_min+1, etc. If a new number is seen outside the range
3346 [genMap_min .. genMap_min + genMap_size - 1] then the array is
3347 copied into a larger array, and genMap_min and genMap_size are
3348 adjusted accordingly. */
3349
sewardjf98e1c02008-10-25 16:22:41 +00003350 /* genMap :: generation-number -> count-of-nodes-with-that-number */
sewardjf98e1c02008-10-25 16:22:41 +00003351
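   /* Illustrative worked trace of the three cases below (the key
      values are invented): if the first generation number seen is 7,
      case 1 gives a one-entry genMap with genMap_min == 7.  If a key
      of 4 then arrives, case 2 allocates a 4-entry array, copies the
      old contents to its upper end, and leaves genMap_min == 4 with
      ea == 0.  A later key of 9 takes case 3: the array grows to 6
      entries covering generations 4..9, with ea == 5. */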
sewardjbc307e52008-12-06 22:10:54 +00003352 VG_(initIterSWA)( oldrefTree );
3353 while ( VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardj8fd92d32008-11-20 23:17:01 +00003354
sewardjd86e3a22008-12-03 11:39:37 +00003355 UWord ea, key;
3356 oldref = (OldRef*)valW;
3357 key = oldref->gen;
sewardj8fd92d32008-11-20 23:17:01 +00003358
3359 /* BEGIN find 'ea', which is the index in genMap holding the
3360 count for generation number 'key'. */
3361 if (UNLIKELY(genMap == NULL)) {
3362 /* deal with the first key to be seen, so that the following
3363 cases don't need to handle the complexity of a NULL count
3364 array. */
3365 genMap_min = key;
3366 genMap_size = 1;
3367 genMap = HG_(zalloc)( "libhb.emmG.1a",
3368 genMap_size * sizeof(UWord) );
3369 ea = 0;
3370 if (0) VG_(printf)("(%lu) case 1 [%lu .. %lu]\n",
3371 key, genMap_min, genMap_min+genMap_size-1 );
sewardjf98e1c02008-10-25 16:22:41 +00003372 }
sewardj8fd92d32008-11-20 23:17:01 +00003373 else
3374 if (LIKELY(key >= genMap_min && key < genMap_min + genMap_size)) {
3375 /* this is the expected (almost-always-happens) case: 'key'
3376 is already mapped in the array. */
3377 ea = key - genMap_min;
3378 }
3379 else
3380 if (key < genMap_min) {
3381 /* 'key' appears before the start of the current array.
3382 Extend the current array by allocating a larger one and
3383 copying the current one to the upper end of it. */
3384 Word more;
3385 UWord* map2;
3386 more = genMap_min - key;
3387 tl_assert(more > 0);
3388 map2 = HG_(zalloc)( "libhb.emmG.1b",
3389 (genMap_size + more) * sizeof(UWord) );
3390 VG_(memcpy)( &map2[more], genMap, genMap_size * sizeof(UWord) );
3391 HG_(free)( genMap );
3392 genMap = map2;
3393 genMap_size += more;
3394 genMap_min -= more;
3395 ea = 0;
3396 tl_assert(genMap_min == key);
3397 if (0) VG_(printf)("(%lu) case 2 [%lu .. %lu]\n",
3398 key, genMap_min, genMap_min+genMap_size-1 );
3399 }
3400 else {
3401 /* 'key' appears after the end of the current array. Extend
3402 the current array by allocating a larger one and copying
3403 the current one to the lower end of it. */
3404 Word more;
3405 UWord* map2;
3406 tl_assert(key >= genMap_min + genMap_size);
3407 more = key - (genMap_min + genMap_size) + 1;
3408 tl_assert(more > 0);
3409 map2 = HG_(zalloc)( "libhb.emmG.1c",
3410 (genMap_size + more) * sizeof(UWord) );
3411 VG_(memcpy)( &map2[0], genMap, genMap_size * sizeof(UWord) );
3412 HG_(free)( genMap );
3413 genMap = map2;
3414 genMap_size += more;
3415 ea = genMap_size - 1;
3416 tl_assert(genMap_min + genMap_size - 1 == key);
3417 if (0) VG_(printf)("(%lu) case 3 [%lu .. %lu]\n",
3418 key, genMap_min, genMap_min+genMap_size-1 );
3419 }
3420 /* END find 'ea' from 'key' */
3421
3422 tl_assert(ea >= 0 && ea < genMap_size);
sewardjd86e3a22008-12-03 11:39:37 +00003423 /* and the whole point of this elaborate computation of 'ea' is .. */
sewardj8fd92d32008-11-20 23:17:01 +00003424 genMap[ea]++;
sewardjf98e1c02008-10-25 16:22:41 +00003425 }
3426
sewardj8fd92d32008-11-20 23:17:01 +00003427 tl_assert(genMap);
3428 tl_assert(genMap_size > 0);
sewardjf98e1c02008-10-25 16:22:41 +00003429
sewardj8fd92d32008-11-20 23:17:01 +00003430 /* Sanity check what we just computed */
3431 { UWord sum = 0;
3432 for (i = 0; i < genMap_size; i++) {
3433 if (0) VG_(printf)(" xxx: gen %ld has %lu\n",
3434 i + genMap_min, genMap[i] );
3435 sum += genMap[i];
3436 }
3437 tl_assert(sum == oldrefTreeN);
3438 }
3439
3440 /* Figure out how many generations to throw away */
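   /* Sketch of the policy with invented numbers (the real fraction is
      EVENT_MAP_GC_DISCARD_FRACTION; 0.5 is used here only for
      illustration): with a conflict-cache limit of 1,000,000 and a
      fraction of 0.5, the loop below marks whole generations for
      deletion, oldest first, for as long as doing so still leaves
      more than 500,000 entries retained; maxGen ends up as the newest
      generation that will be thrown away. */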
sewardjf98e1c02008-10-25 16:22:41 +00003441 retained = oldrefTreeN;
3442 maxGen = 0;
sewardj8fd92d32008-11-20 23:17:01 +00003443
3444 for (i = 0; i < genMap_size; i++) {
3445 keyW = i + genMap_min;
3446 valW = genMap[i];
sewardjf98e1c02008-10-25 16:22:41 +00003447 tl_assert(keyW > 0); /* can't allow a generation # 0 */
3448 if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW );
3449 tl_assert(keyW >= maxGen);
3450 tl_assert(retained >= valW);
3451 if (retained - valW
sewardj849b0ed2008-12-21 10:43:10 +00003452 > (UWord)(HG_(clo_conflict_cache_size)
3453 * EVENT_MAP_GC_DISCARD_FRACTION)) {
sewardjf98e1c02008-10-25 16:22:41 +00003454 retained -= valW;
3455 maxGen = keyW;
3456 } else {
3457 break;
3458 }
3459 }
sewardjf98e1c02008-10-25 16:22:41 +00003460
sewardj8fd92d32008-11-20 23:17:01 +00003461 HG_(free)(genMap);
sewardjf98e1c02008-10-25 16:22:41 +00003462
sewardj9b1f0fd2008-11-18 23:40:00 +00003463 tl_assert(retained >= 0 && retained <= oldrefTreeN);
sewardjf98e1c02008-10-25 16:22:41 +00003464
3465 /* Now make up a big list of the oldrefTree entries we want to
3466 delete. We can't simultaneously traverse the tree and delete
3467 stuff from it, so first we need to copy them off somewhere
3468 else. (sigh) */
sewardj8fd92d32008-11-20 23:17:01 +00003469 refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2",
sewardjd86e3a22008-12-03 11:39:37 +00003470 HG_(free), sizeof(Addr) );
sewardjf98e1c02008-10-25 16:22:41 +00003471
sewardj9b1f0fd2008-11-18 23:40:00 +00003472 if (retained < oldrefTreeN) {
3473
3474 /* This is the normal (expected) case. We discard any ref whose
3475 generation number <= maxGen. */
sewardjbc307e52008-12-06 22:10:54 +00003476 VG_(initIterSWA)( oldrefTree );
3477 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00003478 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00003479 tl_assert(oldref->magic == OldRef_MAGIC);
3480 if (oldref->gen <= maxGen) {
sewardjd86e3a22008-12-03 11:39:37 +00003481 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00003482 }
sewardjf98e1c02008-10-25 16:22:41 +00003483 }
sewardj9b1f0fd2008-11-18 23:40:00 +00003484 if (VG_(clo_verbosity) > 1) {
3485 VG_(message)(Vg_DebugMsg,
3486 "libhb: EvM GC: delete generations %lu and below, "
3487 "retaining %lu entries",
3488 maxGen, retained );
3489 }
3490
3491 } else {
3492
3493 static UInt rand_seed = 0; /* leave as static */
3494
3495 /* Degenerate case: there's only one generation in the entire
3496 tree, so we need to have some other way of deciding which
3497 refs to throw away. Just throw out half of them randomly. */
3498 tl_assert(retained == oldrefTreeN);
sewardjbc307e52008-12-06 22:10:54 +00003499 VG_(initIterSWA)( oldrefTree );
3500 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardj9b1f0fd2008-11-18 23:40:00 +00003501 UInt n;
sewardjd86e3a22008-12-03 11:39:37 +00003502 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00003503 tl_assert(oldref->magic == OldRef_MAGIC);
3504 n = VG_(random)( &rand_seed );
3505 if ((n & 0xFFF) < 0x800) {
sewardjd86e3a22008-12-03 11:39:37 +00003506 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00003507 retained--;
3508 }
3509 }
3510 if (VG_(clo_verbosity) > 1) {
3511 VG_(message)(Vg_DebugMsg,
3512 "libhb: EvM GC: randomly delete half the entries, "
3513 "retaining %lu entries",
3514 retained );
3515 }
3516
sewardjf98e1c02008-10-25 16:22:41 +00003517 }
3518
3519 n2del = VG_(sizeXA)( refs2del );
3520 tl_assert(n2del == (Word)(oldrefTreeN - retained));
3521
3522 if (0) VG_(printf)("%s","deleting entries\n");
3523 for (i = 0; i < n2del; i++) {
sewardjd86e3a22008-12-03 11:39:37 +00003524 Bool b;
3525 Addr ga2del = *(Addr*)VG_(indexXA)( refs2del, i );
sewardjbc307e52008-12-06 22:10:54 +00003526 b = VG_(delFromSWA)( oldrefTree, &keyW, &valW, ga2del );
sewardjd86e3a22008-12-03 11:39:37 +00003527 tl_assert(b);
3528 tl_assert(keyW == ga2del);
3529 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00003530 for (j = 0; j < N_OLDREF_ACCS; j++) {
sewardjc5ea9962008-12-07 01:41:46 +00003531 Thr* aThr = ptr_and_UWord(oldref->accs[j].thr, ~3);
3532 RCEC* aRef = ptr_and_UWord(oldref->accs[j].rcec, ~3);
3533 if (aRef) {
3534 tl_assert(aThr);
sewardjf98e1c02008-10-25 16:22:41 +00003535 stats__ctxt_rcdec3++;
sewardjc5ea9962008-12-07 01:41:46 +00003536 ctxt__rcdec( aRef );
sewardjf98e1c02008-10-25 16:22:41 +00003537 } else {
sewardjc5ea9962008-12-07 01:41:46 +00003538 tl_assert(!aThr);
sewardjf98e1c02008-10-25 16:22:41 +00003539 }
3540 }
sewardjd86e3a22008-12-03 11:39:37 +00003541
3542 free_OldRef( oldref );
sewardjf98e1c02008-10-25 16:22:41 +00003543 }
3544
3545 VG_(deleteXA)( refs2del );
3546
sewardjc5ea9962008-12-07 01:41:46 +00003547 tl_assert( VG_(sizeSWA)( oldrefTree ) == retained );
sewardjf98e1c02008-10-25 16:22:41 +00003548
3549 oldrefTreeN = retained;
3550 oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
3551
3552 /* Throw away all RCECs with zero reference counts */
3553 for (i = 0; i < N_RCEC_TAB; i++) {
3554 RCEC** pp = &contextTab[i];
3555 RCEC* p = *pp;
3556 while (p) {
3557 if (p->rc == 0) {
3558 *pp = p->next;
sewardjd86e3a22008-12-03 11:39:37 +00003559 free_RCEC(p);
sewardjf98e1c02008-10-25 16:22:41 +00003560 p = *pp;
3561 tl_assert(stats__ctxt_tab_curr > 0);
3562 stats__ctxt_tab_curr--;
3563 } else {
3564 pp = &p->next;
3565 p = p->next;
3566 }
3567 }
3568 }
3569
sewardj8f5374e2008-12-07 11:40:17 +00003570 /* Check the reference counts (expensive) */
3571 if (CHECK_CEM)
3572 event_map__check_reference_counts( False/*after*/ );
sewardjf98e1c02008-10-25 16:22:41 +00003573
3574 //if (0)
3575 //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
3576 // VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
3577
3578}
3579
3580
3581/////////////////////////////////////////////////////////
3582// //
3583// Core MSM //
3584// //
3585/////////////////////////////////////////////////////////
3586
sewardjb0e009d2008-11-19 16:35:15 +00003587/* Logic in msm_read/msm_write updated/verified after re-analysis,
3588 19 Nov 08. */
3589
sewardjb0e009d2008-11-19 16:35:15 +00003590/* 19 Nov 08: it seems that MSM_RACE2ERR == 1 is a bad idea. When
3591 nonzero, the effect is that when a race is detected for a location,
3592 that location is put into a special 'error' state and no further
3593 checking of it is done until it returns to a 'normal' state, which
3594 requires it to be deallocated and reallocated.
3595
3596 This is a bad idea, because of the interaction with suppressions.
3597 Suppose there is a race on the location, but the error is
3598 suppressed. The location now is marked as in-error. Now any
3599 subsequent race -- including ones we want to see -- will never be
3600 detected until the location is deallocated and reallocated.
3601
sewardj8f5374e2008-12-07 11:40:17 +00003602 Hence set MSM_RACE2ERR to zero. This causes raced-on locations to
sewardjb0e009d2008-11-19 16:35:15 +00003603 remain in the normal 'C' (constrained) state, but places on them
3604 the constraint that the next accesses happen-after both the
3605 existing constraint and the relevant vector clock of the thread
sewardj8f5374e2008-12-07 11:40:17 +00003606 doing the racing access.
sewardjb0e009d2008-11-19 16:35:15 +00003607*/
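/* A small worked example of the MSM_RACE2ERR == 0 behaviour (the
   clock values are invented): suppose a location's constraint has
   wmini == [T1:2, T2:0] because T1 wrote it, and T2 now writes with
   tviW == [T1:0, T2:3] and no intervening happens-before edge.  Then
   wmini is not <= tviW, so msm_write reports a race, and the location
   stays in the 'C' state with the constraint
   C( join(rmini,tviR), join(wmini,tviW) ) rather than being switched
   to the 'E' state. */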
3608#define MSM_RACE2ERR 0
3609
sewardjf98e1c02008-10-25 16:22:41 +00003610static ULong stats__msm_read = 0;
3611static ULong stats__msm_read_change = 0;
3612static ULong stats__msm_write = 0;
3613static ULong stats__msm_write_change = 0;
3614
3615__attribute__((noinline))
3616static void record_race_info ( Thr* acc_thr,
sewardja781be62008-12-08 00:12:28 +00003617 Addr acc_addr, SizeT szB, Bool isWrite )
sewardjf98e1c02008-10-25 16:22:41 +00003618{
sewardjc5ea9962008-12-07 01:41:46 +00003619 /* Call here to report a race. We just hand it onwards to
3620 HG_(record_error_Race). If that in turn discovers that the
3621 error is going to be collected, then that queries the
3622 conflicting-event map. The alternative would be to query it
3623 right here. But that causes a lot of pointless queries for
3624 errors which will shortly be discarded as duplicates, and can
3625 become a performance overhead; so we defer the query until we
3626 know the error is not a duplicate. */
3627 tl_assert(acc_thr->opaque);
3628 HG_(record_error_Race)( acc_thr->opaque, acc_addr,
sewardja781be62008-12-08 00:12:28 +00003629 szB, isWrite, NULL/*mb_lastlock*/ );
sewardjf98e1c02008-10-25 16:22:41 +00003630}
3631
3632static Bool is_sane_SVal_C ( SVal sv ) {
3633 POrd ord;
3634 if (!SVal__isC(sv)) return True;
3635 ord = VtsID__getOrdering( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
3636 if (ord == POrd_EQ || ord == POrd_LT) return True;
3637 return False;
3638}
3639
3640
3641/* Compute new state following a read */
3642static inline SVal msm_read ( SVal svOld,
3643 /* The following are only needed for
3644 creating error reports. */
3645 Thr* acc_thr,
3646 Addr acc_addr, SizeT szB )
3647{
3648 SVal svNew = SVal_INVALID;
3649 stats__msm_read++;
3650
3651 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00003652 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00003653 tl_assert(is_sane_SVal_C(svOld));
3654 }
3655
3656 if (SVal__isC(svOld)) {
3657 POrd ord;
3658 VtsID tviR = acc_thr->viR;
3659 VtsID tviW = acc_thr->viW;
3660 VtsID rmini = SVal__unC_Rmin(svOld);
3661 VtsID wmini = SVal__unC_Wmin(svOld);
3662
3663 ord = VtsID__getOrdering(rmini,tviR);
3664 if (ord == POrd_EQ || ord == POrd_LT) {
3665 /* no race */
3666 /* Note: RWLOCK subtlety: use tviW, not tviR */
3667 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
3668 goto out;
3669 } else {
sewardjb0e009d2008-11-19 16:35:15 +00003670 /* assert on sanity of constraints. */
3671 POrd ordxx = VtsID__getOrdering(rmini,wmini);
3672 tl_assert(ordxx == POrd_EQ || ordxx == POrd_LT);
sewardjf98e1c02008-10-25 16:22:41 +00003673 svNew = MSM_RACE2ERR
3674 ? SVal__mkE()
sewardj8f5374e2008-12-07 11:40:17 +00003675 /* see comments on corresponding fragment in
3676 msm_write for explanation. */
3677 /* aggressive setting: */
3678 /*
sewardjb0e009d2008-11-19 16:35:15 +00003679 : SVal__mkC( VtsID__join2(wmini,tviR),
3680 VtsID__join2(wmini,tviW) );
sewardj8f5374e2008-12-07 11:40:17 +00003681 */
3682 /* "consistent" setting: */
sewardj3b0c4d72008-11-20 11:20:50 +00003683 : SVal__mkC( VtsID__join2(rmini,tviR),
3684 VtsID__join2(wmini,tviW) );
sewardja781be62008-12-08 00:12:28 +00003685 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/ );
sewardjf98e1c02008-10-25 16:22:41 +00003686 goto out;
3687 }
3688 }
3689 if (SVal__isA(svOld)) {
3690 /* reading no-access memory (sigh); leave unchanged */
3691 /* check for no pollution */
3692 tl_assert(svOld == SVal_NOACCESS);
3693 svNew = SVal_NOACCESS;
3694 goto out;
3695 }
3696 if (SVal__isE(svOld)) {
3697 /* no race, location is already "in error" */
3698 svNew = SVal__mkE();
3699 goto out;
3700 }
3701 VG_(printf)("msm_read: bad svOld: 0x%016llx\n", svOld);
3702 tl_assert(0);
3703
3704 out:
sewardj8f5374e2008-12-07 11:40:17 +00003705 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00003706 tl_assert(is_sane_SVal_C(svNew));
3707 }
3708 tl_assert(svNew != SVal_INVALID);
sewardj849b0ed2008-12-21 10:43:10 +00003709 if (svNew != svOld && HG_(clo_show_conflicts)) {
sewardj8f5374e2008-12-07 11:40:17 +00003710 if (SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00003711 event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
sewardjf98e1c02008-10-25 16:22:41 +00003712 stats__msm_read_change++;
3713 }
3714 }
3715 return svNew;
3716}
3717
3718
3719/* Compute new state following a write */
3720static inline SVal msm_write ( SVal svOld,
3721 /* The following are only needed for
3722 creating error reports. */
3723 Thr* acc_thr,
3724 Addr acc_addr, SizeT szB )
3725{
3726 SVal svNew = SVal_INVALID;
3727 stats__msm_write++;
3728
3729 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00003730 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00003731 tl_assert(is_sane_SVal_C(svOld));
3732 }
3733
3734 if (SVal__isC(svOld)) {
3735 POrd ord;
3736 VtsID tviW = acc_thr->viW;
3737 VtsID wmini = SVal__unC_Wmin(svOld);
3738
3739 ord = VtsID__getOrdering(wmini,tviW);
3740 if (ord == POrd_EQ || ord == POrd_LT) {
3741 /* no race */
3742 svNew = SVal__mkC( tviW, tviW );
3743 goto out;
3744 } else {
sewardjb0e009d2008-11-19 16:35:15 +00003745 VtsID tviR = acc_thr->viR;
sewardjf98e1c02008-10-25 16:22:41 +00003746 VtsID rmini = SVal__unC_Rmin(svOld);
sewardjb0e009d2008-11-19 16:35:15 +00003747 /* assert on sanity of constraints. */
3748 POrd ordxx = VtsID__getOrdering(rmini,wmini);
3749 tl_assert(ordxx == POrd_EQ || ordxx == POrd_LT);
sewardjf98e1c02008-10-25 16:22:41 +00003750 svNew = MSM_RACE2ERR
3751 ? SVal__mkE()
sewardj8f5374e2008-12-07 11:40:17 +00003752 /* One possibility is, after a race is seen, to
3753 set the location's constraints as aggressively
3754 (as far ahead) as possible. However, that just
3755 causes lots more races to be reported, which is
3756 very confusing. Hence don't do this. */
3757 /*
sewardjb0e009d2008-11-19 16:35:15 +00003758 : SVal__mkC( VtsID__join2(wmini,tviR),
sewardjf98e1c02008-10-25 16:22:41 +00003759 VtsID__join2(wmini,tviW) );
sewardj8f5374e2008-12-07 11:40:17 +00003760 */
3761 /* instead, re-set the constraints in a way which
3762 is consistent with (ie, as they would have been
3763 computed anyway) had no race been detected. */
sewardj3b0c4d72008-11-20 11:20:50 +00003764 : SVal__mkC( VtsID__join2(rmini,tviR),
3765 VtsID__join2(wmini,tviW) );
sewardja781be62008-12-08 00:12:28 +00003766 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/ );
sewardjf98e1c02008-10-25 16:22:41 +00003767 goto out;
3768 }
3769 }
3770 if (SVal__isA(svOld)) {
3771 /* writing no-access memory (sigh); leave unchanged */
3772 /* check for no pollution */
3773 tl_assert(svOld == SVal_NOACCESS);
3774 svNew = SVal_NOACCESS;
3775 goto out;
3776 }
3777 if (SVal__isE(svOld)) {
3778 /* no race, location is already "in error" */
3779 svNew = SVal__mkE();
3780 goto out;
3781 }
3782 VG_(printf)("msm_write: bad svOld: 0x%016llx\n", svOld);
3783 tl_assert(0);
3784
3785 out:
sewardj8f5374e2008-12-07 11:40:17 +00003786 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00003787 tl_assert(is_sane_SVal_C(svNew));
3788 }
3789 tl_assert(svNew != SVal_INVALID);
sewardj849b0ed2008-12-21 10:43:10 +00003790 if (svNew != svOld && HG_(clo_show_conflicts)) {
sewardj8f5374e2008-12-07 11:40:17 +00003791 if (SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00003792 event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
sewardjf98e1c02008-10-25 16:22:41 +00003793 stats__msm_write_change++;
3794 }
3795 }
3796 return svNew;
3797}
3798
3799
3800/////////////////////////////////////////////////////////
3801// //
3802// Apply core MSM to specific memory locations //
3803// //
3804/////////////////////////////////////////////////////////
3805
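/* Informal note on the access-size handling below (an added summary,
   not from the original comments): each apply routine handles an
   access at its own width only when the address is suitably aligned
   and either the cacheline's tree descriptor already holds a value at
   that granularity or a coarser value above it can be pulled down;
   otherwise it splits the access into two of half the width
   ("slowcase").  For example, a 4-byte read at 0x1002 becomes two
   2-byte reads at 0x1002 and 0x1004, and each of those may split
   again into 1-byte reads if the tree currently holds finer-grained
   values there. */
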
3806/*------------- ZSM accesses: 8 bit apply ------------- */
3807
3808void zsm_apply8___msm_read ( Thr* thr, Addr a ) {
3809 CacheLine* cl;
3810 UWord cloff, tno, toff;
3811 SVal svOld, svNew;
3812 UShort descr;
3813 stats__cline_read8s++;
3814 cl = get_cacheline(a);
3815 cloff = get_cacheline_offset(a);
3816 tno = get_treeno(a);
3817 toff = get_tree_offset(a); /* == 0 .. 7 */
3818 descr = cl->descrs[tno];
3819 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3820 SVal* tree = &cl->svals[tno << 3];
3821 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00003822 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00003823 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3824 }
3825 svOld = cl->svals[cloff];
3826 svNew = msm_read( svOld, thr,a,1 );
3827 tl_assert(svNew != SVal_INVALID);
3828 cl->svals[cloff] = svNew;
3829}
3830
3831void zsm_apply8___msm_write ( Thr* thr, Addr a ) {
3832 CacheLine* cl;
3833 UWord cloff, tno, toff;
3834 SVal svOld, svNew;
3835 UShort descr;
3836 stats__cline_write8s++;
3837 cl = get_cacheline(a);
3838 cloff = get_cacheline_offset(a);
3839 tno = get_treeno(a);
3840 toff = get_tree_offset(a); /* == 0 .. 7 */
3841 descr = cl->descrs[tno];
3842 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3843 SVal* tree = &cl->svals[tno << 3];
3844 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00003845 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00003846 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3847 }
3848 svOld = cl->svals[cloff];
3849 svNew = msm_write( svOld, thr,a,1 );
3850 tl_assert(svNew != SVal_INVALID);
3851 cl->svals[cloff] = svNew;
3852}
3853
3854/*------------- ZSM accesses: 16 bit apply ------------- */
3855
3856void zsm_apply16___msm_read ( Thr* thr, Addr a ) {
3857 CacheLine* cl;
3858 UWord cloff, tno, toff;
3859 SVal svOld, svNew;
3860 UShort descr;
3861 stats__cline_read16s++;
3862 if (UNLIKELY(!aligned16(a))) goto slowcase;
3863 cl = get_cacheline(a);
3864 cloff = get_cacheline_offset(a);
3865 tno = get_treeno(a);
3866 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
3867 descr = cl->descrs[tno];
3868 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
3869 if (valid_value_is_below_me_16(descr, toff)) {
3870 goto slowcase;
3871 } else {
3872 SVal* tree = &cl->svals[tno << 3];
3873 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
3874 }
sewardj8f5374e2008-12-07 11:40:17 +00003875 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00003876 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3877 }
3878 svOld = cl->svals[cloff];
3879 svNew = msm_read( svOld, thr,a,2 );
3880 tl_assert(svNew != SVal_INVALID);
3881 cl->svals[cloff] = svNew;
3882 return;
3883 slowcase: /* misaligned, or must go further down the tree */
3884 stats__cline_16to8splits++;
3885 zsm_apply8___msm_read( thr, a + 0 );
3886 zsm_apply8___msm_read( thr, a + 1 );
3887}
3888
3889void zsm_apply16___msm_write ( Thr* thr, Addr a ) {
3890 CacheLine* cl;
3891 UWord cloff, tno, toff;
3892 SVal svOld, svNew;
3893 UShort descr;
3894 stats__cline_write16s++;
3895 if (UNLIKELY(!aligned16(a))) goto slowcase;
3896 cl = get_cacheline(a);
3897 cloff = get_cacheline_offset(a);
3898 tno = get_treeno(a);
3899 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
3900 descr = cl->descrs[tno];
3901 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
3902 if (valid_value_is_below_me_16(descr, toff)) {
3903 goto slowcase;
3904 } else {
3905 SVal* tree = &cl->svals[tno << 3];
3906 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
3907 }
sewardj8f5374e2008-12-07 11:40:17 +00003908 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00003909 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3910 }
3911 svOld = cl->svals[cloff];
3912 svNew = msm_write( svOld, thr,a,2 );
3913 tl_assert(svNew != SVal_INVALID);
3914 cl->svals[cloff] = svNew;
3915 return;
3916 slowcase: /* misaligned, or must go further down the tree */
3917 stats__cline_16to8splits++;
3918 zsm_apply8___msm_write( thr, a + 0 );
3919 zsm_apply8___msm_write( thr, a + 1 );
3920}
3921
3922/*------------- ZSM accesses: 32 bit apply ------------- */
3923
3924void zsm_apply32___msm_read ( Thr* thr, Addr a ) {
3925 CacheLine* cl;
3926 UWord cloff, tno, toff;
3927 SVal svOld, svNew;
3928 UShort descr;
3929 if (UNLIKELY(!aligned32(a))) goto slowcase;
3930 cl = get_cacheline(a);
3931 cloff = get_cacheline_offset(a);
3932 tno = get_treeno(a);
3933 toff = get_tree_offset(a); /* == 0 or 4 */
3934 descr = cl->descrs[tno];
3935 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
3936 if (valid_value_is_above_me_32(descr, toff)) {
3937 SVal* tree = &cl->svals[tno << 3];
3938 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
3939 } else {
3940 goto slowcase;
3941 }
sewardj8f5374e2008-12-07 11:40:17 +00003942 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00003943 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3944 }
3945 svOld = cl->svals[cloff];
3946 svNew = msm_read( svOld, thr,a,4 );
3947 tl_assert(svNew != SVal_INVALID);
3948 cl->svals[cloff] = svNew;
3949 return;
3950 slowcase: /* misaligned, or must go further down the tree */
3951 stats__cline_32to16splits++;
3952 zsm_apply16___msm_read( thr, a + 0 );
3953 zsm_apply16___msm_read( thr, a + 2 );
3954}
3955
3956void zsm_apply32___msm_write ( Thr* thr, Addr a ) {
3957 CacheLine* cl;
3958 UWord cloff, tno, toff;
3959 SVal svOld, svNew;
3960 UShort descr;
3961 if (UNLIKELY(!aligned32(a))) goto slowcase;
3962 cl = get_cacheline(a);
3963 cloff = get_cacheline_offset(a);
3964 tno = get_treeno(a);
3965 toff = get_tree_offset(a); /* == 0 or 4 */
3966 descr = cl->descrs[tno];
3967 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
3968 if (valid_value_is_above_me_32(descr, toff)) {
3969 SVal* tree = &cl->svals[tno << 3];
3970 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
3971 } else {
3972 goto slowcase;
3973 }
sewardj8f5374e2008-12-07 11:40:17 +00003974 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00003975 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3976 }
3977 svOld = cl->svals[cloff];
3978 svNew = msm_write( svOld, thr,a,4 );
3979 tl_assert(svNew != SVal_INVALID);
3980 cl->svals[cloff] = svNew;
3981 return;
3982 slowcase: /* misaligned, or must go further down the tree */
3983 stats__cline_32to16splits++;
3984 zsm_apply16___msm_write( thr, a + 0 );
3985 zsm_apply16___msm_write( thr, a + 2 );
3986}
3987
3988/*------------- ZSM accesses: 64 bit apply ------------- */
3989
3990void zsm_apply64___msm_read ( Thr* thr, Addr a ) {
3991 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00003992 UWord cloff, tno;
3993 //UWord toff;
sewardjf98e1c02008-10-25 16:22:41 +00003994 SVal svOld, svNew;
3995 UShort descr;
3996 stats__cline_read64s++;
3997 if (UNLIKELY(!aligned64(a))) goto slowcase;
3998 cl = get_cacheline(a);
3999 cloff = get_cacheline_offset(a);
4000 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00004001 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00004002 descr = cl->descrs[tno];
4003 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
4004 goto slowcase;
4005 }
4006 svOld = cl->svals[cloff];
4007 svNew = msm_read( svOld, thr,a,8 );
4008 tl_assert(svNew != SVal_INVALID);
4009 cl->svals[cloff] = svNew;
4010 return;
4011 slowcase: /* misaligned, or must go further down the tree */
4012 stats__cline_64to32splits++;
4013 zsm_apply32___msm_read( thr, a + 0 );
4014 zsm_apply32___msm_read( thr, a + 4 );
4015}
4016
4017void zsm_apply64___msm_write ( Thr* thr, Addr a ) {
4018 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00004019 UWord cloff, tno;
4020 //UWord toff;
sewardjf98e1c02008-10-25 16:22:41 +00004021 SVal svOld, svNew;
4022 UShort descr;
4023 stats__cline_write64s++;
4024 if (UNLIKELY(!aligned64(a))) goto slowcase;
4025 cl = get_cacheline(a);
4026 cloff = get_cacheline_offset(a);
4027 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00004028 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00004029 descr = cl->descrs[tno];
4030 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
4031 goto slowcase;
4032 }
4033 svOld = cl->svals[cloff];
4034 svNew = msm_write( svOld, thr,a,8 );
4035 tl_assert(svNew != SVal_INVALID);
4036 cl->svals[cloff] = svNew;
4037 return;
4038 slowcase: /* misaligned, or must go further down the tree */
4039 stats__cline_64to32splits++;
4040 zsm_apply32___msm_write( thr, a + 0 );
4041 zsm_apply32___msm_write( thr, a + 4 );
4042}
4043
4044/*--------------- ZSM accesses: 8 bit write --------------- */
4045
4046static
4047void zsm_write8 ( Addr a, SVal svNew ) {
4048 CacheLine* cl;
4049 UWord cloff, tno, toff;
4050 UShort descr;
4051 stats__cline_set8s++;
4052 cl = get_cacheline(a);
4053 cloff = get_cacheline_offset(a);
4054 tno = get_treeno(a);
4055 toff = get_tree_offset(a); /* == 0 .. 7 */
4056 descr = cl->descrs[tno];
4057 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
4058 SVal* tree = &cl->svals[tno << 3];
4059 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00004060 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004061 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4062 }
4063 tl_assert(svNew != SVal_INVALID);
4064 cl->svals[cloff] = svNew;
4065}
4066
4067/*--------------- ZSM accesses: 16 bit write --------------- */
4068
4069static
4070void zsm_write16 ( Addr a, SVal svNew ) {
4071 CacheLine* cl;
4072 UWord cloff, tno, toff;
4073 UShort descr;
4074 stats__cline_set16s++;
4075 if (UNLIKELY(!aligned16(a))) goto slowcase;
4076 cl = get_cacheline(a);
4077 cloff = get_cacheline_offset(a);
4078 tno = get_treeno(a);
4079 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
4080 descr = cl->descrs[tno];
4081 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
4082 if (valid_value_is_below_me_16(descr, toff)) {
4083 /* Writing at this level. Need to fix up 'descr'. */
4084 cl->descrs[tno] = pullup_descr_to_16(descr, toff);
4085 /* At this point, the tree does not match cl->descrs[tno] any
4086 more. The assignments below will fix it up. */
4087 } else {
4088 /* We can't indiscriminately write on the w16 node as in the
4089 w64 case, as that might make the node inconsistent with
4090 its parent. So first, pull down to this level. */
4091 SVal* tree = &cl->svals[tno << 3];
4092 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00004093 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004094 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4095 }
4096 }
4097 tl_assert(svNew != SVal_INVALID);
4098 cl->svals[cloff + 0] = svNew;
4099 cl->svals[cloff + 1] = SVal_INVALID;
4100 return;
4101 slowcase: /* misaligned */
4102 stats__cline_16to8splits++;
4103 zsm_write8( a + 0, svNew );
4104 zsm_write8( a + 1, svNew );
4105}
4106
4107/*--------------- ZSM accesses: 32 bit write --------------- */
4108
4109static
4110void zsm_write32 ( Addr a, SVal svNew ) {
4111 CacheLine* cl;
4112 UWord cloff, tno, toff;
4113 UShort descr;
4114 stats__cline_set32s++;
4115 if (UNLIKELY(!aligned32(a))) goto slowcase;
4116 cl = get_cacheline(a);
4117 cloff = get_cacheline_offset(a);
4118 tno = get_treeno(a);
4119 toff = get_tree_offset(a); /* == 0 or 4 */
4120 descr = cl->descrs[tno];
4121 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
4122 if (valid_value_is_above_me_32(descr, toff)) {
4123 /* We can't indiscriminately write on the w32 node as in the
4124 w64 case, as that might make the node inconsistent with
4125 its parent. So first, pull down to this level. */
4126 SVal* tree = &cl->svals[tno << 3];
4127 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00004128 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004129 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4130 } else {
4131 /* Writing at this level. Need to fix up 'descr'. */
4132 cl->descrs[tno] = pullup_descr_to_32(descr, toff);
4133 /* At this point, the tree does not match cl->descrs[tno] any
4134 more. The assignments below will fix it up. */
4135 }
4136 }
4137 tl_assert(svNew != SVal_INVALID);
4138 cl->svals[cloff + 0] = svNew;
4139 cl->svals[cloff + 1] = SVal_INVALID;
4140 cl->svals[cloff + 2] = SVal_INVALID;
4141 cl->svals[cloff + 3] = SVal_INVALID;
4142 return;
4143 slowcase: /* misaligned */
4144 stats__cline_32to16splits++;
4145 zsm_write16( a + 0, svNew );
4146 zsm_write16( a + 2, svNew );
4147}
4148
4149/*--------------- ZSM accesses: 64 bit write --------------- */
4150
4151static
4152void zsm_write64 ( Addr a, SVal svNew ) {
4153 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00004154 UWord cloff, tno;
4155 //UWord toff;
sewardjf98e1c02008-10-25 16:22:41 +00004156 stats__cline_set64s++;
4157 if (UNLIKELY(!aligned64(a))) goto slowcase;
4158 cl = get_cacheline(a);
4159 cloff = get_cacheline_offset(a);
4160 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00004161 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00004162 cl->descrs[tno] = TREE_DESCR_64;
4163 tl_assert(svNew != SVal_INVALID);
4164 cl->svals[cloff + 0] = svNew;
4165 cl->svals[cloff + 1] = SVal_INVALID;
4166 cl->svals[cloff + 2] = SVal_INVALID;
4167 cl->svals[cloff + 3] = SVal_INVALID;
4168 cl->svals[cloff + 4] = SVal_INVALID;
4169 cl->svals[cloff + 5] = SVal_INVALID;
4170 cl->svals[cloff + 6] = SVal_INVALID;
4171 cl->svals[cloff + 7] = SVal_INVALID;
4172 return;
4173 slowcase: /* misaligned */
4174 stats__cline_64to32splits++;
4175 zsm_write32( a + 0, svNew );
4176 zsm_write32( a + 4, svNew );
4177}
4178
4179/*------------- ZSM accesses: 8 bit read/copy ------------- */
4180
4181static
4182SVal zsm_read8 ( Addr a ) {
4183 CacheLine* cl;
4184 UWord cloff, tno, toff;
4185 UShort descr;
4186 stats__cline_get8s++;
4187 cl = get_cacheline(a);
4188 cloff = get_cacheline_offset(a);
4189 tno = get_treeno(a);
4190 toff = get_tree_offset(a); /* == 0 .. 7 */
4191 descr = cl->descrs[tno];
4192 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
4193 SVal* tree = &cl->svals[tno << 3];
4194 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
4195 }
4196 return cl->svals[cloff];
4197}
4198
4199static void zsm_copy8 ( Addr src, Addr dst, Bool uu_normalise ) {
4200 SVal sv;
4201 stats__cline_copy8s++;
4202 sv = zsm_read8( src );
4203 zsm_write8( dst, sv );
4204}
4205
4206/* ------------ Shadow memory range setting ops ------------ */
4207
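/* Illustrative trace of the alignment laddering used below (the
   address is invented): for a = 0x1001 and len = 15, the range is
   covered by a 1-byte access at 0x1001, a 2-byte access at 0x1002, a
   4-byte access at 0x1004 and an 8-byte access at 0x1008, at which
   point len reaches zero.  The write variant and zsm_set_range_SMALL
   below follow the same pattern. */
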
4208void zsm_apply_range___msm_read ( Thr* thr,
4209 Addr a, SizeT len )
4210{
4211 /* fast track a couple of common cases */
4212 if (len == 4 && aligned32(a)) {
4213 zsm_apply32___msm_read( thr, a );
4214 return;
4215 }
4216 if (len == 8 && aligned64(a)) {
4217 zsm_apply64___msm_read( thr, a );
4218 return;
4219 }
4220
4221 /* be completely general (but as efficient as possible) */
4222 if (len == 0) return;
4223
4224 if (!aligned16(a) && len >= 1) {
4225 zsm_apply8___msm_read( thr, a );
4226 a += 1;
4227 len -= 1;
4228 tl_assert(aligned16(a));
4229 }
4230 if (len == 0) return;
4231
4232 if (!aligned32(a) && len >= 2) {
4233 zsm_apply16___msm_read( thr, a );
4234 a += 2;
4235 len -= 2;
4236 tl_assert(aligned32(a));
4237 }
4238 if (len == 0) return;
4239
4240 if (!aligned64(a) && len >= 4) {
4241 zsm_apply32___msm_read( thr, a );
4242 a += 4;
4243 len -= 4;
4244 tl_assert(aligned64(a));
4245 }
4246 if (len == 0) return;
4247
4248 if (len >= 8) {
4249 tl_assert(aligned64(a));
4250 while (len >= 8) {
4251 zsm_apply64___msm_read( thr, a );
4252 a += 8;
4253 len -= 8;
4254 }
4255 tl_assert(aligned64(a));
4256 }
4257 if (len == 0) return;
4258
4259 if (len >= 4)
4260 tl_assert(aligned32(a));
4261 if (len >= 4) {
4262 zsm_apply32___msm_read( thr, a );
4263 a += 4;
4264 len -= 4;
4265 }
4266 if (len == 0) return;
4267
4268 if (len >= 2)
4269 tl_assert(aligned16(a));
4270 if (len >= 2) {
4271 zsm_apply16___msm_read( thr, a );
4272 a += 2;
4273 len -= 2;
4274 }
4275 if (len == 0) return;
4276
4277 if (len >= 1) {
4278 zsm_apply8___msm_read( thr, a );
njn4c245e52009-03-15 23:25:38 +00004279 //a += 1;
sewardjf98e1c02008-10-25 16:22:41 +00004280 len -= 1;
4281 }
4282 tl_assert(len == 0);
4283}
4284
4285
4286
4287void zsm_apply_range___msm_write ( Thr* thr,
4288 Addr a, SizeT len )
4289{
4290 /* fast track a couple of common cases */
4291 if (len == 4 && aligned32(a)) {
4292 zsm_apply32___msm_write( thr, a );
4293 return;
4294 }
4295 if (len == 8 && aligned64(a)) {
4296 zsm_apply64___msm_write( thr, a );
4297 return;
4298 }
4299
4300 /* be completely general (but as efficient as possible) */
4301 if (len == 0) return;
4302
4303 if (!aligned16(a) && len >= 1) {
4304 zsm_apply8___msm_write( thr, a );
4305 a += 1;
4306 len -= 1;
4307 tl_assert(aligned16(a));
4308 }
4309 if (len == 0) return;
4310
4311 if (!aligned32(a) && len >= 2) {
4312 zsm_apply16___msm_write( thr, a );
4313 a += 2;
4314 len -= 2;
4315 tl_assert(aligned32(a));
4316 }
4317 if (len == 0) return;
4318
4319 if (!aligned64(a) && len >= 4) {
4320 zsm_apply32___msm_write( thr, a );
4321 a += 4;
4322 len -= 4;
4323 tl_assert(aligned64(a));
4324 }
4325 if (len == 0) return;
4326
4327 if (len >= 8) {
4328 tl_assert(aligned64(a));
4329 while (len >= 8) {
4330 zsm_apply64___msm_write( thr, a );
4331 a += 8;
4332 len -= 8;
4333 }
4334 tl_assert(aligned64(a));
4335 }
4336 if (len == 0) return;
4337
4338 if (len >= 4)
4339 tl_assert(aligned32(a));
4340 if (len >= 4) {
4341 zsm_apply32___msm_write( thr, a );
4342 a += 4;
4343 len -= 4;
4344 }
4345 if (len == 0) return;
4346
4347 if (len >= 2)
4348 tl_assert(aligned16(a));
4349 if (len >= 2) {
4350 zsm_apply16___msm_write( thr, a );
4351 a += 2;
4352 len -= 2;
4353 }
4354 if (len == 0) return;
4355
4356 if (len >= 1) {
4357 zsm_apply8___msm_write( thr, a );
njn4c245e52009-03-15 23:25:38 +00004358 //a += 1;
sewardjf98e1c02008-10-25 16:22:41 +00004359 len -= 1;
4360 }
4361 tl_assert(len == 0);
4362}
4363
4364
4365
4366
4367/* Block-copy states (needed for implementing realloc()). */
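/* Usage note (added; a sketch of the expected call, not code from
   this function): when the client realloc()s a block and the payload
   moves, the tool is expected to call
   libhb_copy_shadow_state( new_addr, old_addr, size_to_preserve )
   -- see the wrapper near the end of this file -- so that the old
   bytes' constraints follow the data to its new location. */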
4368
4369static void zsm_copy_range ( Addr src, Addr dst, SizeT len )
4370{
4371 SizeT i;
4372 if (len == 0)
4373 return;
4374
4375 /* assert for non-overlappingness */
4376 tl_assert(src+len <= dst || dst+len <= src);
4377
4378 /* To be simple, just copy byte by byte. But so as not to wreck
4379 performance for later accesses to dst[0 .. len-1], normalise
4380 destination lines as we finish with them, and also normalise the
4381 line containing the first and last address. */
4382 for (i = 0; i < len; i++) {
4383 Bool normalise
4384 = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
4385 || i == 0 /* first in range */
4386 || i == len-1; /* last in range */
4387 zsm_copy8( src+i, dst+i, normalise );
4388 }
4389}
4390
4391
4392/* For setting address ranges to a given value. Has considerable
4393 sophistication so as to avoid generating large numbers of pointless
4394 cache loads/writebacks for large ranges. */
4395
4396/* Do small ranges in-cache, in the obvious way. */
4397static
4398void zsm_set_range_SMALL ( Addr a, SizeT len, SVal svNew )
4399{
4400 /* fast track a couple of common cases */
4401 if (len == 4 && aligned32(a)) {
4402 zsm_write32( a, svNew );
4403 return;
4404 }
4405 if (len == 8 && aligned64(a)) {
4406 zsm_write64( a, svNew );
4407 return;
4408 }
4409
4410 /* be completely general (but as efficient as possible) */
4411 if (len == 0) return;
4412
4413 if (!aligned16(a) && len >= 1) {
4414 zsm_write8( a, svNew );
4415 a += 1;
4416 len -= 1;
4417 tl_assert(aligned16(a));
4418 }
4419 if (len == 0) return;
4420
4421 if (!aligned32(a) && len >= 2) {
4422 zsm_write16( a, svNew );
4423 a += 2;
4424 len -= 2;
4425 tl_assert(aligned32(a));
4426 }
4427 if (len == 0) return;
4428
4429 if (!aligned64(a) && len >= 4) {
4430 zsm_write32( a, svNew );
4431 a += 4;
4432 len -= 4;
4433 tl_assert(aligned64(a));
4434 }
4435 if (len == 0) return;
4436
4437 if (len >= 8) {
4438 tl_assert(aligned64(a));
4439 while (len >= 8) {
4440 zsm_write64( a, svNew );
4441 a += 8;
4442 len -= 8;
4443 }
4444 tl_assert(aligned64(a));
4445 }
4446 if (len == 0) return;
4447
4448 if (len >= 4)
4449 tl_assert(aligned32(a));
4450 if (len >= 4) {
4451 zsm_write32( a, svNew );
4452 a += 4;
4453 len -= 4;
4454 }
4455 if (len == 0) return;
4456
4457 if (len >= 2)
4458 tl_assert(aligned16(a));
4459 if (len >= 2) {
4460 zsm_write16( a, svNew );
4461 a += 2;
4462 len -= 2;
4463 }
4464 if (len == 0) return;
4465
4466 if (len >= 1) {
4467 zsm_write8( a, svNew );
njn4c245e52009-03-15 23:25:38 +00004468 //a += 1;
sewardjf98e1c02008-10-25 16:22:41 +00004469 len -= 1;
4470 }
4471 tl_assert(len == 0);
4472}
4473
4474
4475/* If we're doing a small range, hand off to zsm_set_range_SMALL. But
4476 for larger ranges, try to operate directly on the out-of-cache
4477 representation, rather than dragging lines into the cache,
4478 overwriting them, and forcing them out. This turns out to be an
4479 important performance optimisation. */
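/* Worked example (assuming, for illustration only, a line size
   N_LINE_ARANGE of 64 bytes): a request covering [0x1003, 0x112f) is
   handled as a small set over the ragged front 0x1003..0x103f,
   direct writes into the compressed Z representation for the whole
   lines 0x1040..0x10ff, and another small set over the ragged tail
   0x1100..0x112e. */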
4480
4481static void zsm_set_range ( Addr a, SizeT len, SVal svNew )
4482{
4483 tl_assert(svNew != SVal_INVALID);
4484 stats__cache_make_New_arange += (ULong)len;
4485
4486 if (0 && len > 500)
4487 VG_(printf)("make New ( %#lx, %ld )\n", a, len );
4488
4489 if (0) {
4490 static UWord n_New_in_cache = 0;
4491 static UWord n_New_not_in_cache = 0;
4492 /* tag is 'a' with the in-line offset masked out,
4493 eg a[31]..a[4] 0000 */
4494 Addr tag = a & ~(N_LINE_ARANGE - 1);
4495 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
4496 if (LIKELY(tag == cache_shmem.tags0[wix])) {
4497 n_New_in_cache++;
4498 } else {
4499 n_New_not_in_cache++;
4500 }
4501 if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
4502 VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
4503 n_New_in_cache, n_New_not_in_cache );
4504 }
4505
4506 if (LIKELY(len < 2 * N_LINE_ARANGE)) {
4507 zsm_set_range_SMALL( a, len, svNew );
4508 } else {
4509 Addr before_start = a;
4510 Addr aligned_start = cacheline_ROUNDUP(a);
4511 Addr after_start = cacheline_ROUNDDN(a + len);
4512 UWord before_len = aligned_start - before_start;
4513 UWord aligned_len = after_start - aligned_start;
4514 UWord after_len = a + len - after_start;
4515 tl_assert(before_start <= aligned_start);
4516 tl_assert(aligned_start <= after_start);
4517 tl_assert(before_len < N_LINE_ARANGE);
4518 tl_assert(after_len < N_LINE_ARANGE);
4519 tl_assert(get_cacheline_offset(aligned_start) == 0);
4520 if (get_cacheline_offset(a) == 0) {
4521 tl_assert(before_len == 0);
4522 tl_assert(a == aligned_start);
4523 }
4524 if (get_cacheline_offset(a+len) == 0) {
4525 tl_assert(after_len == 0);
4526 tl_assert(after_start == a+len);
4527 }
4528 if (before_len > 0) {
4529 zsm_set_range_SMALL( before_start, before_len, svNew );
4530 }
4531 if (after_len > 0) {
4532 zsm_set_range_SMALL( after_start, after_len, svNew );
4533 }
4534 stats__cache_make_New_inZrep += (ULong)aligned_len;
4535
4536 while (1) {
4537 Addr tag;
4538 UWord wix;
4539 if (aligned_start >= after_start)
4540 break;
4541 tl_assert(get_cacheline_offset(aligned_start) == 0);
4542 tag = aligned_start & ~(N_LINE_ARANGE - 1);
4543 wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
4544 if (tag == cache_shmem.tags0[wix]) {
4545 UWord i;
4546 for (i = 0; i < N_LINE_ARANGE / 8; i++)
4547 zsm_write64( aligned_start + i * 8, svNew );
4548 } else {
4549 UWord i;
4550 Word zix;
4551 SecMap* sm;
4552 LineZ* lineZ;
4553 /* This line is not in the cache. Do not force it in; instead
4554 modify it in-place. */
4555 /* find the Z line to write in and rcdec it or the
4556 associated F line. */
4557 find_Z_for_writing( &sm, &zix, tag );
4558 tl_assert(sm);
4559 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
4560 lineZ = &sm->linesZ[zix];
4561 lineZ->dict[0] = svNew;
4562 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
4563 for (i = 0; i < N_LINE_ARANGE/4; i++)
4564 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
4565 rcinc_LineZ(lineZ);
4566 }
4567 aligned_start += N_LINE_ARANGE;
4568 aligned_len -= N_LINE_ARANGE;
4569 }
4570 tl_assert(aligned_start == after_start);
4571 tl_assert(aligned_len == 0);
4572 }
4573}
4574
4575
4576/////////////////////////////////////////////////////////
4577// //
4578// Synchronisation objects //
4579// //
4580/////////////////////////////////////////////////////////
4581
4582// (UInt) `echo "Synchronisation object" | md5sum`
4583#define SO_MAGIC 0x56b3c5b0U
4584
4585struct _SO {
4586 VtsID viR; /* r-clock of sender */
4587 VtsID viW; /* w-clock of sender */
4588 UInt magic;
4589};
4590
4591static SO* SO__Alloc ( void ) {
4592 SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
4593 so->viR = VtsID_INVALID;
4594 so->viW = VtsID_INVALID;
4595 so->magic = SO_MAGIC;
4596 return so;
4597}
4598static void SO__Dealloc ( SO* so ) {
4599 tl_assert(so);
4600 tl_assert(so->magic == SO_MAGIC);
4601 if (so->viR == VtsID_INVALID) {
4602 tl_assert(so->viW == VtsID_INVALID);
4603 } else {
4604 tl_assert(so->viW != VtsID_INVALID);
4605 VtsID__rcdec(so->viR);
4606 VtsID__rcdec(so->viW);
4607 }
4608 so->magic = 0;
4609 HG_(free)( so );
4610}
4611
4612
4613/////////////////////////////////////////////////////////
4614// //
4615// Top Level API //
4616// //
4617/////////////////////////////////////////////////////////
4618
4619static void show_thread_state ( HChar* str, Thr* t )
4620{
4621 if (1) return; /* set '1' to '0' to enable the clock dumps below */
4622 if (t->viR == t->viW) {
4623 VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
4624 VtsID__pp( t->viR );
4625 VG_(printf)("%s","\n");
4626 } else {
4627 VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
4628 VtsID__pp( t->viR );
4629 VG_(printf)(" viW %u==", t->viW);
4630 VtsID__pp( t->viW );
4631 VG_(printf)("%s","\n");
4632 }
4633}
4634
4635
4636Thr* libhb_init (
4637 void (*get_stacktrace)( Thr*, Addr*, UWord ),
sewardjd52392d2008-11-08 20:36:26 +00004638 ExeContext* (*get_EC)( Thr* )
sewardjf98e1c02008-10-25 16:22:41 +00004639 )
4640{
4641 Thr* thr;
4642 VtsID vi;
4643 tl_assert(get_stacktrace);
sewardjf98e1c02008-10-25 16:22:41 +00004644 tl_assert(get_EC);
4645 main_get_stacktrace = get_stacktrace;
sewardjf98e1c02008-10-25 16:22:41 +00004646 main_get_EC = get_EC;
4647
4648 // No need to initialise hg_wordfm.
4649 // No need to initialise hg_wordset.
4650
4651 vts_set_init();
4652 vts_tab_init();
4653 event_map_init();
4654 VtsID__invalidate_caches();
4655
4656 // initialise shadow memory
4657 zsm_init( SVal__rcinc, SVal__rcdec );
4658
4659 thr = Thr__new();
4660 vi = VtsID__mk_Singleton( thr, 1 );
4661 thr->viR = vi;
4662 thr->viW = vi;
4663 VtsID__rcinc(thr->viR);
4664 VtsID__rcinc(thr->viW);
4665
4666 show_thread_state(" root", thr);
4667 return thr;
4668}
4669
4670Thr* libhb_create ( Thr* parent )
4671{
4672 /* The child's VTSs are copies of the parent's VTSs, but ticked at
4673 the child's index. Since the child's index is guaranteed
4674 unique, it has never been seen before, so the implicit value
4675 before the tick is zero and after that is one. */
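   /* Worked example (clock values invented): if the parent's clocks
      are [P:5], the child starts with viR == viW == [P:5, C:1] and
      the parent immediately ticks to [P:6].  Accesses the parent made
      before the create are therefore ordered before all of the
      child's accesses, while accesses the two threads make from now
      on stay unordered until some send/recv edge connects them. */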
4676 Thr* child = Thr__new();
4677
4678 child->viR = VtsID__tick( parent->viR, child );
4679 child->viW = VtsID__tick( parent->viW, child );
4680 VtsID__rcinc(child->viR);
4681 VtsID__rcinc(child->viW);
4682
4683 tl_assert(VtsID__indexAt( child->viR, child ) == 1);
4684 tl_assert(VtsID__indexAt( child->viW, child ) == 1);
4685
4686 /* and the parent has to move along too */
4687 VtsID__rcdec(parent->viR);
4688 VtsID__rcdec(parent->viW);
4689 parent->viR = VtsID__tick( parent->viR, parent );
4690 parent->viW = VtsID__tick( parent->viW, parent );
4691 VtsID__rcinc(parent->viR);
4692 VtsID__rcinc(parent->viW);
4693
4694 show_thread_state(" child", child);
4695 show_thread_state("parent", parent);
4696
4697 return child;
4698}
4699
4700/* Shut down the library, and print stats (in fact that's _all_
4701 this is for). */
4702void libhb_shutdown ( Bool show_stats )
4703{
4704 if (show_stats) {
4705 VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
4706 VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
4707 stats__secmaps_allocd,
4708 stats__secmap_ga_space_covered);
4709 VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
4710 stats__secmap_linesZ_allocd,
4711 stats__secmap_linesZ_bytes);
4712 VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
4713 stats__secmap_linesF_allocd,
4714 stats__secmap_linesF_bytes);
4715 VG_(printf)(" secmaps: %'10lu iterator steppings\n",
4716 stats__secmap_iterator_steppings);
4717 VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
4718 stats__secmaps_search, stats__secmaps_search_slow);
4719
4720 VG_(printf)("%s","\n");
4721 VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
4722 stats__cache_totrefs, stats__cache_totmisses );
4723 VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
4724 stats__cache_Z_fetches, stats__cache_F_fetches );
4725 VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
4726 stats__cache_Z_wbacks, stats__cache_F_wbacks );
4727 VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n",
4728 stats__cache_invals, stats__cache_flushes );
4729 VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
4730 stats__cache_make_New_arange,
4731 stats__cache_make_New_inZrep);
4732
4733 VG_(printf)("%s","\n");
4734 VG_(printf)(" cline: %'10lu normalises\n",
4735 stats__cline_normalises );
4736 VG_(printf)(" cline: rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4737 stats__cline_read64s,
4738 stats__cline_read32s,
4739 stats__cline_read16s,
4740 stats__cline_read8s );
4741 VG_(printf)(" cline: wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4742 stats__cline_write64s,
4743 stats__cline_write32s,
4744 stats__cline_write16s,
4745 stats__cline_write8s );
4746 VG_(printf)(" cline: sets 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4747 stats__cline_set64s,
4748 stats__cline_set32s,
4749 stats__cline_set16s,
4750 stats__cline_set8s );
4751 VG_(printf)(" cline: get1s %'lu, copy1s %'lu\n",
4752 stats__cline_get8s, stats__cline_copy8s );
4753 VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
4754 stats__cline_64to32splits,
4755 stats__cline_32to16splits,
4756 stats__cline_16to8splits );
4757 VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
4758 stats__cline_64to32pulldown,
4759 stats__cline_32to16pulldown,
4760 stats__cline_16to8pulldown );
4761 if (0)
4762 VG_(printf)(" cline: sizeof(CacheLineZ) %ld, covers %ld bytes of arange\n",
4763 (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);
4764
4765 VG_(printf)("%s","\n");
4766
4767 VG_(printf)(" libhb: %'13llu msm_read (%'llu changed)\n",
4768 stats__msm_read, stats__msm_read_change);
4769 VG_(printf)(" libhb: %'13llu msm_write (%'llu changed)\n",
4770 stats__msm_write, stats__msm_write_change);
4771 VG_(printf)(" libhb: %'13llu getOrd queries (%'llu misses)\n",
4772 stats__getOrdering_queries, stats__getOrdering_misses);
4773 VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
4774 stats__join2_queries, stats__join2_misses);
4775
4776 VG_(printf)("%s","\n");
4777 VG_(printf)(
4778 " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
4779 VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
4780 );
4781 VG_(printf)( " libhb: %lu entries in vts_set\n",
4782 VG_(sizeFM)( vts_set ) );
4783
4784 VG_(printf)("%s","\n");
4785 VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
4786 stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
4787 stats__ctxt_rcdec2,
4788 stats__ctxt_rcdec3 );
4789 VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
4790 stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
4791 VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n",
4792 (UWord)N_RCEC_TAB,
4793 stats__ctxt_tab_curr );
4794 VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
4795 stats__ctxt_tab_qs,
4796 stats__ctxt_tab_cmps );
4797#if 0
4798 VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
4799 VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
4800 VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
4801 VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
4802 VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
4803 VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
4804 VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
4805 VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
4806 VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
4807 VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
4808 VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
4809 VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
4810 VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
4811 VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
4812
4813 VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
4814 VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
4815 VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
4816 VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
4817#endif
4818
4819 VG_(printf)("%s","<<< END libhb stats >>>\n");
4820 VG_(printf)("%s","\n");
4821
4822 }
4823}
4824
4825void libhb_async_exit ( Thr* thr )
4826{
4827 /* is there anything we need to do? */
4828}
4829
4830/* Both Segs and SOs point to VTSs. However, there is no sharing, so
4831 a Seg that points at a VTS is its one-and-only owner, and ditto for
4832 a SO that points at a VTS. */
4833
4834SO* libhb_so_alloc ( void )
4835{
4836 return SO__Alloc();
4837}
4838
4839void libhb_so_dealloc ( SO* so )
4840{
4841 tl_assert(so);
4842 tl_assert(so->magic == SO_MAGIC);
4843 SO__Dealloc(so);
4844}
4845
4846/* See comments in libhb.h for details on the meaning of
4847 strong vs weak sends and strong vs weak receives. */
4848void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
4849{
4850 /* Copy the VTSs from 'thr' into the sync object, and then move
4851 the thread along one step. */
4852
4853 tl_assert(so);
4854 tl_assert(so->magic == SO_MAGIC);
4855
4856 /* stay sane .. a thread's read-clock must always lead or be the
4857 same as its write-clock */
4858 { POrd ord = VtsID__getOrdering(thr->viW, thr->viR);
4859 tl_assert(ord == POrd_EQ || ord == POrd_LT);
4860 }
4861
4862 /* since we're overwriting the VtsIDs in the SO, we need to drop
4863 any references made by the previous contents thereof */
4864 if (so->viR == VtsID_INVALID) {
4865 tl_assert(so->viW == VtsID_INVALID);
4866 so->viR = thr->viR;
4867 so->viW = thr->viW;
4868 VtsID__rcinc(so->viR);
4869 VtsID__rcinc(so->viW);
4870 } else {
4871 /* In a strong send, we dump any previous VC in the SO and
4872 install the sending thread's VC instead. For a weak send we
4873 must join2 with what's already there. */
4874 tl_assert(so->viW != VtsID_INVALID);
4875 VtsID__rcdec(so->viR);
4876 VtsID__rcdec(so->viW);
4877 so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
4878 so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
4879 VtsID__rcinc(so->viR);
4880 VtsID__rcinc(so->viW);
4881 }
4882
4883 /* move both parent clocks along */
4884 VtsID__rcdec(thr->viR);
4885 VtsID__rcdec(thr->viW);
4886 thr->viR = VtsID__tick( thr->viR, thr );
4887 thr->viW = VtsID__tick( thr->viW, thr );
4888 VtsID__rcinc(thr->viR);
4889 VtsID__rcinc(thr->viW);
4890 if (strong_send)
4891 show_thread_state("s-send", thr);
4892 else
4893 show_thread_state("w-send", thr);
4894}
4895
4896void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
4897{
4898 tl_assert(so);
4899 tl_assert(so->magic == SO_MAGIC);
4900
4901 if (so->viR != VtsID_INVALID) {
4902 tl_assert(so->viW != VtsID_INVALID);
4903
4904 /* Weak receive (basically, an R-acquisition of an R-W lock).
4905 This advances the read-clock of the receiver, but not the
4906 write-clock. */
4907 VtsID__rcdec(thr->viR);
4908 thr->viR = VtsID__join2( thr->viR, so->viR );
4909 VtsID__rcinc(thr->viR);
4910
4911 /* For a strong receive, we also advance the receiver's write
4912 clock, which means the receive as a whole is essentially
4913 equivalent to a W-acquisition of an R-W lock. */
4914 if (strong_recv) {
4915 VtsID__rcdec(thr->viW);
4916 thr->viW = VtsID__join2( thr->viW, so->viW );
4917 VtsID__rcinc(thr->viW);
4918 }
4919
4920 if (strong_recv)
4921 show_thread_state("s-recv", thr);
4922 else
4923 show_thread_state("w-recv", thr);
4924
4925 } else {
4926 tl_assert(so->viW == VtsID_INVALID);
4927 /* Deal with degenerate case: 'so' has no vts, so there has been
4928 no message posted to it. Just ignore this case. */
4929 show_thread_state("d-recv", thr);
4930 }
4931}
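
#if 0
/* Not part of the library: a minimal sketch of how a caller such as
   hg_main.c might drive the send/recv primitives above, assuming it
   keeps one SO per client mutex.  The helpers mutex_get_SO and
   current_Thr are invented for the illustration and do not exist
   here. */
static void example_on_mutex_unlock ( void* mutex ) {
   SO* so = mutex_get_SO( mutex );          /* hypothetical lookup */
   libhb_so_send( current_Thr(), so, True/*strong_send*/ );
}
static void example_on_mutex_lock ( void* mutex ) {
   SO* so = mutex_get_SO( mutex );          /* hypothetical lookup */
   libhb_so_recv( current_Thr(), so, True/*strong_recv*/ );
}
#endif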
4932
4933Bool libhb_so_everSent ( SO* so )
4934{
4935 if (so->viR == VtsID_INVALID) {
4936 tl_assert(so->viW == VtsID_INVALID);
4937 return False;
4938 } else {
4939 tl_assert(so->viW != VtsID_INVALID);
4940 return True;
4941 }
4942}
4943
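/* Ad-hoc address tracing for debugging (explanatory note, added): set
   XXX1 and/or XXX2 to an address of interest and rebuild;
   libhb_range_new and libhb_range_noaccess below will then print the
   range and the 8-bit shadow value at its start, before and after the
   state change, whenever the traced address falls within the range. */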
4944#define XXX1 0 // 0x67a106c
4945#define XXX2 0
4946
4947static Bool TRACEME(Addr a, SizeT szB) {
4948 if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
4949 if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
4950 return False;
4951}
4952static void trace ( Thr* thr, Addr a, SizeT szB, HChar* s ) {
4953 SVal sv = zsm_read8(a);
4954 VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
4955 show_thread_state("", thr);
4956 VG_(printf)("%s","\n");
4957}
4958
4959void libhb_range_new ( Thr* thr, Addr a, SizeT szB )
4960{
4961 SVal sv = SVal__mkC(thr->viW, thr->viW);
4962 tl_assert(is_sane_SVal_C(sv));
4963 if (TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
4964 zsm_set_range( a, szB, sv );
4965 if (TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
4966}
4967
4968void libhb_range_noaccess ( Thr* thr, Addr a, SizeT szB )
4969{
4970 if (TRACEME(a,szB)) trace(thr,a,szB,"NA-before");
4971 zsm_set_range( a, szB, SVal__mkA() );
4972 if (TRACEME(a,szB)) trace(thr,a,szB,"NA-after ");
4973}
4974
4975void* libhb_get_Thr_opaque ( Thr* thr ) {
4976 tl_assert(thr);
4977 return thr->opaque;
4978}
4979
4980void libhb_set_Thr_opaque ( Thr* thr, void* v ) {
4981 tl_assert(thr);
4982 thr->opaque = v;
4983}
4984
4985void libhb_copy_shadow_state ( Addr dst, Addr src, SizeT len )
4986{
4987 zsm_copy_range(dst, src, len);
4988}
4989
4990void libhb_maybe_GC ( void )
4991{
4992 event_map_maybe_GC();
4993 /* If there are still freelist entries available, no need for a
4994 GC. */
4995 if (vts_tab_freelist != VtsID_INVALID)
4996 return;
4997 /* So all the table entries are full, and we're having to expand
4998 the table. But did we hit the threshold point yet? */
4999 if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
5000 return;
5001 vts_tab__do_GC( False/*don't show stats*/ );
5002}
5003
5004
5005/////////////////////////////////////////////////////////////////
5006/////////////////////////////////////////////////////////////////
5007// //
5008// SECTION END main library //
5009// //
5010/////////////////////////////////////////////////////////////////
5011/////////////////////////////////////////////////////////////////
5012
5013/*--------------------------------------------------------------------*/
5014/*--- end libhb_main.c ---*/
5015/*--------------------------------------------------------------------*/