Merge Helgrind from branches/YARD into the trunk. Also includes some
minor changes to make stack unwinding on amd64-linux approximately
twice as fast as it was before.
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@8707 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/helgrind/libhb_core.c b/helgrind/libhb_core.c
new file mode 100644
index 0000000..fc232f7
--- /dev/null
+++ b/helgrind/libhb_core.c
@@ -0,0 +1,4562 @@
+
+/*--------------------------------------------------------------------*/
+/*--- LibHB: a library for implementing and checking ---*/
+/*--- the happens-before relationship in concurrent programs. ---*/
/*---                                                 libhb_core.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of LibHB, a library for implementing and checking
+ the happens-before relationship in concurrent programs.
+
+ Copyright (C) 2008-2008 OpenWorks Ltd
+ info@open-works.co.uk
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "pub_tool_basics.h"
+#include "pub_tool_libcassert.h"
+#include "pub_tool_libcbase.h"
+#include "pub_tool_libcprint.h"
+#include "pub_tool_mallocfree.h"
+#include "pub_tool_wordfm.h"
+#include "pub_tool_xarray.h"
+#include "pub_tool_oset.h"
+#include "pub_tool_threadstate.h"
+#include "pub_tool_aspacemgr.h"
+#include "pub_tool_execontext.h"
+#include "pub_tool_errormgr.h"
+
+#include "hg_basics.h"
+#include "hg_wordset.h"
+#include "hg_lock_n_thread.h"
+#include "hg_errors.h"
+
+#include "libhb.h"
+
+
/* Hooks into the main tool, set once at startup and never changed
   thereafter: capture a stack trace for a thread, convert a captured
   trace into an execution context, and fetch a thread's current
   context.  NOTE(review): struct _EC is presumably ExeContext (we
   include pub_tool_execontext.h) -- confirm. */
static void (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
static struct _EC* (*main_stacktrace_to_EC)( Addr*, UWord ) = NULL;
static struct _EC* (*main_get_EC)( Thr* ) = NULL;
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+// //
+// //
+// //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+// //
+// SECTION BEGIN compressed shadow memory //
+// //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
#ifndef __HB_ZSM_H
#define __HB_ZSM_H
/* NOTE(review): identifiers beginning with a double underscore are
   reserved for the implementation (C99 7.1.3); HB_ZSM_H would be
   strictly conforming. */

/* A shadow value.  Opaque to this layer, except for the two reserved
   encodings below, which are distinguished by their top two bits. */
typedef ULong SVal;

/* This value has special significance to the implementation, and callers
   may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)

/* This is the default value for shadow memory.  Initially the shadow
   memory contains no accessible areas and so all reads produce this
   value.  TODO: make this caller-defineable. */
#define SVal_NOACCESS (2ULL << 62)

/* Initialise the library.  Once initialised, it will (or may) call
   rcinc and rcdec in response to all the calls below, in order to
   allow the user to do reference counting on the SVals stored herein.
   It is important to understand, however, that due to internal
   caching, the reference counts are in general inaccurate, and can be
   both above or below the true reference count for an item.  In
   particular, the library may indicate that the reference count for
   an item is zero, when in fact it is not.

   To make the reference counting exact and therefore non-pointless,
   call zsm_flush_cache.  Immediately after it returns, the reference
   counts for all items, as deduced by the caller by observing calls
   to rcinc and rcdec, will be correct, and so any items with a zero
   reference count may be freed (or at least considered to be
   unreferenced by this library).
*/
static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );

/* Core shadow-memory operations; implemented later in this file. */
static void zsm_set_range  ( Addr, SizeT, SVal );
static SVal zsm_read8      ( Addr );
static void zsm_copy_range ( Addr, Addr, SizeT );
/* Write all dirty cache state back, making the refcounts exact; see
   the zsm_init comment above. */
static void zsm_flush_cache ( void );

#endif /* ! __HB_ZSM_H */
+
+
/* For the shadow mem cache stuff we may want more intrusive
   checks.  Unfortunately there's no almost-zero-cost way to make them
   selectable at run time.  Hence set the #if 0 to #if 1 and
   rebuild if you want them. */
#if 0
#  define SCE_CACHELINE 1  /* do sanity-check CacheLine stuff */
#  define inline __attribute__((noinline))
   /* NOTE(review): deliberately redefines the 'inline' keyword so
      that every 'inline' helper in this file is forced out-of-line,
      making them visible to the debugger/profiler while the expensive
      checks are enabled. */
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define SCE_CACHELINE 0  /* don't sanity-check CacheLine stuff */
#endif

/* For the SegmentID, SegmentSet and SVal stuff we may want more
   intrusive checks.  Again there's no zero cost way to do this.  Set
   the #if 0 to #if 1 and rebuild if you want them. */
#if 0
#  define SCE_SVALS 1  /* sanity-check shadow value stuff */
#else
#  define SCE_SVALS 0
#endif
+
+
/* Round a up to the next multiple of N.  N must be a power of 2.
   Both arguments are fully parenthesised, so the macros are safe to
   use with compound-expression arguments, e.g. ROUNDUP(x << 1, 16);
   the previous definitions expanded 'a' and 'N' unparenthesised and
   silently miscomputed in such cases. */
#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N)-1))
/* Round a down to the next multiple of N.  N must be a power of 2 */
#define ROUNDDN(a, N)   ((a) & ~((N)-1))
+
+
+
/* ------ User-supplied RC functions ------ */
/* Reference-count adjustment callbacks registered via zsm_init.
   Per the zsm_init contract, the counts observed through these are
   only exact immediately after zsm_flush_cache. */
static void(*rcinc)(SVal) = NULL;
static void(*rcdec)(SVal) = NULL;
+
+
/* ------ CacheLine ------ */

#define N_LINE_BITS    6 /* must be >= 3 */
#define N_LINE_ARANGE  (1 << N_LINE_BITS)
#define N_LINE_TREES   (N_LINE_ARANGE >> 3)

/* Decompressed (in-cache) form of a line: shadow values for
   N_LINE_ARANGE bytes of address space, organised as N_LINE_TREES
   implicit 8-leaf trees.  descrs[t] describes the shape of tree t
   (see the TREE_DESCR_* bits); svals[] holds the leaves, 8 per tree. */
typedef
   struct {
      UShort descrs[N_LINE_TREES];
      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
   }
   CacheLine;

/* Tree-descriptor bits.  Within one 8-byte tree, values may be held
   at 8-, 16-, 32- or 64-bit granularity; TREE_DESCR_<g>_<i> set means
   "the i'th <g>-bit node currently carries a valid SVal".  See
   descr_to_validbits() for the legal combinations. */
#define TREE_DESCR_16_0 (1<<0)
#define TREE_DESCR_32_0 (1<<1)
#define TREE_DESCR_16_1 (1<<2)
#define TREE_DESCR_64   (1<<3)
#define TREE_DESCR_16_2 (1<<4)
#define TREE_DESCR_32_1 (1<<5)
#define TREE_DESCR_16_3 (1<<6)
#define TREE_DESCR_8_0  (1<<7)
#define TREE_DESCR_8_1  (1<<8)
#define TREE_DESCR_8_2  (1<<9)
#define TREE_DESCR_8_3  (1<<10)
#define TREE_DESCR_8_4  (1<<11)
#define TREE_DESCR_8_5  (1<<12)
#define TREE_DESCR_8_6  (1<<13)
#define TREE_DESCR_8_7  (1<<14)
#define TREE_DESCR_DTY  (1<<15) /* NOTE(review): presumably a 'dirty'
                                   flag; not referenced in this chunk
                                   -- confirm against later code. */

typedef
   struct {
      SVal  dict[4]; /* can represent up to 4 diff values in the line */
      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
                                      dict indexes */
      /* if dict[0] == SVal_INVALID then dict[1] is the index of the
         LineF to use, and dict[2..] are also SVal_INVALID. */
   }
   LineZ; /* compressed rep for a cache line */

typedef
   struct {
      Bool inUse;
      SVal w64s[N_LINE_ARANGE];
   }
   LineF; /* full rep for a cache line */
+
/* Shadow memory.
   Primary map is a WordFM Addr SecMap*.
   SecMaps cover some page-size-ish section of address space and hold
   a compressed representation.
   CacheLine-sized chunks of SecMaps are copied into a Cache, being
   decompressed when moved into the cache and recompressed on the
   way out.  Because of this, the cache must operate as a writeback
   cache, not a writethrough one.

   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
   N_SECMAP_BITS must >= N_LINE_BITS.
*/
#define N_SECMAP_BITS   13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)

/* The data in the SecMap is held in the array of LineZs.  Each LineZ
   either carries the required data directly, in a compressed
   representation, or it holds (in .dict[0]) an index to the LineF in
   .linesF that holds the full representation.

   Currently-unused LineF's have their .inUse bit set to zero.
   Since each in-use LineF is referred to by exactly one LineZ,
   the number of .linesZ[] that refer to .linesF should equal
   the number of .linesF[] that have .inUse == True.

   RC obligations: the RCs presented to the user include exactly
   the values in:
   * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
   * F reps that are in use (.inUse == True)

   Hence the following actions at the following transitions are required:

   F rep: .inUse==True  -> .inUse==False        -- rcdec_LineF
   F rep: .inUse==False -> .inUse==True         -- rcinc_LineF
   Z rep: .dict[0] from other to SVal_INVALID   -- rcdec_LineZ
   Z rep: .dict[0] from SVal_INVALID to other   -- rcinc_LineZ
*/
typedef
   struct {
      UInt   magic;        /* == SecMap_MAGIC; checked by is_sane_SecMap */
      LineZ  linesZ[N_SECMAP_ZLINES];
      LineF* linesF;       /* lazily-grown pool of full-rep lines */
      UInt   linesF_size;  /* # entries in linesF (0 iff linesF==NULL) */
   }
   SecMap;

#define SecMap_MAGIC 0x571e58cbU

/* Cheap structural sanity check: non-NULL and carries the magic. */
static inline Bool is_sane_SecMap ( SecMap* sm ) {
   return sm != NULL && sm->magic == SecMap_MAGIC;
}
+
/* ------ Cache ------ */

#define N_WAY_BITS 16
#define N_WAY_NENT (1 << N_WAY_BITS)

/* Each tag is the address of the associated CacheLine, rounded down
   to a CacheLine address boundary.  A CacheLine size must be a power
   of 2 and must be 8 or more.  Hence an easy way to initialise the
   cache so it is empty is to set all the tag values to any value % 8
   != 0, eg 1.  This means all queries in the cache initially miss.
   It does however require us to detect and not writeback, any line
   with a bogus tag. */
typedef
   struct {
      /* tags0[i] is the base address of the line held in lyns0[i],
         or a bogus (misaligned) value if the slot is empty. */
      CacheLine lyns0[N_WAY_NENT];
      Addr      tags0[N_WAY_NENT];
   }
   Cache;

/* True iff 'tag' could be a real line tag, i.e. is naturally aligned
   to the start of a CacheLine.  Bogus empty-slot tags (e.g. 1) fail. */
static inline Bool is_valid_scache_tag ( Addr tag ) {
   /* a valid tag should be naturally aligned to the start of
      a CacheLine. */
   return 0 == (tag & (N_LINE_ARANGE - 1));
}


/* --------- Primary data structures --------- */

/* Shadow memory primary map */
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
static Cache   cache_shmem;
+
+
/* Event counters for the shadow-memory machinery; accumulated here
   and (presumably) reported at shutdown -- the printing code is not
   in this part of the file. */
static UWord stats__secmaps_search       = 0; // # SM finds
static UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
static UWord stats__secmap_linesF_bytes  = 0; // .. using this much storage
static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
static UWord stats__cache_F_fetches      = 0; // # F lines fetched
static UWord stats__cache_F_wbacks       = 0; // # F lines written back
static UWord stats__cache_invals         = 0; // # cache invals
static UWord stats__cache_flushes        = 0; // # cache flushes
static UWord stats__cache_totrefs        = 0; // # total accesses
static UWord stats__cache_totmisses      = 0; // # misses
static ULong stats__cache_make_New_arange = 0; // total arange made New
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
static UWord stats__cline_normalises     = 0; // # calls to cacheline_normalise
static UWord stats__cline_read64s        = 0; // # calls to s_m_read64
static UWord stats__cline_read32s        = 0; // # calls to s_m_read32
static UWord stats__cline_read16s        = 0; // # calls to s_m_read16
static UWord stats__cline_read8s         = 0; // # calls to s_m_read8
static UWord stats__cline_write64s       = 0; // # calls to s_m_write64
static UWord stats__cline_write32s       = 0; // # calls to s_m_write32
static UWord stats__cline_write16s       = 0; // # calls to s_m_write16
static UWord stats__cline_write8s        = 0; // # calls to s_m_write8
static UWord stats__cline_set64s         = 0; // # calls to s_m_set64
static UWord stats__cline_set32s         = 0; // # calls to s_m_set32
static UWord stats__cline_set16s         = 0; // # calls to s_m_set16
static UWord stats__cline_set8s          = 0; // # calls to s_m_set8
static UWord stats__cline_get8s          = 0; // # calls to s_m_get8
static UWord stats__cline_copy8s         = 0; // # calls to s_m_copy8
static UWord stats__cline_64to32splits   = 0; // # 64-bit accesses split
static UWord stats__cline_32to16splits   = 0; // # 32-bit accesses split
static UWord stats__cline_16to8splits    = 0; // # 16-bit accesses split
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8

/* Base address of the SecMap covering 'a' (N_SECMAP_ARANGE aligned). */
static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
   return a & ~(N_SECMAP_ARANGE - 1);
}
/* Byte offset of 'a' within its covering SecMap. */
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
   return a & (N_SECMAP_ARANGE - 1);
}
+
+
/*----------------------------------------------------------------*/
/*--- map_shmem :: WordFM Addr SecMap                          ---*/
/*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
/*----------------------------------------------------------------*/

/*--------------- SecMap allocation --------------- */

/* Bump-pointer arena state: [next, end1) is the unused tail of the
   current superblock. */
static HChar* shmem__bigchunk_next = NULL;
static HChar* shmem__bigchunk_end1 = NULL;

/* Carve 'n' bytes (rounded up to 16, 16-aligned) out of the current
   superblock, fetching a fresh 4MB superblock from the address-space
   manager when the current one is exhausted.  Memory obtained here is
   never freed; any unused tail of an exhausted superblock is simply
   abandoned.  Aborts the process on OOM. */
static void* shmem__bigchunk_alloc ( SizeT n )
{
   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;  /* 4MB */
   tl_assert(n > 0);
   n = VG_ROUNDUP(n, 16);
   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
      if (0)
      VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
                  (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
      if (shmem__bigchunk_next == NULL)
         VG_(out_of_memory_NORETURN)(
            "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
   }
   tl_assert(shmem__bigchunk_next);
   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
   shmem__bigchunk_next += n;
   return shmem__bigchunk_next - n;
}
+
/* Allocate (from the bigchunk arena -- so never freed) and initialise
   a SecMap: every LineZ starts in the compressed representation with
   all bytes mapping to dict[0] == SVal_NOACCESS, and no LineF pool.
   No rcinc calls are needed here since SVal_NOACCESS is a reserved
   value, not a user SVal. */
static SecMap* shmem__alloc_SecMap ( void )
{
   Word    i, j;
   SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
   if (0) VG_(printf)("alloc_SecMap %p\n",sm);
   tl_assert(sm);
   sm->magic = SecMap_MAGIC;
   for (i = 0; i < N_SECMAP_ZLINES; i++) {
      sm->linesZ[i].dict[0] = SVal_NOACCESS;
      sm->linesZ[i].dict[1] = SVal_INVALID;
      sm->linesZ[i].dict[2] = SVal_INVALID;
      sm->linesZ[i].dict[3] = SVal_INVALID;
      for (j = 0; j < N_LINE_ARANGE/4; j++)
         sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
   }
   sm->linesF      = NULL;
   sm->linesF_size = 0;
   stats__secmaps_allocd++;
   stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
   stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
   stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
   return sm;
}
+
/* Tiny 3-entry move-to-front cache in front of the primary map.
   gaKey == 1 marks an empty slot: 1 can never equal a real key since
   real keys are N_SECMAP_ARANGE-aligned. */
typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };

/* Find the SecMap covering 'ga', or NULL if there isn't one.  Hits
   in smCache are promoted one slot towards the front; misses fall
   back to the WordFM, and a successful slow lookup is installed at
   the front, shifting the others down. */
static SecMap* shmem__find_SecMap ( Addr ga )
{
   SecMap* sm    = NULL;
   Addr    gaKey = shmem__round_to_SecMap_base(ga);
   // Cache
   stats__secmaps_search++;
   if (LIKELY(gaKey == smCache[0].gaKey))
      return smCache[0].sm;
   if (LIKELY(gaKey == smCache[1].gaKey)) {
      SMCacheEnt tmp = smCache[0];
      smCache[0] = smCache[1];
      smCache[1] = tmp;
      return smCache[0].sm;
   }
   if (gaKey == smCache[2].gaKey) {
      SMCacheEnt tmp = smCache[1];
      smCache[1] = smCache[2];
      smCache[2] = tmp;
      return smCache[1].sm;
   }
   // end Cache
   stats__secmaps_search_slow++;
   if (VG_(lookupFM)( map_shmem,
                      NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
      tl_assert(sm != NULL);
      smCache[2] = smCache[1];
      smCache[1] = smCache[0];
      smCache[0].gaKey = gaKey;
      smCache[0].sm    = sm;
   } else {
      tl_assert(sm == NULL);
   }
   return sm;
}
+
/* As shmem__find_SecMap, but on a miss allocate a fresh all-NOACCESS
   SecMap, register it in the primary map, and return it.  Never
   returns NULL. */
static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
{
   SecMap* sm = shmem__find_SecMap ( ga );
   if (LIKELY(sm)) {
      return sm;
   } else {
      /* create a new one */
      Addr gaKey = shmem__round_to_SecMap_base(ga);
      sm = shmem__alloc_SecMap();
      tl_assert(sm);
      VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
      return sm;
   }
}
+
+
/* ------------ LineF and LineZ related ------------ */

/* Reference-count all 64 SVals of an in-use full-rep line.  These
   implement the RC obligations listed above the SecMap definition. */
static void rcinc_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcinc(lineF->w64s[i]);
}

static void rcdec_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcdec(lineF->w64s[i]);
}

/* Reference-count the dictionary entries of a direct (non-F-ref)
   compressed line.  dict[0] must be valid; trailing entries may be
   unused (SVal_INVALID) and are skipped. */
static void rcinc_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcinc(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
}

static void rcdec_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcdec(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
}
+
+inline
+static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
+ Word bix, shft, mask, prep;
+ tl_assert(ix >= 0);
+ bix = ix >> 2;
+ shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
+ mask = 3 << shft;
+ prep = b2 << shft;
+ arr[bix] = (arr[bix] & ~mask) | prep;
+}
+
+inline
+static UWord read_twobit_array ( UChar* arr, UWord ix ) {
+ Word bix, shft;
+ tl_assert(ix >= 0);
+ bix = ix >> 2;
+ shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
+ return (arr[bix] >> shft) & 3;
+}
+
/* Given address 'tag', find either the Z or F line containing relevant
   data, so it can be read into the cache.  Exactly one of *zp/*fp is
   set non-NULL on return: the Z line if it carries data directly, or
   the F line it points at otherwise.  Allocates the covering SecMap
   if necessary (in which case the line reads as all-NOACCESS).
*/
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
                                  /*OUT*/LineF** fp, Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord  zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   if (lineZ->dict[0] == SVal_INVALID) {
      /* Z line is an indirection: dict[1] holds the linesF index. */
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      lineZ = NULL;
   }
   *zp = lineZ;
   *fp = lineF;
}
+
/* Given address 'tag', return the relevant SecMap and the index of
   the LineZ within it, in the expectation that the line is to be
   overwritten.  Regardless of whether 'tag' is currently associated
   with a Z or F representation, this rcdecs the current
   representation, in recognition of the fact that the contents are
   just about to be overwritten.  If the line currently uses an F rep,
   that LineF is also released (.inUse cleared) for reuse. */
static __attribute__((noinline))
void find_Z_for_writing ( /*OUT*/SecMap** smp,
                          /*OUT*/Word* zixp,
                          Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord  zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   /* re RCs, we are freeing up this LineZ/LineF so that new data can
      be parked in it.  Hence have to rcdec it accordingly. */
   /* If lineZ has an associated lineF, free it up. */
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      rcdec_LineF(lineF);
      lineF->inUse = False;
   } else {
      rcdec_LineZ(lineZ);
   }
   *smp  = sm;
   *zixp = zix;
}
+
/* Find a free LineF slot in sm->linesF, growing the pool (doubling,
   starting at 1) if none is free.  The index of the free slot is
   returned in *fixp; the slot itself is left with .inUse == False,
   for the caller to populate and mark in-use.  Growth copies the old
   entries, zeroes and frees the old array.  After growing there is
   always at least one free slot (new_size > old size), so the final
   search cannot fail. */
static __attribute__((noinline))
void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
   UInt   i, new_size;
   LineF* nyu;

   /* invariant: linesF and linesF_size are NULL/0 or non-NULL/>0 */
   if (sm->linesF) {
      tl_assert(sm->linesF_size > 0);
   } else {
      tl_assert(sm->linesF_size == 0);
   }

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         if (!sm->linesF[i].inUse) {
            *fixp = (Word)i;
            return;
         }
      }
   }

   /* No free F line found.  Expand existing array and try again. */
   new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
   nyu      = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
                           new_size * sizeof(LineF) );
   tl_assert(nyu);

   stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
   stats__secmap_linesF_bytes  += (new_size - sm->linesF_size)
                                  * sizeof(LineF);

   if (0)
   VG_(printf)("SM %p: expand F array from %d to %d\n",
               sm, (Int)sm->linesF_size, new_size);

   for (i = 0; i < new_size; i++)
      nyu[i].inUse = False;

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         /* growth only happens when every existing line is in use */
         tl_assert(sm->linesF[i].inUse);
         nyu[i] = sm->linesF[i];
      }
      VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
      HG_(free)(sm->linesF);
   }

   sm->linesF      = nyu;
   sm->linesF_size = new_size;

   for (i = 0; i < sm->linesF_size; i++) {
      if (!sm->linesF[i].inUse) {
         *fixp = (Word)i;
         return;
      }
   }

   /*NOTREACHED*/
   tl_assert(0);
}
+
+
+/* ------------ CacheLine and implicit-tree related ------------ */
+
+__attribute__((unused))
+static void pp_CacheLine ( CacheLine* cl ) {
+ Word i;
+ if (!cl) {
+ VG_(printf)("%s","pp_CacheLine(NULL)\n");
+ return;
+ }
+ for (i = 0; i < N_LINE_TREES; i++)
+ VG_(printf)(" descr: %04lx\n", (UWord)cl->descrs[i]);
+ for (i = 0; i < N_LINE_ARANGE; i++)
+ VG_(printf)(" sval: %08lx\n", (UWord)cl->svals[i]);
+}
+
/* Convert a 16-bit tree descriptor into an 8-bit mask with bit i set
   iff leaf byte i of the tree carries a valid SVal under 'descr'.
   Returns 0 for any descriptor that is not one of the legal
   combinations of granularities (every valid descriptor yields at
   least one valid leaf).  The DESCR/BYTE helper macros let each legal
   combination be written out positionally; all of it constant-folds. */
static UChar descr_to_validbits ( UShort descr )
{
   /* a.k.a Party Time for gcc's constant folder */
#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
                b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0) \
      ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
                   ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
                   ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
                   ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
                   ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
                   ( (b16_2) << 4)  | ( (b64)   << 3)  | \
                   ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
                   ( (b16_0) << 0) ) )

#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
      ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
                  ( (bit5) << 5) | ( (bit4) << 4) | \
                  ( (bit3) << 3) | ( (bit2) << 2) | \
                  ( (bit1) << 1) | ( (bit0) << 0) ) )

   /* these should all get folded out at compile time */
   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);

   switch (descr) {
   /*
              +--------------------------------- TREE_DESCR_8_7
              |             +------------------- TREE_DESCR_8_0
              |             |  +---------------- TREE_DESCR_16_3
              |             |  | +-------------- TREE_DESCR_32_1
              |             |  | | +------------ TREE_DESCR_16_2
              |             |  | | |  +--------- TREE_DESCR_64
              |             |  | | |  |  +------ TREE_DESCR_16_1
              |             |  | | |  |  | +---- TREE_DESCR_32_0
              |             |  | | |  |  | | +-- TREE_DESCR_16_0
              |             |  | | |  |  | | |
              |             |  | | |  |  | | |   GRANULARITY, 7 -> 0 */
   case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8  8 8 8 8 */
                                                 return BYTE(1,1,1,1,1,1,1,1);
   case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16   8 8 8 8 */
                                                 return BYTE(1,1,0,1,1,1,1,1);
   case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8   8 8 8 8 */
                                                 return BYTE(0,1,1,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16    8 8 8 8 */
                                                 return BYTE(0,1,0,1,1,1,1,1);

   case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8  8 8 16 */
                                                 return BYTE(1,1,1,1,1,1,0,1);
   case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16   8 8 16 */
                                                 return BYTE(1,1,0,1,1,1,0,1);
   case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8   8 8 16 */
                                                 return BYTE(0,1,1,1,1,1,0,1);
   case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16    8 8 16 */
                                                 return BYTE(0,1,0,1,1,1,0,1);

   case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8  16 8 8 */
                                                 return BYTE(1,1,1,1,0,1,1,1);
   case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16   16 8 8 */
                                                 return BYTE(1,1,0,1,0,1,1,1);
   case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8   16 8 8 */
                                                 return BYTE(0,1,1,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16    16 8 8 */
                                                 return BYTE(0,1,0,1,0,1,1,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8  16 16 */
                                                 return BYTE(1,1,1,1,0,1,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16   16 16 */
                                                 return BYTE(1,1,0,1,0,1,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8   16 16 */
                                                 return BYTE(0,1,1,1,0,1,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16    16 16 */
                                                 return BYTE(0,1,0,1,0,1,0,1);

   case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32       8 8 8 8 */
                                                 return BYTE(0,0,0,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32       8 8 16  */
                                                 return BYTE(0,0,0,1,1,1,0,1);
   case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32       16 8 8  */
                                                 return BYTE(0,0,0,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32       16 16   */
                                                 return BYTE(0,0,0,1,0,1,0,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8  32 */
                                                 return BYTE(1,1,1,1,0,0,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16   32 */
                                                 return BYTE(1,1,0,1,0,0,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8   32 */
                                                 return BYTE(0,1,1,1,0,0,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16    32 */
                                                 return BYTE(0,1,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
                                                 return BYTE(0,0,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
                                                 return BYTE(0,0,0,0,0,0,0,1);

   default: return BYTE(0,0,0,0,0,0,0,0);
                   /* INVALID - any valid descr produces at least one
                      valid bit in tree[0..7]*/
   }
   /* NOTREACHED*/
   tl_assert(0);

#  undef DESCR
#  undef BYTE
}
+
/* True iff 'descr' is one of the legal granularity combinations,
   i.e. descr_to_validbits() does not map it to the INVALID mask. */
__attribute__((unused))
static Bool is_sane_Descr ( UShort descr ) {
   return descr_to_validbits(descr) != 0;
}
+
/* Render a tree descriptor into 'dst' as grouped 0/1 digits, in the
   same positional order as the DESCR macro (8_7..8_0, 16_3/32_1/16_2,
   64, 16_1/32_0/16_0).  Caller supplies a large-enough buffer. */
static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
   VG_(sprintf)(dst,
                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
                (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
   );
}
/* Render 'byte' into 'dst' as 8 binary digits, MSB first. */
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
                (Int)((byte & 128) ? 1 : 0),
                (Int)((byte & 64)  ? 1 : 0),
                (Int)((byte & 32)  ? 1 : 0),
                (Int)((byte & 16)  ? 1 : 0),
                (Int)((byte & 8)   ? 1 : 0),
                (Int)((byte & 4)   ? 1 : 0),
                (Int)((byte & 2)   ? 1 : 0),
                (Int)((byte & 1)   ? 1 : 0)
   );
}
+
+static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
+ Word i;
+ UChar validbits = descr_to_validbits(descr);
+ HChar buf[128], buf2[128];
+ if (validbits == 0)
+ goto bad;
+ for (i = 0; i < 8; i++) {
+ if (validbits & (1<<i)) {
+ if (tree[i] == SVal_INVALID)
+ goto bad;
+ } else {
+ if (tree[i] != SVal_INVALID)
+ goto bad;
+ }
+ }
+ return True;
+ bad:
+ sprintf_Descr( buf, descr );
+ sprintf_Byte( buf2, validbits );
+ VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
+ VG_(printf)(" validbits 0x%02lx %s\n", (UWord)validbits, buf2);
+ VG_(printf)(" descr 0x%04lx %s\n", (UWord)descr, buf);
+ for (i = 0; i < 8; i++)
+ VG_(printf)(" [%ld] 0x%016llx\n", i, tree[i]);
+ VG_(printf)("%s","}\n");
+ return 0;
+}
+
/* Sanity-check every (descr, 8-leaf tree) pair in a CacheLine; dumps
   the whole line on failure.  Expensive -- only called when
   SCE_CACHELINE checking is compiled in. */
static Bool is_sane_CacheLine ( CacheLine* cl )
{
   Word tno, cloff;

   if (!cl) goto bad;

   for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
      UShort descr = cl->descrs[tno];
      SVal*  tree  = &cl->svals[cloff];
      if (!is_sane_Descr_and_Tree(descr, tree))
         goto bad;
   }
   tl_assert(cloff == N_LINE_ARANGE);
   return True;
  bad:
   pp_CacheLine(cl);
   return False;
}
+
/* Given a tree whose 8 leaves all hold valid SVals, merge equal
   neighbours bottom-up: equal leaf pairs collapse into 16-bit nodes,
   equal 16-bit pairs into 32-bit nodes, and those into a single
   64-bit node.  Collapsed-away leaves are set to SVal_INVALID in
   place; returns the descriptor describing the resulting shape. */
static UShort normalise_tree ( /*MOD*/SVal* tree )
{
   UShort descr;
   /* pre: incoming tree[0..7] does not have any invalid shvals, in
      particular no zeroes. */
   if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
                || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
                || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
                || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
      tl_assert(0);

   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   /* build 16-bit layer */
   if (tree[1] == tree[0]) {
      tree[1] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
      descr |= TREE_DESCR_16_0;
   }
   if (tree[3] == tree[2]) {
      tree[3] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
      descr |= TREE_DESCR_16_1;
   }
   if (tree[5] == tree[4]) {
      tree[5] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
      descr |= TREE_DESCR_16_2;
   }
   if (tree[7] == tree[6]) {
      tree[7] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
      descr |= TREE_DESCR_16_3;
   }
   /* build 32-bit layer: only if both 16-bit halves exist and match */
   if (tree[2] == tree[0]
       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
      tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
      descr |= TREE_DESCR_32_0;
   }
   if (tree[6] == tree[4]
       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
      tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
      descr |= TREE_DESCR_32_1;
   }
   /* build 64-bit layer */
   if (tree[4] == tree[0]
       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
      tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
      descr |= TREE_DESCR_64;
   }
   return descr;
}
+
/* This takes a cacheline where all the data is at the leaves
   (w8[..]) and builds a correctly normalised tree, i.e. rebuilds all
   tree descriptors via normalise_tree on each 8-leaf group. */
static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
{
   Word tno, cloff;
   for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
      SVal* tree = &cl->svals[cloff];
      cl->descrs[tno] = normalise_tree( tree );
   }
   tl_assert(cloff == N_LINE_ARANGE);
   if (SCE_CACHELINE)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   stats__cline_normalises++;
}
+
+
/* A run of 'count' consecutive bytes all shadowed by 'sval'. */
typedef struct { UChar count; SVal sval; } CountedSVal;

/* Flatten 'src' into run-length form: walk each tree in address order
   and emit one (count, sval) pair per currently-valid node, where
   count is that node's granularity in bytes (8/4/2/1).  The counts
   emitted always sum to N_LINE_ARANGE.  'dst' must have room for the
   worst case of N_LINE_ARANGE all-byte-granularity entries; the
   number actually used is returned in *dstUsedP. */
static
void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
                               /*OUT*/Word* dstUsedP,
                               Word nDst, CacheLine* src )
{
   Word  tno, cloff, dstUsed;

   tl_assert(nDst == N_LINE_ARANGE);
   dstUsed = 0;

   for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
      UShort descr = src->descrs[tno];
      SVal*  tree  = &src->svals[cloff];

      /* sequentialise the tree described by (descr,tree). */
#     define PUT(_n,_v)                                \
         do { dst[dstUsed  ].count = (_n);             \
              dst[dstUsed++].sval  = (_v);             \
         } while (0)

      /* byte 0 */
      if (descr & TREE_DESCR_64)   PUT(8, tree[0]); else
      if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
      if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
      if (descr & TREE_DESCR_8_0)  PUT(1, tree[0]);
      /* byte 1 */
      if (descr & TREE_DESCR_8_1)  PUT(1, tree[1]);
      /* byte 2 */
      if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
      if (descr & TREE_DESCR_8_2)  PUT(1, tree[2]);
      /* byte 3 */
      if (descr & TREE_DESCR_8_3)  PUT(1, tree[3]);
      /* byte 4 */
      if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
      if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
      if (descr & TREE_DESCR_8_4)  PUT(1, tree[4]);
      /* byte 5 */
      if (descr & TREE_DESCR_8_5)  PUT(1, tree[5]);
      /* byte 6 */
      if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
      if (descr & TREE_DESCR_8_6)  PUT(1, tree[6]);
      /* byte 7 */
      if (descr & TREE_DESCR_8_7)  PUT(1, tree[7]);

#     undef PUT
      /* END sequentialise the tree described by (descr,tree). */

   }
   tl_assert(cloff == N_LINE_ARANGE);
   tl_assert(dstUsed <= nDst);

   *dstUsedP = dstUsed;
}
+
/* Write the cacheline 'wix' to backing store.  Where it ends up
   is determined by its tag field.  The line is first run-length
   encoded into csvals[]; if the encoding needs at most 4 distinct
   shadow values it is stored in the compressed (Z) representation,
   otherwise a full (F) line is allocated to hold it. */
static __attribute__((noinline)) void cacheline_wback ( UWord wix )
{
   Word i, j, k, m;
   Addr tag;
   SecMap* sm;
   CacheLine* cl;
   LineZ* lineZ;
   LineF* lineF;
   Word zix, fix, csvalsUsed;
   CountedSVal csvals[N_LINE_ARANGE];
   SVal sv;

   if (0)
      VG_(printf)("scache wback line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag = cache_shmem.tags0[wix];
   cl = &cache_shmem.lyns0[wix];

   /* The cache line may have been invalidated; if so, ignore it. */
   if (!is_valid_scache_tag(tag))
      return;

   /* Where are we going to put it? */
   sm = NULL;
   lineZ = NULL;
   lineF = NULL;
   zix = fix = -1;

   /* find the Z line to write in and rcdec it or the associated F
      line. */
   find_Z_for_writing( &sm, &zix, tag );

   tl_assert(sm);
   tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];

   /* Generate the data to be stored */
   if (SCE_CACHELINE)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */

   /* -1 beforehand so we can assert the callee really wrote it */
   csvalsUsed = -1;
   sequentialise_CacheLine( csvals, &csvalsUsed,
                            N_LINE_ARANGE, cl );
   tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
   if (0) VG_(printf)("%lu ", csvalsUsed);

   lineZ->dict[0] = lineZ->dict[1]
                  = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;

   /* i indexes actual shadow values, k is cursor in csvals */
   i = 0;
   for (k = 0; k < csvalsUsed; k++) {

      sv = csvals[k].sval;
      if (SCE_SVALS)
         tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
      /* do we already have it? */
      if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
      if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
      if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
      if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
      /* no. look for a free slot. */
      if (SCE_SVALS)
         tl_assert(sv != SVal_INVALID);
      if (lineZ->dict[0]
          == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
      if (lineZ->dict[1]
          == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
      if (lineZ->dict[2]
          == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
      if (lineZ->dict[3]
          == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
      break; /* we'll have to use the f rep */
     dict_ok:
      /* record dict index j for each of the m bytes of this run */
      m = csvals[k].count;
      if (m == 8) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         write_twobit_array( lineZ->ix2s, i+4, j );
         write_twobit_array( lineZ->ix2s, i+5, j );
         write_twobit_array( lineZ->ix2s, i+6, j );
         write_twobit_array( lineZ->ix2s, i+7, j );
         i += 8;
      }
      else if (m == 4) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         i += 4;
      }
      else if (m == 1) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         i += 1;
      }
      else if (m == 2) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         i += 2;
      }
      else {
         tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
      }

   }

   if (LIKELY(i == N_LINE_ARANGE)) {
      /* Construction of the compressed representation was
         successful. */
      rcinc_LineZ(lineZ);
      stats__cache_Z_wbacks++;
   } else {
      /* Cannot use the compressed(z) representation.  Use the full(f)
         rep instead. */
      tl_assert(i >= 0 && i < N_LINE_ARANGE);
      alloc_F_for_writing( sm, &fix );
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(!lineF->inUse);
      /* dict[1] is reused to hold the F-line index; the other three
         slots are set to SVal_INVALID. */
      lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
      lineZ->dict[1] = (SVal)fix;
      lineF->inUse = True;
      i = 0;
      for (k = 0; k < csvalsUsed; k++) {
         if (SCE_SVALS)
            tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
         sv = csvals[k].sval;
         if (SCE_SVALS)
            tl_assert(sv != SVal_INVALID);
         /* expand the run back into one value per byte */
         for (m = csvals[k].count; m > 0; m--) {
            lineF->w64s[i] = sv;
            i++;
         }
      }
      tl_assert(i == N_LINE_ARANGE);
      rcinc_LineF(lineF);
      stats__cache_F_wbacks++;
   }

   //if (anyShared)
   // sm->mbHasShared = True;

   /* mb_tidy_one_cacheline(); */
}
+
/* Fetch the cacheline 'wix' from the backing store.  The tag
   associated with 'wix' is assumed to have already been filled in;
   hence that is used to determine where in the backing store to read
   from. */
static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
{
   Word i;
   Addr tag;
   CacheLine* cl;
   LineZ* lineZ;
   LineF* lineF;

   if (0)
      VG_(printf)("scache fetch line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag = cache_shmem.tags0[wix];
   cl = &cache_shmem.lyns0[wix];

   /* reject nonsense requests */
   tl_assert(is_valid_scache_tag(tag));

   lineZ = NULL;
   lineF = NULL;
   find_ZF_for_reading( &lineZ, &lineF, tag );
   /* exactly one of the two representations must hold the line */
   tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );

   /* expand the data into the bottom layer of the tree, then get
      cacheline_normalise to build the descriptor array. */
   if (lineF) {
      /* full rep: one 64-bit shadow value per byte, copied directly */
      tl_assert(lineF->inUse);
      for (i = 0; i < N_LINE_ARANGE; i++) {
         cl->svals[i] = lineF->w64s[i];
      }
      stats__cache_F_fetches++;
   } else {
      /* compressed rep: each byte's value is dict[2-bit index] */
      for (i = 0; i < N_LINE_ARANGE; i++) {
         SVal sv;
         UWord ix = read_twobit_array( lineZ->ix2s, i );
         /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
         sv = lineZ->dict[ix];
         tl_assert(sv != SVal_INVALID);
         cl->svals[i] = sv;
      }
      stats__cache_Z_fetches++;
   }
   normalise_CacheLine( cl );
}
+
+static void shmem__invalidate_scache ( void ) {
+ Word wix;
+ if (0) VG_(printf)("%s","scache inval\n");
+ tl_assert(!is_valid_scache_tag(1));
+ for (wix = 0; wix < N_WAY_NENT; wix++) {
+ cache_shmem.tags0[wix] = 1/*INVALID*/;
+ }
+ stats__cache_invals++;
+}
+
+static void shmem__flush_and_invalidate_scache ( void ) {
+ Word wix;
+ Addr tag;
+ if (0) VG_(printf)("%s","scache flush and invalidate\n");
+ tl_assert(!is_valid_scache_tag(1));
+ for (wix = 0; wix < N_WAY_NENT; wix++) {
+ tag = cache_shmem.tags0[wix];
+ if (tag == 1/*INVALID*/) {
+ /* already invalid; nothing to do */
+ } else {
+ tl_assert(is_valid_scache_tag(tag));
+ cacheline_wback( wix );
+ }
+ cache_shmem.tags0[wix] = 1/*INVALID*/;
+ }
+ stats__cache_flushes++;
+ stats__cache_invals++;
+}
+
+
+static inline Bool aligned16 ( Addr a ) {
+ return 0 == (a & 1);
+}
+static inline Bool aligned32 ( Addr a ) {
+ return 0 == (a & 3);
+}
+static inline Bool aligned64 ( Addr a ) {
+ return 0 == (a & 7);
+}
+static inline UWord get_cacheline_offset ( Addr a ) {
+ return (UWord)(a & (N_LINE_ARANGE - 1));
+}
+static inline Addr cacheline_ROUNDUP ( Addr a ) {
+ return ROUNDUP(a, N_LINE_ARANGE);
+}
+static inline Addr cacheline_ROUNDDN ( Addr a ) {
+ return ROUNDDN(a, N_LINE_ARANGE);
+}
+static inline UWord get_treeno ( Addr a ) {
+ return get_cacheline_offset(a) >> 3;
+}
+static inline UWord get_tree_offset ( Addr a ) {
+ return a & 7;
+}
+
+static __attribute__((noinline))
+ CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
+static inline CacheLine* get_cacheline ( Addr a )
+{
+ /* tag is 'a' with the in-line offset masked out,
+ eg a[31]..a[4] 0000 */
+ Addr tag = a & ~(N_LINE_ARANGE - 1);
+ UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
+ stats__cache_totrefs++;
+ if (LIKELY(tag == cache_shmem.tags0[wix])) {
+ return &cache_shmem.lyns0[wix];
+ } else {
+ return get_cacheline_MISS( a );
+ }
+}
+
+static __attribute__((noinline))
+ CacheLine* get_cacheline_MISS ( Addr a )
+{
+ /* tag is 'a' with the in-line offset masked out,
+ eg a[31]..a[4] 0000 */
+
+ CacheLine* cl;
+ Addr* tag_old_p;
+ Addr tag = a & ~(N_LINE_ARANGE - 1);
+ UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
+
+ tl_assert(tag != cache_shmem.tags0[wix]);
+
+ /* Dump the old line into the backing store. */
+ stats__cache_totmisses++;
+
+ cl = &cache_shmem.lyns0[wix];
+ tag_old_p = &cache_shmem.tags0[wix];
+
+ if (is_valid_scache_tag( *tag_old_p )) {
+ /* EXPENSIVE and REDUNDANT: callee does it */
+ if (SCE_CACHELINE)
+ tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+ cacheline_wback( wix );
+ }
+ /* and reload the new one */
+ *tag_old_p = tag;
+ cacheline_fetch( wix );
+ if (SCE_CACHELINE)
+ tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+ return cl;
+}
+
+static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
+ stats__cline_64to32pulldown++;
+ switch (toff) {
+ case 0: case 4:
+ tl_assert(descr & TREE_DESCR_64);
+ tree[4] = tree[0];
+ descr &= ~TREE_DESCR_64;
+ descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
+ break;
+ default:
+ tl_assert(0);
+ }
+ return descr;
+}
+
+static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
+ stats__cline_32to16pulldown++;
+ switch (toff) {
+ case 0: case 2:
+ if (!(descr & TREE_DESCR_32_0)) {
+ descr = pulldown_to_32(tree, 0, descr);
+ }
+ tl_assert(descr & TREE_DESCR_32_0);
+ tree[2] = tree[0];
+ descr &= ~TREE_DESCR_32_0;
+ descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
+ break;
+ case 4: case 6:
+ if (!(descr & TREE_DESCR_32_1)) {
+ descr = pulldown_to_32(tree, 4, descr);
+ }
+ tl_assert(descr & TREE_DESCR_32_1);
+ tree[6] = tree[4];
+ descr &= ~TREE_DESCR_32_1;
+ descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
+ break;
+ default:
+ tl_assert(0);
+ }
+ return descr;
+}
+
/* Split the 16-bit node covering byte 'toff' into two 8-bit leaves,
   first pulling down from the 32/64-bit levels as needed, so that
   tree[toff] can be accessed individually.  Returns the updated
   descriptor. */
static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_16to8pulldown++;
   switch (toff) {
      case 0: case 1:
         if (!(descr & TREE_DESCR_16_0)) {
            descr = pulldown_to_16(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_16_0);
         tree[1] = tree[0];   /* duplicate value into both leaves */
         descr &= ~TREE_DESCR_16_0;
         descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
         break;
      case 2: case 3:
         if (!(descr & TREE_DESCR_16_1)) {
            descr = pulldown_to_16(tree, 2, descr);
         }
         tl_assert(descr & TREE_DESCR_16_1);
         tree[3] = tree[2];
         descr &= ~TREE_DESCR_16_1;
         descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
         break;
      case 4: case 5:
         if (!(descr & TREE_DESCR_16_2)) {
            descr = pulldown_to_16(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_16_2);
         tree[5] = tree[4];
         descr &= ~TREE_DESCR_16_2;
         descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
         break;
      case 6: case 7:
         if (!(descr & TREE_DESCR_16_3)) {
            descr = pulldown_to_16(tree, 6, descr);
         }
         tl_assert(descr & TREE_DESCR_16_3);
         tree[7] = tree[6];
         descr &= ~TREE_DESCR_16_3;
         descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}
+
+
/* Merge the two 8-bit descriptor bits for the 16-bit granule at tree
   offset 'toff' (0, 2, 4 or 6) into the corresponding 16-bit bit.
   Both 8-bit halves must currently be marked valid (asserted).
   Only the descriptor is changed; the tree data is untouched. */
static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_0;
         break;
      case 2:
         mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_1;
         break;
      case 4:
         mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_2;
         break;
      case 6:
         mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_3;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}
+
/* Merge the descriptor bits for the 32-bit granule at tree offset
   'toff' (0 or 4) into the corresponding 32-bit bit, first pulling
   up from the 8-bit level where necessary.  Only the descriptor is
   changed; the tree data is untouched. */
static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         if (!(descr & TREE_DESCR_16_0))
            descr = pullup_descr_to_16(descr, 0);
         if (!(descr & TREE_DESCR_16_1))
            descr = pullup_descr_to_16(descr, 2);
         mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_0;
         break;
      case 4:
         if (!(descr & TREE_DESCR_16_2))
            descr = pullup_descr_to_16(descr, 4);
         if (!(descr & TREE_DESCR_16_3))
            descr = pullup_descr_to_16(descr, 6);
         mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_1;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}
+
+static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
+ switch (toff) {
+ case 0: case 4:
+ return 0 != (descr & TREE_DESCR_64);
+ default:
+ tl_assert(0);
+ }
+}
+
+static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
+ switch (toff) {
+ case 0:
+ return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
+ case 2:
+ return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
+ case 4:
+ return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
+ case 6:
+ return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
+ default:
+ tl_assert(0);
+ }
+}
+
+/* ------------ Cache management ------------ */
+
/* Bring the backing store up to date with respect to the cache:
   write back all valid lines and invalidate the whole cache. */
static void zsm_flush_cache ( void )
{
   shmem__flush_and_invalidate_scache();
}
+
+
/* Initialise the shadow-memory (zsm) layer: record the SVal
   reference-count callbacks, create the (initially empty) shadow
   memory map, and invalidate the cache.  Must be called exactly
   once, before any other zsm operation. */
static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
{
   tl_assert( sizeof(UWord) == sizeof(Addr) );

   /* stash the refcount callbacks in the file-scope globals */
   rcinc = p_rcinc;
   rcdec = p_rcdec;

   tl_assert(map_shmem == NULL);
   map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
                           HG_(free),
                           NULL/*unboxed UWord cmp*/);
   tl_assert(map_shmem != NULL);
   shmem__invalidate_scache();

   /* a SecMap must contain an integral number of CacheLines */
   tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
   /* also ... a CacheLine holds an integral number of trees */
   tl_assert(0 == (N_LINE_ARANGE % 8));
}
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+// //
+// SECTION END compressed shadow memory //
+// //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+// //
+// SECTION BEGIN vts primitives //
+// //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+#ifndef __HB_VTS_H
+#define __HB_VTS_H
+
+/* VtsIDs can't exceed 30 bits, since they have to be packed into the
+ lowest 30 bits of an SVal. */
+typedef UInt VtsID;
+#define VtsID_INVALID 0xFFFFFFFF
+
+/* A VTS contains .ts, its vector clock, and also .id, a field to hold
+ a backlink for the caller's convenience. Since we have no idea
+ what to set that to in the library, it always gets set to
+ VtsID_INVALID. */
+typedef
+ struct {
+ VtsID id;
+ XArray* ts; /* XArray* ScalarTS(abstract) */
+ }
+ VTS;
+
+
+/* Create a new, empty VTS. */
+VTS* VTS__new ( void );
+
+/* Delete this VTS in its entirety. */
+void VTS__delete ( VTS* vts );
+
+/* Create a new singleton VTS. */
+VTS* VTS__singleton ( Thr* thr, ULong tym );
+
+/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
+ not modified. */
+VTS* VTS__tick ( Thr* me, VTS* vts );
+
+/* Return a new VTS constructed as the join (max) of the 2 args.
+ Neither arg is modified. */
+VTS* VTS__join ( VTS* a, VTS* b );
+
+/* Compute the partial ordering relation of the two args. */
+typedef
+ enum { POrd_EQ=4, POrd_LT, POrd_GT, POrd_UN }
+ POrd;
+
+POrd VTS__cmp ( VTS* a, VTS* b );
+
+/* Compute an arbitrary structural (total) ordering on the two args,
+ based on their VCs, so they can be looked up in a table, tree, etc.
+ Returns -1, 0 or 1. */
+Word VTS__cmp_structural ( VTS* a, VTS* b );
+
+/* Debugging only. Display the given VTS in the buffer. */
+void VTS__show ( HChar* buf, Int nBuf, VTS* vts );
+
+/* Debugging only. Return vts[index], so to speak. */
+ULong VTS__indexAt_SLOW ( VTS* vts, Thr* index );
+
+#endif /* ! __HB_VTS_H */
+
+
+/*--------------- to do with Vector Timestamps ---------------*/
+
/* Scalar Timestamp: one (thread, clock) component of a vector
   timestamp. */
typedef
   struct {
      Thr*  thr;   /* the thread this component belongs to */
      ULong tym;   /* that thread's scalar clock value */
   }
   ScalarTS;
+
+
+static Bool is_sane_VTS ( VTS* vts )
+{
+ UWord i, n;
+ ScalarTS *st1, *st2;
+ if (!vts) return False;
+ if (!vts->ts) return False;
+ n = VG_(sizeXA)( vts->ts );
+ if (n >= 2) {
+ for (i = 0; i < n-1; i++) {
+ st1 = VG_(indexXA)( vts->ts, i );
+ st2 = VG_(indexXA)( vts->ts, i+1 );
+ if (st1->thr >= st2->thr)
+ return False;
+ if (st1->tym == 0 || st2->tym == 0)
+ return False;
+ }
+ }
+ return True;
+}
+
+
+/* Create a new, empty VTS.
+*/
+VTS* VTS__new ( void )
+{
+ VTS* vts;
+ vts = HG_(zalloc)( "libhb.VTS__new.1", sizeof(VTS) );
+ tl_assert(vts);
+ vts->id = VtsID_INVALID;
+ vts->ts = VG_(newXA)( HG_(zalloc), "libhb.VTS__new.2",
+ HG_(free), sizeof(ScalarTS) );
+ tl_assert(vts->ts);
+ return vts;
+}
+
+
+/* Delete this VTS in its entirety.
+*/
+void VTS__delete ( VTS* vts )
+{
+ tl_assert(vts);
+ tl_assert(vts->ts);
+ VG_(deleteXA)( vts->ts );
+ HG_(free)(vts);
+}
+
+
+/* Create a new singleton VTS.
+*/
+VTS* VTS__singleton ( Thr* thr, ULong tym ) {
+ ScalarTS st;
+ VTS* vts;
+ tl_assert(thr);
+ tl_assert(tym >= 1);
+ vts = VTS__new();
+ st.thr = thr;
+ st.tym = tym;
+ VG_(addToXA)( vts->ts, &st );
+ return vts;
+}
+
+
/* Return a new VTS in which vts[me]++, so to speak.  'vts' itself is
   not modified.  Entries are kept sorted by Thr*; if 'me' has no
   entry yet, one is inserted with clock value 1 at the correct
   position. */
VTS* VTS__tick ( Thr* me, VTS* vts )
{
   ScalarTS* here = NULL;
   ScalarTS tmp;
   VTS* res;
   Word i, n;
   tl_assert(me);
   tl_assert(is_sane_VTS(vts));
   //if (0) VG_(printf)("tick vts thrno %ld szin %d\n",
   // (Word)me->errmsg_index, (Int)VG_(sizeXA)(vts) );
   res = VTS__new();
   n = VG_(sizeXA)( vts->ts );

   /* main loop doesn't handle zero-entry case correctly, so
      special-case it. */
   if (n == 0) {
      tmp.thr = me;
      tmp.tym = 1;
      VG_(addToXA)( res->ts, &tmp );
      tl_assert(is_sane_VTS(res));
      return res;
   }

   /* Copy entries with thr < me verbatim.  On reaching 'me's slot,
      either insert a fresh (me,1) entry (if 'me' was absent) or copy
      'me's entry with the clock incremented, then break with i
      pointing at the first entry not yet copied. */
   for (i = 0; i < n; i++) {
      here = VG_(indexXA)( vts->ts, i );
      if (me < here->thr) {
         /* We just went past 'me', without seeing it. */
         tmp.thr = me;
         tmp.tym = 1;
         VG_(addToXA)( res->ts, &tmp );
         tmp = *here;
         VG_(addToXA)( res->ts, &tmp );
         i++;
         break;
      }
      else if (me == here->thr) {
         tmp = *here;
         tmp.tym++;
         VG_(addToXA)( res->ts, &tmp );
         i++;
         break;
      }
      else /* me > here->thr */ {
         tmp = *here;
         VG_(addToXA)( res->ts, &tmp );
      }
   }
   tl_assert(i >= 0 && i <= n);
   /* If we fell off the end without passing or finding 'me', its new
      entry goes last; otherwise copy the remaining tail. */
   if (i == n && here && here->thr < me) {
      tmp.thr = me;
      tmp.tym = 1;
      VG_(addToXA)( res->ts, &tmp );
   } else {
      for (/*keepgoing*/; i < n; i++) {
         here = VG_(indexXA)( vts->ts, i );
         tmp = *here;
         VG_(addToXA)( res->ts, &tmp );
      }
   }
   tl_assert(is_sane_VTS(res));
   //if (0) VG_(printf)("tick vts thrno %ld szou %d\n",
   // (Word)me->errmsg_index, (Int)VG_(sizeXA)(res) );
   return res;
}
+
+
/* Return a new VTS constructed as the join (max) of the 2 args.
   Neither arg is modified.  The two Thr*-sorted vectors are walked
   in merge order; a thread missing from one argument contributes an
   implicit zero, and threads whose joined clock is zero are omitted
   from the result. */
VTS* VTS__join ( VTS* a, VTS* b )
{
   Word ia, ib, useda, usedb;
   ULong tyma, tymb, tymMax;
   Thr* thr;
   VTS* res;
   ScalarTS *tmpa, *tmpb;

   tl_assert(a && a->ts);
   tl_assert(b && b->ts);
   useda = VG_(sizeXA)( a->ts );
   usedb = VG_(sizeXA)( b->ts );

   res = VTS__new();
   ia = ib = 0;

   while (1) {

      /* This logic is to enumerate triples (thr, tyma, tymb) drawn
         from a and b in order, where thr is the next Thr*
         occurring in either a or b, and tyma/b are the relevant
         scalar timestamps, taking into account implicit zeroes. */
      tl_assert(ia >= 0 && ia <= useda);
      tl_assert(ib >= 0 && ib <= usedb);
      tmpa = tmpb = NULL;

      if (ia == useda && ib == usedb) {
         /* both empty - done */
         break;
      }
      else
      if (ia == useda && ib != usedb) {
         /* a empty, use up b */
         tmpb = VG_(indexXA)( b->ts, ib );
         thr = tmpb->thr;
         tyma = 0;
         tymb = tmpb->tym;
         ib++;
      }
      else
      if (ia != useda && ib == usedb) {
         /* b empty, use up a */
         tmpa = VG_(indexXA)( a->ts, ia );
         thr = tmpa->thr;
         tyma = tmpa->tym;
         tymb = 0;
         ia++;
      }
      else {
         /* both not empty; extract lowest-Thr*'d triple */
         tmpa = VG_(indexXA)( a->ts, ia );
         tmpb = VG_(indexXA)( b->ts, ib );
         if (tmpa->thr < tmpb->thr) {
            /* a has the lowest unconsidered Thr* */
            thr = tmpa->thr;
            tyma = tmpa->tym;
            tymb = 0;
            ia++;
         }
         else
         if (tmpa->thr > tmpb->thr) {
            /* b has the lowest unconsidered Thr* */
            thr = tmpb->thr;
            tyma = 0;
            tymb = tmpb->tym;
            ib++;
         } else {
            /* they both next mention the same Thr* */
            tl_assert(tmpa->thr == tmpb->thr);
            thr = tmpa->thr; /* == tmpb->thr */
            tyma = tmpa->tym;
            tymb = tmpb->tym;
            ia++;
            ib++;
         }
      }

      /* having laboriously determined (thr, tyma, tymb), do something
         useful with it: append max(tyma,tymb) for thr, unless zero. */
      tymMax = tyma > tymb ? tyma : tymb;
      if (tymMax > 0) {
         ScalarTS st;
         st.thr = thr;
         st.tym = tymMax;
         VG_(addToXA)( res->ts, &st );
      }

   }

   tl_assert(is_sane_VTS( res ));

   return res;
}
+
+
+/* Compute the partial ordering relation of the two args.
+*/
+POrd VTS__cmp ( VTS* a, VTS* b )
+{
+ Word ia, ib, useda, usedb;
+ ULong tyma, tymb;
+ Thr* thr;
+ ScalarTS *tmpa, *tmpb;
+
+ Bool all_leq = True;
+ Bool all_geq = True;
+
+ tl_assert(a && a->ts);
+ tl_assert(b && b->ts);
+ useda = VG_(sizeXA)( a->ts );
+ usedb = VG_(sizeXA)( b->ts );
+
+ ia = ib = 0;
+
+ while (1) {
+
+ /* This logic is to enumerate triples (thr, tyma, tymb) drawn
+ from a and b in order, where thr is the next Thr*
+ occurring in either a or b, and tyma/b are the relevant
+ scalar timestamps, taking into account implicit zeroes. */
+ tl_assert(ia >= 0 && ia <= useda);
+ tl_assert(ib >= 0 && ib <= usedb);
+ tmpa = tmpb = NULL;
+
+ if (ia == useda && ib == usedb) {
+ /* both empty - done */
+ break;
+ }
+ else
+ if (ia == useda && ib != usedb) {
+ /* a empty, use up b */
+ tmpb = VG_(indexXA)( b->ts, ib );
+ thr = tmpb->thr;
+ tyma = 0;
+ tymb = tmpb->tym;
+ ib++;
+ }
+ else
+ if (ia != useda && ib == usedb) {
+ /* b empty, use up a */
+ tmpa = VG_(indexXA)( a->ts, ia );
+ thr = tmpa->thr;
+ tyma = tmpa->tym;
+ tymb = 0;
+ ia++;
+ }
+ else {
+ /* both not empty; extract lowest-Thr*'d triple */
+ tmpa = VG_(indexXA)( a->ts, ia );
+ tmpb = VG_(indexXA)( b->ts, ib );
+ if (tmpa->thr < tmpb->thr) {
+ /* a has the lowest unconsidered Thr* */
+ thr = tmpa->thr;
+ tyma = tmpa->tym;
+ tymb = 0;
+ ia++;
+ }
+ else
+ if (tmpa->thr > tmpb->thr) {
+ /* b has the lowest unconsidered Thr* */
+ thr = tmpb->thr;
+ tyma = 0;
+ tymb = tmpb->tym;
+ ib++;
+ } else {
+ /* they both next mention the same Thr* */
+ tl_assert(tmpa->thr == tmpb->thr);
+ thr = tmpa->thr; /* == tmpb->thr */
+ tyma = tmpa->tym;
+ tymb = tmpb->tym;
+ ia++;
+ ib++;
+ }
+ }
+
+ /* having laboriously determined (thr, tyma, tymb), do something
+ useful with it. */
+ if (tyma < tymb)
+ all_geq = False;
+ if (tyma > tymb)
+ all_leq = False;
+ }
+
+ if (all_leq && all_geq)
+ return POrd_EQ;
+ /* now we know they aren't equal, so either all_leq or all_geq or
+ both are false. */
+ if (all_leq)
+ return POrd_LT;
+ if (all_geq)
+ return POrd_GT;
+ /* hmm, neither all_geq or all_leq. This means unordered. */
+ return POrd_UN;
+}
+
+
+/* Compute an arbitrary structural (total) ordering on the two args,
+ based on their VCs, so they can be looked up in a table, tree, etc.
+ Returns -1, 0 or 1. (really just 'deriving Ord' :-)
+*/
+Word VTS__cmp_structural ( VTS* a, VTS* b )
+{
+ /* We just need to generate an arbitrary total ordering based on
+ a->ts and b->ts. Preferably do it in a way which comes across likely
+ differences relatively quickly. */
+ Word i, useda, usedb;
+ ScalarTS *tmpa, *tmpb;
+
+ tl_assert(a && a->ts);
+ tl_assert(b && b->ts);
+ useda = VG_(sizeXA)( a->ts );
+ usedb = VG_(sizeXA)( b->ts );
+
+ if (useda < usedb) return -1;
+ if (useda > usedb) return 1;
+
+ /* Same length vectors, so let's step through them together. */
+ tl_assert(useda == usedb);
+ for (i = 0; i < useda; i++) {
+ tmpa = VG_(indexXA)( a->ts, i );
+ tmpb = VG_(indexXA)( b->ts, i );
+ if (tmpa->tym < tmpb->tym) return -1;
+ if (tmpa->tym > tmpb->tym) return 1;
+ if (tmpa->thr < tmpb->thr) return -1;
+ if (tmpa->thr > tmpb->thr) return 1;
+ }
+
+ /* They're identical. */
+ return 0;
+}
+
+
/* Debugging only.  Display the given VTS in the buffer as
   "[thr:tym thr:tym ...]".  If 'buf' (of size nBuf) runs short the
   output is truncated with " ...]".  buf is always NUL-terminated. */
void VTS__show ( HChar* buf, Int nBuf, VTS* vts ) {
   ScalarTS* st;
   HChar unit[64];
   Word i, n;
   Int avail = nBuf;   /* approx. bytes of buf still unused */
   tl_assert(vts && vts->ts);
   tl_assert(nBuf > 16);
   buf[0] = '[';
   buf[1] = 0;
   n = VG_(sizeXA)( vts->ts );
   for (i = 0; i < n; i++) {
      tl_assert(avail >= 40);
      st = VG_(indexXA)( vts->ts, i );
      VG_(memset)(unit, 0, sizeof(unit));
      /* trailing space between entries, but not after the last */
      VG_(sprintf)(unit, i < n-1 ? "%p:%lld " : "%p:%lld",
                   st->thr, st->tym);
      /* 40-byte slack leaves room for the truncation marker and ']' */
      if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
         VG_(strcat)(buf, " ...]");
         buf[nBuf-1] = 0;
         return;
      }
      VG_(strcat)(buf, unit);
      avail -= VG_(strlen)(unit);
   }
   VG_(strcat)(buf, "]");
   buf[nBuf-1] = 0;
}
+
+
+/* Debugging only. Return vts[index], so to speak.
+*/
+ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) {
+ UWord i, n;
+ tl_assert(vts && vts->ts);
+ n = VG_(sizeXA)( vts->ts );
+ for (i = 0; i < n; i++) {
+ ScalarTS* st = VG_(indexXA)( vts->ts, i );
+ if (st->thr == idx)
+ return st->tym;
+ }
+ return 0;
+}
+
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+// //
+// SECTION END vts primitives //
+// //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+// //
+// SECTION BEGIN main library //
+// //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+
+/////////////////////////////////////////////////////////
+// //
+// VTS set //
+// //
+/////////////////////////////////////////////////////////
+
+static WordFM* /* VTS* void void */ vts_set = NULL;
+
+static void vts_set_init ( void )
+{
+ tl_assert(!vts_set);
+ vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
+ HG_(free),
+ (Word(*)(UWord,UWord))VTS__cmp_structural );
+ tl_assert(vts_set);
+}
+
+/* Given a newly made VTS, look in vts_set to see if we already have
+ an identical one. If yes, free up this one and return instead a
+ pointer to the existing one. If no, add this one to the set and
+ return the same pointer. Caller differentiates the two cases by
+ comparing returned pointer with the supplied one (although that
+ does require that the supplied VTS is not already in the set).
+*/
+static VTS* vts_set__find_and_dealloc__or_add ( VTS* cand )
+{
+ UWord keyW, valW;
+ /* lookup cand (by value) */
+ if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
+ /* found it */
+ tl_assert(valW == 0);
+ /* if this fails, cand (by ref) was already present (!) */
+ tl_assert(keyW != (UWord)cand);
+ VTS__delete(cand);
+ return (VTS*)keyW;
+ } else {
+ /* not present. Add and return pointer to same. */
+ VG_(addToFM)( vts_set, (UWord)cand, 0/*val is unused*/ );
+ return cand;
+ }
+}
+
+
+/////////////////////////////////////////////////////////
+// //
+// VTS table //
+// //
+/////////////////////////////////////////////////////////
+
+static void VtsID__invalidate_caches ( void ); /* fwds */
+
/* A type to hold VTS table entries.  Invariants:
   If .vts == NULL, then this entry is not in use, so:
   - .rc == 0
   - this entry is on the freelist (unfortunately, does not imply
     any constraints on value for .freelink)
   If .vts != NULL, then this entry is in use:
   - .vts is findable in vts_set
   - .vts->id == this entry number
   - no specific value for .rc (even 0 is OK)
   - this entry is not on freelist, so .freelink == VtsID_INVALID
*/
typedef
   struct {
      VTS* vts; /* vts, in vts_set; NULL when the entry is free */
      UWord rc; /* reference count - enough for entire aspace */
      VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
   }
   VtsTE;
+
+/* The VTS table. */
+static XArray* /* of VtsTE */ vts_tab = NULL;
+
+/* An index into the VTS table, indicating the start of the list of
+ free (available for use) entries. If the list is empty, this is
+ VtsID_INVALID. */
+static VtsID vts_tab_freelist = VtsID_INVALID;
+
+/* Do a GC of vts_tab when the freelist becomes empty AND the size of
+ vts_tab equals or exceeds this size. After GC, the value here is
+ set appropriately so as to check for the next GC point. */
+static Word vts_next_GC_at = 1000;
+
+static void vts_tab_init ( void )
+{
+ vts_tab
+ = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
+ HG_(free), sizeof(VtsTE) );
+ vts_tab_freelist
+ = VtsID_INVALID;
+ tl_assert(vts_tab);
+}
+
+/* Add ii to the free list, checking that it looks out-of-use. */
+static void add_to_free_list ( VtsID ii )
+{
+ VtsTE* ie = VG_(indexXA)( vts_tab, ii );
+ tl_assert(ie->vts == NULL);
+ tl_assert(ie->rc == 0);
+ tl_assert(ie->freelink == VtsID_INVALID);
+ ie->freelink = vts_tab_freelist;
+ vts_tab_freelist = ii;
+}
+
+/* Get an entry from the free list. This will return VtsID_INVALID if
+ the free list is empty. */
+static VtsID get_from_free_list ( void )
+{
+ VtsID ii;
+ VtsTE* ie;
+ if (vts_tab_freelist == VtsID_INVALID)
+ return VtsID_INVALID;
+ ii = vts_tab_freelist;
+ ie = VG_(indexXA)( vts_tab, ii );
+ tl_assert(ie->vts == NULL);
+ tl_assert(ie->rc == 0);
+ vts_tab_freelist = ie->freelink;
+ return ii;
+}
+
+/* Produce a new VtsID that can be used, either by getting it from
+ the freelist, or, if that is empty, by expanding vts_tab. */
+static VtsID get_new_VtsID ( void )
+{
+ VtsID ii;
+ VtsTE te;
+ ii = get_from_free_list();
+ if (ii != VtsID_INVALID)
+ return ii;
+ te.vts = NULL;
+ te.rc = 0;
+ te.freelink = VtsID_INVALID;
+ ii = (VtsID)VG_(addToXA)( vts_tab, &te );
+ return ii;
+}
+
+
+/* Indirect callback from lib_zsm. */
+static void VtsID__rcinc ( VtsID ii )
+{
+ VtsTE* ie;
+ /* VG_(indexXA) does a range check for us */
+ ie = VG_(indexXA)( vts_tab, ii );
+ tl_assert(ie->vts); /* else it's not in use */
+ tl_assert(ie->rc < ~0UL); /* else we can't continue */
+ tl_assert(ie->vts->id == ii);
+ ie->rc++;
+}
+
+/* Indirect callback from lib_zsm. */
+static void VtsID__rcdec ( VtsID ii )
+{
+ VtsTE* ie;
+ /* VG_(indexXA) does a range check for us */
+ ie = VG_(indexXA)( vts_tab, ii );
+ tl_assert(ie->vts); /* else it's not in use */
+ tl_assert(ie->rc > 0); /* else RC snafu */
+ tl_assert(ie->vts->id == ii);
+ ie->rc--;
+}
+
+
/* Look up 'cand' in our collection of VTSs.  If present, deallocate
   it and return the VtsID for the pre-existing version.  If not
   present, add it to both vts_tab and vts_set, allocate a fresh VtsID
   for it, and return that. */
static VtsID vts_tab__find_and_dealloc__or_add ( VTS* cand )
{
   VTS* auld;
   /* cand must not already have been registered */
   tl_assert(cand->id == VtsID_INVALID);
   auld = vts_set__find_and_dealloc__or_add(cand);
   if (auld != cand) {
      /* We already have an Aulde one.  Use that; cand has been freed
         by the callee. */
      VtsTE* ie;
      tl_assert(auld->id != VtsID_INVALID);
      ie = VG_(indexXA)( vts_tab, auld->id );
      tl_assert(ie->vts == auld);
      return auld->id;
   } else {
      /* cand is now the canonical copy: give it a table slot and
         record the backlink in cand->id. */
      VtsID ii = get_new_VtsID();
      VtsTE* ie = VG_(indexXA)( vts_tab, ii );
      ie->vts = cand;
      ie->rc = 0;
      ie->freelink = VtsID_INVALID;
      cand->id = ii;
      return ii;
   }
}
+
+
+static void show_vts_stats ( HChar* caller )
+{
+ UWord nSet, nTab, nLive;
+ ULong totrc;
+ UWord n, i;
+ nSet = VG_(sizeFM)( vts_set );
+ nTab = VG_(sizeXA)( vts_tab );
+ totrc = 0;
+ nLive = 0;
+ n = VG_(sizeXA)( vts_tab );
+ for (i = 0; i < n; i++) {
+ VtsTE* ie = VG_(indexXA)( vts_tab, i );
+ if (ie->vts) {
+ nLive++;
+ totrc += (ULong)ie->rc;
+ } else {
+ tl_assert(ie->rc == 0);
+ }
+ }
+ VG_(printf)(" show_vts_stats %s\n", caller);
+ VG_(printf)(" vts_tab size %4lu\n", nTab);
+ VG_(printf)(" vts_tab live %4lu\n", nLive);
+ VG_(printf)(" vts_set size %4lu\n", nSet);
+ VG_(printf)(" total rc %4llu\n", totrc);
+}
+
+/* NOT TO BE CALLED FROM WITHIN libzsm. */
+static void vts_tab__do_GC ( Bool show_stats )
+{
+ UWord i, nTab, nLive, nFreed;
+
+ /* check this is actually necessary. */
+ tl_assert(vts_tab_freelist == VtsID_INVALID);
+
+ /* empty the caches for partial order checks and binary joins. We
+ could do better and prune out the entries to be deleted, but it
+ ain't worth the hassle. */
+ VtsID__invalidate_caches();
+
+ /* First, make the reference counts up to date. */
+ zsm_flush_cache();
+
+ nTab = VG_(sizeXA)( vts_tab );
+
+ if (show_stats) {
+ VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
+ show_vts_stats("before GC");
+ }
+
+ /* Now we can inspect the entire vts_tab. Any entries
+ with zero .rc fields are now no longer in use and can be
+ free list, removed from vts_set, and deleted. */
+ nFreed = 0;
+ for (i = 0; i < nTab; i++) {
+ Bool present;
+ UWord oldK = 0, oldV = 0;
+ VtsTE* te = VG_(indexXA)( vts_tab, i );
+ if (te->vts == NULL) {
+ tl_assert(te->rc == 0);
+ continue; /* already on the free list (presumably) */
+ }
+ if (te->rc > 0)
+ continue; /* in use */
+ /* Ok, we got one we can free. */
+ tl_assert(te->vts->id == i);
+ /* first, remove it from vts_set. */
+ present = VG_(delFromFM)( vts_set,
+ &oldK, &oldV, (UWord)te->vts );
+ tl_assert(present); /* else it isn't in vts_set ?! */
+ tl_assert(oldV == 0); /* no info stored in vts_set val fields */
+ tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
+ /* now free the VTS itself */
+ VTS__delete(te->vts);
+ te->vts = NULL;
+ /* and finally put this entry on the free list */
+ tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
+ add_to_free_list( i );
+ nFreed++;
+ }
+
+ /* Now figure out when the next GC should be. We'll allow the
+ number of VTSs to double before GCing again. Except of course
+ that since we can't (or, at least, don't) shrink vts_tab, we
+ can't set the threshhold value smaller than it. */
+ tl_assert(nFreed <= nTab);
+ nLive = nTab - nFreed;
+ tl_assert(nLive >= 0 && nLive <= nTab);
+ vts_next_GC_at = 2 * nLive;
+ if (vts_next_GC_at < nTab)
+ vts_next_GC_at = nTab;
+
+ if (show_stats) {
+ show_vts_stats("after GC");
+ VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
+ }
+
+ if (1) {
+ static UInt ctr = 0;
+ tl_assert(nTab > 0);
+ VG_(printf)("libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)\n",
+ ctr++, nTab, nLive, (100ULL * nLive) / nTab);
+ }
+}
+
+
+/////////////////////////////////////////////////////////
+// //
+// Vts IDs //
+// //
+/////////////////////////////////////////////////////////
+
+//////////////////////////
/* Cache-effectiveness counters for the two VtsID caches below:
   total queries vs. cache misses, for getOrdering and join2. */
static ULong stats__getOrdering_queries = 0;
static ULong stats__getOrdering_misses = 0;
static ULong stats__join2_queries = 0;
static ULong stats__join2_misses = 0;
+
+static inline UInt ROL32 ( UInt w, Int n ) {
+ w = (w << n) | (w >> (32-n));
+ return w;
+}
+static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
+ UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
+ return hash % nTab;
+}
+
/* Direct-mapped cache of recent getOrdering results, keyed on the
   (vi1,vi2) pair.  1023 (not 1024) so the modulo in hash_VtsIDs
   mixes the bits rather than just masking. */
#define N_GETORDERING_CACHE 1023
static
   struct { VtsID vi1; VtsID vi2; POrd ord; }
   getOrdering_cache[N_GETORDERING_CACHE];

/* Same idea, for binary joins. */
#define N_JOIN2_CACHE 1023
static
   struct { VtsID vi1; VtsID vi2; VtsID res; }
   join2_cache[N_JOIN2_CACHE];
+
+static void VtsID__invalidate_caches ( void ) {
+ Int i;
+ for (i = 0; i < N_GETORDERING_CACHE; i++) {
+ getOrdering_cache[i].vi1 = VtsID_INVALID;
+ getOrdering_cache[i].vi2 = VtsID_INVALID;
+ getOrdering_cache[i].ord = 0; /* an invalid POrd value */
+ }
+ for (i = 0; i < N_JOIN2_CACHE; i++) {
+ join2_cache[i].vi1 = VtsID_INVALID;
+ join2_cache[i].vi2 = VtsID_INVALID;
+ join2_cache[i].res = VtsID_INVALID;
+ }
+}
+//////////////////////////
+
+static Bool VtsID__is_valid ( VtsID vi ) {
+ VtsTE* ve;
+ if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
+ return False;
+ ve = VG_(indexXA)( vts_tab, vi );
+ if (!ve->vts)
+ return False;
+ tl_assert(ve->vts->id == vi);
+ return True;
+}
+
+static VTS* VtsID__to_VTS ( VtsID vi ) {
+ VtsTE* te = VG_(indexXA)( vts_tab, vi );
+ tl_assert(te->vts);
+ return te->vts;
+}
+
+static void VtsID__pp ( VtsID vi ) {
+ HChar buf[100];
+ VTS* vts = VtsID__to_VTS(vi);
+ VTS__show( buf, sizeof(buf)-1, vts );
+ buf[sizeof(buf)-1] = 0;
+ VG_(printf)("%s", buf);
+}
+
+/* compute partial ordering relation of vi1 and vi2. */
+__attribute__((noinline))
+static POrd VtsID__getOrdering_WRK ( VtsID vi1, VtsID vi2 ) {
+ UInt hash;
+ POrd ord;
+ VTS *v1, *v2;
+ //if (vi1 == vi2) return POrd_EQ;
+ tl_assert(vi1 != vi2);
+ ////++
+ stats__getOrdering_queries++;
+ hash = hash_VtsIDs(vi1, vi2, N_GETORDERING_CACHE);
+ if (getOrdering_cache[hash].vi1 == vi1
+ && getOrdering_cache[hash].vi2 == vi2)
+ return getOrdering_cache[hash].ord;
+ stats__getOrdering_misses++;
+ ////--
+ v1 = VtsID__to_VTS(vi1);
+ v2 = VtsID__to_VTS(vi2);
+ ord = VTS__cmp( v1, v2 );
+ ////++
+ getOrdering_cache[hash].vi1 = vi1;
+ getOrdering_cache[hash].vi2 = vi2;
+ getOrdering_cache[hash].ord = ord;
+ ////--
+ return ord;
+}
+static inline POrd VtsID__getOrdering ( VtsID vi1, VtsID vi2 ) {
+ return vi1 == vi2 ? POrd_EQ : VtsID__getOrdering_WRK(vi1, vi2);
+}
+
+/* compute binary join */
+__attribute__((noinline))
+static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
+ UInt hash;
+ VtsID res;
+ VTS *vts1, *vts2, *nyu;
+ //if (vi1 == vi2) return vi1;
+ tl_assert(vi1 != vi2);
+ ////++
+ stats__join2_queries++;
+ hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
+ if (join2_cache[hash].vi1 == vi1
+ && join2_cache[hash].vi2 == vi2)
+ return join2_cache[hash].res;
+ stats__join2_misses++;
+ ////--
+ vts1 = VtsID__to_VTS(vi1);
+ vts2 = VtsID__to_VTS(vi2);
+ nyu = VTS__join(vts1,vts2);
+ res = vts_tab__find_and_dealloc__or_add(nyu);
+ ////++
+ join2_cache[hash].vi1 = vi1;
+ join2_cache[hash].vi2 = vi2;
+ join2_cache[hash].res = res;
+ ////--
+ return res;
+}
+static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
+ return vi1 == vi2 ? vi1 : VtsID__join2_WRK(vi1, vi2);
+}
+
+/* create a singleton VTS, namely [thr:1] */
+static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
+ VTS* nyu = VTS__singleton(thr,tym);
+ return vts_tab__find_and_dealloc__or_add(nyu);
+}
+
+/* tick operation, creates value 1 if specified index is absent */
+static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
+ VTS* vts = VtsID__to_VTS(vi);
+ VTS* nyu = VTS__tick(idx,vts);
+ return vts_tab__find_and_dealloc__or_add(nyu);
+}
+
+/* index into a VTS (only for assertions) */
+static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
+ VTS* vts = VtsID__to_VTS(vi);
+ return VTS__indexAt_SLOW( vts, idx );
+}
+
+
+/////////////////////////////////////////////////////////
+// //
+// Threads //
+// //
+/////////////////////////////////////////////////////////
+
struct _Thr {
   /* Current VTSs for this thread.  They change as we go along.  viR
      is the VTS to be used for reads, viW for writes.  Usually they
      are the same, but can differ when we deal with reader-writer
      locks.  It is always the case that VtsID__getOrdering(viW,viR)
      == POrd_LT or POrd_EQ -- that is, viW must be the same as, or
      lagging behind, viR. */
   VtsID viR;
   VtsID viW;
   /* opaque (to us) data we hold on behalf of the library's user. */
   void* opaque;
};
+
+static Thr* Thr__new ( void ) {
+ Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
+ thr->viR = VtsID_INVALID;
+ thr->viW = VtsID_INVALID;
+ return thr;
+}
+
+
+/////////////////////////////////////////////////////////
+// //
+// Shadow Values //
+// //
+/////////////////////////////////////////////////////////
+
+// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
+// hb_zsm.h. We have to do everything else here.
+
+/* SVal is 64 bit unsigned int.
+
+ <---------30---------> <---------30--------->
+ 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
+ 01 X--------------------X XX X--------------------X E(rror)
+ 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
+ 11 X--------------------X XX X--------------------X I: SVal_INVALID
+*/
/* The tag is the top two bits (63:62) of an SVal; see diagram above. */
#define SVAL_TAGMASK (3ULL << 62)
+
+static inline Bool SVal__isC ( SVal s ) {
+ return (0ULL << 62) == (s & SVAL_TAGMASK);
+}
+static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
+ //tl_assert(VtsID__is_valid(rmini));
+ //tl_assert(VtsID__is_valid(wmini));
+ return (((ULong)rmini) << 32) | ((ULong)wmini);
+}
+static inline VtsID SVal__unC_Rmin ( SVal s ) {
+ tl_assert(SVal__isC(s));
+ return (VtsID)(s >> 32);
+}
+static inline VtsID SVal__unC_Wmin ( SVal s ) {
+ tl_assert(SVal__isC(s));
+ return (VtsID)(s & 0xFFFFFFFFULL);
+}
+
+static Bool SVal__isE ( SVal s ) {
+ return (1ULL << 62) == (s & SVAL_TAGMASK);
+}
+static SVal SVal__mkE ( void ) {
+ return 1ULL << 62;
+}
+
+static Bool SVal__isA ( SVal s ) {
+ return (2ULL << 62) == (s & SVAL_TAGMASK);
+}
+static SVal SVal__mkA ( void ) {
+ return 2ULL << 62;
+}
+
+/* Direct callback from lib_zsm. */
+static void SVal__rcinc ( SVal s ) {
+ if (SVal__isC(s)) {
+ VtsID__rcinc( SVal__unC_Rmin(s) );
+ VtsID__rcinc( SVal__unC_Wmin(s) );
+ }
+}
+
+/* Direct callback from lib_zsm. */
+static void SVal__rcdec ( SVal s ) {
+ if (SVal__isC(s)) {
+ VtsID__rcdec( SVal__unC_Rmin(s) );
+ VtsID__rcdec( SVal__unC_Wmin(s) );
+ }
+}
+
+
+/////////////////////////////////////////////////////////
+// //
+// Change-event map2 //
+// //
+/////////////////////////////////////////////////////////
+
/* Consider GCing the event map once it holds this many OldRefs... */
#define EVENT_MAP_GC_AT (1 * 1000 * 1000)
/* ...and on each GC aim to discard roughly this fraction of entries
   (oldest generations first). */
#define EVENT_MAP_GC_DISCARD_FRACTION 0.5
+
+/* This is in two parts:
+
+ 1. An OSet of RCECs. This is a set of reference-counted stack
+ traces. When the reference count of a stack trace becomes zero,
+ it is removed from the set and freed up. The intent is to have
+ a set of stack traces which can be referred to from (2), but to
+ only represent each one once. The set is indexed/searched by
+ ordering on the stack trace vectors.
+
+ 2. An OSet of OldRefs. These store information about each old ref
+ that we need to record. It is indexed by address of the
+ location for which the information is recorded. For LRU
+ purposes, each OldRef also contains a generation number,
+ indicating when it was most recently accessed.
+
+ The important part of an OldRef is, however, its accs[] array.
+ This is an array of N_OLDREF_ACCS pairs of Thr and a RCEC. This
+ allows us to collect the last access-traceback by up to
+ N_OLDREF_ACCS different threads for this location. The accs[]
+ array is a MTF-array. If a pair falls off the end, that's too
+ bad -- we will lose info about that thread's access to this
+ location.
+
+ When this OSet becomes too big, we can throw away the entries
+ whose generation numbers are below some threshold; hence doing
+ approximate LRU discarding. For each discarded OldRef we must
   of course decrement the reference count on all the RCECs it
+ refers to, in order that entries from (1) eventually get
+ discarded too.
+*/
+
+
/* Counters for the three places an RCEC refcount is decremented
   (update-in-place, slot eviction, OldRef GC), plus totals. */
static UWord stats__ctxt_rcdec1 = 0;
static UWord stats__ctxt_rcdec2 = 0;
static UWord stats__ctxt_rcdec3 = 0;
static UWord stats__ctxt_rcdec_calls = 0;
static UWord stats__ctxt_rcdec_discards = 0;
static UWord stats__ctxt_rcdec1_eq = 0;

/* Current and high-water-mark population of the RCEC hash table. */
static UWord stats__ctxt_tab_curr = 0;
static UWord stats__ctxt_tab_max = 0;

/* Lookup traffic on the RCEC hash table: queries and full
   stack-trace comparisons performed. */
static UWord stats__ctxt_tab_qs = 0;
static UWord stats__ctxt_tab_cmps = 0;
+
+
+///////////////////////////////////////////////////////
+//// Part (1): An OSet of RCECs
+///
+
/* Number of stack frames captured per execution context. */
#define N_FRAMES 8

// (UInt) `echo "Reference Counted Execution Context" | md5sum`
#define RCEC_MAGIC 0xab88abb2UL

//#define N_RCEC_TAB 98317 /* prime */
#define N_RCEC_TAB 196613 /* prime */

/* A reference-counted stack trace, chained into a bucket of the
   contextTab hash table. */
typedef
   struct _RCEC {
      struct _RCEC* next;  /* hash-chain link */
      UWord magic;         /* == RCEC_MAGIC */
      UWord rc;            /* reference count */
      UWord rcX; /* used for crosschecking */
      UWord frames[1 + N_FRAMES]; /* first word is hash of all the rest */
   }
   RCEC;

static RCEC** contextTab = NULL; /* hash table of RCEC*s */
+
+
+/* Gives an arbitrary total order on RCEC .frames fields */
+static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
+ Word i;
+ tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
+ tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
+ if (ec1->frames[0] < ec2->frames[0]) return -1;
+ if (ec1->frames[0] > ec2->frames[0]) return 1;
+ for (i = 1; i < 1 + N_FRAMES; i++) {
+ if (ec1->frames[i] < ec2->frames[i]) return -1;
+ if (ec1->frames[i] > ec2->frames[i]) return 1;
+ }
+ return 0;
+}
+
+
+/* Dec the ref of this RCEC. */
+static void ctxt__rcdec ( RCEC* ec )
+{
+ stats__ctxt_rcdec_calls++;
+ tl_assert(ec && ec->magic == RCEC_MAGIC);
+ tl_assert(ec->rc > 0);
+ ec->rc--;
+}
+
+static void ctxt__rcinc ( RCEC* ec )
+{
+ tl_assert(ec && ec->magic == RCEC_MAGIC);
+ ec->rc++;
+}
+
+
/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
   move it one step closer to the front of the list, so as to make
   subsequent searches for it cheaper.  'ec' must be present and must
   not already be at the head. */
static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
{
   RCEC *ec0, *ec1, *ec2;
   if (ec == *headp)
      tl_assert(0); /* already at head of list */
   tl_assert(ec != NULL);
   /* Walk the chain keeping a two-deep trailer: on exit ec0 == ec,
      ec1 is its predecessor, ec2 is ec1's predecessor (either may be
      NULL near the head). */
   ec0 = *headp;
   ec1 = NULL;
   ec2 = NULL;
   while (True) {
      if (ec0 == NULL || ec0 == ec) break;
      ec2 = ec1;
      ec1 = ec0;
      ec0 = ec0->next;
   }
   tl_assert(ec0 == ec);
   if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
      RCEC* tmp;
      /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
         predecessor.  Swap ec0 and ec1, that is, move ec0 one step
         closer to the start of the list. */
      tl_assert(ec2->next == ec1);
      tl_assert(ec1->next == ec0);
      tmp = ec0->next;
      ec2->next = ec0;
      ec0->next = ec1;
      ec1->next = tmp;
   }
   else
   if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
      /* it's second in the list: swap it with the head. */
      tl_assert(*headp == ec1);
      tl_assert(ec1->next == ec0);
      ec1->next = ec0->next;
      ec0->next = ec1;
      *headp = ec0;
   }
}
+
+
/* Find the given RCEC in the tree, and return a pointer to it.  Or,
   if not present, add the given one to the tree (by making a copy of
   it, so the caller can immediately deallocate the original) and
   return a pointer to the copy.  The caller can safely have 'example'
   on its stack, since we will always return a pointer to a copy of
   it, not to the original.  Note that the inserted node will have .rc
   of zero and so the caller must immediately increment it. */
__attribute__((noinline))
static RCEC* ctxt__find_or_add ( RCEC* example )
{
   UWord hent;
   RCEC* copy;
   tl_assert(example && example->magic == RCEC_MAGIC);
   tl_assert(example->rc == 0);

   /* Search the hash table to see if we already have it.  frames[0]
      holds the precomputed hash of the trace. */
   stats__ctxt_tab_qs++;
   hent = example->frames[0] % N_RCEC_TAB;
   copy = contextTab[hent];
   while (1) {
      if (!copy) break;
      tl_assert(copy->magic == RCEC_MAGIC);
      stats__ctxt_tab_cmps++;
      if (0 == RCEC__cmp_by_frames(copy, example)) break;
      copy = copy->next;
   }

   if (copy) {
      tl_assert(copy != example);
      /* optimisation: if it's not at the head of its list, move 1
         step fwds, to make future searches cheaper */
      if (copy != contextTab[hent]) {
         move_RCEC_one_step_forward( &contextTab[hent], copy );
      }
   } else {
      /* Not found: insert a heap copy at the head of its chain. */
      copy = HG_(zalloc)( "libhb.cfoa.1", sizeof(RCEC) );
      tl_assert(copy != example);
      *copy = *example;
      copy->next = contextTab[hent];
      contextTab[hent] = copy;
      stats__ctxt_tab_curr++;
      if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
         stats__ctxt_tab_max = stats__ctxt_tab_curr;
   }
   return copy;
}
+
+static inline UWord ROLW ( UWord w, Int n )
+{
+ Int bpw = 8 * sizeof(UWord);
+ w = (w << n) | (w >> (bpw-n));
+ return w;
+}
+
+__attribute__((noinline))
+static RCEC* get_RCEC ( Thr* thr )
+{
+ UWord hash, i;
+ RCEC example;
+ example.magic = RCEC_MAGIC;
+ example.rc = 0;
+ example.rcX = 0;
+ main_get_stacktrace( thr, &example.frames[1], N_FRAMES );
+ hash = 0;
+ for (i = 1; i < 1 + N_FRAMES; i++) {
+ hash ^= example.frames[i];
+ hash = ROLW(hash, 19);
+ }
+ example.frames[0] = hash;
+ return ctxt__find_or_add( &example );
+}
+
+///////////////////////////////////////////////////////
+//// Part (2): An OSet of OldRefs, that refer to (1)
+///
+
// (UInt) `echo "Old Reference Information" | md5sum`
#define OldRef_MAGIC 0x30b1f075UL

/* One (thread, stack-trace) pair; the RCEC is shared and
   reference-counted. */
typedef struct { Thr* thr; RCEC* rcec; } Thr_n_RCEC;

/* How many per-thread access records each OldRef keeps. */
#define N_OLDREF_ACCS 3

/* Per-address record of recent accesses, kept in oldrefTree. */
typedef
   struct {
      Addr ea;    /* the address this record describes (OSet key) */
      UWord magic; /* == OldRef_MAGIC */
      UWord gen; /* when most recently accessed */
      /* unused slots in this array have .thr == NULL; the array is
         kept in move-to-front order. */
      Thr_n_RCEC accs[N_OLDREF_ACCS];
   }
   OldRef;
+
+static Word OldRef__cmp_by_EA ( OldRef* r1, OldRef* r2 ) {
+ tl_assert(r1 && r1->magic == OldRef_MAGIC);
+ tl_assert(r2 && r2->magic == OldRef_MAGIC);
+ if (r1->ea < r2->ea) return -1;
+ if (r1->ea > r2->ea) return 1;
+ return 0;
+}
+
/* The event map proper, plus the LRU-generation machinery that
   drives its garbage collection. */
static OSet* oldrefTree = NULL; /* OSet* of OldRef */
static UWord oldrefGen = 0; /* current LRU generation # */
static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */
+
/* Record that 'thr' just accessed address 'a': capture its current
   stack trace and store it in the OldRef for 'a', creating that
   OldRef if needed.  The per-address accs[] array is maintained in
   move-to-front order, so repeat accessors stay cheap to find and
   the least-recently-seen thread is the one evicted. */
static void event_map_bind ( Addr a, Thr* thr )
{
   OldRef key, *ref;
   RCEC* here;
   Word i, j;

   key.ea = a;
   key.magic = OldRef_MAGIC;

   ref = VG_(OSetGen_Lookup)( oldrefTree, &key );

   if (ref) {

      /* We already have a record for this address.  We now need to
         see if we have a stack trace pertaining to this thread's
         access. */
      tl_assert(ref->magic == OldRef_MAGIC);

      tl_assert(thr);
      for (i = 0; i < N_OLDREF_ACCS; i++) {
         if (ref->accs[i].thr == thr)
            break;
      }

      if (i < N_OLDREF_ACCS) {
         /* thread 'thr' has an entry at index 'i'.  Update it. */
         if (i > 0) {
            /* Move-to-front: swap one step towards the front. */
            Thr_n_RCEC tmp = ref->accs[i-1];
            ref->accs[i-1] = ref->accs[i];
            ref->accs[i] = tmp;
            i--;
         }
         /* inc the new trace's count before dec'ing the old one, in
            case they are the same RCEC */
         here = get_RCEC( thr );
         if (here == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
         ctxt__rcinc( here );
         stats__ctxt_rcdec1++;
         ctxt__rcdec( ref->accs[i].rcec );
         ref->accs[i].rcec = here;
         tl_assert(ref->accs[i].thr == thr);
      } else {
         here = get_RCEC( thr );
         ctxt__rcinc( here );
         /* No entry for this thread.  Shuffle all of them down one
            slot, and put the new entry at the start of the array. */
         if (ref->accs[N_OLDREF_ACCS-1].thr) {
            /* the last slot is in use.  We must dec the rc on the
               associated rcec. */
            tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
            stats__ctxt_rcdec2++;
            ctxt__rcdec(ref->accs[N_OLDREF_ACCS-1].rcec);
         } else {
            tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
         }
         for (j = N_OLDREF_ACCS-1; j >= 1; j--)
            ref->accs[j] = ref->accs[j-1];
         ref->accs[0].thr = thr;
         ref->accs[0].rcec = here;
         tl_assert(thr); /* thr==NULL is used to signify an empty slot,
                            so we can't add a NULL thr. */
      }

      ref->gen = oldrefGen;
      tl_assert(ref->ea == a);

   } else {

      /* We don't have a record for this address.  Create a new one. */
      /* Bump the generation number every 50000 insertions, giving the
         GC an approximate recency ordering to discard by. */
      if (oldrefTreeN >= oldrefGenIncAt) {
         oldrefGen++;
         oldrefGenIncAt = oldrefTreeN + 50000;
         if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
                            oldrefGen, oldrefTreeN );
      }
      here = get_RCEC( thr );
      ctxt__rcinc(here);
      ref = VG_(OSetGen_AllocNode)( oldrefTree, sizeof(OldRef) );
      ref->magic = OldRef_MAGIC;
      ref->gen = oldrefGen;
      ref->ea = a;
      ref->accs[0].rcec = here;
      ref->accs[0].thr = thr;
      tl_assert(thr); /* thr==NULL is used to signify an empty slot,
                         so we can't add a NULL thr. */
      for (j = 1; j < N_OLDREF_ACCS; j++) {
         ref->accs[j].thr = NULL;
         ref->accs[j].rcec = NULL;
      }
      VG_(OSetGen_Insert)( oldrefTree, ref );
      oldrefTreeN++;

   }
}
+
+
/* Look up the event map for address 'a' and try to find a recorded
   access by some thread OTHER than 'thr_acc' (a conflicting access
   for race reporting).  On success, writes the stack trace and the
   accessing thread to *resEC/*resThr and returns True; otherwise
   returns False and leaves the OUT params untouched. */
static
Bool event_map_lookup ( /*OUT*/struct _EC** resEC,
                        /*OUT*/Thr** resThr,
                        Thr* thr_acc, Addr a )
{
   Word i;
   OldRef key, *ref;

   tl_assert(thr_acc);

   key.ea = a;
   key.magic = OldRef_MAGIC;

   ref = VG_(OSetGen_Lookup)( oldrefTree, &key );
   if (ref) {
      tl_assert(ref->magic == OldRef_MAGIC);
      tl_assert(ref->accs[0].thr); /* first slot must always be used */

      /* Find the first in-use slot belonging to a different thread. */
      for (i = 0; i < N_OLDREF_ACCS; i++) {
         if (ref->accs[i].thr != NULL
             && ref->accs[i].thr != thr_acc)
            break;
      }
      /* If we didn't find an entry for some thread other than
         thr_acc, just return the entry for thread 0.  It'll look
         pretty stupid to the user though. */
      if (i == N_OLDREF_ACCS)
         i = 0;

      tl_assert(i >= 0 && i < N_OLDREF_ACCS);
      tl_assert(ref->accs[i].thr);
      tl_assert(ref->accs[i].rcec);
      tl_assert(ref->accs[i].rcec->magic == RCEC_MAGIC);

      /* frames[0] is the hash word, so the trace proper starts at 1. */
      *resEC = main_stacktrace_to_EC(&ref->accs[i].rcec->frames[1], N_FRAMES);
      *resThr = ref->accs[i].thr;
      return True;
   } else {
      return False;
   }
}
+
+static void event_map_init ( void )
+{
+ Word i;
+ tl_assert(!contextTab);
+ contextTab = HG_(zalloc)( "libhb.event_map_init.1 (context table)",
+ N_RCEC_TAB * sizeof(RCEC*) );
+ tl_assert(contextTab);
+ for (i = 0; i < N_RCEC_TAB; i++)
+ contextTab[i] = NULL;
+
+ tl_assert(!oldrefTree);
+ tl_assert(offsetof(OldRef,ea) == 0); /* prereq for unboxed cmps */
+ oldrefTree = VG_(OSetGen_Create)(
+ offsetof(OldRef,ea), /* == 0 */
+ NULL, /* use unboxed cmp on OldRefs */
+ HG_(zalloc), "libhb.event_map_init.2 (oldref tree)",
+ HG_(free)
+ );
+ tl_assert(oldrefTree);
+
+ oldrefGen = 0;
+ oldrefGenIncAt = 0;
+ oldrefTreeN = 0;
+}
+
/* Crosscheck RCEC reference counts: recompute each RCEC's count into
   its .rcX field by walking every OldRef, then assert .rc == .rcX.
   'before' == True means we are running just before a GC, when
   zero-count entries are still legitimately present; after a GC they
   must all have been reclaimed. */
static void event_map__check_reference_counts ( Bool before )
{
   RCEC* rcec;
   OldRef* oldref;
   Word i;
   UWord nEnts = 0;

   /* Set the 'check' reference counts to zero.  Also, optionally
      check that the real reference counts are non-zero.  We allow
      these to fall to zero before a GC, but the GC must get rid of
      all those that are zero, hence none should be zero after a
      GC. */
   for (i = 0; i < N_RCEC_TAB; i++) {
      for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
         nEnts++;
         tl_assert(rcec);
         tl_assert(rcec->magic == RCEC_MAGIC);
         if (!before)
            tl_assert(rcec->rc > 0);
         rcec->rcX = 0;
      }
   }

   /* check that the stats are sane */
   tl_assert(nEnts == stats__ctxt_tab_curr);
   tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);

   /* visit all the referencing points, inc check ref counts */
   VG_(OSetGen_ResetIter)( oldrefTree );
   while ( (oldref = VG_(OSetGen_Next)( oldrefTree )) ) {
      tl_assert(oldref->magic == OldRef_MAGIC);
      for (i = 0; i < N_OLDREF_ACCS; i++) {
         if (oldref->accs[i].thr) {
            tl_assert(oldref->accs[i].rcec);
            tl_assert(oldref->accs[i].rcec->magic == RCEC_MAGIC);
            oldref->accs[i].rcec->rcX++;
         } else {
            /* .thr == NULL marks an empty slot; it must be wholly empty */
            tl_assert(!oldref->accs[i].rcec);
         }
      }
   }

   /* compare check ref counts with actual */
   for (i = 0; i < N_RCEC_TAB; i++) {
      for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
         tl_assert(rcec->rc == rcec->rcX);
      }
   }
}
+
/* If the event map has grown past EVENT_MAP_GC_AT entries, garbage
   collect it: discard whole LRU generations of OldRefs (oldest
   first) until roughly EVENT_MAP_GC_DISCARD_FRACTION of the capacity
   has been freed, dec'ing the refcounts of the RCECs they reference,
   and then reap any RCECs whose counts have reached zero. */
static void event_map_maybe_GC ( void )
{
   OldRef* oldref;
   UWord keyW, valW, retained, maxGen;
   WordFM* genMap;
   XArray* refs2del;
   Word i, j, n2del;

   if (LIKELY(oldrefTreeN < EVENT_MAP_GC_AT))
      return;

   if (0)
      VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);

   /* Check our counting is sane */
   tl_assert(oldrefTreeN == (UWord) VG_(OSetGen_Size)( oldrefTree ));

   /* Check the reference counts */
   event_map__check_reference_counts( True/*before*/ );

   /* Compute the distribution of generation values in the ref tree */
   /* genMap :: generation-number -> count-of-nodes-with-that-number */
   genMap = VG_(newFM)( HG_(zalloc), "libhb.emmG.1",
                        HG_(free), NULL );

   VG_(OSetGen_ResetIter)( oldrefTree );
   while ( (oldref = VG_(OSetGen_Next)( oldrefTree )) ) {
      UWord key = oldref->gen;
      keyW = valW = 0;
      if (VG_(lookupFM)(genMap, &keyW, &valW, key )) {
         tl_assert(keyW == key);
         tl_assert(valW > 0);
      }
      /* now valW is the old count for generation 'key' */
      VG_(addToFM)(genMap, key, valW+1);
   }

   tl_assert(VG_(sizeFM)(genMap) > 0);

   /* Walk generations in increasing order (the FM iterates in key
      order), accumulating the cutoff 'maxGen': every generation <=
      maxGen will be discarded, such that at least the target
      fraction of capacity is freed. */
   retained = oldrefTreeN;
   maxGen = 0;
   VG_(initIterFM)( genMap );
   while (VG_(nextIterFM)( genMap, &keyW, &valW )) {
      tl_assert(keyW > 0); /* can't allow a generation # 0 */
      if (0) VG_(printf)("  XXX: gen %lu has %lu\n", keyW, valW );
      tl_assert(keyW >= maxGen);
      tl_assert(retained >= valW);
      if (retained - valW
          > (UWord)(EVENT_MAP_GC_AT * EVENT_MAP_GC_DISCARD_FRACTION)) {
         retained -= valW;
         maxGen = keyW;
      } else {
         break;
      }
   }
   VG_(doneIterFM)( genMap );

   /* NOTE(review): this announcement prints unconditionally, unlike
      the other diagnostics here which are gated on 'if (0)'. */
   VG_(printf)(
      "libhb: EvM GC: delete generations %lu and below, "
      "retaining %lu entries\n",
      maxGen, retained );

   VG_(deleteFM)( genMap, NULL, NULL );

   /* If this fails, it means there's only one generation in the
      entire tree.  So we're kind of in a bad situation, and need to
      do some stop-gap measure, such as randomly deleting half the
      entries. */
   tl_assert(retained < oldrefTreeN);

   /* Now make up a big list of the oldrefTree entries we want to
      delete.  We can't simultaneously traverse the tree and delete
      stuff from it, so first we need to copy them off somewhere
      else. (sigh) */
   refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.1",
                          HG_(free), sizeof(OldRef*) );

   VG_(OSetGen_ResetIter)( oldrefTree );
   while ( (oldref = VG_(OSetGen_Next)( oldrefTree )) ) {
      tl_assert(oldref->magic == OldRef_MAGIC);
      if (oldref->gen <= maxGen) {
         VG_(addToXA)( refs2del, &oldref );
      }
   }

   n2del = VG_(sizeXA)( refs2del );
   tl_assert(n2del == (Word)(oldrefTreeN - retained));

   if (0) VG_(printf)("%s","deleting entries\n");
   for (i = 0; i < n2del; i++) {
      void* nd;
      OldRef* ref = *(OldRef**)VG_(indexXA)( refs2del, i );
      tl_assert(ref);
      tl_assert(ref->magic == OldRef_MAGIC);
      /* drop this OldRef's references to its RCECs before freeing it */
      for (j = 0; j < N_OLDREF_ACCS; j++) {
         if (ref->accs[j].rcec) {
            tl_assert(ref->accs[j].thr);
            stats__ctxt_rcdec3++;
            ctxt__rcdec( ref->accs[j].rcec );
         } else {
            tl_assert(!ref->accs[j].thr);
         }
      }
      nd = VG_(OSetGen_Remove)( oldrefTree, ref );
      VG_(OSetGen_FreeNode)( oldrefTree, nd );
   }

   VG_(deleteXA)( refs2del );

   tl_assert( VG_(OSetGen_Size)( oldrefTree ) == retained );

   oldrefTreeN = retained;
   oldrefGenIncAt = oldrefTreeN; /* start new gen right away */

   /* Throw away all RCECs with zero reference counts */
   for (i = 0; i < N_RCEC_TAB; i++) {
      RCEC** pp = &contextTab[i];
      RCEC*  p  = *pp;
      while (p) {
         if (p->rc == 0) {
            /* unlink from the chain and free */
            *pp = p->next;
            HG_(free)(p);
            p = *pp;
            tl_assert(stats__ctxt_tab_curr > 0);
            stats__ctxt_tab_curr--;
         } else {
            pp = &p->next;
            p = p->next;
         }
      }
   }

   /* Check the reference counts */
   event_map__check_reference_counts( False/*after*/ );

   //if (0)
   //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
   //            VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));

}
+
+
+/////////////////////////////////////////////////////////
+// //
+// Core MSM //
+// //
+/////////////////////////////////////////////////////////
+
/* Record conflicting-access info (event map) on C -> C transitions? */
#define MSM_CONFACC 1

/* On a race, move the location to the E (error) state (1) rather
   than keeping a joined C constraint (0)? */
#define MSM_RACE2ERR 1

/* Enable expensive sanity checking of SVals in msm_read/msm_write? */
#define MSM_CHECK 0

/* MSM traffic counters: total reads/writes processed and how many
   actually changed the shadow value. */
static ULong stats__msm_read = 0;
static ULong stats__msm_read_change = 0;
static ULong stats__msm_write = 0;
static ULong stats__msm_write_change = 0;
+
/* Report a race on 'acc_addr' by 'acc_thr'.  If the event map still
   holds a stack trace for a previous conflicting access, hand that
   trace and thread to the error machinery as well. */
__attribute__((noinline))
static void record_race_info ( Thr* acc_thr,
                               Addr acc_addr, SizeT szB, Bool isWrite,
                               SVal svOld, SVal svNew )
{
   Bool found;
   Thr* thrp = NULL;
   struct _EC* where = NULL;
   struct _EC* wherep = NULL;
   /* NOTE(review): 'where' is computed but never used below --
      main_get_EC may memoise the context as a side effect, but if
      not, this call and the variable look removable; confirm. */
   where = main_get_EC( acc_thr );
   found = event_map_lookup( &wherep, &thrp, acc_thr, acc_addr );
   if (found) {
      /* have a previous conflicting access to pass along */
      tl_assert(wherep);
      tl_assert(thrp);
      tl_assert(thrp->opaque);
      tl_assert(acc_thr->opaque);
      HG_(record_error_Race)( acc_thr->opaque, acc_addr,
                              isWrite, szB, NULL/*mb_lastlock*/,
                              wherep, thrp->opaque );
   } else {
      /* no conflicting-access info available */
      tl_assert(!wherep);
      tl_assert(!thrp);
      tl_assert(acc_thr->opaque);
      HG_(record_error_Race)( acc_thr->opaque, acc_addr,
                              isWrite, szB, NULL/*mb_lastlock*/,
                              NULL, NULL );
   }
}
+
+static Bool is_sane_SVal_C ( SVal sv ) {
+ POrd ord;
+ if (!SVal__isC(sv)) return True;
+ ord = VtsID__getOrdering( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
+ if (ord == POrd_EQ || ord == POrd_LT) return True;
+ return False;
+}
+
+
/* Compute new state following a read.  The state machine: a C value
   stays C (with its write clock joined into the thread's) when the
   thread's read clock dominates the location's read-min; otherwise
   it's a race.  A and E states are absorbing. */
static inline SVal msm_read ( SVal svOld,
                              /* The following are only needed for
                                 creating error reports. */
                              Thr* acc_thr,
                              Addr acc_addr, SizeT szB )
{
   SVal svNew = SVal_INVALID;
   stats__msm_read++;

   /* Redundant sanity check on the constraints */
   if (MSM_CHECK) {
      tl_assert(is_sane_SVal_C(svOld));
   }

   if (SVal__isC(svOld)) {
      POrd ord;
      VtsID tviR = acc_thr->viR;
      VtsID tviW = acc_thr->viW;
      VtsID rmini = SVal__unC_Rmin(svOld);
      VtsID wmini = SVal__unC_Wmin(svOld);

      ord = VtsID__getOrdering(rmini,tviR);
      if (ord == POrd_EQ || ord == POrd_LT) {
         /* no race */
         /* Note: RWLOCK subtlety: use tviW, not tviR */
         svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
         goto out;
      } else {
         /* race: either flip to the error state, or keep a joined
            constraint, per MSM_RACE2ERR */
         svNew = MSM_RACE2ERR
                    ? SVal__mkE()
                    : SVal__mkC( rmini, VtsID__join2(wmini,tviR) );
         record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
                           svOld, svNew );
         goto out;
      }
   }
   if (SVal__isA(svOld)) {
      /* reading no-access memory (sigh); leave unchanged */
      /* check for no pollution */
      tl_assert(svOld == SVal_NOACCESS);
      svNew = SVal_NOACCESS;
      goto out;
   }
   if (SVal__isE(svOld)) {
      /* no race, location is already "in error" */
      svNew = SVal__mkE();
      goto out;
   }
   VG_(printf)("msm_read: bad svOld: 0x%016llx\n", svOld);
   tl_assert(0);

  out:
   if (MSM_CHECK) {
      tl_assert(is_sane_SVal_C(svNew));
   }
   tl_assert(svNew != SVal_INVALID);
   if (svNew != svOld) {
      if (MSM_CONFACC && SVal__isC(svOld) && SVal__isC(svNew)) {
         /* remember this access for later conflicting-access reports */
         event_map_bind( acc_addr, acc_thr );
         stats__msm_read_change++;
      }
   }
   return svNew;
}
+
+
/* Compute new state following a write.  A C value collapses to the
   writer's clock pair when the thread's write clock dominates the
   location's write-min; otherwise it's a race.  A and E states are
   absorbing. */
static inline SVal msm_write ( SVal svOld,
                               /* The following are only needed for
                                  creating error reports. */
                               Thr* acc_thr,
                               Addr acc_addr, SizeT szB )
{
   SVal svNew = SVal_INVALID;
   stats__msm_write++;

   /* Redundant sanity check on the constraints */
   if (MSM_CHECK) {
      tl_assert(is_sane_SVal_C(svOld));
   }

   if (SVal__isC(svOld)) {
      POrd ord;
      VtsID tviW = acc_thr->viW;
      VtsID wmini = SVal__unC_Wmin(svOld);

      ord = VtsID__getOrdering(wmini,tviW);
      if (ord == POrd_EQ || ord == POrd_LT) {
         /* no race */
         svNew = SVal__mkC( tviW, tviW );
         goto out;
      } else {
         /* race: either flip to the error state, or keep a joined
            constraint, per MSM_RACE2ERR */
         VtsID rmini = SVal__unC_Rmin(svOld);
         svNew = MSM_RACE2ERR
                    ? SVal__mkE()
                    : SVal__mkC( VtsID__join2(rmini,tviW),
                                 VtsID__join2(wmini,tviW) );
         record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
                           svOld, svNew );
         goto out;
      }
   }
   if (SVal__isA(svOld)) {
      /* writing no-access memory (sigh); leave unchanged */
      /* check for no pollution */
      tl_assert(svOld == SVal_NOACCESS);
      svNew = SVal_NOACCESS;
      goto out;
   }
   if (SVal__isE(svOld)) {
      /* no race, location is already "in error" */
      svNew = SVal__mkE();
      goto out;
   }
   VG_(printf)("msm_write: bad svOld: 0x%016llx\n", svOld);
   tl_assert(0);

  out:
   if (MSM_CHECK) {
      tl_assert(is_sane_SVal_C(svNew));
   }
   tl_assert(svNew != SVal_INVALID);
   if (svNew != svOld) {
      if (MSM_CONFACC && SVal__isC(svOld) && SVal__isC(svNew)) {
         /* remember this access for later conflicting-access reports */
         event_map_bind( acc_addr, acc_thr );
         stats__msm_write_change++;
      }
   }
   return svNew;
}
+
+
+/////////////////////////////////////////////////////////
+// //
+// Apply core MSM to specific memory locations //
+// //
+/////////////////////////////////////////////////////////
+
+/*------------- ZSM accesses: 8 bit apply ------------- */
+
/* Apply the MSM read transition to a single byte at 'a': locate its
   shadow value in the cache, pulling the containing tree down to
   8-bit granularity if needed, then run msm_read on it. */
void zsm_apply8___msm_read ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord cloff, tno, toff;
   SVal svOld, svNew;
   UShort descr;
   stats__cline_read8s++;
   cl = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno = get_treeno(a);
   toff = get_tree_offset(a); /* == 0 .. 7 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
      /* the byte is currently represented at a coarser granularity;
         split the tree down so it has its own SVal */
      SVal* tree = &cl->svals[tno << 3];
      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
      if (SCE_CACHELINE)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msm_read( svOld, thr,a,1 );
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
}
+
/* Apply the MSM write transition to a single byte at 'a'; same
   cache/tree handling as zsm_apply8___msm_read. */
void zsm_apply8___msm_write ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord cloff, tno, toff;
   SVal svOld, svNew;
   UShort descr;
   /* NOTE(review): this bumps the READ counter in the write path --
      looks like a copy-paste from zsm_apply8___msm_read; presumably a
      write counter was intended.  Confirm against the stats decls. */
   stats__cline_read8s++;
   cl = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno = get_treeno(a);
   toff = get_tree_offset(a); /* == 0 .. 7 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
      /* split the tree down so this byte has its own SVal */
      SVal* tree = &cl->svals[tno << 3];
      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
      if (SCE_CACHELINE)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msm_write( svOld, thr,a,1 );
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
}
+
+/*------------- ZSM accesses: 16 bit apply ------------- */
+
+void zsm_apply16___msm_read ( Thr* thr, Addr a ) {
+ CacheLine* cl;
+ UWord cloff, tno, toff;
+ SVal svOld, svNew;
+ UShort descr;
+ stats__cline_read16s++;
+ if (UNLIKELY(!aligned16(a))) goto slowcase;
+ cl = get_cacheline(a);
+ cloff = get_cacheline_offset(a);
+ tno = get_treeno(a);
+ toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
+ descr = cl->descrs[tno];
+ if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
+ if (valid_value_is_below_me_16(descr, toff)) {
+ goto slowcase;
+ } else {
+ SVal* tree = &cl->svals[tno << 3];
+ cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
+ }
+ if (SCE_CACHELINE)
+ tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+ }
+ svOld = cl->svals[cloff];
+ svNew = msm_read( svOld, thr,a,2 );
+ tl_assert(svNew != SVal_INVALID);
+ cl->svals[cloff] = svNew;
+ return;
+ slowcase: /* misaligned, or must go further down the tree */
+ stats__cline_16to8splits++;
+ zsm_apply8___msm_read( thr, a + 0 );
+ zsm_apply8___msm_read( thr, a + 1 );
+}
+
+void zsm_apply16___msm_write ( Thr* thr, Addr a ) {
+ CacheLine* cl;
+ UWord cloff, tno, toff;
+ SVal svOld, svNew;
+ UShort descr;
+ stats__cline_read16s++;
+ if (UNLIKELY(!aligned16(a))) goto slowcase;
+ cl = get_cacheline(a);
+ cloff = get_cacheline_offset(a);
+ tno = get_treeno(a);
+ toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
+ descr = cl->descrs[tno];
+ if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
+ if (valid_value_is_below_me_16(descr, toff)) {
+ goto slowcase;
+ } else {
+ SVal* tree = &cl->svals[tno << 3];
+ cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
+ }
+ if (SCE_CACHELINE)
+ tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+ }
+ svOld = cl->svals[cloff];
+ svNew = msm_write( svOld, thr,a,2 );
+ tl_assert(svNew != SVal_INVALID);
+ cl->svals[cloff] = svNew;
+ return;
+ slowcase: /* misaligned, or must go further down the tree */
+ stats__cline_16to8splits++;
+ zsm_apply8___msm_write( thr, a + 0 );
+ zsm_apply8___msm_write( thr, a + 1 );
+}
+
+/*------------- ZSM accesses: 32 bit apply ------------- */
+
+void zsm_apply32___msm_read ( Thr* thr, Addr a ) {
+ CacheLine* cl;
+ UWord cloff, tno, toff;
+ SVal svOld, svNew;
+ UShort descr;
+ if (UNLIKELY(!aligned32(a))) goto slowcase;
+ cl = get_cacheline(a);
+ cloff = get_cacheline_offset(a);
+ tno = get_treeno(a);
+ toff = get_tree_offset(a); /* == 0 or 4 */
+ descr = cl->descrs[tno];
+ if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
+ if (valid_value_is_above_me_32(descr, toff)) {
+ SVal* tree = &cl->svals[tno << 3];
+ cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
+ } else {
+ goto slowcase;
+ }
+ if (SCE_CACHELINE)
+ tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+ }
+ svOld = cl->svals[cloff];
+ svNew = msm_read( svOld, thr,a,4 );
+ tl_assert(svNew != SVal_INVALID);
+ cl->svals[cloff] = svNew;
+ return;
+ slowcase: /* misaligned, or must go further down the tree */
+ stats__cline_32to16splits++;
+ zsm_apply16___msm_read( thr, a + 0 );
+ zsm_apply16___msm_read( thr, a + 2 );
+}
+
+void zsm_apply32___msm_write ( Thr* thr, Addr a ) {
+ CacheLine* cl;
+ UWord cloff, tno, toff;
+ SVal svOld, svNew;
+ UShort descr;
+ if (UNLIKELY(!aligned32(a))) goto slowcase;
+ cl = get_cacheline(a);
+ cloff = get_cacheline_offset(a);
+ tno = get_treeno(a);
+ toff = get_tree_offset(a); /* == 0 or 4 */
+ descr = cl->descrs[tno];
+ if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
+ if (valid_value_is_above_me_32(descr, toff)) {
+ SVal* tree = &cl->svals[tno << 3];
+ cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
+ } else {
+ goto slowcase;
+ }
+ if (SCE_CACHELINE)
+ tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+ }
+ svOld = cl->svals[cloff];
+ svNew = msm_write( svOld, thr,a,4 );
+ tl_assert(svNew != SVal_INVALID);
+ cl->svals[cloff] = svNew;
+ return;
+ slowcase: /* misaligned, or must go further down the tree */
+ stats__cline_32to16splits++;
+ zsm_apply16___msm_write( thr, a + 0 );
+ zsm_apply16___msm_write( thr, a + 2 );
+}
+
+/*------------- ZSM accesses: 64 bit apply ------------- */
+
+void zsm_apply64___msm_read ( Thr* thr, Addr a ) {
+ CacheLine* cl;
+ UWord cloff, tno, toff;
+ SVal svOld, svNew;
+ UShort descr;
+ stats__cline_read64s++;
+ if (UNLIKELY(!aligned64(a))) goto slowcase;
+ cl = get_cacheline(a);
+ cloff = get_cacheline_offset(a);
+ tno = get_treeno(a);
+ toff = get_tree_offset(a); /* == 0, unused */
+ descr = cl->descrs[tno];
+ if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
+ goto slowcase;
+ }
+ svOld = cl->svals[cloff];
+ svNew = msm_read( svOld, thr,a,8 );
+ tl_assert(svNew != SVal_INVALID);
+ cl->svals[cloff] = svNew;
+ return;
+ slowcase: /* misaligned, or must go further down the tree */
+ stats__cline_64to32splits++;
+ zsm_apply32___msm_read( thr, a + 0 );
+ zsm_apply32___msm_read( thr, a + 4 );
+}
+
+void zsm_apply64___msm_write ( Thr* thr, Addr a ) {
+ CacheLine* cl;
+ UWord cloff, tno, toff;
+ SVal svOld, svNew;
+ UShort descr;
+ stats__cline_read64s++;
+ if (UNLIKELY(!aligned64(a))) goto slowcase;
+ cl = get_cacheline(a);
+ cloff = get_cacheline_offset(a);
+ tno = get_treeno(a);
+ toff = get_tree_offset(a); /* == 0, unused */
+ descr = cl->descrs[tno];
+ if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
+ goto slowcase;
+ }
+ svOld = cl->svals[cloff];
+ svNew = msm_write( svOld, thr,a,8 );
+ tl_assert(svNew != SVal_INVALID);
+ cl->svals[cloff] = svNew;
+ return;
+ slowcase: /* misaligned, or must go further down the tree */
+ stats__cline_64to32splits++;
+ zsm_apply32___msm_write( thr, a + 0 );
+ zsm_apply32___msm_write( thr, a + 4 );
+}
+
+/*--------------- ZSM accesses: 8 bit write --------------- */
+
+static
+void zsm_write8 ( Addr a, SVal svNew ) {
+ CacheLine* cl;
+ UWord cloff, tno, toff;
+ UShort descr;
+ stats__cline_set8s++;
+ cl = get_cacheline(a);
+ cloff = get_cacheline_offset(a);
+ tno = get_treeno(a);
+ toff = get_tree_offset(a); /* == 0 .. 7 */
+ descr = cl->descrs[tno];
+ if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
+ SVal* tree = &cl->svals[tno << 3];
+ cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
+ if (SCE_CACHELINE)
+ tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+ }
+ tl_assert(svNew != SVal_INVALID);
+ cl->svals[cloff] = svNew;
+}
+
+/*--------------- ZSM accesses: 16 bit write --------------- */
+
/* Set the shadow value of the aligned 16-bit location at 'a' to
   'svNew': the value goes in slot cloff+0 and slot cloff+1 is
   invalidated.  Misaligned addresses fall back to two 8-bit
   writes. */
static
void zsm_write16 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord cloff, tno, toff;
   UShort descr;
   stats__cline_set16s++;
   if (UNLIKELY(!aligned16(a))) goto slowcase;
   cl = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno = get_treeno(a);
   toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
      if (valid_value_is_below_me_16(descr, toff)) {
         /* Writing at this level. Need to fix up 'descr'. */
         cl->descrs[tno] = pullup_descr_to_16(descr, toff);
         /* At this point, the tree does not match cl->descr[tno] any
            more. The assignments below will fix it up. */
      } else {
         /* We can't indiscriminately write on the w16 node as in the
            w64 case, as that might make the node inconsistent with
            its parent. So first, pull down to this level. */
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
         if (SCE_CACHELINE)
            tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      }
   }
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_16to8splits++;
   zsm_write8( a + 0, svNew );
   zsm_write8( a + 1, svNew );
}
+
+/*--------------- ZSM accesses: 32 bit write --------------- */
+
/* Set the shadow value of the aligned 32-bit location at 'a' to
   'svNew': the value goes in slot cloff+0 and the next three slots
   are invalidated.  Misaligned addresses fall back to two 16-bit
   writes. */
static
void zsm_write32 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord cloff, tno, toff;
   UShort descr;
   stats__cline_set32s++;
   if (UNLIKELY(!aligned32(a))) goto slowcase;
   cl = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno = get_treeno(a);
   toff = get_tree_offset(a); /* == 0 or 4 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
      if (valid_value_is_above_me_32(descr, toff)) {
         /* We can't indiscriminately write on the w32 node as in the
            w64 case, as that might make the node inconsistent with
            its parent. So first, pull down to this level. */
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
         if (SCE_CACHELINE)
            tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      } else {
         /* Writing at this level. Need to fix up 'descr'. */
         cl->descrs[tno] = pullup_descr_to_32(descr, toff);
         /* At this point, the tree does not match cl->descr[tno] any
            more. The assignments below will fix it up. */
      }
   }
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   cl->svals[cloff + 2] = SVal_INVALID;
   cl->svals[cloff + 3] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_32to16splits++;
   zsm_write16( a + 0, svNew );
   zsm_write16( a + 2, svNew );
}
+
+/*--------------- ZSM accesses: 64 bit write --------------- */
+
+static
+void zsm_write64 ( Addr a, SVal svNew ) {
+ CacheLine* cl;
+ UWord cloff, tno, toff;
+ stats__cline_set64s++;
+ if (UNLIKELY(!aligned64(a))) goto slowcase;
+ cl = get_cacheline(a);
+ cloff = get_cacheline_offset(a);
+ tno = get_treeno(a);
+ toff = get_tree_offset(a); /* == 0 */
+ cl->descrs[tno] = TREE_DESCR_64;
+ tl_assert(svNew != SVal_INVALID);
+ cl->svals[cloff + 0] = svNew;
+ cl->svals[cloff + 1] = SVal_INVALID;
+ cl->svals[cloff + 2] = SVal_INVALID;
+ cl->svals[cloff + 3] = SVal_INVALID;
+ cl->svals[cloff + 4] = SVal_INVALID;
+ cl->svals[cloff + 5] = SVal_INVALID;
+ cl->svals[cloff + 6] = SVal_INVALID;
+ cl->svals[cloff + 7] = SVal_INVALID;
+ return;
+ slowcase: /* misaligned */
+ stats__cline_64to32splits++;
+ zsm_write32( a + 0, svNew );
+ zsm_write32( a + 4, svNew );
+}
+
+/*------------- ZSM accesses: 8 bit read/copy ------------- */
+
+static
+SVal zsm_read8 ( Addr a ) {
+ CacheLine* cl;
+ UWord cloff, tno, toff;
+ UShort descr;
+ stats__cline_get8s++;
+ cl = get_cacheline(a);
+ cloff = get_cacheline_offset(a);
+ tno = get_treeno(a);
+ toff = get_tree_offset(a); /* == 0 .. 7 */
+ descr = cl->descrs[tno];
+ if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
+ SVal* tree = &cl->svals[tno << 3];
+ cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
+ }
+ return cl->svals[cloff];
+}
+
+static void zsm_copy8 ( Addr src, Addr dst, Bool uu_normalise ) {
+ SVal sv;
+ stats__cline_copy8s++;
+ sv = zsm_read8( src );
+ zsm_write8( dst, sv );
+}
+
+/* ------------ Shadow memory range setting ops ------------ */
+
+void zsm_apply_range___msm_read ( Thr* thr,
+ Addr a, SizeT len )
+{
+ /* fast track a couple of common cases */
+ if (len == 4 && aligned32(a)) {
+ zsm_apply32___msm_read( thr, a );
+ return;
+ }
+ if (len == 8 && aligned64(a)) {
+ zsm_apply64___msm_read( thr, a );
+ return;
+ }
+
+ /* be completely general (but as efficient as possible) */
+ if (len == 0) return;
+
+ if (!aligned16(a) && len >= 1) {
+ zsm_apply8___msm_read( thr, a );
+ a += 1;
+ len -= 1;
+ tl_assert(aligned16(a));
+ }
+ if (len == 0) return;
+
+ if (!aligned32(a) && len >= 2) {
+ zsm_apply16___msm_read( thr, a );
+ a += 2;
+ len -= 2;
+ tl_assert(aligned32(a));
+ }
+ if (len == 0) return;
+
+ if (!aligned64(a) && len >= 4) {
+ zsm_apply32___msm_read( thr, a );
+ a += 4;
+ len -= 4;
+ tl_assert(aligned64(a));
+ }
+ if (len == 0) return;
+
+ if (len >= 8) {
+ tl_assert(aligned64(a));
+ while (len >= 8) {
+ zsm_apply64___msm_read( thr, a );
+ a += 8;
+ len -= 8;
+ }
+ tl_assert(aligned64(a));
+ }
+ if (len == 0) return;
+
+ if (len >= 4)
+ tl_assert(aligned32(a));
+ if (len >= 4) {
+ zsm_apply32___msm_read( thr, a );
+ a += 4;
+ len -= 4;
+ }
+ if (len == 0) return;
+
+ if (len >= 2)
+ tl_assert(aligned16(a));
+ if (len >= 2) {
+ zsm_apply16___msm_read( thr, a );
+ a += 2;
+ len -= 2;
+ }
+ if (len == 0) return;
+
+ if (len >= 1) {
+ zsm_apply8___msm_read( thr, a );
+ a += 1;
+ len -= 1;
+ }
+ tl_assert(len == 0);
+}
+
+
+
+void zsm_apply_range___msm_write ( Thr* thr,
+ Addr a, SizeT len )
+{
+ /* fast track a couple of common cases */
+ if (len == 4 && aligned32(a)) {
+ zsm_apply32___msm_write( thr, a );
+ return;
+ }
+ if (len == 8 && aligned64(a)) {
+ zsm_apply64___msm_write( thr, a );
+ return;
+ }
+
+ /* be completely general (but as efficient as possible) */
+ if (len == 0) return;
+
+ if (!aligned16(a) && len >= 1) {
+ zsm_apply8___msm_write( thr, a );
+ a += 1;
+ len -= 1;
+ tl_assert(aligned16(a));
+ }
+ if (len == 0) return;
+
+ if (!aligned32(a) && len >= 2) {
+ zsm_apply16___msm_write( thr, a );
+ a += 2;
+ len -= 2;
+ tl_assert(aligned32(a));
+ }
+ if (len == 0) return;
+
+ if (!aligned64(a) && len >= 4) {
+ zsm_apply32___msm_write( thr, a );
+ a += 4;
+ len -= 4;
+ tl_assert(aligned64(a));
+ }
+ if (len == 0) return;
+
+ if (len >= 8) {
+ tl_assert(aligned64(a));
+ while (len >= 8) {
+ zsm_apply64___msm_write( thr, a );
+ a += 8;
+ len -= 8;
+ }
+ tl_assert(aligned64(a));
+ }
+ if (len == 0) return;
+
+ if (len >= 4)
+ tl_assert(aligned32(a));
+ if (len >= 4) {
+ zsm_apply32___msm_write( thr, a );
+ a += 4;
+ len -= 4;
+ }
+ if (len == 0) return;
+
+ if (len >= 2)
+ tl_assert(aligned16(a));
+ if (len >= 2) {
+ zsm_apply16___msm_write( thr, a );
+ a += 2;
+ len -= 2;
+ }
+ if (len == 0) return;
+
+ if (len >= 1) {
+ zsm_apply8___msm_write( thr, a );
+ a += 1;
+ len -= 1;
+ }
+ tl_assert(len == 0);
+}
+
+
+
+
+/* Block-copy states (needed for implementing realloc()). */
+
+static void zsm_copy_range ( Addr src, Addr dst, SizeT len )
+{
+ SizeT i;
+ if (len == 0)
+ return;
+
+ /* assert for non-overlappingness */
+ tl_assert(src+len <= dst || dst+len <= src);
+
+ /* To be simple, just copy byte by byte. But so as not to wreck
+ performance for later accesses to dst[0 .. len-1], normalise
+ destination lines as we finish with them, and also normalise the
+ line containing the first and last address. */
+ for (i = 0; i < len; i++) {
+ Bool normalise
+ = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
+ || i == 0 /* first in range */
+ || i == len-1; /* last in range */
+ zsm_copy8( src+i, dst+i, normalise );
+ }
+}
+
+
+/* For setting address ranges to a given value. Has considerable
+ sophistication so as to avoid generating large numbers of pointless
+ cache loads/writebacks for large ranges. */
+
+/* Do small ranges in-cache, in the obvious way. */
+static
+void zsm_set_range_SMALL ( Addr a, SizeT len, SVal svNew )
+{
+ /* fast track a couple of common cases */
+ if (len == 4 && aligned32(a)) {
+ zsm_write32( a, svNew );
+ return;
+ }
+ if (len == 8 && aligned64(a)) {
+ zsm_write64( a, svNew );
+ return;
+ }
+
+ /* be completely general (but as efficient as possible) */
+ if (len == 0) return;
+
+ if (!aligned16(a) && len >= 1) {
+ zsm_write8( a, svNew );
+ a += 1;
+ len -= 1;
+ tl_assert(aligned16(a));
+ }
+ if (len == 0) return;
+
+ if (!aligned32(a) && len >= 2) {
+ zsm_write16( a, svNew );
+ a += 2;
+ len -= 2;
+ tl_assert(aligned32(a));
+ }
+ if (len == 0) return;
+
+ if (!aligned64(a) && len >= 4) {
+ zsm_write32( a, svNew );
+ a += 4;
+ len -= 4;
+ tl_assert(aligned64(a));
+ }
+ if (len == 0) return;
+
+ if (len >= 8) {
+ tl_assert(aligned64(a));
+ while (len >= 8) {
+ zsm_write64( a, svNew );
+ a += 8;
+ len -= 8;
+ }
+ tl_assert(aligned64(a));
+ }
+ if (len == 0) return;
+
+ if (len >= 4)
+ tl_assert(aligned32(a));
+ if (len >= 4) {
+ zsm_write32( a, svNew );
+ a += 4;
+ len -= 4;
+ }
+ if (len == 0) return;
+
+ if (len >= 2)
+ tl_assert(aligned16(a));
+ if (len >= 2) {
+ zsm_write16( a, svNew );
+ a += 2;
+ len -= 2;
+ }
+ if (len == 0) return;
+
+ if (len >= 1) {
+ zsm_write8( a, svNew );
+ a += 1;
+ len -= 1;
+ }
+ tl_assert(len == 0);
+}
+
+
+/* If we're doing a small range, hand off to zsm_set_range_SMALL. But
+ for larger ranges, try to operate directly on the out-of-cache
+ representation, rather than dragging lines into the cache,
+ overwriting them, and forcing them out. This turns out to be an
+ important performance optimisation. */
+
/* Set every byte of [a, a+len) to 'svNew'.  Short ranges are done
   in-cache via zsm_set_range_SMALL.  For ranges spanning at least
   two cachelines, the misaligned head and tail are done in-cache,
   while each wholly-covered line is either overwritten in-cache
   (if already cached) or rewritten directly in its compressed Z
   representation, avoiding fetch/overwrite/writeback traffic. */
static void zsm_set_range ( Addr a, SizeT len, SVal svNew )
{
   tl_assert(svNew != SVal_INVALID);
   stats__cache_make_New_arange += (ULong)len;

   /* disabled debug printout for large ranges */
   if (0 && len > 500)
      VG_(printf)("make New ( %#lx, %ld )\n", a, len );

   /* disabled instrumentation: measures how often the first line of
      the range is already cached */
   if (0) {
      static UWord n_New_in_cache = 0;
      static UWord n_New_not_in_cache = 0;
      /* tag is 'a' with the in-line offset masked out,
         eg a[31]..a[4] 0000 */
      Addr tag = a & ~(N_LINE_ARANGE - 1);
      UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
      if (LIKELY(tag == cache_shmem.tags0[wix])) {
         n_New_in_cache++;
      } else {
         n_New_not_in_cache++;
      }
      if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
         VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
                     n_New_in_cache, n_New_not_in_cache );
   }

   if (LIKELY(len < 2 * N_LINE_ARANGE)) {
      zsm_set_range_SMALL( a, len, svNew );
   } else {
      /* split the range into head / aligned middle / tail */
      Addr before_start = a;
      Addr aligned_start = cacheline_ROUNDUP(a);
      Addr after_start = cacheline_ROUNDDN(a + len);
      UWord before_len = aligned_start - before_start;
      UWord aligned_len = after_start - aligned_start;
      UWord after_len = a + len - after_start;
      tl_assert(before_start <= aligned_start);
      tl_assert(aligned_start <= after_start);
      tl_assert(before_len < N_LINE_ARANGE);
      tl_assert(after_len < N_LINE_ARANGE);
      tl_assert(get_cacheline_offset(aligned_start) == 0);
      if (get_cacheline_offset(a) == 0) {
         tl_assert(before_len == 0);
         tl_assert(a == aligned_start);
      }
      if (get_cacheline_offset(a+len) == 0) {
         tl_assert(after_len == 0);
         tl_assert(after_start == a+len);
      }
      /* head and tail go the in-cache route */
      if (before_len > 0) {
         zsm_set_range_SMALL( before_start, before_len, svNew );
      }
      if (after_len > 0) {
         zsm_set_range_SMALL( after_start, after_len, svNew );
      }
      stats__cache_make_New_inZrep += (ULong)aligned_len;

      /* middle: one whole cacheline per iteration */
      while (1) {
         Addr tag;
         UWord wix;
         if (aligned_start >= after_start)
            break;
         tl_assert(get_cacheline_offset(aligned_start) == 0);
         tag = aligned_start & ~(N_LINE_ARANGE - 1);
         wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
         if (tag == cache_shmem.tags0[wix]) {
            /* line is cached: overwrite it in-cache */
            UWord i;
            for (i = 0; i < N_LINE_ARANGE / 8; i++)
               zsm_write64( aligned_start + i * 8, svNew );
         } else {
            UWord i;
            Word zix;
            SecMap* sm;
            LineZ* lineZ;
            /* This line is not in the cache. Do not force it in; instead
               modify it in-place. */
            /* find the Z line to write in and rcdec it or the
               associated F line. */
            find_Z_for_writing( &sm, &zix, tag );
            tl_assert(sm);
            tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
            lineZ = &sm->linesZ[zix];
            /* a fully-uniform line: dict[0] holds the only value */
            lineZ->dict[0] = svNew;
            lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
            for (i = 0; i < N_LINE_ARANGE/4; i++)
               lineZ->ix2s[i] = 0; /* all refer to dict[0] */
            rcinc_LineZ(lineZ);
         }
         aligned_start += N_LINE_ARANGE;
         aligned_len -= N_LINE_ARANGE;
      }
      tl_assert(aligned_start == after_start);
      tl_assert(aligned_len == 0);
   }
}
+
+
+/////////////////////////////////////////////////////////
+// //
+// Synchronisation objects //
+// //
+/////////////////////////////////////////////////////////
+
+// (UInt) `echo "Synchronisation object" | md5sum`
+#define SO_MAGIC 0x56b3c5b0U
+
/* A synchronisation object.  It records the vector clocks of the
   most recent send(s) performed on it; a receive joins these into
   the receiving thread's clocks.  viR and viW are either both
   valid or both VtsID_INVALID (the latter meaning "nothing sent
   yet"). */
struct _SO {
   VtsID viR; /* r-clock of sender */
   VtsID viW; /* w-clock of sender */
   UInt magic; /* always SO_MAGIC; zeroed on dealloc to catch stale use */
};
+
+static SO* SO__Alloc ( void ) {
+ SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
+ so->viR = VtsID_INVALID;
+ so->viW = VtsID_INVALID;
+ so->magic = SO_MAGIC;
+ return so;
+}
+static void SO__Dealloc ( SO* so ) {
+ tl_assert(so);
+ tl_assert(so->magic == SO_MAGIC);
+ if (so->viR == VtsID_INVALID) {
+ tl_assert(so->viW == VtsID_INVALID);
+ } else {
+ tl_assert(so->viW != VtsID_INVALID);
+ VtsID__rcdec(so->viR);
+ VtsID__rcdec(so->viW);
+ }
+ so->magic = 0;
+ HG_(free)( so );
+}
+
+
+/////////////////////////////////////////////////////////
+// //
+// Top Level API //
+// //
+/////////////////////////////////////////////////////////
+
/* Debugging: print thread 't's read/write vector clocks, labelled
   by 'str'.  NOTE: the leading "if (1) return" disables all output;
   change it to "if (0)" to enable tracing. */
static void show_thread_state ( HChar* str, Thr* t )
{
   if (1) return;
   if (t->viR == t->viW) {
      /* clocks coincide: print them once */
      VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
      VtsID__pp( t->viR );
      VG_(printf)("%s","\n");
   } else {
      VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
      VtsID__pp( t->viR );
      VG_(printf)(" viW %u==", t->viW);
      VtsID__pp( t->viW );
      VG_(printf)("%s","\n");
   }
}
+
+
/* Initialise the library: record the tool-supplied stack-trace
   callbacks, set up the VTS machinery and shadow memory, and create
   and return the root thread, whose clocks start as the singleton
   VTS {root:1}.  All three callbacks are required. */
Thr* libhb_init (
        void        (*get_stacktrace)( Thr*, Addr*, UWord ),
        struct _EC* (*stacktrace_to_EC)( Addr*, UWord ),
        struct _EC* (*get_EC)( Thr* )
     )
{
   Thr*  thr;
   VtsID vi;
   tl_assert(get_stacktrace);
   tl_assert(stacktrace_to_EC);
   tl_assert(get_EC);
   main_get_stacktrace   = get_stacktrace;
   main_stacktrace_to_EC = stacktrace_to_EC;
   main_get_EC           = get_EC;

   // No need to initialise hg_wordfm.
   // No need to initialise hg_wordset.

   vts_set_init();
   vts_tab_init();
   event_map_init();
   VtsID__invalidate_caches();

   // initialise shadow memory
   zsm_init( SVal__rcinc, SVal__rcdec );

   /* root thread: both clocks are the singleton VTS, with one
      reference each */
   thr = Thr__new();
   vi  = VtsID__mk_Singleton( thr, 1 );
   thr->viR = vi;
   thr->viW = vi;
   VtsID__rcinc(thr->viR);
   VtsID__rcinc(thr->viW);

   show_thread_state("  root", thr);
   return thr;
}
+
/* Fork a new thread from 'parent' and return it. */
Thr* libhb_create ( Thr* parent )
{
   /* The child's VTSs are copies of the parent's VTSs, but ticked at
      the child's index.  Since the child's index is guaranteed
      unique, it has never been seen before, so the implicit value
      before the tick is zero and after that is one. */
   Thr* child = Thr__new();

   child->viR = VtsID__tick( parent->viR, child );
   child->viW = VtsID__tick( parent->viW, child );
   VtsID__rcinc(child->viR);
   VtsID__rcinc(child->viW);

   tl_assert(VtsID__indexAt( child->viR, child ) == 1);
   tl_assert(VtsID__indexAt( child->viW, child ) == 1);

   /* and the parent has to move along too */
   /* note: rcdec the old IDs before ticking, since ticking yields
      (possibly) new IDs which then get their own rcinc */
   VtsID__rcdec(parent->viR);
   VtsID__rcdec(parent->viW);
   parent->viR = VtsID__tick( parent->viR, parent );
   parent->viW = VtsID__tick( parent->viW, parent );
   VtsID__rcinc(parent->viR);
   VtsID__rcinc(parent->viW);

   show_thread_state(" child", child);
   show_thread_state("parent", parent);

   return child;
}
+
+/* Shut down the library, and print stats (in fact that's _all_
+ this is for. */
+void libhb_shutdown ( Bool show_stats )
+{
+ if (show_stats) {
+ VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
+ VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
+ stats__secmaps_allocd,
+ stats__secmap_ga_space_covered);
+ VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
+ stats__secmap_linesZ_allocd,
+ stats__secmap_linesZ_bytes);
+ VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
+ stats__secmap_linesF_allocd,
+ stats__secmap_linesF_bytes);
+ VG_(printf)(" secmaps: %'10lu iterator steppings\n",
+ stats__secmap_iterator_steppings);
+ VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
+ stats__secmaps_search, stats__secmaps_search_slow);
+
+ VG_(printf)("%s","\n");
+ VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
+ stats__cache_totrefs, stats__cache_totmisses );
+ VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
+ stats__cache_Z_fetches, stats__cache_F_fetches );
+ VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
+ stats__cache_Z_wbacks, stats__cache_F_wbacks );
+ VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n",
+ stats__cache_invals, stats__cache_flushes );
+ VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
+ stats__cache_make_New_arange,
+ stats__cache_make_New_inZrep);
+
+ VG_(printf)("%s","\n");
+ VG_(printf)(" cline: %'10lu normalises\n",
+ stats__cline_normalises );
+ VG_(printf)(" cline: rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
+ stats__cline_read64s,
+ stats__cline_read32s,
+ stats__cline_read16s,
+ stats__cline_read8s );
+ VG_(printf)(" cline: wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
+ stats__cline_write64s,
+ stats__cline_write32s,
+ stats__cline_write16s,
+ stats__cline_write8s );
+ VG_(printf)(" cline: sets 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
+ stats__cline_set64s,
+ stats__cline_set32s,
+ stats__cline_set16s,
+ stats__cline_set8s );
+ VG_(printf)(" cline: get1s %'lu, copy1s %'lu\n",
+ stats__cline_get8s, stats__cline_copy8s );
+ VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
+ stats__cline_64to32splits,
+ stats__cline_32to16splits,
+ stats__cline_16to8splits );
+ VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
+ stats__cline_64to32pulldown,
+ stats__cline_32to16pulldown,
+ stats__cline_16to8pulldown );
+ if (0)
+ VG_(printf)(" cline: sizeof(CacheLineZ) %ld, covers %ld bytes of arange\n",
+ (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);
+
+ VG_(printf)("%s","\n");
+
+ VG_(printf)(" libhb: %'13llu msm_read (%'llu changed)\n",
+ stats__msm_read, stats__msm_read_change);
+ VG_(printf)(" libhb: %'13llu msm_write (%'llu changed)\n",
+ stats__msm_write, stats__msm_write_change);
+ VG_(printf)(" libhb: %'13llu getOrd queries (%'llu misses)\n",
+ stats__getOrdering_queries, stats__getOrdering_misses);
+ VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
+ stats__join2_queries, stats__join2_misses);
+
+ VG_(printf)("%s","\n");
+ VG_(printf)(
+ " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
+ VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
+ );
+ VG_(printf)( " libhb: %lu entries in vts_set\n",
+ VG_(sizeFM)( vts_set ) );
+
+ VG_(printf)("%s","\n");
+ VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
+ stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
+ stats__ctxt_rcdec2,
+ stats__ctxt_rcdec3 );
+ VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
+ stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
+ VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n",
+ (UWord)N_RCEC_TAB,
+ stats__ctxt_tab_curr );
+ VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
+ stats__ctxt_tab_qs,
+ stats__ctxt_tab_cmps );
+#if 0
+ VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
+ VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
+ VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
+ VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
+ VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
+ VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
+ VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
+ VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
+ VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
+ VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
+ VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
+ VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
+ VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
+ VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
+
+ VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
+ VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
+ VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
+ VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
+#endif
+
+ VG_(printf)("%s","<<< END libhb stats >>>\n");
+ VG_(printf)("%s","\n");
+
+ }
+}
+
/* Notification that 'thr' has exited asynchronously.  Currently a
   no-op: the thread's clocks are left in place. */
void libhb_async_exit ( Thr* thr )
{
   /* is there anything we need to do? */
}
+
+/* Both Segs and SOs point to VTSs. However, there is no sharing, so
+ a Seg that points at a VTS is its one-and-only owner, and ditto for
+ a SO that points at a VTS. */
+
/* Allocate a new, empty synchronisation object (no send recorded
   yet).  The caller owns it and must release it with
   libhb_so_dealloc. */
SO* libhb_so_alloc ( void )
{
   return SO__Alloc();
}
+
/* Release a synchronisation object previously obtained from
   libhb_so_alloc, dropping any VTS references it holds. */
void libhb_so_dealloc ( SO* so )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);
   SO__Dealloc(so);
}
+
+/* See comments in libhb.h for details on the meaning of
+ strong vs weak sends and strong vs weak receives. */
/* See comments in libhb.h for details on the meaning of
   strong vs weak sends and strong vs weak receives. */
/* Record a send by 'thr' on 'so': install (strong) or join in
   (weak) the thread's clocks, then tick the thread's own clocks. */
void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
{
   /* Copy the VTSs from 'thr' into the sync object, and then move
      the thread along one step. */

   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);

   /* stay sane .. a thread's read-clock must always lead or be the
      same as its write-clock */
   { POrd ord = VtsID__getOrdering(thr->viW, thr->viR);
     tl_assert(ord == POrd_EQ || ord == POrd_LT);
   }

   /* since we're overwriting the VtsIDs in the SO, we need to drop
      any references made by the previous contents thereof */
   if (so->viR == VtsID_INVALID) {
      /* first ever send on this SO: just take copies of the
         thread's clocks */
      tl_assert(so->viW == VtsID_INVALID);
      so->viR = thr->viR;
      so->viW = thr->viW;
      VtsID__rcinc(so->viR);
      VtsID__rcinc(so->viW);
   } else {
      /* In a strong send, we dump any previous VC in the SO and
         install the sending thread's VC instead.  For a weak send we
         must join2 with what's already there. */
      tl_assert(so->viW != VtsID_INVALID);
      VtsID__rcdec(so->viR);
      VtsID__rcdec(so->viW);
      so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
      so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
      VtsID__rcinc(so->viR);
      VtsID__rcinc(so->viW);
   }

   /* move both parent clocks along */
   VtsID__rcdec(thr->viR);
   VtsID__rcdec(thr->viW);
   thr->viR = VtsID__tick( thr->viR, thr );
   thr->viW = VtsID__tick( thr->viW, thr );
   VtsID__rcinc(thr->viR);
   VtsID__rcinc(thr->viW);
   if (strong_send)
      show_thread_state("s-send", thr);
   else
      show_thread_state("w-send", thr);
}
+
/* Record a receive by 'thr' on 'so': join the SO's clocks into the
   thread's read clock, and (for a strong receive) into its write
   clock too.  A receive on an SO never sent on is a no-op. */
void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);

   if (so->viR != VtsID_INVALID) {
      tl_assert(so->viW != VtsID_INVALID);

      /* Weak receive (basically, an R-acquisition of a R-W lock).
         This advances the read-clock of the receiver, but not the
         write-clock. */
      VtsID__rcdec(thr->viR);
      thr->viR = VtsID__join2( thr->viR, so->viR );
      VtsID__rcinc(thr->viR);

      /* For a strong receive, we also advance the receiver's write
         clock, which means the receive as a whole is essentially
         equivalent to a W-acquisition of a R-W lock. */
      if (strong_recv) {
         VtsID__rcdec(thr->viW);
         thr->viW = VtsID__join2( thr->viW, so->viW );
         VtsID__rcinc(thr->viW);
      }

      if (strong_recv)
         show_thread_state("s-recv", thr);
      else
         show_thread_state("w-recv", thr);

   } else {
      tl_assert(so->viW == VtsID_INVALID);
      /* Deal with degenerate case: 'so' has no vts, so there has been
         no message posted to it.  Just ignore this case. */
      show_thread_state("d-recv", thr);
   }
}
+
+Bool libhb_so_everSent ( SO* so )
+{
+ if (so->viR == VtsID_INVALID) {
+ tl_assert(so->viW == VtsID_INVALID);
+ return False;
+ } else {
+ tl_assert(so->viW != VtsID_INVALID);
+ return True;
+ }
+}
+
+#define XXX1 0 // 0x67a106c
+#define XXX2 0
+
/* Debugging: True iff the access [a, a+szB] touches one of the two
   hardwired watch addresses XXX1/XXX2.  With both defined as zero
   (above) this is always False, so tracing is compiled out.
   NOTE(review): the upper bound is a+szB inclusive, so the byte one
   past the range also matches -- presumably acceptable for debug
   use; confirm if reviving this. */
static Bool TRACEME(Addr a, SizeT szB) {
   if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
   if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
   return False;
}
/* Debugging: dump the shadow value of the first byte of the traced
   range together with 'thr's clocks; 's' labels the call site. */
static void trace ( Thr* thr, Addr a, SizeT szB, HChar* s ) {
   SVal sv = zsm_read8(a);
   VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
   show_thread_state("", thr);
   VG_(printf)("%s","\n");
}
+
+void libhb_range_new ( Thr* thr, Addr a, SizeT szB )
+{
+ SVal sv = SVal__mkC(thr->viW, thr->viW);
+ tl_assert(is_sane_SVal_C(sv));
+ if(TRACEME(a,szB))trace(thr,a,szB,"nw-before");
+ zsm_set_range( a, szB, sv );
+ if(TRACEME(a,szB))trace(thr,a,szB,"nw-after ");
+}
+
+void libhb_range_noaccess ( Thr* thr, Addr a, SizeT szB )
+{
+ if(TRACEME(a,szB))trace(thr,a,szB,"NA-before");
+ zsm_set_range( a, szB, SVal__mkA() );
+ if(TRACEME(a,szB))trace(thr,a,szB,"NA-after ");
+}
+
/* Retrieve the tool-private pointer previously stored in 'thr' via
   libhb_set_Thr_opaque. */
void* libhb_get_Thr_opaque ( Thr* thr ) {
   tl_assert(thr);
   return thr->opaque;
}
+
/* Attach a tool-private pointer to 'thr'; libhb itself never
   interprets it. */
void libhb_set_Thr_opaque ( Thr* thr, void* v ) {
   tl_assert(thr);
   thr->opaque = v;
}
+
/* Copy shadow state for 'len' bytes from 'src' to 'dst' (needed to
   implement realloc).  The two ranges must not overlap (asserted
   inside zsm_copy_range). */
void libhb_copy_shadow_state ( Addr dst, Addr src, SizeT len )
{
   zsm_copy_range(dst, src, len);
}
+
+void libhb_maybe_GC ( void )
+{
+ event_map_maybe_GC();
+ /* If there are still freelist entries available, no need for a
+ GC. */
+ if (vts_tab_freelist != VtsID_INVALID)
+ return;
+ /* So all the table entries are full, and we're having to expand
+ the table. But did we hit the threshhold point yet? */
+ if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
+ return;
+ vts_tab__do_GC( False/*don't show stats*/ );
+}
+
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+// //
+// SECTION END main library //
+// //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+/*--------------------------------------------------------------------*/
+/*--- end libhb_main.c ---*/
+/*--------------------------------------------------------------------*/