/*--------------------------------------------------------------------*/
/*--- LibHB: a library for implementing and checking ---*/
/*--- the happens-before relationship in concurrent programs. ---*/
/*--- libhb_main.c ---*/
/*--------------------------------------------------------------------*/
/*
This file is part of LibHB, a library for implementing and checking
the happens-before relationship in concurrent programs.
Copyright (C) 2008-2008 OpenWorks Ltd
info@open-works.co.uk
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307, USA.
The GNU General Public License is contained in the file COPYING.
*/
#include "pub_tool_basics.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_xarray.h"
#include "pub_tool_oset.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
#include "pub_tool_options.h" // VG_(clo_verbosity)
#include "hg_basics.h"
#include "hg_wordset.h"
#include "hg_lock_n_thread.h"
#include "hg_errors.h"
#include "libhb.h"
/* Globals needed by other parts of the library.  These are set
once at startup and then never changed. */
static void (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
static ExeContext* (*main_get_EC)( Thr* ) = NULL;
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// //
// //
// //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// //
// SECTION BEGIN compressed shadow memory //
// //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
#ifndef __HB_ZSM_H
#define __HB_ZSM_H
typedef ULong SVal;
/* This value has special significance to the implementation, and callers
may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)
/* This is the default value for shadow memory. Initially the shadow
memory contains no accessible areas and so all reads produce this
value.  TODO: make this caller-definable. */
#define SVal_NOACCESS (2ULL << 62)
/* Initialise the library. Once initialised, it will (or may) call
rcinc and rcdec in response to all the calls below, in order to
allow the user to do reference counting on the SVals stored herein.
It is important to understand, however, that due to internal
caching, the reference counts are in general inaccurate, and can be
both above or below the true reference count for an item. In
particular, the library may indicate that the reference count for
an item is zero, when in fact it is not.
To make the reference counting exact and therefore non-pointless,
call zsm_flush_cache. Immediately after it returns, the reference
counts for all items, as deduced by the caller by observing calls
to rcinc and rcdec, will be correct, and so any items with a zero
reference count may be freed (or at least considered to be
unreferenced by this library).
*/
static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );
static void zsm_set_range ( Addr, SizeT, SVal );
static SVal zsm_read8 ( Addr );
static void zsm_copy_range ( Addr, Addr, SizeT );
static void zsm_flush_cache ( void );
#endif /* ! __HB_ZSM_H */
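/* Illustrative sketch (not compiled in) of how a caller might drive
   the zsm_* interface declared above.  The names my_rcinc, my_rcdec,
   example_zsm_usage and the address/value used are hypothetical; the
   real callers appear later in this file. */
#if 0
static void my_rcinc ( SVal sv ) { /* bump caller-side refcount of sv */ }
static void my_rcdec ( SVal sv ) { /* drop caller-side refcount of sv */ }

static void example_zsm_usage ( void )
{
   SVal sv;
   zsm_init( my_rcinc, my_rcdec );
   /* Mark 4096 bytes as holding some caller-chosen (non-INVALID) value. */
   zsm_set_range( (Addr)0x5000000, 4096, (SVal)0x1234 );
   sv = zsm_read8( (Addr)0x5000000 );
   /* Only after a flush are the rcinc/rcdec-deduced counts exact. */
   zsm_flush_cache();
   (void)sv;
}
#endif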
/* For the shadow mem cache stuff we may want more intrusive
checks. Unfortunately there's no almost-zero-cost way to make them
selectable at run time. Hence set the #if 0 to #if 1 and
rebuild if you want them. */
#if 0
# define SCE_CACHELINE 1 /* do sanity-check CacheLine stuff */
# define inline __attribute__((noinline))
/* probably want to ditch -fomit-frame-pointer too */
#else
# define SCE_CACHELINE 0 /* don't sanity-check CacheLine stuff */
#endif
/* For the SegmentID, SegmentSet and SVal stuff we may want more
intrusive checks. Again there's no zero cost way to do this. Set
the #if 0 to #if 1 and rebuild if you want them. */
#if 0
# define SCE_SVALS 1 /* sanity-check shadow value stuff */
#else
# define SCE_SVALS 0
#endif
/* Round a up to the next multiple of N.  N must be a power of 2 */
#define ROUNDUP(a, N) (((a) + (N) - 1) & ~((N)-1))
/* Round a down to the next multiple of N.  N must be a power of 2 */
#define ROUNDDN(a, N) ((a) & ~((N)-1))
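/* Examples: ROUNDUP(13, 8) == 16, ROUNDUP(16, 8) == 16,
             ROUNDDN(13, 8) == 8,  ROUNDDN(16, 8) == 16. */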
/* ------ User-supplied RC functions ------ */
static void(*rcinc)(SVal) = NULL;
static void(*rcdec)(SVal) = NULL;
/* ------ CacheLine ------ */
#define N_LINE_BITS 6 /* must be >= 3 */
#define N_LINE_ARANGE (1 << N_LINE_BITS)
#define N_LINE_TREES (N_LINE_ARANGE >> 3)
typedef
struct {
UShort descrs[N_LINE_TREES];
SVal svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
}
CacheLine;
#define TREE_DESCR_16_0 (1<<0)
#define TREE_DESCR_32_0 (1<<1)
#define TREE_DESCR_16_1 (1<<2)
#define TREE_DESCR_64 (1<<3)
#define TREE_DESCR_16_2 (1<<4)
#define TREE_DESCR_32_1 (1<<5)
#define TREE_DESCR_16_3 (1<<6)
#define TREE_DESCR_8_0 (1<<7)
#define TREE_DESCR_8_1 (1<<8)
#define TREE_DESCR_8_2 (1<<9)
#define TREE_DESCR_8_3 (1<<10)
#define TREE_DESCR_8_4 (1<<11)
#define TREE_DESCR_8_5 (1<<12)
#define TREE_DESCR_8_6 (1<<13)
#define TREE_DESCR_8_7 (1<<14)
#define TREE_DESCR_DTY (1<<15)
typedef
struct {
SVal dict[4]; /* can represent up to 4 diff values in the line */
UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
dict indexes */
/* if dict[0] == SVal_INVALID then dict[1] is the index of the
LineF to use, and dict[2..] are also SVal_INVALID. */
}
LineZ; /* compressed rep for a cache line */
typedef
struct {
Bool inUse;
SVal w64s[N_LINE_ARANGE];
}
LineF; /* full rep for a cache line */
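/* Worked example of the compressed (LineZ) representation: if the 64
   shadow values for a line are A for the first 32 bytes and B for the
   remaining 32, then dict[] == { A, B, SVal_INVALID, SVal_INVALID }
   and ix2s[] holds 64 two-bit dict indexes packed into 16 bytes, 0 for
   the first half and 1 for the second.  That is 16 bytes of indexes
   plus the dict, versus 64 * 8 == 512 bytes of SVals in the full
   (LineF) form.  A line needing more than 4 distinct values must fall
   back to a LineF. */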
/* Shadow memory.
Primary map is a WordFM Addr SecMap*.
SecMaps cover some page-size-ish section of address space and hold
a compressed representation.
CacheLine-sized chunks of SecMaps are copied into a Cache, being
decompressed when moved into the cache and recompressed on the
way out. Because of this, the cache must operate as a writeback
cache, not a writethrough one.
Each SecMap must hold a power-of-2 number of CacheLines. Hence
N_SECMAP_BITS must be >= N_LINE_BITS.
*/
#define N_SECMAP_BITS 13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)
// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
/* The data in the SecMap is held in the array of LineZs. Each LineZ
either carries the required data directly, in a compressed
representation, or it holds (in .dict[0]) an index to the LineF in
.linesF that holds the full representation.
Currently-unused LineF's have their .inUse bit set to zero.
Since each in-use LineF is referred to by exactly one LineZ,
the number of .linesZ[] that refer to .linesF should equal
the number of .linesF[] that have .inUse == True.
RC obligations: the RCs presented to the user include exactly
the values in:
* direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
* F reps that are in use (.inUse == True)
Hence the following actions at the following transitions are required:
F rep: .inUse==True -> .inUse==False -- rcdec_LineF
F rep: .inUse==False -> .inUse==True -- rcinc_LineF
Z rep: .dict[0] from other to SVal_INVALID -- rcdec_LineZ
Z rep: .dict[0] from SVal_INVALID to other -- rcinc_LineZ
*/
typedef
struct {
UInt magic;
LineZ linesZ[N_SECMAP_ZLINES];
LineF* linesF;
UInt linesF_size;
}
SecMap;
#define SecMap_MAGIC 0x571e58cbU
static inline Bool is_sane_SecMap ( SecMap* sm ) {
return sm != NULL && sm->magic == SecMap_MAGIC;
}
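/* Illustrative (not compiled in) check of the LineZ/LineF invariant
   stated above: the number of .linesZ[] entries that refer to an F rep
   should equal the number of .linesF[] entries with .inUse == True.
   The name check_SecMap_ZF_invariant is hypothetical. */
#if 0
static Bool check_SecMap_ZF_invariant ( SecMap* sm )
{
   Word i, nZrefsF = 0, nFinUse = 0;
   for (i = 0; i < N_SECMAP_ZLINES; i++)
      if (sm->linesZ[i].dict[0] == SVal_INVALID)
         nZrefsF++;
   for (i = 0; i < (Word)sm->linesF_size; i++)
      if (sm->linesF[i].inUse)
         nFinUse++;
   return nZrefsF == nFinUse;
}
#endif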
/* ------ Cache ------ */
#define N_WAY_BITS 16
#define N_WAY_NENT (1 << N_WAY_BITS)
/* Each tag is the address of the associated CacheLine, rounded down
to a CacheLine address boundary. A CacheLine size must be a power
of 2 and must be 8 or more. Hence an easy way to initialise the
cache so it is empty is to set all the tag values to any value % 8
!= 0, eg 1.  This means all queries in the cache initially miss.
It does, however, require us to detect, and not write back, any
line with a bogus tag. */
typedef
struct {
CacheLine lyns0[N_WAY_NENT];
Addr tags0[N_WAY_NENT];
}
Cache;
static inline Bool is_valid_scache_tag ( Addr tag ) {
/* a valid tag should be naturally aligned to the start of
a CacheLine. */
return 0 == (tag & (N_LINE_ARANGE - 1));
}
/* --------- Primary data structures --------- */
/* Shadow memory primary map */
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
static Cache cache_shmem;
static UWord stats__secmaps_search = 0; // # SM finds
static UWord stats__secmaps_search_slow = 0; // # SM lookupFMs
static UWord stats__secmaps_allocd = 0; // # SecMaps issued
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
static UWord stats__secmap_linesZ_bytes = 0; // .. using this much storage
static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
static UWord stats__secmap_linesF_bytes = 0; // .. using this much storage
static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
static UWord stats__cache_Z_fetches = 0; // # Z lines fetched
static UWord stats__cache_Z_wbacks = 0; // # Z lines written back
static UWord stats__cache_F_fetches = 0; // # F lines fetched
static UWord stats__cache_F_wbacks = 0; // # F lines written back
static UWord stats__cache_invals = 0; // # cache invals
static UWord stats__cache_flushes = 0; // # cache flushes
static UWord stats__cache_totrefs = 0; // # total accesses
static UWord stats__cache_totmisses = 0; // # misses
static ULong stats__cache_make_New_arange = 0; // total arange made New
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
static UWord stats__cline_normalises = 0; // # calls to cacheline_normalise
static UWord stats__cline_read64s = 0; // # calls to s_m_read64
static UWord stats__cline_read32s = 0; // # calls to s_m_read32
static UWord stats__cline_read16s = 0; // # calls to s_m_read16
static UWord stats__cline_read8s = 0; // # calls to s_m_read8
static UWord stats__cline_write64s = 0; // # calls to s_m_write64
static UWord stats__cline_write32s = 0; // # calls to s_m_write32
static UWord stats__cline_write16s = 0; // # calls to s_m_write16
static UWord stats__cline_write8s = 0; // # calls to s_m_write8
static UWord stats__cline_set64s = 0; // # calls to s_m_set64
static UWord stats__cline_set32s = 0; // # calls to s_m_set32
static UWord stats__cline_set16s = 0; // # calls to s_m_set16
static UWord stats__cline_set8s = 0; // # calls to s_m_set8
static UWord stats__cline_get8s = 0; // # calls to s_m_get8
static UWord stats__cline_copy8s = 0; // # calls to s_m_copy8
static UWord stats__cline_64to32splits = 0; // # 64-bit accesses split
static UWord stats__cline_32to16splits = 0; // # 32-bit accesses split
static UWord stats__cline_16to8splits = 0; // # 16-bit accesses split
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown = 0; // # calls to pulldown_to_8
static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
return a & ~(N_SECMAP_ARANGE - 1);
}
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
return a & (N_SECMAP_ARANGE - 1);
}
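/* Example of the address arithmetic: with N_SECMAP_BITS == 13 and
   N_LINE_BITS == 6, the address 0x4005A37 has SecMap base 0x4004000,
   SecMap offset 0x1A37, LineZ index 0x1A37 >> 6 == 0x68, and in-line
   offset 0x37. */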
/*----------------------------------------------------------------*/
/*--- map_shmem :: WordFM Addr SecMap ---*/
/*--- shadow memory (low level handlers) (shmem__* fns) ---*/
/*----------------------------------------------------------------*/
/*--------------- SecMap allocation --------------- */
static HChar* shmem__bigchunk_next = NULL;
static HChar* shmem__bigchunk_end1 = NULL;
static void* shmem__bigchunk_alloc ( SizeT n )
{
const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
tl_assert(n > 0);
n = VG_ROUNDUP(n, 16);
tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
<= (SSizeT)sHMEM__BIGCHUNK_SIZE);
if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
if (0)
VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
(Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
if (shmem__bigchunk_next == NULL)
VG_(out_of_memory_NORETURN)(
"helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
}
tl_assert(shmem__bigchunk_next);
tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
shmem__bigchunk_next += n;
return shmem__bigchunk_next - n;
}
static SecMap* shmem__alloc_SecMap ( void )
{
Word i, j;
SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
if (0) VG_(printf)("alloc_SecMap %p\n",sm);
tl_assert(sm);
sm->magic = SecMap_MAGIC;
for (i = 0; i < N_SECMAP_ZLINES; i++) {
sm->linesZ[i].dict[0] = SVal_NOACCESS;
sm->linesZ[i].dict[1] = SVal_INVALID;
sm->linesZ[i].dict[2] = SVal_INVALID;
sm->linesZ[i].dict[3] = SVal_INVALID;
for (j = 0; j < N_LINE_ARANGE/4; j++)
sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
}
sm->linesF = NULL;
sm->linesF_size = 0;
stats__secmaps_allocd++;
stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
return sm;
}
typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
static SecMap* shmem__find_SecMap ( Addr ga )
{
SecMap* sm = NULL;
Addr gaKey = shmem__round_to_SecMap_base(ga);
// Cache
stats__secmaps_search++;
if (LIKELY(gaKey == smCache[0].gaKey))
return smCache[0].sm;
if (LIKELY(gaKey == smCache[1].gaKey)) {
SMCacheEnt tmp = smCache[0];
smCache[0] = smCache[1];
smCache[1] = tmp;
return smCache[0].sm;
}
if (gaKey == smCache[2].gaKey) {
SMCacheEnt tmp = smCache[1];
smCache[1] = smCache[2];
smCache[2] = tmp;
return smCache[1].sm;
}
// end Cache
stats__secmaps_search_slow++;
if (VG_(lookupFM)( map_shmem,
NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
tl_assert(sm != NULL);
smCache[2] = smCache[1];
smCache[1] = smCache[0];
smCache[0].gaKey = gaKey;
smCache[0].sm = sm;
} else {
tl_assert(sm == NULL);
}
return sm;
}
static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
{
SecMap* sm = shmem__find_SecMap ( ga );
if (LIKELY(sm)) {
return sm;
} else {
/* create a new one */
Addr gaKey = shmem__round_to_SecMap_base(ga);
sm = shmem__alloc_SecMap();
tl_assert(sm);
VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
return sm;
}
}
/* ------------ LineF and LineZ related ------------ */
static void rcinc_LineF ( LineF* lineF ) {
UWord i;
tl_assert(lineF->inUse);
for (i = 0; i < N_LINE_ARANGE; i++)
rcinc(lineF->w64s[i]);
}
static void rcdec_LineF ( LineF* lineF ) {
UWord i;
tl_assert(lineF->inUse);
for (i = 0; i < N_LINE_ARANGE; i++)
rcdec(lineF->w64s[i]);
}
static void rcinc_LineZ ( LineZ* lineZ ) {
tl_assert(lineZ->dict[0] != SVal_INVALID);
rcinc(lineZ->dict[0]);
if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
}
static void rcdec_LineZ ( LineZ* lineZ ) {
tl_assert(lineZ->dict[0] != SVal_INVALID);
rcdec(lineZ->dict[0]);
if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
}
inline
static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
Word bix, shft, mask, prep;
tl_assert(ix >= 0);
bix = ix >> 2;
shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
mask = 3 << shft;
prep = b2 << shft;
arr[bix] = (arr[bix] & ~mask) | prep;
}
inline
static UWord read_twobit_array ( UChar* arr, UWord ix ) {
Word bix, shft;
tl_assert(ix >= 0);
bix = ix >> 2;
shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
return (arr[bix] >> shft) & 3;
}
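/* Example: write_twobit_array(arr, 5, 3) updates only bits 2..3 of
   arr[1] (bix == 5 >> 2 == 1, shft == 2 * (5 & 3) == 2), so
   read_twobit_array(arr, 5) then returns 3 and the other three 2-bit
   fields packed into arr[1] are left untouched. */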
/* Given address 'tag', find either the Z or F line containing relevant
data, so it can be read into the cache.
*/
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
/*OUT*/LineF** fp, Addr tag ) {
LineZ* lineZ;
LineF* lineF;
UWord zix;
SecMap* sm = shmem__find_or_alloc_SecMap(tag);
UWord smoff = shmem__get_SecMap_offset(tag);
/* since smoff is derived from a valid tag, it should be
cacheline-aligned. */
tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
zix = smoff >> N_LINE_BITS;
tl_assert(zix < N_SECMAP_ZLINES);
lineZ = &sm->linesZ[zix];
lineF = NULL;
if (lineZ->dict[0] == SVal_INVALID) {
UInt fix = (UInt)lineZ->dict[1];
tl_assert(sm->linesF);
tl_assert(sm->linesF_size > 0);
tl_assert(fix >= 0 && fix < sm->linesF_size);
lineF = &sm->linesF[fix];
tl_assert(lineF->inUse);
lineZ = NULL;
}
*zp = lineZ;
*fp = lineF;
}
/* Given address 'tag', return the relevant SecMap and the index of
the LineZ within it, in the expectation that the line is to be
overwritten.  Regardless of whether 'tag' is currently associated
with a Z or F representation, rcdec is applied to the current
representation, in recognition of the fact that the contents are
just about to be overwritten. */
static __attribute__((noinline))
void find_Z_for_writing ( /*OUT*/SecMap** smp,
/*OUT*/Word* zixp,
Addr tag ) {
LineZ* lineZ;
LineF* lineF;
UWord zix;
SecMap* sm = shmem__find_or_alloc_SecMap(tag);
UWord smoff = shmem__get_SecMap_offset(tag);
/* since smoff is derived from a valid tag, it should be
cacheline-aligned. */
tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
zix = smoff >> N_LINE_BITS;
tl_assert(zix < N_SECMAP_ZLINES);
lineZ = &sm->linesZ[zix];
lineF = NULL;
/* re RCs, we are freeing up this LineZ/LineF so that new data can
be parked in it. Hence have to rcdec it accordingly. */
/* If lineZ has an associated lineF, free it up. */
if (lineZ->dict[0] == SVal_INVALID) {
UInt fix = (UInt)lineZ->dict[1];
tl_assert(sm->linesF);
tl_assert(sm->linesF_size > 0);
tl_assert(fix >= 0 && fix < sm->linesF_size);
lineF = &sm->linesF[fix];
tl_assert(lineF->inUse);
rcdec_LineF(lineF);
lineF->inUse = False;
} else {
rcdec_LineZ(lineZ);
}
*smp = sm;
*zixp = zix;
}
static __attribute__((noinline))
void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
UInt i, new_size;
LineF* nyu;
if (sm->linesF) {
tl_assert(sm->linesF_size > 0);
} else {
tl_assert(sm->linesF_size == 0);
}
if (sm->linesF) {
for (i = 0; i < sm->linesF_size; i++) {
if (!sm->linesF[i].inUse) {
*fixp = (Word)i;
return;
}
}
}
/* No free F line found. Expand existing array and try again. */
new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
nyu = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
new_size * sizeof(LineF) );
tl_assert(nyu);
stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
stats__secmap_linesF_bytes += (new_size - sm->linesF_size)
* sizeof(LineF);
if (0)
VG_(printf)("SM %p: expand F array from %d to %d\n",
sm, (Int)sm->linesF_size, new_size);
for (i = 0; i < new_size; i++)
nyu[i].inUse = False;
if (sm->linesF) {
for (i = 0; i < sm->linesF_size; i++) {
tl_assert(sm->linesF[i].inUse);
nyu[i] = sm->linesF[i];
}
VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
HG_(free)(sm->linesF);
}
sm->linesF = nyu;
sm->linesF_size = new_size;
for (i = 0; i < sm->linesF_size; i++) {
if (!sm->linesF[i].inUse) {
*fixp = (Word)i;
return;
}
}
/*NOTREACHED*/
tl_assert(0);
}
/* ------------ CacheLine and implicit-tree related ------------ */
__attribute__((unused))
static void pp_CacheLine ( CacheLine* cl ) {
Word i;
if (!cl) {
VG_(printf)("%s","pp_CacheLine(NULL)\n");
return;
}
for (i = 0; i < N_LINE_TREES; i++)
VG_(printf)(" descr: %04lx\n", (UWord)cl->descrs[i]);
for (i = 0; i < N_LINE_ARANGE; i++)
VG_(printf)(" sval: %08lx\n", (UWord)cl->svals[i]);
}
static UChar descr_to_validbits ( UShort descr )
{
/* a.k.a Party Time for gcc's constant folder */
# define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0) \
( (UShort) ( ( (b8_7) << 14) | ( (b8_6) << 13) | \
( (b8_5) << 12) | ( (b8_4) << 11) | \
( (b8_3) << 10) | ( (b8_2) << 9) | \
( (b8_1) << 8) | ( (b8_0) << 7) | \
( (b16_3) << 6) | ( (b32_1) << 5) | \
( (b16_2) << 4) | ( (b64) << 3) | \
( (b16_1) << 2) | ( (b32_0) << 1) | \
( (b16_0) << 0) ) )
# define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
( (bit5) << 5) | ( (bit4) << 4) | \
( (bit3) << 3) | ( (bit2) << 2) | \
( (bit1) << 1) | ( (bit0) << 0) ) )
/* these should all get folded out at compile time */
tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);
switch (descr) {
/*
+--------------------------------- TREE_DESCR_8_7
| +------------------- TREE_DESCR_8_0
| | +---------------- TREE_DESCR_16_3
| | | +-------------- TREE_DESCR_32_1
| | | | +------------ TREE_DESCR_16_2
| | | | | +--------- TREE_DESCR_64
| | | | | | +------ TREE_DESCR_16_1
| | | | | | | +---- TREE_DESCR_32_0
| | | | | | | | +-- TREE_DESCR_16_0
| | | | | | | | |
| | | | | | | | | GRANULARITY, 7 -> 0 */
case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */
return BYTE(1,1,1,1,1,1,1,1);
case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */
return BYTE(1,1,0,1,1,1,1,1);
case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */
return BYTE(0,1,1,1,1,1,1,1);
case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */
return BYTE(0,1,0,1,1,1,1,1);
case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */
return BYTE(1,1,1,1,1,1,0,1);
case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */
return BYTE(1,1,0,1,1,1,0,1);
case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */
return BYTE(0,1,1,1,1,1,0,1);
case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */
return BYTE(0,1,0,1,1,1,0,1);
case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */
return BYTE(1,1,1,1,0,1,1,1);
case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */
return BYTE(1,1,0,1,0,1,1,1);
case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */
return BYTE(0,1,1,1,0,1,1,1);
case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */
return BYTE(0,1,0,1,0,1,1,1);
case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */
return BYTE(1,1,1,1,0,1,0,1);
case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */
return BYTE(1,1,0,1,0,1,0,1);
case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */
return BYTE(0,1,1,1,0,1,0,1);
case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */
return BYTE(0,1,0,1,0,1,0,1);
case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */
return BYTE(0,0,0,1,1,1,1,1);
case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */
return BYTE(0,0,0,1,1,1,0,1);
case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */
return BYTE(0,0,0,1,0,1,1,1);
case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */
return BYTE(0,0,0,1,0,1,0,1);
case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */
return BYTE(1,1,1,1,0,0,0,1);
case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */
return BYTE(1,1,0,1,0,0,0,1);
case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */
return BYTE(0,1,1,1,0,0,0,1);
case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */
return BYTE(0,1,0,1,0,0,0,1);
case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
return BYTE(0,0,0,1,0,0,0,1);
case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
return BYTE(0,0,0,0,0,0,0,1);
default: return BYTE(0,0,0,0,0,0,0,0);
/* INVALID - any valid descr produces at least one
valid bit in tree[0..7]*/
}
/* NOTREACHED*/
tl_assert(0);
# undef DESCR
# undef BYTE
}
__attribute__((unused))
static Bool is_sane_Descr ( UShort descr ) {
return descr_to_validbits(descr) != 0;
}
static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
VG_(sprintf)(dst,
"%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
(Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
(Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
(Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
(Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
(Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
(Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
(Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
(Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
(Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
(Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
(Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
(Int)((descr & TREE_DESCR_64) ? 1 : 0),
(Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
(Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
(Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
);
}
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
(Int)((byte & 128) ? 1 : 0),
(Int)((byte & 64) ? 1 : 0),
(Int)((byte & 32) ? 1 : 0),
(Int)((byte & 16) ? 1 : 0),
(Int)((byte & 8) ? 1 : 0),
(Int)((byte & 4) ? 1 : 0),
(Int)((byte & 2) ? 1 : 0),
(Int)((byte & 1) ? 1 : 0)
);
}
static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
Word i;
UChar validbits = descr_to_validbits(descr);
HChar buf[128], buf2[128];
if (validbits == 0)
goto bad;
for (i = 0; i < 8; i++) {
if (validbits & (1<<i)) {
if (tree[i] == SVal_INVALID)
goto bad;
} else {
if (tree[i] != SVal_INVALID)
goto bad;
}
}
return True;
bad:
sprintf_Descr( buf, descr );
sprintf_Byte( buf2, validbits );
VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
VG_(printf)(" validbits 0x%02lx %s\n", (UWord)validbits, buf2);
VG_(printf)(" descr 0x%04lx %s\n", (UWord)descr, buf);
for (i = 0; i < 8; i++)
VG_(printf)(" [%ld] 0x%016llx\n", i, tree[i]);
VG_(printf)("%s","}\n");
return False;
}
static Bool is_sane_CacheLine ( CacheLine* cl )
{
Word tno, cloff;
if (!cl) goto bad;
for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
UShort descr = cl->descrs[tno];
SVal* tree = &cl->svals[cloff];
if (!is_sane_Descr_and_Tree(descr, tree))
goto bad;
}
tl_assert(cloff == N_LINE_ARANGE);
return True;
bad:
pp_CacheLine(cl);
return False;
}
static UShort normalise_tree ( /*MOD*/SVal* tree )
{
UShort descr;
/* pre: incoming tree[0..7] does not have any invalid shvals, in
particular no zeroes. */
if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
|| tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
|| tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
|| tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
tl_assert(0);
descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
| TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
| TREE_DESCR_8_1 | TREE_DESCR_8_0;
/* build 16-bit layer */
if (tree[1] == tree[0]) {
tree[1] = SVal_INVALID;
descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
descr |= TREE_DESCR_16_0;
}
if (tree[3] == tree[2]) {
tree[3] = SVal_INVALID;
descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
descr |= TREE_DESCR_16_1;
}
if (tree[5] == tree[4]) {
tree[5] = SVal_INVALID;
descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
descr |= TREE_DESCR_16_2;
}
if (tree[7] == tree[6]) {
tree[7] = SVal_INVALID;
descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
descr |= TREE_DESCR_16_3;
}
/* build 32-bit layer */
if (tree[2] == tree[0]
&& (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
descr |= TREE_DESCR_32_0;
}
if (tree[6] == tree[4]
&& (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
descr |= TREE_DESCR_32_1;
}
/* build 64-bit layer */
if (tree[4] == tree[0]
&& (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
descr |= TREE_DESCR_64;
}
return descr;
}
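/* Example: if all eight leaves hold the same SVal V, the 16-bit pass
   invalidates tree[1], tree[3], tree[5] and tree[7], the 32-bit pass
   invalidates tree[2] and tree[6], and the 64-bit pass invalidates
   tree[4], leaving tree[0] == V and descr == TREE_DESCR_64. */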
/* This takes a cacheline where all the data is at the leaves
(.svals[..]) and builds a correctly normalised tree. */
static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
{
Word tno, cloff;
for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
SVal* tree = &cl->svals[cloff];
cl->descrs[tno] = normalise_tree( tree );
}
tl_assert(cloff == N_LINE_ARANGE);
if (SCE_CACHELINE)
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
stats__cline_normalises++;
}
typedef struct { UChar count; SVal sval; } CountedSVal;
static
void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
/*OUT*/Word* dstUsedP,
Word nDst, CacheLine* src )
{
Word tno, cloff, dstUsed;
tl_assert(nDst == N_LINE_ARANGE);
dstUsed = 0;
for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
UShort descr = src->descrs[tno];
SVal* tree = &src->svals[cloff];
/* sequentialise the tree described by (descr,tree). */
# define PUT(_n,_v) \
do { dst[dstUsed ].count = (_n); \
dst[dstUsed++].sval = (_v); \
} while (0)
/* byte 0 */
if (descr & TREE_DESCR_64) PUT(8, tree[0]); else
if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
if (descr & TREE_DESCR_8_0) PUT(1, tree[0]);
/* byte 1 */
if (descr & TREE_DESCR_8_1) PUT(1, tree[1]);
/* byte 2 */
if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
if (descr & TREE_DESCR_8_2) PUT(1, tree[2]);
/* byte 3 */
if (descr & TREE_DESCR_8_3) PUT(1, tree[3]);
/* byte 4 */
if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
if (descr & TREE_DESCR_8_4) PUT(1, tree[4]);
/* byte 5 */
if (descr & TREE_DESCR_8_5) PUT(1, tree[5]);
/* byte 6 */
if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
if (descr & TREE_DESCR_8_6) PUT(1, tree[6]);
/* byte 7 */
if (descr & TREE_DESCR_8_7) PUT(1, tree[7]);
# undef PUT
/* END sequentialise the tree described by (descr,tree). */
}
tl_assert(cloff == N_LINE_ARANGE);
tl_assert(dstUsed <= nDst);
*dstUsedP = dstUsed;
}
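/* Example: a tree whose descr is just TREE_DESCR_64 contributes one
   (count 8, tree[0]) pair, whereas a fully split tree (all eight
   TREE_DESCR_8_* bits set) contributes eight count-1 pairs.  Hence
   dstUsed is between N_LINE_TREES and N_LINE_ARANGE inclusive. */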
/* Write the cacheline 'wix' to backing store. Where it ends up
is determined by its tag field. */
static __attribute__((noinline)) void cacheline_wback ( UWord wix )
{
Word i, j, k, m;
Addr tag;
SecMap* sm;
CacheLine* cl;
LineZ* lineZ;
LineF* lineF;
Word zix, fix, csvalsUsed;
CountedSVal csvals[N_LINE_ARANGE];
SVal sv;
if (0)
VG_(printf)("scache wback line %d\n", (Int)wix);
tl_assert(wix >= 0 && wix < N_WAY_NENT);
tag = cache_shmem.tags0[wix];
cl = &cache_shmem.lyns0[wix];
/* The cache line may have been invalidated; if so, ignore it. */
if (!is_valid_scache_tag(tag))
return;
/* Where are we going to put it? */
sm = NULL;
lineZ = NULL;
lineF = NULL;
zix = fix = -1;
/* find the Z line to write in and rcdec it or the associated F
line. */
find_Z_for_writing( &sm, &zix, tag );
tl_assert(sm);
tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
lineZ = &sm->linesZ[zix];
/* Generate the data to be stored */
if (SCE_CACHELINE)
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
csvalsUsed = -1;
sequentialise_CacheLine( csvals, &csvalsUsed,
N_LINE_ARANGE, cl );
tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
if (0) VG_(printf)("%lu ", csvalsUsed);
lineZ->dict[0] = lineZ->dict[1]
= lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
/* i indexes actual shadow values, k is cursor in csvals */
i = 0;
for (k = 0; k < csvalsUsed; k++) {
sv = csvals[k].sval;
if (SCE_SVALS)
tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
/* do we already have it? */
if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
/* no. look for a free slot. */
if (SCE_SVALS)
tl_assert(sv != SVal_INVALID);
if (lineZ->dict[0]
== SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
if (lineZ->dict[1]
== SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
if (lineZ->dict[2]
== SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
if (lineZ->dict[3]
== SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
break; /* we'll have to use the f rep */
dict_ok:
m = csvals[k].count;
if (m == 8) {
write_twobit_array( lineZ->ix2s, i+0, j );
write_twobit_array( lineZ->ix2s, i+1, j );
write_twobit_array( lineZ->ix2s, i+2, j );
write_twobit_array( lineZ->ix2s, i+3, j );
write_twobit_array( lineZ->ix2s, i+4, j );
write_twobit_array( lineZ->ix2s, i+5, j );
write_twobit_array( lineZ->ix2s, i+6, j );
write_twobit_array( lineZ->ix2s, i+7, j );
i += 8;
}
else if (m == 4) {
write_twobit_array( lineZ->ix2s, i+0, j );
write_twobit_array( lineZ->ix2s, i+1, j );
write_twobit_array( lineZ->ix2s, i+2, j );
write_twobit_array( lineZ->ix2s, i+3, j );
i += 4;
}
else if (m == 1) {
write_twobit_array( lineZ->ix2s, i+0, j );
i += 1;
}
else if (m == 2) {
write_twobit_array( lineZ->ix2s, i+0, j );
write_twobit_array( lineZ->ix2s, i+1, j );
i += 2;
}
else {
tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
}
}
if (LIKELY(i == N_LINE_ARANGE)) {
/* Construction of the compressed representation was
successful. */
rcinc_LineZ(lineZ);
stats__cache_Z_wbacks++;
} else {
/* Cannot use the compressed(z) representation. Use the full(f)
rep instead. */
tl_assert(i >= 0 && i < N_LINE_ARANGE);
alloc_F_for_writing( sm, &fix );
tl_assert(sm->linesF);
tl_assert(sm->linesF_size > 0);
tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
lineF = &sm->linesF[fix];
tl_assert(!lineF->inUse);
lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
lineZ->dict[1] = (SVal)fix;
lineF->inUse = True;
i = 0;
for (k = 0; k < csvalsUsed; k++) {
if (SCE_SVALS)
tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
sv = csvals[k].sval;
if (SCE_SVALS)
tl_assert(sv != SVal_INVALID);
for (m = csvals[k].count; m > 0; m--) {
lineF->w64s[i] = sv;
i++;
}
}
tl_assert(i == N_LINE_ARANGE);
rcinc_LineF(lineF);
stats__cache_F_wbacks++;
}
//if (anyShared)
// sm->mbHasShared = True;
/* mb_tidy_one_cacheline(); */
}
/* Fetch the cacheline 'wix' from the backing store. The tag
associated with 'wix' is assumed to have already been filled in;
hence that is used to determine where in the backing store to read
from. */
static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
{
Word i;
Addr tag;
CacheLine* cl;
LineZ* lineZ;
LineF* lineF;
if (0)
VG_(printf)("scache fetch line %d\n", (Int)wix);
tl_assert(wix >= 0 && wix < N_WAY_NENT);
tag = cache_shmem.tags0[wix];
cl = &cache_shmem.lyns0[wix];
/* reject nonsense requests */
tl_assert(is_valid_scache_tag(tag));
lineZ = NULL;
lineF = NULL;
find_ZF_for_reading( &lineZ, &lineF, tag );
tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
/* expand the data into the bottom layer of the tree, then get
cacheline_normalise to build the descriptor array. */
if (lineF) {
tl_assert(lineF->inUse);
for (i = 0; i < N_LINE_ARANGE; i++) {
cl->svals[i] = lineF->w64s[i];
}
stats__cache_F_fetches++;
} else {
for (i = 0; i < N_LINE_ARANGE; i++) {
SVal sv;
UWord ix = read_twobit_array( lineZ->ix2s, i );
/* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
sv = lineZ->dict[ix];
tl_assert(sv != SVal_INVALID);
cl->svals[i] = sv;
}
stats__cache_Z_fetches++;
}
normalise_CacheLine( cl );
}
static void shmem__invalidate_scache ( void ) {
Word wix;
if (0) VG_(printf)("%s","scache inval\n");
tl_assert(!is_valid_scache_tag(1));
for (wix = 0; wix < N_WAY_NENT; wix++) {
cache_shmem.tags0[wix] = 1/*INVALID*/;
}
stats__cache_invals++;
}
static void shmem__flush_and_invalidate_scache ( void ) {
Word wix;
Addr tag;
if (0) VG_(printf)("%s","scache flush and invalidate\n");
tl_assert(!is_valid_scache_tag(1));
for (wix = 0; wix < N_WAY_NENT; wix++) {
tag = cache_shmem.tags0[wix];
if (tag == 1/*INVALID*/) {
/* already invalid; nothing to do */
} else {
tl_assert(is_valid_scache_tag(tag));
cacheline_wback( wix );
}
cache_shmem.tags0[wix] = 1/*INVALID*/;
}
stats__cache_flushes++;
stats__cache_invals++;
}
static inline Bool aligned16 ( Addr a ) {
return 0 == (a & 1);
}
static inline Bool aligned32 ( Addr a ) {
return 0 == (a & 3);
}
static inline Bool aligned64 ( Addr a ) {
return 0 == (a & 7);
}
static inline UWord get_cacheline_offset ( Addr a ) {
return (UWord)(a & (N_LINE_ARANGE - 1));
}
static inline Addr cacheline_ROUNDUP ( Addr a ) {
return ROUNDUP(a, N_LINE_ARANGE);
}
static inline Addr cacheline_ROUNDDN ( Addr a ) {
return ROUNDDN(a, N_LINE_ARANGE);
}
static inline UWord get_treeno ( Addr a ) {
return get_cacheline_offset(a) >> 3;
}
static inline UWord get_tree_offset ( Addr a ) {
return a & 7;
}
static __attribute__((noinline))
CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
static inline CacheLine* get_cacheline ( Addr a )
{
/* tag is 'a' with the in-line offset masked out,
eg a[31]..a[6] 000000 (for N_LINE_BITS == 6) */
Addr tag = a & ~(N_LINE_ARANGE - 1);
UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
stats__cache_totrefs++;
if (LIKELY(tag == cache_shmem.tags0[wix])) {
return &cache_shmem.lyns0[wix];
} else {
return get_cacheline_MISS( a );
}
}
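/* Example: with N_LINE_BITS == 6 and N_WAY_BITS == 16, address
   0x401000 maps to the same cache entry as 0x1000 (both give
   wix == 0x40), so at most one of those two lines can be cached at a
   time: the cache is effectively direct-mapped, one line per wix. */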
static __attribute__((noinline))
CacheLine* get_cacheline_MISS ( Addr a )
{
/* tag is 'a' with the in-line offset masked out,
eg a[31]..a[6] 000000 (for N_LINE_BITS == 6) */
CacheLine* cl;
Addr* tag_old_p;
Addr tag = a & ~(N_LINE_ARANGE - 1);
UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
tl_assert(tag != cache_shmem.tags0[wix]);
/* Dump the old line into the backing store. */
stats__cache_totmisses++;
cl = &cache_shmem.lyns0[wix];
tag_old_p = &cache_shmem.tags0[wix];
if (is_valid_scache_tag( *tag_old_p )) {
/* EXPENSIVE and REDUNDANT: callee does it */
if (SCE_CACHELINE)
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
cacheline_wback( wix );
}
/* and reload the new one */
*tag_old_p = tag;
cacheline_fetch( wix );
if (SCE_CACHELINE)
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
return cl;
}
static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
stats__cline_64to32pulldown++;
switch (toff) {
case 0: case 4:
tl_assert(descr & TREE_DESCR_64);
tree[4] = tree[0];
descr &= ~TREE_DESCR_64;
descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
break;
default:
tl_assert(0);
}
return descr;
}
static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
stats__cline_32to16pulldown++;
switch (toff) {
case 0: case 2:
if (!(descr & TREE_DESCR_32_0)) {
descr = pulldown_to_32(tree, 0, descr);
}
tl_assert(descr & TREE_DESCR_32_0);
tree[2] = tree[0];
descr &= ~TREE_DESCR_32_0;
descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
break;
case 4: case 6:
if (!(descr & TREE_DESCR_32_1)) {
descr = pulldown_to_32(tree, 4, descr);
}
tl_assert(descr & TREE_DESCR_32_1);
tree[6] = tree[4];
descr &= ~TREE_DESCR_32_1;
descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
break;
default:
tl_assert(0);
}
return descr;
}
static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
stats__cline_16to8pulldown++;
switch (toff) {
case 0: case 1:
if (!(descr & TREE_DESCR_16_0)) {
descr = pulldown_to_16(tree, 0, descr);
}
tl_assert(descr & TREE_DESCR_16_0);
tree[1] = tree[0];
descr &= ~TREE_DESCR_16_0;
descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
break;
case 2: case 3:
if (!(descr & TREE_DESCR_16_1)) {
descr = pulldown_to_16(tree, 2, descr);
}
tl_assert(descr & TREE_DESCR_16_1);
tree[3] = tree[2];
descr &= ~TREE_DESCR_16_1;
descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
break;
case 4: case 5:
if (!(descr & TREE_DESCR_16_2)) {
descr = pulldown_to_16(tree, 4, descr);
}
tl_assert(descr & TREE_DESCR_16_2);
tree[5] = tree[4];
descr &= ~TREE_DESCR_16_2;
descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
break;
case 6: case 7:
if (!(descr & TREE_DESCR_16_3)) {
descr = pulldown_to_16(tree, 6, descr);
}
tl_assert(descr & TREE_DESCR_16_3);
tree[7] = tree[6];
descr &= ~TREE_DESCR_16_3;
descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
break;
default:
tl_assert(0);
}
return descr;
}
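/* Example: starting from descr == TREE_DESCR_64, an 8-bit access at
   tree offset 5 calls pulldown_to_8(tree, 5, descr), which recursively
   copies tree[0] into tree[4] (64 -> 32_1 + 32_0), tree[4] into
   tree[6] (32_1 -> 16_3 + 16_2) and tree[4] into tree[5]
   (16_2 -> 8_5 + 8_4), returning
   descr == TREE_DESCR_32_0 | TREE_DESCR_16_3
            | TREE_DESCR_8_5 | TREE_DESCR_8_4. */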
static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
UShort mask;
switch (toff) {
case 0:
mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
tl_assert( (descr & mask) == mask );
descr &= ~mask;
descr |= TREE_DESCR_16_0;
break;
case 2:
mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
tl_assert( (descr & mask) == mask );
descr &= ~mask;
descr |= TREE_DESCR_16_1;
break;
case 4:
mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
tl_assert( (descr & mask) == mask );
descr &= ~mask;
descr |= TREE_DESCR_16_2;
break;
case 6:
mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
tl_assert( (descr & mask) == mask );
descr &= ~mask;
descr |= TREE_DESCR_16_3;
break;
default:
tl_assert(0);
}
return descr;
}
static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
UShort mask;
switch (toff) {
case 0:
if (!(descr & TREE_DESCR_16_0))
descr = pullup_descr_to_16(descr, 0);
if (!(descr & TREE_DESCR_16_1))
descr = pullup_descr_to_16(descr, 2);
mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
tl_assert( (descr & mask) == mask );
descr &= ~mask;
descr |= TREE_DESCR_32_0;
break;
case 4:
if (!(descr & TREE_DESCR_16_2))
descr = pullup_descr_to_16(descr, 4);
if (!(descr & TREE_DESCR_16_3))
descr = pullup_descr_to_16(descr, 6);
mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
tl_assert( (descr & mask) == mask );
descr &= ~mask;
descr |= TREE_DESCR_32_1;
break;
default:
tl_assert(0);
}
return descr;
}
static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
switch (toff) {
case 0: case 4:
return 0 != (descr & TREE_DESCR_64);
default:
tl_assert(0);
}
}
static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
switch (toff) {
case 0:
return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
case 2:
return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
case 4:
return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
case 6:
return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
default:
tl_assert(0);
}
}
/* ------------ Cache management ------------ */
static void zsm_flush_cache ( void )
{
shmem__flush_and_invalidate_scache();
}
static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
{
tl_assert( sizeof(UWord) == sizeof(Addr) );
rcinc = p_rcinc;
rcdec = p_rcdec;
tl_assert(map_shmem == NULL);
map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
HG_(free),
NULL/*unboxed UWord cmp*/);
tl_assert(map_shmem != NULL);
shmem__invalidate_scache();
/* a SecMap must contain an integral number of CacheLines */
tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
/* also ... a CacheLine holds an integral number of trees */
tl_assert(0 == (N_LINE_ARANGE % 8));
}
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// //
// SECTION END compressed shadow memory //
// //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// //
// SECTION BEGIN vts primitives //
// //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
#ifndef __HB_VTS_H
#define __HB_VTS_H
/* VtsIDs can't exceed 30 bits, since they have to be packed into the
lowest 30 bits of an SVal. */
typedef UInt VtsID;
#define VtsID_INVALID 0xFFFFFFFF
/* A VTS contains .ts, its vector clock, and also .id, a field to hold
a backlink for the caller's convenience. Since we have no idea
what to set that to in the library, it always gets set to
VtsID_INVALID. */
typedef
struct {
VtsID id;
XArray* ts; /* XArray* ScalarTS(abstract) */
}
VTS;
/* Create a new, empty VTS. */
VTS* VTS__new ( void );
/* Delete this VTS in its entirety. */
void VTS__delete ( VTS* vts );
/* Create a new singleton VTS. */
VTS* VTS__singleton ( Thr* thr, ULong tym );
/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
not modified. */
VTS* VTS__tick ( Thr* me, VTS* vts );
/* Return a new VTS constructed as the join (max) of the 2 args.
Neither arg is modified. */
VTS* VTS__join ( VTS* a, VTS* b );
/* Compute the partial ordering relation of the two args. */
typedef
enum { POrd_EQ=4, POrd_LT, POrd_GT, POrd_UN }
POrd;
POrd VTS__cmp ( VTS* a, VTS* b );
/* Compute an arbitrary structural (total) ordering on the two args,
based on their VCs, so they can be looked up in a table, tree, etc.
Returns -1, 0 or 1. */
Word VTS__cmp_structural ( VTS* a, VTS* b );
/* Debugging only. Display the given VTS in the buffer. */
void VTS__show ( HChar* buf, Int nBuf, VTS* vts );
/* Debugging only. Return vts[index], so to speak. */
ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
#endif /* ! __HB_VTS_H */
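/* Illustrative sketch (not compiled in) of how the VTS primitives
   declared above compose.  The name example_vts_usage and the values
   thrA and thrB are hypothetical. */
#if 0
static void example_vts_usage ( Thr* thrA, Thr* thrB )
{
   VTS *a, *a2, *b, *j;
   a  = VTS__singleton( thrA, 1 );        /* { thrA:1 } */
   a2 = VTS__tick( thrA, a );             /* { thrA:2 }; 'a' unchanged */
   b  = VTS__singleton( thrB, 5 );        /* { thrB:5 } */
   j  = VTS__join( a2, b );               /* { thrA:2, thrB:5 } */
   tl_assert( VTS__cmp( a, a2 ) == POrd_LT );  /* a is dominated by a2 */
   tl_assert( VTS__cmp( a2, b ) == POrd_UN );  /* unordered */
   VTS__delete(a); VTS__delete(a2); VTS__delete(b); VTS__delete(j);
}
#endif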
/*--------------- to do with Vector Timestamps ---------------*/
/* Scalar Timestamp */
typedef
struct {
Thr* thr;
ULong tym;
}
ScalarTS;
static Bool is_sane_VTS ( VTS* vts )
{
UWord i, n;
ScalarTS *st1, *st2;
if (!vts) return False;
if (!vts->ts) return False;
n = VG_(sizeXA)( vts->ts );
if (n >= 2) {
for (i = 0; i < n-1; i++) {
st1 = VG_(indexXA)( vts->ts, i );
st2 = VG_(indexXA)( vts->ts, i+1 );
if (st1->thr >= st2->thr)
return False;
if (st1->tym == 0 || st2->tym == 0)
return False;
}
}
return True;
}
/* Create a new, empty VTS.
*/
VTS* VTS__new ( void )
{
VTS* vts;
vts = HG_(zalloc)( "libhb.VTS__new.1", sizeof(VTS) );
tl_assert(vts);
vts->id = VtsID_INVALID;
vts->ts = VG_(newXA)( HG_(zalloc), "libhb.VTS__new.2",
HG_(free), sizeof(ScalarTS) );
tl_assert(vts->ts);
return vts;
}
/* Delete this VTS in its entirety.
*/
void VTS__delete ( VTS* vts )
{
tl_assert(vts);
tl_assert(vts->ts);
VG_(deleteXA)( vts->ts );
HG_(free)(vts);
}
/* Create a new singleton VTS.
*/
VTS* VTS__singleton ( Thr* thr, ULong tym ) {
ScalarTS st;
VTS* vts;
tl_assert(thr);
tl_assert(tym >= 1);
vts = VTS__new();
st.thr = thr;
st.tym = tym;
VG_(addToXA)( vts->ts, &st );
return vts;
}
/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
not modified.
*/
VTS* VTS__tick ( Thr* me, VTS* vts )
{
ScalarTS* here = NULL;
ScalarTS tmp;
VTS* res;
Word i, n;
tl_assert(me);
tl_assert(is_sane_VTS(vts));
//if (0) VG_(printf)("tick vts thrno %ld szin %d\n",
// (Word)me->errmsg_index, (Int)VG_(sizeXA)(vts) );
res = VTS__new();
n = VG_(sizeXA)( vts->ts );
/* main loop doesn't handle zero-entry case correctly, so
special-case it. */
if (n == 0) {
tmp.thr = me;
tmp.tym = 1;
VG_(addToXA)( res->ts, &tmp );
tl_assert(is_sane_VTS(res));
return res;
}
for (i = 0; i < n; i++) {
here = VG_(indexXA)( vts->ts, i );
if (me < here->thr) {
/* We just went past 'me', without seeing it. */
tmp.thr = me;
tmp.tym = 1;
VG_(addToXA)( res->ts, &tmp );
tmp = *here;
VG_(addToXA)( res->ts, &tmp );
i++;
break;
}
else if (me == here->thr) {
tmp = *here;
tmp.tym++;
VG_(addToXA)( res->ts, &tmp );
i++;
break;
}
else /* me > here->thr */ {
tmp = *here;
VG_(addToXA)( res->ts, &tmp );
}
}
tl_assert(i >= 0 && i <= n);
if (i == n && here && here->thr < me) {
tmp.thr = me;
tmp.tym = 1;
VG_(addToXA)( res->ts, &tmp );
} else {
for (/*keepgoing*/; i < n; i++) {
here = VG_(indexXA)( vts->ts, i );
tmp = *here;
VG_(addToXA)( res->ts, &tmp );
}
}
tl_assert(is_sane_VTS(res));
//if (0) VG_(printf)("tick vts thrno %ld szou %d\n",
// (Word)me->errmsg_index, (Int)VG_(sizeXA)(res) );
return res;
}
/* Return a new VTS constructed as the join (max) of the 2 args.
Neither arg is modified.
*/
VTS* VTS__join ( VTS* a, VTS* b )
{
Word ia, ib, useda, usedb;
ULong tyma, tymb, tymMax;
Thr* thr;
VTS* res;
ScalarTS *tmpa, *tmpb;
tl_assert(a && a->ts);
tl_assert(b && b->ts);
useda = VG_(sizeXA)( a->ts );
usedb = VG_(sizeXA)( b->ts );
res = VTS__new();
ia = ib = 0;
while (1) {
/* This logic is to enumerate triples (thr, tyma, tymb) drawn
from a and b in order, where thr is the next Thr*
occurring in either a or b, and tyma/b are the relevant
scalar timestamps, taking into account implicit zeroes. */
tl_assert(ia >= 0 && ia <= useda);
tl_assert(ib >= 0 && ib <= usedb);
tmpa = tmpb = NULL;
if (ia == useda && ib == usedb) {
/* both empty - done */
break;
}
else
if (ia == useda && ib != usedb) {
/* a empty, use up b */
tmpb = VG_(indexXA)( b->ts, ib );
thr = tmpb->thr;
tyma = 0;
tymb = tmpb->tym;
ib++;
}
else
if (ia != useda && ib == usedb) {
/* b empty, use up a */
tmpa = VG_(indexXA)( a->ts, ia );
thr = tmpa->thr;
tyma = tmpa->tym;
tymb = 0;
ia++;
}
else {
/* both not empty; extract lowest-Thr*'d triple */
tmpa = VG_(indexXA)( a->ts, ia );
tmpb = VG_(indexXA)( b->ts, ib );
if (tmpa->thr < tmpb->thr) {
/* a has the lowest unconsidered Thr* */
thr = tmpa->thr;
tyma = tmpa->tym;
tymb = 0;
ia++;
}
else
if (tmpa->thr > tmpb->thr) {
/* b has the lowest unconsidered Thr* */
thr = tmpb->thr;
tyma = 0;
tymb = tmpb->tym;
ib++;
} else {
/* they both next mention the same Thr* */
tl_assert(tmpa->thr == tmpb->thr);
thr = tmpa->thr; /* == tmpb->thr */
tyma = tmpa->tym;
tymb = tmpb->tym;
ia++;
ib++;
}
}
/* having laboriously determined (thr, tyma, tymb), do something
useful with it. */
tymMax = tyma > tymb ? tyma : tymb;
if (tymMax > 0) {
ScalarTS st;
st.thr = thr;
st.tym = tymMax;
VG_(addToXA)( res->ts, &st );
}
}
tl_assert(is_sane_VTS( res ));
return res;
}
/* Compute the partial ordering relation of the two args.
*/
POrd VTS__cmp ( VTS* a, VTS* b )
{
Word ia, ib, useda, usedb;
ULong tyma, tymb;
Thr* thr;
ScalarTS *tmpa, *tmpb;
Bool all_leq = True;
Bool all_geq = True;
tl_assert(a && a->ts);
tl_assert(b && b->ts);
useda = VG_(sizeXA)( a->ts );
usedb = VG_(sizeXA)( b->ts );
ia = ib = 0;
while (1) {
/* This logic is to enumerate triples (thr, tyma, tymb) drawn
from a and b in order, where thr is the next Thr*
occurring in either a or b, and tyma/b are the relevant
scalar timestamps, taking into account implicit zeroes. */
tl_assert(ia >= 0 && ia <= useda);
tl_assert(ib >= 0 && ib <= usedb);
tmpa = tmpb = NULL;
if (ia == useda && ib == usedb) {
/* both empty - done */
break;
}
else
if (ia == useda && ib != usedb) {
/* a empty, use up b */
tmpb = VG_(indexXA)( b->ts, ib );
thr = tmpb->thr;
tyma = 0;
tymb = tmpb->tym;
ib++;
}
else
if (ia != useda && ib == usedb) {
/* b empty, use up a */
tmpa = VG_(indexXA)( a->ts, ia );
thr = tmpa->thr;
tyma = tmpa->tym;
tymb = 0;
ia++;
}
else {
/* both not empty; extract lowest-Thr*'d triple */
tmpa = VG_(indexXA)( a->ts, ia );
tmpb = VG_(indexXA)( b->ts, ib );
if (tmpa->thr < tmpb->thr) {
/* a has the lowest unconsidered Thr* */
thr = tmpa->thr;
tyma = tmpa->tym;
tymb = 0;
ia++;
}
else
if (tmpa->thr > tmpb->thr) {
/* b has the lowest unconsidered Thr* */
thr = tmpb->thr;
tyma = 0;
tymb = tmpb->tym;
ib++;
} else {
/* they both next mention the same Thr* */
tl_assert(tmpa->thr == tmpb->thr);
thr = tmpa->thr; /* == tmpb->thr */
tyma = tmpa->tym;
tymb = tmpb->tym;
ia++;
ib++;
}
}
/* having laboriously determined (thr, tyma, tymb), do something
useful with it. */
if (tyma < tymb)
all_geq = False;
if (tyma > tymb)
all_leq = False;
}
if (all_leq && all_geq)
return POrd_EQ;
/* now we know they aren't equal, so either all_leq or all_geq or
both are false. */
if (all_leq)
return POrd_LT;
if (all_geq)
return POrd_GT;
/* hmm, neither all_geq or all_leq. This means unordered. */
return POrd_UN;
}
/* Compute an arbitrary structural (total) ordering on the two args,
based on their VCs, so they can be looked up in a table, tree, etc.
Returns -1, 0 or 1. (really just 'deriving Ord' :-)
*/
Word VTS__cmp_structural ( VTS* a, VTS* b )
{
/* We just need to generate an arbitrary total ordering based on
a->ts and b->ts. Preferably do it in a way which comes across likely
differences relatively quickly. */
Word i, useda, usedb;
ScalarTS *tmpa, *tmpb;
tl_assert(a && a->ts);
tl_assert(b && b->ts);
useda = VG_(sizeXA)( a->ts );
usedb = VG_(sizeXA)( b->ts );
if (useda < usedb) return -1;
if (useda > usedb) return 1;
/* Same length vectors, so let's step through them together. */
tl_assert(useda == usedb);
for (i = 0; i < useda; i++) {
tmpa = VG_(indexXA)( a->ts, i );
tmpb = VG_(indexXA)( b->ts, i );
if (tmpa->tym < tmpb->tym) return -1;
if (tmpa->tym > tmpb->tym) return 1;
if (tmpa->thr < tmpb->thr) return -1;
if (tmpa->thr > tmpb->thr) return 1;
}
/* They're identical. */
return 0;
}
/* Debugging only. Display the given VTS in the buffer.
*/
void VTS__show ( HChar* buf, Int nBuf, VTS* vts ) {
ScalarTS* st;
HChar unit[64];
Word i, n;
Int avail = nBuf;
tl_assert(vts && vts->ts);
tl_assert(nBuf > 16);
buf[0] = '[';
buf[1] = 0;
n = VG_(sizeXA)( vts->ts );
for (i = 0; i < n; i++) {
tl_assert(avail >= 40);
st = VG_(indexXA)( vts->ts, i );
VG_(memset)(unit, 0, sizeof(unit));
VG_(sprintf)(unit, i < n-1 ? "%p:%lld " : "%p:%lld",
st->thr, st->tym);
if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
VG_(strcat)(buf, " ...]");
buf[nBuf-1] = 0;
return;
}
VG_(strcat)(buf, unit);
avail -= VG_(strlen)(unit);
}
VG_(strcat)(buf, "]");
buf[nBuf-1] = 0;
}
/* Debugging only. Return vts[index], so to speak.
*/
ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) {
UWord i, n;
tl_assert(vts && vts->ts);
n = VG_(sizeXA)( vts->ts );
for (i = 0; i < n; i++) {
ScalarTS* st = VG_(indexXA)( vts->ts, i );
if (st->thr == idx)
return st->tym;
}
return 0;
}
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// //
// SECTION END vts primitives //
// //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// //
// SECTION BEGIN main library //
// //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////
// //
// VTS set //
// //
/////////////////////////////////////////////////////////
static WordFM* /* VTS* void void */ vts_set = NULL;
static void vts_set_init ( void )
{
tl_assert(!vts_set);
vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
HG_(free),
(Word(*)(UWord,UWord))VTS__cmp_structural );
tl_assert(vts_set);
}
/* Given a newly made VTS, look in vts_set to see if we already have
an identical one. If yes, free up this one and return instead a
pointer to the existing one. If no, add this one to the set and
return the same pointer. Caller differentiates the two cases by
comparing returned pointer with the supplied one (although that
does require that the supplied VTS is not already in the set).
*/
static VTS* vts_set__find_and_dealloc__or_add ( VTS* cand )
{
UWord keyW, valW;
/* lookup cand (by value) */
if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
/* found it */
tl_assert(valW == 0);
/* if this fails, cand (by ref) was already present (!) */
tl_assert(keyW != (UWord)cand);
VTS__delete(cand);
return (VTS*)keyW;
} else {
/* not present. Add and return pointer to same. */
VG_(addToFM)( vts_set, (UWord)cand, 0/*val is unused*/ );
return cand;
}
}
/////////////////////////////////////////////////////////
// //
// VTS table //
// //
/////////////////////////////////////////////////////////
static void VtsID__invalidate_caches ( void ); /* fwds */
/* A type to hold VTS table entries. Invariants:
If .vts == NULL, then this entry is not in use, so:
- .rc == 0
- this entry is on the freelist (unfortunately, does not imply
any constraints on the value of .freelink)
If .vts != NULL, then this entry is in use:
- .vts is findable in vts_set
- .vts->id == this entry number
- no specific value for .rc (even 0 is OK)
- this entry is not on the freelist, so .freelink == VtsID_INVALID
*/
typedef
struct {
VTS* vts; /* vts, in vts_set */
UWord rc; /* reference count - enough for entire aspace */
VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
}
VtsTE;
/* The VTS table. */
static XArray* /* of VtsTE */ vts_tab = NULL;
/* An index into the VTS table, indicating the start of the list of
free (available for use) entries. If the list is empty, this is
VtsID_INVALID. */
static VtsID vts_tab_freelist = VtsID_INVALID;
/* Do a GC of vts_tab when the freelist becomes empty AND the size of
vts_tab equals or exceeds this size. After GC, the value here is
set appropriately so as to check for the next GC point. */
static Word vts_next_GC_at = 1000;
static void vts_tab_init ( void )
{
vts_tab
= VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
HG_(free), sizeof(VtsTE) );
vts_tab_freelist
= VtsID_INVALID;
tl_assert(vts_tab);
}
/* Add ii to the free list, checking that it looks out-of-use. */
static void add_to_free_list ( VtsID ii )
{
VtsTE* ie = VG_(indexXA)( vts_tab, ii );
tl_assert(ie->vts == NULL);
tl_assert(ie->rc == 0);
tl_assert(ie->freelink == VtsID_INVALID);
ie->freelink = vts_tab_freelist;
vts_tab_freelist = ii;
}
/* Get an entry from the free list. This will return VtsID_INVALID if
the free list is empty. */
static VtsID get_from_free_list ( void )
{
VtsID ii;
VtsTE* ie;
if (vts_tab_freelist == VtsID_INVALID)
return VtsID_INVALID;
ii = vts_tab_freelist;
ie = VG_(indexXA)( vts_tab, ii );
tl_assert(ie->vts == NULL);
tl_assert(ie->rc == 0);
vts_tab_freelist = ie->freelink;
return ii;
}
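/* Illustrative picture of the freelist (not part of the library).
   Free entries are chained by table index rather than by pointer;
   with entries 7 and 3 free, for example:

      vts_tab_freelist      == 7
      vts_tab[7].freelink   == 3
      vts_tab[3].freelink   == VtsID_INVALID

   add_to_free_list pushes at the head and get_from_free_list pops it,
   so the freelist behaves as a stack of available VtsIDs. */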
/* Produce a new VtsID that can be used, either by getting it from
the freelist, or, if that is empty, by expanding vts_tab. */
static VtsID get_new_VtsID ( void )
{
VtsID ii;
VtsTE te;
ii = get_from_free_list();
if (ii != VtsID_INVALID)
return ii;
te.vts = NULL;
te.rc = 0;
te.freelink = VtsID_INVALID;
ii = (VtsID)VG_(addToXA)( vts_tab, &te );
return ii;
}
/* Indirect callback from lib_zsm. */
static void VtsID__rcinc ( VtsID ii )
{
VtsTE* ie;
/* VG_(indexXA) does a range check for us */
ie = VG_(indexXA)( vts_tab, ii );
tl_assert(ie->vts); /* else it's not in use */
tl_assert(ie->rc < ~0UL); /* else we can't continue */
tl_assert(ie->vts->id == ii);
ie->rc++;
}
/* Indirect callback from lib_zsm. */
static void VtsID__rcdec ( VtsID ii )
{
VtsTE* ie;
/* VG_(indexXA) does a range check for us */
ie = VG_(indexXA)( vts_tab, ii );
tl_assert(ie->vts); /* else it's not in use */
tl_assert(ie->rc > 0); /* else RC snafu */
tl_assert(ie->vts->id == ii);
ie->rc--;
}
/* Look up 'cand' in our collection of VTSs. If present, deallocate
it and return the VtsID for the pre-existing version. If not
present, add it to both vts_tab and vts_set, allocate a fresh VtsID
for it, and return that. */
static VtsID vts_tab__find_and_dealloc__or_add ( VTS* cand )
{
VTS* auld;
tl_assert(cand->id == VtsID_INVALID);
auld = vts_set__find_and_dealloc__or_add(cand);
if (auld != cand) {
/* We already have an Aulde one. Use that. */
VtsTE* ie;
tl_assert(auld->id != VtsID_INVALID);
ie = VG_(indexXA)( vts_tab, auld->id );
tl_assert(ie->vts == auld);
return auld->id;
} else {
VtsID ii = get_new_VtsID();
VtsTE* ie = VG_(indexXA)( vts_tab, ii );
ie->vts = cand;
ie->rc = 0;
ie->freelink = VtsID_INVALID;
cand->id = ii;
return ii;
}
}
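/* How the two structures fit together (summary, no new behaviour):
   vts_set guarantees by-value uniqueness of VTSs, while vts_tab gives
   each unique VTS a small stable integer -- its VtsID -- plus a
   reference count.  Shadow memory therefore only ever stores VtsIDs,
   and turning one back into a VTS* is a single XArray index (see
   VtsID__to_VTS below):

      VtsID id  = vts_tab__find_and_dealloc__or_add( cand );
      VTS*  vts = VtsID__to_VTS( id );   // the interned copy
*/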
static void show_vts_stats ( HChar* caller )
{
UWord nSet, nTab, nLive;
ULong totrc;
UWord n, i;
nSet = VG_(sizeFM)( vts_set );
nTab = VG_(sizeXA)( vts_tab );
totrc = 0;
nLive = 0;
n = VG_(sizeXA)( vts_tab );
for (i = 0; i < n; i++) {
VtsTE* ie = VG_(indexXA)( vts_tab, i );
if (ie->vts) {
nLive++;
totrc += (ULong)ie->rc;
} else {
tl_assert(ie->rc == 0);
}
}
VG_(printf)(" show_vts_stats %s\n", caller);
VG_(printf)(" vts_tab size %4lu\n", nTab);
VG_(printf)(" vts_tab live %4lu\n", nLive);
VG_(printf)(" vts_set size %4lu\n", nSet);
VG_(printf)(" total rc %4llu\n", totrc);
}
/* NOT TO BE CALLED FROM WITHIN libzsm. */
static void vts_tab__do_GC ( Bool show_stats )
{
UWord i, nTab, nLive, nFreed;
/* check this is actually necessary. */
tl_assert(vts_tab_freelist == VtsID_INVALID);
/* empty the caches for partial order checks and binary joins. We
could do better and prune out the entries to be deleted, but it
ain't worth the hassle. */
VtsID__invalidate_caches();
/* First, make the reference counts up to date. */
zsm_flush_cache();
nTab = VG_(sizeXA)( vts_tab );
if (show_stats) {
VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
show_vts_stats("before GC");
}
/* Now we can inspect the entire vts_tab. Any entries
with zero .rc fields are no longer in use and can be put back on
the free list, removed from vts_set, and deleted. */
nFreed = 0;
for (i = 0; i < nTab; i++) {
Bool present;
UWord oldK = 0, oldV = 0;
VtsTE* te = VG_(indexXA)( vts_tab, i );
if (te->vts == NULL) {
tl_assert(te->rc == 0);
continue; /* already on the free list (presumably) */
}
if (te->rc > 0)
continue; /* in use */
/* Ok, we got one we can free. */
tl_assert(te->vts->id == i);
/* first, remove it from vts_set. */
present = VG_(delFromFM)( vts_set,
&oldK, &oldV, (UWord)te->vts );
tl_assert(present); /* else it isn't in vts_set ?! */
tl_assert(oldV == 0); /* no info stored in vts_set val fields */
tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
/* now free the VTS itself */
VTS__delete(te->vts);
te->vts = NULL;
/* and finally put this entry on the free list */
tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
add_to_free_list( i );
nFreed++;
}
/* Now figure out when the next GC should be. We'll allow the
number of VTSs to double before GCing again. Except of course
that since we can't (or, at least, don't) shrink vts_tab, we
can't set the threshold value any smaller than its current size. */
tl_assert(nFreed <= nTab);
nLive = nTab - nFreed;
tl_assert(nLive >= 0 && nLive <= nTab);
vts_next_GC_at = 2 * nLive;
if (vts_next_GC_at < nTab)
vts_next_GC_at = nTab;
if (show_stats) {
show_vts_stats("after GC");
VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
}
if (VG_(clo_verbosity) > 1) {
static UInt ctr = 0;
tl_assert(nTab > 0);
VG_(message)(Vg_DebugMsg,
"libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)",
ctr++, nTab, nLive, (100ULL * nLive) / nTab);
}
}
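/* Worked example of the GC scheduling above (illustrative numbers).
   Suppose the freelist runs dry when vts_tab holds 1000 entries, and
   the GC finds 400 of them with .rc == 0:

      nTab == 1000, nFreed == 400, nLive == 600
      vts_next_GC_at = 2 * 600 = 1200   (>= nTab, so left at 1200)

   The next GC then only happens once the freelist is empty again and
   the table has grown to at least 1200 entries, i.e. the live
   population is allowed to roughly double between collections. */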
/////////////////////////////////////////////////////////
// //
// Vts IDs //
// //
/////////////////////////////////////////////////////////
//////////////////////////
static ULong stats__getOrdering_queries = 0;
static ULong stats__getOrdering_misses = 0;
static ULong stats__join2_queries = 0;
static ULong stats__join2_misses = 0;
static inline UInt ROL32 ( UInt w, Int n ) {
w = (w << n) | (w >> (32-n));
return w;
}
static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
return hash % nTab;
}
#define N_GETORDERING_CACHE 1023
static
struct { VtsID vi1; VtsID vi2; POrd ord; }
getOrdering_cache[N_GETORDERING_CACHE];
#define N_JOIN2_CACHE 1023
static
struct { VtsID vi1; VtsID vi2; VtsID res; }
join2_cache[N_JOIN2_CACHE];
static void VtsID__invalidate_caches ( void ) {
Int i;
for (i = 0; i < N_GETORDERING_CACHE; i++) {
getOrdering_cache[i].vi1 = VtsID_INVALID;
getOrdering_cache[i].vi2 = VtsID_INVALID;
getOrdering_cache[i].ord = 0; /* an invalid POrd value */
}
for (i = 0; i < N_JOIN2_CACHE; i++) {
join2_cache[i].vi1 = VtsID_INVALID;
join2_cache[i].vi2 = VtsID_INVALID;
join2_cache[i].res = VtsID_INVALID;
}
}
//////////////////////////
//static Bool VtsID__is_valid ( VtsID vi ) {
// VtsTE* ve;
// if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
// return False;
// ve = VG_(indexXA)( vts_tab, vi );
// if (!ve->vts)
// return False;
// tl_assert(ve->vts->id == vi);
// return True;
//}
static VTS* VtsID__to_VTS ( VtsID vi ) {
VtsTE* te = VG_(indexXA)( vts_tab, vi );
tl_assert(te->vts);
return te->vts;
}
static void VtsID__pp ( VtsID vi ) {
HChar buf[100];
VTS* vts = VtsID__to_VTS(vi);
VTS__show( buf, sizeof(buf)-1, vts );
buf[sizeof(buf)-1] = 0;
VG_(printf)("%s", buf);
}
/* compute partial ordering relation of vi1 and vi2. */
__attribute__((noinline))
static POrd VtsID__getOrdering_WRK ( VtsID vi1, VtsID vi2 ) {
UInt hash;
POrd ord;
VTS *v1, *v2;
//if (vi1 == vi2) return POrd_EQ;
tl_assert(vi1 != vi2);
////++
stats__getOrdering_queries++;
hash = hash_VtsIDs(vi1, vi2, N_GETORDERING_CACHE);
if (getOrdering_cache[hash].vi1 == vi1
&& getOrdering_cache[hash].vi2 == vi2)
return getOrdering_cache[hash].ord;
stats__getOrdering_misses++;
////--
v1 = VtsID__to_VTS(vi1);
v2 = VtsID__to_VTS(vi2);
ord = VTS__cmp( v1, v2 );
////++
getOrdering_cache[hash].vi1 = vi1;
getOrdering_cache[hash].vi2 = vi2;
getOrdering_cache[hash].ord = ord;
////--
return ord;
}
static inline POrd VtsID__getOrdering ( VtsID vi1, VtsID vi2 ) {
return vi1 == vi2 ? POrd_EQ : VtsID__getOrdering_WRK(vi1, vi2);
}
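/* Both caches above are simple direct-mapped tables keyed on the
   (vi1,vi2) pair via hash_VtsIDs; a miss recomputes the result and
   overwrites just that one slot.  Illustrative call pattern:

      POrd  o = VtsID__getOrdering( viA, viB );  // cached after miss
      VtsID j = VtsID__join2( viA, viB );        // likewise (below)

   The caches must be flushed via VtsID__invalidate_caches whenever
   VtsIDs can be recycled -- which is why vts_tab__do_GC calls it --
   since a recycled ID would otherwise alias a stale entry. */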
/* compute binary join */
__attribute__((noinline))
static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
UInt hash;
VtsID res;
VTS *vts1, *vts2, *nyu;
//if (vi1 == vi2) return vi1;
tl_assert(vi1 != vi2);
////++
stats__join2_queries++;
hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
if (join2_cache[hash].vi1 == vi1
&& join2_cache[hash].vi2 == vi2)
return join2_cache[hash].res;
stats__join2_misses++;
////--
vts1 = VtsID__to_VTS(vi1);
vts2 = VtsID__to_VTS(vi2);
nyu = VTS__join(vts1,vts2);
res = vts_tab__find_and_dealloc__or_add(nyu);
////++
join2_cache[hash].vi1 = vi1;
join2_cache[hash].vi2 = vi2;
join2_cache[hash].res = res;
////--
return res;
}
static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
return vi1 == vi2 ? vi1 : VtsID__join2_WRK(vi1, vi2);
}
/* create a singleton VTS, namely [thr:1] */
static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
VTS* nyu = VTS__singleton(thr,tym);
return vts_tab__find_and_dealloc__or_add(nyu);
}
/* tick operation, creates value 1 if specified index is absent */
static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
VTS* vts = VtsID__to_VTS(vi);
VTS* nyu = VTS__tick(idx,vts);
return vts_tab__find_and_dealloc__or_add(nyu);
}
/* index into a VTS (only for assertions) */
static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
VTS* vts = VtsID__to_VTS(vi);
return VTS__indexAt_SLOW( vts, idx );
}
/////////////////////////////////////////////////////////
// //
// Threads //
// //
/////////////////////////////////////////////////////////
struct _Thr {
/* Current VTSs for this thread. They change as we go along. viR
is the VTS to be used for reads, viW for writes. Usually they
are the same, but can differ when we deal with reader-writer
locks. It is always the case that VtsID__getOrdering(viW,viR)
== POrd_LT or POrd_EQ -- that is, viW must be the same as, or
lag behind, viR. */
VtsID viR;
VtsID viW;
/* opaque (to us) data we hold on behalf of the library's user. */
void* opaque;
};
static Thr* Thr__new ( void ) {
Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
thr->viR = VtsID_INVALID;
thr->viW = VtsID_INVALID;
return thr;
}
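/* Note (restating the comment above): viR and viW normally advance in
   lockstep, and can diverge only through reader-writer locks.  The
   direction of any divergence is fixed -- viW may lag viR but never
   lead it -- and libhb_so_send asserts exactly that before every
   send. */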
/////////////////////////////////////////////////////////
// //
// Shadow Values //
// //
/////////////////////////////////////////////////////////
// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
// hb_zsm.h. We have to do everything else here.
/* SVal is 64 bit unsigned int.
<---------30---------> <---------30--------->
00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
01 X--------------------X XX X--------------------X E(rror)
10 X--------------------X XX X--------------------X A: SVal_NOACCESS
11 X--------------------X XX X--------------------X I: SVal_INVALID
*/
#define SVAL_TAGMASK (3ULL << 62)
static inline Bool SVal__isC ( SVal s ) {
return (0ULL << 62) == (s & SVAL_TAGMASK);
}
static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
//tl_assert(VtsID__is_valid(rmini));
//tl_assert(VtsID__is_valid(wmini));
return (((ULong)rmini) << 32) | ((ULong)wmini);
}
static inline VtsID SVal__unC_Rmin ( SVal s ) {
tl_assert(SVal__isC(s));
return (VtsID)(s >> 32);
}
static inline VtsID SVal__unC_Wmin ( SVal s ) {
tl_assert(SVal__isC(s));
return (VtsID)(s & 0xFFFFFFFFULL);
}
static Bool SVal__isE ( SVal s ) {
return (1ULL << 62) == (s & SVAL_TAGMASK);
}
static SVal SVal__mkE ( void ) {
return 1ULL << 62;
}
static Bool SVal__isA ( SVal s ) {
return (2ULL << 62) == (s & SVAL_TAGMASK);
}
static SVal SVal__mkA ( void ) {
return 2ULL << 62;
}
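/* Worked example of the encoding above (illustrative only).  A
   constrained ("C") value requires both VtsIDs to fit in 30 bits, so
   that the top two bits of the 64-bit word -- the tag tested by
   SVAL_TAGMASK -- come out as 00:

      SVal sv = SVal__mkC( rmin, wmin );   // rmin, wmin :: VtsID
      // SVal__isC(sv)       -> True   (given rmin < 2^30)
      // SVal__unC_Rmin(sv)  -> rmin
      // SVal__unC_Wmin(sv)  -> wmin

   E, A (== SVal_NOACCESS) and I (== SVal_INVALID) values are told
   apart purely by that 2-bit tag: 01, 10 and 11 respectively. */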
/* Direct callback from lib_zsm. */
static void SVal__rcinc ( SVal s ) {
if (SVal__isC(s)) {
VtsID__rcinc( SVal__unC_Rmin(s) );
VtsID__rcinc( SVal__unC_Wmin(s) );
}
}
/* Direct callback from lib_zsm. */
static void SVal__rcdec ( SVal s ) {
if (SVal__isC(s)) {
VtsID__rcdec( SVal__unC_Rmin(s) );
VtsID__rcdec( SVal__unC_Wmin(s) );
}
}
/////////////////////////////////////////////////////////
// //
// Change-event map2 //
// //
/////////////////////////////////////////////////////////
#define EVENT_MAP_GC_AT (1 * 1000 * 1000)
#define EVENT_MAP_GC_DISCARD_FRACTION 0.5
/* This is in two parts:
1. An OSet of RCECs. This is a set of reference-counted stack
traces. When the reference count of a stack trace becomes zero,
it is removed from the set and freed up. The intent is to have
a set of stack traces which can be referred to from (2), but to
only represent each one once. The set is indexed/searched by
ordering on the stack trace vectors.
2. An OSet of OldRefs. These store information about each old ref
that we need to record. It is indexed by address of the
location for which the information is recorded. For LRU
purposes, each OldRef also contains a generation number,
indicating when it was most recently accessed.
The important part of an OldRef is, however, its accs[] array.
This is an array of N_OLDREF_ACCS pairs of Thr and a RCEC. This
allows us to collect the last access-traceback by up to
N_OLDREF_ACCS different threads for this location. The accs[]
array is an MTF-array. If a pair falls off the end, that's too
bad -- we will lose info about that thread's access to this
location.
When this OSet becomes too big, we can throw away the entries
whose generation numbers are below some threshold; hence doing
approximate LRU discarding. For each discarded OldRef we must
of course decrement the reference count on all the RCECs it
refers to, in order that entries from (1) eventually get
discarded too.
*/
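/* In code terms (illustrative summary, not an additional API): each
   time the MSM below observes a constrained-to-constrained state
   change at an address, it calls

      event_map_bind( addr, thr );    // snapshot thr's stack for addr

   and when a race is reported, record_race_info calls

      event_map_lookup( &whereEC, &whoThr, racing_thr, addr );

   to recover a stack trace for some *other* thread's earlier access
   to the same address, if one is still recorded. */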
static UWord stats__ctxt_rcdec1 = 0;
static UWord stats__ctxt_rcdec2 = 0;
static UWord stats__ctxt_rcdec3 = 0;
static UWord stats__ctxt_rcdec_calls = 0;
static UWord stats__ctxt_rcdec_discards = 0;
static UWord stats__ctxt_rcdec1_eq = 0;
static UWord stats__ctxt_tab_curr = 0;
static UWord stats__ctxt_tab_max = 0;
static UWord stats__ctxt_tab_qs = 0;
static UWord stats__ctxt_tab_cmps = 0;
///////////////////////////////////////////////////////
//// Part (1): An OSet of RCECs
///
#define N_FRAMES 8
// (UInt) `echo "Reference Counted Execution Context" | md5sum`
#define RCEC_MAGIC 0xab88abb2UL
//#define N_RCEC_TAB 98317 /* prime */
#define N_RCEC_TAB 196613 /* prime */
typedef
struct _RCEC {
struct _RCEC* next;
UWord magic;
UWord rc;
UWord rcX; /* used for crosschecking */
UWord frames[1 + N_FRAMES]; /* first word is hash of all the rest */
}
RCEC;
static RCEC** contextTab = NULL; /* hash table of RCEC*s */
/* Gives an arbitrary total order on RCEC .frames fields */
static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
Word i;
tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
if (ec1->frames[0] < ec2->frames[0]) return -1;
if (ec1->frames[0] > ec2->frames[0]) return 1;
for (i = 1; i < 1 + N_FRAMES; i++) {
if (ec1->frames[i] < ec2->frames[i]) return -1;
if (ec1->frames[i] > ec2->frames[i]) return 1;
}
return 0;
}
/* Dec the ref of this RCEC. */
static void ctxt__rcdec ( RCEC* ec )
{
stats__ctxt_rcdec_calls++;
tl_assert(ec && ec->magic == RCEC_MAGIC);
tl_assert(ec->rc > 0);
ec->rc--;
}
static void ctxt__rcinc ( RCEC* ec )
{
tl_assert(ec && ec->magic == RCEC_MAGIC);
ec->rc++;
}
/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
move it one step closer to the front of the list, so as to make
subsequent searches for it cheaper. */
static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
{
RCEC *ec0, *ec1, *ec2;
if (ec == *headp)
tl_assert(0); /* already at head of list */
tl_assert(ec != NULL);
ec0 = *headp;
ec1 = NULL;
ec2 = NULL;
while (True) {
if (ec0 == NULL || ec0 == ec) break;
ec2 = ec1;
ec1 = ec0;
ec0 = ec0->next;
}
tl_assert(ec0 == ec);
if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
RCEC* tmp;
/* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
predecessor. Swap ec0 and ec1, that is, move ec0 one step
closer to the start of the list. */
tl_assert(ec2->next == ec1);
tl_assert(ec1->next == ec0);
tmp = ec0->next;
ec2->next = ec0;
ec0->next = ec1;
ec1->next = tmp;
}
else
if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
/* it's second in the list. */
tl_assert(*headp == ec1);
tl_assert(ec1->next == ec0);
ec1->next = ec0->next;
ec0->next = ec1;
*headp = ec0;
}
}
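/* Illustrative example (not part of the library): with a bucket list
   A -> B -> C, a hit on C followed by move_RCEC_one_step_forward
   gives A -> C -> B, and a further hit on C (now second) gives
   C -> A -> B.  Frequently requested RCECs thus migrate gradually
   towards the bucket head instead of jumping straight to it. */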
/* Find the given RCEC in the hash table, and return a pointer to it.
Or, if not present, add the given one to the table (by making a copy
of it, so the caller can immediately deallocate the original) and
return a pointer to the copy. The caller can safely have 'example'
on its stack, since we will always return a pointer to a copy of
it, not to the original. Note that the inserted node will have .rc
of zero and so the caller must immediately increment it. */
__attribute__((noinline))
static RCEC* ctxt__find_or_add ( RCEC* example )
{
UWord hent;
RCEC* copy;
tl_assert(example && example->magic == RCEC_MAGIC);
tl_assert(example->rc == 0);
/* Search the hash table to see if we already have it. */
stats__ctxt_tab_qs++;
hent = example->frames[0] % N_RCEC_TAB;
copy = contextTab[hent];
while (1) {
if (!copy) break;
tl_assert(copy->magic == RCEC_MAGIC);
stats__ctxt_tab_cmps++;
if (0 == RCEC__cmp_by_frames(copy, example)) break;
copy = copy->next;
}
if (copy) {
tl_assert(copy != example);
/* optimisation: if it's not at the head of its list, move 1
step fwds, to make future searches cheaper */
if (copy != contextTab[hent]) {
move_RCEC_one_step_forward( &contextTab[hent], copy );
}
} else {
copy = HG_(zalloc)( "libhb.cfoa.1", sizeof(RCEC) );
tl_assert(copy != example);
*copy = *example;
copy->next = contextTab[hent];
contextTab[hent] = copy;
stats__ctxt_tab_curr++;
if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
stats__ctxt_tab_max = stats__ctxt_tab_curr;
}
return copy;
}
static inline UWord ROLW ( UWord w, Int n )
{
Int bpw = 8 * sizeof(UWord);
w = (w << n) | (w >> (bpw-n));
return w;
}
__attribute__((noinline))
static RCEC* get_RCEC ( Thr* thr )
{
UWord hash, i;
RCEC example;
example.magic = RCEC_MAGIC;
example.rc = 0;
example.rcX = 0;
main_get_stacktrace( thr, &example.frames[1], N_FRAMES );
hash = 0;
for (i = 1; i < 1 + N_FRAMES; i++) {
hash ^= example.frames[i];
hash = ROLW(hash, 19);
}
example.frames[0] = hash;
return ctxt__find_or_add( &example );
}
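/* Note on the hashing above (summary of the code): frames[0] is not a
   stack frame but a hash of frames[1..N_FRAMES], built by XORing each
   frame address into an accumulator and rotating by 19 bits between
   steps.  That value both selects the contextTab bucket in
   ctxt__find_or_add and gives RCEC__cmp_by_frames a cheap first
   comparison key. */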
///////////////////////////////////////////////////////
//// Part (2): An OSet of OldRefs, that refer to (1)
///
// (UInt) `echo "Old Reference Information" | md5sum`
#define OldRef_MAGIC 0x30b1f075UL
typedef struct { Thr* thr; RCEC* rcec; } Thr_n_RCEC;
#define N_OLDREF_ACCS 3
typedef
struct {
Addr ea;
UWord magic;
UWord gen; /* when most recently accessed */
/* unused slots in this array have .thr == NULL */
Thr_n_RCEC accs[N_OLDREF_ACCS];
}
OldRef;
static OSet* oldrefTree = NULL; /* OSet* of OldRef */
static UWord oldrefGen = 0; /* current LRU generation # */
static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */
static void event_map_bind ( Addr a, Thr* thr )
{
OldRef key, *ref;
RCEC* here;
Word i, j;
key.ea = a;
key.magic = OldRef_MAGIC;
ref = VG_(OSetGen_Lookup)( oldrefTree, &key );
if (ref) {
/* We already have a record for this address. We now need to
see if we have a stack trace pertaining to this thread's
access. */
tl_assert(ref->magic == OldRef_MAGIC);
tl_assert(thr);
for (i = 0; i < N_OLDREF_ACCS; i++) {
if (ref->accs[i].thr == thr)
break;
}
if (i < N_OLDREF_ACCS) {
/* thread 'thr' has an entry at index 'i'. Update it. */
if (i > 0) {
Thr_n_RCEC tmp = ref->accs[i-1];
ref->accs[i-1] = ref->accs[i];
ref->accs[i] = tmp;
i--;
}
here = get_RCEC( thr );
if (here == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
ctxt__rcinc( here );
stats__ctxt_rcdec1++;
ctxt__rcdec( ref->accs[i].rcec );
ref->accs[i].rcec = here;
tl_assert(ref->accs[i].thr == thr);
} else {
here = get_RCEC( thr );
ctxt__rcinc( here );
/* No entry for this thread. Shuffle all of them down one
slot, and put the new entry at the start of the array. */
if (ref->accs[N_OLDREF_ACCS-1].thr) {
/* the last slot is in use. We must dec the rc on the
associated rcec. */
tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
stats__ctxt_rcdec2++;
ctxt__rcdec(ref->accs[N_OLDREF_ACCS-1].rcec);
} else {
tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
}
for (j = N_OLDREF_ACCS-1; j >= 1; j--)
ref->accs[j] = ref->accs[j-1];
ref->accs[0].thr = thr;
ref->accs[0].rcec = here;
tl_assert(thr); /* thr==NULL is used to signify an empty slot,
so we can't add a NULL thr. */
}
ref->gen = oldrefGen;
tl_assert(ref->ea == a);
} else {
/* We don't have a record for this address. Create a new one. */
if (oldrefTreeN >= oldrefGenIncAt) {
oldrefGen++;
oldrefGenIncAt = oldrefTreeN + 50000;
if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
oldrefGen, oldrefTreeN );
}
here = get_RCEC( thr );
ctxt__rcinc(here);
ref = VG_(OSetGen_AllocNode)( oldrefTree, sizeof(OldRef) );
ref->magic = OldRef_MAGIC;
ref->gen = oldrefGen;
ref->ea = a;
ref->accs[0].rcec = here;
ref->accs[0].thr = thr;
tl_assert(thr); /* thr==NULL is used to signify an empty slot,
so we can't add a NULL thr. */
for (j = 1; j < N_OLDREF_ACCS; j++) {
ref->accs[j].thr = NULL;
ref->accs[j].rcec = NULL;
}
VG_(OSetGen_Insert)( oldrefTree, ref );
oldrefTreeN++;
}
}
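/* Example of the accs[] update policy above (illustrative, with
   N_OLDREF_ACCS == 3).  If accs == [T1,T2,T3]:
     - another access by T3 swaps it one slot forward and installs a
       fresh RCEC for it, giving [T1,T3,T2];
     - an access by a new thread T4 instead shifts everything down,
       dropping the last entry and rc-decrementing its RCEC, giving
       [T4,T1,T2].
   So each location keeps the most recent stack for up to three
   distinct threads, biased towards recent accessors. */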
static
Bool event_map_lookup ( /*OUT*/ExeContext** resEC,
/*OUT*/Thr** resThr,
Thr* thr_acc, Addr a )
{
Word i;
OldRef key, *ref;
tl_assert(thr_acc);
key.ea = a;
key.magic = OldRef_MAGIC;
ref = VG_(OSetGen_Lookup)( oldrefTree, &key );
if (ref) {
tl_assert(ref->magic == OldRef_MAGIC);
tl_assert(ref->accs[0].thr); /* first slot must always be used */
for (i = 0; i < N_OLDREF_ACCS; i++) {
if (ref->accs[i].thr != NULL
&& ref->accs[i].thr != thr_acc)
break;
}
/* If we didn't find an entry for some thread other than
thr_acc, just return the entry in slot 0. It'll look
pretty stupid to the user though. */
if (i == N_OLDREF_ACCS)
i = 0;
tl_assert(i >= 0 && i < N_OLDREF_ACCS);
tl_assert(ref->accs[i].thr);
tl_assert(ref->accs[i].rcec);
tl_assert(ref->accs[i].rcec->magic == RCEC_MAGIC);
*resEC = VG_(make_ExeContext_from_StackTrace)(
&ref->accs[i].rcec->frames[1], N_FRAMES
);
*resThr = ref->accs[i].thr;
return True;
} else {
return False;
}
}
static void event_map_init ( void )
{
Word i;
tl_assert(!contextTab);
contextTab = HG_(zalloc)( "libhb.event_map_init.1 (context table)",
N_RCEC_TAB * sizeof(RCEC*) );
tl_assert(contextTab);
for (i = 0; i < N_RCEC_TAB; i++)
contextTab[i] = NULL;
tl_assert(!oldrefTree);
tl_assert(offsetof(OldRef,ea) == 0); /* prereq for unboxed cmps */
oldrefTree = VG_(OSetGen_Create)(
offsetof(OldRef,ea), /* == 0 */
NULL, /* use unboxed cmp on OldRefs */
HG_(zalloc), "libhb.event_map_init.2 (oldref tree)",
HG_(free)
);
tl_assert(oldrefTree);
oldrefGen = 0;
oldrefGenIncAt = 0;
oldrefTreeN = 0;
}
static void event_map__check_reference_counts ( Bool before )
{
RCEC* rcec;
OldRef* oldref;
Word i;
UWord nEnts = 0;
/* Set the 'check' reference counts to zero. Also, optionally
check that the real reference counts are non-zero. We allow
these to fall to zero before a GC, but the GC must get rid of
all those that are zero, hence none should be zero after a
GC. */
for (i = 0; i < N_RCEC_TAB; i++) {
for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
nEnts++;
tl_assert(rcec);
tl_assert(rcec->magic == RCEC_MAGIC);
if (!before)
tl_assert(rcec->rc > 0);
rcec->rcX = 0;
}
}
/* check that the stats are sane */
tl_assert(nEnts == stats__ctxt_tab_curr);
tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
/* visit all the referencing points, inc check ref counts */
VG_(OSetGen_ResetIter)( oldrefTree );
while ( (oldref = VG_(OSetGen_Next)( oldrefTree )) ) {
tl_assert(oldref->magic == OldRef_MAGIC);
for (i = 0; i < N_OLDREF_ACCS; i++) {
if (oldref->accs[i].thr) {
tl_assert(oldref->accs[i].rcec);
tl_assert(oldref->accs[i].rcec->magic == RCEC_MAGIC);
oldref->accs[i].rcec->rcX++;
} else {
tl_assert(!oldref->accs[i].rcec);
}
}
}
/* compare check ref counts with actual */
for (i = 0; i < N_RCEC_TAB; i++) {
for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
tl_assert(rcec->rc == rcec->rcX);
}
}
}
static void event_map_maybe_GC ( void )
{
OldRef* oldref;
UWord keyW, valW, retained, maxGen;
WordFM* genMap;
XArray* refs2del;
Word i, j, n2del;
if (LIKELY(oldrefTreeN < EVENT_MAP_GC_AT))
return;
if (0)
VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
/* Check our counting is sane */
tl_assert(oldrefTreeN == (UWord) VG_(OSetGen_Size)( oldrefTree ));
/* Check the reference counts */
event_map__check_reference_counts( True/*before*/ );
/* Compute the distribution of generation values in the ref tree */
/* genMap :: generation-number -> count-of-nodes-with-that-number */
genMap = VG_(newFM)( HG_(zalloc), "libhb.emmG.1",
HG_(free), NULL );
VG_(OSetGen_ResetIter)( oldrefTree );
while ( (oldref = VG_(OSetGen_Next)( oldrefTree )) ) {
UWord key = oldref->gen;
keyW = valW = 0;
if (VG_(lookupFM)(genMap, &keyW, &valW, key )) {
tl_assert(keyW == key);
tl_assert(valW > 0);
}
/* now valW is the old count for generation 'key' */
VG_(addToFM)(genMap, key, valW+1);
}
tl_assert(VG_(sizeFM)(genMap) > 0);
retained = oldrefTreeN;
maxGen = 0;
VG_(initIterFM)( genMap );
while (VG_(nextIterFM)( genMap, &keyW, &valW )) {
tl_assert(keyW > 0); /* can't allow a generation # 0 */
if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW );
tl_assert(keyW >= maxGen);
tl_assert(retained >= valW);
if (retained - valW
> (UWord)(EVENT_MAP_GC_AT * EVENT_MAP_GC_DISCARD_FRACTION)) {
retained -= valW;
maxGen = keyW;
} else {
break;
}
}
VG_(doneIterFM)( genMap );
if (VG_(clo_verbosity) > 1) {
VG_(message)(Vg_DebugMsg,
"libhb: EvM GC: delete generations %lu and below, "
"retaining %lu entries",
maxGen, retained );
}
VG_(deleteFM)( genMap, NULL, NULL );
/* If this fails, it means there's only one generation in the
entire tree. So we're kind of in a bad situation, and need to
do some stop-gap measure, such as randomly deleting half the
entries. */
tl_assert(retained < oldrefTreeN);
/* Now make up a big list of the oldrefTree entries we want to
delete. We can't simultaneously traverse the tree and delete
stuff from it, so first we need to copy them off somewhere
else. (sigh) */
refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.1",
HG_(free), sizeof(OldRef*) );
VG_(OSetGen_ResetIter)( oldrefTree );
while ( (oldref = VG_(OSetGen_Next)( oldrefTree )) ) {
tl_assert(oldref->magic == OldRef_MAGIC);
if (oldref->gen <= maxGen) {
VG_(addToXA)( refs2del, &oldref );
}
}
n2del = VG_(sizeXA)( refs2del );
tl_assert(n2del == (Word)(oldrefTreeN - retained));
if (0) VG_(printf)("%s","deleting entries\n");
for (i = 0; i < n2del; i++) {
void* nd;
OldRef* ref = *(OldRef**)VG_(indexXA)( refs2del, i );
tl_assert(ref);
tl_assert(ref->magic == OldRef_MAGIC);
for (j = 0; j < N_OLDREF_ACCS; j++) {
if (ref->accs[j].rcec) {
tl_assert(ref->accs[j].thr);
stats__ctxt_rcdec3++;
ctxt__rcdec( ref->accs[j].rcec );
} else {
tl_assert(!ref->accs[j].thr);
}
}
nd = VG_(OSetGen_Remove)( oldrefTree, ref );
VG_(OSetGen_FreeNode)( oldrefTree, nd );
}
VG_(deleteXA)( refs2del );
tl_assert( VG_(OSetGen_Size)( oldrefTree ) == retained );
oldrefTreeN = retained;
oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
/* Throw away all RCECs with zero reference counts */
for (i = 0; i < N_RCEC_TAB; i++) {
RCEC** pp = &contextTab[i];
RCEC* p = *pp;
while (p) {
if (p->rc == 0) {
*pp = p->next;
HG_(free)(p);
p = *pp;
tl_assert(stats__ctxt_tab_curr > 0);
stats__ctxt_tab_curr--;
} else {
pp = &p->next;
p = p->next;
}
}
}
/* Check the reference counts */
event_map__check_reference_counts( False/*after*/ );
//if (0)
//VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
// VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
}
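/* Worked example of the discard policy above (illustrative numbers).
   Suppose the tree holds 1,000,000 entries spread over generations
   1..4 as 300k/250k/250k/200k.  The loop discards oldest-first while
   the would-be retained count stays above
   EVENT_MAP_GC_AT * EVENT_MAP_GC_DISCARD_FRACTION == 500,000:

      gen 1: 1,000,000 - 300,000 = 700,000  > 500,000  -> discard
      gen 2:   700,000 - 250,000 = 450,000 <= 500,000  -> stop

   so maxGen == 1 and 700,000 entries are retained. */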
/////////////////////////////////////////////////////////
// //
// Core MSM //
// //
/////////////////////////////////////////////////////////
#define MSM_CONFACC 1
#define MSM_RACE2ERR 1
#define MSM_CHECK 0
static ULong stats__msm_read = 0;
static ULong stats__msm_read_change = 0;
static ULong stats__msm_write = 0;
static ULong stats__msm_write_change = 0;
__attribute__((noinline))
static void record_race_info ( Thr* acc_thr,
Addr acc_addr, SizeT szB, Bool isWrite,
SVal svOld, SVal svNew )
{
Bool found;
Thr* thrp = NULL;
ExeContext* where = NULL;
ExeContext* wherep = NULL;
where = main_get_EC( acc_thr );
found = event_map_lookup( &wherep, &thrp, acc_thr, acc_addr );
if (found) {
tl_assert(wherep);
tl_assert(thrp);
tl_assert(thrp->opaque);
tl_assert(acc_thr->opaque);
HG_(record_error_Race)( acc_thr->opaque, acc_addr,
isWrite, szB, NULL/*mb_lastlock*/,
wherep, thrp->opaque );
} else {
tl_assert(!wherep);
tl_assert(!thrp);
tl_assert(acc_thr->opaque);
HG_(record_error_Race)( acc_thr->opaque, acc_addr,
isWrite, szB, NULL/*mb_lastlock*/,
NULL, NULL );
}
}
static Bool is_sane_SVal_C ( SVal sv ) {
POrd ord;
if (!SVal__isC(sv)) return True;
ord = VtsID__getOrdering( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
if (ord == POrd_EQ || ord == POrd_LT) return True;
return False;
}
/* Compute new state following a read */
static inline SVal msm_read ( SVal svOld,
/* The following are only needed for
creating error reports. */
Thr* acc_thr,
Addr acc_addr, SizeT szB )
{
SVal svNew = SVal_INVALID;
stats__msm_read++;
/* Redundant sanity check on the constraints */
if (MSM_CHECK) {
tl_assert(is_sane_SVal_C(svOld));
}
if (SVal__isC(svOld)) {
POrd ord;
VtsID tviR = acc_thr->viR;
VtsID tviW = acc_thr->viW;
VtsID rmini = SVal__unC_Rmin(svOld);
VtsID wmini = SVal__unC_Wmin(svOld);
ord = VtsID__getOrdering(rmini,tviR);
if (ord == POrd_EQ || ord == POrd_LT) {
/* no race */
/* Note: RWLOCK subtlety: use tviW, not tviR */
svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
goto out;
} else {
svNew = MSM_RACE2ERR
? SVal__mkE()
: SVal__mkC( rmini, VtsID__join2(wmini,tviR) );
record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
svOld, svNew );
goto out;
}
}
if (SVal__isA(svOld)) {
/* reading no-access memory (sigh); leave unchanged */
/* check for no pollution */
tl_assert(svOld == SVal_NOACCESS);
svNew = SVal_NOACCESS;
goto out;
}
if (SVal__isE(svOld)) {
/* no race, location is already "in error" */
svNew = SVal__mkE();
goto out;
}
VG_(printf)("msm_read: bad svOld: 0x%016llx\n", svOld);
tl_assert(0);
out:
if (MSM_CHECK) {
tl_assert(is_sane_SVal_C(svNew));
}
tl_assert(svNew != SVal_INVALID);
if (svNew != svOld) {
if (MSM_CONFACC && SVal__isC(svOld) && SVal__isC(svNew)) {
event_map_bind( acc_addr, acc_thr );
stats__msm_read_change++;
}
}
return svNew;
}
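/* Summary of the read rule above (restating the code, no new
   semantics).  For a C(Rmin,Wmin) location read by a thread whose
   clocks are (viR,viW):
     - if Rmin <= viR in the partial order there is no race, and the
       state becomes C(Rmin, join(Wmin, viW)) -- note viW rather than
       viR, which is what makes reader-writer locks work;
     - otherwise a race is recorded; with MSM_RACE2ERR the location
       moves to the error state E (which then absorbs all further
       accesses silently), and without it the state becomes
       C(Rmin, join(Wmin, viR)). */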
/* Compute new state following a write */
static inline SVal msm_write ( SVal svOld,
/* The following are only needed for
creating error reports. */
Thr* acc_thr,
Addr acc_addr, SizeT szB )
{
SVal svNew = SVal_INVALID;
stats__msm_write++;
/* Redundant sanity check on the constraints */
if (MSM_CHECK) {
tl_assert(is_sane_SVal_C(svOld));
}
if (SVal__isC(svOld)) {
POrd ord;
VtsID tviW = acc_thr->viW;
VtsID wmini = SVal__unC_Wmin(svOld);
ord = VtsID__getOrdering(wmini,tviW);
if (ord == POrd_EQ || ord == POrd_LT) {
/* no race */
svNew = SVal__mkC( tviW, tviW );
goto out;
} else {
VtsID rmini = SVal__unC_Rmin(svOld);
svNew = MSM_RACE2ERR
? SVal__mkE()
: SVal__mkC( VtsID__join2(rmini,tviW),
VtsID__join2(wmini,tviW) );
record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
svOld, svNew );
goto out;
}
}
if (SVal__isA(svOld)) {
/* writing no-access memory (sigh); leave unchanged */
/* check for no pollution */
tl_assert(svOld == SVal_NOACCESS);
svNew = SVal_NOACCESS;
goto out;
}
if (SVal__isE(svOld)) {
/* no race, location is already "in error" */
svNew = SVal__mkE();
goto out;
}
VG_(printf)("msm_write: bad svOld: 0x%016llx\n", svOld);
tl_assert(0);
out:
if (MSM_CHECK) {
tl_assert(is_sane_SVal_C(svNew));
}
tl_assert(svNew != SVal_INVALID);
if (svNew != svOld) {
if (MSM_CONFACC && SVal__isC(svOld) && SVal__isC(svNew)) {
event_map_bind( acc_addr, acc_thr );
stats__msm_write_change++;
}
}
return svNew;
}
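/* Correspondingly for writes (restating the code): the constraint
   checked is Wmin <= viW.  If it holds the location becomes
   C(viW, viW); if not, a race is recorded and the location goes to E
   (or, with MSM_RACE2ERR disabled, to
   C(join(Rmin,viW), join(Wmin,viW))). */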
/////////////////////////////////////////////////////////
// //
// Apply core MSM to specific memory locations //
// //
/////////////////////////////////////////////////////////
/*------------- ZSM accesses: 8 bit apply ------------- */
void zsm_apply8___msm_read ( Thr* thr, Addr a ) {
CacheLine* cl;
UWord cloff, tno, toff;
SVal svOld, svNew;
UShort descr;
stats__cline_read8s++;
cl = get_cacheline(a);
cloff = get_cacheline_offset(a);
tno = get_treeno(a);
toff = get_tree_offset(a); /* == 0 .. 7 */
descr = cl->descrs[tno];
if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
SVal* tree = &cl->svals[tno << 3];
cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
if (SCE_CACHELINE)
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
}
svOld = cl->svals[cloff];
svNew = msm_read( svOld, thr,a,1 );
tl_assert(svNew != SVal_INVALID);
cl->svals[cloff] = svNew;
}
void zsm_apply8___msm_write ( Thr* thr, Addr a ) {
CacheLine* cl;
UWord cloff, tno, toff;
SVal svOld, svNew;
UShort descr;
stats__cline_write8s++;
cl = get_cacheline(a);
cloff = get_cacheline_offset(a);
tno = get_treeno(a);
toff = get_tree_offset(a); /* == 0 .. 7 */
descr = cl->descrs[tno];
if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
SVal* tree = &cl->svals[tno << 3];
cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
if (SCE_CACHELINE)
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
}
svOld = cl->svals[cloff];
svNew = msm_write( svOld, thr,a,1 );
tl_assert(svNew != SVal_INVALID);
cl->svals[cloff] = svNew;
}
/*------------- ZSM accesses: 16 bit apply ------------- */
void zsm_apply16___msm_read ( Thr* thr, Addr a ) {
CacheLine* cl;
UWord cloff, tno, toff;
SVal svOld, svNew;
UShort descr;
stats__cline_read16s++;
if (UNLIKELY(!aligned16(a))) goto slowcase;
cl = get_cacheline(a);
cloff = get_cacheline_offset(a);
tno = get_treeno(a);
toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
descr = cl->descrs[tno];
if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
if (valid_value_is_below_me_16(descr, toff)) {
goto slowcase;
} else {
SVal* tree = &cl->svals[tno << 3];
cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
}
if (SCE_CACHELINE)
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
}
svOld = cl->svals[cloff];
svNew = msm_read( svOld, thr,a,2 );
tl_assert(svNew != SVal_INVALID);
cl->svals[cloff] = svNew;
return;
slowcase: /* misaligned, or must go further down the tree */
stats__cline_16to8splits++;
zsm_apply8___msm_read( thr, a + 0 );
zsm_apply8___msm_read( thr, a + 1 );
}
void zsm_apply16___msm_write ( Thr* thr, Addr a ) {
CacheLine* cl;
UWord cloff, tno, toff;
SVal svOld, svNew;
UShort descr;
stats__cline_write16s++;
if (UNLIKELY(!aligned16(a))) goto slowcase;
cl = get_cacheline(a);
cloff = get_cacheline_offset(a);
tno = get_treeno(a);
toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
descr = cl->descrs[tno];
if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
if (valid_value_is_below_me_16(descr, toff)) {
goto slowcase;
} else {
SVal* tree = &cl->svals[tno << 3];
cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
}
if (SCE_CACHELINE)
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
}
svOld = cl->svals[cloff];
svNew = msm_write( svOld, thr,a,2 );
tl_assert(svNew != SVal_INVALID);
cl->svals[cloff] = svNew;
return;
slowcase: /* misaligned, or must go further down the tree */
stats__cline_16to8splits++;
zsm_apply8___msm_write( thr, a + 0 );
zsm_apply8___msm_write( thr, a + 1 );
}
/*------------- ZSM accesses: 32 bit apply ------------- */
void zsm_apply32___msm_read ( Thr* thr, Addr a ) {
CacheLine* cl;
UWord cloff, tno, toff;
SVal svOld, svNew;
UShort descr;
if (UNLIKELY(!aligned32(a))) goto slowcase;
cl = get_cacheline(a);
cloff = get_cacheline_offset(a);
tno = get_treeno(a);
toff = get_tree_offset(a); /* == 0 or 4 */
descr = cl->descrs[tno];
if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
if (valid_value_is_above_me_32(descr, toff)) {
SVal* tree = &cl->svals[tno << 3];
cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
} else {
goto slowcase;
}
if (SCE_CACHELINE)
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
}
svOld = cl->svals[cloff];
svNew = msm_read( svOld, thr,a,4 );
tl_assert(svNew != SVal_INVALID);
cl->svals[cloff] = svNew;
return;
slowcase: /* misaligned, or must go further down the tree */
stats__cline_32to16splits++;
zsm_apply16___msm_read( thr, a + 0 );
zsm_apply16___msm_read( thr, a + 2 );
}
void zsm_apply32___msm_write ( Thr* thr, Addr a ) {
CacheLine* cl;
UWord cloff, tno, toff;
SVal svOld, svNew;
UShort descr;
if (UNLIKELY(!aligned32(a))) goto slowcase;
cl = get_cacheline(a);
cloff = get_cacheline_offset(a);
tno = get_treeno(a);
toff = get_tree_offset(a); /* == 0 or 4 */
descr = cl->descrs[tno];
if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
if (valid_value_is_above_me_32(descr, toff)) {
SVal* tree = &cl->svals[tno << 3];
cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
} else {
goto slowcase;
}
if (SCE_CACHELINE)
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
}
svOld = cl->svals[cloff];
svNew = msm_write( svOld, thr,a,4 );
tl_assert(svNew != SVal_INVALID);
cl->svals[cloff] = svNew;
return;
slowcase: /* misaligned, or must go further down the tree */
stats__cline_32to16splits++;
zsm_apply16___msm_write( thr, a + 0 );
zsm_apply16___msm_write( thr, a + 2 );
}
/*------------- ZSM accesses: 64 bit apply ------------- */
void zsm_apply64___msm_read ( Thr* thr, Addr a ) {
CacheLine* cl;
UWord cloff, tno, toff;
SVal svOld, svNew;
UShort descr;
stats__cline_read64s++;
if (UNLIKELY(!aligned64(a))) goto slowcase;
cl = get_cacheline(a);
cloff = get_cacheline_offset(a);
tno = get_treeno(a);
toff = get_tree_offset(a); /* == 0, unused */
descr = cl->descrs[tno];
if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
goto slowcase;
}
svOld = cl->svals[cloff];
svNew = msm_read( svOld, thr,a,8 );
tl_assert(svNew != SVal_INVALID);
cl->svals[cloff] = svNew;
return;
slowcase: /* misaligned, or must go further down the tree */
stats__cline_64to32splits++;
zsm_apply32___msm_read( thr, a + 0 );
zsm_apply32___msm_read( thr, a + 4 );
}
void zsm_apply64___msm_write ( Thr* thr, Addr a ) {
CacheLine* cl;
UWord cloff, tno, toff;
SVal svOld, svNew;
UShort descr;
stats__cline_write64s++;
if (UNLIKELY(!aligned64(a))) goto slowcase;
cl = get_cacheline(a);
cloff = get_cacheline_offset(a);
tno = get_treeno(a);
toff = get_tree_offset(a); /* == 0, unused */
descr = cl->descrs[tno];
if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
goto slowcase;
}
svOld = cl->svals[cloff];
svNew = msm_write( svOld, thr,a,8 );
tl_assert(svNew != SVal_INVALID);
cl->svals[cloff] = svNew;
return;
slowcase: /* misaligned, or must go further down the tree */
stats__cline_64to32splits++;
zsm_apply32___msm_write( thr, a + 0 );
zsm_apply32___msm_write( thr, a + 4 );
}
/*--------------- ZSM accesses: 8 bit write --------------- */
static
void zsm_write8 ( Addr a, SVal svNew ) {
CacheLine* cl;
UWord cloff, tno, toff;
UShort descr;
stats__cline_set8s++;
cl = get_cacheline(a);
cloff = get_cacheline_offset(a);
tno = get_treeno(a);
toff = get_tree_offset(a); /* == 0 .. 7 */
descr = cl->descrs[tno];
if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
SVal* tree = &cl->svals[tno << 3];
cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
if (SCE_CACHELINE)
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
}
tl_assert(svNew != SVal_INVALID);
cl->svals[cloff] = svNew;
}
/*--------------- ZSM accesses: 16 bit write --------------- */
static
void zsm_write16 ( Addr a, SVal svNew ) {
CacheLine* cl;
UWord cloff, tno, toff;
UShort descr;
stats__cline_set16s++;
if (UNLIKELY(!aligned16(a))) goto slowcase;
cl = get_cacheline(a);
cloff = get_cacheline_offset(a);
tno = get_treeno(a);
toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
descr = cl->descrs[tno];
if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
if (valid_value_is_below_me_16(descr, toff)) {
/* Writing at this level. Need to fix up 'descr'. */
cl->descrs[tno] = pullup_descr_to_16(descr, toff);
/* At this point, the tree does not match cl->descr[tno] any
more. The assignments below will fix it up. */
} else {
/* We can't indiscriminately write on the w16 node as in the
w64 case, as that might make the node inconsistent with
its parent. So first, pull down to this level. */
SVal* tree = &cl->svals[tno << 3];
cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
if (SCE_CACHELINE)
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
}
}
tl_assert(svNew != SVal_INVALID);
cl->svals[cloff + 0] = svNew;
cl->svals[cloff + 1] = SVal_INVALID;
return;
slowcase: /* misaligned */
stats__cline_16to8splits++;
zsm_write8( a + 0, svNew );
zsm_write8( a + 1, svNew );
}
/*--------------- ZSM accesses: 32 bit write --------------- */
static
void zsm_write32 ( Addr a, SVal svNew ) {
CacheLine* cl;
UWord cloff, tno, toff;
UShort descr;
stats__cline_set32s++;
if (UNLIKELY(!aligned32(a))) goto slowcase;
cl = get_cacheline(a);
cloff = get_cacheline_offset(a);
tno = get_treeno(a);
toff = get_tree_offset(a); /* == 0 or 4 */
descr = cl->descrs[tno];
if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
if (valid_value_is_above_me_32(descr, toff)) {
/* We can't indiscriminately write on the w32 node as in the
w64 case, as that might make the node inconsistent with
its parent. So first, pull down to this level. */
SVal* tree = &cl->svals[tno << 3];
cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
if (SCE_CACHELINE)
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
} else {
/* Writing at this level. Need to fix up 'descr'. */
cl->descrs[tno] = pullup_descr_to_32(descr, toff);
/* At this point, the tree does not match cl->descr[tno] any
more. The assignments below will fix it up. */
}
}
tl_assert(svNew != SVal_INVALID);
cl->svals[cloff + 0] = svNew;
cl->svals[cloff + 1] = SVal_INVALID;
cl->svals[cloff + 2] = SVal_INVALID;
cl->svals[cloff + 3] = SVal_INVALID;
return;
slowcase: /* misaligned */
stats__cline_32to16splits++;
zsm_write16( a + 0, svNew );
zsm_write16( a + 2, svNew );
}
/*--------------- ZSM accesses: 64 bit write --------------- */
static
void zsm_write64 ( Addr a, SVal svNew ) {
CacheLine* cl;
UWord cloff, tno, toff;
stats__cline_set64s++;
if (UNLIKELY(!aligned64(a))) goto slowcase;
cl = get_cacheline(a);
cloff = get_cacheline_offset(a);
tno = get_treeno(a);
toff = get_tree_offset(a); /* == 0 */
cl->descrs[tno] = TREE_DESCR_64;
tl_assert(svNew != SVal_INVALID);
cl->svals[cloff + 0] = svNew;
cl->svals[cloff + 1] = SVal_INVALID;
cl->svals[cloff + 2] = SVal_INVALID;
cl->svals[cloff + 3] = SVal_INVALID;
cl->svals[cloff + 4] = SVal_INVALID;
cl->svals[cloff + 5] = SVal_INVALID;
cl->svals[cloff + 6] = SVal_INVALID;
cl->svals[cloff + 7] = SVal_INVALID;
return;
slowcase: /* misaligned */
stats__cline_64to32splits++;
zsm_write32( a + 0, svNew );
zsm_write32( a + 4, svNew );
}
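/* Note on the SVal_INVALID stores above (descriptive, hedged): once
   cl->descrs[tno] records a coarser granularity for a tree node, only
   the first SVal of that span is meaningful.  The remaining slots are
   filled with SVal_INVALID, presumably so that any accidental use of
   them is caught by the "!= SVal_INVALID" assertions elsewhere rather
   than silently yielding stale state. */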
/*------------- ZSM accesses: 8 bit read/copy ------------- */
static
SVal zsm_read8 ( Addr a ) {
CacheLine* cl;
UWord cloff, tno, toff;
UShort descr;
stats__cline_get8s++;
cl = get_cacheline(a);
cloff = get_cacheline_offset(a);
tno = get_treeno(a);
toff = get_tree_offset(a); /* == 0 .. 7 */
descr = cl->descrs[tno];
if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
SVal* tree = &cl->svals[tno << 3];
cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
}
return cl->svals[cloff];
}
static void zsm_copy8 ( Addr src, Addr dst, Bool uu_normalise ) {
SVal sv;
stats__cline_copy8s++;
sv = zsm_read8( src );
zsm_write8( dst, sv );
}
/* ------------ Shadow memory range setting ops ------------ */
void zsm_apply_range___msm_read ( Thr* thr,
Addr a, SizeT len )
{
/* fast track a couple of common cases */
if (len == 4 && aligned32(a)) {
zsm_apply32___msm_read( thr, a );
return;
}
if (len == 8 && aligned64(a)) {
zsm_apply64___msm_read( thr, a );
return;
}
/* be completely general (but as efficient as possible) */
if (len == 0) return;
if (!aligned16(a) && len >= 1) {
zsm_apply8___msm_read( thr, a );
a += 1;
len -= 1;
tl_assert(aligned16(a));
}
if (len == 0) return;
if (!aligned32(a) && len >= 2) {
zsm_apply16___msm_read( thr, a );
a += 2;
len -= 2;
tl_assert(aligned32(a));
}
if (len == 0) return;
if (!aligned64(a) && len >= 4) {
zsm_apply32___msm_read( thr, a );
a += 4;
len -= 4;
tl_assert(aligned64(a));
}
if (len == 0) return;
if (len >= 8) {
tl_assert(aligned64(a));
while (len >= 8) {
zsm_apply64___msm_read( thr, a );
a += 8;
len -= 8;
}
tl_assert(aligned64(a));
}
if (len == 0) return;
if (len >= 4)
tl_assert(aligned32(a));
if (len >= 4) {
zsm_apply32___msm_read( thr, a );
a += 4;
len -= 4;
}
if (len == 0) return;
if (len >= 2)
tl_assert(aligned16(a));
if (len >= 2) {
zsm_apply16___msm_read( thr, a );
a += 2;
len -= 2;
}
if (len == 0) return;
if (len >= 1) {
zsm_apply8___msm_read( thr, a );
a += 1;
len -= 1;
}
tl_assert(len == 0);
}
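/* Example of the alignment ladder above (illustrative).  A 13-byte
   read starting at 0x1003 is decomposed as:

      0x1003: 8-bit access    (reaches 16-bit alignment)
      0x1004: 32-bit access   (already 32-bit aligned, so the 16-bit
                               step is skipped; reaches 64-bit
                               alignment)
      0x1008: 64-bit access                       -- 1+4+8 = 13 bytes

   i.e. narrow accesses are used only to reach the next alignment
   boundary, and the bulk is then done 8 bytes at a time.  The write
   and set-range variants below follow exactly the same pattern. */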
void zsm_apply_range___msm_write ( Thr* thr,
Addr a, SizeT len )
{
/* fast track a couple of common cases */
if (len == 4 && aligned32(a)) {
zsm_apply32___msm_write( thr, a );
return;
}
if (len == 8 && aligned64(a)) {
zsm_apply64___msm_write( thr, a );
return;
}
/* be completely general (but as efficient as possible) */
if (len == 0) return;
if (!aligned16(a) && len >= 1) {
zsm_apply8___msm_write( thr, a );
a += 1;
len -= 1;
tl_assert(aligned16(a));
}
if (len == 0) return;
if (!aligned32(a) && len >= 2) {
zsm_apply16___msm_write( thr, a );
a += 2;
len -= 2;
tl_assert(aligned32(a));
}
if (len == 0) return;
if (!aligned64(a) && len >= 4) {
zsm_apply32___msm_write( thr, a );
a += 4;
len -= 4;
tl_assert(aligned64(a));
}
if (len == 0) return;
if (len >= 8) {
tl_assert(aligned64(a));
while (len >= 8) {
zsm_apply64___msm_write( thr, a );
a += 8;
len -= 8;
}
tl_assert(aligned64(a));
}
if (len == 0) return;
if (len >= 4)
tl_assert(aligned32(a));
if (len >= 4) {
zsm_apply32___msm_write( thr, a );
a += 4;
len -= 4;
}
if (len == 0) return;
if (len >= 2)
tl_assert(aligned16(a));
if (len >= 2) {
zsm_apply16___msm_write( thr, a );
a += 2;
len -= 2;
}
if (len == 0) return;
if (len >= 1) {
zsm_apply8___msm_write( thr, a );
a += 1;
len -= 1;
}
tl_assert(len == 0);
}
/* Block-copy states (needed for implementing realloc()). */
static void zsm_copy_range ( Addr src, Addr dst, SizeT len )
{
SizeT i;
if (len == 0)
return;
/* assert for non-overlappingness */
tl_assert(src+len <= dst || dst+len <= src);
/* To be simple, just copy byte by byte. But so as not to wreck
performance for later accesses to dst[0 .. len-1], compute a
'normalise' hint for destination bytes that end a cache line, and
for the first and last bytes of the range. (Note that zsm_copy8
currently ignores this hint -- its parameter is marked uu_.) */
for (i = 0; i < len; i++) {
Bool normalise
= get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
|| i == 0 /* first in range */
|| i == len-1; /* last in range */
zsm_copy8( src+i, dst+i, normalise );
}
}
/* For setting address ranges to a given value. Has considerable
sophistication so as to avoid generating large numbers of pointless
cache loads/writebacks for large ranges. */
/* Do small ranges in-cache, in the obvious way. */
static
void zsm_set_range_SMALL ( Addr a, SizeT len, SVal svNew )
{
/* fast track a couple of common cases */
if (len == 4 && aligned32(a)) {
zsm_write32( a, svNew );
return;
}
if (len == 8 && aligned64(a)) {
zsm_write64( a, svNew );
return;
}
/* be completely general (but as efficient as possible) */
if (len == 0) return;
if (!aligned16(a) && len >= 1) {
zsm_write8( a, svNew );
a += 1;
len -= 1;
tl_assert(aligned16(a));
}
if (len == 0) return;
if (!aligned32(a) && len >= 2) {
zsm_write16( a, svNew );
a += 2;
len -= 2;
tl_assert(aligned32(a));
}
if (len == 0) return;
if (!aligned64(a) && len >= 4) {
zsm_write32( a, svNew );
a += 4;
len -= 4;
tl_assert(aligned64(a));
}
if (len == 0) return;
if (len >= 8) {
tl_assert(aligned64(a));
while (len >= 8) {
zsm_write64( a, svNew );
a += 8;
len -= 8;
}
tl_assert(aligned64(a));
}
if (len == 0) return;
if (len >= 4)
tl_assert(aligned32(a));
if (len >= 4) {
zsm_write32( a, svNew );
a += 4;
len -= 4;
}
if (len == 0) return;
if (len >= 2)
tl_assert(aligned16(a));
if (len >= 2) {
zsm_write16( a, svNew );
a += 2;
len -= 2;
}
if (len == 0) return;
if (len >= 1) {
zsm_write8( a, svNew );
a += 1;
len -= 1;
}
tl_assert(len == 0);
}
/* If we're doing a small range, hand off to zsm_set_range_SMALL. But
for larger ranges, try to operate directly on the out-of-cache
representation, rather than dragging lines into the cache,
overwriting them, and forcing them out. This turns out to be an
important performance optimisation. */
static void zsm_set_range ( Addr a, SizeT len, SVal svNew )
{
tl_assert(svNew != SVal_INVALID);
stats__cache_make_New_arange += (ULong)len;
if (0 && len > 500)
VG_(printf)("make New ( %#lx, %ld )\n", a, len );
if (0) {
static UWord n_New_in_cache = 0;
static UWord n_New_not_in_cache = 0;
/* tag is 'a' with the in-line offset masked out,
eg a[31]..a[4] 0000 */
Addr tag = a & ~(N_LINE_ARANGE - 1);
UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
if (LIKELY(tag == cache_shmem.tags0[wix])) {
n_New_in_cache++;
} else {
n_New_not_in_cache++;
}
if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
n_New_in_cache, n_New_not_in_cache );
}
if (LIKELY(len < 2 * N_LINE_ARANGE)) {
zsm_set_range_SMALL( a, len, svNew );
} else {
Addr before_start = a;
Addr aligned_start = cacheline_ROUNDUP(a);
Addr after_start = cacheline_ROUNDDN(a + len);
UWord before_len = aligned_start - before_start;
UWord aligned_len = after_start - aligned_start;
UWord after_len = a + len - after_start;
tl_assert(before_start <= aligned_start);
tl_assert(aligned_start <= after_start);
tl_assert(before_len < N_LINE_ARANGE);
tl_assert(after_len < N_LINE_ARANGE);
tl_assert(get_cacheline_offset(aligned_start) == 0);
if (get_cacheline_offset(a) == 0) {
tl_assert(before_len == 0);
tl_assert(a == aligned_start);
}
if (get_cacheline_offset(a+len) == 0) {
tl_assert(after_len == 0);
tl_assert(after_start == a+len);
}
if (before_len > 0) {
zsm_set_range_SMALL( before_start, before_len, svNew );
}
if (after_len > 0) {
zsm_set_range_SMALL( after_start, after_len, svNew );
}
stats__cache_make_New_inZrep += (ULong)aligned_len;
while (1) {
Addr tag;
UWord wix;
if (aligned_start >= after_start)
break;
tl_assert(get_cacheline_offset(aligned_start) == 0);
tag = aligned_start & ~(N_LINE_ARANGE - 1);
wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
if (tag == cache_shmem.tags0[wix]) {
UWord i;
for (i = 0; i < N_LINE_ARANGE / 8; i++)
zsm_write64( aligned_start + i * 8, svNew );
} else {
UWord i;
Word zix;
SecMap* sm;
LineZ* lineZ;
/* This line is not in the cache. Do not force it in; instead
modify it in-place. */
/* find the Z line to write in and rcdec it or the
associated F line. */
find_Z_for_writing( &sm, &zix, tag );
tl_assert(sm);
tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
lineZ = &sm->linesZ[zix];
lineZ->dict[0] = svNew;
lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
for (i = 0; i < N_LINE_ARANGE/4; i++)
lineZ->ix2s[i] = 0; /* all refer to dict[0] */
rcinc_LineZ(lineZ);
}
aligned_start += N_LINE_ARANGE;
aligned_len -= N_LINE_ARANGE;
}
tl_assert(aligned_start == after_start);
tl_assert(aligned_len == 0);
}
}
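/* Worked example of the split above (illustrative; assumes
   N_LINE_ARANGE == 64, which is not stated here and may differ).
   zsm_set_range(0x1030, 0x150, sv) becomes:

      before : 0x1030..0x103F (16 bytes)  -> zsm_set_range_SMALL
      aligned: 0x1040..0x117F (5 lines)   -> per line: 8 zsm_write64s
               if the line is cached, else a direct rewrite of its
               LineZ dictionary, bypassing the cache entirely
      after  : empty, since 0x1180 is already line-aligned

   Skipping the cache for the aligned middle is the "important
   performance optimisation" referred to before zsm_set_range. */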
/////////////////////////////////////////////////////////
// //
// Synchronisation objects //
// //
/////////////////////////////////////////////////////////
// (UInt) `echo "Synchronisation object" | md5sum`
#define SO_MAGIC 0x56b3c5b0U
struct _SO {
VtsID viR; /* r-clock of sender */
VtsID viW; /* w-clock of sender */
UInt magic;
};
static SO* SO__Alloc ( void ) {
SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
so->viR = VtsID_INVALID;
so->viW = VtsID_INVALID;
so->magic = SO_MAGIC;
return so;
}
static void SO__Dealloc ( SO* so ) {
tl_assert(so);
tl_assert(so->magic == SO_MAGIC);
if (so->viR == VtsID_INVALID) {
tl_assert(so->viW == VtsID_INVALID);
} else {
tl_assert(so->viW != VtsID_INVALID);
VtsID__rcdec(so->viR);
VtsID__rcdec(so->viW);
}
so->magic = 0;
HG_(free)( so );
}
/////////////////////////////////////////////////////////
// //
// Top Level API //
// //
/////////////////////////////////////////////////////////
static void show_thread_state ( HChar* str, Thr* t )
{
if (1) return;
if (t->viR == t->viW) {
VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
VtsID__pp( t->viR );
VG_(printf)("%s","\n");
} else {
VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
VtsID__pp( t->viR );
VG_(printf)(" viW %u==", t->viW);
VtsID__pp( t->viW );
VG_(printf)("%s","\n");
}
}
Thr* libhb_init (
void (*get_stacktrace)( Thr*, Addr*, UWord ),
ExeContext* (*get_EC)( Thr* )
)
{
Thr* thr;
VtsID vi;
tl_assert(get_stacktrace);
tl_assert(get_EC);
main_get_stacktrace = get_stacktrace;
main_get_EC = get_EC;
// No need to initialise hg_wordfm.
// No need to initialise hg_wordset.
vts_set_init();
vts_tab_init();
event_map_init();
VtsID__invalidate_caches();
// initialise shadow memory
zsm_init( SVal__rcinc, SVal__rcdec );
thr = Thr__new();
vi = VtsID__mk_Singleton( thr, 1 );
thr->viR = vi;
thr->viW = vi;
VtsID__rcinc(thr->viR);
VtsID__rcinc(thr->viW);
show_thread_state(" root", thr);
return thr;
}
Thr* libhb_create ( Thr* parent )
{
/* The child's VTSs are copies of the parent's VTSs, but ticked at
the child's index. Since the child's index is guaranteed
unique, it has never been seen before, so the implicit value
before the tick is zero and after that is one. */
Thr* child = Thr__new();
child->viR = VtsID__tick( parent->viR, child );
child->viW = VtsID__tick( parent->viW, child );
VtsID__rcinc(child->viR);
VtsID__rcinc(child->viW);
tl_assert(VtsID__indexAt( child->viR, child ) == 1);
tl_assert(VtsID__indexAt( child->viW, child ) == 1);
/* and the parent has to move along too */
VtsID__rcdec(parent->viR);
VtsID__rcdec(parent->viW);
parent->viR = VtsID__tick( parent->viR, parent );
parent->viW = VtsID__tick( parent->viW, parent );
VtsID__rcinc(parent->viR);
VtsID__rcinc(parent->viW);
show_thread_state(" child", child);
show_thread_state("parent", parent);
return child;
}
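/* Small numeric example for the above (illustrative).  If the parent
   currently has clock [P:3], then after libhb_create:

      child  : [P:3, C:1]   (parent's clock, ticked at the child)
      parent : [P:4]        (parent ticked at itself)

   so everything the parent did before the create happens-before the
   child's first event, while the parent's later events stay unordered
   with the child's until some send/receive pair orders them. */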
/* Shut down the library, and print stats (in fact that's _all_
this is for). */
void libhb_shutdown ( Bool show_stats )
{
if (show_stats) {
VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
stats__secmaps_allocd,
stats__secmap_ga_space_covered);
VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
stats__secmap_linesZ_allocd,
stats__secmap_linesZ_bytes);
VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
stats__secmap_linesF_allocd,
stats__secmap_linesF_bytes);
VG_(printf)(" secmaps: %'10lu iterator steppings\n",
stats__secmap_iterator_steppings);
VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
stats__secmaps_search, stats__secmaps_search_slow);
VG_(printf)("%s","\n");
VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
stats__cache_totrefs, stats__cache_totmisses );
VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
stats__cache_Z_fetches, stats__cache_F_fetches );
VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
stats__cache_Z_wbacks, stats__cache_F_wbacks );
VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n",
stats__cache_invals, stats__cache_flushes );
VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
stats__cache_make_New_arange,
stats__cache_make_New_inZrep);
VG_(printf)("%s","\n");
VG_(printf)(" cline: %'10lu normalises\n",
stats__cline_normalises );
VG_(printf)(" cline: rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
stats__cline_read64s,
stats__cline_read32s,
stats__cline_read16s,
stats__cline_read8s );
VG_(printf)(" cline: wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
stats__cline_write64s,
stats__cline_write32s,
stats__cline_write16s,
stats__cline_write8s );
VG_(printf)(" cline: sets 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
stats__cline_set64s,
stats__cline_set32s,
stats__cline_set16s,
stats__cline_set8s );
VG_(printf)(" cline: get1s %'lu, copy1s %'lu\n",
stats__cline_get8s, stats__cline_copy8s );
VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
stats__cline_64to32splits,
stats__cline_32to16splits,
stats__cline_16to8splits );
VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
stats__cline_64to32pulldown,
stats__cline_32to16pulldown,
stats__cline_16to8pulldown );
if (0)
VG_(printf)(" cline: sizeof(CacheLineZ) %ld, covers %ld bytes of arange\n",
(Word)sizeof(LineZ), (Word)N_LINE_ARANGE);
VG_(printf)("%s","\n");
VG_(printf)(" libhb: %'13llu msm_read (%'llu changed)\n",
stats__msm_read, stats__msm_read_change);
VG_(printf)(" libhb: %'13llu msm_write (%'llu changed)\n",
stats__msm_write, stats__msm_write_change);
VG_(printf)(" libhb: %'13llu getOrd queries (%'llu misses)\n",
stats__getOrdering_queries, stats__getOrdering_misses);
VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
stats__join2_queries, stats__join2_misses);
VG_(printf)("%s","\n");
VG_(printf)(
" libhb: %ld entries in vts_table (approximately %lu bytes)\n",
VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
);
VG_(printf)( " libhb: %lu entries in vts_set\n",
VG_(sizeFM)( vts_set ) );
VG_(printf)("%s","\n");
VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
stats__ctxt_rcdec2,
stats__ctxt_rcdec3 );
VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n",
(UWord)N_RCEC_TAB,
stats__ctxt_tab_curr );
VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
stats__ctxt_tab_qs,
stats__ctxt_tab_cmps );
#if 0
VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
#endif
VG_(printf)("%s","<<< END libhb stats >>>\n");
VG_(printf)("%s","\n");
}
}
void libhb_async_exit ( Thr* thr )
{
/* is there anything we need to do? */
}
/* Both Segs and SOs point to VTSs. However, there is no sharing, so
a Seg that points at a VTS is its one-and-only owner, and ditto for
a SO that points at a VTS. */
SO* libhb_so_alloc ( void )
{
return SO__Alloc();
}
void libhb_so_dealloc ( SO* so )
{
tl_assert(so);
tl_assert(so->magic == SO_MAGIC);
SO__Dealloc(so);
}
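/* Illustrative only (not part of the library): a minimal sketch of
   the SO lifecycle as a client might drive it, assuming one SO per
   application-level synchronisation object.  The wrapper function
   and its arguments below are hypothetical; only the libhb_so_*
   calls are real. */
#if 0
static void example_so_lifecycle ( Thr* thr_signaller, Thr* thr_waiter )
{
   SO* so = libhb_so_alloc();        /* starts with no VTSs attached */
   tl_assert( !libhb_so_everSent(so) );
   libhb_so_send( thr_signaller, so, True/*strong_send*/ );
   libhb_so_recv( thr_waiter, so, True/*strong_recv*/ );
   tl_assert( libhb_so_everSent(so) );
   libhb_so_dealloc(so);             /* drops the SO's VTS references */
}
#endif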
/* See comments in libhb.h for details on the meaning of
strong vs weak sends and strong vs weak receives. */
void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
{
/* Copy the VTSs from 'thr' into the sync object, and then move
the thread along one step. */
tl_assert(so);
tl_assert(so->magic == SO_MAGIC);
/* stay sane .. a thread's read-clock must always lead or be the
same as its write-clock */
{ POrd ord = VtsID__getOrdering(thr->viW, thr->viR);
tl_assert(ord == POrd_EQ || ord == POrd_LT);
}
/* since we're overwriting the VtsIDs in the SO, we need to drop
any references made by the previous contents thereof */
if (so->viR == VtsID_INVALID) {
tl_assert(so->viW == VtsID_INVALID);
so->viR = thr->viR;
so->viW = thr->viW;
VtsID__rcinc(so->viR);
VtsID__rcinc(so->viW);
} else {
/* In a strong send, we dump any previous VC in the SO and
install the sending thread's VC instead. For a weak send we
must join2 with what's already there. */
tl_assert(so->viW != VtsID_INVALID);
VtsID__rcdec(so->viR);
VtsID__rcdec(so->viW);
so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
VtsID__rcinc(so->viR);
VtsID__rcinc(so->viW);
}
/* move both parent clocks along */
VtsID__rcdec(thr->viR);
VtsID__rcdec(thr->viW);
thr->viR = VtsID__tick( thr->viR, thr );
thr->viW = VtsID__tick( thr->viW, thr );
VtsID__rcinc(thr->viR);
VtsID__rcinc(thr->viW);
if (strong_send)
show_thread_state("s-send", thr);
else
show_thread_state("w-send", thr);
}
void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
{
tl_assert(so);
tl_assert(so->magic == SO_MAGIC);
if (so->viR != VtsID_INVALID) {
tl_assert(so->viW != VtsID_INVALID);
/* Weak receive (basically, an R-acquisition of an R-W lock).
This advances the read-clock of the receiver, but not the
write-clock. */
VtsID__rcdec(thr->viR);
thr->viR = VtsID__join2( thr->viR, so->viR );
VtsID__rcinc(thr->viR);
/* For a strong receive, we also advance the receiver's write
clock, which means the receive as a whole is essentially
equivalent to a W-acquisition of an R-W lock. */
if (strong_recv) {
VtsID__rcdec(thr->viW);
thr->viW = VtsID__join2( thr->viW, so->viW );
VtsID__rcinc(thr->viW);
}
if (strong_recv)
show_thread_state("s-recv", thr);
else
show_thread_state("w-recv", thr);
} else {
tl_assert(so->viW == VtsID_INVALID);
/* Deal with degenerate case: 'so' has no vts, so there has been
no message posted to it. Just ignore this case. */
show_thread_state("d-recv", thr);
}
}
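/* Illustrative only: one plausible way a client could map
   reader-writer lock operations onto the strong/weak send and
   receive primitives, following the R-W lock analogy in the
   comments above.  This is a hedged sketch of client-side usage,
   not a description of what any particular client actually does;
   'so' is whatever SO the client has associated with the lock. */
#if 0
static void example_rwlock_rd_lock ( Thr* thr, SO* so )
{
   /* R-acquisition: advance only the reader's read-clock. */
   libhb_so_recv( thr, so, False/*!strong_recv*/ );
}
static void example_rwlock_wr_lock ( Thr* thr, SO* so )
{
   /* W-acquisition: advance both the read- and write-clocks. */
   libhb_so_recv( thr, so, True/*strong_recv*/ );
}
static void example_rwlock_rd_unlock ( Thr* thr, SO* so )
{
   /* Weak send: join this reader's clocks into the SO, so that
      contributions from other readers are not discarded. */
   libhb_so_send( thr, so, False/*!strong_send*/ );
}
static void example_rwlock_wr_unlock ( Thr* thr, SO* so )
{
   /* Strong send: the writer held the lock exclusively, so any
      previous contents of the SO can safely be replaced. */
   libhb_so_send( thr, so, True/*strong_send*/ );
}
#endif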
Bool libhb_so_everSent ( SO* so )
{
if (so->viR == VtsID_INVALID) {
tl_assert(so->viW == VtsID_INVALID);
return False;
} else {
tl_assert(so->viW != VtsID_INVALID);
return True;
}
}
/* Ad-hoc debugging aid: set XXX1 and/or XXX2 to a nonzero address to
   trace all accesses whose range [a, a+szB) overlaps that address. */
#define XXX1 0 // 0x67a106c
#define XXX2 0
static Bool TRACEME(Addr a, SizeT szB) {
   if (XXX1 && a <= XXX1 && XXX1 < a+szB) return True;
   if (XXX2 && a <= XXX2 && XXX2 < a+szB) return True;
   return False;
}
static void trace ( Thr* thr, Addr a, SizeT szB, HChar* s ) {
SVal sv = zsm_read8(a);
VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
show_thread_state("", thr);
VG_(printf)("%s","\n");
}
void libhb_range_new ( Thr* thr, Addr a, SizeT szB )
{
SVal sv = SVal__mkC(thr->viW, thr->viW);
tl_assert(is_sane_SVal_C(sv));
if(TRACEME(a,szB))trace(thr,a,szB,"nw-before");
zsm_set_range( a, szB, sv );
if(TRACEME(a,szB))trace(thr,a,szB,"nw-after ");
}
void libhb_range_noaccess ( Thr* thr, Addr a, SizeT szB )
{
if(TRACEME(a,szB))trace(thr,a,szB,"NA-before");
zsm_set_range( a, szB, SVal__mkA() );
if(TRACEME(a,szB))trace(thr,a,szB,"NA-after ");
}
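/* Illustrative only: a hypothetical pair of client hooks for heap
   allocation and deallocation.  Newly allocated memory is stamped
   with the allocating thread's write-clock (via libhb_range_new);
   freed memory is made inaccessible.  The hook names, and the idea
   of wiring them to malloc/free, are assumptions, not part of
   libhb. */
#if 0
static void example_on_heap_alloc ( Thr* thr, Addr payload, SizeT szB )
{
   libhb_range_new( thr, payload, szB );
}
static void example_on_heap_free ( Thr* thr, Addr payload, SizeT szB )
{
   libhb_range_noaccess( thr, payload, szB );
}
#endif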
void* libhb_get_Thr_opaque ( Thr* thr ) {
tl_assert(thr);
return thr->opaque;
}
void libhb_set_Thr_opaque ( Thr* thr, void* v ) {
tl_assert(thr);
thr->opaque = v;
}
void libhb_copy_shadow_state ( Addr dst, Addr src, SizeT len )
{
zsm_copy_range(dst, src, len);
}
void libhb_maybe_GC ( void )
{
event_map_maybe_GC();
/* If there are still freelist entries available, no need for a
GC. */
if (vts_tab_freelist != VtsID_INVALID)
return;
/* So all the table entries are full, and we're having to expand
the table. But did we hit the threshold point yet? */
if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
return;
vts_tab__do_GC( False/*don't show stats*/ );
}
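/* Illustrative only: libhb_maybe_GC only collects vts_tab when the
   freelist is exhausted and the table has reached its growth
   threshold, so a client could simply call it from some convenient
   periodic point, e.g. every Nth synchronisation event.  The counter
   and the choice of period below are purely hypothetical. */
#if 0
static ULong example_sync_events = 0;
static void example_after_sync_event ( void )
{
   example_sync_events++;
   if ((example_sync_events & 0xFFFFULL) == 0)
      libhb_maybe_GC();
}
#endif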
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// //
// SECTION END main library //
// //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
/*--------------------------------------------------------------------*/
/*--- end libhb_main.c ---*/
/*--------------------------------------------------------------------*/