/*--------------------------------------------------------------------*/
/*--- LibHB: a library for implementing and checking               ---*/
/*--- the happens-before relationship in concurrent programs.      ---*/
/*---                                                 libhb_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of LibHB, a library for implementing and checking
   the happens-before relationship in concurrent programs.

   Copyright (C) 2008-2010 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_sparsewa.h"
#include "pub_tool_xarray.h"
#include "pub_tool_oset.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
#include "pub_tool_options.h"   // VG_(clo_stats)
#include "hg_basics.h"
#include "hg_wordset.h"
#include "hg_lock_n_thread.h"
#include "hg_errors.h"

#include "libhb.h"


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Debugging #defines                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Check the sanity of shadow values in the core memory state
   machine.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_MSM 1
#else
#  define CHECK_MSM 0
#endif


/* Check sanity (reference counts, etc) in the conflicting access
   machinery.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_CEM 1
#else
#  define CHECK_CEM 0
#endif


/* Check sanity in the compressed shadow memory machinery,
   particularly in its caching innards.  Unfortunately there's no
   almost-zero-cost way to make them selectable at run time.  Hence
   set the #if 0 to #if 1 and rebuild if you want them. */
#if 0
#  define CHECK_ZSM 1  /* do sanity-check CacheLine stuff */
#  define inline __attribute__((noinline))
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define CHECK_ZSM 0  /* don't sanity-check CacheLine stuff */
#endif


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Forward declarations                                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Globals needed by other parts of the library.  These are set
   once at startup and then never changed. */
static void (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
static ExeContext* (*main_get_EC)( Thr* ) = NULL;



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN compressed shadow memory                      //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_ZSM_H
#define __HB_ZSM_H

typedef ULong SVal;

/* This value has special significance to the implementation, and callers
   may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)

/* This is the default value for shadow memory.  Initially the shadow
   memory contains no accessible areas and so all reads produce this
   value.  TODO: make this caller-definable. */
#define SVal_NOACCESS (2ULL << 62)
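
/* Viewed as bit patterns, the two constants above use the top two bits
   of the 64-bit SVal as a tag: 11 for SVal_INVALID and 10 for
   SVal_NOACCESS.  That is, SVal_INVALID == 0xC000000000000000 and
   SVal_NOACCESS == 0x8000000000000000. */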

/* Initialise the library.  Once initialised, it will (or may) call
   rcinc and rcdec in response to all the calls below, in order to
   allow the user to do reference counting on the SVals stored herein.
   It is important to understand, however, that due to internal
   caching, the reference counts are in general inaccurate, and can be
   both above or below the true reference count for an item.  In
   particular, the library may indicate that the reference count for
   an item is zero, when in fact it is not.

   To make the reference counting exact and therefore non-pointless,
   call zsm_flush_cache.  Immediately after it returns, the reference
   counts for all items, as deduced by the caller by observing calls
   to rcinc and rcdec, will be correct, and so any items with a zero
   reference count may be freed (or at least considered to be
   unreferenced by this library).
*/
static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );

static void zsm_sset_range  ( Addr, SizeT, SVal );
static void zsm_scopy_range ( Addr, Addr, SizeT );
static void zsm_flush_cache ( void );

#endif /* ! __HB_ZSM_H */
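
/* A sketch of the intended usage, given the caveats above.  The names
   my_rcinc, my_rcdec and my_rc_of stand for caller-side bookkeeping
   and are not part of this library:

      zsm_init( my_rcinc, my_rcdec );
      ... zsm_sset_range / zsm_scopy_range as required ...
      zsm_flush_cache();
      // only now are the counts implied by the rcinc/rcdec callbacks
      // exact, so an SVal sv with my_rc_of(sv) == 0 may be reclaimed
*/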


/* Round a up to the next multiple of N.  N must be a power of 2 */
#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N) - 1))
/* Round a down to the next multiple of N.  N must be a power of 2 */
#define ROUNDDN(a, N)   ((a) & ~((N) - 1))
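
/* For example, with N == 64: ROUNDUP(100, 64) == 128,
   ROUNDUP(128, 64) == 128 and ROUNDDN(100, 64) == 64.  The bit trick
   relies on N being a power of 2, so that ~(N-1) is a mask clearing
   the low log2(N) bits. */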



/* ------ User-supplied RC functions ------ */
static void(*rcinc)(SVal) = NULL;
static void(*rcdec)(SVal) = NULL;


/* ------ CacheLine ------ */

#define N_LINE_BITS      6 /* must be >= 3 */
#define N_LINE_ARANGE    (1 << N_LINE_BITS)
#define N_LINE_TREES     (N_LINE_ARANGE >> 3)

typedef
   struct {
      UShort descrs[N_LINE_TREES];
      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
   }
   CacheLine;

#define TREE_DESCR_16_0 (1<<0)
#define TREE_DESCR_32_0 (1<<1)
#define TREE_DESCR_16_1 (1<<2)
#define TREE_DESCR_64   (1<<3)
#define TREE_DESCR_16_2 (1<<4)
#define TREE_DESCR_32_1 (1<<5)
#define TREE_DESCR_16_3 (1<<6)
#define TREE_DESCR_8_0  (1<<7)
#define TREE_DESCR_8_1  (1<<8)
#define TREE_DESCR_8_2  (1<<9)
#define TREE_DESCR_8_3  (1<<10)
#define TREE_DESCR_8_4  (1<<11)
#define TREE_DESCR_8_5  (1<<12)
#define TREE_DESCR_8_6  (1<<13)
#define TREE_DESCR_8_7  (1<<14)
#define TREE_DESCR_DTY  (1<<15)
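
/* For example, a tree holding eight independent 8-bit values has
   descriptor TREE_DESCR_8_7 | ... | TREE_DESCR_8_0, whereas a tree
   holding a single 64-bit value has just TREE_DESCR_64, with the
   other seven tree slots unused.  descr_to_validbits below gives the
   mapping from descriptors to the set of valid tree[] entries. */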

typedef
   struct {
      SVal  dict[4]; /* can represent up to 4 diff values in the line */
      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
                                      dict indexes */
      /* if dict[0] == SVal_INVALID then dict[1] is the index of the
         LineF to use, and dict[2..] are also SVal_INVALID. */
   }
   LineZ; /* compressed rep for a cache line */

typedef
   struct {
      Bool inUse;
      SVal w64s[N_LINE_ARANGE];
   }
   LineF; /* full rep for a cache line */
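
/* For example, a line whose 64 SVals take just two distinct values A
   and B fits a LineZ: dict = { A, B, SVal_INVALID, SVal_INVALID } plus
   one 2-bit dict index per SVal in ix2s (16 bytes in all).  A line
   with five or more distinct values cannot be compressed this way and
   is instead parked in a LineF, at a full 8 bytes per SVal. */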

/* Shadow memory.
   Primary map is a WordFM Addr SecMap*.
   SecMaps cover some page-size-ish section of address space and hold
   a compressed representation.
   CacheLine-sized chunks of SecMaps are copied into a Cache, being
   decompressed when moved into the cache and recompressed on the
   way out.  Because of this, the cache must operate as a writeback
   cache, not a writethrough one.

   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
   N_SECMAP_BITS must be >= N_LINE_BITS.
*/
#define N_SECMAP_BITS   13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
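
/* So with the values above, a SecMap covers 8192 bytes of address
   space and holds 128 LineZs.  Sketch of the resulting lookup path
   (the functions are defined below): find the SecMap keyed on
   a & ~(N_SECMAP_ARANGE-1), then index linesZ[] with
   (a & (N_SECMAP_ARANGE-1)) >> N_LINE_BITS; the Cache short-circuits
   this path for recently-used lines. */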

/* The data in the SecMap is held in the array of LineZs.  Each LineZ
   either carries the required data directly, in a compressed
   representation, or it holds (in .dict[0]) an index to the LineF in
   .linesF that holds the full representation.

   Currently-unused LineFs have their .inUse bit set to zero.
   Since each in-use LineF is referred to by exactly one LineZ,
   the number of .linesZ[] that refer to .linesF should equal
   the number of .linesF[] that have .inUse == True.

   RC obligations: the RCs presented to the user include exactly
   the values in:
   * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
   * F reps that are in use (.inUse == True)

   Hence the following actions at the following transitions are required:

   F rep: .inUse==True  -> .inUse==False        -- rcdec_LineF
   F rep: .inUse==False -> .inUse==True         -- rcinc_LineF
   Z rep: .dict[0] from other to SVal_INVALID   -- rcdec_LineZ
   Z rep: .dict[0] from SVal_INVALID to other   -- rcinc_LineZ
*/
typedef
   struct {
      UInt   magic;
      LineZ  linesZ[N_SECMAP_ZLINES];
      LineF* linesF;
      UInt   linesF_size;
   }
   SecMap;

#define SecMap_MAGIC   0x571e58cbU

static inline Bool is_sane_SecMap ( SecMap* sm ) {
   return sm != NULL && sm->magic == SecMap_MAGIC;
}

/* ------ Cache ------ */

#define N_WAY_BITS 16
#define N_WAY_NENT (1 << N_WAY_BITS)

/* Each tag is the address of the associated CacheLine, rounded down
   to a CacheLine address boundary.  A CacheLine size must be a power
   of 2 and must be 8 or more.  Hence an easy way to initialise the
   cache so it is empty is to set all the tag values to any value % 8
   != 0, eg 1.  This means all queries in the cache initially miss.
   It does however require us to detect, and not write back, any line
   with a bogus tag. */
typedef
   struct {
      CacheLine lyns0[N_WAY_NENT];
      Addr      tags0[N_WAY_NENT];
   }
   Cache;

static inline Bool is_valid_scache_tag ( Addr tag ) {
   /* a valid tag should be naturally aligned to the start of
      a CacheLine. */
   return 0 == (tag & (N_LINE_ARANGE - 1));
}


/* --------- Primary data structures --------- */

/* Shadow memory primary map */
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
static Cache   cache_shmem;


static UWord stats__secmaps_search          = 0; // # SM finds
static UWord stats__secmaps_search_slow     = 0; // # SM lookupFMs
static UWord stats__secmaps_allocd          = 0; // # SecMaps issued
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
static UWord stats__secmap_linesZ_allocd    = 0; // # LineZ's issued
static UWord stats__secmap_linesZ_bytes     = 0; // .. using this much storage
static UWord stats__secmap_linesF_allocd    = 0; // # LineF's issued
static UWord stats__secmap_linesF_bytes     = 0; // .. using this much storage
static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
static UWord stats__cache_Z_fetches         = 0; // # Z lines fetched
static UWord stats__cache_Z_wbacks          = 0; // # Z lines written back
static UWord stats__cache_F_fetches         = 0; // # F lines fetched
static UWord stats__cache_F_wbacks          = 0; // # F lines written back
static UWord stats__cache_invals            = 0; // # cache invals
static UWord stats__cache_flushes           = 0; // # cache flushes
static UWord stats__cache_totrefs           = 0; // # total accesses
static UWord stats__cache_totmisses         = 0; // # misses
static ULong stats__cache_make_New_arange   = 0; // total arange made New
static ULong stats__cache_make_New_inZrep   = 0; // arange New'd on Z reps
static UWord stats__cline_normalises   = 0; // # calls to cacheline_normalise
static UWord stats__cline_cread64s     = 0; // # calls to s_m_read64
static UWord stats__cline_cread32s     = 0; // # calls to s_m_read32
static UWord stats__cline_cread16s     = 0; // # calls to s_m_read16
static UWord stats__cline_cread08s     = 0; // # calls to s_m_read8
static UWord stats__cline_cwrite64s    = 0; // # calls to s_m_write64
static UWord stats__cline_cwrite32s    = 0; // # calls to s_m_write32
static UWord stats__cline_cwrite16s    = 0; // # calls to s_m_write16
static UWord stats__cline_cwrite08s    = 0; // # calls to s_m_write8
static UWord stats__cline_sread08s     = 0; // # calls to s_m_get8
static UWord stats__cline_swrite08s    = 0; // # calls to s_m_set8
static UWord stats__cline_swrite16s    = 0; // # calls to s_m_set16
static UWord stats__cline_swrite32s    = 0; // # calls to s_m_set32
static UWord stats__cline_swrite64s    = 0; // # calls to s_m_set64
static UWord stats__cline_scopy08s     = 0; // # calls to s_m_copy8
static UWord stats__cline_64to32splits = 0; // # 64-bit accesses split
static UWord stats__cline_32to16splits = 0; // # 32-bit accesses split
static UWord stats__cline_16to8splits  = 0; // # 16-bit accesses split
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8
static UWord stats__vts__tick            = 0; // # calls to VTS__tick
static UWord stats__vts__join            = 0; // # calls to VTS__join
static UWord stats__vts__cmpLEQ          = 0; // # calls to VTS__cmpLEQ
static UWord stats__vts__cmp_structural  = 0; // # calls to VTS__cmp_structural
static UWord stats__vts__cmp_structural_slow = 0; // # calls to VTS__cmp_structural w/ slow case
static UWord stats__vts__indexat_slow    = 0; // # calls to VTS__indexAt_SLOW
static UWord stats__vts_set__fadoa       = 0; // # calls to vts_set__find_and_dealloc__or_add
static UWord stats__vts_set__fadoa_d     = 0; // # calls to vts_set__find_and_dealloc__or_add
                                              //   that lead to a deallocation


static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
   return a & ~(N_SECMAP_ARANGE - 1);
}
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
   return a & (N_SECMAP_ARANGE - 1);
}


/*----------------------------------------------------------------*/
/*--- map_shmem :: WordFM Addr SecMap                          ---*/
/*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
/*----------------------------------------------------------------*/

/*--------------- SecMap allocation --------------- */

static HChar* shmem__bigchunk_next = NULL;
static HChar* shmem__bigchunk_end1 = NULL;

static void* shmem__bigchunk_alloc ( SizeT n )
{
   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
   tl_assert(n > 0);
   n = VG_ROUNDUP(n, 16);
   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
      if (0)
         VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
                     (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
      if (shmem__bigchunk_next == NULL)
         VG_(out_of_memory_NORETURN)(
            "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
   }
   tl_assert(shmem__bigchunk_next);
   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
   shmem__bigchunk_next += n;
   return shmem__bigchunk_next - n;
}

static SecMap* shmem__alloc_SecMap ( void )
{
   Word    i, j;
   SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
   if (0) VG_(printf)("alloc_SecMap %p\n",sm);
   tl_assert(sm);
   sm->magic = SecMap_MAGIC;
   for (i = 0; i < N_SECMAP_ZLINES; i++) {
      sm->linesZ[i].dict[0] = SVal_NOACCESS;
      sm->linesZ[i].dict[1] = SVal_INVALID;
      sm->linesZ[i].dict[2] = SVal_INVALID;
      sm->linesZ[i].dict[3] = SVal_INVALID;
      for (j = 0; j < N_LINE_ARANGE/4; j++)
         sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
   }
   sm->linesF      = NULL;
   sm->linesF_size = 0;
   stats__secmaps_allocd++;
   stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
   stats__secmap_linesZ_allocd    += N_SECMAP_ZLINES;
   stats__secmap_linesZ_bytes     += N_SECMAP_ZLINES * sizeof(LineZ);
   return sm;
}

typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };

static SecMap* shmem__find_SecMap ( Addr ga )
{
   SecMap* sm    = NULL;
   Addr    gaKey = shmem__round_to_SecMap_base(ga);
   // Cache
   stats__secmaps_search++;
   if (LIKELY(gaKey == smCache[0].gaKey))
      return smCache[0].sm;
   if (LIKELY(gaKey == smCache[1].gaKey)) {
      SMCacheEnt tmp = smCache[0];
      smCache[0] = smCache[1];
      smCache[1] = tmp;
      return smCache[0].sm;
   }
   if (gaKey == smCache[2].gaKey) {
      SMCacheEnt tmp = smCache[1];
      smCache[1] = smCache[2];
      smCache[2] = tmp;
      return smCache[1].sm;
   }
   // end Cache
   stats__secmaps_search_slow++;
   if (VG_(lookupFM)( map_shmem,
                      NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
      tl_assert(sm != NULL);
      smCache[2] = smCache[1];
      smCache[1] = smCache[0];
      smCache[0].gaKey = gaKey;
      smCache[0].sm    = sm;
   } else {
      tl_assert(sm == NULL);
   }
   return sm;
}

static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
{
   SecMap* sm = shmem__find_SecMap ( ga );
   if (LIKELY(sm)) {
      return sm;
   } else {
      /* create a new one */
      Addr gaKey = shmem__round_to_SecMap_base(ga);
      sm = shmem__alloc_SecMap();
      tl_assert(sm);
      VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
      return sm;
   }
}


/* ------------ LineF and LineZ related ------------ */

static void rcinc_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcinc(lineF->w64s[i]);
}

static void rcdec_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcdec(lineF->w64s[i]);
}

static void rcinc_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcinc(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
}

static void rcdec_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcdec(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
}

inline
static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
   Word bix, shft, mask, prep;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   mask = 3 << shft;
   prep = b2 << shft;
   arr[bix] = (arr[bix] & ~mask) | prep;
}

inline
static UWord read_twobit_array ( UChar* arr, UWord ix ) {
   Word bix, shft;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   return (arr[bix] >> shft) & 3;
}
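
/* For example, write_twobit_array(arr, 5, 3) sets bits 2..3 of arr[1]
   (index 5 lands in byte 5>>2 == 1, at shift 2*(5&3) == 2), after
   which read_twobit_array(arr, 5) returns 3; the other three 2-bit
   fields packed into arr[1] are left untouched by the masked update. */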

/* Given address 'tag', find either the Z or F line containing relevant
   data, so it can be read into the cache.
*/
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
                                  /*OUT*/LineF** fp, Addr tag ) {
   LineZ*  lineZ;
   LineF*  lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      lineZ = NULL;
   }
   *zp = lineZ;
   *fp = lineF;
}

/* Given address 'tag', return the relevant SecMap and the index of
   the LineZ within it, in the expectation that the line is to be
   overwritten.  Regardless of whether 'tag' is currently associated
   with a Z or F representation, rcdec the current representation, in
   recognition of the fact that the contents are just about to be
   overwritten. */
static __attribute__((noinline))
void find_Z_for_writing ( /*OUT*/SecMap** smp,
                          /*OUT*/Word* zixp,
                          Addr tag ) {
   LineZ*  lineZ;
   LineF*  lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   /* re RCs, we are freeing up this LineZ/LineF so that new data can
      be parked in it.  Hence have to rcdec it accordingly. */
   /* If lineZ has an associated lineF, free it up. */
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      rcdec_LineF(lineF);
      lineF->inUse = False;
   } else {
      rcdec_LineZ(lineZ);
   }
   *smp  = sm;
   *zixp = zix;
}

static __attribute__((noinline))
void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
   UInt   i, new_size;
   LineF* nyu;

   if (sm->linesF) {
      tl_assert(sm->linesF_size > 0);
   } else {
      tl_assert(sm->linesF_size == 0);
   }

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         if (!sm->linesF[i].inUse) {
            *fixp = (Word)i;
            return;
         }
      }
   }

   /* No free F line found.  Expand existing array and try again. */
   new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
   nyu      = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
                           new_size * sizeof(LineF) );
   tl_assert(nyu);

   stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
   stats__secmap_linesF_bytes  += (new_size - sm->linesF_size)
                                  * sizeof(LineF);

   if (0)
      VG_(printf)("SM %p: expand F array from %d to %d\n",
                  sm, (Int)sm->linesF_size, new_size);

   for (i = 0; i < new_size; i++)
      nyu[i].inUse = False;

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         tl_assert(sm->linesF[i].inUse);
         nyu[i] = sm->linesF[i];
      }
      VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
      HG_(free)(sm->linesF);
   }

   sm->linesF      = nyu;
   sm->linesF_size = new_size;

   for (i = 0; i < sm->linesF_size; i++) {
      if (!sm->linesF[i].inUse) {
         *fixp = (Word)i;
         return;
      }
   }

   /*NOTREACHED*/
   tl_assert(0);
}


/* ------------ CacheLine and implicit-tree related ------------ */

__attribute__((unused))
static void pp_CacheLine ( CacheLine* cl ) {
   Word i;
   if (!cl) {
      VG_(printf)("%s","pp_CacheLine(NULL)\n");
      return;
   }
   for (i = 0; i < N_LINE_TREES; i++)
      VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
   for (i = 0; i < N_LINE_ARANGE; i++)
      VG_(printf)("    sval: %08lx\n", (UWord)cl->svals[i]);
}

static UChar descr_to_validbits ( UShort descr )
{
   /* a.k.a. Party Time for gcc's constant folder */
#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
                 b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0) \
             ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
                          ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
                          ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
                          ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
                          ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
                          ( (b16_2) << 4)  | ( (b64)   << 3)  | \
                          ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
                          ( (b16_0) << 0) ) )

#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
             ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
                         ( (bit5) << 5) | ( (bit4) << 4) | \
                         ( (bit3) << 3) | ( (bit2) << 2) | \
                         ( (bit1) << 1) | ( (bit0) << 0) ) )

   /* these should all get folded out at compile time */
   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);

   switch (descr) {
   /*
              +--------------------------------- TREE_DESCR_8_7
              |             +------------------- TREE_DESCR_8_0
              |             |  +---------------- TREE_DESCR_16_3
              |             |  | +-------------- TREE_DESCR_32_1
              |             |  | | +------------ TREE_DESCR_16_2
              |             |  | | |  +--------- TREE_DESCR_64
              |             |  | | |  |  +------ TREE_DESCR_16_1
              |             |  | | |  |  | +---- TREE_DESCR_32_0
              |             |  | | |  |  | | +-- TREE_DESCR_16_0
              |             |  | | |  |  | | |
              |             |  | | |  |  | | |   GRANULARITY, 7 -> 0 */
      case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */
         return BYTE(1,1,1,1,1,1,1,1);
      case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */
         return BYTE(1,1,0,1,1,1,1,1);
      case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */
         return BYTE(0,1,1,1,1,1,1,1);
      case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */
         return BYTE(0,1,0,1,1,1,1,1);

      case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */
         return BYTE(1,1,1,1,1,1,0,1);
      case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */
         return BYTE(1,1,0,1,1,1,0,1);
      case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */
         return BYTE(0,1,1,1,1,1,0,1);
      case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */
         return BYTE(0,1,0,1,1,1,0,1);

      case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */
         return BYTE(1,1,1,1,0,1,1,1);
      case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */
         return BYTE(1,1,0,1,0,1,1,1);
      case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */
         return BYTE(0,1,1,1,0,1,1,1);
      case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */
         return BYTE(0,1,0,1,0,1,1,1);

      case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */
         return BYTE(1,1,1,1,0,1,0,1);
      case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */
         return BYTE(1,1,0,1,0,1,0,1);
      case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */
         return BYTE(0,1,1,1,0,1,0,1);
      case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */
         return BYTE(0,1,0,1,0,1,0,1);

      case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */
         return BYTE(0,0,0,1,1,1,1,1);
      case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */
         return BYTE(0,0,0,1,1,1,0,1);
      case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */
         return BYTE(0,0,0,1,0,1,1,1);
      case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */
         return BYTE(0,0,0,1,0,1,0,1);

      case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */
         return BYTE(1,1,1,1,0,0,0,1);
      case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */
         return BYTE(1,1,0,1,0,0,0,1);
      case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */
         return BYTE(0,1,1,1,0,0,0,1);
      case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */
         return BYTE(0,1,0,1,0,0,0,1);

      case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
         return BYTE(0,0,0,1,0,0,0,1);

      case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
         return BYTE(0,0,0,0,0,0,0,1);

      default: return BYTE(0,0,0,0,0,0,0,0);
                      /* INVALID - any valid descr produces at least one
                         valid bit in tree[0..7] */
   }
   /*NOTREACHED*/
   tl_assert(0);

#  undef DESCR
#  undef BYTE
}

__attribute__((unused))
static Bool is_sane_Descr ( UShort descr ) {
   return descr_to_validbits(descr) != 0;
}

static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
   VG_(sprintf)(dst,
                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
                (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
   );
}
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
                (Int)((byte & 128) ? 1 : 0),
                (Int)((byte &  64) ? 1 : 0),
                (Int)((byte &  32) ? 1 : 0),
                (Int)((byte &  16) ? 1 : 0),
                (Int)((byte &   8) ? 1 : 0),
                (Int)((byte &   4) ? 1 : 0),
                (Int)((byte &   2) ? 1 : 0),
                (Int)((byte &   1) ? 1 : 0)
   );
}

static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
   Word  i;
   UChar validbits = descr_to_validbits(descr);
   HChar buf[128], buf2[128];
   if (validbits == 0)
      goto bad;
   for (i = 0; i < 8; i++) {
      if (validbits & (1<<i)) {
         if (tree[i] == SVal_INVALID)
            goto bad;
      } else {
         if (tree[i] != SVal_INVALID)
            goto bad;
      }
   }
   return True;
  bad:
   sprintf_Descr( buf, descr );
   sprintf_Byte( buf2, validbits );
   VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
   VG_(printf)("   validbits 0x%02lx %s\n", (UWord)validbits, buf2);
   VG_(printf)("   descr     0x%04lx %s\n", (UWord)descr, buf);
   for (i = 0; i < 8; i++)
      VG_(printf)("   [%ld] 0x%016llx\n", i, tree[i]);
   VG_(printf)("%s","}\n");
   return False;
}

static Bool is_sane_CacheLine ( CacheLine* cl )
{
   Word tno, cloff;

   if (!cl) goto bad;

   for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
      UShort descr = cl->descrs[tno];
      SVal*  tree  = &cl->svals[cloff];
      if (!is_sane_Descr_and_Tree(descr, tree))
         goto bad;
   }
   tl_assert(cloff == N_LINE_ARANGE);
   return True;
  bad:
   pp_CacheLine(cl);
   return False;
}

static UShort normalise_tree ( /*MOD*/SVal* tree )
{
   UShort descr;
   /* pre: incoming tree[0..7] does not have any invalid shvals, in
      particular no zeroes. */
   if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
                || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
                || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
                || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
      tl_assert(0);

   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   /* build 16-bit layer */
   if (tree[1] == tree[0]) {
      tree[1] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
      descr |= TREE_DESCR_16_0;
   }
   if (tree[3] == tree[2]) {
      tree[3] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
      descr |= TREE_DESCR_16_1;
   }
   if (tree[5] == tree[4]) {
      tree[5] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
      descr |= TREE_DESCR_16_2;
   }
   if (tree[7] == tree[6]) {
      tree[7] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
      descr |= TREE_DESCR_16_3;
   }
   /* build 32-bit layer */
   if (tree[2] == tree[0]
       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
      tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
      descr |= TREE_DESCR_32_0;
   }
   if (tree[6] == tree[4]
       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
      tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
      descr |= TREE_DESCR_32_1;
   }
   /* build 64-bit layer */
   if (tree[4] == tree[0]
       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
      tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
      descr |= TREE_DESCR_64;
   }
   return descr;
}
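
/* For example, if all eight leaves hold the same SVal V, the merging
   cascades through all three layers and the tree normalises to
   tree[0] == V, tree[1..7] == SVal_INVALID, descr == TREE_DESCR_64.
   If only tree[0] == tree[1], just those two leaves merge, and
   TREE_DESCR_16_0 replaces (TREE_DESCR_8_1 | TREE_DESCR_8_0). */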

/* This takes a cacheline where all the data is at the leaves
   (w8[..]) and builds a correctly normalised tree. */
static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
{
   Word tno, cloff;
   for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
      SVal* tree = &cl->svals[cloff];
      cl->descrs[tno] = normalise_tree( tree );
   }
   tl_assert(cloff == N_LINE_ARANGE);
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   stats__cline_normalises++;
}


typedef struct { UChar count; SVal sval; } CountedSVal;

static
void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
                               /*OUT*/Word* dstUsedP,
                               Word nDst, CacheLine* src )
{
   Word tno, cloff, dstUsed;

   tl_assert(nDst == N_LINE_ARANGE);
   dstUsed = 0;

   for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
      UShort descr = src->descrs[tno];
      SVal*  tree  = &src->svals[cloff];

      /* sequentialise the tree described by (descr,tree). */
#     define PUT(_n,_v)                           \
         do { dst[dstUsed  ].count = (_n);        \
              dst[dstUsed++].sval  = (_v);        \
         } while (0)

      /* byte 0 */
      if (descr & TREE_DESCR_64)   PUT(8, tree[0]); else
      if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
      if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
      if (descr & TREE_DESCR_8_0)  PUT(1, tree[0]);
      /* byte 1 */
      if (descr & TREE_DESCR_8_1)  PUT(1, tree[1]);
      /* byte 2 */
      if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
      if (descr & TREE_DESCR_8_2)  PUT(1, tree[2]);
      /* byte 3 */
      if (descr & TREE_DESCR_8_3)  PUT(1, tree[3]);
      /* byte 4 */
      if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
      if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
      if (descr & TREE_DESCR_8_4)  PUT(1, tree[4]);
      /* byte 5 */
      if (descr & TREE_DESCR_8_5)  PUT(1, tree[5]);
      /* byte 6 */
      if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
      if (descr & TREE_DESCR_8_6)  PUT(1, tree[6]);
      /* byte 7 */
      if (descr & TREE_DESCR_8_7)  PUT(1, tree[7]);

#     undef PUT
      /* END sequentialise the tree described by (descr,tree). */

   }
   tl_assert(cloff == N_LINE_ARANGE);
   tl_assert(dstUsed <= nDst);

   *dstUsedP = dstUsed;
}
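
/* For example, a line whose first tree holds a single 64-bit value V
   and whose remaining seven trees are all 8-bit leaves sequentialises
   to dst[0] == {8, V} followed by 56 entries of count 1, giving
   dstUsed == 57.  In general dstUsed ranges from N_LINE_TREES (8),
   every tree fully merged, up to N_LINE_ARANGE (64), no merging. */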

/* Write the cacheline 'wix' to backing store.  Where it ends up
   is determined by its tag field. */
static __attribute__((noinline)) void cacheline_wback ( UWord wix )
{
   Word        i, j, k, m;
   Addr        tag;
   SecMap*     sm;
   CacheLine*  cl;
   LineZ*      lineZ;
   LineF*      lineF;
   Word        zix, fix, csvalsUsed;
   CountedSVal csvals[N_LINE_ARANGE];
   SVal        sv;

   if (0)
      VG_(printf)("scache wback line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag = cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* The cache line may have been invalidated; if so, ignore it. */
   if (!is_valid_scache_tag(tag))
      return;

   /* Where are we going to put it? */
   sm    = NULL;
   lineZ = NULL;
   lineF = NULL;
   zix = fix = -1;

   /* find the Z line to write in and rcdec it or the associated F
      line. */
   find_Z_for_writing( &sm, &zix, tag );

   tl_assert(sm);
   tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];

   /* Generate the data to be stored */
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */

   csvalsUsed = -1;
   sequentialise_CacheLine( csvals, &csvalsUsed,
                            N_LINE_ARANGE, cl );
   tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
   if (0) VG_(printf)("%lu ", csvalsUsed);

   lineZ->dict[0] = lineZ->dict[1]
                  = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;

   /* i indexes actual shadow values, k is cursor in csvals */
   i = 0;
   for (k = 0; k < csvalsUsed; k++) {

      sv = csvals[k].sval;
      if (CHECK_ZSM)
         tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
      /* do we already have it? */
      if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
      if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
      if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
      if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
      /* no.  look for a free slot. */
      if (CHECK_ZSM)
         tl_assert(sv != SVal_INVALID);
      if (lineZ->dict[0]
          == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
      if (lineZ->dict[1]
          == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
      if (lineZ->dict[2]
          == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
      if (lineZ->dict[3]
          == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
      break; /* we'll have to use the f rep */
     dict_ok:
      m = csvals[k].count;
      if (m == 8) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         write_twobit_array( lineZ->ix2s, i+4, j );
         write_twobit_array( lineZ->ix2s, i+5, j );
         write_twobit_array( lineZ->ix2s, i+6, j );
         write_twobit_array( lineZ->ix2s, i+7, j );
         i += 8;
      }
      else if (m == 4) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         i += 4;
      }
      else if (m == 1) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         i += 1;
      }
      else if (m == 2) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         i += 2;
      }
      else {
         tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
      }

   }

   if (LIKELY(i == N_LINE_ARANGE)) {
      /* Construction of the compressed representation was
         successful. */
      rcinc_LineZ(lineZ);
      stats__cache_Z_wbacks++;
   } else {
      /* Cannot use the compressed(z) representation.  Use the full(f)
         rep instead. */
      tl_assert(i >= 0 && i < N_LINE_ARANGE);
      alloc_F_for_writing( sm, &fix );
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(!lineF->inUse);
      lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
      lineZ->dict[1] = (SVal)fix;
      lineF->inUse = True;
      i = 0;
      for (k = 0; k < csvalsUsed; k++) {
         if (CHECK_ZSM)
            tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
         sv = csvals[k].sval;
         if (CHECK_ZSM)
            tl_assert(sv != SVal_INVALID);
         for (m = csvals[k].count; m > 0; m--) {
            lineF->w64s[i] = sv;
            i++;
         }
      }
      tl_assert(i == N_LINE_ARANGE);
      rcinc_LineF(lineF);
      stats__cache_F_wbacks++;
   }
}

/* Fetch the cacheline 'wix' from the backing store.  The tag
   associated with 'wix' is assumed to have already been filled in;
   hence that is used to determine where in the backing store to read
   from. */
static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
{
   Word       i;
   Addr       tag;
   CacheLine* cl;
   LineZ*     lineZ;
   LineF*     lineF;

   if (0)
      VG_(printf)("scache fetch line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag = cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* reject nonsense requests */
   tl_assert(is_valid_scache_tag(tag));

   lineZ = NULL;
   lineF = NULL;
   find_ZF_for_reading( &lineZ, &lineF, tag );
   tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );

   /* expand the data into the bottom layer of the tree, then get
      cacheline_normalise to build the descriptor array. */
   if (lineF) {
      tl_assert(lineF->inUse);
      for (i = 0; i < N_LINE_ARANGE; i++) {
         cl->svals[i] = lineF->w64s[i];
      }
      stats__cache_F_fetches++;
   } else {
      for (i = 0; i < N_LINE_ARANGE; i++) {
         SVal sv;
         UWord ix = read_twobit_array( lineZ->ix2s, i );
         /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
         sv = lineZ->dict[ix];
         tl_assert(sv != SVal_INVALID);
         cl->svals[i] = sv;
      }
      stats__cache_Z_fetches++;
   }
   normalise_CacheLine( cl );
}

static void shmem__invalidate_scache ( void ) {
   Word wix;
   if (0) VG_(printf)("%s","scache inval\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_invals++;
}

static void shmem__flush_and_invalidate_scache ( void ) {
   Word wix;
   Addr tag;
   if (0) VG_(printf)("%s","scache flush and invalidate\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      tag = cache_shmem.tags0[wix];
      if (tag == 1/*INVALID*/) {
         /* already invalid; nothing to do */
      } else {
         tl_assert(is_valid_scache_tag(tag));
         cacheline_wback( wix );
      }
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_flushes++;
   stats__cache_invals++;
}


static inline Bool aligned16 ( Addr a ) {
   return 0 == (a & 1);
}
static inline Bool aligned32 ( Addr a ) {
   return 0 == (a & 3);
}
static inline Bool aligned64 ( Addr a ) {
   return 0 == (a & 7);
}
static inline UWord get_cacheline_offset ( Addr a ) {
   return (UWord)(a & (N_LINE_ARANGE - 1));
}
static inline Addr cacheline_ROUNDUP ( Addr a ) {
   return ROUNDUP(a, N_LINE_ARANGE);
}
static inline Addr cacheline_ROUNDDN ( Addr a ) {
   return ROUNDDN(a, N_LINE_ARANGE);
}
static inline UWord get_treeno ( Addr a ) {
   return get_cacheline_offset(a) >> 3;
}
static inline UWord get_tree_offset ( Addr a ) {
   return a & 7;
}

static __attribute__((noinline))
       CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
static inline CacheLine* get_cacheline ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[6] 000000 */
   Addr  tag = a & ~(N_LINE_ARANGE - 1);
   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
   stats__cache_totrefs++;
   if (LIKELY(tag == cache_shmem.tags0[wix])) {
      return &cache_shmem.lyns0[wix];
   } else {
      return get_cacheline_MISS( a );
   }
}
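
/* For example, with N_LINE_BITS == 6 and N_WAY_BITS == 16, the 32-bit
   address 0x12345678 gives tag == 0x12345640 and
   wix == (0x12345678 >> 6) & 0xFFFF == 0xD159; addresses that agree
   in bits [21:6] therefore contend for the same entry of this
   direct-mapped cache. */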

static __attribute__((noinline))
       CacheLine* get_cacheline_MISS ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[6] 000000 */

   CacheLine* cl;
   Addr*      tag_old_p;
   Addr       tag = a & ~(N_LINE_ARANGE - 1);
   UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);

   tl_assert(tag != cache_shmem.tags0[wix]);

   /* Dump the old line into the backing store. */
   stats__cache_totmisses++;

   cl        = &cache_shmem.lyns0[wix];
   tag_old_p = &cache_shmem.tags0[wix];

   if (is_valid_scache_tag( *tag_old_p )) {
      /* EXPENSIVE and REDUNDANT: callee does it */
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      cacheline_wback( wix );
   }
   /* and reload the new one */
   *tag_old_p = tag;
   cacheline_fetch( wix );
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   return cl;
}

static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_64to32pulldown++;
   switch (toff) {
      case 0: case 4:
         tl_assert(descr & TREE_DESCR_64);
         tree[4] = tree[0];
         descr &= ~TREE_DESCR_64;
         descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_32to16pulldown++;
   switch (toff) {
      case 0: case 2:
         if (!(descr & TREE_DESCR_32_0)) {
            descr = pulldown_to_32(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_32_0);
         tree[2] = tree[0];
         descr &= ~TREE_DESCR_32_0;
         descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
         break;
      case 4: case 6:
         if (!(descr & TREE_DESCR_32_1)) {
            descr = pulldown_to_32(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_32_1);
         tree[6] = tree[4];
         descr &= ~TREE_DESCR_32_1;
         descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_16to8pulldown++;
   switch (toff) {
      case 0: case 1:
         if (!(descr & TREE_DESCR_16_0)) {
            descr = pulldown_to_16(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_16_0);
         tree[1] = tree[0];
         descr &= ~TREE_DESCR_16_0;
         descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
         break;
      case 2: case 3:
         if (!(descr & TREE_DESCR_16_1)) {
            descr = pulldown_to_16(tree, 2, descr);
         }
         tl_assert(descr & TREE_DESCR_16_1);
         tree[3] = tree[2];
         descr &= ~TREE_DESCR_16_1;
         descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
         break;
      case 4: case 5:
         if (!(descr & TREE_DESCR_16_2)) {
            descr = pulldown_to_16(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_16_2);
         tree[5] = tree[4];
         descr &= ~TREE_DESCR_16_2;
         descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
         break;
      case 6: case 7:
         if (!(descr & TREE_DESCR_16_3)) {
            descr = pulldown_to_16(tree, 6, descr);
         }
         tl_assert(descr & TREE_DESCR_16_3);
         tree[7] = tree[6];
         descr &= ~TREE_DESCR_16_3;
         descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_0;
         break;
      case 2:
         mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_1;
         break;
      case 4:
         mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_2;
         break;
      case 6:
         mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_3;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         if (!(descr & TREE_DESCR_16_0))
            descr = pullup_descr_to_16(descr, 0);
         if (!(descr & TREE_DESCR_16_1))
            descr = pullup_descr_to_16(descr, 2);
         mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_0;
         break;
      case 4:
         if (!(descr & TREE_DESCR_16_2))
            descr = pullup_descr_to_16(descr, 4);
         if (!(descr & TREE_DESCR_16_3))
            descr = pullup_descr_to_16(descr, 6);
         mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_1;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0: case 4:
         return 0 != (descr & TREE_DESCR_64);
      default:
         tl_assert(0);
   }
}

static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0:
         return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
      case 2:
         return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
      case 4:
         return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
      case 6:
         return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
      default:
         tl_assert(0);
   }
}

/* ------------ Cache management ------------ */

static void zsm_flush_cache ( void )
{
   shmem__flush_and_invalidate_scache();
}


static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
{
   tl_assert( sizeof(UWord) == sizeof(Addr) );

   rcinc = p_rcinc;
   rcdec = p_rcdec;

   tl_assert(map_shmem == NULL);
   map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
                           HG_(free),
                           NULL/*unboxed UWord cmp*/);
   tl_assert(map_shmem != NULL);
   shmem__invalidate_scache();

   /* a SecMap must contain an integral number of CacheLines */
   tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
   /* also ... a CacheLine holds an integral number of trees */
   tl_assert(0 == (N_LINE_ARANGE % 8));
}

/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION END compressed shadow memory                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN vts primitives                                //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_VTS_H
#define __HB_VTS_H

/* VtsIDs can't exceed 30 bits, since they have to be packed into the
   lowest 30 bits of an SVal. */
typedef UInt VtsID;
#define VtsID_INVALID 0xFFFFFFFF

/* A VTS contains .ts, its vector clock, and also .id, a field to hold
   a backlink for the caller's convenience.  Since we have no idea
   what to set that to in the library, it always gets set to
   VtsID_INVALID. */
typedef
   struct {
      VtsID   id;
      XArray* ts; /* XArray* ScalarTS(abstract) */
   }
   VTS;


/* Create a new, empty VTS. */
static VTS* VTS__new ( void );

/* Delete this VTS in its entirety. */
static void VTS__delete ( VTS* vts );

/* Create a new singleton VTS. */
static VTS* VTS__singleton ( Thr* thr, ULong tym );

/* Return a new VTS in which vts[me]++, so to speak.  'vts' itself is
   not modified. */
static VTS* VTS__tick ( Thr* me, VTS* vts );

/* Return a new VTS constructed as the join (max) of the 2 args.
   Neither arg is modified. */
static VTS* VTS__join ( VTS* a, VTS* b );

/* Compute the partial ordering relation of the two args.  Although we
   could be completely general and return an enumeration value (EQ,
   LT, GT, UN), in fact we only need LEQ, and so we may as well
   hardwire that fact.

   Returns NULL iff LEQ(A,B), or non-NULL if not.  In the latter case,
   the returned Thr* names a thread at which the relation fails to
   hold.  There may be more than one such thread, but we only care
   about seeing one of them, not all of them.  This rather strange
   convention is used because sometimes we want to know the actual
   index at which they first differ. */
static Thr* VTS__cmpLEQ ( VTS* a, VTS* b );

/* Compute an arbitrary structural (total) ordering on the two args,
   based on their VCs, so they can be looked up in a table, tree, etc.
   Returns -1, 0 or 1. */
static Word VTS__cmp_structural ( VTS* a, VTS* b );

/* Debugging only.  Display the given VTS in the buffer. */
static void VTS__show ( HChar* buf, Int nBuf, VTS* vts );

/* Debugging only.  Return vts[index], so to speak. */
static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );

#endif /* ! __HB_VTS_H */
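
/* For example, with threads T1 and T2, and writing a VTS as a map
   from threads to scalar timestamps (absent entries count as 0):
      a = { T1:3, T2:5 },  b = { T1:4, T2:5 }
   then VTS__cmpLEQ(a,b) == NULL (a <= b pointwise),
   VTS__cmpLEQ(b,a) == T1 (since 4 > 3 there), and
   VTS__join(a,b) == { T1:4, T2:5 }. */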


/*--------------- to do with Vector Timestamps ---------------*/

/* Scalar Timestamp */
typedef
   struct {
      Thr*  thr;
      ULong tym;
   }
   ScalarTS;


static Bool is_sane_VTS ( VTS* vts )
{
   UWord    i, n;
   ScalarTS *st1, *st2;
   if (!vts) return False;
   if (!vts->ts) return False;
   n = VG_(sizeXA)( vts->ts );
   if (n >= 2) {
      for (i = 0; i < n-1; i++) {
         st1 = VG_(indexXA)( vts->ts, i );
         st2 = VG_(indexXA)( vts->ts, i+1 );
         if (st1->thr >= st2->thr)
            return False;
         if (st1->tym == 0 || st2->tym == 0)
            return False;
      }
   }
   return True;
}


/* Create a new, empty VTS.
*/
VTS* VTS__new ( void )
{
   VTS* vts;
   vts = HG_(zalloc)( "libhb.VTS__new.1", sizeof(VTS) );
   tl_assert(vts);
   vts->id = VtsID_INVALID;
   vts->ts = VG_(newXA)( HG_(zalloc), "libhb.VTS__new.2",
                         HG_(free), sizeof(ScalarTS) );
   tl_assert(vts->ts);
   return vts;
}


/* Delete this VTS in its entirety.
*/
void VTS__delete ( VTS* vts )
{
   tl_assert(vts);
   tl_assert(vts->ts);
   VG_(deleteXA)( vts->ts );
   HG_(free)(vts);
}


/* Create a new singleton VTS.
*/
VTS* VTS__singleton ( Thr* thr, ULong tym ) {
   ScalarTS st;
   VTS*     vts;
   tl_assert(thr);
   tl_assert(tym >= 1);
   vts = VTS__new();
   st.thr = thr;
   st.tym = tym;
   VG_(addToXA)( vts->ts, &st );
   return vts;
}
1662
1663
1664/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
1665 not modified.
1666*/
1667VTS* VTS__tick ( Thr* me, VTS* vts )
1668{
1669 ScalarTS* here = NULL;
1670 ScalarTS tmp;
1671 VTS* res;
1672 Word i, n;
sewardjc8028ad2010-05-05 09:34:42 +00001673
1674 stats__vts__tick++;
1675
sewardjf98e1c02008-10-25 16:22:41 +00001676 tl_assert(me);
1677 tl_assert(is_sane_VTS(vts));
1678 //if (0) VG_(printf)("tick vts thrno %ld szin %d\n",
1679 // (Word)me->errmsg_index, (Int)VG_(sizeXA)(vts) );
1680 res = VTS__new();
1681 n = VG_(sizeXA)( vts->ts );
1682
1683 /* main loop doesn't handle zero-entry case correctly, so
1684 special-case it. */
1685 if (n == 0) {
1686 tmp.thr = me;
1687 tmp.tym = 1;
1688 VG_(addToXA)( res->ts, &tmp );
1689 tl_assert(is_sane_VTS(res));
1690 return res;
1691 }
1692
1693 for (i = 0; i < n; i++) {
1694 here = VG_(indexXA)( vts->ts, i );
1695 if (me < here->thr) {
1696 /* We just went past 'me', without seeing it. */
1697 tmp.thr = me;
1698 tmp.tym = 1;
1699 VG_(addToXA)( res->ts, &tmp );
1700 tmp = *here;
1701 VG_(addToXA)( res->ts, &tmp );
1702 i++;
1703 break;
1704 }
1705 else if (me == here->thr) {
1706 tmp = *here;
1707 tmp.tym++;
1708 VG_(addToXA)( res->ts, &tmp );
1709 i++;
1710 break;
1711 }
1712 else /* me > here->thr */ {
1713 tmp = *here;
1714 VG_(addToXA)( res->ts, &tmp );
1715 }
1716 }
1717 tl_assert(i >= 0 && i <= n);
1718 if (i == n && here && here->thr < me) {
1719 tmp.thr = me;
1720 tmp.tym = 1;
1721 VG_(addToXA)( res->ts, &tmp );
1722 } else {
1723 for (/*keepgoing*/; i < n; i++) {
1724 here = VG_(indexXA)( vts->ts, i );
1725 tmp = *here;
1726 VG_(addToXA)( res->ts, &tmp );
1727 }
1728 }
1729 tl_assert(is_sane_VTS(res));
1730 //if (0) VG_(printf)("tick vts thrno %ld szou %d\n",
1731 // (Word)me->errmsg_index, (Int)VG_(sizeXA)(res) );
1732 return res;
1733}
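
/* Illustrative example (a sketch, assuming three threads whose Thr*
   values order as t1 < t2 < t3): ticking a thread that is absent
   from the VTS inserts a fresh entry with timestamp 1,

      VTS__tick( t2, [(t1,3), (t3,5)] )  ==  [(t1,3), (t2,1), (t3,5)]

   whereas ticking a thread already present just bumps its timestamp,

      VTS__tick( t1, [(t1,3), (t3,5)] )  ==  [(t1,4), (t3,5)]
*/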
1734
1735
1736/* Return a new VTS constructed as the join (max) of the 2 args.
1737 Neither arg is modified.
1738*/
1739VTS* VTS__join ( VTS* a, VTS* b )
1740{
1741 Word ia, ib, useda, usedb;
1742 ULong tyma, tymb, tymMax;
1743 Thr* thr;
1744 VTS* res;
sewardjf98e1c02008-10-25 16:22:41 +00001745
sewardjc8028ad2010-05-05 09:34:42 +00001746 stats__vts__join++;
1747
sewardjf98e1c02008-10-25 16:22:41 +00001748 tl_assert(a && a->ts);
1749 tl_assert(b && b->ts);
1750 useda = VG_(sizeXA)( a->ts );
1751 usedb = VG_(sizeXA)( b->ts );
1752
1753 res = VTS__new();
1754 ia = ib = 0;
1755
1756 while (1) {
1757
1758 /* This logic is to enumerate triples (thr, tyma, tymb) drawn
1759 from a and b in order, where thr is the next Thr*
1760 occurring in either a or b, and tyma/b are the relevant
1761 scalar timestamps, taking into account implicit zeroes. */
1762 tl_assert(ia >= 0 && ia <= useda);
1763 tl_assert(ib >= 0 && ib <= usedb);
sewardjf98e1c02008-10-25 16:22:41 +00001764
njn4c245e52009-03-15 23:25:38 +00001765 if (ia == useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00001766 /* both empty - done */
1767 break;
njn4c245e52009-03-15 23:25:38 +00001768
1769 } else if (ia == useda && ib != usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00001770 /* a empty, use up b */
njn4c245e52009-03-15 23:25:38 +00001771 ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
sewardjf98e1c02008-10-25 16:22:41 +00001772 thr = tmpb->thr;
1773 tyma = 0;
1774 tymb = tmpb->tym;
1775 ib++;
njn4c245e52009-03-15 23:25:38 +00001776
1777 } else if (ia != useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00001778 /* b empty, use up a */
njn4c245e52009-03-15 23:25:38 +00001779 ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
sewardjf98e1c02008-10-25 16:22:41 +00001780 thr = tmpa->thr;
1781 tyma = tmpa->tym;
1782 tymb = 0;
1783 ia++;
njn4c245e52009-03-15 23:25:38 +00001784
1785 } else {
sewardjf98e1c02008-10-25 16:22:41 +00001786 /* both not empty; extract lowest-Thr*'d triple */
njn4c245e52009-03-15 23:25:38 +00001787 ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
1788 ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
sewardjf98e1c02008-10-25 16:22:41 +00001789 if (tmpa->thr < tmpb->thr) {
1790 /* a has the lowest unconsidered Thr* */
1791 thr = tmpa->thr;
1792 tyma = tmpa->tym;
1793 tymb = 0;
1794 ia++;
njn4c245e52009-03-15 23:25:38 +00001795 } else if (tmpa->thr > tmpb->thr) {
sewardjf98e1c02008-10-25 16:22:41 +00001796 /* b has the lowest unconsidered Thr* */
1797 thr = tmpb->thr;
1798 tyma = 0;
1799 tymb = tmpb->tym;
1800 ib++;
1801 } else {
1802 /* they both next mention the same Thr* */
1803 tl_assert(tmpa->thr == tmpb->thr);
1804 thr = tmpa->thr; /* == tmpb->thr */
1805 tyma = tmpa->tym;
1806 tymb = tmpb->tym;
1807 ia++;
1808 ib++;
1809 }
1810 }
1811
1812 /* having laboriously determined (thr, tyma, tymb), do something
1813 useful with it. */
1814 tymMax = tyma > tymb ? tyma : tymb;
1815 if (tymMax > 0) {
1816 ScalarTS st;
1817 st.thr = thr;
1818 st.tym = tymMax;
1819 VG_(addToXA)( res->ts, &st );
1820 }
1821
1822 }
1823
1824 tl_assert(is_sane_VTS( res ));
1825
1826 return res;
1827}
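
/* Illustrative example (a sketch, with the same t1 < t2 < t3
   assumption as above): the join is the pointwise maximum, with
   absent entries treated as implicit zeroes,

      VTS__join( [(t1,3), (t2,1)], [(t1,2), (t3,4)] )
         ==  [(t1,3), (t2,1), (t3,4)]
*/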
1828
1829
sewardj23f12002009-07-24 08:45:08 +00001830/* Determine if 'a' <= 'b', in the partial ordering. Returns NULL if
 1831 so, or the first Thr* at which it fails to hold. This rather
1832 strange convention is used because sometimes we want to know the
1833 actual index at which they first differ. */
1834static Thr* VTS__cmpLEQ ( VTS* a, VTS* b )
sewardjf98e1c02008-10-25 16:22:41 +00001835{
sewardj23f12002009-07-24 08:45:08 +00001836 Word ia, ib, useda, usedb;
1837 ULong tyma, tymb;
sewardjf98e1c02008-10-25 16:22:41 +00001838
sewardjc8028ad2010-05-05 09:34:42 +00001839 stats__vts__cmpLEQ++;
1840
sewardjf98e1c02008-10-25 16:22:41 +00001841 tl_assert(a && a->ts);
1842 tl_assert(b && b->ts);
1843 useda = VG_(sizeXA)( a->ts );
1844 usedb = VG_(sizeXA)( b->ts );
1845
1846 ia = ib = 0;
1847
1848 while (1) {
1849
njn4c245e52009-03-15 23:25:38 +00001850 /* This logic is to enumerate doubles (tyma, tymb) drawn
 1851 from a and b in order, where tyma/b are the relevant
sewardjf98e1c02008-10-25 16:22:41 +00001852 scalar timestamps, taking into account implicit zeroes. */
sewardj23f12002009-07-24 08:45:08 +00001853 Thr* thr;
1854
sewardjf98e1c02008-10-25 16:22:41 +00001855 tl_assert(ia >= 0 && ia <= useda);
1856 tl_assert(ib >= 0 && ib <= usedb);
sewardjf98e1c02008-10-25 16:22:41 +00001857
njn4c245e52009-03-15 23:25:38 +00001858 if (ia == useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00001859 /* both empty - done */
1860 break;
njn4c245e52009-03-15 23:25:38 +00001861
1862 } else if (ia == useda && ib != usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00001863 /* a empty, use up b */
njn4c245e52009-03-15 23:25:38 +00001864 ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
sewardjf98e1c02008-10-25 16:22:41 +00001865 tyma = 0;
1866 tymb = tmpb->tym;
sewardj23f12002009-07-24 08:45:08 +00001867 thr = tmpb->thr;
sewardjf98e1c02008-10-25 16:22:41 +00001868 ib++;
njn4c245e52009-03-15 23:25:38 +00001869
1870 } else if (ia != useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00001871 /* b empty, use up a */
njn4c245e52009-03-15 23:25:38 +00001872 ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
sewardjf98e1c02008-10-25 16:22:41 +00001873 tyma = tmpa->tym;
sewardj23f12002009-07-24 08:45:08 +00001874 thr = tmpa->thr;
sewardjf98e1c02008-10-25 16:22:41 +00001875 tymb = 0;
1876 ia++;
njn4c245e52009-03-15 23:25:38 +00001877
1878 } else {
sewardjf98e1c02008-10-25 16:22:41 +00001879 /* both not empty; extract lowest-Thr*'d triple */
njn4c245e52009-03-15 23:25:38 +00001880 ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
1881 ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
sewardjf98e1c02008-10-25 16:22:41 +00001882 if (tmpa->thr < tmpb->thr) {
1883 /* a has the lowest unconsidered Thr* */
sewardjf98e1c02008-10-25 16:22:41 +00001884 tyma = tmpa->tym;
sewardj23f12002009-07-24 08:45:08 +00001885 thr = tmpa->thr;
sewardjf98e1c02008-10-25 16:22:41 +00001886 tymb = 0;
1887 ia++;
1888 }
1889 else
1890 if (tmpa->thr > tmpb->thr) {
1891 /* b has the lowest unconsidered Thr* */
sewardjf98e1c02008-10-25 16:22:41 +00001892 tyma = 0;
1893 tymb = tmpb->tym;
sewardj23f12002009-07-24 08:45:08 +00001894 thr = tmpb->thr;
sewardjf98e1c02008-10-25 16:22:41 +00001895 ib++;
1896 } else {
1897 /* they both next mention the same Thr* */
1898 tl_assert(tmpa->thr == tmpb->thr);
sewardjf98e1c02008-10-25 16:22:41 +00001899 tyma = tmpa->tym;
sewardj23f12002009-07-24 08:45:08 +00001900 thr = tmpa->thr;
sewardjf98e1c02008-10-25 16:22:41 +00001901 tymb = tmpb->tym;
1902 ia++;
1903 ib++;
1904 }
1905 }
1906
njn4c245e52009-03-15 23:25:38 +00001907 /* having laboriously determined (tyma, tymb), do something
sewardjf98e1c02008-10-25 16:22:41 +00001908 useful with it. */
sewardj23f12002009-07-24 08:45:08 +00001909 if (tyma > tymb) {
1910 /* not LEQ at this index. Quit, since the answer is
1911 determined already. */
1912 tl_assert(thr);
1913 return thr;
1914 }
sewardjf98e1c02008-10-25 16:22:41 +00001915 }
1916
sewardj23f12002009-07-24 08:45:08 +00001917 return NULL; /* all points are LEQ */
sewardjf98e1c02008-10-25 16:22:41 +00001918}
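
/* Illustrative example (a sketch): LEQ(A,B) holds iff every entry of
   'a' is pointwise <= the corresponding entry of 'b', with absent
   entries again counting as zero. Hence, for a single thread t1,

      VTS__cmpLEQ( [(t1,2)], [(t1,3)] )  ==  NULL   (LEQ holds)
      VTS__cmpLEQ( [(t1,3)], [(t1,2)] )  ==  t1     (fails, at t1)
*/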
1919
1920
1921/* Compute an arbitrary structural (total) ordering on the two args,
1922 based on their VCs, so they can be looked up in a table, tree, etc.
sewardjc8028ad2010-05-05 09:34:42 +00001923 Returns -1, 0 or 1. (really just 'deriving Ord' :-) This can be
 1924 performance critical so there is some effort expended to make it as
1925 fast as possible.
sewardjf98e1c02008-10-25 16:22:41 +00001926*/
1927Word VTS__cmp_structural ( VTS* a, VTS* b )
1928{
1929 /* We just need to generate an arbitrary total ordering based on
1930 a->ts and b->ts. Preferably do it in a way which comes across likely
1931 differences relatively quickly. */
sewardjc8028ad2010-05-05 09:34:42 +00001932 Word i;
1933 Word useda = 0, usedb = 0;
1934 ScalarTS *ctsa = NULL, *ctsb = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00001935
sewardjc8028ad2010-05-05 09:34:42 +00001936 stats__vts__cmp_structural++;
1937
1938 tl_assert(a);
1939 tl_assert(b);
1940
1941 VG_(getContentsXA_UNSAFE)( a->ts, (void**)&ctsa, &useda );
1942 VG_(getContentsXA_UNSAFE)( b->ts, (void**)&ctsb, &usedb );
1943
1944 if (LIKELY(useda == usedb)) {
1945 ScalarTS *tmpa = NULL, *tmpb = NULL;
1946 stats__vts__cmp_structural_slow++;
1947 /* Same length vectors. Find the first difference, if any, as
1948 fast as possible. */
1949 for (i = 0; i < useda; i++) {
1950 tmpa = &ctsa[i];
1951 tmpb = &ctsb[i];
1952 if (LIKELY(tmpa->tym == tmpb->tym && tmpa->thr == tmpb->thr))
1953 continue;
1954 else
1955 break;
1956 }
1957 if (UNLIKELY(i == useda)) {
1958 /* They're identical. */
1959 return 0;
1960 } else {
1961 tl_assert(i >= 0 && i < useda);
1962 if (tmpa->tym < tmpb->tym) return -1;
1963 if (tmpa->tym > tmpb->tym) return 1;
1964 if (tmpa->thr < tmpb->thr) return -1;
1965 if (tmpa->thr > tmpb->thr) return 1;
1966 /* we just established them as non-identical, hence: */
1967 }
1968 /*NOTREACHED*/
1969 tl_assert(0);
1970 }
sewardjf98e1c02008-10-25 16:22:41 +00001971
1972 if (useda < usedb) return -1;
1973 if (useda > usedb) return 1;
sewardjc8028ad2010-05-05 09:34:42 +00001974 /*NOTREACHED*/
1975 tl_assert(0);
sewardjf98e1c02008-10-25 16:22:41 +00001976}
1977
1978
1979/* Debugging only. Display the given VTS in the buffer.
1980*/
1981void VTS__show ( HChar* buf, Int nBuf, VTS* vts ) {
1982 ScalarTS* st;
1983 HChar unit[64];
1984 Word i, n;
1985 Int avail = nBuf;
1986 tl_assert(vts && vts->ts);
1987 tl_assert(nBuf > 16);
1988 buf[0] = '[';
1989 buf[1] = 0;
1990 n = VG_(sizeXA)( vts->ts );
1991 for (i = 0; i < n; i++) {
1992 tl_assert(avail >= 40);
1993 st = VG_(indexXA)( vts->ts, i );
1994 VG_(memset)(unit, 0, sizeof(unit));
1995 VG_(sprintf)(unit, i < n-1 ? "%p:%lld " : "%p:%lld",
1996 st->thr, st->tym);
1997 if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
1998 VG_(strcat)(buf, " ...]");
1999 buf[nBuf-1] = 0;
2000 return;
2001 }
2002 VG_(strcat)(buf, unit);
2003 avail -= VG_(strlen)(unit);
2004 }
2005 VG_(strcat)(buf, "]");
2006 buf[nBuf-1] = 0;
2007}
2008
2009
2010/* Debugging only. Return vts[index], so to speak.
2011*/
2012ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) {
2013 UWord i, n;
sewardjc8028ad2010-05-05 09:34:42 +00002014 stats__vts__indexat_slow++;
sewardjf98e1c02008-10-25 16:22:41 +00002015 tl_assert(vts && vts->ts);
2016 n = VG_(sizeXA)( vts->ts );
2017 for (i = 0; i < n; i++) {
2018 ScalarTS* st = VG_(indexXA)( vts->ts, i );
2019 if (st->thr == idx)
2020 return st->tym;
2021 }
2022 return 0;
2023}
2024
2025
2026/////////////////////////////////////////////////////////////////
2027/////////////////////////////////////////////////////////////////
2028// //
2029// SECTION END vts primitives //
2030// //
2031/////////////////////////////////////////////////////////////////
2032/////////////////////////////////////////////////////////////////
2033
2034
2035
2036/////////////////////////////////////////////////////////////////
2037/////////////////////////////////////////////////////////////////
2038// //
2039// SECTION BEGIN main library //
2040// //
2041/////////////////////////////////////////////////////////////////
2042/////////////////////////////////////////////////////////////////
2043
2044
2045/////////////////////////////////////////////////////////
2046// //
2047// VTS set //
2048// //
2049/////////////////////////////////////////////////////////
2050
2051static WordFM* /* VTS* void void */ vts_set = NULL;
2052
2053static void vts_set_init ( void )
2054{
2055 tl_assert(!vts_set);
2056 vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
2057 HG_(free),
2058 (Word(*)(UWord,UWord))VTS__cmp_structural );
2059 tl_assert(vts_set);
2060}
2061
2062/* Given a newly made VTS, look in vts_set to see if we already have
2063 an identical one. If yes, free up this one and return instead a
2064 pointer to the existing one. If no, add this one to the set and
2065 return the same pointer. Caller differentiates the two cases by
2066 comparing returned pointer with the supplied one (although that
2067 does require that the supplied VTS is not already in the set).
2068*/
2069static VTS* vts_set__find_and_dealloc__or_add ( VTS* cand )
2070{
2071 UWord keyW, valW;
sewardjc8028ad2010-05-05 09:34:42 +00002072 stats__vts_set__fadoa++;
sewardjf98e1c02008-10-25 16:22:41 +00002073 /* lookup cand (by value) */
2074 if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
2075 /* found it */
2076 tl_assert(valW == 0);
2077 /* if this fails, cand (by ref) was already present (!) */
2078 tl_assert(keyW != (UWord)cand);
sewardjc8028ad2010-05-05 09:34:42 +00002079 stats__vts_set__fadoa_d++;
sewardjf98e1c02008-10-25 16:22:41 +00002080 VTS__delete(cand);
2081 return (VTS*)keyW;
2082 } else {
2083 /* not present. Add and return pointer to same. */
2084 VG_(addToFM)( vts_set, (UWord)cand, 0/*val is unused*/ );
2085 return cand;
2086 }
2087}
2088
2089
2090/////////////////////////////////////////////////////////
2091// //
2092// VTS table //
2093// //
2094/////////////////////////////////////////////////////////
2095
2096static void VtsID__invalidate_caches ( void ); /* fwds */
2097
2098/* A type to hold VTS table entries. Invariants:
2099 If .vts == NULL, then this entry is not in use, so:
2100 - .rc == 0
2101 - this entry is on the freelist (unfortunately, does not imply
 2102 any constraints on value for .freelink)
2103 If .vts != NULL, then this entry is in use:
2104 - .vts is findable in vts_set
2105 - .vts->id == this entry number
2106 - no specific value for .rc (even 0 is OK)
 2107 - this entry is not on freelist, so .freelink == VtsID_INVALID
2108*/
2109typedef
2110 struct {
2111 VTS* vts; /* vts, in vts_set */
2112 UWord rc; /* reference count - enough for entire aspace */
2113 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
2114 }
2115 VtsTE;
2116
2117/* The VTS table. */
2118static XArray* /* of VtsTE */ vts_tab = NULL;
2119
2120/* An index into the VTS table, indicating the start of the list of
2121 free (available for use) entries. If the list is empty, this is
2122 VtsID_INVALID. */
2123static VtsID vts_tab_freelist = VtsID_INVALID;
2124
2125/* Do a GC of vts_tab when the freelist becomes empty AND the size of
2126 vts_tab equals or exceeds this size. After GC, the value here is
2127 set appropriately so as to check for the next GC point. */
2128static Word vts_next_GC_at = 1000;
2129
2130static void vts_tab_init ( void )
2131{
2132 vts_tab
2133 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2134 HG_(free), sizeof(VtsTE) );
2135 vts_tab_freelist
2136 = VtsID_INVALID;
2137 tl_assert(vts_tab);
2138}
2139
2140/* Add ii to the free list, checking that it looks out-of-use. */
2141static void add_to_free_list ( VtsID ii )
2142{
2143 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2144 tl_assert(ie->vts == NULL);
2145 tl_assert(ie->rc == 0);
2146 tl_assert(ie->freelink == VtsID_INVALID);
2147 ie->freelink = vts_tab_freelist;
2148 vts_tab_freelist = ii;
2149}
2150
2151/* Get an entry from the free list. This will return VtsID_INVALID if
2152 the free list is empty. */
2153static VtsID get_from_free_list ( void )
2154{
2155 VtsID ii;
2156 VtsTE* ie;
2157 if (vts_tab_freelist == VtsID_INVALID)
2158 return VtsID_INVALID;
2159 ii = vts_tab_freelist;
2160 ie = VG_(indexXA)( vts_tab, ii );
2161 tl_assert(ie->vts == NULL);
2162 tl_assert(ie->rc == 0);
2163 vts_tab_freelist = ie->freelink;
2164 return ii;
2165}
2166
2167/* Produce a new VtsID that can be used, either by getting it from
2168 the freelist, or, if that is empty, by expanding vts_tab. */
2169static VtsID get_new_VtsID ( void )
2170{
2171 VtsID ii;
2172 VtsTE te;
2173 ii = get_from_free_list();
2174 if (ii != VtsID_INVALID)
2175 return ii;
2176 te.vts = NULL;
2177 te.rc = 0;
2178 te.freelink = VtsID_INVALID;
2179 ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2180 return ii;
2181}
2182
2183
2184/* Indirect callback from lib_zsm. */
2185static void VtsID__rcinc ( VtsID ii )
2186{
2187 VtsTE* ie;
2188 /* VG_(indexXA) does a range check for us */
2189 ie = VG_(indexXA)( vts_tab, ii );
2190 tl_assert(ie->vts); /* else it's not in use */
2191 tl_assert(ie->rc < ~0UL); /* else we can't continue */
2192 tl_assert(ie->vts->id == ii);
2193 ie->rc++;
2194}
2195
2196/* Indirect callback from lib_zsm. */
2197static void VtsID__rcdec ( VtsID ii )
2198{
2199 VtsTE* ie;
2200 /* VG_(indexXA) does a range check for us */
2201 ie = VG_(indexXA)( vts_tab, ii );
2202 tl_assert(ie->vts); /* else it's not in use */
2203 tl_assert(ie->rc > 0); /* else RC snafu */
2204 tl_assert(ie->vts->id == ii);
2205 ie->rc--;
2206}
2207
2208
2209/* Look up 'cand' in our collection of VTSs. If present, deallocate
2210 it and return the VtsID for the pre-existing version. If not
2211 present, add it to both vts_tab and vts_set, allocate a fresh VtsID
2212 for it, and return that. */
2213static VtsID vts_tab__find_and_dealloc__or_add ( VTS* cand )
2214{
2215 VTS* auld;
2216 tl_assert(cand->id == VtsID_INVALID);
2217 auld = vts_set__find_and_dealloc__or_add(cand);
2218 if (auld != cand) {
2219 /* We already have an Aulde one. Use that. */
2220 VtsTE* ie;
2221 tl_assert(auld->id != VtsID_INVALID);
2222 ie = VG_(indexXA)( vts_tab, auld->id );
2223 tl_assert(ie->vts == auld);
2224 return auld->id;
2225 } else {
2226 VtsID ii = get_new_VtsID();
2227 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2228 ie->vts = cand;
2229 ie->rc = 0;
2230 ie->freelink = VtsID_INVALID;
2231 cand->id = ii;
2232 return ii;
2233 }
2234}
2235
2236
2237static void show_vts_stats ( HChar* caller )
2238{
2239 UWord nSet, nTab, nLive;
2240 ULong totrc;
2241 UWord n, i;
2242 nSet = VG_(sizeFM)( vts_set );
2243 nTab = VG_(sizeXA)( vts_tab );
2244 totrc = 0;
2245 nLive = 0;
2246 n = VG_(sizeXA)( vts_tab );
2247 for (i = 0; i < n; i++) {
2248 VtsTE* ie = VG_(indexXA)( vts_tab, i );
2249 if (ie->vts) {
2250 nLive++;
2251 totrc += (ULong)ie->rc;
2252 } else {
2253 tl_assert(ie->rc == 0);
2254 }
2255 }
2256 VG_(printf)(" show_vts_stats %s\n", caller);
2257 VG_(printf)(" vts_tab size %4lu\n", nTab);
2258 VG_(printf)(" vts_tab live %4lu\n", nLive);
2259 VG_(printf)(" vts_set size %4lu\n", nSet);
2260 VG_(printf)(" total rc %4llu\n", totrc);
2261}
2262
2263/* NOT TO BE CALLED FROM WITHIN libzsm. */
sewardj8fd92d32008-11-20 23:17:01 +00002264__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00002265static void vts_tab__do_GC ( Bool show_stats )
2266{
2267 UWord i, nTab, nLive, nFreed;
2268
2269 /* check this is actually necessary. */
2270 tl_assert(vts_tab_freelist == VtsID_INVALID);
2271
2272 /* empty the caches for partial order checks and binary joins. We
2273 could do better and prune out the entries to be deleted, but it
2274 ain't worth the hassle. */
2275 VtsID__invalidate_caches();
2276
2277 /* First, make the reference counts up to date. */
2278 zsm_flush_cache();
2279
2280 nTab = VG_(sizeXA)( vts_tab );
2281
2282 if (show_stats) {
2283 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
2284 show_vts_stats("before GC");
2285 }
2286
2287 /* Now we can inspect the entire vts_tab. Any entries
2288 with zero .rc fields are now no longer in use and can be
 2289 put on the free list, removed from vts_set, and deleted. */
2290 nFreed = 0;
2291 for (i = 0; i < nTab; i++) {
2292 Bool present;
2293 UWord oldK = 0, oldV = 0;
2294 VtsTE* te = VG_(indexXA)( vts_tab, i );
2295 if (te->vts == NULL) {
2296 tl_assert(te->rc == 0);
2297 continue; /* already on the free list (presumably) */
2298 }
2299 if (te->rc > 0)
2300 continue; /* in use */
2301 /* Ok, we got one we can free. */
2302 tl_assert(te->vts->id == i);
2303 /* first, remove it from vts_set. */
2304 present = VG_(delFromFM)( vts_set,
2305 &oldK, &oldV, (UWord)te->vts );
2306 tl_assert(present); /* else it isn't in vts_set ?! */
2307 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2308 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
2309 /* now free the VTS itself */
2310 VTS__delete(te->vts);
2311 te->vts = NULL;
2312 /* and finally put this entry on the free list */
2313 tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
2314 add_to_free_list( i );
2315 nFreed++;
2316 }
2317
2318 /* Now figure out when the next GC should be. We'll allow the
2319 number of VTSs to double before GCing again. Except of course
2320 that since we can't (or, at least, don't) shrink vts_tab, we
 2321 can't set the threshold value smaller than it. */
2322 tl_assert(nFreed <= nTab);
2323 nLive = nTab - nFreed;
2324 tl_assert(nLive >= 0 && nLive <= nTab);
2325 vts_next_GC_at = 2 * nLive;
2326 if (vts_next_GC_at < nTab)
2327 vts_next_GC_at = nTab;
2328
2329 if (show_stats) {
2330 show_vts_stats("after GC");
2331 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
2332 }
2333
sewardj5e2ac3b2009-08-11 10:39:25 +00002334 if (VG_(clo_stats)) {
sewardjf98e1c02008-10-25 16:22:41 +00002335 static UInt ctr = 0;
2336 tl_assert(nTab > 0);
sewardjd024ae52008-11-09 20:47:57 +00002337 VG_(message)(Vg_DebugMsg,
sewardj24118492009-07-15 14:50:02 +00002338 "libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)\n",
sewardj8aa41de2009-01-22 12:24:26 +00002339 ctr++, nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
sewardjf98e1c02008-10-25 16:22:41 +00002340 }
2341}
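
/* Numeric example (a sketch): if a GC of a 1000-entry vts_tab frees
   400 entries, then nLive == 600 and the next GC point becomes
   max(2 * 600, 1000) == 1200. So the live population may roughly
   double before the table is scanned again. */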
2342
2343
2344/////////////////////////////////////////////////////////
2345// //
2346// Vts IDs //
2347// //
2348/////////////////////////////////////////////////////////
2349
2350//////////////////////////
sewardj23f12002009-07-24 08:45:08 +00002351static ULong stats__cmpLEQ_queries = 0;
2352static ULong stats__cmpLEQ_misses = 0;
2353static ULong stats__join2_queries = 0;
2354static ULong stats__join2_misses = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002355
2356static inline UInt ROL32 ( UInt w, Int n ) {
2357 w = (w << n) | (w >> (32-n));
2358 return w;
2359}
2360static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
2361 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
2362 return hash % nTab;
2363}
2364
sewardj23f12002009-07-24 08:45:08 +00002365#define N_CMPLEQ_CACHE 1023
sewardjf98e1c02008-10-25 16:22:41 +00002366static
sewardj23f12002009-07-24 08:45:08 +00002367 struct { VtsID vi1; VtsID vi2; Bool leq; }
2368 cmpLEQ_cache[N_CMPLEQ_CACHE];
sewardjf98e1c02008-10-25 16:22:41 +00002369
2370#define N_JOIN2_CACHE 1023
2371static
2372 struct { VtsID vi1; VtsID vi2; VtsID res; }
2373 join2_cache[N_JOIN2_CACHE];
2374
2375static void VtsID__invalidate_caches ( void ) {
2376 Int i;
sewardj23f12002009-07-24 08:45:08 +00002377 for (i = 0; i < N_CMPLEQ_CACHE; i++) {
2378 cmpLEQ_cache[i].vi1 = VtsID_INVALID;
2379 cmpLEQ_cache[i].vi2 = VtsID_INVALID;
2380 cmpLEQ_cache[i].leq = False;
sewardjf98e1c02008-10-25 16:22:41 +00002381 }
2382 for (i = 0; i < N_JOIN2_CACHE; i++) {
2383 join2_cache[i].vi1 = VtsID_INVALID;
2384 join2_cache[i].vi2 = VtsID_INVALID;
2385 join2_cache[i].res = VtsID_INVALID;
2386 }
2387}
2388//////////////////////////
2389
sewardjd52392d2008-11-08 20:36:26 +00002390//static Bool VtsID__is_valid ( VtsID vi ) {
2391// VtsTE* ve;
2392// if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
2393// return False;
2394// ve = VG_(indexXA)( vts_tab, vi );
2395// if (!ve->vts)
2396// return False;
2397// tl_assert(ve->vts->id == vi);
2398// return True;
2399//}
sewardjf98e1c02008-10-25 16:22:41 +00002400
2401static VTS* VtsID__to_VTS ( VtsID vi ) {
2402 VtsTE* te = VG_(indexXA)( vts_tab, vi );
2403 tl_assert(te->vts);
2404 return te->vts;
2405}
2406
2407static void VtsID__pp ( VtsID vi ) {
2408 HChar buf[100];
2409 VTS* vts = VtsID__to_VTS(vi);
2410 VTS__show( buf, sizeof(buf)-1, vts );
2411 buf[sizeof(buf)-1] = 0;
2412 VG_(printf)("%s", buf);
2413}
2414
2415/* compute partial ordering relation of vi1 and vi2. */
2416__attribute__((noinline))
sewardj23f12002009-07-24 08:45:08 +00002417static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
sewardjf98e1c02008-10-25 16:22:41 +00002418 UInt hash;
sewardj23f12002009-07-24 08:45:08 +00002419 Bool leq;
sewardjf98e1c02008-10-25 16:22:41 +00002420 VTS *v1, *v2;
sewardj23f12002009-07-24 08:45:08 +00002421 //if (vi1 == vi2) return True;
sewardjf98e1c02008-10-25 16:22:41 +00002422 tl_assert(vi1 != vi2);
2423 ////++
sewardj23f12002009-07-24 08:45:08 +00002424 stats__cmpLEQ_queries++;
2425 hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
2426 if (cmpLEQ_cache[hash].vi1 == vi1
2427 && cmpLEQ_cache[hash].vi2 == vi2)
2428 return cmpLEQ_cache[hash].leq;
2429 stats__cmpLEQ_misses++;
sewardjf98e1c02008-10-25 16:22:41 +00002430 ////--
2431 v1 = VtsID__to_VTS(vi1);
2432 v2 = VtsID__to_VTS(vi2);
sewardj23f12002009-07-24 08:45:08 +00002433 leq = VTS__cmpLEQ( v1, v2 ) == NULL;
sewardjf98e1c02008-10-25 16:22:41 +00002434 ////++
sewardj23f12002009-07-24 08:45:08 +00002435 cmpLEQ_cache[hash].vi1 = vi1;
2436 cmpLEQ_cache[hash].vi2 = vi2;
2437 cmpLEQ_cache[hash].leq = leq;
sewardjf98e1c02008-10-25 16:22:41 +00002438 ////--
sewardj23f12002009-07-24 08:45:08 +00002439 return leq;
sewardjf98e1c02008-10-25 16:22:41 +00002440}
sewardj23f12002009-07-24 08:45:08 +00002441static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
2442 return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2);
sewardjf98e1c02008-10-25 16:22:41 +00002443}
2444
2445/* compute binary join */
2446__attribute__((noinline))
2447static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
2448 UInt hash;
2449 VtsID res;
2450 VTS *vts1, *vts2, *nyu;
2451 //if (vi1 == vi2) return vi1;
2452 tl_assert(vi1 != vi2);
2453 ////++
2454 stats__join2_queries++;
2455 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
2456 if (join2_cache[hash].vi1 == vi1
2457 && join2_cache[hash].vi2 == vi2)
2458 return join2_cache[hash].res;
2459 stats__join2_misses++;
2460 ////--
2461 vts1 = VtsID__to_VTS(vi1);
2462 vts2 = VtsID__to_VTS(vi2);
2463 nyu = VTS__join(vts1,vts2);
2464 res = vts_tab__find_and_dealloc__or_add(nyu);
2465 ////++
2466 join2_cache[hash].vi1 = vi1;
2467 join2_cache[hash].vi2 = vi2;
2468 join2_cache[hash].res = res;
2469 ////--
2470 return res;
2471}
2472static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00002473 return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2);
sewardjf98e1c02008-10-25 16:22:41 +00002474}
2475
2476/* create a singleton VTS, namely [thr:1] */
2477static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
2478 VTS* nyu = VTS__singleton(thr,tym);
2479 return vts_tab__find_and_dealloc__or_add(nyu);
2480}
2481
2482/* tick operation, creates value 1 if specified index is absent */
2483static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
2484 VTS* vts = VtsID__to_VTS(vi);
2485 VTS* nyu = VTS__tick(idx,vts);
2486 return vts_tab__find_and_dealloc__or_add(nyu);
2487}
2488
2489/* index into a VTS (only for assertions) */
2490static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
2491 VTS* vts = VtsID__to_VTS(vi);
2492 return VTS__indexAt_SLOW( vts, idx );
2493}
2494
sewardj23f12002009-07-24 08:45:08 +00002495/* Assuming that !cmpLEQ(vi1, vi2), find the Thr* of the first (or
 2496 any, really) element in vi1 which is pointwise greater-than the
 2497 corresponding element in vi2. Given the assumption, such an element
 2498 must exist. This needs to be fairly quick since it is called every time
2499 a race is detected. */
2500static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
2501{
2502 VTS *vts1, *vts2;
2503 Thr* diffthr;
2504 tl_assert(vi1 != vi2);
2505 vts1 = VtsID__to_VTS(vi1);
2506 vts2 = VtsID__to_VTS(vi2);
2507 tl_assert(vts1 != vts2);
2508 diffthr = VTS__cmpLEQ(vts1, vts2);
2509 tl_assert(diffthr); /* else they are LEQ ! */
2510 return diffthr;
2511}
2512
2513
2514/////////////////////////////////////////////////////////
2515// //
2516// Filters //
2517// //
2518/////////////////////////////////////////////////////////
2519
2520// baseline: 5, 9
2521#define FI_LINE_SZB_LOG2 5
2522#define FI_NUM_LINES_LOG2 10
2523
2524#define FI_LINE_SZB (1 << FI_LINE_SZB_LOG2)
2525#define FI_NUM_LINES (1 << FI_NUM_LINES_LOG2)
2526
2527#define FI_TAG_MASK (~(Addr)(FI_LINE_SZB - 1))
2528#define FI_GET_TAG(_a) ((_a) & FI_TAG_MASK)
2529
2530#define FI_GET_LINENO(_a) ( ((_a) >> FI_LINE_SZB_LOG2) \
2531 & (Addr)(FI_NUM_LINES-1) )
2532
2533
2534/* In the lines, each 8 bytes are treated individually, and are mapped
2535 to a UShort. Regardless of endianness of the underlying machine,
2536 bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
2537 the highest address.
2538
 2539 Of each bit pair, the higher numbered bit is set if an R (read) has
 2540 been seen and the lower if a W (write) has, so the actual layout is:
2541
2542 15 14 ... 01 00
2543
2544 R W for addr+7 ... R W for addr+0
2545
2546 So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
2547*/
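
/* Worked example (a sketch): with FI_LINE_SZB_LOG2 == 5 a line covers
   32 bytes, i.e. 4 of the UShorts described above. For a 1-byte access
   at address a == 0x1003:

      atag == FI_GET_TAG(a)        == 0x1000
      loff == (a - atag) / 8       == 0      (first UShort in the line)
      mask == 0x3 << (2 * (a & 7)) == 0x3 << 6 == 0x00C0

   that is, the R and W bits for byte addr+3 of that UShort. */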
2548
2549/* tags are separated from lines. tags are Addrs and are
2550 the base address of the line. */
2551typedef
2552 struct {
2553 UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
2554 }
2555 FiLine;
2556
2557typedef
2558 struct {
2559 Addr tags[FI_NUM_LINES];
2560 FiLine lines[FI_NUM_LINES];
2561 }
2562 Filter;
2563
2564/* Forget everything we know -- clear the filter and let everything
2565 through. This needs to be as fast as possible, since it is called
2566 every time the running thread changes, and every time a thread's
2567 vector clocks change, which can be quite frequent. The obvious
2568 fast way to do this is simply to stuff in tags which we know are
2569 not going to match anything, since they're not aligned to the start
2570 of a line. */
2571static void Filter__clear ( Filter* fi, HChar* who )
2572{
2573 UWord i;
2574 if (0) VG_(printf)(" Filter__clear(%p, %s)\n", fi, who);
2575 for (i = 0; i < FI_NUM_LINES; i += 8) {
2576 fi->tags[i+0] = 1; /* impossible value -- cannot match */
2577 fi->tags[i+1] = 1;
2578 fi->tags[i+2] = 1;
2579 fi->tags[i+3] = 1;
2580 fi->tags[i+4] = 1;
2581 fi->tags[i+5] = 1;
2582 fi->tags[i+6] = 1;
2583 fi->tags[i+7] = 1;
2584 }
2585 tl_assert(i == FI_NUM_LINES);
2586}
2587
2588/* Clearing an arbitrary range in the filter. Unfortunately
2589 we have to do this due to core-supplied new/die-mem events. */
2590
2591static void Filter__clear_1byte ( Filter* fi, Addr a )
2592{
2593 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2594 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2595 FiLine* line = &fi->lines[lineno];
2596 UWord loff = (a - atag) / 8;
2597 UShort mask = 0x3 << (2 * (a & 7));
2598 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
2599 if (LIKELY( fi->tags[lineno] == atag )) {
2600 /* hit. clear the bits. */
2601 UShort u16 = line->u16s[loff];
2602 line->u16s[loff] = u16 & ~mask; /* clear them */
2603 } else {
2604 /* miss. The filter doesn't hold this address, so ignore. */
2605 }
2606}
2607
2608static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
2609{
2610 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2611 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2612 FiLine* line = &fi->lines[lineno];
2613 UWord loff = (a - atag) / 8;
2614 if (LIKELY( fi->tags[lineno] == atag )) {
2615 line->u16s[loff] = 0;
2616 } else {
2617 /* miss. The filter doesn't hold this address, so ignore. */
2618 }
2619}
2620
2621static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
2622{
2623 //VG_(printf)("%lu ", len);
2624 /* slowly do part preceding 8-alignment */
2625 while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
2626 Filter__clear_1byte( fi, a );
2627 a++;
2628 len--;
2629 }
2630 /* vector loop */
2631 while (len >= 8) {
2632 Filter__clear_8bytes_aligned( fi, a );
2633 a += 8;
2634 len -= 8;
2635 }
2636 /* slowly do tail */
2637 while (UNLIKELY(len > 0)) {
2638 Filter__clear_1byte( fi, a );
2639 a++;
2640 len--;
2641 }
2642}
2643
2644
2645/* ------ Read handlers for the filter. ------ */
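
/* The handlers below all share one pattern: if the line for 'a' is
   present and the relevant bits for the whole access are already set
   (R bits for reads; both R and W bits for the write handlers further
   down), return True, meaning the access may be skipped. Otherwise
   note the access by setting those bits and return False.
   Insufficiently aligned accesses are never skipped. */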
2646
2647static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
2648{
2649 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
2650 return False;
2651 {
2652 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2653 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2654 FiLine* line = &fi->lines[lineno];
2655 UWord loff = (a - atag) / 8;
2656 UShort mask = 0xAAAA;
2657 if (LIKELY( fi->tags[lineno] == atag )) {
2658 /* hit. check line and update. */
2659 UShort u16 = line->u16s[loff];
2660 Bool ok = (u16 & mask) == mask; /* all R bits set? */
2661 line->u16s[loff] = u16 | mask; /* set them */
2662 return ok;
2663 } else {
2664 /* miss. nuke existing line and re-use it. */
2665 UWord i;
2666 fi->tags[lineno] = atag;
2667 for (i = 0; i < FI_LINE_SZB / 8; i++)
2668 line->u16s[i] = 0;
2669 line->u16s[loff] = mask;
2670 return False;
2671 }
2672 }
2673}
2674
2675static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
2676{
2677 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
2678 return False;
2679 {
2680 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2681 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2682 FiLine* line = &fi->lines[lineno];
2683 UWord loff = (a - atag) / 8;
2684 UShort mask = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
2685 if (LIKELY( fi->tags[lineno] == atag )) {
2686 /* hit. check line and update. */
2687 UShort u16 = line->u16s[loff];
2688 Bool ok = (u16 & mask) == mask; /* 4 x R bits set? */
2689 line->u16s[loff] = u16 | mask; /* set them */
2690 return ok;
2691 } else {
2692 /* miss. nuke existing line and re-use it. */
2693 UWord i;
2694 fi->tags[lineno] = atag;
2695 for (i = 0; i < FI_LINE_SZB / 8; i++)
2696 line->u16s[i] = 0;
2697 line->u16s[loff] = mask;
2698 return False;
2699 }
2700 }
2701}
2702
2703static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
2704{
2705 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
2706 return False;
2707 {
2708 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2709 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2710 FiLine* line = &fi->lines[lineno];
2711 UWord loff = (a - atag) / 8;
2712 UShort mask = 0xA << (2 * (a & 6));
2713 /* mask is A000, 0A00, 00A0 or 000A */
2714 if (LIKELY( fi->tags[lineno] == atag )) {
2715 /* hit. check line and update. */
2716 UShort u16 = line->u16s[loff];
2717 Bool ok = (u16 & mask) == mask; /* 2 x R bits set? */
2718 line->u16s[loff] = u16 | mask; /* set them */
2719 return ok;
2720 } else {
2721 /* miss. nuke existing line and re-use it. */
2722 UWord i;
2723 fi->tags[lineno] = atag;
2724 for (i = 0; i < FI_LINE_SZB / 8; i++)
2725 line->u16s[i] = 0;
2726 line->u16s[loff] = mask;
2727 return False;
2728 }
2729 }
2730}
2731
2732static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
2733{
2734 {
2735 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2736 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2737 FiLine* line = &fi->lines[lineno];
2738 UWord loff = (a - atag) / 8;
2739 UShort mask = 0x2 << (2 * (a & 7));
2740 /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
2741 if (LIKELY( fi->tags[lineno] == atag )) {
2742 /* hit. check line and update. */
2743 UShort u16 = line->u16s[loff];
2744 Bool ok = (u16 & mask) == mask; /* 1 x R bits set? */
2745 line->u16s[loff] = u16 | mask; /* set them */
2746 return ok;
2747 } else {
2748 /* miss. nuke existing line and re-use it. */
2749 UWord i;
2750 fi->tags[lineno] = atag;
2751 for (i = 0; i < FI_LINE_SZB / 8; i++)
2752 line->u16s[i] = 0;
2753 line->u16s[loff] = mask;
2754 return False;
2755 }
2756 }
2757}
2758
2759
2760/* ------ Write handlers for the filter. ------ */
2761
2762static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
2763{
2764 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
2765 return False;
2766 {
2767 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2768 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2769 FiLine* line = &fi->lines[lineno];
2770 UWord loff = (a - atag) / 8;
2771 UShort mask = 0xFFFF;
2772 if (LIKELY( fi->tags[lineno] == atag )) {
2773 /* hit. check line and update. */
2774 UShort u16 = line->u16s[loff];
2775 Bool ok = (u16 & mask) == mask; /* all R & W bits set? */
2776 line->u16s[loff] = u16 | mask; /* set them */
2777 return ok;
2778 } else {
2779 /* miss. nuke existing line and re-use it. */
2780 UWord i;
2781 fi->tags[lineno] = atag;
2782 for (i = 0; i < FI_LINE_SZB / 8; i++)
2783 line->u16s[i] = 0;
2784 line->u16s[loff] = mask;
2785 return False;
2786 }
2787 }
2788}
2789
2790static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
2791{
2792 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
2793 return False;
2794 {
2795 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2796 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2797 FiLine* line = &fi->lines[lineno];
2798 UWord loff = (a - atag) / 8;
2799 UShort mask = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
2800 if (LIKELY( fi->tags[lineno] == atag )) {
2801 /* hit. check line and update. */
2802 UShort u16 = line->u16s[loff];
2803 Bool ok = (u16 & mask) == mask; /* 4 x R & W bits set? */
2804 line->u16s[loff] = u16 | mask; /* set them */
2805 return ok;
2806 } else {
2807 /* miss. nuke existing line and re-use it. */
2808 UWord i;
2809 fi->tags[lineno] = atag;
2810 for (i = 0; i < FI_LINE_SZB / 8; i++)
2811 line->u16s[i] = 0;
2812 line->u16s[loff] = mask;
2813 return False;
2814 }
2815 }
2816}
2817
2818static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
2819{
2820 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
2821 return False;
2822 {
2823 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2824 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2825 FiLine* line = &fi->lines[lineno];
2826 UWord loff = (a - atag) / 8;
2827 UShort mask = 0xF << (2 * (a & 6));
2828 /* mask is F000, 0F00, 00F0 or 000F */
2829 if (LIKELY( fi->tags[lineno] == atag )) {
2830 /* hit. check line and update. */
2831 UShort u16 = line->u16s[loff];
2832 Bool ok = (u16 & mask) == mask; /* 2 x R & W bits set? */
2833 line->u16s[loff] = u16 | mask; /* set them */
2834 return ok;
2835 } else {
2836 /* miss. nuke existing line and re-use it. */
2837 UWord i;
2838 fi->tags[lineno] = atag;
2839 for (i = 0; i < FI_LINE_SZB / 8; i++)
2840 line->u16s[i] = 0;
2841 line->u16s[loff] = mask;
2842 return False;
2843 }
2844 }
2845}
2846
2847static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
2848{
2849 {
2850 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2851 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2852 FiLine* line = &fi->lines[lineno];
2853 UWord loff = (a - atag) / 8;
2854 UShort mask = 0x3 << (2 * (a & 7));
2855 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
2856 if (LIKELY( fi->tags[lineno] == atag )) {
2857 /* hit. check line and update. */
2858 UShort u16 = line->u16s[loff];
 2859 Bool ok = (u16 & mask) == mask; /* 1 x R & W bits set? */
2860 line->u16s[loff] = u16 | mask; /* set them */
2861 return ok;
2862 } else {
2863 /* miss. nuke existing line and re-use it. */
2864 UWord i;
2865 fi->tags[lineno] = atag;
2866 for (i = 0; i < FI_LINE_SZB / 8; i++)
2867 line->u16s[i] = 0;
2868 line->u16s[loff] = mask;
2869 return False;
2870 }
2871 }
2872}
2873
sewardjf98e1c02008-10-25 16:22:41 +00002874
2875/////////////////////////////////////////////////////////
2876// //
2877// Threads //
2878// //
2879/////////////////////////////////////////////////////////
2880
sewardj23f12002009-07-24 08:45:08 +00002881// QQQ move this somewhere else
2882typedef struct { ULong ull; ExeContext* ec; } ULong_n_EC;
2883
sewardj8ab2c132009-08-02 09:34:35 +00002884/* How many of the above records to collect for each thread? Older
2885 ones are dumped when we run out of space. 62.5k requires 1MB per
2886 thread, since each ULong_n_EC record is 16 bytes long. When more
2887 than N_KWs_N_STACKs_PER_THREAD are present, the older half are
2888 deleted to make space. Hence in the worst case we will be able to
2889 produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
2890 Kw transitions (segments in this thread). For the current setting
2891 that gives a guaranteed stack for at least the last 31.25k
2892 segments. */
2893#define N_KWs_N_STACKs_PER_THREAD 62500
2894
2895
sewardjf98e1c02008-10-25 16:22:41 +00002896struct _Thr {
2897 /* Current VTSs for this thread. They change as we go along. viR
2898 is the VTS to be used for reads, viW for writes. Usually they
2899 are the same, but can differ when we deal with reader-writer
sewardj23f12002009-07-24 08:45:08 +00002900 locks. It is always the case that
2901 VtsID__cmpLEQ(viW,viR) == True
 2902 that is, viW must be the same as, or lagging behind, viR. */
sewardjf98e1c02008-10-25 16:22:41 +00002903 VtsID viR;
2904 VtsID viW;
sewardj23f12002009-07-24 08:45:08 +00002905
 2906 /* Is initially True, and is set to False after the thread really
2907 has done a low-level exit. */
2908 Bool still_alive;
2909
2910 /* A filter that removes references for which we believe that
2911 msmcread/msmcwrite will not change the state, nor report a
2912 race. */
2913 Filter* filter;
2914
sewardjf98e1c02008-10-25 16:22:41 +00002915 /* opaque (to us) data we hold on behalf of the library's user. */
2916 void* opaque;
sewardj23f12002009-07-24 08:45:08 +00002917
sewardj8ab2c132009-08-02 09:34:35 +00002918 /* The ULongs (scalar Kws) in this array accumulate in strictly
sewardj23f12002009-07-24 08:45:08 +00002919 increasing order, without duplicates. This is important because
sewardj8ab2c132009-08-02 09:34:35 +00002920 we need to be able to find a given scalar Kw in this array
sewardj23f12002009-07-24 08:45:08 +00002921 later, by binary search. */
sewardj8ab2c132009-08-02 09:34:35 +00002922 XArray* /* ULong_n_EC */ local_Kws_n_stacks;
sewardjf98e1c02008-10-25 16:22:41 +00002923};
2924
2925static Thr* Thr__new ( void ) {
2926 Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
2927 thr->viR = VtsID_INVALID;
2928 thr->viW = VtsID_INVALID;
sewardj23f12002009-07-24 08:45:08 +00002929 thr->still_alive = True;
2930 thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
sewardj2d2ea2f2009-08-02 10:15:07 +00002931 /* We only really need this at history level 1, but unfortunately
2932 this routine is called before the command line processing is
2933 done (sigh), so we can't rely on HG_(clo_history_level) at this
2934 point. Hence always allocate it. Bah. */
sewardj8ab2c132009-08-02 09:34:35 +00002935 thr->local_Kws_n_stacks
sewardj2d2ea2f2009-08-02 10:15:07 +00002936 = VG_(newXA)( HG_(zalloc),
2937 "libhb.Thr__new.3 (local_Kws_and_stacks)",
sewardj23f12002009-07-24 08:45:08 +00002938 HG_(free), sizeof(ULong_n_EC) );
sewardjf98e1c02008-10-25 16:22:41 +00002939 return thr;
2940}
2941
sewardj8ab2c132009-08-02 09:34:35 +00002942static void note_local_Kw_n_stack_for ( Thr* thr )
sewardj23f12002009-07-24 08:45:08 +00002943{
2944 Word nPresent;
2945 ULong_n_EC pair;
2946 tl_assert(thr);
sewardjb7126172009-07-26 19:50:06 +00002947
2948 // We only collect this info at history level 1 (approx)
2949 if (HG_(clo_history_level) != 1)
2950 return;
2951
sewardj8ab2c132009-08-02 09:34:35 +00002952 /* This is the scalar Kw for thr. */
2953 pair.ull = VtsID__indexAt( thr->viW, thr );
sewardj23f12002009-07-24 08:45:08 +00002954 pair.ec = main_get_EC( thr );
2955 tl_assert(pair.ec);
sewardj8ab2c132009-08-02 09:34:35 +00002956 tl_assert(thr->local_Kws_n_stacks);
sewardj23f12002009-07-24 08:45:08 +00002957
2958 /* check that we're not adding duplicates */
sewardj8ab2c132009-08-02 09:34:35 +00002959 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
sewardj23f12002009-07-24 08:45:08 +00002960
2961 /* Throw away old stacks, if necessary. We can't accumulate stuff
2962 indefinitely. */
sewardj8ab2c132009-08-02 09:34:35 +00002963 if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
2964 VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
2965 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
2966 if (0)
2967 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p (!!! gc !!!)\n",
sewardj23f12002009-07-24 08:45:08 +00002968 thr, pair.ull, pair.ec );
2969 }
2970
2971 if (nPresent > 0) {
2972 ULong_n_EC* prevPair
sewardj8ab2c132009-08-02 09:34:35 +00002973 = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
2974 tl_assert( prevPair->ull <= pair.ull );
sewardj23f12002009-07-24 08:45:08 +00002975 }
2976
2977 if (nPresent == 0)
2978 pair.ec = NULL;
2979
sewardj8ab2c132009-08-02 09:34:35 +00002980 VG_(addToXA)( thr->local_Kws_n_stacks, &pair );
sewardj23f12002009-07-24 08:45:08 +00002981
2982 if (0)
sewardj8ab2c132009-08-02 09:34:35 +00002983 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p\n",
sewardj23f12002009-07-24 08:45:08 +00002984 thr, pair.ull, pair.ec );
2985 if (0)
2986 VG_(pp_ExeContext)(pair.ec);
2987}
2988
2989static Int cmp__ULong_n_EC__by_ULong ( ULong_n_EC* pair1, ULong_n_EC* pair2 )
2990{
2991 if (pair1->ull < pair2->ull) return -1;
2992 if (pair1->ull > pair2->ull) return 1;
2993 return 0;
2994}
2995
sewardjf98e1c02008-10-25 16:22:41 +00002996
2997/////////////////////////////////////////////////////////
2998// //
2999// Shadow Values //
3000// //
3001/////////////////////////////////////////////////////////
3002
3003// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
3004// hb_zsm.h. We have to do everything else here.
3005
3006/* SVal is 64 bit unsigned int.
3007
3008 <---------30---------> <---------30--------->
3009 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
sewardjf98e1c02008-10-25 16:22:41 +00003010 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
sewardj23f12002009-07-24 08:45:08 +00003011 11 0--------------------0 00 0--------------------0 SVal_INVALID
3012
sewardjf98e1c02008-10-25 16:22:41 +00003013*/
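
/* Worked example (a sketch): a C(Rmin,Wmin) value is just the two
   30-bit VtsIDs packed into one 64-bit word under a 00 tag. For
   rmini == 5 and wmini == 7,

      SVal__mkC(5,7) == (5ULL << 32) | 7ULL == 0x0000000500000007

   and SVal__unC_Rmin/SVal__unC_Wmin recover 5 and 7 respectively. */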
3014#define SVAL_TAGMASK (3ULL << 62)
3015
3016static inline Bool SVal__isC ( SVal s ) {
3017 return (0ULL << 62) == (s & SVAL_TAGMASK);
3018}
3019static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
3020 //tl_assert(VtsID__is_valid(rmini));
3021 //tl_assert(VtsID__is_valid(wmini));
3022 return (((ULong)rmini) << 32) | ((ULong)wmini);
3023}
3024static inline VtsID SVal__unC_Rmin ( SVal s ) {
3025 tl_assert(SVal__isC(s));
3026 return (VtsID)(s >> 32);
3027}
3028static inline VtsID SVal__unC_Wmin ( SVal s ) {
3029 tl_assert(SVal__isC(s));
3030 return (VtsID)(s & 0xFFFFFFFFULL);
3031}
3032
sewardj23f12002009-07-24 08:45:08 +00003033static inline Bool SVal__isA ( SVal s ) {
sewardjf98e1c02008-10-25 16:22:41 +00003034 return (2ULL << 62) == (s & SVAL_TAGMASK);
3035}
sewardj23f12002009-07-24 08:45:08 +00003036static inline SVal SVal__mkA ( void ) {
sewardjf98e1c02008-10-25 16:22:41 +00003037 return 2ULL << 62;
3038}
3039
3040/* Direct callback from lib_zsm. */
3041static void SVal__rcinc ( SVal s ) {
3042 if (SVal__isC(s)) {
3043 VtsID__rcinc( SVal__unC_Rmin(s) );
3044 VtsID__rcinc( SVal__unC_Wmin(s) );
3045 }
3046}
3047
3048/* Direct callback from lib_zsm. */
3049static void SVal__rcdec ( SVal s ) {
3050 if (SVal__isC(s)) {
3051 VtsID__rcdec( SVal__unC_Rmin(s) );
3052 VtsID__rcdec( SVal__unC_Wmin(s) );
3053 }
3054}
3055
3056
3057/////////////////////////////////////////////////////////
3058// //
sewardjd86e3a22008-12-03 11:39:37 +00003059// A simple group (memory) allocator //
3060// //
3061/////////////////////////////////////////////////////////
3062
3063//////////////// BEGIN general group allocator
3064typedef
3065 struct {
3066 UWord elemSzB; /* element size */
3067 UWord nPerGroup; /* # elems per group */
3068 void* (*alloc)(HChar*, SizeT); /* group allocator */
3069 HChar* cc; /* group allocator's cc */
3070 void (*free)(void*); /* group allocator's free-er (unused) */
3071 /* XArray of void* (pointers to groups). The groups themselves.
3072 Each element is a pointer to a block of size (elemSzB *
3073 nPerGroup) bytes. */
3074 XArray* groups;
3075 /* next free element. Is a pointer to an element in one of the
3076 groups pointed to by .groups. */
3077 void* nextFree;
3078 }
3079 GroupAlloc;
3080
3081static void init_GroupAlloc ( /*MOD*/GroupAlloc* ga,
3082 UWord elemSzB,
3083 UWord nPerGroup,
3084 void* (*alloc)(HChar*, SizeT),
3085 HChar* cc,
3086 void (*free)(void*) )
3087{
3088 tl_assert(0 == (elemSzB % sizeof(UWord)));
3089 tl_assert(elemSzB >= sizeof(UWord));
3090 tl_assert(nPerGroup >= 100); /* let's say */
3091 tl_assert(alloc);
3092 tl_assert(cc);
3093 tl_assert(free);
3094 tl_assert(ga);
3095 VG_(memset)(ga, 0, sizeof(*ga));
3096 ga->elemSzB = elemSzB;
3097 ga->nPerGroup = nPerGroup;
3098 ga->groups = NULL;
3099 ga->alloc = alloc;
3100 ga->cc = cc;
3101 ga->free = free;
3102 ga->groups = VG_(newXA)( alloc, cc, free, sizeof(void*) );
3103 ga->nextFree = NULL;
3104 tl_assert(ga->groups);
3105}
3106
3107/* The freelist is empty. Allocate a new group and put all the new
3108 elements in it onto the freelist. */
3109__attribute__((noinline))
3110static void gal_add_new_group ( GroupAlloc* ga )
3111{
3112 Word i;
3113 UWord* group;
3114 tl_assert(ga);
3115 tl_assert(ga->nextFree == NULL);
3116 group = ga->alloc( ga->cc, ga->elemSzB * ga->nPerGroup );
3117 tl_assert(group);
3118 /* extend the freelist through the new group. Place the freelist
3119 pointer in the first word of each element. That's why the
3120 element size must be at least one word. */
3121 for (i = ga->nPerGroup-1; i >= 0; i--) {
3122 UChar* elemC = ((UChar*)group) + i * ga->elemSzB;
3123 UWord* elem = (UWord*)elemC;
3124 tl_assert(0 == (((UWord)elem) % sizeof(UWord)));
3125 *elem = (UWord)ga->nextFree;
3126 ga->nextFree = elem;
3127 }
3128 /* and add to our collection of groups */
3129 VG_(addToXA)( ga->groups, &group );
3130}
3131
3132inline static void* gal_Alloc ( GroupAlloc* ga )
3133{
3134 UWord* elem;
3135 if (UNLIKELY(ga->nextFree == NULL)) {
3136 gal_add_new_group(ga);
3137 }
3138 elem = ga->nextFree;
3139 ga->nextFree = (void*)*elem;
3140 *elem = 0; /* unnecessary, but just to be on the safe side */
3141 return elem;
3142}
3143
3144inline static void* gal_Alloc_w_size_check ( GroupAlloc* ga, SizeT n )
3145{
3146 tl_assert(n == ga->elemSzB);
3147 return gal_Alloc( ga );
3148}
3149
3150inline static void gal_Free ( GroupAlloc* ga, void* p )
3151{
3152 UWord* elem = (UWord*)p;
3153 *elem = (UWord)ga->nextFree;
3154 ga->nextFree = elem;
3155}
3156//////////////// END general group allocator
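
/* Usage sketch (illustrative only; 'Foo' is a made-up element type):

      GroupAlloc ga;
      init_GroupAlloc( &ga, sizeof(Foo), 1000,
                       HG_(zalloc), "example.cc", HG_(free) );
      Foo* p = gal_Alloc( &ga );   // allocates a group of 1000 on demand
      ...
      gal_Free( &ga, p );          // p goes back on ga's freelist

   Note that elements must be at least one word in size, since the
   free-list link is stored in the first word of each free element. */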
3157
3158
3159/////////////////////////////////////////////////////////
3160// //
sewardjf98e1c02008-10-25 16:22:41 +00003161// Change-event map2 //
3162// //
3163/////////////////////////////////////////////////////////
3164
sewardjf98e1c02008-10-25 16:22:41 +00003165#define EVENT_MAP_GC_DISCARD_FRACTION 0.5
3166
3167/* This is in two parts:
3168
sewardj23f12002009-07-24 08:45:08 +00003169 1. A hash table of RCECs. This is a set of reference-counted stack
sewardjf98e1c02008-10-25 16:22:41 +00003170 traces. When the reference count of a stack trace becomes zero,
3171 it is removed from the set and freed up. The intent is to have
3172 a set of stack traces which can be referred to from (2), but to
3173 only represent each one once. The set is indexed/searched by
3174 ordering on the stack trace vectors.
3175
sewardj849b0ed2008-12-21 10:43:10 +00003176 2. A SparseWA of OldRefs. These store information about each old
3177 ref that we need to record. It is indexed by address of the
sewardjf98e1c02008-10-25 16:22:41 +00003178 location for which the information is recorded. For LRU
3179 purposes, each OldRef also contains a generation number,
3180 indicating when it was most recently accessed.
3181
3182 The important part of an OldRef is, however, its accs[] array.
sewardj849b0ed2008-12-21 10:43:10 +00003183 This is an array of N_OLDREF_ACCS which binds (thread, R/W,
3184 size) triples to RCECs. This allows us to collect the last
3185 access-traceback by up to N_OLDREF_ACCS different triples for
 3186 this location. The accs[] array is an MTF-array. If a binding
3187 falls off the end, that's too bad -- we will lose info about
3188 that triple's access to this location.
sewardjf98e1c02008-10-25 16:22:41 +00003189
sewardj849b0ed2008-12-21 10:43:10 +00003190 When the SparseWA becomes too big, we can throw away the OldRefs
sewardjf98e1c02008-10-25 16:22:41 +00003191 whose generation numbers are below some threshold; hence doing
3192 approximate LRU discarding. For each discarded OldRef we must
3193 of course decrement the reference count on the all RCECs it
3194 refers to, in order that entries from (1) eventually get
3195 discarded too.
sewardj849b0ed2008-12-21 10:43:10 +00003196
3197 A major improvement in reliability of this mechanism would be to
3198 have a dynamically sized OldRef.accs[] array, so no entries ever
3199 fall off the end. In investigations (Dec 08) it appears that a
3200 major cause for the non-availability of conflicting-access traces
3201 in race reports is caused by the fixed size of this array. I
3202 suspect for most OldRefs, only a few entries are used, but for a
3203 minority of cases there is an overflow, leading to info lossage.
3204 Investigations also suggest this is very workload and scheduling
3205 sensitive. Therefore a dynamic sizing would be better.
3206
3207 However, dynamic sizing would defeat the use of a GroupAllocator
3208 for OldRef structures. And that's important for performance. So
3209 it's not straightforward to do.
sewardjf98e1c02008-10-25 16:22:41 +00003210*/
3211
3212
3213static UWord stats__ctxt_rcdec1 = 0;
3214static UWord stats__ctxt_rcdec2 = 0;
3215static UWord stats__ctxt_rcdec3 = 0;
3216static UWord stats__ctxt_rcdec_calls = 0;
3217static UWord stats__ctxt_rcdec_discards = 0;
3218static UWord stats__ctxt_rcdec1_eq = 0;
3219
3220static UWord stats__ctxt_tab_curr = 0;
3221static UWord stats__ctxt_tab_max = 0;
3222
3223static UWord stats__ctxt_tab_qs = 0;
3224static UWord stats__ctxt_tab_cmps = 0;
3225
3226
3227///////////////////////////////////////////////////////
sewardj111544a2010-04-12 20:05:24 +00003228//// Part (1): A hash table of RCECs
sewardjf98e1c02008-10-25 16:22:41 +00003229///
3230
3231#define N_FRAMES 8
3232
3233// (UInt) `echo "Reference Counted Execution Context" | md5sum`
3234#define RCEC_MAGIC 0xab88abb2UL
3235
3236//#define N_RCEC_TAB 98317 /* prime */
3237#define N_RCEC_TAB 196613 /* prime */
3238
3239typedef
3240 struct _RCEC {
sewardjd86e3a22008-12-03 11:39:37 +00003241 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00003242 struct _RCEC* next;
sewardjf98e1c02008-10-25 16:22:41 +00003243 UWord rc;
3244 UWord rcX; /* used for crosschecking */
njn6c83d5e2009-05-05 23:46:24 +00003245 UWord frames_hash; /* hash of all the frames */
3246 UWord frames[N_FRAMES];
sewardjf98e1c02008-10-25 16:22:41 +00003247 }
3248 RCEC;
3249
3250static RCEC** contextTab = NULL; /* hash table of RCEC*s */
3251
3252
3253/* Gives an arbitrary total order on RCEC .frames fields */
3254static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
3255 Word i;
3256 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
3257 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
njn6c83d5e2009-05-05 23:46:24 +00003258 if (ec1->frames_hash < ec2->frames_hash) return -1;
3259 if (ec1->frames_hash > ec2->frames_hash) return 1;
3260 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00003261 if (ec1->frames[i] < ec2->frames[i]) return -1;
njn6c83d5e2009-05-05 23:46:24 +00003262 if (ec1->frames[i] > ec2->frames[i]) return 1;
sewardjf98e1c02008-10-25 16:22:41 +00003263 }
3264 return 0;
3265}
3266
3267
3268/* Dec the ref of this RCEC. */
3269static void ctxt__rcdec ( RCEC* ec )
3270{
3271 stats__ctxt_rcdec_calls++;
3272 tl_assert(ec && ec->magic == RCEC_MAGIC);
3273 tl_assert(ec->rc > 0);
3274 ec->rc--;
3275}
3276
3277static void ctxt__rcinc ( RCEC* ec )
3278{
3279 tl_assert(ec && ec->magic == RCEC_MAGIC);
3280 ec->rc++;
3281}
3282
3283
sewardjd86e3a22008-12-03 11:39:37 +00003284//////////// BEGIN RCEC group allocator
3285static GroupAlloc rcec_group_allocator;
3286
3287static RCEC* alloc_RCEC ( void ) {
3288 return gal_Alloc ( &rcec_group_allocator );
3289}
3290
3291static void free_RCEC ( RCEC* rcec ) {
3292 tl_assert(rcec->magic == RCEC_MAGIC);
3293 gal_Free( &rcec_group_allocator, rcec );
3294}
sewardj111544a2010-04-12 20:05:24 +00003295//////////// END RCEC group allocator
sewardjd86e3a22008-12-03 11:39:37 +00003296
3297
sewardjf98e1c02008-10-25 16:22:41 +00003298/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
   move it one step closer to the front of the list, so as to make
3300 subsequent searches for it cheaper. */
3301static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
3302{
3303 RCEC *ec0, *ec1, *ec2;
3304 if (ec == *headp)
3305 tl_assert(0); /* already at head of list */
3306 tl_assert(ec != NULL);
3307 ec0 = *headp;
3308 ec1 = NULL;
3309 ec2 = NULL;
3310 while (True) {
3311 if (ec0 == NULL || ec0 == ec) break;
3312 ec2 = ec1;
3313 ec1 = ec0;
3314 ec0 = ec0->next;
3315 }
3316 tl_assert(ec0 == ec);
3317 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
3318 RCEC* tmp;
3319 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
3320 predecessor. Swap ec0 and ec1, that is, move ec0 one step
3321 closer to the start of the list. */
3322 tl_assert(ec2->next == ec1);
3323 tl_assert(ec1->next == ec0);
3324 tmp = ec0->next;
3325 ec2->next = ec0;
3326 ec0->next = ec1;
3327 ec1->next = tmp;
3328 }
3329 else
3330 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
3331 /* it's second in the list. */
3332 tl_assert(*headp == ec1);
3333 tl_assert(ec1->next == ec0);
3334 ec1->next = ec0->next;
3335 ec0->next = ec1;
3336 *headp = ec0;
3337 }
3338}
3339
3340
3341/* Find the given RCEC in the tree, and return a pointer to it. Or,
3342 if not present, add the given one to the tree (by making a copy of
3343 it, so the caller can immediately deallocate the original) and
3344 return a pointer to the copy. The caller can safely have 'example'
3345 on its stack, since we will always return a pointer to a copy of
3346 it, not to the original. Note that the inserted node will have .rc
   of zero and so the caller must immediately increment it. */
3348__attribute__((noinline))
3349static RCEC* ctxt__find_or_add ( RCEC* example )
3350{
3351 UWord hent;
3352 RCEC* copy;
3353 tl_assert(example && example->magic == RCEC_MAGIC);
3354 tl_assert(example->rc == 0);
3355
3356 /* Search the hash table to see if we already have it. */
3357 stats__ctxt_tab_qs++;
njn6c83d5e2009-05-05 23:46:24 +00003358 hent = example->frames_hash % N_RCEC_TAB;
sewardjf98e1c02008-10-25 16:22:41 +00003359 copy = contextTab[hent];
3360 while (1) {
3361 if (!copy) break;
3362 tl_assert(copy->magic == RCEC_MAGIC);
3363 stats__ctxt_tab_cmps++;
3364 if (0 == RCEC__cmp_by_frames(copy, example)) break;
3365 copy = copy->next;
3366 }
3367
3368 if (copy) {
3369 tl_assert(copy != example);
3370 /* optimisation: if it's not at the head of its list, move 1
3371 step fwds, to make future searches cheaper */
3372 if (copy != contextTab[hent]) {
3373 move_RCEC_one_step_forward( &contextTab[hent], copy );
3374 }
3375 } else {
sewardjd86e3a22008-12-03 11:39:37 +00003376 copy = alloc_RCEC();
sewardjf98e1c02008-10-25 16:22:41 +00003377 tl_assert(copy != example);
3378 *copy = *example;
3379 copy->next = contextTab[hent];
3380 contextTab[hent] = copy;
3381 stats__ctxt_tab_curr++;
3382 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
3383 stats__ctxt_tab_max = stats__ctxt_tab_curr;
3384 }
3385 return copy;
3386}
3387
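/* Rotate 'w' left by 'n' bits.  Callers must ensure 0 < n < the
   word size in bits, since shifting by a full word width is
   undefined behaviour in C; the only caller below uses n == 19. */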
3388static inline UWord ROLW ( UWord w, Int n )
3389{
3390 Int bpw = 8 * sizeof(UWord);
3391 w = (w << n) | (w >> (bpw-n));
3392 return w;
3393}
3394
3395__attribute__((noinline))
3396static RCEC* get_RCEC ( Thr* thr )
3397{
3398 UWord hash, i;
3399 RCEC example;
3400 example.magic = RCEC_MAGIC;
3401 example.rc = 0;
3402 example.rcX = 0;
njn6c83d5e2009-05-05 23:46:24 +00003403 main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
sewardjf98e1c02008-10-25 16:22:41 +00003404 hash = 0;
njn6c83d5e2009-05-05 23:46:24 +00003405 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00003406 hash ^= example.frames[i];
3407 hash = ROLW(hash, 19);
3408 }
njn6c83d5e2009-05-05 23:46:24 +00003409 example.frames_hash = hash;
sewardjf98e1c02008-10-25 16:22:41 +00003410 return ctxt__find_or_add( &example );
3411}
3412
3413///////////////////////////////////////////////////////
sewardjbc307e52008-12-06 22:10:54 +00003414//// Part (2):
3415/// A SparseWA guest-addr -> OldRef, that refers to (1)
sewardjf98e1c02008-10-25 16:22:41 +00003416///
3417
3418// (UInt) `echo "Old Reference Information" | md5sum`
3419#define OldRef_MAGIC 0x30b1f075UL
3420
sewardjc5ea9962008-12-07 01:41:46 +00003421/* Records an access: a thread and a context. The size
3422 (1,2,4,8) and read-or-writeness are also encoded as
3423 follows: bottom bit of .thr is 1 if write, 0 if read
            bottom 2 bits of .rcec encode the size:
3425 00 = 1, 01 = 2, 10 = 4, 11 = 8
3426*/
sewardjf98e1c02008-10-25 16:22:41 +00003427typedef struct { Thr* thr; RCEC* rcec; } Thr_n_RCEC;
3428
sewardj849b0ed2008-12-21 10:43:10 +00003429#define N_OLDREF_ACCS 5
sewardjf98e1c02008-10-25 16:22:41 +00003430
3431typedef
3432 struct {
sewardjd86e3a22008-12-03 11:39:37 +00003433 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00003434 UWord gen; /* when most recently accessed */
sewardjd86e3a22008-12-03 11:39:37 +00003435 /* or free list when not in use */
sewardjf98e1c02008-10-25 16:22:41 +00003436 /* unused slots in this array have .thr == NULL */
3437 Thr_n_RCEC accs[N_OLDREF_ACCS];
3438 }
3439 OldRef;
3440
sewardjd86e3a22008-12-03 11:39:37 +00003441
3442//////////// BEGIN OldRef group allocator
3443static GroupAlloc oldref_group_allocator;
3444
3445static OldRef* alloc_OldRef ( void ) {
3446 return gal_Alloc ( &oldref_group_allocator );
3447}
3448
3449static void free_OldRef ( OldRef* r ) {
3450 tl_assert(r->magic == OldRef_MAGIC);
3451 gal_Free( &oldref_group_allocator, r );
3452}
3453//////////// END OldRef group allocator
3454
sewardjd86e3a22008-12-03 11:39:37 +00003455
sewardjbc307e52008-12-06 22:10:54 +00003456static SparseWA* oldrefTree = NULL; /* SparseWA* OldRef* */
3457static UWord oldrefGen = 0; /* current LRU generation # */
3458static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
3459static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */
sewardjf98e1c02008-10-25 16:22:41 +00003460
sewardjc5ea9962008-12-07 01:41:46 +00003461inline static void* ptr_or_UWord ( void* p, UWord w ) {
3462 return (void*)( ((UWord)p) | ((UWord)w) );
3463}
3464inline static void* ptr_and_UWord ( void* p, UWord w ) {
3465 return (void*)( ((UWord)p) & ((UWord)w) );
3466}
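
/* A sketch (not compiled in) of how the bottom-bit tagging works,
   per the encoding described at Thr_n_RCEC above: bottom bit of .thr
   holds the R/W flag, bottom 2 bits of .rcec hold log2 of the access
   size.  'enc' and 'dec' are hypothetical helpers, not part of the
   real code, which inlines this logic at its use points. */
#if 0
static void enc ( Thr_n_RCEC* acc, Thr* thr, RCEC* rcec,
                  SizeT szB, Bool isW )
{
   UWord szTag;
   switch (szB) {
      case 1: szTag = 0; break;
      case 2: szTag = 1; break;
      case 4: szTag = 2; break;
      case 8: szTag = 3; break;
      default: tl_assert(0);
   }
   acc->thr  = ptr_or_UWord(thr,  isW ? 1 : 0);
   acc->rcec = ptr_or_UWord(rcec, szTag);
}

static void dec ( Thr_n_RCEC* acc, Thr** thr, RCEC** rcec,
                  SizeT* szB, Bool* isW )
{
   *thr  = ptr_and_UWord(acc->thr,  ~3);
   *rcec = ptr_and_UWord(acc->rcec, ~3);
   *isW  = 1 == (UWord)ptr_and_UWord(acc->thr, 1);
   *szB  = 1UL << (UWord)ptr_and_UWord(acc->rcec, 3);
}
#endif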
3467
sewardj1669cc72008-12-13 01:20:21 +00003468inline static UInt min_UInt ( UInt a, UInt b ) {
3469 return a < b ? a : b;
3470}
3471
sewardja781be62008-12-08 00:12:28 +00003472/* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
3473 first interval is lower, 1 if the first interval is higher, and 0
   if there is any overlap.  The redundant casting is deliberate
   paranoia, following what looked distinctly like a bug in
   gcc-4.1.2, in which some of the comparisons were done signedly
   instead of unsignedly. */
3478/* Copied from exp-ptrcheck/sg_main.c */
3479static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
3480 Addr a2, SizeT n2 ) {
3481 UWord a1w = (UWord)a1;
3482 UWord n1w = (UWord)n1;
3483 UWord a2w = (UWord)a2;
3484 UWord n2w = (UWord)n2;
3485 tl_assert(n1w > 0 && n2w > 0);
3486 if (a1w + n1w <= a2w) return -1L;
3487 if (a2w + n2w <= a1w) return 1L;
3488 return 0;
3489}
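
/* For example: [0x1000,0x1004) vs [0x1004,0x1008) gives -1 (disjoint,
   first interval lower); swapping the arguments gives 1; and
   [0x1000,0x1008) vs [0x1004,0x1006) gives 0 (overlap). */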
3490
sewardjc5ea9962008-12-07 01:41:46 +00003491static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
sewardjf98e1c02008-10-25 16:22:41 +00003492{
sewardjd86e3a22008-12-03 11:39:37 +00003493 OldRef* ref;
sewardjc5ea9962008-12-07 01:41:46 +00003494 RCEC* rcec;
sewardjd86e3a22008-12-03 11:39:37 +00003495 Word i, j;
3496 UWord keyW, valW;
3497 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00003498
sewardjc5ea9962008-12-07 01:41:46 +00003499 rcec = get_RCEC( thr );
3500 ctxt__rcinc(rcec);
3501
3502 /* encode the size and writeness of the transaction in the bottom
3503 two bits of thr and rcec. */
3504 thr = ptr_or_UWord(thr, isW ? 1 : 0);
3505 switch (szB) {
3506 /* This doesn't look particularly branch-predictor friendly. */
3507 case 1: rcec = ptr_or_UWord(rcec, 0); break;
3508 case 2: rcec = ptr_or_UWord(rcec, 1); break;
3509 case 4: rcec = ptr_or_UWord(rcec, 2); break;
3510 case 8: rcec = ptr_or_UWord(rcec, 3); break;
3511 default: tl_assert(0);
3512 }
3513
3514 /* Look in the map to see if we already have this. */
sewardjbc307e52008-12-06 22:10:54 +00003515 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, a );
sewardjf98e1c02008-10-25 16:22:41 +00003516
sewardjd86e3a22008-12-03 11:39:37 +00003517 if (b) {
sewardjf98e1c02008-10-25 16:22:41 +00003518
3519 /* We already have a record for this address. We now need to
sewardj849b0ed2008-12-21 10:43:10 +00003520 see if we have a stack trace pertaining to this (thread, R/W,
3521 size) triple. */
sewardjd86e3a22008-12-03 11:39:37 +00003522 tl_assert(keyW == a);
3523 ref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00003524 tl_assert(ref->magic == OldRef_MAGIC);
3525
3526 tl_assert(thr);
3527 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardj849b0ed2008-12-21 10:43:10 +00003528 if (ref->accs[i].thr != thr)
3529 continue;
3530 /* since .thr encodes both the accessing thread and the
3531 read/writeness, we know now that at least those features
3532 of the access match this entry. So we just need to check
3533 the size indication. Do this by inspecting the lowest 2 bits of
3534 .rcec, which contain the encoded size info. */
3535 if (ptr_and_UWord(ref->accs[i].rcec,3) != ptr_and_UWord(rcec,3))
3536 continue;
3537 /* else we have a match, so stop looking. */
3538 break;
sewardjf98e1c02008-10-25 16:22:41 +00003539 }
3540
3541 if (i < N_OLDREF_ACCS) {
3542 /* thread 'thr' has an entry at index 'i'. Update it. */
3543 if (i > 0) {
3544 Thr_n_RCEC tmp = ref->accs[i-1];
3545 ref->accs[i-1] = ref->accs[i];
3546 ref->accs[i] = tmp;
3547 i--;
3548 }
sewardjc5ea9962008-12-07 01:41:46 +00003549 if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
sewardjf98e1c02008-10-25 16:22:41 +00003550 stats__ctxt_rcdec1++;
sewardjc5ea9962008-12-07 01:41:46 +00003551 ctxt__rcdec( ptr_and_UWord(ref->accs[i].rcec, ~3) );
3552 ref->accs[i].rcec = rcec;
sewardjf98e1c02008-10-25 16:22:41 +00003553 tl_assert(ref->accs[i].thr == thr);
3554 } else {
sewardj849b0ed2008-12-21 10:43:10 +00003555 /* No entry for this (thread, R/W, size) triple. Shuffle all
3556 of them down one slot, and put the new entry at the start
3557 of the array. */
sewardjf98e1c02008-10-25 16:22:41 +00003558 if (ref->accs[N_OLDREF_ACCS-1].thr) {
3559 /* the last slot is in use. We must dec the rc on the
3560 associated rcec. */
3561 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
3562 stats__ctxt_rcdec2++;
sewardj849b0ed2008-12-21 10:43:10 +00003563 if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
3564 VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
sewardjc5ea9962008-12-07 01:41:46 +00003565 ctxt__rcdec( ptr_and_UWord(ref->accs[N_OLDREF_ACCS-1].rcec, ~3) );
sewardjf98e1c02008-10-25 16:22:41 +00003566 } else {
3567 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
3568 }
3569 for (j = N_OLDREF_ACCS-1; j >= 1; j--)
3570 ref->accs[j] = ref->accs[j-1];
3571 ref->accs[0].thr = thr;
sewardjc5ea9962008-12-07 01:41:46 +00003572 ref->accs[0].rcec = rcec;
3573 /* thr==NULL is used to signify an empty slot, so we can't
3574 add a NULL thr. */
3575 tl_assert(ptr_and_UWord(thr, ~3) != 0);
sewardjf98e1c02008-10-25 16:22:41 +00003576 }
3577
3578 ref->gen = oldrefGen;
sewardjf98e1c02008-10-25 16:22:41 +00003579
3580 } else {
3581
3582 /* We don't have a record for this address. Create a new one. */
3583 if (oldrefTreeN >= oldrefGenIncAt) {
3584 oldrefGen++;
3585 oldrefGenIncAt = oldrefTreeN + 50000;
3586 if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
3587 oldrefGen, oldrefTreeN );
3588 }
sewardjd86e3a22008-12-03 11:39:37 +00003589
3590 ref = alloc_OldRef();
sewardjf98e1c02008-10-25 16:22:41 +00003591 ref->magic = OldRef_MAGIC;
3592 ref->gen = oldrefGen;
sewardjc5ea9962008-12-07 01:41:46 +00003593 ref->accs[0].rcec = rcec;
sewardjf98e1c02008-10-25 16:22:41 +00003594 ref->accs[0].thr = thr;
sewardj849b0ed2008-12-21 10:43:10 +00003595 /* thr==NULL is used to signify an empty slot, so we can't add a
3596 NULL thr. */
3597 tl_assert(ptr_and_UWord(thr, ~3) != 0);
sewardjf98e1c02008-10-25 16:22:41 +00003598 for (j = 1; j < N_OLDREF_ACCS; j++) {
3599 ref->accs[j].thr = NULL;
3600 ref->accs[j].rcec = NULL;
3601 }
sewardjbc307e52008-12-06 22:10:54 +00003602 VG_(addToSWA)( oldrefTree, a, (UWord)ref );
sewardjf98e1c02008-10-25 16:22:41 +00003603 oldrefTreeN++;
3604
3605 }
3606}
3607
3608
sewardjc5ea9962008-12-07 01:41:46 +00003609Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
3610 /*OUT*/Thr** resThr,
3611 /*OUT*/SizeT* resSzB,
3612 /*OUT*/Bool* resIsW,
3613 Thr* thr, Addr a, SizeT szB, Bool isW )
sewardjf98e1c02008-10-25 16:22:41 +00003614{
sewardja781be62008-12-08 00:12:28 +00003615 Word i, j;
sewardjd86e3a22008-12-03 11:39:37 +00003616 OldRef* ref;
3617 UWord keyW, valW;
3618 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00003619
sewardjc5ea9962008-12-07 01:41:46 +00003620 Thr* cand_thr;
3621 RCEC* cand_rcec;
3622 Bool cand_isW;
3623 SizeT cand_szB;
sewardja781be62008-12-08 00:12:28 +00003624 Addr cand_a;
3625
3626 Addr toCheck[15];
3627 Int nToCheck = 0;
sewardjc5ea9962008-12-07 01:41:46 +00003628
3629 tl_assert(thr);
3630 tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
sewardjf98e1c02008-10-25 16:22:41 +00003631
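   /* Build the candidate set of addresses whose OldRef records could
      describe an access overlapping [a, a+szB).  Since a recorded
      access spans at most 8 bytes, any overlapping one must start in
      [a-7, a+szB-1]; hence the window below, with 'a' itself checked
      first.  That is at most 7 + 8 = 15 candidates, matching the
      size of toCheck[]. */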
sewardja781be62008-12-08 00:12:28 +00003632 toCheck[nToCheck++] = a;
3633 for (i = -7; i < (Word)szB; i++) {
3634 if (i != 0)
3635 toCheck[nToCheck++] = a + i;
3636 }
3637 tl_assert(nToCheck <= 15);
3638
3639 /* Now see if we can find a suitable matching event for
3640 any of the addresses in toCheck[0 .. nToCheck-1]. */
3641 for (j = 0; j < nToCheck; j++) {
3642
3643 cand_a = toCheck[j];
3644 // VG_(printf)("test %ld %p\n", j, cand_a);
3645
3646 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, cand_a );
3647 if (!b)
3648 continue;
3649
sewardjd86e3a22008-12-03 11:39:37 +00003650 ref = (OldRef*)valW;
sewardja781be62008-12-08 00:12:28 +00003651 tl_assert(keyW == cand_a);
sewardjf98e1c02008-10-25 16:22:41 +00003652 tl_assert(ref->magic == OldRef_MAGIC);
3653 tl_assert(ref->accs[0].thr); /* first slot must always be used */
3654
sewardjc5ea9962008-12-07 01:41:46 +00003655 cand_thr = NULL;
3656 cand_rcec = NULL;
3657 cand_isW = False;
3658 cand_szB = 0;
sewardjf98e1c02008-10-25 16:22:41 +00003659
sewardjc5ea9962008-12-07 01:41:46 +00003660 for (i = 0; i < N_OLDREF_ACCS; i++) {
3661 Thr_n_RCEC* cand = &ref->accs[i];
3662 cand_thr = ptr_and_UWord(cand->thr, ~3);
3663 cand_rcec = ptr_and_UWord(cand->rcec, ~3);
3664 /* Decode the writeness from the bottom bit of .thr. */
3665 cand_isW = 1 == (UWord)ptr_and_UWord(cand->thr, 1);
3666 /* Decode the size from the bottom two bits of .rcec. */
3667 switch ((UWord)ptr_and_UWord(cand->rcec, 3)) {
3668 case 0: cand_szB = 1; break;
3669 case 1: cand_szB = 2; break;
3670 case 2: cand_szB = 4; break;
3671 case 3: cand_szB = 8; break;
3672 default: tl_assert(0);
3673 }
3674
3675 if (cand_thr == NULL)
3676 /* This slot isn't in use. Ignore it. */
3677 continue;
3678
3679 if (cand_thr == thr)
3680 /* This is an access by the same thread, but we're only
3681 interested in accesses from other threads. Ignore. */
3682 continue;
3683
3684 if ((!cand_isW) && (!isW))
3685 /* We don't want to report a read racing against another
3686 read; that's stupid. So in this case move on. */
3687 continue;
3688
sewardja781be62008-12-08 00:12:28 +00003689 if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
3690 /* No overlap with the access we're asking about. Ignore. */
3691 continue;
3692
sewardjc5ea9962008-12-07 01:41:46 +00003693 /* We have a match. Stop searching. */
3694 break;
3695 }
3696
3697 tl_assert(i >= 0 && i <= N_OLDREF_ACCS);
3698
sewardja781be62008-12-08 00:12:28 +00003699 if (i < N_OLDREF_ACCS) {
njn3a4b58f2009-05-07 23:08:10 +00003700 Int n, maxNFrames;
sewardja781be62008-12-08 00:12:28 +00003701 /* return with success */
3702 tl_assert(cand_thr);
3703 tl_assert(cand_rcec);
3704 tl_assert(cand_rcec->magic == RCEC_MAGIC);
3705 tl_assert(cand_szB >= 1);
njn3a4b58f2009-05-07 23:08:10 +00003706 /* Count how many non-zero frames we have. */
3707 maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
3708 for (n = 0; n < maxNFrames; n++) {
3709 if (0 == cand_rcec->frames[n]) break;
3710 }
3711 *resEC = VG_(make_ExeContext_from_StackTrace)(cand_rcec->frames, n);
sewardja781be62008-12-08 00:12:28 +00003712 *resThr = cand_thr;
3713 *resSzB = cand_szB;
3714 *resIsW = cand_isW;
3715 return True;
3716 }
sewardjc5ea9962008-12-07 01:41:46 +00003717
sewardja781be62008-12-08 00:12:28 +00003718 /* consider next address in toCheck[] */
3719 } /* for (j = 0; j < nToCheck; j++) */
sewardjf98e1c02008-10-25 16:22:41 +00003720
sewardja781be62008-12-08 00:12:28 +00003721 /* really didn't find anything. */
3722 return False;
sewardjf98e1c02008-10-25 16:22:41 +00003723}
3724
3725static void event_map_init ( void )
3726{
3727 Word i;
sewardjd86e3a22008-12-03 11:39:37 +00003728
3729 /* Context (RCEC) group allocator */
3730 init_GroupAlloc ( &rcec_group_allocator,
3731 sizeof(RCEC),
3732 1000 /* RCECs per group */,
3733 HG_(zalloc),
3734 "libhb.event_map_init.1 (RCEC groups)",
3735 HG_(free) );
3736
3737 /* Context table */
sewardjf98e1c02008-10-25 16:22:41 +00003738 tl_assert(!contextTab);
sewardjd86e3a22008-12-03 11:39:37 +00003739 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
sewardjf98e1c02008-10-25 16:22:41 +00003740 N_RCEC_TAB * sizeof(RCEC*) );
3741 tl_assert(contextTab);
3742 for (i = 0; i < N_RCEC_TAB; i++)
3743 contextTab[i] = NULL;
3744
sewardjd86e3a22008-12-03 11:39:37 +00003745 /* Oldref group allocator */
3746 init_GroupAlloc ( &oldref_group_allocator,
3747 sizeof(OldRef),
3748 1000 /* OldRefs per group */,
3749 HG_(zalloc),
3750 "libhb.event_map_init.3 (OldRef groups)",
3751 HG_(free) );
3752
sewardjd86e3a22008-12-03 11:39:37 +00003753 /* Oldref tree */
sewardjf98e1c02008-10-25 16:22:41 +00003754 tl_assert(!oldrefTree);
sewardjbc307e52008-12-06 22:10:54 +00003755 oldrefTree = VG_(newSWA)(
3756 HG_(zalloc),
sewardjd86e3a22008-12-03 11:39:37 +00003757 "libhb.event_map_init.4 (oldref tree)",
sewardjbc307e52008-12-06 22:10:54 +00003758 HG_(free)
sewardjf98e1c02008-10-25 16:22:41 +00003759 );
3760 tl_assert(oldrefTree);
3761
3762 oldrefGen = 0;
3763 oldrefGenIncAt = 0;
3764 oldrefTreeN = 0;
3765}
3766
3767static void event_map__check_reference_counts ( Bool before )
3768{
3769 RCEC* rcec;
3770 OldRef* oldref;
3771 Word i;
3772 UWord nEnts = 0;
sewardjd86e3a22008-12-03 11:39:37 +00003773 UWord keyW, valW;
sewardjf98e1c02008-10-25 16:22:41 +00003774
3775 /* Set the 'check' reference counts to zero. Also, optionally
3776 check that the real reference counts are non-zero. We allow
3777 these to fall to zero before a GC, but the GC must get rid of
3778 all those that are zero, hence none should be zero after a
3779 GC. */
3780 for (i = 0; i < N_RCEC_TAB; i++) {
3781 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
3782 nEnts++;
3783 tl_assert(rcec);
3784 tl_assert(rcec->magic == RCEC_MAGIC);
3785 if (!before)
3786 tl_assert(rcec->rc > 0);
3787 rcec->rcX = 0;
3788 }
3789 }
3790
3791 /* check that the stats are sane */
3792 tl_assert(nEnts == stats__ctxt_tab_curr);
3793 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
3794
3795 /* visit all the referencing points, inc check ref counts */
sewardjbc307e52008-12-06 22:10:54 +00003796 VG_(initIterSWA)( oldrefTree );
3797 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00003798 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00003799 tl_assert(oldref->magic == OldRef_MAGIC);
3800 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardjc5ea9962008-12-07 01:41:46 +00003801 Thr* aThr = ptr_and_UWord(oldref->accs[i].thr, ~3);
3802 RCEC* aRef = ptr_and_UWord(oldref->accs[i].rcec, ~3);
3803 if (aThr) {
3804 tl_assert(aRef);
3805 tl_assert(aRef->magic == RCEC_MAGIC);
3806 aRef->rcX++;
sewardjf98e1c02008-10-25 16:22:41 +00003807 } else {
sewardjc5ea9962008-12-07 01:41:46 +00003808 tl_assert(!aRef);
sewardjf98e1c02008-10-25 16:22:41 +00003809 }
3810 }
3811 }
3812
3813 /* compare check ref counts with actual */
3814 for (i = 0; i < N_RCEC_TAB; i++) {
3815 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
3816 tl_assert(rcec->rc == rcec->rcX);
3817 }
3818 }
3819}
3820
sewardj8fd92d32008-11-20 23:17:01 +00003821__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00003822static void event_map_maybe_GC ( void )
3823{
3824 OldRef* oldref;
3825 UWord keyW, valW, retained, maxGen;
sewardjf98e1c02008-10-25 16:22:41 +00003826 XArray* refs2del;
3827 Word i, j, n2del;
3828
sewardj8fd92d32008-11-20 23:17:01 +00003829 UWord* genMap = NULL;
3830 UWord genMap_min = 0;
3831 UWord genMap_size = 0;
3832
sewardj849b0ed2008-12-21 10:43:10 +00003833 if (LIKELY(oldrefTreeN < HG_(clo_conflict_cache_size)))
sewardjf98e1c02008-10-25 16:22:41 +00003834 return;
3835
3836 if (0)
3837 VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
3838
sewardj849b0ed2008-12-21 10:43:10 +00003839 /* Check for sane command line params. Limit values must match
3840 those in hg_process_cmd_line_option. */
3841 tl_assert( HG_(clo_conflict_cache_size) >= 10*1000 );
sewardjf585e482009-08-16 22:52:29 +00003842 tl_assert( HG_(clo_conflict_cache_size) <= 30*1000*1000 );
sewardj849b0ed2008-12-21 10:43:10 +00003843
sewardj8f5374e2008-12-07 11:40:17 +00003844 /* Check our counting is sane (expensive) */
3845 if (CHECK_CEM)
3846 tl_assert(oldrefTreeN == VG_(sizeSWA)( oldrefTree ));
sewardjf98e1c02008-10-25 16:22:41 +00003847
sewardj8f5374e2008-12-07 11:40:17 +00003848 /* Check the reference counts (expensive) */
3849 if (CHECK_CEM)
3850 event_map__check_reference_counts( True/*before*/ );
sewardjf98e1c02008-10-25 16:22:41 +00003851
sewardj8fd92d32008-11-20 23:17:01 +00003852 /* Compute the distribution of generation values in the ref tree.
3853 There are likely only to be a few different generation numbers
3854 in the whole tree, but we don't know what they are. Hence use a
3855 dynamically resized array of counters. The array is genMap[0
3856 .. genMap_size-1], where genMap[0] is the count for the
3857 generation number genMap_min, genMap[1] is the count for
3858 genMap_min+1, etc. If a new number is seen outside the range
3859 [genMap_min .. genMap_min + genMap_size - 1] then the array is
3860 copied into a larger array, and genMap_min and genMap_size are
3861 adjusted accordingly. */
3862
sewardjf98e1c02008-10-25 16:22:41 +00003863 /* genMap :: generation-number -> count-of-nodes-with-that-number */
sewardjf98e1c02008-10-25 16:22:41 +00003864
sewardjbc307e52008-12-06 22:10:54 +00003865 VG_(initIterSWA)( oldrefTree );
3866 while ( VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardj8fd92d32008-11-20 23:17:01 +00003867
sewardjd86e3a22008-12-03 11:39:37 +00003868 UWord ea, key;
3869 oldref = (OldRef*)valW;
3870 key = oldref->gen;
sewardj8fd92d32008-11-20 23:17:01 +00003871
3872 /* BEGIN find 'ea', which is the index in genMap holding the
3873 count for generation number 'key'. */
3874 if (UNLIKELY(genMap == NULL)) {
3875 /* deal with the first key to be seen, so that the following
3876 cases don't need to handle the complexity of a NULL count
3877 array. */
3878 genMap_min = key;
3879 genMap_size = 1;
3880 genMap = HG_(zalloc)( "libhb.emmG.1a",
3881 genMap_size * sizeof(UWord) );
3882 ea = 0;
3883 if (0) VG_(printf)("(%lu) case 1 [%lu .. %lu]\n",
                            key, genMap_min, genMap_min+genMap_size-1 );
sewardjf98e1c02008-10-25 16:22:41 +00003885 }
sewardj8fd92d32008-11-20 23:17:01 +00003886 else
3887 if (LIKELY(key >= genMap_min && key < genMap_min + genMap_size)) {
3888 /* this is the expected (almost-always-happens) case: 'key'
3889 is already mapped in the array. */
3890 ea = key - genMap_min;
3891 }
3892 else
3893 if (key < genMap_min) {
3894 /* 'key' appears before the start of the current array.
3895 Extend the current array by allocating a larger one and
3896 copying the current one to the upper end of it. */
3897 Word more;
3898 UWord* map2;
3899 more = genMap_min - key;
3900 tl_assert(more > 0);
3901 map2 = HG_(zalloc)( "libhb.emmG.1b",
3902 (genMap_size + more) * sizeof(UWord) );
3903 VG_(memcpy)( &map2[more], genMap, genMap_size * sizeof(UWord) );
3904 HG_(free)( genMap );
3905 genMap = map2;
3906 genMap_size += more;
3907 genMap_min -= more;
3908 ea = 0;
3909 tl_assert(genMap_min == key);
3910 if (0) VG_(printf)("(%lu) case 2 [%lu .. %lu]\n",
                            key, genMap_min, genMap_min+genMap_size-1 );
3912 }
3913 else {
3914 /* 'key' appears after the end of the current array. Extend
3915 the current array by allocating a larger one and copying
3916 the current one to the lower end of it. */
3917 Word more;
3918 UWord* map2;
3919 tl_assert(key >= genMap_min + genMap_size);
3920 more = key - (genMap_min + genMap_size) + 1;
3921 tl_assert(more > 0);
3922 map2 = HG_(zalloc)( "libhb.emmG.1c",
3923 (genMap_size + more) * sizeof(UWord) );
3924 VG_(memcpy)( &map2[0], genMap, genMap_size * sizeof(UWord) );
3925 HG_(free)( genMap );
3926 genMap = map2;
3927 genMap_size += more;
         ea = genMap_size - 1;
3929 tl_assert(genMap_min + genMap_size - 1 == key);
3930 if (0) VG_(printf)("(%lu) case 3 [%lu .. %lu]\n",
                            key, genMap_min, genMap_min+genMap_size-1 );
3932 }
3933 /* END find 'ea' from 'key' */
3934
3935 tl_assert(ea >= 0 && ea < genMap_size);
sewardjd86e3a22008-12-03 11:39:37 +00003936 /* and the whole point of this elaborate computation of 'ea' is .. */
sewardj8fd92d32008-11-20 23:17:01 +00003937 genMap[ea]++;
sewardjf98e1c02008-10-25 16:22:41 +00003938 }
3939
sewardj8fd92d32008-11-20 23:17:01 +00003940 tl_assert(genMap);
3941 tl_assert(genMap_size > 0);
sewardjf98e1c02008-10-25 16:22:41 +00003942
sewardj8fd92d32008-11-20 23:17:01 +00003943 /* Sanity check what we just computed */
3944 { UWord sum = 0;
3945 for (i = 0; i < genMap_size; i++) {
3946 if (0) VG_(printf)(" xxx: gen %ld has %lu\n",
3947 i + genMap_min, genMap[i] );
3948 sum += genMap[i];
3949 }
3950 tl_assert(sum == oldrefTreeN);
3951 }
3952
3953 /* Figure out how many generations to throw away */
sewardjf98e1c02008-10-25 16:22:41 +00003954 retained = oldrefTreeN;
3955 maxGen = 0;
sewardj8fd92d32008-11-20 23:17:01 +00003956
3957 for (i = 0; i < genMap_size; i++) {
3958 keyW = i + genMap_min;
3959 valW = genMap[i];
sewardjf98e1c02008-10-25 16:22:41 +00003960 tl_assert(keyW > 0); /* can't allow a generation # 0 */
3961 if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW );
3962 tl_assert(keyW >= maxGen);
3963 tl_assert(retained >= valW);
3964 if (retained - valW
sewardj849b0ed2008-12-21 10:43:10 +00003965 > (UWord)(HG_(clo_conflict_cache_size)
3966 * EVENT_MAP_GC_DISCARD_FRACTION)) {
sewardjf98e1c02008-10-25 16:22:41 +00003967 retained -= valW;
3968 maxGen = keyW;
3969 } else {
3970 break;
3971 }
3972 }
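   /* For example (hypothetical numbers): with a conflict-cache size
      of 1M and a discard fraction of 0.5, the loop above walks the
      generations oldest-first, discarding each whole generation so
      long as more than 500k entries would still be retained; maxGen
      ends up as the newest generation to be discarded. */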
sewardjf98e1c02008-10-25 16:22:41 +00003973
sewardj8fd92d32008-11-20 23:17:01 +00003974 HG_(free)(genMap);
sewardjf98e1c02008-10-25 16:22:41 +00003975
sewardj9b1f0fd2008-11-18 23:40:00 +00003976 tl_assert(retained >= 0 && retained <= oldrefTreeN);
sewardjf98e1c02008-10-25 16:22:41 +00003977
3978 /* Now make up a big list of the oldrefTree entries we want to
3979 delete. We can't simultaneously traverse the tree and delete
3980 stuff from it, so first we need to copy them off somewhere
3981 else. (sigh) */
sewardj8fd92d32008-11-20 23:17:01 +00003982 refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2",
sewardjd86e3a22008-12-03 11:39:37 +00003983 HG_(free), sizeof(Addr) );
sewardjf98e1c02008-10-25 16:22:41 +00003984
sewardj9b1f0fd2008-11-18 23:40:00 +00003985 if (retained < oldrefTreeN) {
3986
3987 /* This is the normal (expected) case. We discard any ref whose
3988 generation number <= maxGen. */
sewardjbc307e52008-12-06 22:10:54 +00003989 VG_(initIterSWA)( oldrefTree );
3990 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00003991 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00003992 tl_assert(oldref->magic == OldRef_MAGIC);
3993 if (oldref->gen <= maxGen) {
sewardjd86e3a22008-12-03 11:39:37 +00003994 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00003995 }
sewardjf98e1c02008-10-25 16:22:41 +00003996 }
sewardj5e2ac3b2009-08-11 10:39:25 +00003997 if (VG_(clo_stats)) {
sewardj9b1f0fd2008-11-18 23:40:00 +00003998 VG_(message)(Vg_DebugMsg,
3999 "libhb: EvM GC: delete generations %lu and below, "
sewardj24118492009-07-15 14:50:02 +00004000 "retaining %lu entries\n",
sewardj9b1f0fd2008-11-18 23:40:00 +00004001 maxGen, retained );
4002 }
4003
4004 } else {
4005
4006 static UInt rand_seed = 0; /* leave as static */
4007
4008 /* Degenerate case: there's only one generation in the entire
4009 tree, so we need to have some other way of deciding which
4010 refs to throw away. Just throw out half of them randomly. */
4011 tl_assert(retained == oldrefTreeN);
sewardjbc307e52008-12-06 22:10:54 +00004012 VG_(initIterSWA)( oldrefTree );
4013 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardj9b1f0fd2008-11-18 23:40:00 +00004014 UInt n;
sewardjd86e3a22008-12-03 11:39:37 +00004015 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00004016 tl_assert(oldref->magic == OldRef_MAGIC);
4017 n = VG_(random)( &rand_seed );
4018 if ((n & 0xFFF) < 0x800) {
sewardjd86e3a22008-12-03 11:39:37 +00004019 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00004020 retained--;
4021 }
4022 }
sewardj5e2ac3b2009-08-11 10:39:25 +00004023 if (VG_(clo_stats)) {
sewardj9b1f0fd2008-11-18 23:40:00 +00004024 VG_(message)(Vg_DebugMsg,
4025 "libhb: EvM GC: randomly delete half the entries, "
sewardj24118492009-07-15 14:50:02 +00004026 "retaining %lu entries\n",
sewardj9b1f0fd2008-11-18 23:40:00 +00004027 retained );
4028 }
4029
sewardjf98e1c02008-10-25 16:22:41 +00004030 }
4031
4032 n2del = VG_(sizeXA)( refs2del );
4033 tl_assert(n2del == (Word)(oldrefTreeN - retained));
4034
4035 if (0) VG_(printf)("%s","deleting entries\n");
4036 for (i = 0; i < n2del; i++) {
sewardjd86e3a22008-12-03 11:39:37 +00004037 Bool b;
4038 Addr ga2del = *(Addr*)VG_(indexXA)( refs2del, i );
sewardjbc307e52008-12-06 22:10:54 +00004039 b = VG_(delFromSWA)( oldrefTree, &keyW, &valW, ga2del );
sewardjd86e3a22008-12-03 11:39:37 +00004040 tl_assert(b);
4041 tl_assert(keyW == ga2del);
4042 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00004043 for (j = 0; j < N_OLDREF_ACCS; j++) {
sewardjc5ea9962008-12-07 01:41:46 +00004044 Thr* aThr = ptr_and_UWord(oldref->accs[j].thr, ~3);
4045 RCEC* aRef = ptr_and_UWord(oldref->accs[j].rcec, ~3);
4046 if (aRef) {
4047 tl_assert(aThr);
sewardjf98e1c02008-10-25 16:22:41 +00004048 stats__ctxt_rcdec3++;
sewardjc5ea9962008-12-07 01:41:46 +00004049 ctxt__rcdec( aRef );
sewardjf98e1c02008-10-25 16:22:41 +00004050 } else {
sewardjc5ea9962008-12-07 01:41:46 +00004051 tl_assert(!aThr);
sewardjf98e1c02008-10-25 16:22:41 +00004052 }
4053 }
sewardjd86e3a22008-12-03 11:39:37 +00004054
4055 free_OldRef( oldref );
sewardjf98e1c02008-10-25 16:22:41 +00004056 }
4057
4058 VG_(deleteXA)( refs2del );
4059
sewardjc5ea9962008-12-07 01:41:46 +00004060 tl_assert( VG_(sizeSWA)( oldrefTree ) == retained );
sewardjf98e1c02008-10-25 16:22:41 +00004061
4062 oldrefTreeN = retained;
4063 oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
4064
4065 /* Throw away all RCECs with zero reference counts */
4066 for (i = 0; i < N_RCEC_TAB; i++) {
4067 RCEC** pp = &contextTab[i];
4068 RCEC* p = *pp;
4069 while (p) {
4070 if (p->rc == 0) {
4071 *pp = p->next;
sewardjd86e3a22008-12-03 11:39:37 +00004072 free_RCEC(p);
sewardjf98e1c02008-10-25 16:22:41 +00004073 p = *pp;
4074 tl_assert(stats__ctxt_tab_curr > 0);
4075 stats__ctxt_tab_curr--;
4076 } else {
4077 pp = &p->next;
4078 p = p->next;
4079 }
4080 }
4081 }
4082
sewardj8f5374e2008-12-07 11:40:17 +00004083 /* Check the reference counts (expensive) */
4084 if (CHECK_CEM)
4085 event_map__check_reference_counts( False/*after*/ );
sewardjf98e1c02008-10-25 16:22:41 +00004086
4087 //if (0)
4088 //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
4089 // VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
4090
4091}
4092
4093
4094/////////////////////////////////////////////////////////
4095// //
4096// Core MSM //
4097// //
4098/////////////////////////////////////////////////////////
4099
sewardj23f12002009-07-24 08:45:08 +00004100/* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
4101 Nov 08, and again after [...],
4102 June 09. */
sewardjb0e009d2008-11-19 16:35:15 +00004103
sewardj23f12002009-07-24 08:45:08 +00004104static ULong stats__msmcread = 0;
4105static ULong stats__msmcread_change = 0;
4106static ULong stats__msmcwrite = 0;
4107static ULong stats__msmcwrite_change = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004108
sewardj8ab2c132009-08-02 09:34:35 +00004109/* Some notes on the H1 history mechanism:
4110
4111 Transition rules are:
4112
4113 read_{Kr,Kw}(Cr,Cw) = (Cr, Cr `join` Kw)
4114 write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw)
4115
4116 After any access by a thread T to a location L, L's constraint pair
4117 (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.
4118
4119 After a race by thread T conflicting with some previous access by
   some other thread U, at a location whose constraint (before
   processing the later access) was (Cr,Cw), Cw[U] is the segment in
   which the previous access lies.
4123
4124 Hence in record_race_info, we pass in Cfailed and Kfailed, which
4125 are compared so as to find out which thread(s) this access
4126 conflicts with. Once that is established, we also require the
4127 pre-update Cw for the location, so we can index into it for those
4128 threads, to get the scalar clock values for the point at which the
4129 former accesses were made. (In fact we only bother to do any of
   this for one arbitrarily chosen conflicting thread: that is
   simpler, it avoids flooding the user with vast amounts of mostly
   useless information, and the program is wrong if it contains any
   races at all -- so we need not show every conflicting access
   pair, so long as we show at least one whenever any exist.)
4136
4137 ---
4138
4139 That requires the auxiliary proof that
4140
4141 (Cr `join` Kw)[T] == Kw[T]
4142
4143 Why should that be true? Because for any thread T, Kw[T] >= the
4144 scalar clock value for T known by any other thread. In other
4145 words, because T's value for its own scalar clock is at least as up
4146 to date as the value for it known by any other thread (that is true
4147 for both the R- and W- scalar clocks). Hence no other thread will
4148 be able to feed in a value for that element (indirectly via a
4149 constraint) which will exceed Kw[T], and hence the join cannot
4150 cause that particular element to advance.
4151*/
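
/* A worked example of the above, with made-up clock values.  Suppose
   threads T1 and T2 never synchronise.  T1 writes location L when its
   W-clock is Kw = [T1:2, T2:0]; L's constraint pair becomes
   (Cr,Cw) = ([T1:2,T2:0], [T1:2,T2:0]).  T2 then writes L with
   Kw = [T1:0, T2:3].  The check Cw <= Kw fails, since Cw[T1] = 2 > 0
   = Kw[T1], so a race is reported.  Cfailed[T1] = 2 is then exactly
   the scalar clock identifying the segment containing T1's earlier
   write, which is what record_race_info uses (at history_level 1) to
   recover the bounding stack traces of the conflicting segment. */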
4152
sewardjf98e1c02008-10-25 16:22:41 +00004153__attribute__((noinline))
4154static void record_race_info ( Thr* acc_thr,
sewardj23f12002009-07-24 08:45:08 +00004155 Addr acc_addr, SizeT szB, Bool isWrite,
sewardj8ab2c132009-08-02 09:34:35 +00004156 VtsID Cfailed,
4157 VtsID Kfailed,
4158 VtsID Cw )
sewardjf98e1c02008-10-25 16:22:41 +00004159{
sewardjc5ea9962008-12-07 01:41:46 +00004160 /* Call here to report a race. We just hand it onwards to
4161 HG_(record_error_Race). If that in turn discovers that the
sewardj23f12002009-07-24 08:45:08 +00004162 error is going to be collected, then, at history_level 2, that
4163 queries the conflicting-event map. The alternative would be to
4164 query it right here. But that causes a lot of pointless queries
4165 for errors which will shortly be discarded as duplicates, and
4166 can become a performance overhead; so we defer the query until
4167 we know the error is not a duplicate. */
4168
4169 /* Stacks for the bounds of the (or one of the) conflicting
4170 segment(s). These are only set at history_level 1. */
4171 ExeContext* hist1_seg_start = NULL;
4172 ExeContext* hist1_seg_end = NULL;
4173 Thread* hist1_conf_thr = NULL;
4174
4175 tl_assert(acc_thr);
sewardjc5ea9962008-12-07 01:41:46 +00004176 tl_assert(acc_thr->opaque);
sewardj23f12002009-07-24 08:45:08 +00004177 tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);
4178
4179 if (HG_(clo_history_level) == 1) {
4180 Bool found;
4181 Word firstIx, lastIx;
4182 ULong_n_EC key;
4183
4184 /* At history_level 1, we must round up the relevant stack-pair
4185 for the conflicting segment right now. This is because
sewardj8ab2c132009-08-02 09:34:35 +00004186 deferring it is complex; we can't (easily) put Kfailed and
4187 Cfailed into the XError and wait for later without
sewardj23f12002009-07-24 08:45:08 +00004188 getting tied up in difficulties with VtsID reference
4189 counting. So just do it now. */
4190 Thr* confThr;
4191 ULong confTym = 0;
4192 /* Which thread are we in conflict with? There may be more than
4193 one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
4194 (in fact it's the one with the lowest Thr* value). */
sewardj8ab2c132009-08-02 09:34:35 +00004195 confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
      /* This must exist, since if it were NULL then there's no
sewardj8ab2c132009-08-02 09:34:35 +00004197 conflict (semantics of return value of
4198 VtsID__findFirst_notLEQ), and msmc{read,write}, which has
4199 called us, just checked exactly this -- that there was in
4200 fact a race. */
sewardj23f12002009-07-24 08:45:08 +00004201 tl_assert(confThr);
4202
4203 /* Get the scalar clock value that the conflicting thread
4204 introduced into the constraint. A careful examination of the
4205 base machine rules shows that this must be the same as the
4206 conflicting thread's scalar clock when it created this
4207 constraint. Hence we know the scalar clock of the
4208 conflicting thread when the conflicting access was made. */
sewardj8ab2c132009-08-02 09:34:35 +00004209 confTym = VtsID__indexAt( Cfailed, confThr );
sewardj23f12002009-07-24 08:45:08 +00004210
4211 /* Using this scalar clock, index into the conflicting thread's
4212 collection of stack traces made each time its vector clock
4213 (hence its scalar clock) changed. This gives the stack
4214 traces at the start and end of the conflicting segment (well,
4215 as per comment just above, of one of the conflicting
4216 segments, if there are more than one). */
4217 key.ull = confTym;
4218 key.ec = NULL;
4219 /* tl_assert(confThr); -- asserted just above */
sewardj8ab2c132009-08-02 09:34:35 +00004220 tl_assert(confThr->local_Kws_n_stacks);
sewardj23f12002009-07-24 08:45:08 +00004221 firstIx = lastIx = 0;
4222 found = VG_(lookupXA_UNSAFE)(
sewardj8ab2c132009-08-02 09:34:35 +00004223 confThr->local_Kws_n_stacks,
sewardj23f12002009-07-24 08:45:08 +00004224 &key, &firstIx, &lastIx,
4225 (Int(*)(void*,void*))cmp__ULong_n_EC__by_ULong
4226 );
sewardj8ab2c132009-08-02 09:34:35 +00004227 if (0) VG_(printf)("record_race_info %u %u %u confThr %p "
sewardj23f12002009-07-24 08:45:08 +00004228 "confTym %llu found %d (%lu,%lu)\n",
sewardj8ab2c132009-08-02 09:34:35 +00004229 Cfailed, Kfailed, Cw,
sewardj23f12002009-07-24 08:45:08 +00004230 confThr, confTym, found, firstIx, lastIx);
4231 /* We can't indefinitely collect stack traces at VTS
4232 transitions, since we'd eventually run out of memory. Hence
sewardj8ab2c132009-08-02 09:34:35 +00004233 note_local_Kw_n_stack_for will eventually throw away old
sewardj23f12002009-07-24 08:45:08 +00004234 ones, which in turn means we might fail to find index value
4235 confTym in the array. */
4236 if (found) {
4237 ULong_n_EC *pair_start, *pair_end;
4238 pair_start
sewardj8ab2c132009-08-02 09:34:35 +00004239 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
sewardj23f12002009-07-24 08:45:08 +00004240 hist1_seg_start = pair_start->ec;
sewardj8ab2c132009-08-02 09:34:35 +00004241 if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
sewardj23f12002009-07-24 08:45:08 +00004242 pair_end
sewardj8ab2c132009-08-02 09:34:35 +00004243 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
sewardj23f12002009-07-24 08:45:08 +00004244 lastIx+1 );
4245 /* from properties of VG_(lookupXA) and the comparison fn used: */
4246 tl_assert(pair_start->ull < pair_end->ull);
4247 hist1_seg_end = pair_end->ec;
sewardj8ab2c132009-08-02 09:34:35 +00004248 /* Could do a bit better here. It may be that pair_end
4249 doesn't have a stack, but the following entries in the
               array have the same scalar Kw and do have a stack.  So
4251 we should search a bit further along the array than
4252 lastIx+1 if hist1_seg_end is NULL. */
sewardj23f12002009-07-24 08:45:08 +00004253 } else {
4254 if (confThr->still_alive)
4255 hist1_seg_end = main_get_EC( confThr );
4256 }
4257 // seg_start could be NULL iff this is the first stack in the thread
4258 //if (seg_start) VG_(pp_ExeContext)(seg_start);
4259 //if (seg_end) VG_(pp_ExeContext)(seg_end);
4260 hist1_conf_thr = confThr->opaque;
4261 }
4262 }
4263
sewardjc5ea9962008-12-07 01:41:46 +00004264 HG_(record_error_Race)( acc_thr->opaque, acc_addr,
sewardj23f12002009-07-24 08:45:08 +00004265 szB, isWrite,
4266 hist1_conf_thr, hist1_seg_start, hist1_seg_end );
sewardjf98e1c02008-10-25 16:22:41 +00004267}
4268
4269static Bool is_sane_SVal_C ( SVal sv ) {
sewardj23f12002009-07-24 08:45:08 +00004270 Bool leq;
sewardjf98e1c02008-10-25 16:22:41 +00004271 if (!SVal__isC(sv)) return True;
sewardj23f12002009-07-24 08:45:08 +00004272 leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
4273 return leq;
sewardjf98e1c02008-10-25 16:22:41 +00004274}
4275
4276
4277/* Compute new state following a read */
sewardj23f12002009-07-24 08:45:08 +00004278static inline SVal msmcread ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00004279 /* The following are only needed for
4280 creating error reports. */
4281 Thr* acc_thr,
4282 Addr acc_addr, SizeT szB )
4283{
4284 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00004285 stats__msmcread++;
sewardjf98e1c02008-10-25 16:22:41 +00004286
4287 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00004288 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004289 tl_assert(is_sane_SVal_C(svOld));
4290 }
4291
sewardj1c0ce7a2009-07-01 08:10:49 +00004292 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00004293 VtsID tviR = acc_thr->viR;
4294 VtsID tviW = acc_thr->viW;
4295 VtsID rmini = SVal__unC_Rmin(svOld);
4296 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00004297 Bool leq = VtsID__cmpLEQ(rmini,tviR);
4298 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00004299 /* no race */
4300 /* Note: RWLOCK subtlety: use tviW, not tviR */
4301 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
4302 goto out;
4303 } else {
sewardjb0e009d2008-11-19 16:35:15 +00004304 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00004305 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
4306 tl_assert(leqxx);
4307 // same as in non-race case
4308 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
4309 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
sewardj8ab2c132009-08-02 09:34:35 +00004310 rmini, /* Cfailed */
4311 tviR, /* Kfailed */
4312 wmini /* Cw */ );
sewardjf98e1c02008-10-25 16:22:41 +00004313 goto out;
4314 }
4315 }
4316 if (SVal__isA(svOld)) {
4317 /* reading no-access memory (sigh); leave unchanged */
4318 /* check for no pollution */
4319 tl_assert(svOld == SVal_NOACCESS);
4320 svNew = SVal_NOACCESS;
4321 goto out;
4322 }
sewardj23f12002009-07-24 08:45:08 +00004323 if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00004324 tl_assert(0);
4325
4326 out:
sewardj8f5374e2008-12-07 11:40:17 +00004327 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004328 tl_assert(is_sane_SVal_C(svNew));
4329 }
sewardj1c0ce7a2009-07-01 08:10:49 +00004330 if (UNLIKELY(svNew != svOld)) {
4331 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00004332 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00004333 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00004334 event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00004335 stats__msmcread_change++;
sewardjf98e1c02008-10-25 16:22:41 +00004336 }
4337 }
4338 return svNew;
4339}
4340
4341
4342/* Compute new state following a write */
sewardj23f12002009-07-24 08:45:08 +00004343static inline SVal msmcwrite ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00004344 /* The following are only needed for
4345 creating error reports. */
4346 Thr* acc_thr,
4347 Addr acc_addr, SizeT szB )
4348{
4349 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00004350 stats__msmcwrite++;
sewardjf98e1c02008-10-25 16:22:41 +00004351
4352 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00004353 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004354 tl_assert(is_sane_SVal_C(svOld));
4355 }
4356
sewardj1c0ce7a2009-07-01 08:10:49 +00004357 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00004358 VtsID tviW = acc_thr->viW;
4359 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00004360 Bool leq = VtsID__cmpLEQ(wmini,tviW);
4361 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00004362 /* no race */
4363 svNew = SVal__mkC( tviW, tviW );
4364 goto out;
4365 } else {
4366 VtsID rmini = SVal__unC_Rmin(svOld);
sewardjb0e009d2008-11-19 16:35:15 +00004367 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00004368 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
4369 tl_assert(leqxx);
4370 // same as in non-race case
4371 // proof: in the non-race case, we have
4372 // rmini <= wmini (invar on constraints)
4373 // tviW <= tviR (invar on thread clocks)
4374 // wmini <= tviW (from run-time check)
4375 // hence from transitivity of <= we have
4376 // rmini <= wmini <= tviW
4377 // and so join(rmini,tviW) == tviW
4378 // and join(wmini,tviW) == tviW
4379 // qed.
4380 svNew = SVal__mkC( VtsID__join2(rmini, tviW),
4381 VtsID__join2(wmini, tviW) );
4382 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
sewardj8ab2c132009-08-02 09:34:35 +00004383 wmini, /* Cfailed */
4384 tviW, /* Kfailed */
4385 wmini /* Cw */ );
sewardjf98e1c02008-10-25 16:22:41 +00004386 goto out;
4387 }
4388 }
4389 if (SVal__isA(svOld)) {
4390 /* writing no-access memory (sigh); leave unchanged */
4391 /* check for no pollution */
4392 tl_assert(svOld == SVal_NOACCESS);
4393 svNew = SVal_NOACCESS;
4394 goto out;
4395 }
sewardj23f12002009-07-24 08:45:08 +00004396 if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00004397 tl_assert(0);
4398
4399 out:
sewardj8f5374e2008-12-07 11:40:17 +00004400 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004401 tl_assert(is_sane_SVal_C(svNew));
4402 }
sewardj1c0ce7a2009-07-01 08:10:49 +00004403 if (UNLIKELY(svNew != svOld)) {
4404 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00004405 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00004406 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00004407 event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00004408 stats__msmcwrite_change++;
sewardjf98e1c02008-10-25 16:22:41 +00004409 }
4410 }
4411 return svNew;
4412}
4413
4414
4415/////////////////////////////////////////////////////////
4416// //
4417// Apply core MSM to specific memory locations //
4418// //
4419/////////////////////////////////////////////////////////
4420
sewardj23f12002009-07-24 08:45:08 +00004421/*------------- ZSM accesses: 8 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00004422
sewardj23f12002009-07-24 08:45:08 +00004423static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004424 CacheLine* cl;
4425 UWord cloff, tno, toff;
4426 SVal svOld, svNew;
4427 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004428 stats__cline_cread08s++;
sewardjf98e1c02008-10-25 16:22:41 +00004429 cl = get_cacheline(a);
4430 cloff = get_cacheline_offset(a);
4431 tno = get_treeno(a);
4432 toff = get_tree_offset(a); /* == 0 .. 7 */
4433 descr = cl->descrs[tno];
4434 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
4435 SVal* tree = &cl->svals[tno << 3];
4436 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00004437 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004438 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4439 }
4440 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004441 svNew = msmcread( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004442 if (CHECK_ZSM)
4443 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004444 cl->svals[cloff] = svNew;
4445}
4446
sewardj23f12002009-07-24 08:45:08 +00004447static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004448 CacheLine* cl;
4449 UWord cloff, tno, toff;
4450 SVal svOld, svNew;
4451 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004452 stats__cline_cwrite08s++;
sewardjf98e1c02008-10-25 16:22:41 +00004453 cl = get_cacheline(a);
4454 cloff = get_cacheline_offset(a);
4455 tno = get_treeno(a);
4456 toff = get_tree_offset(a); /* == 0 .. 7 */
4457 descr = cl->descrs[tno];
4458 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
4459 SVal* tree = &cl->svals[tno << 3];
4460 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00004461 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004462 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4463 }
4464 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004465 svNew = msmcwrite( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004466 if (CHECK_ZSM)
4467 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004468 cl->svals[cloff] = svNew;
4469}
4470
sewardj23f12002009-07-24 08:45:08 +00004471/*------------- ZSM accesses: 16 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00004472
sewardj23f12002009-07-24 08:45:08 +00004473static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004474 CacheLine* cl;
4475 UWord cloff, tno, toff;
4476 SVal svOld, svNew;
4477 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004478 stats__cline_cread16s++;
sewardjf98e1c02008-10-25 16:22:41 +00004479 if (UNLIKELY(!aligned16(a))) goto slowcase;
4480 cl = get_cacheline(a);
4481 cloff = get_cacheline_offset(a);
4482 tno = get_treeno(a);
4483 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
4484 descr = cl->descrs[tno];
4485 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
4486 if (valid_value_is_below_me_16(descr, toff)) {
4487 goto slowcase;
4488 } else {
4489 SVal* tree = &cl->svals[tno << 3];
4490 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
4491 }
sewardj8f5374e2008-12-07 11:40:17 +00004492 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004493 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4494 }
4495 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004496 svNew = msmcread( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004497 if (CHECK_ZSM)
4498 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004499 cl->svals[cloff] = svNew;
4500 return;
4501 slowcase: /* misaligned, or must go further down the tree */
4502 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00004503 zsm_sapply08__msmcread( thr, a + 0 );
4504 zsm_sapply08__msmcread( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00004505}
4506
sewardj23f12002009-07-24 08:45:08 +00004507static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004508 CacheLine* cl;
4509 UWord cloff, tno, toff;
4510 SVal svOld, svNew;
4511 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004512 stats__cline_cwrite16s++;
sewardjf98e1c02008-10-25 16:22:41 +00004513 if (UNLIKELY(!aligned16(a))) goto slowcase;
4514 cl = get_cacheline(a);
4515 cloff = get_cacheline_offset(a);
4516 tno = get_treeno(a);
4517 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
4518 descr = cl->descrs[tno];
4519 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
4520 if (valid_value_is_below_me_16(descr, toff)) {
4521 goto slowcase;
4522 } else {
4523 SVal* tree = &cl->svals[tno << 3];
4524 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
4525 }
sewardj8f5374e2008-12-07 11:40:17 +00004526 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004527 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4528 }
4529 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004530 svNew = msmcwrite( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004531 if (CHECK_ZSM)
4532 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004533 cl->svals[cloff] = svNew;
4534 return;
4535 slowcase: /* misaligned, or must go further down the tree */
4536 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00004537 zsm_sapply08__msmcwrite( thr, a + 0 );
4538 zsm_sapply08__msmcwrite( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00004539}

/*------------- ZSM accesses: 32 bit sapply ------------- */

static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord cloff, tno, toff;
   SVal svOld, svNew;
   UShort descr;
   stats__cline_cread32s++;
   if (UNLIKELY(!aligned32(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 or 4 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
      if (valid_value_is_above_me_32(descr, toff)) {
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
      } else {
         goto slowcase;
      }
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcread( svOld, thr, a, 4 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_32to16splits++;
   zsm_sapply16__msmcread( thr, a + 0 );
   zsm_sapply16__msmcread( thr, a + 2 );
}

static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord cloff, tno, toff;
   SVal svOld, svNew;
   UShort descr;
   stats__cline_cwrite32s++;
   if (UNLIKELY(!aligned32(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 or 4 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
      if (valid_value_is_above_me_32(descr, toff)) {
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
      } else {
         goto slowcase;
      }
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcwrite( svOld, thr, a, 4 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_32to16splits++;
   zsm_sapply16__msmcwrite( thr, a + 0 );
   zsm_sapply16__msmcwrite( thr, a + 2 );
}

/*------------- ZSM accesses: 64 bit sapply ------------- */

static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord cloff, tno;
   //UWord toff;
   SVal svOld, svNew;
   UShort descr;
   stats__cline_cread64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   //toff = get_tree_offset(a); /* == 0, unused */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
      goto slowcase;
   }
   svOld = cl->svals[cloff];
   svNew = msmcread( svOld, thr, a, 8 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_64to32splits++;
   zsm_sapply32__msmcread( thr, a + 0 );
   zsm_sapply32__msmcread( thr, a + 4 );
}

static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord cloff, tno;
   //UWord toff;
   SVal svOld, svNew;
   UShort descr;
   stats__cline_cwrite64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   //toff = get_tree_offset(a); /* == 0, unused */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
      goto slowcase;
   }
   svOld = cl->svals[cloff];
   svNew = msmcwrite( svOld, thr, a, 8 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_64to32splits++;
   zsm_sapply32__msmcwrite( thr, a + 0 );
   zsm_sapply32__msmcwrite( thr, a + 4 );
}
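
/* Unlike the 16- and 32-bit cases above, there is no level above
   64 bits to pull down from: TREE_DESCR_64 is the root of an
   8-byte tree.  A descriptor miss here can only mean the valid
   values live further down the tree, so the access is simply
   split into two 32-bit halves. */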

/*--------------- ZSM accesses: 8 bit swrite --------------- */

static
void zsm_swrite08 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord cloff, tno, toff;
   UShort descr;
   stats__cline_swrite08s++;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 .. 7 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
      SVal* tree = &cl->svals[tno << 3];
      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
}

/*--------------- ZSM accesses: 16 bit swrite --------------- */

static
void zsm_swrite16 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord cloff, tno, toff;
   UShort descr;
   stats__cline_swrite16s++;
   if (UNLIKELY(!aligned16(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
      if (valid_value_is_below_me_16(descr, toff)) {
         /* Writing at this level. Need to fix up 'descr'. */
         cl->descrs[tno] = pullup_descr_to_16(descr, toff);
         /* At this point, the tree does not match cl->descrs[tno] any
            more. The assignments below will fix it up. */
      } else {
         /* We can't indiscriminately write on the w16 node as in the
            w64 case, as that might make the node inconsistent with
            its parent. So first, pull down to this level. */
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
         if (CHECK_ZSM)
            tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      }
   }
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_16to8splits++;
   zsm_swrite08( a + 0, svNew );
   zsm_swrite08( a + 1, svNew );
}

/*--------------- ZSM accesses: 32 bit swrite --------------- */

static
void zsm_swrite32 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord cloff, tno, toff;
   UShort descr;
   stats__cline_swrite32s++;
   if (UNLIKELY(!aligned32(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 or 4 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
      if (valid_value_is_above_me_32(descr, toff)) {
         /* We can't indiscriminately write on the w32 node as in the
            w64 case, as that might make the node inconsistent with
            its parent. So first, pull down to this level. */
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
         if (CHECK_ZSM)
            tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      } else {
         /* Writing at this level. Need to fix up 'descr'. */
         cl->descrs[tno] = pullup_descr_to_32(descr, toff);
         /* At this point, the tree does not match cl->descrs[tno] any
            more. The assignments below will fix it up. */
      }
   }
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   cl->svals[cloff + 2] = SVal_INVALID;
   cl->svals[cloff + 3] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_32to16splits++;
   zsm_swrite16( a + 0, svNew );
   zsm_swrite16( a + 2, svNew );
}

/*--------------- ZSM accesses: 64 bit swrite --------------- */

static
void zsm_swrite64 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord cloff, tno;
   //UWord toff;
   stats__cline_swrite64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   //toff = get_tree_offset(a); /* == 0, unused */
   cl->descrs[tno] = TREE_DESCR_64;
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   cl->svals[cloff + 2] = SVal_INVALID;
   cl->svals[cloff + 3] = SVal_INVALID;
   cl->svals[cloff + 4] = SVal_INVALID;
   cl->svals[cloff + 5] = SVal_INVALID;
   cl->svals[cloff + 6] = SVal_INVALID;
   cl->svals[cloff + 7] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_64to32splits++;
   zsm_swrite32( a + 0, svNew );
   zsm_swrite32( a + 4, svNew );
}
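
/* Because an aligned 64-bit write covers an entire tree,
   zsm_swrite64 can install TREE_DESCR_64 unconditionally; no
   pullup or pulldown is needed.  The seven now-dead slots are set
   to SVal_INVALID so that any later narrower access is forced
   through the pulldown machinery above. */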

/*------------- ZSM accesses: 8 bit sread/scopy ------------- */

static
SVal zsm_sread08 ( Addr a ) {
   CacheLine* cl;
   UWord cloff, tno, toff;
   UShort descr;
   stats__cline_sread08s++;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 .. 7 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
      SVal* tree = &cl->svals[tno << 3];
      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
   }
   return cl->svals[cloff];
}

static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
   SVal sv;
   stats__cline_scopy08s++;
   sv = zsm_sread08( src );
   zsm_swrite08( dst, sv );
}


/* Block-copy states (needed for implementing realloc()). Note this
   doesn't change the filtering arrangements. The caller of
   zsm_scopy_range needs to attend to that. */

static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
{
   SizeT i;
   if (len == 0)
      return;

   /* assert for non-overlappingness */
   tl_assert(src+len <= dst || dst+len <= src);

   /* To be simple, just copy byte by byte. But so as not to wreck
      performance for later accesses to dst[0 .. len-1], normalise
      destination lines as we finish with them, and also normalise the
      line containing the first and last address. */
   for (i = 0; i < len; i++) {
      Bool normalise
         = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
           || i == 0       /* first in range */
           || i == len-1;  /* last in range */
      zsm_scopy08( src+i, dst+i, normalise );
   }
}
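
/* Illustrative caller, a sketch only (the names 'oldA', 'newA' and
   'oldSz' are hypothetical; compare libhb_copy_shadow_state near the
   end of this file, which does exactly this): after moving a heap
   block during realloc, a client would do

      zsm_scopy_range( oldA, newA, oldSz );
      Filter__clear_range( thr->filter, newA, oldSz );

   the second step being the "filtering arrangements" that, per the
   comment above, zsm_scopy_range leaves to its caller. */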


/* For setting address ranges to a given value. Has considerable
   sophistication so as to avoid generating large numbers of pointless
   cache loads/writebacks for large ranges. */

/* Do small ranges in-cache, in the obvious way. */
static
void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_swrite32( a, svNew );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_swrite64( a, svNew );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_swrite08( a, svNew );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_swrite16( a, svNew );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_swrite32( a, svNew );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_swrite64( a, svNew );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_swrite32( a, svNew );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_swrite16( a, svNew );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_swrite08( a, svNew );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}
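
/* Worked example of the decomposition above: setting 7 bytes
   starting at the odd address 0x1003 issues

      zsm_swrite08( 0x1003, svNew );  // now 2-aligned
      zsm_swrite32( 0x1004, svNew );  // 0x1004 is already 4-aligned,
                                      // so the 16-bit step is skipped
      zsm_swrite16( 0x1008, svNew );  // the 2 trailing bytes

   so even unaligned ranges cost a handful of tree writes, never a
   per-byte loop. */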


/* If we're doing a small range, hand off to zsm_sset_range_SMALL. But
   for larger ranges, try to operate directly on the out-of-cache
   representation, rather than dragging lines into the cache,
   overwriting them, and forcing them out. This turns out to be an
   important performance optimisation.

   Note that this doesn't change the filtering arrangements. The
   caller of zsm_sset_range needs to attend to that. */

static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
{
   tl_assert(svNew != SVal_INVALID);
   stats__cache_make_New_arange += (ULong)len;

   if (0 && len > 500)
      VG_(printf)("make New ( %#lx, %ld )\n", a, len );

   if (0) {
      static UWord n_New_in_cache = 0;
      static UWord n_New_not_in_cache = 0;
      /* tag is 'a' with the in-line offset masked out,
         eg a[31]..a[4] 0000 */
      Addr tag = a & ~(N_LINE_ARANGE - 1);
      UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
      if (LIKELY(tag == cache_shmem.tags0[wix])) {
         n_New_in_cache++;
      } else {
         n_New_not_in_cache++;
      }
      if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
         VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
                     n_New_in_cache, n_New_not_in_cache );
   }

   if (LIKELY(len < 2 * N_LINE_ARANGE)) {
      zsm_sset_range_SMALL( a, len, svNew );
   } else {
      Addr  before_start  = a;
      Addr  aligned_start = cacheline_ROUNDUP(a);
      Addr  after_start   = cacheline_ROUNDDN(a + len);
      UWord before_len    = aligned_start - before_start;
      UWord aligned_len   = after_start - aligned_start;
      UWord after_len     = a + len - after_start;
      tl_assert(before_start <= aligned_start);
      tl_assert(aligned_start <= after_start);
      tl_assert(before_len < N_LINE_ARANGE);
      tl_assert(after_len < N_LINE_ARANGE);
      tl_assert(get_cacheline_offset(aligned_start) == 0);
      if (get_cacheline_offset(a) == 0) {
         tl_assert(before_len == 0);
         tl_assert(a == aligned_start);
      }
      if (get_cacheline_offset(a+len) == 0) {
         tl_assert(after_len == 0);
         tl_assert(after_start == a+len);
      }
      if (before_len > 0) {
         zsm_sset_range_SMALL( before_start, before_len, svNew );
      }
      if (after_len > 0) {
         zsm_sset_range_SMALL( after_start, after_len, svNew );
      }
      stats__cache_make_New_inZrep += (ULong)aligned_len;

      while (1) {
         Addr tag;
         UWord wix;
         if (aligned_start >= after_start)
            break;
         tl_assert(get_cacheline_offset(aligned_start) == 0);
         tag = aligned_start & ~(N_LINE_ARANGE - 1);
         wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
         if (tag == cache_shmem.tags0[wix]) {
            UWord i;
            for (i = 0; i < N_LINE_ARANGE / 8; i++)
               zsm_swrite64( aligned_start + i * 8, svNew );
         } else {
            UWord i;
            Word zix;
            SecMap* sm;
            LineZ* lineZ;
            /* This line is not in the cache. Do not force it in; instead
               modify it in-place. */
            /* find the Z line to write in and rcdec it or the
               associated F line. */
            find_Z_for_writing( &sm, &zix, tag );
            tl_assert(sm);
            tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
            lineZ = &sm->linesZ[zix];
            lineZ->dict[0] = svNew;
            lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
            for (i = 0; i < N_LINE_ARANGE/4; i++)
               lineZ->ix2s[i] = 0; /* all refer to dict[0] */
            rcinc_LineZ(lineZ);
         }
         aligned_start += N_LINE_ARANGE;
         aligned_len -= N_LINE_ARANGE;
      }
      tl_assert(aligned_start == after_start);
      tl_assert(aligned_len == 0);
   }
}
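
/* In the large-range case above, the range is cut into three pieces:
   an unaligned head of before_len bytes, a run of whole cache lines
   totalling aligned_len bytes, and an unaligned tail of after_len
   bytes.  The ragged ends go through zsm_sset_range_SMALL.  Each
   whole line is rewritten in-cache if it happens to be resident;
   otherwise its LineZ is rebuilt in place as "every byte refers to
   dict[0] == svNew", the cheapest possible compressed
   representation. */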


/////////////////////////////////////////////////////////
//                                                     //
// Front-filtering accesses                            //
//                                                     //
/////////////////////////////////////////////////////////

static UWord stats__f_ac = 0;
static UWord stats__f_sk = 0;

#if 0
#  define STATS__F_SHOW \
      do { \
         if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
            VG_(printf)("filters: ac %lu sk %lu\n", \
                        stats__f_ac, stats__f_sk); \
      } while (0)
#else
#  define STATS__F_SHOW /* */
#endif
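
/* The _f_ variants below sit in front of the corresponding sapply
   routines and consult the per-thread Filter first.  Broadly, the
   filter remembers locations this thread has accessed since its
   clocks last changed (it is cleared by the Filter__clear calls
   elsewhere in this file); if it reports the access as redundant,
   the state machine is skipped entirely.  stats__f_ac counts all
   attempts, stats__f_sk the successful skips. */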

void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply08__msmcwrite(thr, a);
}

void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply16__msmcwrite(thr, a);
}

void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply32__msmcwrite(thr, a);
}

void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply64__msmcwrite(thr, a);
}

void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_sapply32_f__msmcwrite( thr, a );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_sapply64_f__msmcwrite( thr, a );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_sapply08_f__msmcwrite( thr, a );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_sapply16_f__msmcwrite( thr, a );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_sapply32_f__msmcwrite( thr, a );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_sapply64_f__msmcwrite( thr, a );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_sapply32_f__msmcwrite( thr, a );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_sapply16_f__msmcwrite( thr, a );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_sapply08_f__msmcwrite( thr, a );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}

void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply08__msmcread(thr, a);
}

void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply16__msmcread(thr, a);
}

void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply32__msmcread(thr, a);
}

void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply64__msmcread(thr, a);
}

void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_sapply32_f__msmcread( thr, a );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_sapply64_f__msmcread( thr, a );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_sapply08_f__msmcread( thr, a );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_sapply16_f__msmcread( thr, a );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_sapply32_f__msmcread( thr, a );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_sapply64_f__msmcread( thr, a );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_sapply32_f__msmcread( thr, a );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_sapply16_f__msmcread( thr, a );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_sapply08_f__msmcread( thr, a );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}

void libhb_Thr_resumes ( Thr* thr )
{
   if (0) VG_(printf)("resume %p\n", thr);
   tl_assert(thr);
   tl_assert(thr->still_alive);
   Filter__clear(thr->filter, "libhb_Thr_resumes");
   /* A kludge, but .. if this thread doesn't have any marker stacks
      at all, get one right now. This is easier than figuring out
      exactly when at thread startup we can and can't take a stack
      snapshot. */
   if (HG_(clo_history_level) == 1) {
      tl_assert(thr->local_Kws_n_stacks);
      if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
         note_local_Kw_n_stack_for(thr);
   }
}
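
/* The Filter__clear above is conservative: skipping is only sound
   for repeats of accesses made since this thread's clocks last
   changed, and while the thread was descheduled other threads may
   have updated the shadow values behind the cached addresses. */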


/////////////////////////////////////////////////////////
//                                                     //
// Synchronisation objects                             //
//                                                     //
/////////////////////////////////////////////////////////

// (UInt) `echo "Synchronisation object" | md5sum`
#define SO_MAGIC 0x56b3c5b0U

struct _SO {
   VtsID viR; /* r-clock of sender */
   VtsID viW; /* w-clock of sender */
   UInt  magic;
};

static SO* SO__Alloc ( void ) {
   SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
   so->viR   = VtsID_INVALID;
   so->viW   = VtsID_INVALID;
   so->magic = SO_MAGIC;
   return so;
}
static void SO__Dealloc ( SO* so ) {
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
   } else {
      tl_assert(so->viW != VtsID_INVALID);
      VtsID__rcdec(so->viR);
      VtsID__rcdec(so->viW);
   }
   so->magic = 0;
   HG_(free)( so );
}
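
/* Illustrative lifecycle, a sketch of hypothetical client code (the
   real callers live in the Helgrind tool proper): a mutex unlock in
   thread t1 followed by the corresponding lock in thread t2 might
   be modelled as

      SO* so = libhb_so_alloc();
      libhb_so_send( t1, so, True/*strong*/ );   // at unlock
      ...
      libhb_so_recv( t2, so, True/*strong*/ );   // at lock
      libhb_so_dealloc( so );

   which establishes the happens-before edge from everything t1 did
   before the unlock to everything t2 does after the lock. */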


/////////////////////////////////////////////////////////
//                                                     //
// Top Level API                                       //
//                                                     //
/////////////////////////////////////////////////////////

static void show_thread_state ( HChar* str, Thr* t )
{
   if (1) return;
   if (t->viR == t->viW) {
      VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
      VtsID__pp( t->viR );
      VG_(printf)("%s","\n");
   } else {
      VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
      VtsID__pp( t->viR );
      VG_(printf)(" viW %u==", t->viW);
      VtsID__pp( t->viW );
      VG_(printf)("%s","\n");
   }
}


Thr* libhb_init (
        void (*get_stacktrace)( Thr*, Addr*, UWord ),
        ExeContext* (*get_EC)( Thr* )
     )
{
   Thr*  thr;
   VtsID vi;
   tl_assert(get_stacktrace);
   tl_assert(get_EC);
   main_get_stacktrace = get_stacktrace;
   main_get_EC         = get_EC;

   // No need to initialise hg_wordfm.
   // No need to initialise hg_wordset.

   vts_set_init();
   vts_tab_init();
   event_map_init();
   VtsID__invalidate_caches();

   // initialise shadow memory
   zsm_init( SVal__rcinc, SVal__rcdec );

   thr = Thr__new();
   vi  = VtsID__mk_Singleton( thr, 1 );
   thr->viR = vi;
   thr->viW = vi;
   VtsID__rcinc(thr->viR);
   VtsID__rcinc(thr->viW);

   show_thread_state("  root", thr);
   return thr;
}


Thr* libhb_create ( Thr* parent )
{
   /* The child's VTSs are copies of the parent's VTSs, but ticked at
      the child's index. Since the child's index is guaranteed
      unique, it has never been seen before, so the implicit value
      before the tick is zero and after that is one. */
   Thr* child = Thr__new();

   child->viR = VtsID__tick( parent->viR, child );
   child->viW = VtsID__tick( parent->viW, child );
   Filter__clear(child->filter, "libhb_create(child)");
   VtsID__rcinc(child->viR);
   VtsID__rcinc(child->viW);
   /* We need to do note_local_Kw_n_stack_for( child ), but it's too
      early for that - it may not have a valid TId yet. So, let
      libhb_Thr_resumes pick it up the first time the thread runs. */

   tl_assert(VtsID__indexAt( child->viR, child ) == 1);
   tl_assert(VtsID__indexAt( child->viW, child ) == 1);

   /* and the parent has to move along too */
   VtsID__rcdec(parent->viR);
   VtsID__rcdec(parent->viW);
   parent->viR = VtsID__tick( parent->viR, parent );
   parent->viW = VtsID__tick( parent->viW, parent );
   Filter__clear(parent->filter, "libhb_create(parent)");
   VtsID__rcinc(parent->viR);
   VtsID__rcinc(parent->viW);
   note_local_Kw_n_stack_for( parent );

   show_thread_state(" child", child);
   show_thread_state("parent", parent);

   return child;
}
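
/* Worked example, notation only: if the parent's clock is <P:3> at
   the spawn, the child starts with <P:3, C:1> (the parent's clock
   ticked at the child's fresh index) and the parent moves on to
   <P:4>.  Hence everything the parent did before the spawn
   happens-before everything the child ever does. */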

/* Shut down the library, and print stats (in fact that's _all_
   this is for). */
void libhb_shutdown ( Bool show_stats )
{
   if (show_stats) {
      VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
      VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
                  stats__secmaps_allocd,
                  stats__secmap_ga_space_covered);
      VG_(printf)("  linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
                  stats__secmap_linesZ_allocd,
                  stats__secmap_linesZ_bytes);
      VG_(printf)("  linesF: %'10lu allocd (%'12lu bytes occupied)\n",
                  stats__secmap_linesF_allocd,
                  stats__secmap_linesF_bytes);
      VG_(printf)(" secmaps: %'10lu iterator steppings\n",
                  stats__secmap_iterator_steppings);
      VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
                  stats__secmaps_search, stats__secmaps_search_slow);

      VG_(printf)("%s","\n");
      VG_(printf)("   cache: %'lu totrefs (%'lu misses)\n",
                  stats__cache_totrefs, stats__cache_totmisses );
      VG_(printf)("   cache: %'14lu Z-fetch,    %'14lu F-fetch\n",
                  stats__cache_Z_fetches, stats__cache_F_fetches );
      VG_(printf)("   cache: %'14lu Z-wback,    %'14lu F-wback\n",
                  stats__cache_Z_wbacks, stats__cache_F_wbacks );
      VG_(printf)("   cache: %'14lu invals,     %'14lu flushes\n",
                  stats__cache_invals, stats__cache_flushes );
      VG_(printf)("   cache: %'14llu arange_New  %'14llu direct-to-Zreps\n",
                  stats__cache_make_New_arange,
                  stats__cache_make_New_inZrep);

      VG_(printf)("%s","\n");
      VG_(printf)("   cline: %'10lu normalises\n",
                  stats__cline_normalises );
      VG_(printf)("   cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_cread64s,
                  stats__cline_cread32s,
                  stats__cline_cread16s,
                  stats__cline_cread08s );
      VG_(printf)("   cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_cwrite64s,
                  stats__cline_cwrite32s,
                  stats__cline_cwrite16s,
                  stats__cline_cwrite08s );
      VG_(printf)("   cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_swrite64s,
                  stats__cline_swrite32s,
                  stats__cline_swrite16s,
                  stats__cline_swrite08s );
      VG_(printf)("   cline: s rd1s %'lu, s copy1s %'lu\n",
                  stats__cline_sread08s, stats__cline_scopy08s );
      VG_(printf)("   cline: splits: 8to4 %'12lu    4to2 %'12lu    2to1 %'12lu\n",
                  stats__cline_64to32splits,
                  stats__cline_32to16splits,
                  stats__cline_16to8splits );
      VG_(printf)("   cline: pulldowns: 8to4 %'12lu    4to2 %'12lu    2to1 %'12lu\n",
                  stats__cline_64to32pulldown,
                  stats__cline_32to16pulldown,
                  stats__cline_16to8pulldown );
      if (0)
         VG_(printf)("   cline: sizeof(CacheLineZ) %ld, covers %ld bytes of arange\n",
                     (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);

      VG_(printf)("%s","\n");

      VG_(printf)("   libhb: %'13llu msmcread  (%'llu dragovers)\n",
                  stats__msmcread, stats__msmcread_change);
      VG_(printf)("   libhb: %'13llu msmcwrite (%'llu dragovers)\n",
                  stats__msmcwrite, stats__msmcwrite_change);
      VG_(printf)("   libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
                  stats__cmpLEQ_queries, stats__cmpLEQ_misses);
      VG_(printf)("   libhb: %'13llu join2  queries (%'llu misses)\n",
                  stats__join2_queries, stats__join2_misses);

      VG_(printf)("%s","\n");
      VG_(printf)( "   libhb: VTSops: tick %'lu,  join %'lu,  cmpLEQ %'lu\n",
                   stats__vts__tick, stats__vts__join, stats__vts__cmpLEQ );
      VG_(printf)( "   libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
                   stats__vts__cmp_structural, stats__vts__cmp_structural_slow );
      VG_(printf)( "   libhb: VTSset: find_and_dealloc__or_add %'lu (%'lu deallocd)\n",
                   stats__vts_set__fadoa, stats__vts_set__fadoa_d );
      VG_(printf)( "   libhb: VTSops: indexAt_SLOW %'lu\n",
                   stats__vts__indexat_slow );

      VG_(printf)("%s","\n");
      VG_(printf)(
         "   libhb: %ld entries in vts_table (approximately %lu bytes)\n",
         VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
      );
      VG_(printf)( "   libhb: %lu entries in vts_set\n",
                   VG_(sizeFM)( vts_set ) );

      VG_(printf)("%s","\n");
      VG_(printf)( "   libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
                   stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
                   stats__ctxt_rcdec2,
                   stats__ctxt_rcdec3 );
      VG_(printf)( "   libhb: ctxt__rcdec: calls %lu, discards %lu\n",
                   stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
      VG_(printf)( "   libhb: contextTab: %lu slots, %lu max ents\n",
                   (UWord)N_RCEC_TAB,
                   stats__ctxt_tab_curr );
      VG_(printf)( "   libhb: contextTab: %lu queries, %lu cmps\n",
                   stats__ctxt_tab_qs,
                   stats__ctxt_tab_cmps );
#if 0
      VG_(printf)("sizeof(AvlNode)     = %lu\n", sizeof(AvlNode));
      VG_(printf)("sizeof(WordBag)     = %lu\n", sizeof(WordBag));
      VG_(printf)("sizeof(MaybeWord)   = %lu\n", sizeof(MaybeWord));
      VG_(printf)("sizeof(CacheLine)   = %lu\n", sizeof(CacheLine));
      VG_(printf)("sizeof(LineZ)       = %lu\n", sizeof(LineZ));
      VG_(printf)("sizeof(LineF)       = %lu\n", sizeof(LineF));
      VG_(printf)("sizeof(SecMap)      = %lu\n", sizeof(SecMap));
      VG_(printf)("sizeof(Cache)       = %lu\n", sizeof(Cache));
      VG_(printf)("sizeof(SMCacheEnt)  = %lu\n", sizeof(SMCacheEnt));
      VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
      VG_(printf)("sizeof(VTS)         = %lu\n", sizeof(VTS));
      VG_(printf)("sizeof(ScalarTS)    = %lu\n", sizeof(ScalarTS));
      VG_(printf)("sizeof(VtsTE)       = %lu\n", sizeof(VtsTE));
      VG_(printf)("sizeof(MSMInfo)     = %lu\n", sizeof(MSMInfo));

      VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
      VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
      VG_(printf)("sizeof(struct _Thr)    = %lu\n", sizeof(struct _Thr));
      VG_(printf)("sizeof(struct _SO)     = %lu\n", sizeof(struct _SO));
#endif

      VG_(printf)("%s","<<< END libhb stats >>>\n");
      VG_(printf)("%s","\n");

   }
}

void libhb_async_exit ( Thr* thr )
{
   tl_assert(thr);
   tl_assert(thr->still_alive);
   thr->still_alive = False;

   /* free up Filter and local_Kws_n_stacks (well, actually not the
      latter ..) */
   tl_assert(thr->filter);
   HG_(free)(thr->filter);
   thr->filter = NULL;

   /* Another space-accuracy tradeoff. Do we want to be able to show
      H1 history for conflicts in threads which have since exited? If
      yes, then we better not free up thr->local_Kws_n_stacks. The
      downside is a potential per-thread leak of up to
      N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
      XArray average overcommit factor is (1.5 I'd guess). */
   // hence:
   // VG_(deleteXA)(thr->local_Kws_n_stacks);
   // thr->local_Kws_n_stacks = NULL;
}

/* Both Segs and SOs point to VTSs. However, there is no sharing, so
   a Seg that points at a VTS is its one-and-only owner, and ditto for
   a SO that points at a VTS. */

SO* libhb_so_alloc ( void )
{
   return SO__Alloc();
}

void libhb_so_dealloc ( SO* so )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);
   SO__Dealloc(so);
}

/* See comments in libhb.h for details on the meaning of
   strong vs weak sends and strong vs weak receives. */
void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
{
   /* Copy the VTSs from 'thr' into the sync object, and then move
      the thread along one step. */

   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);

   /* stay sane .. a thread's read-clock must always lead or be the
      same as its write-clock */
   { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
     tl_assert(leq);
   }

   /* since we're overwriting the VtsIDs in the SO, we need to drop
      any references made by the previous contents thereof */
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
      so->viR = thr->viR;
      so->viW = thr->viW;
      VtsID__rcinc(so->viR);
      VtsID__rcinc(so->viW);
   } else {
      /* In a strong send, we dump any previous VC in the SO and
         install the sending thread's VC instead. For a weak send we
         must join2 with what's already there. */
      tl_assert(so->viW != VtsID_INVALID);
      VtsID__rcdec(so->viR);
      VtsID__rcdec(so->viW);
      so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
      so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
      VtsID__rcinc(so->viR);
      VtsID__rcinc(so->viW);
   }

   /* move both parent clocks along */
   VtsID__rcdec(thr->viR);
   VtsID__rcdec(thr->viW);
   thr->viR = VtsID__tick( thr->viR, thr );
   thr->viW = VtsID__tick( thr->viW, thr );
   if (thr->still_alive) {
      Filter__clear(thr->filter, "libhb_so_send");
      note_local_Kw_n_stack_for(thr);
   }
   VtsID__rcinc(thr->viR);
   VtsID__rcinc(thr->viW);

   if (strong_send)
      show_thread_state("s-send", thr);
   else
      show_thread_state("w-send", thr);
}
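
/* Worked example, notation only: suppose thr's write-clock is
   <T1:5, T2:2> and the SO already holds <T1:1, T2:4>.  A strong send
   leaves the SO holding <T1:5, T2:2>; a weak send join2s, leaving
   <T1:5, T2:4> (the pointwise maximum).  Either way thr then ticks
   along, to <T1:6, T2:2>. */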

void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);

   if (so->viR != VtsID_INVALID) {
      tl_assert(so->viW != VtsID_INVALID);

      /* Weak receive (basically, an R-acquisition of a R-W lock).
         This advances the read-clock of the receiver, but not the
         write-clock. */
      VtsID__rcdec(thr->viR);
      thr->viR = VtsID__join2( thr->viR, so->viR );
      VtsID__rcinc(thr->viR);

      /* At one point (r10589) it seemed safest to tick the clocks for
         the receiving thread after the join. But on reflection, I
         wonder if that might cause it to 'overtake' constraints,
         which could lead to missing races. So, back out that part of
         r10589. */
      //VtsID__rcdec(thr->viR);
      //thr->viR = VtsID__tick( thr->viR, thr );
      //VtsID__rcinc(thr->viR);

      /* For a strong receive, we also advance the receiver's write
         clock, which means the receive as a whole is essentially
         equivalent to a W-acquisition of a R-W lock. */
      if (strong_recv) {
         VtsID__rcdec(thr->viW);
         thr->viW = VtsID__join2( thr->viW, so->viW );
         VtsID__rcinc(thr->viW);

         /* See comment just above, re r10589. */
         //VtsID__rcdec(thr->viW);
         //thr->viW = VtsID__tick( thr->viW, thr );
         //VtsID__rcinc(thr->viW);
      }

      Filter__clear(thr->filter, "libhb_so_recv");
      note_local_Kw_n_stack_for(thr);

      if (strong_recv)
         show_thread_state("s-recv", thr);
      else
         show_thread_state("w-recv", thr);

   } else {
      tl_assert(so->viW == VtsID_INVALID);
      /* Deal with degenerate case: 'so' has no vts, so there has been
         no message posted to it. Just ignore this case. */
      show_thread_state("d-recv", thr);
   }
}

Bool libhb_so_everSent ( SO* so )
{
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
      return False;
   } else {
      tl_assert(so->viW != VtsID_INVALID);
      return True;
   }
}

#define XXX1 0 // 0x67a106c
#define XXX2 0

static inline Bool TRACEME(Addr a, SizeT szB) {
   if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
   if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
   return False;
}
static void trace ( Thr* thr, Addr a, SizeT szB, HChar* s ) {
   SVal sv = zsm_sread08(a);
   VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr, a, szB, s, sv);
   show_thread_state("", thr);
   VG_(printf)("%s","\n");
}

void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
{
   SVal sv = SVal__mkC(thr->viW, thr->viW);
   tl_assert(is_sane_SVal_C(sv));
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
   zsm_sset_range( a, szB, sv );
   Filter__clear_range( thr->filter, a, szB );
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
}

void libhb_srange_noaccess ( Thr* thr, Addr a, SizeT szB )
{
   /* do nothing */
}

void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
{
   SVal sv = SVal_NOACCESS;
   tl_assert(is_sane_SVal_C(sv));
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
   zsm_sset_range( a, szB, sv );
   Filter__clear_range( thr->filter, a, szB );
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
}

void* libhb_get_Thr_opaque ( Thr* thr ) {
   tl_assert(thr);
   return thr->opaque;
}

void libhb_set_Thr_opaque ( Thr* thr, void* v ) {
   tl_assert(thr);
   thr->opaque = v;
}

void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
{
   zsm_scopy_range(src, dst, len);
   Filter__clear_range( thr->filter, dst, len );
}

void libhb_maybe_GC ( void )
{
   event_map_maybe_GC();
   /* If there are still freelist entries available, no need for a
      GC. */
   if (vts_tab_freelist != VtsID_INVALID)
      return;
   /* So all the table entries are full, and we're having to expand
      the table. But did we hit the threshold point yet? */
   if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
      return;
   vts_tab__do_GC( False/*don't show stats*/ );
}


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION END main library                                    //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/*--------------------------------------------------------------------*/
/*--- end                                             libhb_main.c ---*/
/*--------------------------------------------------------------------*/