
/*--------------------------------------------------------------------*/
/*--- LibHB: a library for implementing and checking               ---*/
/*--- the happens-before relationship in concurrent programs.      ---*/
/*---                                                 libhb_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of LibHB, a library for implementing and checking
   the happens-before relationship in concurrent programs.

   Copyright (C) 2008-2010 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_sparsewa.h"
#include "pub_tool_xarray.h"
#include "pub_tool_oset.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
#include "pub_tool_options.h"        // VG_(clo_stats)
#include "hg_basics.h"
#include "hg_wordset.h"
#include "hg_lock_n_thread.h"
#include "hg_errors.h"

#include "libhb.h"


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                     Debugging #defines                      //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Check the sanity of shadow values in the core memory state
   machine.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_MSM 1
#else
#  define CHECK_MSM 0
#endif


/* Check sanity (reference counts, etc) in the conflicting access
   machinery.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_CEM 1
#else
#  define CHECK_CEM 0
#endif


/* Check sanity in the compressed shadow memory machinery,
   particularly in its caching innards.  Unfortunately there's no
   almost-zero-cost way to make them selectable at run time.  Hence
   set the #if 0 to #if 1 and rebuild if you want them. */
#if 0
#  define CHECK_ZSM 1  /* do sanity-check CacheLine stuff */
#  define inline __attribute__((noinline))
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define CHECK_ZSM 0  /* don't sanity-check CacheLine stuff */
#endif


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                     Forward declarations                    //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Globals needed by other parts of the library.  These are set
   once at startup and then never changed. */
static void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
static ExeContext* (*main_get_EC)( Thr* ) = NULL;



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//           SECTION BEGIN compressed shadow memory            //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_ZSM_H
#define __HB_ZSM_H

typedef  ULong  SVal;

/* This value has special significance to the implementation, and callers
   may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)

/* This is the default value for shadow memory.  Initially the shadow
   memory contains no accessible areas and so all reads produce this
   value.  TODO: make this caller-definable. */
#define SVal_NOACCESS (2ULL << 62)

/* Initialise the library.  Once initialised, it will (or may) call
   rcinc and rcdec in response to all the calls below, in order to
   allow the user to do reference counting on the SVals stored herein.
   It is important to understand, however, that due to internal
   caching, the reference counts are in general inaccurate, and can be
   both above or below the true reference count for an item.  In
   particular, the library may indicate that the reference count for
   an item is zero, when in fact it is not.

   To make the reference counting exact and therefore non-pointless,
   call zsm_flush_cache.  Immediately after it returns, the reference
   counts for all items, as deduced by the caller by observing calls
   to rcinc and rcdec, will be correct, and so any items with a zero
   reference count may be freed (or at least considered to be
   unreferenced by this library).
*/
static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );

static void zsm_sset_range  ( Addr, SizeT, SVal );
static void zsm_scopy_range ( Addr, Addr, SizeT );
static void zsm_flush_cache ( void );

#endif /* ! __HB_ZSM_H */
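
/* Illustrative sketch of the intended rcinc/rcdec protocol (not part
   of the library, hence #if 0'd out; 'rc_table' is a hypothetical
   caller-side map from SVals to counts).  The key point is that the
   counts observed via the callbacks only become trustworthy at the
   instant zsm_flush_cache returns. */
#if 0
static void example_rcinc ( SVal sv ) { /* rc_table[sv]++ */ }
static void example_rcdec ( SVal sv ) { /* rc_table[sv]-- */ }

static void example_usage ( void )
{
   zsm_init( example_rcinc, example_rcdec );
   /* ... zsm_sset_range / zsm_scopy_range calls; counts now drift
      above and below their true values due to internal caching ... */
   zsm_flush_cache();
   /* Only at this point may rc_table entries with count 0 be freed. */
}
#endif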


/* Round a up to the next multiple of N.  N must be a power of 2 */
#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N)-1))
/* Round a down to the next multiple of N.  N must be a power of 2 */
#define ROUNDDN(a, N)   ((a) & ~((N)-1))
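
/* Worked example (illustrative): with N == 8,
      ROUNDUP(13, 8) == (13 + 7) & ~7 == 20 & ~7 == 16
      ROUNDDN(13, 8) == 13 & ~7        == 8
   and both leave already-aligned values unchanged. */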



/* ------ User-supplied RC functions ------ */
static void(*rcinc)(SVal) = NULL;
static void(*rcdec)(SVal) = NULL;


/* ------ CacheLine ------ */

#define N_LINE_BITS      6 /* must be >= 3 */
#define N_LINE_ARANGE    (1 << N_LINE_BITS)
#define N_LINE_TREES     (N_LINE_ARANGE >> 3)

typedef
   struct {
      UShort descrs[N_LINE_TREES];
      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
   }
   CacheLine;

#define TREE_DESCR_16_0 (1<<0)
#define TREE_DESCR_32_0 (1<<1)
#define TREE_DESCR_16_1 (1<<2)
#define TREE_DESCR_64   (1<<3)
#define TREE_DESCR_16_2 (1<<4)
#define TREE_DESCR_32_1 (1<<5)
#define TREE_DESCR_16_3 (1<<6)
#define TREE_DESCR_8_0  (1<<7)
#define TREE_DESCR_8_1  (1<<8)
#define TREE_DESCR_8_2  (1<<9)
#define TREE_DESCR_8_3  (1<<10)
#define TREE_DESCR_8_4  (1<<11)
#define TREE_DESCR_8_5  (1<<12)
#define TREE_DESCR_8_6  (1<<13)
#define TREE_DESCR_8_7  (1<<14)
#define TREE_DESCR_DTY  (1<<15)

typedef
   struct {
      SVal  dict[4]; /* can represent up to 4 diff values in the line */
      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
                                      dict indexes */
      /* if dict[0] == SVal_INVALID then dict[1] is the index of the
         LineF to use, and dict[2..] are also SVal_INVALID. */
   }
   LineZ; /* compressed rep for a cache line */

typedef
   struct {
      Bool inUse;
      SVal w64s[N_LINE_ARANGE];
   }
   LineF; /* full rep for a cache line */
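
/* Illustrative sizing, with the default N_LINE_ARANGE == 64: a LineF
   stores w64s[64] == 512 bytes of SVals, whereas a LineZ needs only
   48 bytes (dict: 4*8, ix2s: 64/4).  The compressed form therefore
   wins whenever the 64 SVals of a line take at most 4 distinct
   values, which is overwhelmingly the common case. */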

/* Shadow memory.
   Primary map is a WordFM Addr SecMap*.
   SecMaps cover some page-size-ish section of address space and hold
     a compressed representation.
   CacheLine-sized chunks of SecMaps are copied into a Cache, being
   decompressed when moved into the cache and recompressed on the
   way out.  Because of this, the cache must operate as a writeback
   cache, not a writethrough one.

   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
   N_SECMAP_BITS must be >= N_LINE_BITS.
*/
#define N_SECMAP_BITS   13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
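
/* Worked example of the resulting address split (with the defaults
   N_SECMAP_BITS == 13, N_LINE_BITS == 6), for a guest address ga:
      ga & ~(N_SECMAP_ARANGE-1)  -- SecMap base (bits 13 and up)
      bits [12..6] of ga         -- LineZ index within the SecMap
      bits [5..0] of ga          -- byte offset within the line
   so each SecMap covers 8192 bytes as 128 lines of 64 bytes each. */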

/* The data in the SecMap is held in the array of LineZs.  Each LineZ
   either carries the required data directly, in a compressed
   representation, or it holds (in .dict[0]) an index to the LineF in
   .linesF that holds the full representation.

   Currently-unused LineF's have their .inUse bit set to zero.
   Since each in-use LineF is referred to by exactly one LineZ,
   the number of .linesZ[] that refer to .linesF should equal
   the number of .linesF[] that have .inUse == True.

   RC obligations: the RCs presented to the user include exactly
   the values in:
   * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
   * F reps that are in use (.inUse == True)

   Hence the following actions at the following transitions are required:

   F rep: .inUse==True  -> .inUse==False        -- rcdec_LineF
   F rep: .inUse==False -> .inUse==True         -- rcinc_LineF
   Z rep: .dict[0] from other to SVal_INVALID   -- rcdec_LineZ
   Z rep: .dict[0] from SVal_INVALID to other   -- rcinc_LineZ
*/
typedef
   struct {
      UInt   magic;
      LineZ  linesZ[N_SECMAP_ZLINES];
      LineF* linesF;
      UInt   linesF_size;
   }
   SecMap;

#define SecMap_MAGIC   0x571e58cbU

static inline Bool is_sane_SecMap ( SecMap* sm ) {
   return sm != NULL && sm->magic == SecMap_MAGIC;
}

/* ------ Cache ------ */

#define N_WAY_BITS 16
#define N_WAY_NENT (1 << N_WAY_BITS)

/* Each tag is the address of the associated CacheLine, rounded down
   to a CacheLine address boundary.  A CacheLine size must be a power
   of 2 and must be 8 or more.  Hence an easy way to initialise the
   cache so it is empty is to set all the tag values to any value % 8
   != 0, eg 1.  This means all queries in the cache initially miss.
   It does, however, require us to detect, and not write back, any
   line with a bogus tag. */
typedef
   struct {
      CacheLine lyns0[N_WAY_NENT];
      Addr      tags0[N_WAY_NENT];
   }
   Cache;

static inline Bool is_valid_scache_tag ( Addr tag ) {
   /* a valid tag should be naturally aligned to the start of
      a CacheLine. */
   return 0 == (tag & (N_LINE_ARANGE - 1));
}
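
/* For example, with N_LINE_ARANGE == 64, the bogus tag 1 used to mark
   empty ways fails this test (1 & 63 != 0), so it can never match a
   real, 64-aligned tag, and is caught before any writeback. */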


/* --------- Primary data structures --------- */

/* Shadow memory primary map */
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
static Cache   cache_shmem;


static UWord stats__secmaps_search       = 0; // # SM finds
static UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
static UWord stats__secmap_linesF_bytes  = 0; // .. using this much storage
static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
static UWord stats__cache_F_fetches      = 0; // # F lines fetched
static UWord stats__cache_F_wbacks       = 0; // # F lines written back
static UWord stats__cache_invals         = 0; // # cache invals
static UWord stats__cache_flushes        = 0; // # cache flushes
static UWord stats__cache_totrefs        = 0; // # total accesses
static UWord stats__cache_totmisses      = 0; // # misses
static ULong stats__cache_make_New_arange = 0; // total arange made New
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
static UWord stats__cline_normalises     = 0; // # calls to cacheline_normalise
static UWord stats__cline_cread64s       = 0; // # calls to s_m_read64
static UWord stats__cline_cread32s       = 0; // # calls to s_m_read32
static UWord stats__cline_cread16s       = 0; // # calls to s_m_read16
static UWord stats__cline_cread08s       = 0; // # calls to s_m_read8
static UWord stats__cline_cwrite64s      = 0; // # calls to s_m_write64
static UWord stats__cline_cwrite32s      = 0; // # calls to s_m_write32
static UWord stats__cline_cwrite16s      = 0; // # calls to s_m_write16
static UWord stats__cline_cwrite08s      = 0; // # calls to s_m_write8
static UWord stats__cline_sread08s       = 0; // # calls to s_m_get8
static UWord stats__cline_swrite08s      = 0; // # calls to s_m_set8
static UWord stats__cline_swrite16s      = 0; // # calls to s_m_set16
static UWord stats__cline_swrite32s      = 0; // # calls to s_m_set32
static UWord stats__cline_swrite64s      = 0; // # calls to s_m_set64
static UWord stats__cline_scopy08s       = 0; // # calls to s_m_copy8
static UWord stats__cline_64to32splits   = 0; // # 64-bit accesses split
static UWord stats__cline_32to16splits   = 0; // # 32-bit accesses split
static UWord stats__cline_16to8splits    = 0; // # 16-bit accesses split
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8
static UWord stats__vts__tick            = 0; // # calls to VTS__tick
static UWord stats__vts__join            = 0; // # calls to VTS__join
static UWord stats__vts__cmpLEQ          = 0; // # calls to VTS__cmpLEQ
static UWord stats__vts__cmp_structural  = 0; // # calls to VTS__cmp_structural

// # calls to VTS__cmp_structural w/ slow case
static UWord stats__vts__cmp_structural_slow = 0;

// # calls to VTS__indexAt_SLOW
static UWord stats__vts__indexat_slow = 0;

// # calls to vts_set__find__or__clone_and_add
static UWord stats__vts_set__focaa = 0;

// # calls to vts_set__find__or__clone_and_add that lead to an
// allocation
static UWord stats__vts_set__focaa_a = 0;


static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
   return a & ~(N_SECMAP_ARANGE - 1);
}
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
   return a & (N_SECMAP_ARANGE - 1);
}


/*----------------------------------------------------------------*/
/*--- map_shmem :: WordFM Addr SecMap                          ---*/
/*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
/*----------------------------------------------------------------*/

/*--------------- SecMap allocation --------------- */

static HChar* shmem__bigchunk_next = NULL;
static HChar* shmem__bigchunk_end1 = NULL;

static void* shmem__bigchunk_alloc ( SizeT n )
{
   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
   tl_assert(n > 0);
   n = VG_ROUNDUP(n, 16);
   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
      if (0)
         VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
                     (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
      if (shmem__bigchunk_next == NULL)
         VG_(out_of_memory_NORETURN)(
            "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
   }
   tl_assert(shmem__bigchunk_next);
   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
   shmem__bigchunk_next += n;
   return shmem__bigchunk_next - n;
}

static SecMap* shmem__alloc_SecMap ( void )
{
   Word    i, j;
   SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
   if (0) VG_(printf)("alloc_SecMap %p\n",sm);
   tl_assert(sm);
   sm->magic = SecMap_MAGIC;
   for (i = 0; i < N_SECMAP_ZLINES; i++) {
      sm->linesZ[i].dict[0] = SVal_NOACCESS;
      sm->linesZ[i].dict[1] = SVal_INVALID;
      sm->linesZ[i].dict[2] = SVal_INVALID;
      sm->linesZ[i].dict[3] = SVal_INVALID;
      for (j = 0; j < N_LINE_ARANGE/4; j++)
         sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
   }
   sm->linesF      = NULL;
   sm->linesF_size = 0;
   stats__secmaps_allocd++;
   stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
   stats__secmap_linesZ_allocd    += N_SECMAP_ZLINES;
   stats__secmap_linesZ_bytes     += N_SECMAP_ZLINES * sizeof(LineZ);
   return sm;
}

typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };

static SecMap* shmem__find_SecMap ( Addr ga )
{
   SecMap* sm    = NULL;
   Addr    gaKey = shmem__round_to_SecMap_base(ga);
   // Cache
   stats__secmaps_search++;
   if (LIKELY(gaKey == smCache[0].gaKey))
      return smCache[0].sm;
   if (LIKELY(gaKey == smCache[1].gaKey)) {
      SMCacheEnt tmp = smCache[0];
      smCache[0] = smCache[1];
      smCache[1] = tmp;
      return smCache[0].sm;
   }
   if (gaKey == smCache[2].gaKey) {
      SMCacheEnt tmp = smCache[1];
      smCache[1] = smCache[2];
      smCache[2] = tmp;
      return smCache[1].sm;
   }
   // end Cache
   stats__secmaps_search_slow++;
   if (VG_(lookupFM)( map_shmem,
                      NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
      tl_assert(sm != NULL);
      smCache[2] = smCache[1];
      smCache[1] = smCache[0];
      smCache[0].gaKey = gaKey;
      smCache[0].sm    = sm;
   } else {
      tl_assert(sm == NULL);
   }
   return sm;
}

static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
{
   SecMap* sm = shmem__find_SecMap ( ga );
   if (LIKELY(sm)) {
      return sm;
   } else {
      /* create a new one */
      Addr gaKey = shmem__round_to_SecMap_base(ga);
      sm = shmem__alloc_SecMap();
      tl_assert(sm);
      VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
      return sm;
   }
}


/* ------------ LineF and LineZ related ------------ */

static void rcinc_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcinc(lineF->w64s[i]);
}

static void rcdec_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcdec(lineF->w64s[i]);
}

static void rcinc_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcinc(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
}

static void rcdec_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcdec(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
}

inline
static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
   Word bix, shft, mask, prep;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   mask = 3 << shft;
   prep = b2 << shft;
   arr[bix] = (arr[bix] & ~mask) | prep;
}

inline
static UWord read_twobit_array ( UChar* arr, UWord ix ) {
   Word bix, shft;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   return (arr[bix] >> shft) & 3;
}
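
/* Worked example (illustrative): write_twobit_array(arr, 5, 3)
   computes bix == 1, shft == 2, and so sets bits [3:2] of arr[1] to
   0b11; read_twobit_array(arr, 5) then extracts (arr[1] >> 2) & 3.
   Four dict indexes are thus packed per byte: ix 0..3 live in arr[0],
   ix 4..7 in arr[1], and so on. */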

/* Given address 'tag', find either the Z or F line containing relevant
   data, so it can be read into the cache.
*/
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
                                  /*OUT*/LineF** fp, Addr tag ) {
   LineZ*  lineZ;
   LineF*  lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      lineZ = NULL;
   }
   *zp = lineZ;
   *fp = lineF;
}

/* Given address 'tag', return the relevant SecMap and the index of
   the LineZ within it, in the expectation that the line is to be
   overwritten.  Regardless of whether 'tag' is currently associated
   with a Z or F representation, do an rcdec on the current
   representation, in recognition of the fact that the contents are
   just about to be overwritten. */
static __attribute__((noinline))
void find_Z_for_writing ( /*OUT*/SecMap** smp,
                          /*OUT*/Word* zixp,
                          Addr tag ) {
   LineZ*  lineZ;
   LineF*  lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   /* re RCs, we are freeing up this LineZ/LineF so that new data can
      be parked in it.  Hence have to rcdec it accordingly. */
   /* If lineZ has an associated lineF, free it up. */
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      rcdec_LineF(lineF);
      lineF->inUse = False;
   } else {
      rcdec_LineZ(lineZ);
   }
   *smp  = sm;
   *zixp = zix;
}

static __attribute__((noinline))
void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
   UInt   i, new_size;
   LineF* nyu;

   if (sm->linesF) {
      tl_assert(sm->linesF_size > 0);
   } else {
      tl_assert(sm->linesF_size == 0);
   }

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         if (!sm->linesF[i].inUse) {
            *fixp = (Word)i;
            return;
         }
      }
   }

   /* No free F line found.  Expand existing array and try again. */
   new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
   nyu      = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
                           new_size * sizeof(LineF) );
   tl_assert(nyu);

   stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
   stats__secmap_linesF_bytes  += (new_size - sm->linesF_size)
                                  * sizeof(LineF);

   if (0)
      VG_(printf)("SM %p: expand F array from %d to %d\n",
                  sm, (Int)sm->linesF_size, new_size);

   for (i = 0; i < new_size; i++)
      nyu[i].inUse = False;

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         tl_assert(sm->linesF[i].inUse);
         nyu[i] = sm->linesF[i];
      }
      VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
      HG_(free)(sm->linesF);
   }

   sm->linesF      = nyu;
   sm->linesF_size = new_size;

   for (i = 0; i < sm->linesF_size; i++) {
      if (!sm->linesF[i].inUse) {
         *fixp = (Word)i;
         return;
      }
   }

   /*NOTREACHED*/
   tl_assert(0);
}


/* ------------ CacheLine and implicit-tree related ------------ */

__attribute__((unused))
static void pp_CacheLine ( CacheLine* cl ) {
   Word i;
   if (!cl) {
      VG_(printf)("%s","pp_CacheLine(NULL)\n");
      return;
   }
   for (i = 0; i < N_LINE_TREES; i++)
      VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
   for (i = 0; i < N_LINE_ARANGE; i++)
      VG_(printf)("    sval: %08lx\n", (UWord)cl->svals[i]);
}

static UChar descr_to_validbits ( UShort descr )
{
   /* a.k.a Party Time for gcc's constant folder */
#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
                b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0)  \
             ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
                          ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
                          ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
                          ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
                          ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
                          ( (b16_2) << 4)  | ( (b64)   << 3)  | \
                          ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
                          ( (b16_0) << 0) ) )

#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
             ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
                         ( (bit5) << 5) | ( (bit4) << 4) | \
                         ( (bit3) << 3) | ( (bit2) << 2) | \
                         ( (bit1) << 1) | ( (bit0) << 0) ) )

   /* these should all get folded out at compile time */
   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);

   switch (descr) {
   /*
              +--------------------------------- TREE_DESCR_8_7
              |             +------------------- TREE_DESCR_8_0
              |             |  +---------------- TREE_DESCR_16_3
              |             |  | +-------------- TREE_DESCR_32_1
              |             |  | | +------------ TREE_DESCR_16_2
              |             |  | | |  +--------- TREE_DESCR_64
              |             |  | | |  |  +------ TREE_DESCR_16_1
              |             |  | | |  |  | +---- TREE_DESCR_32_0
              |             |  | | |  |  | | +-- TREE_DESCR_16_0
              |             |  | | |  |  | | |
              |             |  | | |  |  | | |   GRANULARITY, 7 -> 0 */
   case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8  8 8 8 8 */
      return BYTE(1,1,1,1,1,1,1,1);
   case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16  8 8 8 8 */
      return BYTE(1,1,0,1,1,1,1,1);
   case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8  8 8 8 8 */
      return BYTE(0,1,1,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16  8 8 8 8 */
      return BYTE(0,1,0,1,1,1,1,1);

   case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8  8 8 16 */
      return BYTE(1,1,1,1,1,1,0,1);
   case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16  8 8 16 */
      return BYTE(1,1,0,1,1,1,0,1);
   case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8  8 8 16 */
      return BYTE(0,1,1,1,1,1,0,1);
   case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16  8 8 16 */
      return BYTE(0,1,0,1,1,1,0,1);

   case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8  16 8 8 */
      return BYTE(1,1,1,1,0,1,1,1);
   case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16  16 8 8 */
      return BYTE(1,1,0,1,0,1,1,1);
   case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8  16 8 8 */
      return BYTE(0,1,1,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16  16 8 8 */
      return BYTE(0,1,0,1,0,1,1,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8  16 16 */
      return BYTE(1,1,1,1,0,1,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16  16 16 */
      return BYTE(1,1,0,1,0,1,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8  16 16 */
      return BYTE(0,1,1,1,0,1,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16  16 16 */
      return BYTE(0,1,0,1,0,1,0,1);

   case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32  8 8 8 8 */
      return BYTE(0,0,0,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32  8 8 16 */
      return BYTE(0,0,0,1,1,1,0,1);
   case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32  16 8 8 */
      return BYTE(0,0,0,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32  16 16 */
      return BYTE(0,0,0,1,0,1,0,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8  32 */
      return BYTE(1,1,1,1,0,0,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16  32 */
      return BYTE(1,1,0,1,0,0,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8  32 */
      return BYTE(0,1,1,1,0,0,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16  32 */
      return BYTE(0,1,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
      return BYTE(0,0,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
      return BYTE(0,0,0,0,0,0,0,1);

   default: return BYTE(0,0,0,0,0,0,0,0);
      /* INVALID - any valid descr produces at least one
         valid bit in tree[0..7] */
   }
   /* NOTREACHED */
   tl_assert(0);

#  undef DESCR
#  undef BYTE
}

__attribute__((unused))
static Bool is_sane_Descr ( UShort descr ) {
   return descr_to_validbits(descr) != 0;
}

static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
   VG_(sprintf)(dst,
                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
                (Int)((descr & TREE_DESCR_8_7)  ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_6)  ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_5)  ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_4)  ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_3)  ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_2)  ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_1)  ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_0)  ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
   );
}
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
                (Int)((byte & 128) ? 1 : 0),
                (Int)((byte &  64) ? 1 : 0),
                (Int)((byte &  32) ? 1 : 0),
                (Int)((byte &  16) ? 1 : 0),
                (Int)((byte &   8) ? 1 : 0),
                (Int)((byte &   4) ? 1 : 0),
                (Int)((byte &   2) ? 1 : 0),
                (Int)((byte &   1) ? 1 : 0)
   );
}

static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
   Word  i;
   UChar validbits = descr_to_validbits(descr);
   HChar buf[128], buf2[128];
   if (validbits == 0)
      goto bad;
   for (i = 0; i < 8; i++) {
      if (validbits & (1<<i)) {
         if (tree[i] == SVal_INVALID)
            goto bad;
      } else {
         if (tree[i] != SVal_INVALID)
            goto bad;
      }
   }
   return True;
  bad:
   sprintf_Descr( buf, descr );
   sprintf_Byte( buf2, validbits );
   VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
   VG_(printf)("   validbits 0x%02lx    %s\n", (UWord)validbits, buf2);
   VG_(printf)("       descr 0x%04lx  %s\n", (UWord)descr, buf);
   for (i = 0; i < 8; i++)
      VG_(printf)("   [%ld] 0x%016llx\n", i, tree[i]);
   VG_(printf)("%s","}\n");
   return False;
}

static Bool is_sane_CacheLine ( CacheLine* cl )
{
   Word tno, cloff;

   if (!cl) goto bad;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = cl->descrs[tno];
      SVal*  tree  = &cl->svals[cloff];
      if (!is_sane_Descr_and_Tree(descr, tree))
         goto bad;
   }
   tl_assert(cloff == N_LINE_ARANGE);
   return True;
  bad:
   pp_CacheLine(cl);
   return False;
}

static UShort normalise_tree ( /*MOD*/SVal* tree )
{
   UShort descr;
   /* pre: incoming tree[0..7] does not have any invalid shvals, in
      particular no zeroes. */
   if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
                || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
                || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
                || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
      tl_assert(0);

   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   /* build 16-bit layer */
   if (tree[1] == tree[0]) {
      tree[1] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
      descr |= TREE_DESCR_16_0;
   }
   if (tree[3] == tree[2]) {
      tree[3] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
      descr |= TREE_DESCR_16_1;
   }
   if (tree[5] == tree[4]) {
      tree[5] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
      descr |= TREE_DESCR_16_2;
   }
   if (tree[7] == tree[6]) {
      tree[7] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
      descr |= TREE_DESCR_16_3;
   }
   /* build 32-bit layer */
   if (tree[2] == tree[0]
       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
      tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
      descr |= TREE_DESCR_32_0;
   }
   if (tree[6] == tree[4]
       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
      tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
      descr |= TREE_DESCR_32_1;
   }
   /* build 64-bit layer */
   if (tree[4] == tree[0]
       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
      tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
      descr |= TREE_DESCR_64;
   }
   return descr;
}
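
/* Worked example (illustrative): if all eight leaves hold the same
   SVal V, the three merge layers fire in turn:
      16-bit: descr -> 16_3|16_2|16_1|16_0, tree[1,3,5,7] = SVal_INVALID
      32-bit: descr -> 32_1|32_0,           tree[2,6]     = SVal_INVALID
      64-bit: descr -> 64,                  tree[4]       = SVal_INVALID
   leaving V only at tree[0] -- exactly the state which
   sequentialise_CacheLine below emits as a single count==8 run. */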

/* This takes a cacheline where all the data is at the leaves
   (w8[..]) and builds a correctly normalised tree. */
static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
{
   Word tno, cloff;
   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      SVal* tree = &cl->svals[cloff];
      cl->descrs[tno] = normalise_tree( tree );
   }
   tl_assert(cloff == N_LINE_ARANGE);
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   stats__cline_normalises++;
}


typedef struct { UChar count; SVal sval; } CountedSVal;

static
void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
                               /*OUT*/Word* dstUsedP,
                               Word nDst, CacheLine* src )
{
   Word tno, cloff, dstUsed;

   tl_assert(nDst == N_LINE_ARANGE);
   dstUsed = 0;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = src->descrs[tno];
      SVal*  tree  = &src->svals[cloff];

      /* sequentialise the tree described by (descr,tree). */
#     define PUT(_n,_v)                                \
         do { dst[dstUsed  ].count = (_n);             \
              dst[dstUsed++].sval  = (_v);             \
         } while (0)

      /* byte 0 */
      if (descr & TREE_DESCR_64)   PUT(8, tree[0]); else
      if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
      if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
      if (descr & TREE_DESCR_8_0)  PUT(1, tree[0]);
      /* byte 1 */
      if (descr & TREE_DESCR_8_1)  PUT(1, tree[1]);
      /* byte 2 */
      if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
      if (descr & TREE_DESCR_8_2)  PUT(1, tree[2]);
      /* byte 3 */
      if (descr & TREE_DESCR_8_3)  PUT(1, tree[3]);
      /* byte 4 */
      if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
      if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
      if (descr & TREE_DESCR_8_4)  PUT(1, tree[4]);
      /* byte 5 */
      if (descr & TREE_DESCR_8_5)  PUT(1, tree[5]);
      /* byte 6 */
      if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
      if (descr & TREE_DESCR_8_6)  PUT(1, tree[6]);
      /* byte 7 */
      if (descr & TREE_DESCR_8_7)  PUT(1, tree[7]);

#     undef PUT
      /* END sequentialise the tree described by (descr,tree). */

   }
   tl_assert(cloff == N_LINE_ARANGE);
   tl_assert(dstUsed <= nDst);

   *dstUsedP = dstUsed;
}

/* Write the cacheline 'wix' to backing store.  Where it ends up
   is determined by its tag field. */
static __attribute__((noinline)) void cacheline_wback ( UWord wix )
{
   Word        i, j, k, m;
   Addr        tag;
   SecMap*     sm;
   CacheLine*  cl;
   LineZ*      lineZ;
   LineF*      lineF;
   Word        zix, fix, csvalsUsed;
   CountedSVal csvals[N_LINE_ARANGE];
   SVal        sv;

   if (0)
      VG_(printf)("scache wback line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag = cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* The cache line may have been invalidated; if so, ignore it. */
   if (!is_valid_scache_tag(tag))
      return;

   /* Where are we going to put it? */
   sm    = NULL;
   lineZ = NULL;
   lineF = NULL;
   zix = fix = -1;

   /* find the Z line to write in and rcdec it or the associated F
      line. */
   find_Z_for_writing( &sm, &zix, tag );

   tl_assert(sm);
   tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];

   /* Generate the data to be stored */
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */

   csvalsUsed = -1;
   sequentialise_CacheLine( csvals, &csvalsUsed,
                            N_LINE_ARANGE, cl );
   tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
   if (0) VG_(printf)("%lu ", csvalsUsed);

   lineZ->dict[0] = lineZ->dict[1]
                  = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;

   /* i indexes actual shadow values, k is cursor in csvals */
   i = 0;
   for (k = 0; k < csvalsUsed; k++) {

      sv = csvals[k].sval;
      if (CHECK_ZSM)
         tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
      /* do we already have it? */
      if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
      if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
      if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
      if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
      /* no.  look for a free slot. */
      if (CHECK_ZSM)
         tl_assert(sv != SVal_INVALID);
      if (lineZ->dict[0]
          == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
      if (lineZ->dict[1]
          == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
      if (lineZ->dict[2]
          == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
      if (lineZ->dict[3]
          == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
      break; /* we'll have to use the f rep */
     dict_ok:
      m = csvals[k].count;
      if (m == 8) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         write_twobit_array( lineZ->ix2s, i+4, j );
         write_twobit_array( lineZ->ix2s, i+5, j );
         write_twobit_array( lineZ->ix2s, i+6, j );
         write_twobit_array( lineZ->ix2s, i+7, j );
         i += 8;
      }
      else if (m == 4) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         i += 4;
      }
      else if (m == 1) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         i += 1;
      }
      else if (m == 2) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         i += 2;
      }
      else {
         tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
      }

   }

   if (LIKELY(i == N_LINE_ARANGE)) {
      /* Construction of the compressed representation was
         successful. */
      rcinc_LineZ(lineZ);
      stats__cache_Z_wbacks++;
   } else {
      /* Cannot use the compressed(z) representation.  Use the full(f)
         rep instead. */
      tl_assert(i >= 0 && i < N_LINE_ARANGE);
      alloc_F_for_writing( sm, &fix );
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(!lineF->inUse);
      lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
      lineZ->dict[1] = (SVal)fix;
      lineF->inUse = True;
      i = 0;
      for (k = 0; k < csvalsUsed; k++) {
         if (CHECK_ZSM)
            tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
         sv = csvals[k].sval;
         if (CHECK_ZSM)
            tl_assert(sv != SVal_INVALID);
         for (m = csvals[k].count; m > 0; m--) {
            lineF->w64s[i] = sv;
            i++;
         }
      }
      tl_assert(i == N_LINE_ARANGE);
      rcinc_LineF(lineF);
      stats__cache_F_wbacks++;
   }
}

/* Fetch the cacheline 'wix' from the backing store.  The tag
   associated with 'wix' is assumed to have already been filled in;
   hence that is used to determine where in the backing store to read
   from. */
static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
{
   Word       i;
   Addr       tag;
   CacheLine* cl;
   LineZ*     lineZ;
   LineF*     lineF;

   if (0)
      VG_(printf)("scache fetch line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag = cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* reject nonsense requests */
   tl_assert(is_valid_scache_tag(tag));

   lineZ = NULL;
   lineF = NULL;
   find_ZF_for_reading( &lineZ, &lineF, tag );
   tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );

   /* expand the data into the bottom layer of the tree, then get
      cacheline_normalise to build the descriptor array. */
   if (lineF) {
      tl_assert(lineF->inUse);
      for (i = 0; i < N_LINE_ARANGE; i++) {
         cl->svals[i] = lineF->w64s[i];
      }
      stats__cache_F_fetches++;
   } else {
      for (i = 0; i < N_LINE_ARANGE; i++) {
         SVal sv;
         UWord ix = read_twobit_array( lineZ->ix2s, i );
         /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
         sv = lineZ->dict[ix];
         tl_assert(sv != SVal_INVALID);
         cl->svals[i] = sv;
      }
      stats__cache_Z_fetches++;
   }
   normalise_CacheLine( cl );
}

static void shmem__invalidate_scache ( void ) {
   Word wix;
   if (0) VG_(printf)("%s","scache inval\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_invals++;
}

static void shmem__flush_and_invalidate_scache ( void ) {
   Word wix;
   Addr tag;
   if (0) VG_(printf)("%s","scache flush and invalidate\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      tag = cache_shmem.tags0[wix];
      if (tag == 1/*INVALID*/) {
         /* already invalid; nothing to do */
      } else {
         tl_assert(is_valid_scache_tag(tag));
         cacheline_wback( wix );
      }
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_flushes++;
   stats__cache_invals++;
}


static inline Bool aligned16 ( Addr a ) {
   return 0 == (a & 1);
}
static inline Bool aligned32 ( Addr a ) {
   return 0 == (a & 3);
}
static inline Bool aligned64 ( Addr a ) {
   return 0 == (a & 7);
}
static inline UWord get_cacheline_offset ( Addr a ) {
   return (UWord)(a & (N_LINE_ARANGE - 1));
}
static inline Addr cacheline_ROUNDUP ( Addr a ) {
   return ROUNDUP(a, N_LINE_ARANGE);
}
static inline Addr cacheline_ROUNDDN ( Addr a ) {
   return ROUNDDN(a, N_LINE_ARANGE);
}
static inline UWord get_treeno ( Addr a ) {
   return get_cacheline_offset(a) >> 3;
}
static inline UWord get_tree_offset ( Addr a ) {
   return a & 7;
}

static __attribute__((noinline))
       CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
static inline CacheLine* get_cacheline ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */
   Addr  tag = a & ~(N_LINE_ARANGE - 1);
   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
   stats__cache_totrefs++;
   if (LIKELY(tag == cache_shmem.tags0[wix])) {
      return &cache_shmem.lyns0[wix];
   } else {
      return get_cacheline_MISS( a );
   }
}

static __attribute__((noinline))
       CacheLine* get_cacheline_MISS ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */

   CacheLine* cl;
   Addr*      tag_old_p;
   Addr       tag = a & ~(N_LINE_ARANGE - 1);
   UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);

   tl_assert(tag != cache_shmem.tags0[wix]);

   /* Dump the old line into the backing store. */
   stats__cache_totmisses++;

   cl        = &cache_shmem.lyns0[wix];
   tag_old_p = &cache_shmem.tags0[wix];

   if (is_valid_scache_tag( *tag_old_p )) {
      /* EXPENSIVE and REDUNDANT: callee does it */
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      cacheline_wback( wix );
   }
   /* and reload the new one */
   *tag_old_p = tag;
   cacheline_fetch( wix );
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   return cl;
}

static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_64to32pulldown++;
   switch (toff) {
      case 0: case 4:
         tl_assert(descr & TREE_DESCR_64);
         tree[4] = tree[0];
         descr &= ~TREE_DESCR_64;
         descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_32to16pulldown++;
   switch (toff) {
      case 0: case 2:
         if (!(descr & TREE_DESCR_32_0)) {
            descr = pulldown_to_32(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_32_0);
         tree[2] = tree[0];
         descr &= ~TREE_DESCR_32_0;
         descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
         break;
      case 4: case 6:
         if (!(descr & TREE_DESCR_32_1)) {
            descr = pulldown_to_32(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_32_1);
         tree[6] = tree[4];
         descr &= ~TREE_DESCR_32_1;
         descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_16to8pulldown++;
   switch (toff) {
      case 0: case 1:
         if (!(descr & TREE_DESCR_16_0)) {
            descr = pulldown_to_16(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_16_0);
         tree[1] = tree[0];
         descr &= ~TREE_DESCR_16_0;
         descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
         break;
      case 2: case 3:
         if (!(descr & TREE_DESCR_16_1)) {
            descr = pulldown_to_16(tree, 2, descr);
         }
         tl_assert(descr & TREE_DESCR_16_1);
         tree[3] = tree[2];
         descr &= ~TREE_DESCR_16_1;
         descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
         break;
      case 4: case 5:
         if (!(descr & TREE_DESCR_16_2)) {
            descr = pulldown_to_16(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_16_2);
         tree[5] = tree[4];
         descr &= ~TREE_DESCR_16_2;
         descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
         break;
      case 6: case 7:
         if (!(descr & TREE_DESCR_16_3)) {
            descr = pulldown_to_16(tree, 6, descr);
         }
         tl_assert(descr & TREE_DESCR_16_3);
         tree[7] = tree[6];
         descr &= ~TREE_DESCR_16_3;
         descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}


static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_0;
         break;
      case 2:
         mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_1;
         break;
      case 4:
         mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_2;
         break;
      case 6:
         mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_3;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         if (!(descr & TREE_DESCR_16_0))
            descr = pullup_descr_to_16(descr, 0);
         if (!(descr & TREE_DESCR_16_1))
            descr = pullup_descr_to_16(descr, 2);
         mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_0;
         break;
      case 4:
         if (!(descr & TREE_DESCR_16_2))
            descr = pullup_descr_to_16(descr, 4);
         if (!(descr & TREE_DESCR_16_3))
            descr = pullup_descr_to_16(descr, 6);
         mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_1;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0: case 4:
         return 0 != (descr & TREE_DESCR_64);
      default:
         tl_assert(0);
   }
}

static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0:
         return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
      case 2:
         return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
      case 4:
         return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
      case 6:
         return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
      default:
         tl_assert(0);
   }
}

/* ------------ Cache management ------------ */

static void zsm_flush_cache ( void )
{
   shmem__flush_and_invalidate_scache();
}


static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
{
   tl_assert( sizeof(UWord) == sizeof(Addr) );

   rcinc = p_rcinc;
   rcdec = p_rcdec;

   tl_assert(map_shmem == NULL);
   map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
                           HG_(free),
                           NULL/*unboxed UWord cmp*/);
   tl_assert(map_shmem != NULL);
   shmem__invalidate_scache();

   /* a SecMap must contain an integral number of CacheLines */
   tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
   /* also ... a CacheLine holds an integral number of trees */
   tl_assert(0 == (N_LINE_ARANGE % 8));
}

/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//            SECTION END compressed shadow memory             //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                SECTION BEGIN vts primitives                 //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////


/* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely
   being compact stand-ins for Thr*'s.  Use these functions to map
   between them. */
static ThrID Thr__to_ThrID   ( Thr*  thr   ); /* fwds */
static Thr*  Thr__from_ThrID ( ThrID thrid ); /* fwds */


/* Scalar Timestamp.  We have to store a lot of these, so there is
   some effort to make them as small as possible.  Logically they are
   a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
   We pack it into 64 bits by representing the Thr* using a ThrID, a
   small integer (18 bits), and a 46 bit integer for the timestamp
   number.  The 46/18 split is arbitrary, but has the effect that
   Helgrind can only handle programs that create 2^18 or fewer threads
   over their entire lifetime, and have no more than 2^46 timestamp
   ticks (synchronisation operations on the same thread).

   This doesn't seem like much of a limitation.  2^46 ticks is about
   7.04e+13, and if each tick (optimistically) takes the machine 1000
   cycles to process, then the minimum time to process that many ticks
   at a clock rate of 5 GHz is 162.9 days.  And that's doing nothing
   but VTS ticks, which isn't realistic.

   NB1: SCALARTS_N_THRBITS must be 32 or lower, so they fit in a ThrID
   (== a UInt).

   NB2: thrid values are issued upwards from 1024, and values less
   than that aren't valid.  This isn't per se necessary (any order
   will do, so long as they are unique), but it does help ensure they
   are less likely to get confused with the various other kinds of
   small-integer thread ids drifting around (eg, TId).

   NB3: this probably also relies on the fact that Thr's are never
   deallocated -- they exist forever.  Hence the 1-1 mapping from
   Thr's to thrid values (set up in Thr__new) persists forever.

   NB4: temp_max_sized_VTS is allocated at startup and never freed.
   It is a maximum sized VTS, so has (1 << SCALARTS_N_THRBITS)
   ScalarTSs.  So we can't make SCALARTS_N_THRBITS too large without
   making the memory use for this go sky-high.  With
   SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
   like an OK tradeoff.  If more than 256k threads need to be
   supported, we could change SCALARTS_N_THRBITS to 20, which would
   facilitate supporting 1 million threads at the cost of 8MB storage
   for temp_max_sized_VTS.
*/
#define SCALARTS_N_THRBITS 18  /* valid range: 11 to 32 inclusive */
#define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
typedef
   struct {
      ThrID thrid : SCALARTS_N_THRBITS;
      ULong tym   : SCALARTS_N_TYMBITS;
   }
   ScalarTS;
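
/* Sanity arithmetic for the figures quoted above: 2^46 ticks is
   70,368,744,177,664, about 7.04e+13.  At an assumed 1000 cycles per
   tick on a 5 GHz machine, that is 7.04e+13 * 1000 / 5e9 ~= 1.41e7
   seconds, i.e. roughly 162.9 days.  And 2^18 ScalarTSs of 8 bytes
   each is exactly 2MB, matching NB4. */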

#define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)


__attribute__((noreturn))
static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
{
   if (due_to_nThrs) {
      HChar* s =
         "\n"
         "Helgrind: cannot continue, run aborted: too many threads.\n"
         "Sorry.  Helgrind can only handle programs that create\n"
         "%'llu or fewer threads over their entire lifetime.\n"
         "\n";
      VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024));
   } else {
      HChar* s =
         "\n"
         "Helgrind: cannot continue, run aborted: too many\n"
         "synchronisation events.  Sorry.  Helgrind can only handle\n"
         "programs which perform %'llu or fewer\n"
         "inter-thread synchronisation events (locks, unlocks, etc).\n"
         "\n";
      VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1);
   }
   VG_(exit)(1);
   /*NOTREACHED*/
   tl_assert(0); /*wtf?!*/
}


/* VtsIDs: Unique small-integer IDs for VTSs.  VtsIDs can't exceed 30
   bits, since they have to be packed into the lowest 30 bits of an
   SVal. */
typedef UInt VtsID;
#define VtsID_INVALID 0xFFFFFFFF

/* A VTS contains .ts, its vector clock, and also .id, a field to hold
   a backlink for the caller's convenience.  Since we have no idea
   what to set that to in the library, it always gets set to
   VtsID_INVALID. */
typedef
   struct {
      VtsID    id;
      UInt     usedTS;
      UInt     sizeTS;
      ScalarTS ts[0];
   }
   VTS;

/* Allocate a VTS capable of storing 'sizeTS' entries. */
static VTS* VTS__new ( HChar* who, UInt sizeTS );

/* Make a clone of 'vts', resizing the array to exactly match the
   number of ScalarTSs present. */
static VTS* VTS__clone ( HChar* who, VTS* vts );

/* Delete this VTS in its entirety. */
static void VTS__delete ( VTS* vts );

/* Create a new singleton VTS in 'out'.  Caller must have
   pre-allocated 'out' sufficiently big to hold the result in all
   possible cases. */
static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym );

/* Create in 'out' a VTS which is the same as 'vts' except with
   vts[me]++, so to speak.  Caller must have pre-allocated 'out'
   sufficiently big to hold the result in all possible cases. */
static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts );

/* Create in 'out' a VTS which is the join (max) of 'a' and
   'b'.  Caller must have pre-allocated 'out' sufficiently big to hold
   the result in all possible cases. */
static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b );

/* Compute the partial ordering relation of the two args.  Although we
   could be completely general and return an enumeration value (EQ,
   LT, GT, UN), in fact we only need LEQ, and so we may as well
   hardwire that fact.

   Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an
   invalid ThrID).  In the latter case, the returned ThrID indicates
   a discovered point at which they fail to be LEQ.  There may be more
   than one such point, but we only care about seeing one of them, not
   all of them.  This rather strange convention is used because
   sometimes we want to know the actual index at which they first
   differ. */
static UInt VTS__cmpLEQ ( VTS* a, VTS* b );

/* Compute an arbitrary structural (total) ordering on the two args,
   based on their VCs, so they can be looked up in a table, tree, etc.
   Returns -1, 0 or 1. */
static Word VTS__cmp_structural ( VTS* a, VTS* b );

/* Debugging only.  Display the given VTS in the buffer. */
static void VTS__show ( HChar* buf, Int nBuf, VTS* vts );

/* Debugging only.  Return vts[index], so to speak. */
static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );


/*--------------- to do with Vector Timestamps ---------------*/

static Bool is_sane_VTS ( VTS* vts )
{
   UWord     i, n;
   ScalarTS  *st1, *st2;
   if (!vts) return False;
   if (!vts->ts) return False;
   if (vts->usedTS > vts->sizeTS) return False;
   n = vts->usedTS;
   if (n == 1) {
      st1 = &vts->ts[0];
      if (st1->tym == 0)
         return False;
   }
   else
   if (n >= 2) {
      for (i = 0; i < n-1; i++) {
         st1 = &vts->ts[i];
         st2 = &vts->ts[i+1];
         if (st1->thrid >= st2->thrid)
            return False;
         if (st1->tym == 0 || st2->tym == 0)
            return False;
      }
   }
   return True;
}


/* Create a new, empty VTS.
*/
static VTS* VTS__new ( HChar* who, UInt sizeTS )
{
   VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS));
   tl_assert(vts->usedTS == 0);
   vts->sizeTS = sizeTS;
   *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL;
   return vts;
}

/* Clone this VTS.
*/
static VTS* VTS__clone ( HChar* who, VTS* vts )
{
   tl_assert(vts);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   UInt nTS = vts->usedTS;
   VTS* clone = VTS__new(who, nTS);
   clone->id = vts->id;
   clone->sizeTS = nTS;
   clone->usedTS = nTS;
   UInt i;
   for (i = 0; i < nTS; i++) {
      clone->ts[i] = vts->ts[i];
   }
   tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   return clone;
}


/* Delete this VTS in its entirety.
*/
static void VTS__delete ( VTS* vts )
{
   tl_assert(vts);
   tl_assert(vts->usedTS <= vts->sizeTS);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   HG_(free)(vts);
}


/* Create a new singleton VTS.
*/
static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym )
{
   tl_assert(thr);
   tl_assert(tym >= 1);
   tl_assert(out);
   tl_assert(out->usedTS == 0);
   tl_assert(out->sizeTS >= 1);
   UInt hi = out->usedTS++;
   out->ts[hi].thrid = Thr__to_ThrID(thr);
   out->ts[hi].tym   = tym;
}


/* Return a new VTS in which vts[me]++, so to speak.  'vts' itself is
   not modified.
*/
static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts )
{
   UInt  i, n;
   ThrID me_thrid;
   Bool  found = False;

   stats__vts__tick++;

   tl_assert(out);
   tl_assert(out->usedTS == 0);
   if (vts->usedTS >= ThrID_MAX_VALID)
      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   tl_assert(out->sizeTS >= 1 + vts->usedTS);

   tl_assert(me);
   me_thrid = Thr__to_ThrID(me);
   tl_assert(is_sane_VTS(vts));
   n = vts->usedTS;

   /* Copy all entries which precede 'me'. */
   for (i = 0; i < n; i++) {
      ScalarTS* here = &vts->ts[i];
      if (UNLIKELY(here->thrid >= me_thrid))
         break;
      UInt hi = out->usedTS++;
      out->ts[hi] = *here;
   }

   /* 'i' now indicates the next entry to copy, if any.
      There are 3 possibilities:
      (a) there is no next entry (we used them all up already):
          add (me_thrid,1) to the output, and quit
      (b) there is a next entry, and its thrid > me_thrid:
          add (me_thrid,1) to the output, then copy the remaining entries
      (c) there is a next entry, and its thrid == me_thrid:
          copy it to the output but increment its timestamp value.
          Then copy the remaining entries.  (c) is the common case.
   */
   tl_assert(i >= 0 && i <= n);
   if (i == n) { /* case (a) */
      UInt hi = out->usedTS++;
      out->ts[hi].thrid = me_thrid;
      out->ts[hi].tym   = 1;
   } else {
      /* cases (b) and (c) */
      ScalarTS* here = &vts->ts[i];
      if (me_thrid == here->thrid) { /* case (c) */
         if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
            /* We're hosed.  We have to stop. */
            scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
         }
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = here->thrid;
         out->ts[hi].tym   = here->tym + 1;
         i++;
         found = True;
      } else { /* case (b) */
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = me_thrid;
         out->ts[hi].tym   = 1;
      }
      /* And copy any remaining entries. */
      for (/*keepgoing*/; i < n; i++) {
         ScalarTS* here2 = &vts->ts[i];
         UInt hi = out->usedTS++;
         out->ts[hi] = *here2;
      }
   }

   tl_assert(is_sane_VTS(out));
   tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1));
   tl_assert(out->usedTS <= out->sizeTS);
}
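
/* (Added worked example, not original text: for a hypothetical input
   vts == [1024:3, 1026:7], where each pair is thrid:tym, VTS__tick
   produces
      me == thrid 1027  ->  case (a): [1024:3, 1026:7, 1027:1]
      me == thrid 1025  ->  case (b): [1024:3, 1025:1, 1026:7]
      me == thrid 1026  ->  case (c): [1024:3, 1026:8]
   In all three cases the output stays sorted by thrid, as
   is_sane_VTS requires.) */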


/* Return a new VTS constructed as the join (max) of the 2 args.
   Neither arg is modified.
*/
static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b )
{
   UInt  ia, ib, useda, usedb;
   ULong tyma, tymb, tymMax;
   ThrID thrid;
   UInt  ncommon = 0;

   stats__vts__join++;

   tl_assert(a);
   tl_assert(b);
   useda = a->usedTS;
   usedb = b->usedTS;

   tl_assert(out);
   tl_assert(out->usedTS == 0);
   /* overly conservative test, but doing better involves comparing
      the two VTSs, which we don't want to do at this point. */
   if (useda + usedb >= ThrID_MAX_VALID)
      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   tl_assert(out->sizeTS >= useda + usedb);

   ia = ib = 0;

   while (1) {

      /* This logic is to enumerate triples (thrid, tyma, tymb) drawn
         from a and b in order, where thrid is the next ThrID
         occurring in either a or b, and tyma/b are the relevant
         scalar timestamps, taking into account implicit zeroes. */
      tl_assert(ia >= 0 && ia <= useda);
      tl_assert(ib >= 0 && ib <= usedb);

      if (ia == useda && ib == usedb) {
         /* both empty - done */
         break;

      } else if (ia == useda && ib != usedb) {
         /* a empty, use up b */
         ScalarTS* tmpb = &b->ts[ib];
         thrid = tmpb->thrid;
         tyma  = 0;
         tymb  = tmpb->tym;
         ib++;

      } else if (ia != useda && ib == usedb) {
         /* b empty, use up a */
         ScalarTS* tmpa = &a->ts[ia];
         thrid = tmpa->thrid;
         tyma  = tmpa->tym;
         tymb  = 0;
         ia++;

      } else {
         /* both not empty; extract lowest-ThrID'd triple */
         ScalarTS* tmpa = &a->ts[ia];
         ScalarTS* tmpb = &b->ts[ib];
         if (tmpa->thrid < tmpb->thrid) {
            /* a has the lowest unconsidered ThrID */
            thrid = tmpa->thrid;
            tyma  = tmpa->tym;
            tymb  = 0;
            ia++;
         } else if (tmpa->thrid > tmpb->thrid) {
            /* b has the lowest unconsidered ThrID */
            thrid = tmpb->thrid;
            tyma  = 0;
            tymb  = tmpb->tym;
            ib++;
         } else {
            /* they both next mention the same ThrID */
            tl_assert(tmpa->thrid == tmpb->thrid);
            thrid = tmpa->thrid; /* == tmpb->thrid */
            tyma  = tmpa->tym;
            tymb  = tmpb->tym;
            ia++;
            ib++;
            ncommon++;
         }
      }

      /* having laboriously determined (thr, tyma, tymb), do something
         useful with it. */
      tymMax = tyma > tymb ? tyma : tymb;
      if (tymMax > 0) {
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = thrid;
         out->ts[hi].tym   = tymMax;
      }

   }

   tl_assert(is_sane_VTS(out));
   tl_assert(out->usedTS <= out->sizeTS);
   tl_assert(out->usedTS == useda + usedb - ncommon);
}
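
/* (Added worked example, not original text: joining hypothetical
   inputs a == [1024:3, 1025:1] and b == [1024:2, 1026:5] yields
   [1024:3, 1025:1, 1026:5] -- the pointwise maximum, with absent
   entries treated as zero.  Here ncommon == 1, for thrid 1024, so
   the final assertion checks 3 == 2 + 2 - 1.) */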


/* Determine if 'a' <= 'b', in the partial ordering.  Returns zero if
   they are, or the first ThrID for which they are not (no valid ThrID
   has the value zero).  This rather strange convention is used
   because sometimes we want to know the actual index at which they
   first differ. */
static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b )
{
   Word  ia, ib, useda, usedb;
   ULong tyma, tymb;

   stats__vts__cmpLEQ++;

   tl_assert(a);
   tl_assert(b);
   useda = a->usedTS;
   usedb = b->usedTS;

   ia = ib = 0;

   while (1) {

      /* This logic is to enumerate doubles (tyma, tymb) drawn
         from a and b in order, and tyma/b are the relevant
         scalar timestamps, taking into account implicit zeroes. */
      ThrID thrid;

      tl_assert(ia >= 0 && ia <= useda);
      tl_assert(ib >= 0 && ib <= usedb);

      if (ia == useda && ib == usedb) {
         /* both empty - done */
         break;

      } else if (ia == useda && ib != usedb) {
         /* a empty, use up b */
         ScalarTS* tmpb = &b->ts[ib];
         tyma  = 0;
         tymb  = tmpb->tym;
         thrid = tmpb->thrid;
         ib++;

      } else if (ia != useda && ib == usedb) {
         /* b empty, use up a */
         ScalarTS* tmpa = &a->ts[ia];
         tyma  = tmpa->tym;
         thrid = tmpa->thrid;
         tymb  = 0;
         ia++;

      } else {
         /* both not empty; extract lowest-ThrID'd triple */
         ScalarTS* tmpa = &a->ts[ia];
         ScalarTS* tmpb = &b->ts[ib];
         if (tmpa->thrid < tmpb->thrid) {
            /* a has the lowest unconsidered ThrID */
            tyma  = tmpa->tym;
            thrid = tmpa->thrid;
            tymb  = 0;
            ia++;
         }
         else
         if (tmpa->thrid > tmpb->thrid) {
            /* b has the lowest unconsidered ThrID */
            tyma  = 0;
            tymb  = tmpb->tym;
            thrid = tmpb->thrid;
            ib++;
         } else {
            /* they both next mention the same ThrID */
            tl_assert(tmpa->thrid == tmpb->thrid);
            tyma  = tmpa->tym;
            thrid = tmpa->thrid;
            tymb  = tmpb->tym;
            ia++;
            ib++;
         }
      }

      /* having laboriously determined (tyma, tymb), do something
         useful with it. */
      if (tyma > tymb) {
         /* not LEQ at this index.  Quit, since the answer is
            determined already. */
         tl_assert(thrid >= 1024);
         return thrid;
      }
   }

   return 0; /* all points are LEQ => return an invalid ThrID */
}
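
/* (Added worked example, not original text: with hypothetical
   a == [1024:2, 1025:1] and b == [1024:3, 1025:1],
   VTS__cmpLEQ(a,b) returns 0, since every entry of a is <= the
   corresponding entry of b, whereas VTS__cmpLEQ(b,a) returns 1024,
   the first thrid at which b exceeds a.) */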


/* Compute an arbitrary structural (total) ordering on the two args,
   based on their VCs, so they can be looked up in a table, tree, etc.
   Returns -1, 0 or 1.  (really just 'deriving Ord' :-) This can be
   performance critical so there is some effort expended to make it as
   fast as possible.
*/
Word VTS__cmp_structural ( VTS* a, VTS* b )
{
   /* We just need to generate an arbitrary total ordering based on
      a->ts and b->ts.  Preferably do it in a way which comes across
      likely differences relatively quickly. */
   Word  i;
   Word  useda = 0, usedb = 0;
   ScalarTS *ctsa = NULL, *ctsb = NULL;

   stats__vts__cmp_structural++;

   tl_assert(a);
   tl_assert(b);

   ctsa = &a->ts[0]; useda = a->usedTS;
   ctsb = &b->ts[0]; usedb = b->usedTS;

   if (LIKELY(useda == usedb)) {
      ScalarTS *tmpa = NULL, *tmpb = NULL;
      stats__vts__cmp_structural_slow++;
      /* Same length vectors.  Find the first difference, if any, as
         fast as possible. */
      for (i = 0; i < useda; i++) {
         tmpa = &ctsa[i];
         tmpb = &ctsb[i];
         if (LIKELY(tmpa->tym == tmpb->tym
                    && tmpa->thrid == tmpb->thrid))
            continue;
         else
            break;
      }
      if (UNLIKELY(i == useda)) {
         /* They're identical. */
         return 0;
      } else {
         tl_assert(i >= 0 && i < useda);
         if (tmpa->tym < tmpb->tym) return -1;
         if (tmpa->tym > tmpb->tym) return 1;
         if (tmpa->thrid < tmpb->thrid) return -1;
         if (tmpa->thrid > tmpb->thrid) return 1;
         /* we just established them as non-identical, hence: */
      }
      /*NOTREACHED*/
      tl_assert(0);
   }

   if (useda < usedb) return -1;
   if (useda > usedb) return 1;
   /*NOTREACHED*/
   tl_assert(0);
}


/* Debugging only.  Display the given VTS in the buffer.
*/
void VTS__show ( HChar* buf, Int nBuf, VTS* vts )
{
   ScalarTS* st;
   HChar     unit[64];
   Word      i, n;
   Int       avail = nBuf;
   tl_assert(vts && vts->ts);
   tl_assert(nBuf > 16);
   buf[0] = '[';
   buf[1] = 0;
   n = vts->usedTS;
   for (i = 0; i < n; i++) {
      tl_assert(avail >= 40);
      st = &vts->ts[i];
      VG_(memset)(unit, 0, sizeof(unit));
      VG_(sprintf)(unit, i < n-1 ? "%u:%llu " : "%u:%llu",
                   st->thrid, (ULong)st->tym);
      if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
         VG_(strcat)(buf, " ...]");
         buf[nBuf-1] = 0;
         return;
      }
      VG_(strcat)(buf, unit);
      avail -= VG_(strlen)(unit);
   }
   VG_(strcat)(buf, "]");
   buf[nBuf-1] = 0;
}


/* Debugging only.  Return vts[index], so to speak.
*/
ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx )
{
   UWord i, n;
   ThrID idx_thrid = Thr__to_ThrID(idx);
   stats__vts__indexat_slow++;
   tl_assert(vts && vts->ts);
   n = vts->usedTS;
   for (i = 0; i < n; i++) {
      ScalarTS* st = &vts->ts[i];
      if (st->thrid == idx_thrid)
         return st->tym;
   }
   return 0;
}


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                  SECTION END vts primitives                 //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                  SECTION BEGIN main library                 //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////


/////////////////////////////////////////////////////////
//                                                     //
//                       VTS set                       //
//                                                     //
/////////////////////////////////////////////////////////

static WordFM* /* VTS* void void */ vts_set = NULL;

static void vts_set_init ( void )
{
   tl_assert(!vts_set);
   vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
                         HG_(free),
                         (Word(*)(UWord,UWord))VTS__cmp_structural );
   tl_assert(vts_set);
}

/* Given a VTS, look in vts_set to see if we already have a
   structurally identical one.  If yes, return the pair (True, pointer
   to the existing one).  If no, clone this one, add the clone to the
   set, and return (False, pointer to the clone). */
static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand )
{
   UWord keyW, valW;
   stats__vts_set__focaa++;
   tl_assert(cand->id == VtsID_INVALID);
   /* lookup cand (by value) */
   if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
      /* found it */
      tl_assert(valW == 0);
      /* if this fails, cand (by ref) was already present (!) */
      tl_assert(keyW != (UWord)cand);
      *res = (VTS*)keyW;
      return True;
   } else {
      /* not present.  Clone, add and return address of clone. */
      stats__vts_set__focaa_a++;
      VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand );
      tl_assert(clone != cand);
      VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ );
      *res = clone;
      return False;
   }
}
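
/* (Added illustration, not part of the original code: the intended
   calling pattern for the interning function above.  The function
   name and 'scratch' parameter are hypothetical; 'scratch' stands
   for any caller-owned VTS, e.g. temp_max_sized_VTS.  Follows this
   file's #if 0 convention.) */
#if 0
static void example_intern_a_VTS ( VTS* scratch )
{
   VTS* canonical = NULL;
   Bool already = vts_set__find__or__clone_and_add( &canonical, scratch );
   /* 'canonical' now points at the unique copy held in vts_set.
      'scratch' is untouched and remains owned by the caller, and may
      be reused, regardless of whether 'already' holds. */
}
#endif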


/////////////////////////////////////////////////////////
//                                                     //
//                      VTS table                      //
//                                                     //
/////////////////////////////////////////////////////////

static void VtsID__invalidate_caches ( void ); /* fwds */

/* A type to hold VTS table entries.  Invariants:
   If .vts == NULL, then this entry is not in use, so:
   - .rc == 0
   - this entry is on the freelist (unfortunately, does not imply
     any constraints on value for .freelink)
   If .vts != NULL, then this entry is in use:
   - .vts is findable in vts_set
   - .vts->id == this entry number
   - no specific value for .rc (even 0 is OK)
   - this entry is not on freelist, so .freelink == VtsID_INVALID
*/
typedef
   struct {
      VTS*  vts;      /* vts, in vts_set */
      UWord rc;       /* reference count - enough for entire aspace */
      VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
   }
   VtsTE;

/* The VTS table. */
static XArray* /* of VtsTE */ vts_tab = NULL;

/* An index into the VTS table, indicating the start of the list of
   free (available for use) entries.  If the list is empty, this is
   VtsID_INVALID. */
static VtsID vts_tab_freelist = VtsID_INVALID;

/* Do a GC of vts_tab when the freelist becomes empty AND the size of
   vts_tab equals or exceeds this size.  After GC, the value here is
   set appropriately so as to check for the next GC point. */
static Word vts_next_GC_at = 1000;

static void vts_tab_init ( void )
{
   vts_tab
      = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
                    HG_(free), sizeof(VtsTE) );
   vts_tab_freelist
      = VtsID_INVALID;
   tl_assert(vts_tab);
}

/* Add ii to the free list, checking that it looks out-of-use. */
static void add_to_free_list ( VtsID ii )
{
   VtsTE* ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts == NULL);
   tl_assert(ie->rc == 0);
   tl_assert(ie->freelink == VtsID_INVALID);
   ie->freelink = vts_tab_freelist;
   vts_tab_freelist = ii;
}

/* Get an entry from the free list.  This will return VtsID_INVALID if
   the free list is empty. */
static VtsID get_from_free_list ( void )
{
   VtsID  ii;
   VtsTE* ie;
   if (vts_tab_freelist == VtsID_INVALID)
      return VtsID_INVALID;
   ii = vts_tab_freelist;
   ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts == NULL);
   tl_assert(ie->rc == 0);
   vts_tab_freelist = ie->freelink;
   return ii;
}

/* Produce a new VtsID that can be used, either by getting it from
   the freelist, or, if that is empty, by expanding vts_tab. */
static VtsID get_new_VtsID ( void )
{
   VtsID ii;
   VtsTE te;
   ii = get_from_free_list();
   if (ii != VtsID_INVALID)
      return ii;
   te.vts = NULL;
   te.rc = 0;
   te.freelink = VtsID_INVALID;
   ii = (VtsID)VG_(addToXA)( vts_tab, &te );
   return ii;
}


/* Indirect callback from lib_zsm. */
static void VtsID__rcinc ( VtsID ii )
{
   VtsTE* ie;
   /* VG_(indexXA) does a range check for us */
   ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts);       /* else it's not in use */
   tl_assert(ie->rc < ~0UL); /* else we can't continue */
   tl_assert(ie->vts->id == ii);
   ie->rc++;
}

/* Indirect callback from lib_zsm. */
static void VtsID__rcdec ( VtsID ii )
{
   VtsTE* ie;
   /* VG_(indexXA) does a range check for us */
   ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts);   /* else it's not in use */
   tl_assert(ie->rc > 0); /* else RC snafu */
   tl_assert(ie->vts->id == ii);
   ie->rc--;
}


/* Look up 'cand' in our collection of VTSs.  If present, return the
   VtsID for the pre-existing version.  If not present, clone it, add
   the clone to both vts_tab and vts_set, allocate a fresh VtsID for
   it, and return that. */
static VtsID vts_tab__find__or__clone_and_add ( VTS* cand )
{
   VTS* in_tab = NULL;
   tl_assert(cand->id == VtsID_INVALID);
   Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand );
   tl_assert(in_tab);
   if (already_have) {
      /* We already have a copy of 'cand'.  Use that. */
      VtsTE* ie;
      tl_assert(in_tab->id != VtsID_INVALID);
      ie = VG_(indexXA)( vts_tab, in_tab->id );
      tl_assert(ie->vts == in_tab);
      return in_tab->id;
   } else {
      VtsID ii = get_new_VtsID();
      VtsTE* ie = VG_(indexXA)( vts_tab, ii );
      ie->vts = in_tab;
      ie->rc = 0;
      ie->freelink = VtsID_INVALID;
      in_tab->id = ii;
      return ii;
   }
}


static void show_vts_stats ( HChar* caller )
{
   UWord nSet, nTab, nLive;
   ULong totrc;
   UWord n, i;
   nSet = VG_(sizeFM)( vts_set );
   nTab = VG_(sizeXA)( vts_tab );
   totrc = 0;
   nLive = 0;
   n = VG_(sizeXA)( vts_tab );
   for (i = 0; i < n; i++) {
      VtsTE* ie = VG_(indexXA)( vts_tab, i );
      if (ie->vts) {
         nLive++;
         totrc += (ULong)ie->rc;
      } else {
         tl_assert(ie->rc == 0);
      }
   }
   VG_(printf)("  show_vts_stats %s\n", caller);
   VG_(printf)("    vts_tab size %4lu\n", nTab);
   VG_(printf)("    vts_tab live %4lu\n", nLive);
   VG_(printf)("    vts_set size %4lu\n", nSet);
   VG_(printf)("        total rc %4llu\n", totrc);
}

/* NOT TO BE CALLED FROM WITHIN libzsm. */
__attribute__((noinline))
static void vts_tab__do_GC ( Bool show_stats )
{
   UWord i, nTab, nLive, nFreed;

   /* check this is actually necessary. */
   tl_assert(vts_tab_freelist == VtsID_INVALID);

   /* empty the caches for partial order checks and binary joins.  We
      could do better and prune out the entries to be deleted, but it
      ain't worth the hassle. */
   VtsID__invalidate_caches();

   /* First, make the reference counts up to date. */
   zsm_flush_cache();

   nTab = VG_(sizeXA)( vts_tab );

   if (show_stats) {
      VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
      show_vts_stats("before GC");
   }

   /* Now we can inspect the entire vts_tab.  Any entries with zero
      .rc fields are no longer in use and can be put on the free
      list, removed from vts_set, and deleted. */
   nFreed = 0;
   for (i = 0; i < nTab; i++) {
      Bool present;
      UWord oldK = 0, oldV = 0;
      VtsTE* te = VG_(indexXA)( vts_tab, i );
      if (te->vts == NULL) {
         tl_assert(te->rc == 0);
         continue; /* already on the free list (presumably) */
      }
      if (te->rc > 0)
         continue; /* in use */
      /* Ok, we got one we can free. */
      tl_assert(te->vts->id == i);
      /* first, remove it from vts_set. */
      present = VG_(delFromFM)( vts_set,
                                &oldK, &oldV, (UWord)te->vts );
      tl_assert(present); /* else it isn't in vts_set ?! */
      tl_assert(oldV == 0); /* no info stored in vts_set val fields */
      tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
      /* now free the VTS itself */
      VTS__delete(te->vts);
      te->vts = NULL;
      /* and finally put this entry on the free list */
      tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
      add_to_free_list( i );
      nFreed++;
   }

   /* Now figure out when the next GC should be.  We'll allow the
      number of VTSs to double before GCing again.  Except of course
      that since we can't (or, at least, don't) shrink vts_tab, we
      can't set the threshold value smaller than it. */
   tl_assert(nFreed <= nTab);
   nLive = nTab - nFreed;
   tl_assert(nLive >= 0 && nLive <= nTab);
   vts_next_GC_at = 2 * nLive;
   if (vts_next_GC_at < nTab)
      vts_next_GC_at = nTab;

   if (show_stats) {
      show_vts_stats("after GC");
      VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
   }

   if (VG_(clo_stats)) {
      static UInt ctr = 0;
      tl_assert(nTab > 0);
      VG_(message)(Vg_DebugMsg,
                   "libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)\n",
                   ctr++, nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
   }
}

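/* (Added worked example, not original text: if a GC over a
   1000-entry vts_tab frees 400 entries, then nLive == 600 and the
   next GC is scheduled at 2 * 600 == 1200.  Had 800 been freed,
   2 * 200 == 400 would fall below the (unshrinkable) table size, so
   the threshold would be clamped up to 1000.) */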

/////////////////////////////////////////////////////////
//                                                     //
//                       Vts IDs                       //
//                                                     //
/////////////////////////////////////////////////////////

//////////////////////////
/* A temporary, max-sized VTS which is used as a temporary (the first
   argument) in VTS__singleton, VTS__tick and VTS__join operations. */
static VTS* temp_max_sized_VTS = NULL;

//////////////////////////
static ULong stats__cmpLEQ_queries = 0;
static ULong stats__cmpLEQ_misses  = 0;
static ULong stats__join2_queries  = 0;
static ULong stats__join2_misses   = 0;

static inline UInt ROL32 ( UInt w, Int n ) {
   w = (w << n) | (w >> (32-n));
   return w;
}
static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
   UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
   return hash % nTab;
}

#define N_CMPLEQ_CACHE 1023
static
   struct { VtsID vi1; VtsID vi2; Bool leq; }
   cmpLEQ_cache[N_CMPLEQ_CACHE];

#define N_JOIN2_CACHE 1023
static
   struct { VtsID vi1; VtsID vi2; VtsID res; }
   join2_cache[N_JOIN2_CACHE];

static void VtsID__invalidate_caches ( void ) {
   Int i;
   for (i = 0; i < N_CMPLEQ_CACHE; i++) {
      cmpLEQ_cache[i].vi1 = VtsID_INVALID;
      cmpLEQ_cache[i].vi2 = VtsID_INVALID;
      cmpLEQ_cache[i].leq = False;
   }
   for (i = 0; i < N_JOIN2_CACHE; i++) {
      join2_cache[i].vi1 = VtsID_INVALID;
      join2_cache[i].vi2 = VtsID_INVALID;
      join2_cache[i].res = VtsID_INVALID;
   }
}
//////////////////////////

//static Bool VtsID__is_valid ( VtsID vi ) {
//   VtsTE* ve;
//   if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
//      return False;
//   ve = VG_(indexXA)( vts_tab, vi );
//   if (!ve->vts)
//      return False;
//   tl_assert(ve->vts->id == vi);
//   return True;
//}

static VTS* VtsID__to_VTS ( VtsID vi ) {
   VtsTE* te = VG_(indexXA)( vts_tab, vi );
   tl_assert(te->vts);
   return te->vts;
}

static void VtsID__pp ( VtsID vi ) {
   HChar buf[100];
   VTS* vts = VtsID__to_VTS(vi);
   VTS__show( buf, sizeof(buf)-1, vts );
   buf[sizeof(buf)-1] = 0;
   VG_(printf)("%s", buf);
}

/* compute partial ordering relation of vi1 and vi2. */
__attribute__((noinline))
static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
   UInt hash;
   Bool leq;
   VTS  *v1, *v2;
   //if (vi1 == vi2) return True;
   tl_assert(vi1 != vi2);
   ////++
   stats__cmpLEQ_queries++;
   hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
   if (cmpLEQ_cache[hash].vi1 == vi1
       && cmpLEQ_cache[hash].vi2 == vi2)
      return cmpLEQ_cache[hash].leq;
   stats__cmpLEQ_misses++;
   ////--
   v1  = VtsID__to_VTS(vi1);
   v2  = VtsID__to_VTS(vi2);
   leq = VTS__cmpLEQ( v1, v2 ) == 0;
   ////++
   cmpLEQ_cache[hash].vi1 = vi1;
   cmpLEQ_cache[hash].vi2 = vi2;
   cmpLEQ_cache[hash].leq = leq;
   ////--
   return leq;
}
static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
   return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2);
}

/* compute binary join */
__attribute__((noinline))
static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
   UInt  hash;
   VtsID res;
   VTS   *vts1, *vts2;
   //if (vi1 == vi2) return vi1;
   tl_assert(vi1 != vi2);
   ////++
   stats__join2_queries++;
   hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
   if (join2_cache[hash].vi1 == vi1
       && join2_cache[hash].vi2 == vi2)
      return join2_cache[hash].res;
   stats__join2_misses++;
   ////--
   vts1 = VtsID__to_VTS(vi1);
   vts2 = VtsID__to_VTS(vi2);
   temp_max_sized_VTS->usedTS = 0;
   VTS__join(temp_max_sized_VTS, vts1,vts2);
   res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
   ////++
   join2_cache[hash].vi1 = vi1;
   join2_cache[hash].vi2 = vi2;
   join2_cache[hash].res = res;
   ////--
   return res;
}
static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
   return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2);
}

/* create a singleton VTS, namely [thr:1] */
static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
   temp_max_sized_VTS->usedTS = 0;
   VTS__singleton(temp_max_sized_VTS, thr,tym);
   return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
}

/* tick operation, creates value 1 if specified index is absent */
static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
   VTS* vts = VtsID__to_VTS(vi);
   temp_max_sized_VTS->usedTS = 0;
   VTS__tick(temp_max_sized_VTS, idx,vts);
   return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
}

/* index into a VTS (only for assertions) */
static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
   VTS* vts = VtsID__to_VTS(vi);
   return VTS__indexAt_SLOW( vts, idx );
}

/* Assuming that !cmpLEQ(vi1, vi2), find the index of the first (or
   any, really) element in vi1 which is pointwise greater-than the
   corresponding element in vi2.  If no such element exists, return
   NULL.  This needs to be fairly quick since it is called every time
   a race is detected. */
static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
{
   VTS  *vts1, *vts2;
   Thr*  diffthr;
   ThrID diffthrid;
   tl_assert(vi1 != vi2);
   vts1 = VtsID__to_VTS(vi1);
   vts2 = VtsID__to_VTS(vi2);
   tl_assert(vts1 != vts2);
   diffthrid = VTS__cmpLEQ(vts1, vts2);
   diffthr = Thr__from_ThrID(diffthrid);
   tl_assert(diffthr); /* else they are LEQ ! */
   return diffthr;
}


/////////////////////////////////////////////////////////
//                                                     //
//                       Filters                       //
//                                                     //
/////////////////////////////////////////////////////////

// baseline: 5, 9
#define FI_LINE_SZB_LOG2  5
#define FI_NUM_LINES_LOG2 10

#define FI_LINE_SZB  (1 << FI_LINE_SZB_LOG2)
#define FI_NUM_LINES (1 << FI_NUM_LINES_LOG2)

#define FI_TAG_MASK    (~(Addr)(FI_LINE_SZB - 1))
#define FI_GET_TAG(_a) ((_a) & FI_TAG_MASK)

#define FI_GET_LINENO(_a) ( ((_a) >> FI_LINE_SZB_LOG2) \
                            & (Addr)(FI_NUM_LINES-1) )


/* In the lines, each 8 bytes are treated individually, and are mapped
   to a UShort.  Regardless of endianness of the underlying machine,
   bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
   the highest address.

   Of each bit pair, the higher numbered bit is set if a R has been
   seen, so the actual layout is:

   15 14             ...  01 00

   R  W  for addr+7  ...  R  W  for addr+0

   So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
*/
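
/* (Added worked example, not original text: for a byte at address a
   with (a & 7) == 3, the W bit is bit 6 and the R bit is bit 7 of
   the relevant UShort, so the byte's mask is 0x3 << (2 * 3)
   == 0x00C0, as used in the handlers below.  Likewise a 32-bit
   access with (a & 4) == 4 covers bits 15..8, giving mask 0xFF00.) */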

/* tags are separated from lines.  tags are Addrs and are
   the base address of the line. */
typedef
   struct {
      UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
   }
   FiLine;

typedef
   struct {
      Addr   tags[FI_NUM_LINES];
      FiLine lines[FI_NUM_LINES];
   }
   Filter;

/* Forget everything we know -- clear the filter and let everything
   through.  This needs to be as fast as possible, since it is called
   every time the running thread changes, and every time a thread's
   vector clocks change, which can be quite frequent.  The obvious
   fast way to do this is simply to stuff in tags which we know are
   not going to match anything, since they're not aligned to the start
   of a line. */
static void Filter__clear ( Filter* fi, HChar* who )
{
   UWord i;
   if (0) VG_(printf)("  Filter__clear(%p, %s)\n", fi, who);
   for (i = 0; i < FI_NUM_LINES; i += 8) {
      fi->tags[i+0] = 1; /* impossible value -- cannot match */
      fi->tags[i+1] = 1;
      fi->tags[i+2] = 1;
      fi->tags[i+3] = 1;
      fi->tags[i+4] = 1;
      fi->tags[i+5] = 1;
      fi->tags[i+6] = 1;
      fi->tags[i+7] = 1;
   }
   tl_assert(i == FI_NUM_LINES);
}
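
/* (Added note, not original text: the tag value 1 can never match
   because genuine tags are produced by FI_GET_TAG, which clears the
   low FI_LINE_SZB_LOG2 bits -- every real tag is a multiple of
   FI_LINE_SZB (32 here), and 1 is not.) */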

/* Clearing an arbitrary range in the filter.  Unfortunately
   we have to do this due to core-supplied new/die-mem events. */

static void Filter__clear_1byte ( Filter* fi, Addr a )
{
   Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   FiLine* line   = &fi->lines[lineno];
   UWord   loff   = (a - atag) / 8;
   UShort  mask   = 0x3 << (2 * (a & 7));
   /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
   if (LIKELY( fi->tags[lineno] == atag )) {
      /* hit.  clear the bits. */
      UShort u16 = line->u16s[loff];
      line->u16s[loff] = u16 & ~mask; /* clear them */
   } else {
      /* miss.  The filter doesn't hold this address, so ignore. */
   }
}

static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
{
   Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   FiLine* line   = &fi->lines[lineno];
   UWord   loff   = (a - atag) / 8;
   if (LIKELY( fi->tags[lineno] == atag )) {
      line->u16s[loff] = 0;
   } else {
      /* miss.  The filter doesn't hold this address, so ignore. */
   }
}

static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
{
   //VG_(printf)("%lu ", len);
   /* slowly do part preceding 8-alignment */
   while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
      Filter__clear_1byte( fi, a );
      a++;
      len--;
   }
   /* vector loop */
   while (len >= 8) {
      Filter__clear_8bytes_aligned( fi, a );
      a += 8;
      len -= 8;
   }
   /* slowly do tail */
   while (UNLIKELY(len > 0)) {
      Filter__clear_1byte( fi, a );
      a++;
      len--;
   }
}


/* ------ Read handlers for the filter. ------ */

static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xAAAA;
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* all R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 4 x R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xA << (2 * (a & 6));
      /* mask is A000, 0A00, 00A0 or 000A */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 2 x R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
{
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0x2 << (2 * (a & 7));
      /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 1 x R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}


/* ------ Write handlers for the filter. ------ */

static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xFFFF;
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* all R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 4 x R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xF << (2 * (a & 6));
      /* mask is F000, 0F00, 00F0 or 000F */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 2 x R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
{
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0x3 << (2 * (a & 7));
      /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 1 x R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}


/////////////////////////////////////////////////////////
//                                                     //
//                       Threads                       //
//                                                     //
/////////////////////////////////////////////////////////

// QQQ move this somewhere else
typedef struct { ULong ull; ExeContext* ec; } ULong_n_EC;

/* How many of the above records to collect for each thread?  Older
   ones are dumped when we run out of space.  62.5k requires 1MB per
   thread, since each ULong_n_EC record is 16 bytes long.  When more
   than N_KWs_N_STACKs_PER_THREAD are present, the older half are
   deleted to make space.  Hence in the worst case we will be able to
   produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
   Kw transitions (segments in this thread).  For the current setting
   that gives a guaranteed stack for at least the last 31.25k
   segments. */
#define N_KWs_N_STACKs_PER_THREAD 62500
3038
3039
sewardjf98e1c02008-10-25 16:22:41 +00003040struct _Thr {
3041 /* Current VTSs for this thread. They change as we go along. viR
3042 is the VTS to be used for reads, viW for writes. Usually they
3043 are the same, but can differ when we deal with reader-writer
sewardj23f12002009-07-24 08:45:08 +00003044 locks. It is always the case that
3045 VtsID__cmpLEQ(viW,viR) == True
3046 that is, viW must be the same, or lagging behind, viR. */
sewardjf98e1c02008-10-25 16:22:41 +00003047 VtsID viR;
3048 VtsID viW;
sewardj23f12002009-07-24 08:45:08 +00003049
3050 /* Is initially False, and is set to true after the thread really
3051 has done a low-level exit. */
3052 Bool still_alive;
3053
sewardje4cce742011-02-24 15:25:24 +00003054 /* A small integer giving a unique identity to this Thr. See
3055 comments on the definition of ScalarTS for details. */
3056 ThrID thrid : SCALARTS_N_THRBITS;
3057
sewardj23f12002009-07-24 08:45:08 +00003058 /* A filter that removes references for which we believe that
3059 msmcread/msmcwrite will not change the state, nor report a
3060 race. */
3061 Filter* filter;
3062
sewardj60626642011-03-10 15:14:37 +00003063 /* A pointer back to the top level Thread structure. There is a
3064 1-1 mapping between Thread and Thr structures -- each Thr points
3065 at its corresponding Thread, and vice versa. Really, Thr and
3066 Thread should be merged into a single structure. */
3067 Thread* hgthread;
sewardj23f12002009-07-24 08:45:08 +00003068
sewardj8ab2c132009-08-02 09:34:35 +00003069 /* The ULongs (scalar Kws) in this accumulate in strictly
sewardj23f12002009-07-24 08:45:08 +00003070 increasing order, without duplicates. This is important because
sewardj8ab2c132009-08-02 09:34:35 +00003071 we need to be able to find a given scalar Kw in this array
sewardj23f12002009-07-24 08:45:08 +00003072 later, by binary search. */
sewardj8ab2c132009-08-02 09:34:35 +00003073 XArray* /* ULong_n_EC */ local_Kws_n_stacks;
sewardjf98e1c02008-10-25 16:22:41 +00003074};
3075
sewardje4cce742011-02-24 15:25:24 +00003076
3077/* Maps ThrID values to their Thr*s (which contain ThrID values that
3078 should point back to the relevant slot in the array. Lowest
3079 numbered slot (0) is for thrid = 1024, (1) is for 1025, etc. */
3080static XArray* /* of Thr* */ thrid_to_thr_map = NULL;
3081
/* And a counter to dole out ThrID values.  For rationale/background,
   see comments on definition of ScalarTS (far) above. */
static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */

static ThrID Thr__to_ThrID ( Thr* thr ) {
   return thr->thrid;
}
static Thr* Thr__from_ThrID ( UInt thrid ) {
   Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 );
   tl_assert(thr->thrid == thrid);
   return thr;
}

static Thr* Thr__new ( void )
{
   Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
   thr->viR = VtsID_INVALID;
   thr->viW = VtsID_INVALID;
   thr->still_alive = True;
   thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
   /* We only really need this at history level 1, but unfortunately
      this routine is called before the command line processing is
      done (sigh), so we can't rely on HG_(clo_history_level) at this
      point.  Hence always allocate it.  Bah. */
   thr->local_Kws_n_stacks
      = VG_(newXA)( HG_(zalloc),
                    "libhb.Thr__new.3 (local_Kws_and_stacks)",
                    HG_(free), sizeof(ULong_n_EC) );

   /* Add this Thr* <-> ThrID binding to the mapping, and
      cross-check */
   if (!thrid_to_thr_map) {
      thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4",
                                     HG_(free), sizeof(Thr*) );
      tl_assert(thrid_to_thr_map);
   }

   if (thrid_counter >= ThrID_MAX_VALID) {
      /* We're hosed.  We have to stop. */
      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   }

   thr->thrid = thrid_counter++;
   Word ix = VG_(addToXA)( thrid_to_thr_map, &thr );
   tl_assert(ix + 1024 == thr->thrid);

   return thr;
}

static void note_local_Kw_n_stack_for ( Thr* thr )
{
   Word       nPresent;
   ULong_n_EC pair;
   tl_assert(thr);

   // We only collect this info at history level 1 (approx)
   if (HG_(clo_history_level) != 1)
      return;

   /* This is the scalar Kw for thr. */
   pair.ull = VtsID__indexAt( thr->viW, thr );
   pair.ec  = main_get_EC( thr );
   tl_assert(pair.ec);
   tl_assert(thr->local_Kws_n_stacks);

   /* check that we're not adding duplicates */
   nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );

   /* Throw away old stacks, if necessary.  We can't accumulate stuff
      indefinitely. */
   if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
      VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
      nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
      if (0)
         VG_(printf)("LOCAL Kw: thr %p,  Kw %llu,  ec %p (!!! gc !!!)\n",
                     thr, pair.ull, pair.ec );
   }

   if (nPresent > 0) {
      ULong_n_EC* prevPair
         = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
      tl_assert( prevPair->ull <= pair.ull );
   }

   if (nPresent == 0)
      pair.ec = NULL;

   VG_(addToXA)( thr->local_Kws_n_stacks, &pair );

   if (0)
      VG_(printf)("LOCAL Kw: thr %p,  Kw %llu,  ec %p\n",
                  thr, pair.ull, pair.ec );
   if (0)
      VG_(pp_ExeContext)(pair.ec);
}

static Int cmp__ULong_n_EC__by_ULong ( ULong_n_EC* pair1, ULong_n_EC* pair2 )
{
   if (pair1->ull < pair2->ull) return -1;
   if (pair1->ull > pair2->ull) return 1;
   return 0;
}


/////////////////////////////////////////////////////////
//                                                     //
//                Shadow Values                        //
//                                                     //
/////////////////////////////////////////////////////////

// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
// hb_zsm.h.  We have to do everything else here.

/* SVal is 64 bit unsigned int.

      <---------30--------->    <---------30--------->
   00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X   C(Rmin,Wmin)
   10 X--------------------X XX X--------------------X   A: SVal_NOACCESS
   11 0--------------------0 00 0--------------------0   A: SVal_INVALID

*/
#define SVAL_TAGMASK (3ULL << 62)

static inline Bool SVal__isC ( SVal s ) {
   return (0ULL << 62) == (s & SVAL_TAGMASK);
}
static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
   //tl_assert(VtsID__is_valid(rmini));
   //tl_assert(VtsID__is_valid(wmini));
   return (((ULong)rmini) << 32) | ((ULong)wmini);
}
static inline VtsID SVal__unC_Rmin ( SVal s ) {
   tl_assert(SVal__isC(s));
   return (VtsID)(s >> 32);
}
static inline VtsID SVal__unC_Wmin ( SVal s ) {
   tl_assert(SVal__isC(s));
   return (VtsID)(s & 0xFFFFFFFFULL);
}
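
/* Round-trip sketch: provided the top two bits of 'rmini' are zero
   (so the tag field stays 00, meaning C), we have
      SVal__unC_Rmin(SVal__mkC(rmini,wmini)) == rmini
      SVal__unC_Wmin(SVal__mkC(rmini,wmini)) == wmini
   since SVal__mkC simply packs the two 32-bit VtsIDs side by side in
   the 64-bit word. */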

static inline Bool SVal__isA ( SVal s ) {
   return (2ULL << 62) == (s & SVAL_TAGMASK);
}
static inline SVal SVal__mkA ( void ) {
   return 2ULL << 62;
}

/* Direct callback from lib_zsm. */
static void SVal__rcinc ( SVal s ) {
   if (SVal__isC(s)) {
      VtsID__rcinc( SVal__unC_Rmin(s) );
      VtsID__rcinc( SVal__unC_Wmin(s) );
   }
}

/* Direct callback from lib_zsm. */
static void SVal__rcdec ( SVal s ) {
   if (SVal__isC(s)) {
      VtsID__rcdec( SVal__unC_Rmin(s) );
      VtsID__rcdec( SVal__unC_Wmin(s) );
   }
}

/////////////////////////////////////////////////////////
//                                                     //
//             A simple group (memory) allocator       //
//                                                     //
/////////////////////////////////////////////////////////

//////////////// BEGIN general group allocator
typedef
   struct {
      UWord   elemSzB;        /* element size */
      UWord   nPerGroup;      /* # elems per group */
      void*   (*alloc)(HChar*, SizeT); /* group allocator */
      HChar*  cc; /* group allocator's cc */
      void    (*free)(void*); /* group allocator's free-er (unused) */
      /* XArray of void* (pointers to groups).  The groups themselves.
         Each element is a pointer to a block of size (elemSzB *
         nPerGroup) bytes. */
      XArray* groups;
      /* next free element.  Is a pointer to an element in one of the
         groups pointed to by .groups. */
      void* nextFree;
   }
   GroupAlloc;

static void init_GroupAlloc ( /*MOD*/GroupAlloc* ga,
                              UWord  elemSzB,
                              UWord  nPerGroup,
                              void*  (*alloc)(HChar*, SizeT),
                              HChar* cc,
                              void   (*free)(void*) )
{
   tl_assert(0 == (elemSzB % sizeof(UWord)));
   tl_assert(elemSzB >= sizeof(UWord));
   tl_assert(nPerGroup >= 100); /* let's say */
   tl_assert(alloc);
   tl_assert(cc);
   tl_assert(free);
   tl_assert(ga);
   VG_(memset)(ga, 0, sizeof(*ga));
   ga->elemSzB   = elemSzB;
   ga->nPerGroup = nPerGroup;
   ga->groups    = NULL;
   ga->alloc     = alloc;
   ga->cc        = cc;
   ga->free      = free;
   ga->groups    = VG_(newXA)( alloc, cc, free, sizeof(void*) );
   ga->nextFree  = NULL;
   tl_assert(ga->groups);
}

/* The freelist is empty.  Allocate a new group and put all the new
   elements in it onto the freelist. */
__attribute__((noinline))
static void gal_add_new_group ( GroupAlloc* ga )
{
   Word   i;
   UWord* group;
   tl_assert(ga);
   tl_assert(ga->nextFree == NULL);
   group = ga->alloc( ga->cc, ga->elemSzB * ga->nPerGroup );
   tl_assert(group);
   /* extend the freelist through the new group.  Place the freelist
      pointer in the first word of each element.  That's why the
      element size must be at least one word. */
   for (i = ga->nPerGroup-1; i >= 0; i--) {
      UChar* elemC = ((UChar*)group) + i * ga->elemSzB;
      UWord* elem  = (UWord*)elemC;
      tl_assert(0 == (((UWord)elem) % sizeof(UWord)));
      *elem = (UWord)ga->nextFree;
      ga->nextFree = elem;
   }
   /* and add to our collection of groups */
   VG_(addToXA)( ga->groups, &group );
}

inline static void* gal_Alloc ( GroupAlloc* ga )
{
   UWord* elem;
   if (UNLIKELY(ga->nextFree == NULL)) {
      gal_add_new_group(ga);
   }
   elem = ga->nextFree;
   ga->nextFree = (void*)*elem;
   *elem = 0; /* unnecessary, but just to be on the safe side */
   return elem;
}

inline static void* gal_Alloc_w_size_check ( GroupAlloc* ga, SizeT n )
{
   tl_assert(n == ga->elemSzB);
   return gal_Alloc( ga );
}

inline static void gal_Free ( GroupAlloc* ga, void* p )
{
   UWord* elem = (UWord*)p;
   *elem = (UWord)ga->nextFree;
   ga->nextFree = elem;
}
//////////////// END general group allocator
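
/* A minimal usage sketch of the group allocator, with a hypothetical
   element type 'Foo' (the real clients are the RCEC and OldRef
   allocators below).  Left disabled, in the same way as the other
   optional code in this file. */
#if 0
typedef struct { UWord payload[4]; } Foo;  /* a multiple of the word size */
static GroupAlloc foo_group_allocator;
static void foo_group_allocator_example ( void )
{
   Foo* f;
   init_GroupAlloc( &foo_group_allocator, sizeof(Foo),
                    1000/*elems per group*/, HG_(zalloc),
                    "libhb.foo_example (Foo groups)", HG_(free) );
   f = gal_Alloc( &foo_group_allocator );  /* O(1); pops the freelist */
   /* ... use *f ... */
   gal_Free( &foo_group_allocator, f );    /* pushes it back on */
}
#endif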


/////////////////////////////////////////////////////////
//                                                     //
//             Change-event map2                       //
//                                                     //
/////////////////////////////////////////////////////////

#define EVENT_MAP_GC_DISCARD_FRACTION  0.5

/* This is in two parts:

   1. A hash table of RCECs.  This is a set of reference-counted stack
      traces.  When the reference count of a stack trace becomes zero,
      it is removed from the set and freed up.  The intent is to have
      a set of stack traces which can be referred to from (2), but to
      only represent each one once.  The set is indexed/searched by
      ordering on the stack trace vectors.

   2. A SparseWA of OldRefs.  These store information about each old
      ref that we need to record.  It is indexed by address of the
      location for which the information is recorded.  For LRU
      purposes, each OldRef also contains a generation number,
      indicating when it was most recently accessed.

      The important part of an OldRef is, however, its accs[] array.
      This is an array of N_OLDREF_ACCS which binds (thread, R/W,
      size) triples to RCECs.  This allows us to collect the last
      access-traceback by up to N_OLDREF_ACCS different triples for
      this location.  The accs[] array is a move-to-front array.  If
      a binding falls off the end, that's too bad -- we will lose
      info about that triple's access to this location.

      When the SparseWA becomes too big, we can throw away the OldRefs
      whose generation numbers are below some threshold; hence doing
      approximate LRU discarding.  For each discarded OldRef we must
      of course decrement the reference count on all the RCECs it
      refers to, in order that entries from (1) eventually get
      discarded too.

   A major improvement in reliability of this mechanism would be to
   have a dynamically sized OldRef.accs[] array, so no entries ever
   fall off the end.  In investigations (Dec 08) it appears that a
   major cause of the non-availability of conflicting-access traces
   in race reports is the fixed size of this array.  I suspect for
   most OldRefs, only a few entries are used, but for a minority of
   cases there is an overflow, leading to info lossage.
   Investigations also suggest this is very workload and scheduling
   sensitive.  Therefore a dynamic sizing would be better.

   However, dynamic sizing would defeat the use of a GroupAllocator
   for OldRef structures.  And that's important for performance.  So
   it's not straightforward to do.
*/


static UWord stats__ctxt_rcdec1          = 0;
static UWord stats__ctxt_rcdec2          = 0;
static UWord stats__ctxt_rcdec3          = 0;
static UWord stats__ctxt_rcdec_calls     = 0;
static UWord stats__ctxt_rcdec_discards  = 0;
static UWord stats__ctxt_rcdec1_eq       = 0;

static UWord stats__ctxt_tab_curr = 0;
static UWord stats__ctxt_tab_max  = 0;

static UWord stats__ctxt_tab_qs   = 0;
static UWord stats__ctxt_tab_cmps = 0;


///////////////////////////////////////////////////////
//// Part (1): A hash table of RCECs
///

#define N_FRAMES 8

// (UInt) `echo "Reference Counted Execution Context" | md5sum`
#define RCEC_MAGIC 0xab88abb2UL

//#define N_RCEC_TAB 98317 /* prime */
#define N_RCEC_TAB 196613 /* prime */

typedef
   struct _RCEC {
      UWord magic;  /* sanity check only */
      struct _RCEC* next;
      UWord rc;
      UWord rcX; /* used for crosschecking */
      UWord frames_hash;          /* hash of all the frames */
      UWord frames[N_FRAMES];
   }
   RCEC;

static RCEC** contextTab = NULL; /* hash table of RCEC*s */


/* Gives an arbitrary total order on RCEC .frames fields */
static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
   Word i;
   tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
   tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
   if (ec1->frames_hash < ec2->frames_hash) return -1;
   if (ec1->frames_hash > ec2->frames_hash) return  1;
   for (i = 0; i < N_FRAMES; i++) {
      if (ec1->frames[i] < ec2->frames[i]) return -1;
      if (ec1->frames[i] > ec2->frames[i]) return  1;
   }
   return 0;
}


/* Dec the ref of this RCEC. */
static void ctxt__rcdec ( RCEC* ec )
{
   stats__ctxt_rcdec_calls++;
   tl_assert(ec && ec->magic == RCEC_MAGIC);
   tl_assert(ec->rc > 0);
   ec->rc--;
}

static void ctxt__rcinc ( RCEC* ec )
{
   tl_assert(ec && ec->magic == RCEC_MAGIC);
   ec->rc++;
}


//////////// BEGIN RCEC group allocator
static GroupAlloc rcec_group_allocator;

static RCEC* alloc_RCEC ( void ) {
   return gal_Alloc ( &rcec_group_allocator );
}

static void free_RCEC ( RCEC* rcec ) {
   tl_assert(rcec->magic == RCEC_MAGIC);
   gal_Free( &rcec_group_allocator, rcec );
}
//////////// END RCEC group allocator


/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
   move it one step closer to the front of the list, so as to make
   subsequent searches for it cheaper. */
static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
{
   RCEC *ec0, *ec1, *ec2;
   if (ec == *headp)
      tl_assert(0); /* already at head of list */
   tl_assert(ec != NULL);
   ec0 = *headp;
   ec1 = NULL;
   ec2 = NULL;
   while (True) {
      if (ec0 == NULL || ec0 == ec) break;
      ec2 = ec1;
      ec1 = ec0;
      ec0 = ec0->next;
   }
   tl_assert(ec0 == ec);
   if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
      RCEC* tmp;
      /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
         predecessor.  Swap ec0 and ec1, that is, move ec0 one step
         closer to the start of the list. */
      tl_assert(ec2->next == ec1);
      tl_assert(ec1->next == ec0);
      tmp = ec0->next;
      ec2->next = ec0;
      ec0->next = ec1;
      ec1->next = tmp;
   }
   else
   if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
      /* it's second in the list. */
      tl_assert(*headp == ec1);
      tl_assert(ec1->next == ec0);
      ec1->next = ec0->next;
      ec0->next = ec1;
      *headp = ec0;
   }
}
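
/* For example (a sketch): with a bucket list A -> B -> C -> D, a hit
   on C yields A -> C -> B -> D, and a hit on B (second in the list)
   yields B -> A -> C -> D, with the head pointer updated.  Repeated
   hits therefore migrate an entry gradually towards the front, one
   step per lookup. */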


/* Find the given RCEC in the hash table, and return a pointer to it.
   Or, if not present, add the given one to the table (by making a
   copy of it, so the caller can immediately deallocate the original)
   and return a pointer to the copy.  The caller can safely have
   'example' on its stack, since we will always return a pointer to a
   copy of it, not to the original.  Note that the inserted node will
   have .rc of zero and so the caller must immediately increment
   it. */
__attribute__((noinline))
static RCEC* ctxt__find_or_add ( RCEC* example )
{
   UWord hent;
   RCEC* copy;
   tl_assert(example && example->magic == RCEC_MAGIC);
   tl_assert(example->rc == 0);

   /* Search the hash table to see if we already have it. */
   stats__ctxt_tab_qs++;
   hent = example->frames_hash % N_RCEC_TAB;
   copy = contextTab[hent];
   while (1) {
      if (!copy) break;
      tl_assert(copy->magic == RCEC_MAGIC);
      stats__ctxt_tab_cmps++;
      if (0 == RCEC__cmp_by_frames(copy, example)) break;
      copy = copy->next;
   }

   if (copy) {
      tl_assert(copy != example);
      /* optimisation: if it's not at the head of its list, move 1
         step fwds, to make future searches cheaper */
      if (copy != contextTab[hent]) {
         move_RCEC_one_step_forward( &contextTab[hent], copy );
      }
   } else {
      copy = alloc_RCEC();
      tl_assert(copy != example);
      *copy = *example;
      copy->next = contextTab[hent];
      contextTab[hent] = copy;
      stats__ctxt_tab_curr++;
      if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
         stats__ctxt_tab_max = stats__ctxt_tab_curr;
   }
   return copy;
}

static inline UWord ROLW ( UWord w, Int n )
{
   Int bpw = 8 * sizeof(UWord);
   w = (w << n) | (w >> (bpw-n));
   return w;
}

__attribute__((noinline))
static RCEC* get_RCEC ( Thr* thr )
{
   UWord hash, i;
   RCEC  example;
   example.magic = RCEC_MAGIC;
   example.rc    = 0;
   example.rcX   = 0;
   main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
   hash = 0;
   for (i = 0; i < N_FRAMES; i++) {
      hash ^= example.frames[i];
      hash = ROLW(hash, 19);
   }
   example.frames_hash = hash;
   return ctxt__find_or_add( &example );
}

///////////////////////////////////////////////////////
//// Part (2):
///  A SparseWA guest-addr -> OldRef, that refers to (1)
///

// (UInt) `echo "Old Reference Information" | md5sum`
#define OldRef_MAGIC 0x30b1f075UL

/* Records an access: a thread and a context.  The size (1,2,4,8) and
   read-or-writeness are also encoded, as follows: the bottom bit of
   .thr is 1 if write, 0 if read; the bottom 2 bits of .rcec encode
   the size: 00 = 1, 01 = 2, 10 = 4, 11 = 8.
*/
typedef struct { Thr* thr; RCEC* rcec; } Thr_n_RCEC;

#define N_OLDREF_ACCS 5

typedef
   struct {
      UWord magic;  /* sanity check only */
      UWord gen;    /* when most recently accessed */
                    /* or free list when not in use */
      /* unused slots in this array have .thr == NULL */
      Thr_n_RCEC accs[N_OLDREF_ACCS];
   }
   OldRef;


//////////// BEGIN OldRef group allocator
static GroupAlloc oldref_group_allocator;

static OldRef* alloc_OldRef ( void ) {
   return gal_Alloc ( &oldref_group_allocator );
}

static void free_OldRef ( OldRef* r ) {
   tl_assert(r->magic == OldRef_MAGIC);
   gal_Free( &oldref_group_allocator, r );
}
//////////// END OldRef group allocator


static SparseWA* oldrefTree     = NULL; /* SparseWA* OldRef* */
static UWord     oldrefGen      = 0;    /* current LRU generation # */
static UWord     oldrefTreeN    = 0;    /* # elems in oldrefTree */
static UWord     oldrefGenIncAt = 0;    /* inc gen # when size hits this */

inline static void* ptr_or_UWord ( void* p, UWord w ) {
   return (void*)( ((UWord)p) | ((UWord)w) );
}
inline static void* ptr_and_UWord ( void* p, UWord w ) {
   return (void*)( ((UWord)p) & ((UWord)w) );
}
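
/* A sketch of how event_map_bind and libhb_event_map_lookup (below)
   use these helpers to pack an access description into a Thr_n_RCEC:
   a write-flag goes into bit 0 of .thr and a size tag into bits 0-1
   of .rcec.  This assumes, as is the case for the heap pointers used
   here, that both pointers are at least word-aligned, so their
   bottom bits are free.  Illustrative only, hence disabled. */
#if 0
static void tagging_example ( Thr* thr, RCEC* rcec )
{
   Thr_n_RCEC acc;
   acc.thr  = ptr_or_UWord( thr,  1 );  /* a write ...   */
   acc.rcec = ptr_or_UWord( rcec, 2 );  /* ... of size 4 */
   tl_assert( ptr_and_UWord(acc.thr,  ~3) == (void*)thr  );
   tl_assert( ptr_and_UWord(acc.rcec, ~3) == (void*)rcec );
   tl_assert( 1 == (UWord)ptr_and_UWord(acc.thr,  1) );  /* isW      */
   tl_assert( 2 == (UWord)ptr_and_UWord(acc.rcec, 3) );  /* szB == 4 */
}
#endif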

inline static UInt min_UInt ( UInt a, UInt b ) {
   return a < b ? a : b;
}

/* Compare the intervals [a1,a1+n1) and [a2,a2+n2).  Return -1 if the
   first interval is lower, 1 if the first interval is higher, and 0
   if there is any overlap.  Redundant paranoia with casting is there
   following what looked distinctly like a bug in gcc-4.1.2, in which
   some of the comparisons were done signedly instead of
   unsignedly. */
/* Copied from exp-ptrcheck/sg_main.c */
static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
                                     Addr a2, SizeT n2 ) {
   UWord a1w = (UWord)a1;
   UWord n1w = (UWord)n1;
   UWord a2w = (UWord)a2;
   UWord n2w = (UWord)n2;
   tl_assert(n1w > 0 && n2w > 0);
   if (a1w + n1w <= a2w) return -1L;
   if (a2w + n2w <= a1w) return 1L;
   return 0;
}
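
/* For example (a sketch): cmp_nonempty_intervals(0x1000,4, 0x1004,2)
   is -1, since [0x1000,0x1004) lies entirely below [0x1004,0x1006);
   whereas cmp_nonempty_intervals(0x1000,4, 0x1002,8) is 0, since the
   two ranges overlap at [0x1002,0x1004). */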

static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
{
   OldRef* ref;
   RCEC*   rcec;
   Word    i, j;
   UWord   keyW, valW;
   Bool    b;

   rcec = get_RCEC( thr );
   ctxt__rcinc(rcec);

   /* encode the size and writeness of the transaction in the bottom
      two bits of thr and rcec. */
   thr = ptr_or_UWord(thr, isW ? 1 : 0);
   switch (szB) {
      /* This doesn't look particularly branch-predictor friendly. */
      case 1:  rcec = ptr_or_UWord(rcec, 0); break;
      case 2:  rcec = ptr_or_UWord(rcec, 1); break;
      case 4:  rcec = ptr_or_UWord(rcec, 2); break;
      case 8:  rcec = ptr_or_UWord(rcec, 3); break;
      default: tl_assert(0);
   }

   /* Look in the map to see if we already have this. */
   b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, a );

   if (b) {

      /* We already have a record for this address.  We now need to
         see if we have a stack trace pertaining to this (thread, R/W,
         size) triple. */
      tl_assert(keyW == a);
      ref = (OldRef*)valW;
      tl_assert(ref->magic == OldRef_MAGIC);

      tl_assert(thr);
      for (i = 0; i < N_OLDREF_ACCS; i++) {
         if (ref->accs[i].thr != thr)
            continue;
         /* since .thr encodes both the accessing thread and the
            read/writeness, we know now that at least those features
            of the access match this entry.  So we just need to check
            the size indication.  Do this by inspecting the lowest 2
            bits of .rcec, which contain the encoded size info. */
         if (ptr_and_UWord(ref->accs[i].rcec,3) != ptr_and_UWord(rcec,3))
            continue;
         /* else we have a match, so stop looking. */
         break;
      }

      if (i < N_OLDREF_ACCS) {
         /* thread 'thr' has an entry at index 'i'.  Update it. */
         if (i > 0) {
            Thr_n_RCEC tmp = ref->accs[i-1];
            ref->accs[i-1] = ref->accs[i];
            ref->accs[i] = tmp;
            i--;
         }
         if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
         stats__ctxt_rcdec1++;
         ctxt__rcdec( ptr_and_UWord(ref->accs[i].rcec, ~3) );
         ref->accs[i].rcec = rcec;
         tl_assert(ref->accs[i].thr == thr);
      } else {
         /* No entry for this (thread, R/W, size) triple.  Shuffle all
            of them down one slot, and put the new entry at the start
            of the array. */
         if (ref->accs[N_OLDREF_ACCS-1].thr) {
            /* the last slot is in use.  We must dec the rc on the
               associated rcec. */
            tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
            stats__ctxt_rcdec2++;
            if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
               VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
            ctxt__rcdec( ptr_and_UWord(ref->accs[N_OLDREF_ACCS-1].rcec, ~3) );
         } else {
            tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
         }
         for (j = N_OLDREF_ACCS-1; j >= 1; j--)
            ref->accs[j] = ref->accs[j-1];
         ref->accs[0].thr = thr;
         ref->accs[0].rcec = rcec;
         /* thr==NULL is used to signify an empty slot, so we can't
            add a NULL thr. */
         tl_assert(ptr_and_UWord(thr, ~3) != 0);
      }

      ref->gen = oldrefGen;

   } else {

      /* We don't have a record for this address.  Create a new one. */
      if (oldrefTreeN >= oldrefGenIncAt) {
         oldrefGen++;
         oldrefGenIncAt = oldrefTreeN + 50000;
         if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
                            oldrefGen, oldrefTreeN );
      }

      ref = alloc_OldRef();
      ref->magic = OldRef_MAGIC;
      ref->gen = oldrefGen;
      ref->accs[0].rcec = rcec;
      ref->accs[0].thr = thr;
      /* thr==NULL is used to signify an empty slot, so we can't add a
         NULL thr. */
      tl_assert(ptr_and_UWord(thr, ~3) != 0);
      for (j = 1; j < N_OLDREF_ACCS; j++) {
         ref->accs[j].thr = NULL;
         ref->accs[j].rcec = NULL;
      }
      VG_(addToSWA)( oldrefTree, a, (UWord)ref );
      oldrefTreeN++;

   }
}


Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
                              /*OUT*/Thr**  resThr,
                              /*OUT*/SizeT* resSzB,
                              /*OUT*/Bool*  resIsW,
                              Thr* thr, Addr a, SizeT szB, Bool isW )
{
   Word    i, j;
   OldRef* ref;
   UWord   keyW, valW;
   Bool    b;

   Thr*    cand_thr;
   RCEC*   cand_rcec;
   Bool    cand_isW;
   SizeT   cand_szB;
   Addr    cand_a;

   Addr toCheck[15];
   Int  nToCheck = 0;

   tl_assert(thr);
   tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);

   toCheck[nToCheck++] = a;
   for (i = -7; i < (Word)szB; i++) {
      if (i != 0)
         toCheck[nToCheck++] = a + i;
   }
   tl_assert(nToCheck <= 15);
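
   /* A worked sketch of the loop above: for szB == 4, toCheck[] holds
      the 11 addresses a-7 .. a+3, i.e. every base address from which
      a stored access of size 1..8 could overlap [a, a+szB).  The
      15-element bound corresponds to the worst case, szB == 8. */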

   /* Now see if we can find a suitable matching event for
      any of the addresses in toCheck[0 .. nToCheck-1]. */
   for (j = 0; j < nToCheck; j++) {

      cand_a = toCheck[j];
      //      VG_(printf)("test %ld %p\n", j, cand_a);

      b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, cand_a );
      if (!b)
         continue;

      ref = (OldRef*)valW;
      tl_assert(keyW == cand_a);
      tl_assert(ref->magic == OldRef_MAGIC);
      tl_assert(ref->accs[0].thr); /* first slot must always be used */

      cand_thr  = NULL;
      cand_rcec = NULL;
      cand_isW  = False;
      cand_szB  = 0;

      for (i = 0; i < N_OLDREF_ACCS; i++) {
         Thr_n_RCEC* cand = &ref->accs[i];
         cand_thr  = ptr_and_UWord(cand->thr, ~3);
         cand_rcec = ptr_and_UWord(cand->rcec, ~3);
         /* Decode the writeness from the bottom bit of .thr. */
         cand_isW = 1 == (UWord)ptr_and_UWord(cand->thr, 1);
         /* Decode the size from the bottom two bits of .rcec. */
         switch ((UWord)ptr_and_UWord(cand->rcec, 3)) {
            case 0:  cand_szB = 1; break;
            case 1:  cand_szB = 2; break;
            case 2:  cand_szB = 4; break;
            case 3:  cand_szB = 8; break;
            default: tl_assert(0);
         }

         if (cand_thr == NULL)
            /* This slot isn't in use.  Ignore it. */
            continue;

         if (cand_thr == thr)
            /* This is an access by the same thread, but we're only
               interested in accesses from other threads.  Ignore. */
            continue;

         if ((!cand_isW) && (!isW))
            /* We don't want to report a read racing against another
               read; that's stupid.  So in this case move on. */
            continue;

         if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
            /* No overlap with the access we're asking about.  Ignore. */
            continue;

         /* We have a match.  Stop searching. */
         break;
      }

      tl_assert(i >= 0 && i <= N_OLDREF_ACCS);

      if (i < N_OLDREF_ACCS) {
         Int n, maxNFrames;
         /* return with success */
         tl_assert(cand_thr);
         tl_assert(cand_rcec);
         tl_assert(cand_rcec->magic == RCEC_MAGIC);
         tl_assert(cand_szB >= 1);
         /* Count how many non-zero frames we have. */
         maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
         for (n = 0; n < maxNFrames; n++) {
            if (0 == cand_rcec->frames[n]) break;
         }
         *resEC  = VG_(make_ExeContext_from_StackTrace)(cand_rcec->frames, n);
         *resThr = cand_thr;
         *resSzB = cand_szB;
         *resIsW = cand_isW;
         return True;
      }

      /* consider next address in toCheck[] */
   } /* for (j = 0; j < nToCheck; j++) */

   /* really didn't find anything. */
   return False;
}

static void event_map_init ( void )
{
   Word i;

   /* Context (RCEC) group allocator */
   init_GroupAlloc ( &rcec_group_allocator,
                     sizeof(RCEC),
                     1000 /* RCECs per group */,
                     HG_(zalloc),
                     "libhb.event_map_init.1 (RCEC groups)",
                     HG_(free) );

   /* Context table */
   tl_assert(!contextTab);
   contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
                             N_RCEC_TAB * sizeof(RCEC*) );
   tl_assert(contextTab);
   for (i = 0; i < N_RCEC_TAB; i++)
      contextTab[i] = NULL;

   /* Oldref group allocator */
   init_GroupAlloc ( &oldref_group_allocator,
                     sizeof(OldRef),
                     1000 /* OldRefs per group */,
                     HG_(zalloc),
                     "libhb.event_map_init.3 (OldRef groups)",
                     HG_(free) );

   /* Oldref tree */
   tl_assert(!oldrefTree);
   oldrefTree = VG_(newSWA)(
                   HG_(zalloc),
                   "libhb.event_map_init.4 (oldref tree)",
                   HG_(free)
                );
   tl_assert(oldrefTree);

   oldrefGen = 0;
   oldrefGenIncAt = 0;
   oldrefTreeN = 0;
}

static void event_map__check_reference_counts ( Bool before )
{
   RCEC*   rcec;
   OldRef* oldref;
   Word    i;
   UWord   nEnts = 0;
   UWord   keyW, valW;

   /* Set the 'check' reference counts to zero.  Also, optionally
      check that the real reference counts are non-zero.  We allow
      these to fall to zero before a GC, but the GC must get rid of
      all those that are zero, hence none should be zero after a
      GC. */
   for (i = 0; i < N_RCEC_TAB; i++) {
      for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
         nEnts++;
         tl_assert(rcec);
         tl_assert(rcec->magic == RCEC_MAGIC);
         if (!before)
            tl_assert(rcec->rc > 0);
         rcec->rcX = 0;
      }
   }

   /* check that the stats are sane */
   tl_assert(nEnts == stats__ctxt_tab_curr);
   tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);

   /* visit all the referencing points, inc check ref counts */
   VG_(initIterSWA)( oldrefTree );
   while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
      oldref = (OldRef*)valW;
      tl_assert(oldref->magic == OldRef_MAGIC);
      for (i = 0; i < N_OLDREF_ACCS; i++) {
         Thr*  aThr = ptr_and_UWord(oldref->accs[i].thr, ~3);
         RCEC* aRef = ptr_and_UWord(oldref->accs[i].rcec, ~3);
         if (aThr) {
            tl_assert(aRef);
            tl_assert(aRef->magic == RCEC_MAGIC);
            aRef->rcX++;
         } else {
            tl_assert(!aRef);
         }
      }
   }

   /* compare check ref counts with actual */
   for (i = 0; i < N_RCEC_TAB; i++) {
      for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
         tl_assert(rcec->rc == rcec->rcX);
      }
   }
}

__attribute__((noinline))
static void event_map_maybe_GC ( void )
{
   OldRef* oldref;
   UWord   keyW, valW, retained, maxGen;
   XArray* refs2del;
   Word    i, j, n2del;

   UWord* genMap      = NULL;
   UWord  genMap_min  = 0;
   UWord  genMap_size = 0;

   if (LIKELY(oldrefTreeN < HG_(clo_conflict_cache_size)))
      return;

   if (0)
      VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);

   /* Check for sane command line params.  Limit values must match
      those in hg_process_cmd_line_option. */
   tl_assert( HG_(clo_conflict_cache_size) >= 10*1000 );
   tl_assert( HG_(clo_conflict_cache_size) <= 30*1000*1000 );

   /* Check our counting is sane (expensive) */
   if (CHECK_CEM)
      tl_assert(oldrefTreeN == VG_(sizeSWA)( oldrefTree ));

   /* Check the reference counts (expensive) */
   if (CHECK_CEM)
      event_map__check_reference_counts( True/*before*/ );

   /* Compute the distribution of generation values in the ref tree.
      There are likely only to be a few different generation numbers
      in the whole tree, but we don't know what they are.  Hence use a
      dynamically resized array of counters.  The array is genMap[0
      .. genMap_size-1], where genMap[0] is the count for the
      generation number genMap_min, genMap[1] is the count for
      genMap_min+1, etc.  If a new number is seen outside the range
      [genMap_min .. genMap_min + genMap_size - 1] then the array is
      copied into a larger array, and genMap_min and genMap_size are
      adjusted accordingly. */

   /* genMap :: generation-number -> count-of-nodes-with-that-number */
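   /* e.g. (a sketch): if the only generations present are 7 and 9,
      then genMap_min == 7, genMap_size == 3, and genMap[] holds
      { count(gen 7), count(gen 8) == 0, count(gen 9) }. */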

   VG_(initIterSWA)( oldrefTree );
   while ( VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {

      UWord ea, key;
      oldref = (OldRef*)valW;
      key = oldref->gen;

      /* BEGIN find 'ea', which is the index in genMap holding the
         count for generation number 'key'. */
      if (UNLIKELY(genMap == NULL)) {
         /* deal with the first key to be seen, so that the following
            cases don't need to handle the complexity of a NULL count
            array. */
         genMap_min  = key;
         genMap_size = 1;
         genMap = HG_(zalloc)( "libhb.emmG.1a",
                               genMap_size * sizeof(UWord) );
         ea = 0;
         if (0) VG_(printf)("(%lu) case 1 [%lu .. %lu]\n",
                            key, genMap_min, genMap_min+genMap_size-1 );
      }
      else
      if (LIKELY(key >= genMap_min && key < genMap_min + genMap_size)) {
         /* this is the expected (almost-always-happens) case: 'key'
            is already mapped in the array. */
         ea = key - genMap_min;
      }
      else
      if (key < genMap_min) {
         /* 'key' appears before the start of the current array.
            Extend the current array by allocating a larger one and
            copying the current one to the upper end of it. */
         Word   more;
         UWord* map2;
         more = genMap_min - key;
         tl_assert(more > 0);
         map2 = HG_(zalloc)( "libhb.emmG.1b",
                             (genMap_size + more) * sizeof(UWord) );
         VG_(memcpy)( &map2[more], genMap, genMap_size * sizeof(UWord) );
         HG_(free)( genMap );
         genMap = map2;
         genMap_size += more;
         genMap_min -= more;
         ea = 0;
         tl_assert(genMap_min == key);
         if (0) VG_(printf)("(%lu) case 2 [%lu .. %lu]\n",
                            key, genMap_min, genMap_min+genMap_size-1 );
      }
      else {
         /* 'key' appears after the end of the current array.  Extend
            the current array by allocating a larger one and copying
            the current one to the lower end of it. */
         Word   more;
         UWord* map2;
         tl_assert(key >= genMap_min + genMap_size);
         more = key - (genMap_min + genMap_size) + 1;
         tl_assert(more > 0);
         map2 = HG_(zalloc)( "libhb.emmG.1c",
                             (genMap_size + more) * sizeof(UWord) );
         VG_(memcpy)( &map2[0], genMap, genMap_size * sizeof(UWord) );
         HG_(free)( genMap );
         genMap = map2;
         genMap_size += more;
         ea = genMap_size - 1;
         tl_assert(genMap_min + genMap_size - 1 == key);
         if (0) VG_(printf)("(%lu) case 3 [%lu .. %lu]\n",
                            key, genMap_min, genMap_min+genMap_size-1 );
      }
      /* END find 'ea' from 'key' */

      tl_assert(ea >= 0 && ea < genMap_size);
      /* and the whole point of this elaborate computation of 'ea' is .. */
      genMap[ea]++;
   }

   tl_assert(genMap);
   tl_assert(genMap_size > 0);

   /* Sanity check what we just computed */
   { UWord sum = 0;
     for (i = 0; i < genMap_size; i++) {
        if (0) VG_(printf)("  xxx: gen %ld has %lu\n",
                           i + genMap_min, genMap[i] );
        sum += genMap[i];
     }
     tl_assert(sum == oldrefTreeN);
   }

   /* Figure out how many generations to throw away */
   retained = oldrefTreeN;
   maxGen = 0;

   for (i = 0; i < genMap_size; i++) {
      keyW = i + genMap_min;
      valW = genMap[i];
      tl_assert(keyW > 0); /* can't allow a generation # 0 */
      if (0) VG_(printf)("  XXX: gen %lu has %lu\n", keyW, valW );
      tl_assert(keyW >= maxGen);
      tl_assert(retained >= valW);
      if (retained - valW
          > (UWord)(HG_(clo_conflict_cache_size)
                    * EVENT_MAP_GC_DISCARD_FRACTION)) {
         retained -= valW;
         maxGen = keyW;
      } else {
         break;
      }
   }
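
   /* A worked sketch of the loop above: with clo_conflict_cache_size
      == 1,000,000 and generation counts {1: 300k, 2: 300k, 3: 400k},
      gen 1 is discarded (retained drops 1000k -> 700k, still above
      the 500k floor), but discarding gen 2 as well would leave only
      400k, below the floor; so we stop with maxGen == 1 and
      retained == 700k. */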

   HG_(free)(genMap);

   tl_assert(retained >= 0 && retained <= oldrefTreeN);

   /* Now make up a big list of the oldrefTree entries we want to
      delete.  We can't simultaneously traverse the tree and delete
      stuff from it, so first we need to copy them off somewhere
      else.  (sigh) */
   refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2",
                          HG_(free), sizeof(Addr) );

   if (retained < oldrefTreeN) {

      /* This is the normal (expected) case.  We discard any ref whose
         generation number <= maxGen. */
      VG_(initIterSWA)( oldrefTree );
      while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
         oldref = (OldRef*)valW;
         tl_assert(oldref->magic == OldRef_MAGIC);
         if (oldref->gen <= maxGen) {
            VG_(addToXA)( refs2del, &keyW );
         }
      }
      if (VG_(clo_stats)) {
         VG_(message)(Vg_DebugMsg,
            "libhb: EvM GC: delete generations %lu and below, "
            "retaining %lu entries\n",
            maxGen, retained );
      }

   } else {

      static UInt rand_seed = 0; /* leave as static */

      /* Degenerate case: there's only one generation in the entire
         tree, so we need to have some other way of deciding which
         refs to throw away.  Just throw out half of them randomly. */
      tl_assert(retained == oldrefTreeN);
      VG_(initIterSWA)( oldrefTree );
      while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
         UInt n;
         oldref = (OldRef*)valW;
         tl_assert(oldref->magic == OldRef_MAGIC);
         n = VG_(random)( &rand_seed );
         if ((n & 0xFFF) < 0x800) {
            VG_(addToXA)( refs2del, &keyW );
            retained--;
         }
      }
      if (VG_(clo_stats)) {
         VG_(message)(Vg_DebugMsg,
            "libhb: EvM GC: randomly delete half the entries, "
            "retaining %lu entries\n",
            retained );
      }

   }

   n2del = VG_(sizeXA)( refs2del );
   tl_assert(n2del == (Word)(oldrefTreeN - retained));

   if (0) VG_(printf)("%s","deleting entries\n");
   for (i = 0; i < n2del; i++) {
      Bool b;
      Addr ga2del = *(Addr*)VG_(indexXA)( refs2del, i );
      b = VG_(delFromSWA)( oldrefTree, &keyW, &valW, ga2del );
      tl_assert(b);
      tl_assert(keyW == ga2del);
      oldref = (OldRef*)valW;
      for (j = 0; j < N_OLDREF_ACCS; j++) {
         Thr*  aThr = ptr_and_UWord(oldref->accs[j].thr, ~3);
         RCEC* aRef = ptr_and_UWord(oldref->accs[j].rcec, ~3);
         if (aRef) {
            tl_assert(aThr);
            stats__ctxt_rcdec3++;
            ctxt__rcdec( aRef );
         } else {
            tl_assert(!aThr);
         }
      }

      free_OldRef( oldref );
   }

   VG_(deleteXA)( refs2del );

   tl_assert( VG_(sizeSWA)( oldrefTree ) == retained );

   oldrefTreeN = retained;
   oldrefGenIncAt = oldrefTreeN; /* start new gen right away */

   /* Throw away all RCECs with zero reference counts */
   for (i = 0; i < N_RCEC_TAB; i++) {
      RCEC** pp = &contextTab[i];
      RCEC*  p  = *pp;
      while (p) {
         if (p->rc == 0) {
            *pp = p->next;
            free_RCEC(p);
            p = *pp;
            tl_assert(stats__ctxt_tab_curr > 0);
            stats__ctxt_tab_curr--;
         } else {
            pp = &p->next;
            p = p->next;
         }
      }
   }

   /* Check the reference counts (expensive) */
   if (CHECK_CEM)
      event_map__check_reference_counts( False/*after*/ );

   //if (0)
   //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
   //            VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));

}


/////////////////////////////////////////////////////////
//                                                     //
//                Core MSM                             //
//                                                     //
/////////////////////////////////////////////////////////

/* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
   Nov 08, and again after [...],
   June 09. */

static ULong stats__msmcread         = 0;
static ULong stats__msmcread_change  = 0;
static ULong stats__msmcwrite        = 0;
static ULong stats__msmcwrite_change = 0;

/* Some notes on the H1 history mechanism:

   Transition rules are:

   read_{Kr,Kw}(Cr,Cw)  = (Cr,           Cr `join` Kw)
   write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw)

   After any access by a thread T to a location L, L's constraint pair
   (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.

   After a race by thread T conflicting with some previous access by
   some other thread U, for a location with constraint (before
   processing the later access) (Cr,Cw), Cw[U] is the segment in
   which the previous access lies.

   Hence in record_race_info, we pass in Cfailed and Kfailed, which
   are compared so as to find out which thread(s) this access
   conflicts with.  Once that is established, we also require the
   pre-update Cw for the location, so we can index into it for those
   threads, to get the scalar clock values for the point at which the
   former accesses were made.  (In fact we only bother to do any of
   this for an arbitrarily chosen one of the conflicting threads, as
   that's simpler, it avoids flooding the user with vast amounts of
   mostly useless information, and because the program is wrong if it
   contains any races at all -- so we don't really need to show all
   conflicting access pairs initially, so long as we only show none if
   none exist).

   ---

   That requires the auxiliary proof that

      (Cr `join` Kw)[T] == Kw[T]

   Why should that be true?  Because for any thread T, Kw[T] >= the
   scalar clock value for T known by any other thread.  In other
   words, because T's value for its own scalar clock is at least as up
   to date as the value for it known by any other thread (that is true
   for both the R- and W- scalar clocks).  Hence no other thread will
   be able to feed in a value for that element (indirectly via a
   constraint) which will exceed Kw[T], and hence the join cannot
   cause that particular element to advance.
*/
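
/* A worked sketch of the transition rules, for two threads T1,T2 and
   vector clocks written <T1,T2>: suppose location L has constraint
   (Cr,Cw) = (<1,0>,<1,0>) after a write by T1 with Kw = <1,0>.  If
   T2, with Kr = Kw = <0,1>, now reads L without any intervening
   synchronisation, the race check Cr <= Kr fails (<1,0> is not <=
   <0,1>, since T2 has not synchronised with T1's write), so a race is
   reported; the read rule then yields
      (Cr, Cr `join` Kw) = (<1,0>, <1,1>). */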

__attribute__((noinline))
static void record_race_info ( Thr* acc_thr,
                               Addr acc_addr, SizeT szB, Bool isWrite,
                               VtsID Cfailed,
                               VtsID Kfailed,
                               VtsID Cw )
{
   /* Call here to report a race.  We just hand it onwards to
      HG_(record_error_Race).  If that in turn discovers that the
      error is going to be collected, then, at history_level 2, that
      queries the conflicting-event map.  The alternative would be to
      query it right here.  But that causes a lot of pointless queries
      for errors which will shortly be discarded as duplicates, and
      can become a performance overhead; so we defer the query until
      we know the error is not a duplicate. */

   /* Stacks for the bounds of the (or one of the) conflicting
      segment(s).  These are only set at history_level 1. */
   ExeContext* hist1_seg_start = NULL;
   ExeContext* hist1_seg_end   = NULL;
   Thread*     hist1_conf_thr  = NULL;

   tl_assert(acc_thr);
   tl_assert(acc_thr->hgthread);
   tl_assert(acc_thr->hgthread->hbthr == acc_thr);
   tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);

   if (HG_(clo_history_level) == 1) {
      Bool found;
      Word firstIx, lastIx;
      ULong_n_EC key;

      /* At history_level 1, we must round up the relevant stack-pair
         for the conflicting segment right now.  This is because
         deferring it is complex; we can't (easily) put Kfailed and
         Cfailed into the XError and wait for later without
         getting tied up in difficulties with VtsID reference
         counting.  So just do it now. */
      Thr*  confThr;
      ULong confTym = 0;
      /* Which thread are we in conflict with?  There may be more than
         one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
         (in fact it's the one with the lowest Thr* value). */
      confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
      /* This must exist!  since if it was NULL then there's no
         conflict (semantics of return value of
         VtsID__findFirst_notLEQ), and msmc{read,write}, which has
         called us, just checked exactly this -- that there was in
         fact a race. */
      tl_assert(confThr);

      /* Get the scalar clock value that the conflicting thread
         introduced into the constraint.  A careful examination of the
         base machine rules shows that this must be the same as the
         conflicting thread's scalar clock when it created this
         constraint.  Hence we know the scalar clock of the
         conflicting thread when the conflicting access was made. */
      confTym = VtsID__indexAt( Cfailed, confThr );

      /* Using this scalar clock, index into the conflicting thread's
         collection of stack traces made each time its vector clock
         (hence its scalar clock) changed.  This gives the stack
         traces at the start and end of the conflicting segment (well,
         as per comment just above, of one of the conflicting
         segments, if there are more than one). */
      key.ull = confTym;
      key.ec  = NULL;
      /* tl_assert(confThr); -- asserted just above */
      tl_assert(confThr->local_Kws_n_stacks);
      firstIx = lastIx = 0;
      found = VG_(lookupXA_UNSAFE)(
                 confThr->local_Kws_n_stacks,
                 &key, &firstIx, &lastIx,
                 (Int(*)(void*,void*))cmp__ULong_n_EC__by_ULong
              );
      if (0) VG_(printf)("record_race_info %u %u %u  confThr %p "
                         "confTym %llu found %d (%lu,%lu)\n",
                         Cfailed, Kfailed, Cw,
                         confThr, confTym, found, firstIx, lastIx);
      /* We can't indefinitely collect stack traces at VTS
         transitions, since we'd eventually run out of memory.  Hence
         note_local_Kw_n_stack_for will eventually throw away old
         ones, which in turn means we might fail to find index value
         confTym in the array. */
      if (found) {
         ULong_n_EC *pair_start, *pair_end;
         pair_start
            = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
         hist1_seg_start = pair_start->ec;
         if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
            pair_end
               = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
                                            lastIx+1 );
            /* from properties of VG_(lookupXA) and the comparison fn used: */
            tl_assert(pair_start->ull < pair_end->ull);
            hist1_seg_end = pair_end->ec;
            /* Could do a bit better here.  It may be that pair_end
               doesn't have a stack, but the following entries in the
               array have the same scalar Kw and do have a stack.  So
               we should search a bit further along the array than
               lastIx+1 if hist1_seg_end is NULL. */
         } else {
            if (confThr->still_alive)
               hist1_seg_end = main_get_EC( confThr );
         }
         // seg_start could be NULL iff this is the first stack in the thread
         //if (seg_start) VG_(pp_ExeContext)(seg_start);
         //if (seg_end)   VG_(pp_ExeContext)(seg_end);
         hist1_conf_thr = confThr->hgthread;
      }
   }

   HG_(record_error_Race)( acc_thr->hgthread, acc_addr,
                           szB, isWrite,
                           hist1_conf_thr, hist1_seg_start, hist1_seg_end );
}

static Bool is_sane_SVal_C ( SVal sv ) {
   Bool leq;
   if (!SVal__isC(sv)) return True;
   leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
   return leq;
}
4465
4466
4467/* Compute new state following a read */
sewardj23f12002009-07-24 08:45:08 +00004468static inline SVal msmcread ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00004469 /* The following are only needed for
4470 creating error reports. */
4471 Thr* acc_thr,
4472 Addr acc_addr, SizeT szB )
4473{
4474 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00004475 stats__msmcread++;
sewardjf98e1c02008-10-25 16:22:41 +00004476
4477 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00004478 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004479 tl_assert(is_sane_SVal_C(svOld));
4480 }
4481
sewardj1c0ce7a2009-07-01 08:10:49 +00004482 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00004483 VtsID tviR = acc_thr->viR;
4484 VtsID tviW = acc_thr->viW;
4485 VtsID rmini = SVal__unC_Rmin(svOld);
4486 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00004487 Bool leq = VtsID__cmpLEQ(rmini,tviR);
4488 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00004489 /* no race */
4490 /* Note: RWLOCK subtlety: use tviW, not tviR */
4491 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
4492 goto out;
4493 } else {
sewardjb0e009d2008-11-19 16:35:15 +00004494 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00004495 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
4496 tl_assert(leqxx);
4497 // same as in non-race case
4498 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
4499 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
sewardj8ab2c132009-08-02 09:34:35 +00004500 rmini, /* Cfailed */
4501 tviR, /* Kfailed */
4502 wmini /* Cw */ );
sewardjf98e1c02008-10-25 16:22:41 +00004503 goto out;
4504 }
4505 }
4506 if (SVal__isA(svOld)) {
4507 /* reading no-access memory (sigh); leave unchanged */
4508 /* check for no pollution */
4509 tl_assert(svOld == SVal_NOACCESS);
4510 svNew = SVal_NOACCESS;
4511 goto out;
4512 }
sewardj23f12002009-07-24 08:45:08 +00004513 if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00004514 tl_assert(0);
4515
4516 out:
sewardj8f5374e2008-12-07 11:40:17 +00004517 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004518 tl_assert(is_sane_SVal_C(svNew));
4519 }
sewardj1c0ce7a2009-07-01 08:10:49 +00004520 if (UNLIKELY(svNew != svOld)) {
4521 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00004522 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00004523 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00004524 event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00004525 stats__msmcread_change++;
sewardjf98e1c02008-10-25 16:22:41 +00004526 }
4527 }
4528 return svNew;
4529}
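
/* A minimal sketch of the read rule above, using plain integers in
   place of VTSs for a one-thread world, where cmpLEQ is <= and join
   is max.  Purely illustrative (and guarded out); the names and
   numbers are made up. */
#if 0
static void toy_msm_read_rule ( void )
{
   UInt rmini = 3, wmini = 3;   /* constraint (Rmin,Wmin)        */
   UInt tviR  = 5, tviW  = 4;   /* reader's clocks, tviW <= tviR */
   if (rmini <= tviR) {
      /* no race: constraint becomes (rmini, max(wmini,tviW)) */
      UInt newR = rmini;
      UInt newW = wmini > tviW ? wmini : tviW;
      tl_assert(newR == 3 && newW == 4);
   }
}
#endif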


/* Compute new state following a write */
static inline SVal msmcwrite ( SVal svOld,
                               /* The following are only needed for
                                  creating error reports. */
                               Thr* acc_thr,
                               Addr acc_addr, SizeT szB )
{
   SVal svNew = SVal_INVALID;
   stats__msmcwrite++;

   /* Redundant sanity check on the constraints */
   if (CHECK_MSM) {
      tl_assert(is_sane_SVal_C(svOld));
   }

   if (LIKELY(SVal__isC(svOld))) {
      VtsID tviW  = acc_thr->viW;
      VtsID wmini = SVal__unC_Wmin(svOld);
      Bool  leq   = VtsID__cmpLEQ(wmini,tviW);
      if (LIKELY(leq)) {
         /* no race */
         svNew = SVal__mkC( tviW, tviW );
         goto out;
      } else {
         VtsID rmini = SVal__unC_Rmin(svOld);
         /* assert on sanity of constraints. */
         Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
         tl_assert(leqxx);
         // same as in non-race case
         // proof: in the non-race case, we have
         //    rmini <= wmini (invar on constraints)
         //    tviW  <= tviR  (invar on thread clocks)
         //    wmini <= tviW  (from run-time check)
         // hence from transitivity of <= we have
         //    rmini <= wmini <= tviW
         // and so join(rmini,tviW) == tviW
         // and    join(wmini,tviW) == tviW
         // qed.
         svNew = SVal__mkC( VtsID__join2(rmini, tviW),
                            VtsID__join2(wmini, tviW) );
         record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
                           wmini, /* Cfailed */
                           tviW,  /* Kfailed */
                           wmini  /* Cw */ );
         goto out;
      }
   }
   if (SVal__isA(svOld)) {
      /* writing no-access memory (sigh); leave unchanged */
      /* check for no pollution */
      tl_assert(svOld == SVal_NOACCESS);
      svNew = SVal_NOACCESS;
      goto out;
   }
   if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
   tl_assert(0);

  out:
   if (CHECK_MSM) {
      tl_assert(is_sane_SVal_C(svNew));
   }
   if (UNLIKELY(svNew != svOld)) {
      tl_assert(svNew != SVal_INVALID);
      if (HG_(clo_history_level) >= 2
          && SVal__isC(svOld) && SVal__isC(svNew)) {
         event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
         stats__msmcwrite_change++;
      }
   }
   return svNew;
}
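
/* The proof in the comment above can be checked on concrete numbers.
   Again treating VTSs as plain integers with join = max (a guarded-out
   sketch only, with invented values): */
#if 0
static void toy_msm_write_proof_check ( void )
{
   UInt rmini = 2, wmini = 3;    /* rmini <= wmini (constraint invar) */
   UInt tviW  = 5, tviR  = 7;    /* tviW  <= tviR  (clock invar)      */
   tl_assert(wmini <= tviW);     /* the run-time check                */
   /* hence both joins collapse to tviW: */
   tl_assert((rmini > tviW ? rmini : tviW) == tviW);
   tl_assert((wmini > tviW ? wmini : tviW) == tviW);
}
#endif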


/////////////////////////////////////////////////////////
//                                                     //
//   Apply core MSM to specific memory locations       //
//                                                     //
/////////////////////////////////////////////////////////

/*------------- ZSM accesses: 8 bit sapply ------------- */

static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cread08s++;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 .. 7 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
      SVal* tree = &cl->svals[tno << 3];
      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcread( svOld, thr,a,1 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
}

static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cwrite08s++;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 .. 7 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
      SVal* tree = &cl->svals[tno << 3];
      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcwrite( svOld, thr,a,1 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
}

/*------------- ZSM accesses: 16 bit sapply ------------- */

static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cread16s++;
   if (UNLIKELY(!aligned16(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
      if (valid_value_is_below_me_16(descr, toff)) {
         goto slowcase;
      } else {
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
      }
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcread( svOld, thr,a,2 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_16to8splits++;
   zsm_sapply08__msmcread( thr, a + 0 );
   zsm_sapply08__msmcread( thr, a + 1 );
}

static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cwrite16s++;
   if (UNLIKELY(!aligned16(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
      if (valid_value_is_below_me_16(descr, toff)) {
         goto slowcase;
      } else {
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
      }
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcwrite( svOld, thr,a,2 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_16to8splits++;
   zsm_sapply08__msmcwrite( thr, a + 0 );
   zsm_sapply08__msmcwrite( thr, a + 1 );
}

/*------------- ZSM accesses: 32 bit sapply ------------- */

static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cread32s++;
   if (UNLIKELY(!aligned32(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 or 4 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
      if (valid_value_is_above_me_32(descr, toff)) {
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
      } else {
         goto slowcase;
      }
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcread( svOld, thr,a,4 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_32to16splits++;
   zsm_sapply16__msmcread( thr, a + 0 );
   zsm_sapply16__msmcread( thr, a + 2 );
}

static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cwrite32s++;
   if (UNLIKELY(!aligned32(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 or 4 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
      if (valid_value_is_above_me_32(descr, toff)) {
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
      } else {
         goto slowcase;
      }
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcwrite( svOld, thr,a,4 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_32to16splits++;
   zsm_sapply16__msmcwrite( thr, a + 0 );
   zsm_sapply16__msmcwrite( thr, a + 2 );
}

/*------------- ZSM accesses: 64 bit sapply ------------- */

static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno;
   //UWord    toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cread64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   //toff  = get_tree_offset(a); /* == 0, unused */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
      goto slowcase;
   }
   svOld = cl->svals[cloff];
   svNew = msmcread( svOld, thr,a,8 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_64to32splits++;
   zsm_sapply32__msmcread( thr, a + 0 );
   zsm_sapply32__msmcread( thr, a + 4 );
}

static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno;
   //UWord    toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cwrite64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   //toff  = get_tree_offset(a); /* == 0, unused */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
      goto slowcase;
   }
   svOld = cl->svals[cloff];
   svNew = msmcwrite( svOld, thr,a,8 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_64to32splits++;
   zsm_sapply32__msmcwrite( thr, a + 0 );
   zsm_sapply32__msmcwrite( thr, a + 4 );
}

/*--------------- ZSM accesses: 8 bit swrite --------------- */

static
void zsm_swrite08 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   UShort     descr;
   stats__cline_swrite08s++;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 .. 7 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
      SVal* tree = &cl->svals[tno << 3];
      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
}

/*--------------- ZSM accesses: 16 bit swrite --------------- */

static
void zsm_swrite16 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   UShort     descr;
   stats__cline_swrite16s++;
   if (UNLIKELY(!aligned16(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
      if (valid_value_is_below_me_16(descr, toff)) {
         /* Writing at this level.  Need to fix up 'descr'. */
         cl->descrs[tno] = pullup_descr_to_16(descr, toff);
         /* At this point, the tree does not match cl->descr[tno] any
            more.  The assignments below will fix it up. */
      } else {
         /* We can't indiscriminately write on the w16 node as in the
            w64 case, as that might make the node inconsistent with
            its parent.  So first, pull down to this level. */
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
         if (CHECK_ZSM)
            tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      }
   }
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_16to8splits++;
   zsm_swrite08( a + 0, svNew );
   zsm_swrite08( a + 1, svNew );
}
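
/* Sketch of the descriptor fixup above, on a toy descriptor where
   (purely as an assumption for illustration) bit i set means "byte i
   of the 8-byte tree holds a valid 8-bit value" and bits 8..11 are
   16-bit marks for offsets 0, 2, 4 and 6.  The real TREE_DESCR_*
   layout is more involved; this only shows the pullup idea: replacing
   two 8-bit marks by the one 16-bit mark that covers them.  Guarded
   out, so it does not affect the build. */
#if 0
#define TOY_8_MARK(i)   (1u << (i))          /* i in 0..7        */
#define TOY_16_MARK(t)  (1u << (8 + (t)/2))  /* t in {0,2,4,6}   */

static UShort toy_pullup_to_16 ( UShort descr, UWord toff )
{
   /* drop the two 8-bit marks, raise the covering 16-bit mark */
   descr &= ~(UShort)(TOY_8_MARK(toff) | TOY_8_MARK(toff+1));
   descr |=  (UShort)TOY_16_MARK(toff);
   return descr;
}
#endif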

/*--------------- ZSM accesses: 32 bit swrite --------------- */

static
void zsm_swrite32 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   UShort     descr;
   stats__cline_swrite32s++;
   if (UNLIKELY(!aligned32(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 or 4 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
      if (valid_value_is_above_me_32(descr, toff)) {
         /* We can't indiscriminately write on the w32 node as in the
            w64 case, as that might make the node inconsistent with
            its parent.  So first, pull down to this level. */
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
         if (CHECK_ZSM)
            tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      } else {
         /* Writing at this level.  Need to fix up 'descr'. */
         cl->descrs[tno] = pullup_descr_to_32(descr, toff);
         /* At this point, the tree does not match cl->descr[tno] any
            more.  The assignments below will fix it up. */
      }
   }
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   cl->svals[cloff + 2] = SVal_INVALID;
   cl->svals[cloff + 3] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_32to16splits++;
   zsm_swrite16( a + 0, svNew );
   zsm_swrite16( a + 2, svNew );
}

/*--------------- ZSM accesses: 64 bit swrite --------------- */

static
void zsm_swrite64 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord      cloff, tno;
   //UWord    toff;
   stats__cline_swrite64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   //toff  = get_tree_offset(a); /* == 0, unused */
   cl->descrs[tno] = TREE_DESCR_64;
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   cl->svals[cloff + 2] = SVal_INVALID;
   cl->svals[cloff + 3] = SVal_INVALID;
   cl->svals[cloff + 4] = SVal_INVALID;
   cl->svals[cloff + 5] = SVal_INVALID;
   cl->svals[cloff + 6] = SVal_INVALID;
   cl->svals[cloff + 7] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_64to32splits++;
   zsm_swrite32( a + 0, svNew );
   zsm_swrite32( a + 4, svNew );
}

/*------------- ZSM accesses: 8 bit sread/scopy ------------- */

static
SVal zsm_sread08 ( Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   UShort     descr;
   stats__cline_sread08s++;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 .. 7 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
      SVal* tree = &cl->svals[tno << 3];
      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
   }
   return cl->svals[cloff];
}

static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
   SVal sv;
   stats__cline_scopy08s++;
   sv = zsm_sread08( src );
   zsm_swrite08( dst, sv );
}


/* Block-copy states (needed for implementing realloc()).  Note this
   doesn't change the filtering arrangements.  The caller of
   zsm_scopy_range needs to attend to that. */

static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
{
   SizeT i;
   if (len == 0)
      return;

   /* assert for non-overlappingness */
   tl_assert(src+len <= dst || dst+len <= src);

   /* To be simple, just copy byte by byte.  But so as not to wreck
      performance for later accesses to dst[0 .. len-1], normalise
      destination lines as we finish with them, and also normalise the
      line containing the first and last address. */
   for (i = 0; i < len; i++) {
      Bool normalise
         = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
           || i == 0       /* first in range */
           || i == len-1;  /* last in range */
      zsm_scopy08( src+i, dst+i, normalise );
   }
}
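
/* The normalisation schedule used in the loop above, shown standalone:
   for a copy of length 'len' ending up at 'dst', iteration i requests
   normalisation iff it writes the first byte, the last byte, or the
   last byte of a cache line.  A guarded-out sketch that just restates
   that predicate (it assumes the same get_cacheline_offset semantics
   as the code above): */
#if 0
static Bool toy_wants_normalise ( Addr dst, SizeT len, SizeT i )
{
   return get_cacheline_offset( dst+i+1 ) == 0  /* last in line   */
          || i == 0                             /* first in range */
          || i == len-1;                        /* last in range  */
}
#endif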


/* For setting address ranges to a given value.  Has considerable
   sophistication so as to avoid generating large numbers of pointless
   cache loads/writebacks for large ranges. */

/* Do small ranges in-cache, in the obvious way. */
static
void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_swrite32( a, svNew );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_swrite64( a, svNew );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_swrite08( a, svNew );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_swrite16( a, svNew );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_swrite32( a, svNew );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_swrite64( a, svNew );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_swrite32( a, svNew );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_swrite16( a, svNew );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_swrite08( a, svNew );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}
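
/* The alignment ladder above decomposes any (a, len) into at most one
   leading 1-, 2- and 4-byte write, a run of aligned 8-byte writes, and
   at most one trailing 4-, 2- and 1-byte write.  A guarded-out sketch
   that just tallies the chunk counts instead of writing (it reuses the
   aligned16/32/64 helpers used throughout this file): */
#if 0
static void toy_ladder ( Addr a, SizeT len,
                         /*OUT*/UWord* n1, UWord* n2, UWord* n4, UWord* n8 )
{
   *n1 = *n2 = *n4 = *n8 = 0;
   if (!aligned16(a) && len >= 1) { (*n1)++; a += 1; len -= 1; }
   if (!aligned32(a) && len >= 2) { (*n2)++; a += 2; len -= 2; }
   if (!aligned64(a) && len >= 4) { (*n4)++; a += 4; len -= 4; }
   while (len >= 8)               { (*n8)++; a += 8; len -= 8; }
   if (len >= 4)                  { (*n4)++; a += 4; len -= 4; }
   if (len >= 2)                  { (*n2)++; a += 2; len -= 2; }
   if (len >= 1)                  { (*n1)++;         len -= 1; }
}
#endif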


/* If we're doing a small range, hand off to zsm_sset_range_SMALL.  But
   for larger ranges, try to operate directly on the out-of-cache
   representation, rather than dragging lines into the cache,
   overwriting them, and forcing them out.  This turns out to be an
   important performance optimisation.

   Note that this doesn't change the filtering arrangements.  The
   caller of zsm_sset_range needs to attend to that. */

static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
{
   tl_assert(svNew != SVal_INVALID);
   stats__cache_make_New_arange += (ULong)len;

   if (0 && len > 500)
      VG_(printf)("make New ( %#lx, %ld )\n", a, len );

   if (0) {
      static UWord n_New_in_cache = 0;
      static UWord n_New_not_in_cache = 0;
      /* tag is 'a' with the in-line offset masked out,
         eg a[31]..a[4] 0000 */
      Addr  tag = a & ~(N_LINE_ARANGE - 1);
      UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
      if (LIKELY(tag == cache_shmem.tags0[wix])) {
         n_New_in_cache++;
      } else {
         n_New_not_in_cache++;
      }
      if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
         VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
                     n_New_in_cache, n_New_not_in_cache );
   }

   if (LIKELY(len < 2 * N_LINE_ARANGE)) {
      zsm_sset_range_SMALL( a, len, svNew );
   } else {
      Addr  before_start  = a;
      Addr  aligned_start = cacheline_ROUNDUP(a);
      Addr  after_start   = cacheline_ROUNDDN(a + len);
      UWord before_len    = aligned_start - before_start;
      UWord aligned_len   = after_start - aligned_start;
      UWord after_len     = a + len - after_start;
      tl_assert(before_start <= aligned_start);
      tl_assert(aligned_start <= after_start);
      tl_assert(before_len < N_LINE_ARANGE);
      tl_assert(after_len < N_LINE_ARANGE);
      tl_assert(get_cacheline_offset(aligned_start) == 0);
      if (get_cacheline_offset(a) == 0) {
         tl_assert(before_len == 0);
         tl_assert(a == aligned_start);
      }
      if (get_cacheline_offset(a+len) == 0) {
         tl_assert(after_len == 0);
         tl_assert(after_start == a+len);
      }
      if (before_len > 0) {
         zsm_sset_range_SMALL( before_start, before_len, svNew );
      }
      if (after_len > 0) {
         zsm_sset_range_SMALL( after_start, after_len, svNew );
      }
      stats__cache_make_New_inZrep += (ULong)aligned_len;

      while (1) {
         Addr  tag;
         UWord wix;
         if (aligned_start >= after_start)
            break;
         tl_assert(get_cacheline_offset(aligned_start) == 0);
         tag = aligned_start & ~(N_LINE_ARANGE - 1);
         wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
         if (tag == cache_shmem.tags0[wix]) {
            UWord i;
            for (i = 0; i < N_LINE_ARANGE / 8; i++)
               zsm_swrite64( aligned_start + i * 8, svNew );
         } else {
            UWord i;
            Word zix;
            SecMap* sm;
            LineZ* lineZ;
            /* This line is not in the cache.  Do not force it in; instead
               modify it in-place. */
            /* find the Z line to write in and rcdec it or the
               associated F line. */
            find_Z_for_writing( &sm, &zix, tag );
            tl_assert(sm);
            tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
            lineZ = &sm->linesZ[zix];
            lineZ->dict[0] = svNew;
            lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
            for (i = 0; i < N_LINE_ARANGE/4; i++)
               lineZ->ix2s[i] = 0; /* all refer to dict[0] */
            rcinc_LineZ(lineZ);
         }
         aligned_start += N_LINE_ARANGE;
         aligned_len -= N_LINE_ARANGE;
      }
      tl_assert(aligned_start == after_start);
      tl_assert(aligned_len == 0);
   }
}
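
/* The out-of-cache fast path above relies on the compressed line
   representation: a line whose every SVal is identical needs only
   dict[0] plus an all-zeroes index array.  A toy version of that
   fill operation (field names mirror LineZ as used above, but the
   struct itself is an illustrative stand-in, and it is guarded out): */
#if 0
typedef struct {
   SVal  dict[4];
   UChar ix2s[N_LINE_ARANGE/4];
} ToyLineZ;

static void toy_line_set_all ( ToyLineZ* lz, SVal sv )
{
   UWord i;
   lz->dict[0] = sv;
   lz->dict[1] = lz->dict[2] = lz->dict[3] = SVal_INVALID;
   for (i = 0; i < N_LINE_ARANGE/4; i++)
      lz->ix2s[i] = 0;  /* every element refers to dict[0] */
}
#endif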


/////////////////////////////////////////////////////////
//                                                     //
//   Front-filtering accesses                          //
//                                                     //
/////////////////////////////////////////////////////////

static UWord stats__f_ac = 0;
static UWord stats__f_sk = 0;

#if 0
#  define STATS__F_SHOW \
      do { \
         if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
            VG_(printf)("filters: ac %lu sk %lu\n", \
                        stats__f_ac, stats__f_sk); \
      } while (0)
#else
#  define STATS__F_SHOW /* */
#endif

void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply08__msmcwrite(thr, a);
}
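
/* Sketch of what the filter buys us: a small per-thread cache of
   recently touched aligned blocks; a hit means the access cannot have
   changed the shadow state since the last Filter__clear (which happens
   at synchronisation events), so the state machine can be skipped.
   This toy direct-mapped variant is an assumption about the general
   shape only -- it is not the real Filter__ok_to_skip_* machinery --
   and is guarded out. */
#if 0
#define TOY_FILT_ENTS 64
typedef struct { Addr lines[TOY_FILT_ENTS]; } ToyFilter;

static Bool toy_filter_ok_to_skip ( ToyFilter* f, Addr a )
{
   Addr  line = a & ~(Addr)7;               /* 8-byte granule       */
   UWord ix   = (line >> 3) & (TOY_FILT_ENTS-1);
   if (f->lines[ix] == line) return True;   /* seen since last clear */
   f->lines[ix] = line;
   return False;
}
#endif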

void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply16__msmcwrite(thr, a);
}

void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply32__msmcwrite(thr, a);
}

void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply64__msmcwrite(thr, a);
}

void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_sapply32_f__msmcwrite( thr, a );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_sapply64_f__msmcwrite( thr, a );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_sapply08_f__msmcwrite( thr, a );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_sapply16_f__msmcwrite( thr, a );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_sapply32_f__msmcwrite( thr, a );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_sapply64_f__msmcwrite( thr, a );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_sapply32_f__msmcwrite( thr, a );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_sapply16_f__msmcwrite( thr, a );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_sapply08_f__msmcwrite( thr, a );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}

void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply08__msmcread(thr, a);
}

void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply16__msmcread(thr, a);
}

void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply32__msmcread(thr, a);
}

void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply64__msmcread(thr, a);
}

void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_sapply32_f__msmcread( thr, a );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_sapply64_f__msmcread( thr, a );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_sapply08_f__msmcread( thr, a );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_sapply16_f__msmcread( thr, a );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_sapply32_f__msmcread( thr, a );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_sapply64_f__msmcread( thr, a );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_sapply32_f__msmcread( thr, a );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_sapply16_f__msmcread( thr, a );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_sapply08_f__msmcread( thr, a );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}

void libhb_Thr_resumes ( Thr* thr )
{
   if (0) VG_(printf)("resume %p\n", thr);
   tl_assert(thr);
   tl_assert(thr->still_alive);
   Filter__clear(thr->filter, "libhb_Thr_resumes");
   /* A kludge, but .. if this thread doesn't have any marker stacks
      at all, get one right now.  This is easier than figuring out
      exactly when at thread startup we can and can't take a stack
      snapshot. */
   if (HG_(clo_history_level) == 1) {
      tl_assert(thr->local_Kws_n_stacks);
      if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
         note_local_Kw_n_stack_for(thr);
   }
}


/////////////////////////////////////////////////////////
//                                                     //
//   Synchronisation objects                           //
//                                                     //
/////////////////////////////////////////////////////////

// (UInt) `echo "Synchronisation object" | md5sum`
#define SO_MAGIC 0x56b3c5b0U

struct _SO {
   VtsID viR; /* r-clock of sender */
   VtsID viW; /* w-clock of sender */
   UInt  magic;
};

static SO* SO__Alloc ( void ) {
   SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
   so->viR   = VtsID_INVALID;
   so->viW   = VtsID_INVALID;
   so->magic = SO_MAGIC;
   return so;
}
static void SO__Dealloc ( SO* so ) {
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
   } else {
      tl_assert(so->viW != VtsID_INVALID);
      VtsID__rcdec(so->viR);
      VtsID__rcdec(so->viW);
   }
   so->magic = 0;
   HG_(free)( so );
}


/////////////////////////////////////////////////////////
//                                                     //
//   Top Level API                                     //
//                                                     //
/////////////////////////////////////////////////////////

static void show_thread_state ( HChar* str, Thr* t )
{
   if (1) return;
   if (t->viR == t->viW) {
      VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
      VtsID__pp( t->viR );
      VG_(printf)("%s","\n");
   } else {
      VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
      VtsID__pp( t->viR );
      VG_(printf)(" viW %u==", t->viW);
      VtsID__pp( t->viW );
      VG_(printf)("%s","\n");
   }
}


Thr* libhb_init (
        void        (*get_stacktrace)( Thr*, Addr*, UWord ),
        ExeContext* (*get_EC)( Thr* )
     )
{
   Thr*  thr;
   VtsID vi;

   // We will have to store a large number of these,
   // so make sure they're the size we expect them to be.
   tl_assert(sizeof(ScalarTS) == 8);
   tl_assert(SCALARTS_N_THRBITS >= 11); /* because first 1024 unusable */
   tl_assert(SCALARTS_N_THRBITS <= 32); /* so as to fit in a UInt */

   tl_assert(get_stacktrace);
   tl_assert(get_EC);
   main_get_stacktrace = get_stacktrace;
   main_get_EC         = get_EC;

   // No need to initialise hg_wordfm.
   // No need to initialise hg_wordset.

   /* Allocated once and never deallocated.  Used as a temporary in
      VTS singleton, tick and join operations. */
   temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
   temp_max_sized_VTS->id = VtsID_INVALID;
   vts_set_init();
   vts_tab_init();
   event_map_init();
   VtsID__invalidate_caches();

   // initialise shadow memory
   zsm_init( SVal__rcinc, SVal__rcdec );

   thr = Thr__new();
   vi  = VtsID__mk_Singleton( thr, 1 );
   thr->viR = vi;
   thr->viW = vi;
   VtsID__rcinc(thr->viR);
   VtsID__rcinc(thr->viW);

   show_thread_state("  root", thr);
   return thr;
}


Thr* libhb_create ( Thr* parent )
{
   /* The child's VTSs are copies of the parent's VTSs, but ticked at
      the child's index.  Since the child's index is guaranteed
      unique, it has never been seen before, so the implicit value
      before the tick is zero and after that is one. */
   Thr* child = Thr__new();

   child->viR = VtsID__tick( parent->viR, child );
   child->viW = VtsID__tick( parent->viW, child );
   Filter__clear(child->filter, "libhb_create(child)");
   VtsID__rcinc(child->viR);
   VtsID__rcinc(child->viW);
   /* We need to do note_local_Kw_n_stack_for( child ), but it's too
      early for that - it may not have a valid TId yet.  So, let
      libhb_Thr_resumes pick it up the first time the thread runs. */

   tl_assert(VtsID__indexAt( child->viR, child ) == 1);
   tl_assert(VtsID__indexAt( child->viW, child ) == 1);

   /* and the parent has to move along too */
   VtsID__rcdec(parent->viR);
   VtsID__rcdec(parent->viW);
   parent->viR = VtsID__tick( parent->viR, parent );
   parent->viW = VtsID__tick( parent->viW, parent );
   Filter__clear(parent->filter, "libhb_create(parent)");
   VtsID__rcinc(parent->viR);
   VtsID__rcinc(parent->viW);
   note_local_Kw_n_stack_for( parent );

   show_thread_state(" child", child);
   show_thread_state("parent", parent);

   return child;
}
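
/* Thread creation in miniature: with VTSs as per-thread integer
   arrays, the child gets copies of the parent's clocks ticked at the
   child's own (fresh) index, and the parent ticks itself too, so
   neither can be confused with the pre-create state.  A guarded-out
   sketch with two threads and invented values: */
#if 0
static void toy_fork_tick ( void )
{
   UInt parentR[2] = { 7, 0 };   /* index 0 = parent, 1 = child */
   UInt childR[2];
   childR[0] = parentR[0];
   childR[1] = parentR[1];
   childR[1]++;                  /* tick at child's index: 0 -> 1 */
   parentR[0]++;                 /* parent moves along too        */
   tl_assert(childR[1] == 1 && parentR[0] == 8);
}
#endif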
5647
5648/* Shut down the library, and print stats (in fact that's _all_
5649 this is for. */
5650void libhb_shutdown ( Bool show_stats )
5651{
5652 if (show_stats) {
5653 VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
5654 VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
5655 stats__secmaps_allocd,
5656 stats__secmap_ga_space_covered);
5657 VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
5658 stats__secmap_linesZ_allocd,
5659 stats__secmap_linesZ_bytes);
5660 VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
5661 stats__secmap_linesF_allocd,
5662 stats__secmap_linesF_bytes);
5663 VG_(printf)(" secmaps: %'10lu iterator steppings\n",
5664 stats__secmap_iterator_steppings);
5665 VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
5666 stats__secmaps_search, stats__secmaps_search_slow);
5667
5668 VG_(printf)("%s","\n");
5669 VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
5670 stats__cache_totrefs, stats__cache_totmisses );
5671 VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
5672 stats__cache_Z_fetches, stats__cache_F_fetches );
5673 VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
5674 stats__cache_Z_wbacks, stats__cache_F_wbacks );
5675 VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n",
5676 stats__cache_invals, stats__cache_flushes );
5677 VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
5678 stats__cache_make_New_arange,
5679 stats__cache_make_New_inZrep);
5680
5681 VG_(printf)("%s","\n");
5682 VG_(printf)(" cline: %'10lu normalises\n",
5683 stats__cline_normalises );
sewardj23f12002009-07-24 08:45:08 +00005684 VG_(printf)(" cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
5685 stats__cline_cread64s,
5686 stats__cline_cread32s,
5687 stats__cline_cread16s,
5688 stats__cline_cread08s );
5689 VG_(printf)(" cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
5690 stats__cline_cwrite64s,
5691 stats__cline_cwrite32s,
5692 stats__cline_cwrite16s,
5693 stats__cline_cwrite08s );
5694 VG_(printf)(" cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
5695 stats__cline_swrite64s,
5696 stats__cline_swrite32s,
5697 stats__cline_swrite16s,
5698 stats__cline_swrite08s );
5699 VG_(printf)(" cline: s rd1s %'lu, s copy1s %'lu\n",
5700 stats__cline_sread08s, stats__cline_scopy08s );
sewardjf98e1c02008-10-25 16:22:41 +00005701 VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
5702 stats__cline_64to32splits,
5703 stats__cline_32to16splits,
5704 stats__cline_16to8splits );
5705 VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
5706 stats__cline_64to32pulldown,
5707 stats__cline_32to16pulldown,
5708 stats__cline_16to8pulldown );
5709 if (0)
5710 VG_(printf)(" cline: sizeof(CacheLineZ) %ld, covers %ld bytes of arange\n",
5711 (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);
5712
5713 VG_(printf)("%s","\n");
5714
sewardjc8028ad2010-05-05 09:34:42 +00005715 VG_(printf)(" libhb: %'13llu msmcread (%'llu dragovers)\n",
sewardj23f12002009-07-24 08:45:08 +00005716 stats__msmcread, stats__msmcread_change);
sewardjc8028ad2010-05-05 09:34:42 +00005717 VG_(printf)(" libhb: %'13llu msmcwrite (%'llu dragovers)\n",
sewardj23f12002009-07-24 08:45:08 +00005718 stats__msmcwrite, stats__msmcwrite_change);
5719 VG_(printf)(" libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
5720 stats__cmpLEQ_queries, stats__cmpLEQ_misses);
sewardjf98e1c02008-10-25 16:22:41 +00005721 VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
5722 stats__join2_queries, stats__join2_misses);
5723
5724 VG_(printf)("%s","\n");
sewardjc8028ad2010-05-05 09:34:42 +00005725 VG_(printf)( " libhb: VTSops: tick %'lu, join %'lu, cmpLEQ %'lu\n",
5726 stats__vts__tick, stats__vts__join, stats__vts__cmpLEQ );
5727 VG_(printf)( " libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
5728 stats__vts__cmp_structural, stats__vts__cmp_structural_slow );
sewardj7aa38a92011-02-27 23:04:12 +00005729 VG_(printf)( " libhb: VTSset: find__or__clone_and_add %'lu (%'lu allocd)\n",
5730 stats__vts_set__focaa, stats__vts_set__focaa_a );
sewardjc8028ad2010-05-05 09:34:42 +00005731 VG_(printf)( " libhb: VTSops: indexAt_SLOW %'lu\n",
5732 stats__vts__indexat_slow );
5733
5734 VG_(printf)("%s","\n");
sewardjf98e1c02008-10-25 16:22:41 +00005735 VG_(printf)(
5736 " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
5737 VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
5738 );
5739 VG_(printf)( " libhb: %lu entries in vts_set\n",
5740 VG_(sizeFM)( vts_set ) );
5741
5742 VG_(printf)("%s","\n");
5743 VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
5744 stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
5745 stats__ctxt_rcdec2,
5746 stats__ctxt_rcdec3 );
5747 VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
5748 stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
5749 VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n",
5750 (UWord)N_RCEC_TAB,
5751 stats__ctxt_tab_curr );
5752 VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
5753 stats__ctxt_tab_qs,
5754 stats__ctxt_tab_cmps );
5755#if 0
5756 VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
5757 VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
5758 VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
5759 VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
5760 VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
5761 VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
5762 VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
5763 VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
5764 VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
5765 VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
5766 VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
5767 VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
5768 VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
5769 VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
5770
5771 VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
5772 VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
5773 VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
5774 VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
5775#endif
5776
5777 VG_(printf)("%s","<<< END libhb stats >>>\n");
5778 VG_(printf)("%s","\n");
5779
5780 }
5781}
5782
5783void libhb_async_exit ( Thr* thr )
5784{
sewardj23f12002009-07-24 08:45:08 +00005785 tl_assert(thr);
sewardj2d2ea2f2009-08-02 10:15:07 +00005786 tl_assert(thr->still_alive);
sewardj23f12002009-07-24 08:45:08 +00005787 thr->still_alive = False;
sewardj2d2ea2f2009-08-02 10:15:07 +00005788
5789 /* free up Filter and local_Kws_n_stacks (well, actually not the
5790 latter ..) */
5791 tl_assert(thr->filter);
5792 HG_(free)(thr->filter);
5793 thr->filter = NULL;
5794
5795 /* Another space-accuracy tradeoff. Do we want to be able to show
5796 H1 history for conflicts in threads which have since exited? If
5797 yes, then we better not free up thr->local_Kws_n_stacks. The
5798 downside is a potential per-thread leak of up to
5799 N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
5800 XArray average overcommit factor is (1.5 I'd guess). */
5801 // hence:
5802 // VG_(deleteXA)(thr->local_Kws_n_stacks);
5803 // thr->local_Kws_n_stacks = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00005804}
5805
5806/* Both Segs and SOs point to VTSs. However, there is no sharing, so
5807 a Seg that points at a VTS is its one-and-only owner, and ditto for
5808 a SO that points at a VTS. */
5809
5810SO* libhb_so_alloc ( void )
5811{
5812 return SO__Alloc();
5813}
5814
5815void libhb_so_dealloc ( SO* so )
5816{
5817 tl_assert(so);
5818 tl_assert(so->magic == SO_MAGIC);
5819 SO__Dealloc(so);
5820}
5821
5822/* See comments in libhb.h for details on the meaning of
5823 strong vs weak sends and strong vs weak receives. */
5824void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
5825{
5826 /* Copy the VTSs from 'thr' into the sync object, and then move
5827 the thread along one step. */
5828
5829 tl_assert(so);
5830 tl_assert(so->magic == SO_MAGIC);
5831
5832 /* stay sane .. a thread's read-clock must always lead or be the
5833 same as its write-clock */
sewardj23f12002009-07-24 08:45:08 +00005834 { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
5835 tl_assert(leq);
sewardjf98e1c02008-10-25 16:22:41 +00005836 }
5837
5838 /* since we're overwriting the VtsIDs in the SO, we need to drop
5839 any references made by the previous contents thereof */
5840 if (so->viR == VtsID_INVALID) {
5841 tl_assert(so->viW == VtsID_INVALID);
5842 so->viR = thr->viR;
5843 so->viW = thr->viW;
5844 VtsID__rcinc(so->viR);
5845 VtsID__rcinc(so->viW);
5846 } else {
5847 /* In a strong send, we dump any previous VC in the SO and
5848 install the sending thread's VC instead. For a weak send we
5849 must join2 with what's already there. */
5850 tl_assert(so->viW != VtsID_INVALID);
5851 VtsID__rcdec(so->viR);
5852 VtsID__rcdec(so->viW);
5853 so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
5854 so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
5855 VtsID__rcinc(so->viR);
5856 VtsID__rcinc(so->viW);
5857 }
5858
5859 /* move both parent clocks along */
5860 VtsID__rcdec(thr->viR);
5861 VtsID__rcdec(thr->viW);
5862 thr->viR = VtsID__tick( thr->viR, thr );
5863 thr->viW = VtsID__tick( thr->viW, thr );
sewardj2d2ea2f2009-08-02 10:15:07 +00005864 if (thr->still_alive) {
5865 Filter__clear(thr->filter, "libhb_so_send");
sewardj8ab2c132009-08-02 09:34:35 +00005866 note_local_Kw_n_stack_for(thr);
sewardj2d2ea2f2009-08-02 10:15:07 +00005867 }
sewardjf98e1c02008-10-25 16:22:41 +00005868 VtsID__rcinc(thr->viR);
5869 VtsID__rcinc(thr->viW);
sewardj23f12002009-07-24 08:45:08 +00005870
sewardjf98e1c02008-10-25 16:22:41 +00005871 if (strong_send)
5872 show_thread_state("s-send", thr);
5873 else
5874 show_thread_state("w-send", thr);
5875}
5876
5877void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
5878{
5879 tl_assert(so);
5880 tl_assert(so->magic == SO_MAGIC);
5881
5882 if (so->viR != VtsID_INVALID) {
5883 tl_assert(so->viW != VtsID_INVALID);
5884
5885 /* Weak receive (basically, an R-acquisition of a R-W lock).
5886 This advances the read-clock of the receiver, but not the
5887 write-clock. */
5888 VtsID__rcdec(thr->viR);
5889 thr->viR = VtsID__join2( thr->viR, so->viR );
5890 VtsID__rcinc(thr->viR);
5891
sewardj90eb22e2009-07-28 20:22:18 +00005892 /* At one point (r10589) it seemed safest to tick the clocks for
5893 the receiving thread after the join. But on reflection, I
5894 wonder if that might cause it to 'overtake' constraints,
5895 which could lead to missing races. So, back out that part of
5896 r10589. */
5897 //VtsID__rcdec(thr->viR);
5898 //thr->viR = VtsID__tick( thr->viR, thr );
5899 //VtsID__rcinc(thr->viR);
sewardj23f12002009-07-24 08:45:08 +00005900
sewardjf98e1c02008-10-25 16:22:41 +00005901 /* For a strong receive, we also advance the receiver's write
5902 clock, which means the receive as a whole is essentially
5903 equivalent to a W-acquisition of a R-W lock. */
5904 if (strong_recv) {
5905 VtsID__rcdec(thr->viW);
5906 thr->viW = VtsID__join2( thr->viW, so->viW );
5907 VtsID__rcinc(thr->viW);
sewardj23f12002009-07-24 08:45:08 +00005908
sewardj90eb22e2009-07-28 20:22:18 +00005909 /* See comment just above, re r10589. */
5910 //VtsID__rcdec(thr->viW);
5911 //thr->viW = VtsID__tick( thr->viW, thr );
5912 //VtsID__rcinc(thr->viW);
sewardjf98e1c02008-10-25 16:22:41 +00005913 }

      if (thr->filter)
         Filter__clear(thr->filter, "libhb_so_recv");
      note_local_Kw_n_stack_for(thr);

      if (strong_recv)
         show_thread_state("s-recv", thr);
      else
         show_thread_state("w-recv", thr);

   } else {
      tl_assert(so->viW == VtsID_INVALID);
      /* Deal with degenerate case: 'so' has no vts, so there has been
         no message posted to it.  Just ignore this case. */
      show_thread_state("d-recv", thr);
   }
}
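
/* A minimal sketch (deliberately compiled out) of how a client tool
   might drive the SO primitives above to model message passing.
   The helper 'lookup_SO_for_queue' is hypothetical, standing in for
   whatever mapping the tool keeps from client objects to SOs. */
#if 0
static void example_model_queue_post ( Thr* sender, void* queue )
{
   SO* so = lookup_SO_for_queue( queue ); /* hypothetical helper */
   /* Publish the sender's entire VC into the SO; a strong send
      discards whatever the SO previously held. */
   libhb_so_send( sender, so, True/*strong_send*/ );
}

static void example_model_queue_take ( Thr* receiver, void* queue )
{
   SO* so = lookup_SO_for_queue( queue ); /* hypothetical helper */
   /* Nothing to acquire if no message was ever posted. */
   if (libhb_so_everSent( so ))
      libhb_so_recv( receiver, so, True/*strong_recv*/ );
}
#endif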

Bool libhb_so_everSent ( SO* so )
{
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
      return False;
   } else {
      tl_assert(so->viW != VtsID_INVALID);
      return True;
   }
}

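/* Ad-hoc tracing hooks for debugging the state machine: set XXX1
   (and, if needed, XXX2) to a client address of interest, and the
   TRACEME/trace helpers below will dump the shadow value at the
   start of any traced range overlapping that address, together
   with the thread state.  The call sites are additionally gated by
   '0 &&', which must also be edited to enable them. */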
#define XXX1 0 // 0x67a106c
#define XXX2 0

static inline Bool TRACEME(Addr a, SizeT szB) {
   if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
   if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
   return False;
}
static void trace ( Thr* thr, Addr a, SizeT szB, HChar* s ) {
   SVal sv = zsm_sread08(a);
   VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
   show_thread_state("", thr);
   VG_(printf)("%s","\n");
}

void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
{
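   /* In effect, new memory starts out as if it had just been
      written by the allocating thread: both VTSs in the constraint
      are the thread's current write-clock. */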
   SVal sv = SVal__mkC(thr->viW, thr->viW);
   tl_assert(is_sane_SVal_C(sv));
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
   zsm_sset_range( a, szB, sv );
   Filter__clear_range( thr->filter, a, szB );
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
}

void libhb_srange_noaccess_NoFX ( Thr* thr, Addr a, SizeT szB )
{
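   /* The NoFX variant is deliberately a no-op: the range's shadow
      state is left untouched.  Compare the AHAE variant below,
      which really does mark the range NoAccess. */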
   /* do nothing */
}

void libhb_srange_noaccess_AHAE ( Thr* thr, Addr a, SizeT szB )
{
   /* This really does put the requested range in NoAccess.  It's
      expensive though. */
   SVal sv = SVal_NOACCESS;
   tl_assert(is_sane_SVal_C(sv));
   zsm_sset_range( a, szB, sv );
   Filter__clear_range( thr->filter, a, szB );
}

void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
{
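   /* Operationally the same as libhb_srange_noaccess_AHAE above
      (set the range to NoAccess and flush it from the filter), but
      kept as a separate entry point, with tracing hooks, for ranges
      the caller wants untracked. */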
   SVal sv = SVal_NOACCESS;
   tl_assert(is_sane_SVal_C(sv));
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
   zsm_sset_range( a, szB, sv );
   Filter__clear_range( thr->filter, a, szB );
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
}

Thread* libhb_get_Thr_hgthread ( Thr* thr ) {
   tl_assert(thr);
   return thr->hgthread;
}

void libhb_set_Thr_hgthread ( Thr* thr, Thread* hgthread ) {
   tl_assert(thr);
   thr->hgthread = hgthread;
}

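/* Copy shadow state for 'len' bytes from 'src' to 'dst', then
   invalidate the filter over the destination.  Typically used when
   the client moves memory wholesale, for example a realloc-style
   copy. */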
void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
{
   zsm_scopy_range(src, dst, len);
   Filter__clear_range( thr->filter, dst, len );
}

void libhb_maybe_GC ( void )
{
   event_map_maybe_GC();
   /* If there are still freelist entries available, no need for a
      GC. */
   if (vts_tab_freelist != VtsID_INVALID)
      return;
   /* So all the table entries are full, and we're having to expand
      the table.  But did we hit the threshold point yet? */
   if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
      return;
   vts_tab__do_GC( False/*don't show stats*/ );
}


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION END main library                                    //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/*--------------------------------------------------------------------*/
/*--- end                                             libhb_main.c ---*/
/*--------------------------------------------------------------------*/