
/*--------------------------------------------------------------------*/
/*--- LibHB: a library for implementing and checking               ---*/
/*--- the happens-before relationship in concurrent programs.      ---*/
/*---                                                 libhb_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of LibHB, a library for implementing and checking
   the happens-before relationship in concurrent programs.

   Copyright (C) 2008-2009 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_sparsewa.h"
#include "pub_tool_xarray.h"
#include "pub_tool_oset.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
#include "pub_tool_options.h"        // VG_(clo_verbosity)
#include "hg_basics.h"
#include "hg_wordset.h"
#include "hg_lock_n_thread.h"
#include "hg_errors.h"

#include "libhb.h"


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                       Debugging #defines                    //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Check the sanity of shadow values in the core memory state
   machine.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_MSM 1
#else
#  define CHECK_MSM 0
#endif


/* Check sanity (reference counts, etc) in the conflicting access
   machinery.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_CEM 1
#else
#  define CHECK_CEM 0
#endif


/* Check sanity in the compressed shadow memory machinery,
   particularly in its caching innards.  Unfortunately there's no
   almost-zero-cost way to make them selectable at run time.  Hence
   set the #if 0 to #if 1 and rebuild if you want them. */
#if 0
#  define CHECK_ZSM 1  /* do sanity-check CacheLine stuff */
#  define inline __attribute__((noinline))
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define CHECK_ZSM 0  /* don't sanity-check CacheLine stuff */
#endif


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                       Forward declarations                  //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Globals needed by other parts of the library.  These are set
   once at startup and then never changed. */
static void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
static ExeContext* (*main_get_EC)( Thr* ) = NULL;



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//             SECTION BEGIN compressed shadow memory          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_ZSM_H
#define __HB_ZSM_H

typedef  ULong  SVal;

/* This value has special significance to the implementation, and callers
   may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)

/* This is the default value for shadow memory.  Initially the shadow
   memory contains no accessible areas and so all reads produce this
   value.  TODO: make this caller-definable. */
#define SVal_NOACCESS (2ULL << 62)
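
/* Illustrative note (informal, not part of the interface): both
   special values live in the top two bits of the 64-bit SVal,

      SVal_INVALID  == 0xC000000000000000ULL   -- bits 63:62 == 11
      SVal_NOACCESS == 0x8000000000000000ULL   -- bits 63:62 == 10

   so an expression like (sv >> 62) cheaply classifies a shadow
   value. */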

/* Initialise the library.  Once initialised, it will (or may) call
   rcinc and rcdec in response to all the calls below, in order to
   allow the user to do reference counting on the SVals stored herein.
   It is important to understand, however, that due to internal
   caching, the reference counts are in general inaccurate, and can be
   both above or below the true reference count for an item.  In
   particular, the library may indicate that the reference count for
   an item is zero, when in fact it is not.

   To make the reference counting exact and therefore non-pointless,
   call zsm_flush_cache.  Immediately after it returns, the reference
   counts for all items, as deduced by the caller by observing calls
   to rcinc and rcdec, will be correct, and so any items with a zero
   reference count may be freed (or at least considered to be
   unreferenced by this library).
*/
static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );

static void zsm_sset_range  ( Addr, SizeT, SVal );
static void zsm_scopy_range ( Addr, Addr, SizeT );
static void zsm_flush_cache ( void );

#endif /* ! __HB_ZSM_H */
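
/* Usage sketch (illustrative only; my_rcinc and my_rcdec are
   hypothetical caller-supplied callbacks, not defined in this file):

      static void my_rcinc ( SVal sv ) { ... bump count for sv ... }
      static void my_rcdec ( SVal sv ) { ... drop count for sv ... }

      zsm_init( my_rcinc, my_rcdec );
      zsm_sset_range( base, len, sv );  // observed counts may now drift
      zsm_flush_cache();                // observed counts are now exact

   Only after zsm_flush_cache() returns may a zero observed count be
   trusted enough to free the underlying item. */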


/* Round a up to the next multiple of N.  N must be a power of 2 */
#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N)-1))
/* Round a down to the next multiple of N.  N must be a power of 2 */
#define ROUNDDN(a, N)   ((a) & ~((N)-1))
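
/* Worked example (informal): with N == 64, ROUNDUP(65,64) ==
   (65+63) & ~63 == 128, and ROUNDDN(65,64) == 64.  The trick relies
   on N being a power of 2, so ~(N-1) is a mask clearing exactly the
   low log2(N) bits. */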



/* ------ User-supplied RC functions ------ */
static void(*rcinc)(SVal) = NULL;
static void(*rcdec)(SVal) = NULL;


/* ------ CacheLine ------ */

#define N_LINE_BITS      6 /* must be >= 3 */
#define N_LINE_ARANGE    (1 << N_LINE_BITS)
#define N_LINE_TREES     (N_LINE_ARANGE >> 3)

typedef
   struct {
      UShort descrs[N_LINE_TREES];
      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
   }
   CacheLine;

#define TREE_DESCR_16_0 (1<<0)
#define TREE_DESCR_32_0 (1<<1)
#define TREE_DESCR_16_1 (1<<2)
#define TREE_DESCR_64   (1<<3)
#define TREE_DESCR_16_2 (1<<4)
#define TREE_DESCR_32_1 (1<<5)
#define TREE_DESCR_16_3 (1<<6)
#define TREE_DESCR_8_0  (1<<7)
#define TREE_DESCR_8_1  (1<<8)
#define TREE_DESCR_8_2  (1<<9)
#define TREE_DESCR_8_3  (1<<10)
#define TREE_DESCR_8_4  (1<<11)
#define TREE_DESCR_8_5  (1<<12)
#define TREE_DESCR_8_6  (1<<13)
#define TREE_DESCR_8_7  (1<<14)
#define TREE_DESCR_DTY  (1<<15)

typedef
   struct {
      SVal  dict[4]; /* can represent up to 4 diff values in the line */
      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
                                      dict indexes */
      /* if dict[0] == SVal_INVALID then dict[1] is the index of the
         LineF to use, and dict[2..] are also SVal_INVALID. */
   }
   LineZ; /* compressed rep for a cache line */

typedef
   struct {
      Bool inUse;
      SVal w64s[N_LINE_ARANGE];
   }
   LineF; /* full rep for a cache line */
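
/* Size sketch (informal): a line covers N_LINE_ARANGE == 64 bytes of
   address space, one SVal per byte, so the full LineF rep needs
   64 * 8 == 512 bytes of SVals.  A LineZ needs only the 4-entry dict
   (32 bytes) plus 64 two-bit indexes (16 bytes), about 48 bytes in
   all -- roughly a 10x saving whenever a line contains at most 4
   distinct shadow values. */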

/* Shadow memory.
   Primary map is a WordFM Addr SecMap*.
   SecMaps cover some page-size-ish section of address space and hold
     a compressed representation.
   CacheLine-sized chunks of SecMaps are copied into a Cache, being
   decompressed when moved into the cache and recompressed on the
   way out.  Because of this, the cache must operate as a writeback
   cache, not a writethrough one.

   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
   N_SECMAP_BITS must be >= N_LINE_BITS.
*/
#define N_SECMAP_BITS   13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
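
/* Arithmetic check (informal): N_SECMAP_BITS == 13 gives an
   8192-byte SecMap arange, and N_LINE_BITS == 6 gives 64-byte lines,
   so each SecMap holds 8192/64 == 128 LineZs. */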

/* The data in the SecMap is held in the array of LineZs.  Each LineZ
   either carries the required data directly, in a compressed
   representation, or it marks .dict[0] as SVal_INVALID and holds (in
   .dict[1]) the index of the LineF in .linesF that holds the full
   representation.

   Currently-unused LineFs have their .inUse bit set to zero.
   Since each in-use LineF is referred to by exactly one LineZ,
   the number of .linesZ[] that refer to .linesF should equal
   the number of .linesF[] that have .inUse == True.

   RC obligations: the RCs presented to the user include exactly
   the values in:
     * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
     * F reps that are in use (.inUse == True)

   Hence the following actions at the following transitions are required:

   F rep: .inUse==True  -> .inUse==False        -- rcdec_LineF
   F rep: .inUse==False -> .inUse==True         -- rcinc_LineF
   Z rep: .dict[0] from other to SVal_INVALID   -- rcdec_LineZ
   Z rep: .dict[0] from SVal_INVALID to other   -- rcinc_LineZ
*/
typedef
   struct {
      UInt   magic;
      LineZ  linesZ[N_SECMAP_ZLINES];
      LineF* linesF;
      UInt   linesF_size;
   }
   SecMap;

#define SecMap_MAGIC   0x571e58cbU

static inline Bool is_sane_SecMap ( SecMap* sm ) {
   return sm != NULL && sm->magic == SecMap_MAGIC;
}

/* ------ Cache ------ */

#define N_WAY_BITS 16
#define N_WAY_NENT (1 << N_WAY_BITS)

/* Each tag is the address of the associated CacheLine, rounded down
   to a CacheLine address boundary.  A CacheLine size must be a power
   of 2 and must be 8 or more.  Hence an easy way to initialise the
   cache so it is empty is to set all the tag values to any value % 8
   != 0, e.g. 1.  This means all queries in the cache initially miss.
   It does however require us to detect, and not write back, any line
   with a bogus tag. */
typedef
   struct {
      CacheLine lyns0[N_WAY_NENT];
      Addr      tags0[N_WAY_NENT];
   }
   Cache;

static inline Bool is_valid_scache_tag ( Addr tag ) {
   /* a valid tag should be naturally aligned to the start of
      a CacheLine. */
   return 0 == (tag & (N_LINE_ARANGE - 1));
}
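
/* Lookup sketch (informal): the cache is direct-mapped.  For an
   address 'a' the tag is 'a' with the low N_LINE_BITS cleared, and
   the entry index is taken from the next N_WAY_BITS of 'a':

      Addr  tag = a & ~(N_LINE_ARANGE - 1);
      UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);

   which is exactly what get_cacheline() below computes.  Since 1 is
   not 64-byte aligned, a tag of 1 can never match a real lookup;
   that is why it can serve as the 'invalid' marker. */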


/* --------- Primary data structures --------- */

/* Shadow memory primary map */
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
static Cache   cache_shmem;


static UWord stats__secmaps_search       = 0; // # SM finds
static UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
static UWord stats__secmap_linesF_bytes  = 0; // .. using this much storage
static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
static UWord stats__cache_F_fetches      = 0; // # F lines fetched
static UWord stats__cache_F_wbacks       = 0; // # F lines written back
static UWord stats__cache_invals         = 0; // # cache invals
static UWord stats__cache_flushes        = 0; // # cache flushes
static UWord stats__cache_totrefs        = 0; // # total accesses
static UWord stats__cache_totmisses      = 0; // # misses
static ULong stats__cache_make_New_arange = 0; // total arange made New
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
static UWord stats__cline_normalises    = 0; // # calls to cacheline_normalise
static UWord stats__cline_cread64s      = 0; // # calls to s_m_read64
static UWord stats__cline_cread32s      = 0; // # calls to s_m_read32
static UWord stats__cline_cread16s      = 0; // # calls to s_m_read16
static UWord stats__cline_cread08s      = 0; // # calls to s_m_read8
static UWord stats__cline_cwrite64s     = 0; // # calls to s_m_write64
static UWord stats__cline_cwrite32s     = 0; // # calls to s_m_write32
static UWord stats__cline_cwrite16s     = 0; // # calls to s_m_write16
static UWord stats__cline_cwrite08s     = 0; // # calls to s_m_write8
static UWord stats__cline_sread08s      = 0; // # calls to s_m_get8
static UWord stats__cline_swrite08s     = 0; // # calls to s_m_set8
static UWord stats__cline_swrite16s     = 0; // # calls to s_m_set16
static UWord stats__cline_swrite32s     = 0; // # calls to s_m_set32
static UWord stats__cline_swrite64s     = 0; // # calls to s_m_set64
static UWord stats__cline_scopy08s      = 0; // # calls to s_m_copy8
static UWord stats__cline_64to32splits  = 0; // # 64-bit accesses split
static UWord stats__cline_32to16splits  = 0; // # 32-bit accesses split
static UWord stats__cline_16to8splits   = 0; // # 16-bit accesses split
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8

static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
   return a & ~(N_SECMAP_ARANGE - 1);
}
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
   return a & (N_SECMAP_ARANGE - 1);
}


/*----------------------------------------------------------------*/
/*--- map_shmem :: WordFM Addr SecMap                          ---*/
/*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
/*----------------------------------------------------------------*/

/*--------------- SecMap allocation --------------- */

static HChar* shmem__bigchunk_next = NULL;
static HChar* shmem__bigchunk_end1 = NULL;

static void* shmem__bigchunk_alloc ( SizeT n )
{
   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
   tl_assert(n > 0);
   n = VG_ROUNDUP(n, 16);
   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
      if (0)
      VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
                  (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
      if (shmem__bigchunk_next == NULL)
         VG_(out_of_memory_NORETURN)(
            "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
   }
   tl_assert(shmem__bigchunk_next);
   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
   shmem__bigchunk_next += n;
   return shmem__bigchunk_next - n;
}

static SecMap* shmem__alloc_SecMap ( void )
{
   Word    i, j;
   SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
   if (0) VG_(printf)("alloc_SecMap %p\n",sm);
   tl_assert(sm);
   sm->magic = SecMap_MAGIC;
   for (i = 0; i < N_SECMAP_ZLINES; i++) {
      sm->linesZ[i].dict[0] = SVal_NOACCESS;
      sm->linesZ[i].dict[1] = SVal_INVALID;
      sm->linesZ[i].dict[2] = SVal_INVALID;
      sm->linesZ[i].dict[3] = SVal_INVALID;
      for (j = 0; j < N_LINE_ARANGE/4; j++)
         sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
   }
   sm->linesF      = NULL;
   sm->linesF_size = 0;
   stats__secmaps_allocd++;
   stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
   stats__secmap_linesZ_allocd    += N_SECMAP_ZLINES;
   stats__secmap_linesZ_bytes     += N_SECMAP_ZLINES * sizeof(LineZ);
   return sm;
}

typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };

static SecMap* shmem__find_SecMap ( Addr ga )
{
   SecMap* sm    = NULL;
   Addr    gaKey = shmem__round_to_SecMap_base(ga);
   // Cache
   stats__secmaps_search++;
   if (LIKELY(gaKey == smCache[0].gaKey))
      return smCache[0].sm;
   if (LIKELY(gaKey == smCache[1].gaKey)) {
      SMCacheEnt tmp = smCache[0];
      smCache[0] = smCache[1];
      smCache[1] = tmp;
      return smCache[0].sm;
   }
   if (gaKey == smCache[2].gaKey) {
      SMCacheEnt tmp = smCache[1];
      smCache[1] = smCache[2];
      smCache[2] = tmp;
      return smCache[1].sm;
   }
   // end Cache
   stats__secmaps_search_slow++;
   if (VG_(lookupFM)( map_shmem,
                      NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
      tl_assert(sm != NULL);
      smCache[2] = smCache[1];
      smCache[1] = smCache[0];
      smCache[0].gaKey = gaKey;
      smCache[0].sm    = sm;
   } else {
      tl_assert(sm == NULL);
   }
   return sm;
}

static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
{
   SecMap* sm = shmem__find_SecMap ( ga );
   if (LIKELY(sm)) {
      return sm;
   } else {
      /* create a new one */
      Addr gaKey = shmem__round_to_SecMap_base(ga);
      sm = shmem__alloc_SecMap();
      tl_assert(sm);
      VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
      return sm;
   }
}


/* ------------ LineF and LineZ related ------------ */

static void rcinc_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcinc(lineF->w64s[i]);
}

static void rcdec_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcdec(lineF->w64s[i]);
}

static void rcinc_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcinc(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
}

static void rcdec_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcdec(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
}

inline
static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
   Word bix, shft, mask, prep;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   mask = 3 << shft;
   prep = b2 << shft;
   arr[bix] = (arr[bix] & ~mask) | prep;
}

inline
static UWord read_twobit_array ( UChar* arr, UWord ix ) {
   Word bix, shft;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   return (arr[bix] >> shft) & 3;
}
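
/* Round-trip sketch (informal): each UChar holds four 2-bit fields,
   lowest-numbered field in the lowest bits.  After

      write_twobit_array( arr, 5, 3 );

   element 5 lives in arr[1] (5 >> 2 == 1) at shift 2*(5 & 3) == 2,
   i.e. arr[1] looks like xxxx11xx in binary, and
   read_twobit_array(arr, 5) recovers 3 without disturbing the three
   neighbouring fields. */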

/* Given address 'tag', find either the Z or F line containing relevant
   data, so it can be read into the cache.
*/
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
                                  /*OUT*/LineF** fp, Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord  zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      lineZ = NULL;
   }
   *zp = lineZ;
   *fp = lineF;
}

/* Given address 'tag', return the relevant SecMap and the index of
   the LineZ within it, in the expectation that the line is to be
   overwritten.  Regardless of whether 'tag' is currently associated
   with a Z or F representation, the current representation is
   rcdec'd, in recognition of the fact that the contents are just
   about to be overwritten. */
static __attribute__((noinline))
void find_Z_for_writing ( /*OUT*/SecMap** smp,
                          /*OUT*/Word* zixp,
                          Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord  zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   /* re RCs, we are freeing up this LineZ/LineF so that new data can
      be parked in it.  Hence have to rcdec it accordingly. */
   /* If lineZ has an associated lineF, free it up. */
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      rcdec_LineF(lineF);
      lineF->inUse = False;
   } else {
      rcdec_LineZ(lineZ);
   }
   *smp  = sm;
   *zixp = zix;
}

static __attribute__((noinline))
void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
   UInt   i, new_size;
   LineF* nyu;

   if (sm->linesF) {
      tl_assert(sm->linesF_size > 0);
   } else {
      tl_assert(sm->linesF_size == 0);
   }

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         if (!sm->linesF[i].inUse) {
            *fixp = (Word)i;
            return;
         }
      }
   }

   /* No free F line found.  Expand existing array and try again. */
   new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
   nyu      = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
                           new_size * sizeof(LineF) );
   tl_assert(nyu);

   stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
   stats__secmap_linesF_bytes  += (new_size - sm->linesF_size)
                                  * sizeof(LineF);

   if (0)
   VG_(printf)("SM %p: expand F array from %d to %d\n",
               sm, (Int)sm->linesF_size, new_size);

   for (i = 0; i < new_size; i++)
      nyu[i].inUse = False;

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         tl_assert(sm->linesF[i].inUse);
         nyu[i] = sm->linesF[i];
      }
      VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
      HG_(free)(sm->linesF);
   }

   sm->linesF      = nyu;
   sm->linesF_size = new_size;

   for (i = 0; i < sm->linesF_size; i++) {
      if (!sm->linesF[i].inUse) {
         *fixp = (Word)i;
         return;
      }
   }

   /*NOTREACHED*/
   tl_assert(0);
}


/* ------------ CacheLine and implicit-tree related ------------ */

__attribute__((unused))
static void pp_CacheLine ( CacheLine* cl ) {
   Word i;
   if (!cl) {
      VG_(printf)("%s","pp_CacheLine(NULL)\n");
      return;
   }
   for (i = 0; i < N_LINE_TREES; i++)
      VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
   for (i = 0; i < N_LINE_ARANGE; i++)
      VG_(printf)("    sval: %08lx\n", (UWord)cl->svals[i]);
}

static UChar descr_to_validbits ( UShort descr )
{
   /* a.k.a Party Time for gcc's constant folder */
#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
                b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0)  \
             ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
                          ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
                          ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
                          ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
                          ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
                          ( (b16_2) << 4)  | ( (b64)   << 3)  | \
                          ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
                          ( (b16_0) << 0) ) )

#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
             ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
                         ( (bit5) << 5) | ( (bit4) << 4) | \
                         ( (bit3) << 3) | ( (bit2) << 2) | \
                         ( (bit1) << 1) | ( (bit0) << 0) ) )

   /* these should all get folded out at compile time */
   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);

   switch (descr) {
   /*
              +--------------------------------- TREE_DESCR_8_7
              |             +------------------- TREE_DESCR_8_0
              |             |  +---------------- TREE_DESCR_16_3
              |             |  | +-------------- TREE_DESCR_32_1
              |             |  | | +------------ TREE_DESCR_16_2
              |             |  | | |  +--------- TREE_DESCR_64
              |             |  | | |  |  +------ TREE_DESCR_16_1
              |             |  | | |  |  | +---- TREE_DESCR_32_0
              |             |  | | |  |  | | +-- TREE_DESCR_16_0
              |             |  | | |  |  | | |
              |             |  | | |  |  | | |   GRANULARITY, 7 -> 0 */
   case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8  8 8 8 8 */
      return BYTE(1,1,1,1,1,1,1,1);
   case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16   8 8 8 8 */
      return BYTE(1,1,0,1,1,1,1,1);
   case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16  8 8  8 8 8 8 */
      return BYTE(0,1,1,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16  16   8 8 8 8 */
      return BYTE(0,1,0,1,1,1,1,1);

   case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8  8 8 16 */
      return BYTE(1,1,1,1,1,1,0,1);
   case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16   8 8 16 */
      return BYTE(1,1,0,1,1,1,0,1);
   case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16  8 8  8 8 16 */
      return BYTE(0,1,1,1,1,1,0,1);
   case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16  16   8 8 16 */
      return BYTE(0,1,0,1,1,1,0,1);

   case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8  16 8 8 */
      return BYTE(1,1,1,1,0,1,1,1);
   case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16   16 8 8 */
      return BYTE(1,1,0,1,0,1,1,1);
   case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16  8 8  16 8 8 */
      return BYTE(0,1,1,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16  16   16 8 8 */
      return BYTE(0,1,0,1,0,1,1,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8  16 16 */
      return BYTE(1,1,1,1,0,1,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16   16 16 */
      return BYTE(1,1,0,1,0,1,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16  8 8  16 16 */
      return BYTE(0,1,1,1,0,1,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16  16   16 16 */
      return BYTE(0,1,0,1,0,1,0,1);

   case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32  8 8 8 8 */
      return BYTE(0,0,0,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32  8 8 16  */
      return BYTE(0,0,0,1,1,1,0,1);
   case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32  16  8 8 */
      return BYTE(0,0,0,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32  16  16  */
      return BYTE(0,0,0,1,0,1,0,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8  32 */
      return BYTE(1,1,1,1,0,0,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16   32 */
      return BYTE(1,1,0,1,0,0,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16  8 8  32 */
      return BYTE(0,1,1,1,0,0,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16  16   32 */
      return BYTE(0,1,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
      return BYTE(0,0,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
      return BYTE(0,0,0,0,0,0,0,1);

   default: return BYTE(0,0,0,0,0,0,0,0);
            /* INVALID - any valid descr produces at least one
               valid bit in tree[0..7] */
   }
   /* NOTREACHED */
   tl_assert(0);

#  undef DESCR
#  undef BYTE
}

__attribute__((unused))
static Bool is_sane_Descr ( UShort descr ) {
   return descr_to_validbits(descr) != 0;
}

static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
   VG_(sprintf)(dst,
                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
                (Int)((descr & TREE_DESCR_8_7)  ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_6)  ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_5)  ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_4)  ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_3)  ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_2)  ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_1)  ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_0)  ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
   );
}
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
                (Int)((byte & 128) ? 1 : 0),
                (Int)((byte &  64) ? 1 : 0),
                (Int)((byte &  32) ? 1 : 0),
                (Int)((byte &  16) ? 1 : 0),
                (Int)((byte &   8) ? 1 : 0),
                (Int)((byte &   4) ? 1 : 0),
                (Int)((byte &   2) ? 1 : 0),
                (Int)((byte &   1) ? 1 : 0)
   );
}

static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
   Word  i;
   UChar validbits = descr_to_validbits(descr);
   HChar buf[128], buf2[128];
   if (validbits == 0)
      goto bad;
   for (i = 0; i < 8; i++) {
      if (validbits & (1<<i)) {
         if (tree[i] == SVal_INVALID)
            goto bad;
      } else {
         if (tree[i] != SVal_INVALID)
            goto bad;
      }
   }
   return True;
 bad:
   sprintf_Descr( buf, descr );
   sprintf_Byte( buf2, validbits );
   VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
   VG_(printf)("   validbits 0x%02lx    %s\n", (UWord)validbits, buf2);
   VG_(printf)("       descr 0x%04lx  %s\n", (UWord)descr, buf);
   for (i = 0; i < 8; i++)
      VG_(printf)("   [%ld] 0x%016llx\n", i, tree[i]);
   VG_(printf)("%s","}\n");
   return False;
}

static Bool is_sane_CacheLine ( CacheLine* cl )
{
   Word tno, cloff;

   if (!cl) goto bad;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = cl->descrs[tno];
      SVal*  tree  = &cl->svals[cloff];
      if (!is_sane_Descr_and_Tree(descr, tree))
         goto bad;
   }
   tl_assert(cloff == N_LINE_ARANGE);
   return True;
 bad:
   pp_CacheLine(cl);
   return False;
}

static UShort normalise_tree ( /*MOD*/SVal* tree )
{
   UShort descr;
   /* pre: incoming tree[0..7] does not have any invalid shvals, in
      particular no zeroes. */
   if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
                || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
                || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
                || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
      tl_assert(0);

   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   /* build 16-bit layer */
   if (tree[1] == tree[0]) {
      tree[1] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
      descr |= TREE_DESCR_16_0;
   }
   if (tree[3] == tree[2]) {
      tree[3] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
      descr |= TREE_DESCR_16_1;
   }
   if (tree[5] == tree[4]) {
      tree[5] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
      descr |= TREE_DESCR_16_2;
   }
   if (tree[7] == tree[6]) {
      tree[7] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
      descr |= TREE_DESCR_16_3;
   }
   /* build 32-bit layer */
   if (tree[2] == tree[0]
       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
      tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
      descr |= TREE_DESCR_32_0;
   }
   if (tree[6] == tree[4]
       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
      tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
      descr |= TREE_DESCR_32_1;
   }
   /* build 64-bit layer */
   if (tree[4] == tree[0]
       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
      tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
      descr |= TREE_DESCR_64;
   }
   return descr;
}
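
/* Worked example (informal): if all 8 leaves of a tree hold the same
   SVal v, the 16-bit pass collapses the four pairs, the 32-bit pass
   collapses those, and the 64-bit pass leaves just TREE_DESCR_64
   set, with tree[0] == v and tree[1..7] == SVal_INVALID.  A line
   full of such trees is exactly what the LineZ rep compresses
   best. */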

/* This takes a cacheline where all the data is at the leaves
   (w8[..]) and builds a correctly normalised tree. */
static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
{
   Word tno, cloff;
   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      SVal* tree = &cl->svals[cloff];
      cl->descrs[tno] = normalise_tree( tree );
   }
   tl_assert(cloff == N_LINE_ARANGE);
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   stats__cline_normalises++;
}


typedef struct { UChar count; SVal sval; } CountedSVal;

static
void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
                               /*OUT*/Word* dstUsedP,
                               Word nDst, CacheLine* src )
{
   Word  tno, cloff, dstUsed;

   tl_assert(nDst == N_LINE_ARANGE);
   dstUsed = 0;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = src->descrs[tno];
      SVal*  tree  = &src->svals[cloff];

      /* sequentialise the tree described by (descr,tree). */
#     define PUT(_n,_v)                                \
         do { dst[dstUsed  ].count = (_n);             \
              dst[dstUsed++].sval  = (_v);             \
         } while (0)

      /* byte 0 */
      if (descr & TREE_DESCR_64)   PUT(8, tree[0]); else
      if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
      if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
      if (descr & TREE_DESCR_8_0)  PUT(1, tree[0]);
      /* byte 1 */
      if (descr & TREE_DESCR_8_1)  PUT(1, tree[1]);
      /* byte 2 */
      if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
      if (descr & TREE_DESCR_8_2)  PUT(1, tree[2]);
      /* byte 3 */
      if (descr & TREE_DESCR_8_3)  PUT(1, tree[3]);
      /* byte 4 */
      if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
      if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
      if (descr & TREE_DESCR_8_4)  PUT(1, tree[4]);
      /* byte 5 */
      if (descr & TREE_DESCR_8_5)  PUT(1, tree[5]);
      /* byte 6 */
      if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
      if (descr & TREE_DESCR_8_6)  PUT(1, tree[6]);
      /* byte 7 */
      if (descr & TREE_DESCR_8_7)  PUT(1, tree[7]);

#     undef PUT
      /* END sequentialise the tree described by (descr,tree). */

   }
   tl_assert(cloff == N_LINE_ARANGE);
   tl_assert(dstUsed <= nDst);

   *dstUsedP = dstUsed;
}

/* Write the cacheline 'wix' to backing store.  Where it ends up
   is determined by its tag field. */
static __attribute__((noinline)) void cacheline_wback ( UWord wix )
{
   Word        i, j, k, m;
   Addr        tag;
   SecMap*     sm;
   CacheLine*  cl;
   LineZ*      lineZ;
   LineF*      lineF;
   Word        zix, fix, csvalsUsed;
   CountedSVal csvals[N_LINE_ARANGE];
   SVal        sv;

   if (0)
   VG_(printf)("scache wback line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag =  cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* The cache line may have been invalidated; if so, ignore it. */
   if (!is_valid_scache_tag(tag))
      return;

   /* Where are we going to put it? */
   sm    = NULL;
   lineZ = NULL;
   lineF = NULL;
   zix = fix = -1;

   /* find the Z line to write in and rcdec it or the associated F
      line. */
   find_Z_for_writing( &sm, &zix, tag );

   tl_assert(sm);
   tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];

   /* Generate the data to be stored */
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */

   csvalsUsed = -1;
   sequentialise_CacheLine( csvals, &csvalsUsed,
                            N_LINE_ARANGE, cl );
   tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
   if (0) VG_(printf)("%lu ", csvalsUsed);

   lineZ->dict[0] = lineZ->dict[1]
                  = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;

   /* i indexes actual shadow values, k is cursor in csvals */
   i = 0;
   for (k = 0; k < csvalsUsed; k++) {

      sv = csvals[k].sval;
      if (CHECK_ZSM)
         tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
      /* do we already have it? */
      if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
      if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
      if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
      if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
      /* no.  look for a free slot. */
      if (CHECK_ZSM)
         tl_assert(sv != SVal_INVALID);
      if (lineZ->dict[0]
          == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
      if (lineZ->dict[1]
          == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
      if (lineZ->dict[2]
          == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
      if (lineZ->dict[3]
          == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
      break; /* we'll have to use the f rep */
     dict_ok:
      m = csvals[k].count;
      if (m == 8) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         write_twobit_array( lineZ->ix2s, i+4, j );
         write_twobit_array( lineZ->ix2s, i+5, j );
         write_twobit_array( lineZ->ix2s, i+6, j );
         write_twobit_array( lineZ->ix2s, i+7, j );
         i += 8;
      }
      else if (m == 4) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         i += 4;
      }
      else if (m == 1) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         i += 1;
      }
      else if (m == 2) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         i += 2;
      }
      else {
         tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
      }

   }

   if (LIKELY(i == N_LINE_ARANGE)) {
      /* Construction of the compressed representation was
         successful. */
      rcinc_LineZ(lineZ);
      stats__cache_Z_wbacks++;
   } else {
      /* Cannot use the compressed(z) representation.  Use the full(f)
         rep instead. */
      tl_assert(i >= 0 && i < N_LINE_ARANGE);
      alloc_F_for_writing( sm, &fix );
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(!lineF->inUse);
      lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
      lineZ->dict[1] = (SVal)fix;
      lineF->inUse = True;
      i = 0;
      for (k = 0; k < csvalsUsed; k++) {
         if (CHECK_ZSM)
            tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
         sv = csvals[k].sval;
         if (CHECK_ZSM)
            tl_assert(sv != SVal_INVALID);
         for (m = csvals[k].count; m > 0; m--) {
            lineF->w64s[i] = sv;
            i++;
         }
      }
      tl_assert(i == N_LINE_ARANGE);
      rcinc_LineF(lineF);
      stats__cache_F_wbacks++;
   }
}

/* Fetch the cacheline 'wix' from the backing store.  The tag
   associated with 'wix' is assumed to have already been filled in;
   hence that is used to determine where in the backing store to read
   from. */
static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
{
   Word       i;
   Addr       tag;
   CacheLine* cl;
   LineZ*     lineZ;
   LineF*     lineF;

   if (0)
   VG_(printf)("scache fetch line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag =  cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* reject nonsense requests */
   tl_assert(is_valid_scache_tag(tag));

   lineZ = NULL;
   lineF = NULL;
   find_ZF_for_reading( &lineZ, &lineF, tag );
   tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );

   /* expand the data into the bottom layer of the tree, then get
      cacheline_normalise to build the descriptor array. */
   if (lineF) {
      tl_assert(lineF->inUse);
      for (i = 0; i < N_LINE_ARANGE; i++) {
         cl->svals[i] = lineF->w64s[i];
      }
      stats__cache_F_fetches++;
   } else {
      for (i = 0; i < N_LINE_ARANGE; i++) {
         SVal sv;
         UWord ix = read_twobit_array( lineZ->ix2s, i );
         /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
         sv = lineZ->dict[ix];
         tl_assert(sv != SVal_INVALID);
         cl->svals[i] = sv;
      }
      stats__cache_Z_fetches++;
   }
   normalise_CacheLine( cl );
}

static void shmem__invalidate_scache ( void ) {
   Word wix;
   if (0) VG_(printf)("%s","scache inval\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_invals++;
}

static void shmem__flush_and_invalidate_scache ( void ) {
   Word wix;
   Addr tag;
   if (0) VG_(printf)("%s","scache flush and invalidate\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      tag = cache_shmem.tags0[wix];
      if (tag == 1/*INVALID*/) {
         /* already invalid; nothing to do */
      } else {
         tl_assert(is_valid_scache_tag(tag));
         cacheline_wback( wix );
      }
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_flushes++;
   stats__cache_invals++;
}


static inline Bool aligned16 ( Addr a ) {
   return 0 == (a & 1);
}
static inline Bool aligned32 ( Addr a ) {
   return 0 == (a & 3);
}
static inline Bool aligned64 ( Addr a ) {
   return 0 == (a & 7);
}
static inline UWord get_cacheline_offset ( Addr a ) {
   return (UWord)(a & (N_LINE_ARANGE - 1));
}
static inline Addr cacheline_ROUNDUP ( Addr a ) {
   return ROUNDUP(a, N_LINE_ARANGE);
}
static inline Addr cacheline_ROUNDDN ( Addr a ) {
   return ROUNDDN(a, N_LINE_ARANGE);
}
static inline UWord get_treeno ( Addr a ) {
   return get_cacheline_offset(a) >> 3;
}
static inline UWord get_tree_offset ( Addr a ) {
   return a & 7;
}

static __attribute__((noinline))
       CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
static inline CacheLine* get_cacheline ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */
   Addr  tag = a & ~(N_LINE_ARANGE - 1);
   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
   stats__cache_totrefs++;
   if (LIKELY(tag == cache_shmem.tags0[wix])) {
      return &cache_shmem.lyns0[wix];
   } else {
      return get_cacheline_MISS( a );
   }
}

static __attribute__((noinline))
       CacheLine* get_cacheline_MISS ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */

   CacheLine* cl;
   Addr*      tag_old_p;
   Addr  tag = a & ~(N_LINE_ARANGE - 1);
   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);

   tl_assert(tag != cache_shmem.tags0[wix]);

   /* Dump the old line into the backing store. */
   stats__cache_totmisses++;

   cl        = &cache_shmem.lyns0[wix];
   tag_old_p = &cache_shmem.tags0[wix];

   if (is_valid_scache_tag( *tag_old_p )) {
      /* EXPENSIVE and REDUNDANT: callee does it */
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      cacheline_wback( wix );
   }
   /* and reload the new one */
   *tag_old_p = tag;
   cacheline_fetch( wix );
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   return cl;
}

static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_64to32pulldown++;
   switch (toff) {
      case 0: case 4:
         tl_assert(descr & TREE_DESCR_64);
         tree[4] = tree[0];
         descr &= ~TREE_DESCR_64;
         descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_32to16pulldown++;
   switch (toff) {
      case 0: case 2:
         if (!(descr & TREE_DESCR_32_0)) {
            descr = pulldown_to_32(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_32_0);
         tree[2] = tree[0];
         descr &= ~TREE_DESCR_32_0;
         descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
         break;
      case 4: case 6:
         if (!(descr & TREE_DESCR_32_1)) {
            descr = pulldown_to_32(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_32_1);
         tree[6] = tree[4];
         descr &= ~TREE_DESCR_32_1;
         descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_16to8pulldown++;
   switch (toff) {
      case 0: case 1:
         if (!(descr & TREE_DESCR_16_0)) {
            descr = pulldown_to_16(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_16_0);
         tree[1] = tree[0];
         descr &= ~TREE_DESCR_16_0;
         descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
         break;
      case 2: case 3:
         if (!(descr & TREE_DESCR_16_1)) {
            descr = pulldown_to_16(tree, 2, descr);
         }
         tl_assert(descr & TREE_DESCR_16_1);
         tree[3] = tree[2];
         descr &= ~TREE_DESCR_16_1;
         descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
         break;
      case 4: case 5:
         if (!(descr & TREE_DESCR_16_2)) {
            descr = pulldown_to_16(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_16_2);
         tree[5] = tree[4];
         descr &= ~TREE_DESCR_16_2;
         descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
         break;
      case 6: case 7:
         if (!(descr & TREE_DESCR_16_3)) {
            descr = pulldown_to_16(tree, 6, descr);
         }
         tl_assert(descr & TREE_DESCR_16_3);
         tree[7] = tree[6];
         descr &= ~TREE_DESCR_16_3;
         descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}
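
/* Pulldown sketch (informal): suppose a tree holds a single 64-bit
   value (descr == TREE_DESCR_64) and an 8-bit access needs byte 5.
   pulldown_to_8(tree, 5, descr) first splits 64 -> 32_1+32_0, then
   32_1 -> 16_3+16_2, then 16_2 -> 8_5+8_4, copying the value down at
   each step, after which tree[5] can be read or overwritten
   independently of its neighbours. */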


static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_0;
         break;
      case 2:
         mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_1;
         break;
      case 4:
         mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_2;
         break;
      case 6:
         mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_3;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         if (!(descr & TREE_DESCR_16_0))
            descr = pullup_descr_to_16(descr, 0);
         if (!(descr & TREE_DESCR_16_1))
            descr = pullup_descr_to_16(descr, 2);
         mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_0;
         break;
      case 4:
         if (!(descr & TREE_DESCR_16_2))
            descr = pullup_descr_to_16(descr, 4);
         if (!(descr & TREE_DESCR_16_3))
            descr = pullup_descr_to_16(descr, 6);
         mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_1;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0: case 4:
         return 0 != (descr & TREE_DESCR_64);
      default:
         tl_assert(0);
   }
}

static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0:
         return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
      case 2:
         return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
      case 4:
         return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
      case 6:
         return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
      default:
         tl_assert(0);
   }
}

/* ------------ Cache management ------------ */

static void zsm_flush_cache ( void )
{
   shmem__flush_and_invalidate_scache();
}


static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
{
   tl_assert( sizeof(UWord) == sizeof(Addr) );

   rcinc = p_rcinc;
   rcdec = p_rcdec;

   tl_assert(map_shmem == NULL);
   map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
                           HG_(free),
                           NULL/*unboxed UWord cmp*/);
   tl_assert(map_shmem != NULL);
   shmem__invalidate_scache();

   /* a SecMap must contain an integral number of CacheLines */
   tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
   /* also ... a CacheLine holds an integral number of trees */
   tl_assert(0 == (N_LINE_ARANGE % 8));
}

/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                SECTION END compressed shadow memory         //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                SECTION BEGIN vts primitives                 //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_VTS_H
#define __HB_VTS_H

/* VtsIDs can't exceed 30 bits, since they have to be packed into the
   lowest 30 bits of an SVal. */
typedef  UInt  VtsID;
#define VtsID_INVALID 0xFFFFFFFF
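
/* Packing sketch (informal; the exact field layout here is an
   assumption -- the real encoding lives with the SVal handling
   elsewhere in this file): a 64-bit SVal can carry two VtsIDs plus
   tag bits only because each VtsID fits in 30 bits, e.g.

      sv = (tag << 62) | ((ULong)vtsid1 << 30) | (ULong)vtsid2;

   uses 60 bits for the pair and leaves the top bits for the tag. */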

/* A VTS contains .ts, its vector clock, and also .id, a field to hold
   a backlink for the caller's convenience.  Since we have no idea
   what to set that to in the library, it always gets set to
   VtsID_INVALID. */
typedef
   struct {
      VtsID   id;
      XArray* ts; /* XArray* ScalarTS(abstract) */
   }
   VTS;


/* Create a new, empty VTS. */
static VTS* VTS__new ( void );

/* Delete this VTS in its entirety. */
static void VTS__delete ( VTS* vts );

/* Create a new singleton VTS. */
static VTS* VTS__singleton ( Thr* thr, ULong tym );

/* Return a new VTS in which vts[me]++, so to speak.  'vts' itself is
   not modified. */
static VTS* VTS__tick ( Thr* me, VTS* vts );

/* Return a new VTS constructed as the join (max) of the 2 args.
   Neither arg is modified. */
static VTS* VTS__join ( VTS* a, VTS* b );

/* Compute the partial ordering relation of the two args.  Although we
   could be completely general and return an enumeration value (EQ,
   LT, GT, UN), in fact we only need LEQ, and so we may as well
   hardwire that fact.

   Returns NULL iff LEQ(A,B), or non-NULL if not.  In the latter case,
   the returned Thr* indicates a discovered point at which LEQ fails.
   There may be more than one such point, but we only care about
   seeing one of them, not all of them.  This rather strange
   convention is used because sometimes we want to know the actual
   index at which the two VTSs first differ. */
static Thr* VTS__cmpLEQ ( VTS* a, VTS* b );

/* Compute an arbitrary structural (total) ordering on the two args,
   based on their VCs, so they can be looked up in a table, tree, etc.
   Returns -1, 0 or 1. */
static Word VTS__cmp_structural ( VTS* a, VTS* b );

/* Debugging only.  Display the given VTS in the buffer. */
static void VTS__show ( HChar* buf, Int nBuf, VTS* vts );

/* Debugging only.  Return vts[index], so to speak. */
static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );

#endif /* ! __HB_VTS_H */


/*--------------- to do with Vector Timestamps ---------------*/

/* Scalar Timestamp */
typedef
   struct {
      Thr*  thr;
      ULong tym;
   }
   ScalarTS;


static Bool is_sane_VTS ( VTS* vts )
{
   UWord     i, n;
   ScalarTS  *st1, *st2;
   if (!vts) return False;
   if (!vts->ts) return False;
   n = VG_(sizeXA)( vts->ts );
   if (n >= 2) {
      for (i = 0; i < n-1; i++) {
         st1 = VG_(indexXA)( vts->ts, i );
         st2 = VG_(indexXA)( vts->ts, i+1 );
         if (st1->thr >= st2->thr)
            return False;
         if (st1->tym == 0 || st2->tym == 0)
            return False;
      }
   }
   return True;
}


/* Create a new, empty VTS.
*/
VTS* VTS__new ( void )
{
   VTS* vts;
   vts = HG_(zalloc)( "libhb.VTS__new.1", sizeof(VTS) );
   tl_assert(vts);
   vts->id = VtsID_INVALID;
   vts->ts = VG_(newXA)( HG_(zalloc), "libhb.VTS__new.2",
                         HG_(free), sizeof(ScalarTS) );
   tl_assert(vts->ts);
   return vts;
}


/* Delete this VTS in its entirety.
*/
void VTS__delete ( VTS* vts )
{
   tl_assert(vts);
   tl_assert(vts->ts);
   VG_(deleteXA)( vts->ts );
   HG_(free)(vts);
}


/* Create a new singleton VTS.
*/
VTS* VTS__singleton ( Thr* thr, ULong tym ) {
   ScalarTS st;
   VTS*     vts;
   tl_assert(thr);
   tl_assert(tym >= 1);
   vts = VTS__new();
   st.thr = thr;
   st.tym = tym;
   VG_(addToXA)( vts->ts, &st );
   return vts;
}
1652
1653
1654/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
1655 not modified.
1656*/
1657VTS* VTS__tick ( Thr* me, VTS* vts )
1658{
1659 ScalarTS* here = NULL;
1660 ScalarTS tmp;
1661 VTS* res;
1662 Word i, n;
1663 tl_assert(me);
1664 tl_assert(is_sane_VTS(vts));
1665 //if (0) VG_(printf)("tick vts thrno %ld szin %d\n",
1666 // (Word)me->errmsg_index, (Int)VG_(sizeXA)(vts) );
1667 res = VTS__new();
1668 n = VG_(sizeXA)( vts->ts );
1669
1670 /* main loop doesn't handle zero-entry case correctly, so
1671 special-case it. */
1672 if (n == 0) {
1673 tmp.thr = me;
1674 tmp.tym = 1;
1675 VG_(addToXA)( res->ts, &tmp );
1676 tl_assert(is_sane_VTS(res));
1677 return res;
1678 }
1679
1680 for (i = 0; i < n; i++) {
1681 here = VG_(indexXA)( vts->ts, i );
1682 if (me < here->thr) {
1683 /* We just went past 'me', without seeing it. */
1684 tmp.thr = me;
1685 tmp.tym = 1;
1686 VG_(addToXA)( res->ts, &tmp );
1687 tmp = *here;
1688 VG_(addToXA)( res->ts, &tmp );
1689 i++;
1690 break;
1691 }
1692 else if (me == here->thr) {
1693 tmp = *here;
1694 tmp.tym++;
1695 VG_(addToXA)( res->ts, &tmp );
1696 i++;
1697 break;
1698 }
1699 else /* me > here->thr */ {
1700 tmp = *here;
1701 VG_(addToXA)( res->ts, &tmp );
1702 }
1703 }
1704 tl_assert(i >= 0 && i <= n);
1705 if (i == n && here && here->thr < me) {
1706 tmp.thr = me;
1707 tmp.tym = 1;
1708 VG_(addToXA)( res->ts, &tmp );
1709 } else {
1710 for (/*keepgoing*/; i < n; i++) {
1711 here = VG_(indexXA)( vts->ts, i );
1712 tmp = *here;
1713 VG_(addToXA)( res->ts, &tmp );
1714 }
1715 }
1716 tl_assert(is_sane_VTS(res));
1717 //if (0) VG_(printf)("tick vts thrno %ld szou %d\n",
1718 // (Word)me->errmsg_index, (Int)VG_(sizeXA)(res) );
1719 return res;
1720}
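
/* Worked example, supposing Thr* values ordered T1 < T2 < T3:
   ticking T2 in [ (T1,3), (T3,7) ] inserts a fresh entry, giving
   [ (T1,3), (T2,1), (T3,7) ]; ticking T3 instead just increments the
   existing entry, giving [ (T1,3), (T3,8) ]. */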


/* Return a new VTS constructed as the join (max) of the 2 args.
   Neither arg is modified.
*/
VTS* VTS__join ( VTS* a, VTS* b )
{
   Word  ia, ib, useda, usedb;
   ULong tyma, tymb, tymMax;
   Thr*  thr;
   VTS*  res;

   tl_assert(a && a->ts);
   tl_assert(b && b->ts);
   useda = VG_(sizeXA)( a->ts );
   usedb = VG_(sizeXA)( b->ts );

   res = VTS__new();
   ia = ib = 0;

   while (1) {

      /* This logic is to enumerate triples (thr, tyma, tymb) drawn
         from a and b in order, where thr is the next Thr*
         occurring in either a or b, and tyma/b are the relevant
         scalar timestamps, taking into account implicit zeroes. */
      tl_assert(ia >= 0 && ia <= useda);
      tl_assert(ib >= 0 && ib <= usedb);

      if (ia == useda && ib == usedb) {
         /* both empty - done */
         break;

      } else if (ia == useda && ib != usedb) {
         /* a empty, use up b */
         ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
         thr  = tmpb->thr;
         tyma = 0;
         tymb = tmpb->tym;
         ib++;

      } else if (ia != useda && ib == usedb) {
         /* b empty, use up a */
         ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
         thr  = tmpa->thr;
         tyma = tmpa->tym;
         tymb = 0;
         ia++;

      } else {
         /* both not empty; extract lowest-Thr*'d triple */
         ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
         ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
         if (tmpa->thr < tmpb->thr) {
            /* a has the lowest unconsidered Thr* */
            thr  = tmpa->thr;
            tyma = tmpa->tym;
            tymb = 0;
            ia++;
         } else if (tmpa->thr > tmpb->thr) {
            /* b has the lowest unconsidered Thr* */
            thr  = tmpb->thr;
            tyma = 0;
            tymb = tmpb->tym;
            ib++;
         } else {
            /* they both next mention the same Thr* */
            tl_assert(tmpa->thr == tmpb->thr);
            thr  = tmpa->thr; /* == tmpb->thr */
            tyma = tmpa->tym;
            tymb = tmpb->tym;
            ia++;
            ib++;
         }
      }

      /* having laboriously determined (thr, tyma, tymb), do something
         useful with it. */
      tymMax = tyma > tymb ? tyma : tymb;
      if (tymMax > 0) {
         ScalarTS st;
         st.thr = thr;
         st.tym = tymMax;
         VG_(addToXA)( res->ts, &st );
      }

   }

   tl_assert(is_sane_VTS( res ));

   return res;
}
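
/* Worked example, again with T1 < T2 < T3:
      join( [ (T1,3), (T2,5) ], [ (T2,4), (T3,1) ] )
    = [ (T1,3), (T2,5), (T3,1) ]
   since absent entries count as zero and max(5,4) == 5. */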


/* Determine if 'a' <= 'b', in the partial ordering.  Returns NULL if
   they are, or the first Thr* for which they are not.  This rather
   strange convention is used because sometimes we want to know the
   actual index at which they first differ. */
static Thr* VTS__cmpLEQ ( VTS* a, VTS* b )
{
   Word  ia, ib, useda, usedb;
   ULong tyma, tymb;

   tl_assert(a && a->ts);
   tl_assert(b && b->ts);
   useda = VG_(sizeXA)( a->ts );
   usedb = VG_(sizeXA)( b->ts );

   ia = ib = 0;

   while (1) {

      /* This logic is to enumerate pairs (tyma, tymb) drawn
         from a and b in order, where tyma/b are the relevant
         scalar timestamps, taking into account implicit zeroes. */
      Thr* thr;

      tl_assert(ia >= 0 && ia <= useda);
      tl_assert(ib >= 0 && ib <= usedb);

      if (ia == useda && ib == usedb) {
         /* both empty - done */
         break;

      } else if (ia == useda && ib != usedb) {
         /* a empty, use up b */
         ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
         tyma = 0;
         tymb = tmpb->tym;
         thr  = tmpb->thr;
         ib++;

      } else if (ia != useda && ib == usedb) {
         /* b empty, use up a */
         ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
         tyma = tmpa->tym;
         thr  = tmpa->thr;
         tymb = 0;
         ia++;

      } else {
         /* both not empty; extract lowest-Thr*'d triple */
         ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
         ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
         if (tmpa->thr < tmpb->thr) {
            /* a has the lowest unconsidered Thr* */
            tyma = tmpa->tym;
            thr  = tmpa->thr;
            tymb = 0;
            ia++;
         }
         else
         if (tmpa->thr > tmpb->thr) {
            /* b has the lowest unconsidered Thr* */
            tyma = 0;
            tymb = tmpb->tym;
            thr  = tmpb->thr;
            ib++;
         } else {
            /* they both next mention the same Thr* */
            tl_assert(tmpa->thr == tmpb->thr);
            tyma = tmpa->tym;
            thr  = tmpa->thr;
            tymb = tmpb->tym;
            ia++;
            ib++;
         }
      }

      /* having laboriously determined (tyma, tymb), do something
         useful with it. */
      if (tyma > tymb) {
         /* not LEQ at this index.  Quit, since the answer is
            determined already. */
         tl_assert(thr);
         return thr;
      }
   }

   return NULL; /* all points are LEQ */
}
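
/* For instance: cmpLEQ( [ (T1,2) ], [ (T1,3), (T2,9) ] ) returns NULL
   (the first vector is pointwise <= the second), whereas
   cmpLEQ( [ (T1,4) ], [ (T1,3), (T2,9) ] ) returns T1, the first
   thread at which the pointwise comparison fails (4 > 3). */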


/* Compute an arbitrary structural (total) ordering on the two args,
   based on their VCs, so they can be looked up in a table, tree, etc.
   Returns -1, 0 or 1.  (really just 'deriving Ord' :-)
*/
Word VTS__cmp_structural ( VTS* a, VTS* b )
{
   /* We just need to generate an arbitrary total ordering based on
      a->ts and b->ts.  Preferably do it in a way which comes across likely
      differences relatively quickly. */
   Word     i, useda, usedb;
   ScalarTS *tmpa, *tmpb;

   tl_assert(a && a->ts);
   tl_assert(b && b->ts);
   useda = VG_(sizeXA)( a->ts );
   usedb = VG_(sizeXA)( b->ts );

   if (useda < usedb) return -1;
   if (useda > usedb) return 1;

   /* Same length vectors, so let's step through them together. */
   tl_assert(useda == usedb);
   for (i = 0; i < useda; i++) {
      tmpa = VG_(indexXA)( a->ts, i );
      tmpb = VG_(indexXA)( b->ts, i );
      if (tmpa->tym < tmpb->tym) return -1;
      if (tmpa->tym > tmpb->tym) return 1;
      if (tmpa->thr < tmpb->thr) return -1;
      if (tmpa->thr > tmpb->thr) return 1;
   }

   /* They're identical. */
   return 0;
}


/* Debugging only.  Display the given VTS in the buffer.
*/
void VTS__show ( HChar* buf, Int nBuf, VTS* vts ) {
   ScalarTS* st;
   HChar     unit[64];
   Word      i, n;
   Int       avail = nBuf;
   tl_assert(vts && vts->ts);
   tl_assert(nBuf > 16);
   buf[0] = '[';
   buf[1] = 0;
   n = VG_(sizeXA)( vts->ts );
   for (i = 0; i < n; i++) {
      tl_assert(avail >= 40);
      st = VG_(indexXA)( vts->ts, i );
      VG_(memset)(unit, 0, sizeof(unit));
      VG_(sprintf)(unit, i < n-1 ? "%p:%lld " : "%p:%lld",
                         st->thr, st->tym);
      if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
         VG_(strcat)(buf, " ...]");
         buf[nBuf-1] = 0;
         return;
      }
      VG_(strcat)(buf, unit);
      avail -= VG_(strlen)(unit);
   }
   VG_(strcat)(buf, "]");
   buf[nBuf-1] = 0;
}


/* Debugging only.  Return vts[index], so to speak.
*/
ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) {
   UWord i, n;
   tl_assert(vts && vts->ts);
   n = VG_(sizeXA)( vts->ts );
   for (i = 0; i < n; i++) {
      ScalarTS* st = VG_(indexXA)( vts->ts, i );
      if (st->thr == idx)
         return st->tym;
   }
   return 0;
}


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION END vts primitives                                  //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN main library                                  //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////


/////////////////////////////////////////////////////////
//                                                     //
// VTS set                                             //
//                                                     //
/////////////////////////////////////////////////////////

static WordFM* /* VTS* void void */ vts_set = NULL;

static void vts_set_init ( void )
{
   tl_assert(!vts_set);
   vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
                         HG_(free),
                         (Word(*)(UWord,UWord))VTS__cmp_structural );
   tl_assert(vts_set);
}

/* Given a newly made VTS, look in vts_set to see if we already have
   an identical one.  If yes, free up this one and return instead a
   pointer to the existing one.  If no, add this one to the set and
   return the same pointer.  Caller differentiates the two cases by
   comparing returned pointer with the supplied one (although that
   does require that the supplied VTS is not already in the set).
*/
static VTS* vts_set__find_and_dealloc__or_add ( VTS* cand )
{
   UWord keyW, valW;
   /* lookup cand (by value) */
   if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
      /* found it */
      tl_assert(valW == 0);
      /* if this fails, cand (by ref) was already present (!) */
      tl_assert(keyW != (UWord)cand);
      VTS__delete(cand);
      return (VTS*)keyW;
   } else {
      /* not present.  Add and return pointer to same. */
      VG_(addToFM)( vts_set, (UWord)cand, 0/*val is unused*/ );
      return cand;
   }
}
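
/* Typical usage pattern (a sketch; see
   vts_tab__find_and_dealloc__or_add below for the real call site):

      VTS* nyu = VTS__join( a, b );
      nyu = vts_set__find_and_dealloc__or_add( nyu );

   After this, 'nyu' is the canonical interned copy, which the caller
   must not free. */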


/////////////////////////////////////////////////////////
//                                                     //
// VTS table                                           //
//                                                     //
/////////////////////////////////////////////////////////

static void VtsID__invalidate_caches ( void ); /* fwds */

/* A type to hold VTS table entries.  Invariants:
   If .vts == NULL, then this entry is not in use, so:
   - .rc == 0
   - this entry is on the freelist (unfortunately, does not imply
     any constraints on value for .freelink)
   If .vts != NULL, then this entry is in use:
   - .vts is findable in vts_set
   - .vts->id == this entry number
   - no specific value for .rc (even 0 is OK)
   - this entry is not on freelist, so .freelink == VtsID_INVALID
*/
typedef
   struct {
      VTS*  vts;      /* vts, in vts_set */
      UWord rc;       /* reference count - enough for entire aspace */
      VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
   }
   VtsTE;

/* The VTS table. */
static XArray* /* of VtsTE */ vts_tab = NULL;

/* An index into the VTS table, indicating the start of the list of
   free (available for use) entries.  If the list is empty, this is
   VtsID_INVALID. */
static VtsID vts_tab_freelist = VtsID_INVALID;

/* Do a GC of vts_tab when the freelist becomes empty AND the size of
   vts_tab equals or exceeds this size.  After GC, the value here is
   set appropriately so as to check for the next GC point. */
static Word vts_next_GC_at = 1000;

static void vts_tab_init ( void )
{
   vts_tab
      = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
                    HG_(free), sizeof(VtsTE) );
   vts_tab_freelist
      = VtsID_INVALID;
   tl_assert(vts_tab);
}

/* Add ii to the free list, checking that it looks out-of-use. */
static void add_to_free_list ( VtsID ii )
{
   VtsTE* ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts == NULL);
   tl_assert(ie->rc == 0);
   tl_assert(ie->freelink == VtsID_INVALID);
   ie->freelink = vts_tab_freelist;
   vts_tab_freelist = ii;
}

/* Get an entry from the free list.  This will return VtsID_INVALID if
   the free list is empty. */
static VtsID get_from_free_list ( void )
{
   VtsID  ii;
   VtsTE* ie;
   if (vts_tab_freelist == VtsID_INVALID)
      return VtsID_INVALID;
   ii = vts_tab_freelist;
   ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts == NULL);
   tl_assert(ie->rc == 0);
   vts_tab_freelist = ie->freelink;
   return ii;
}

/* Produce a new VtsID that can be used, either by getting it from
   the freelist, or, if that is empty, by expanding vts_tab. */
static VtsID get_new_VtsID ( void )
{
   VtsID ii;
   VtsTE te;
   ii = get_from_free_list();
   if (ii != VtsID_INVALID)
      return ii;
   te.vts = NULL;
   te.rc = 0;
   te.freelink = VtsID_INVALID;
   ii = (VtsID)VG_(addToXA)( vts_tab, &te );
   return ii;
}


/* Indirect callback from lib_zsm. */
static void VtsID__rcinc ( VtsID ii )
{
   VtsTE* ie;
   /* VG_(indexXA) does a range check for us */
   ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts);       /* else it's not in use */
   tl_assert(ie->rc < ~0UL); /* else we can't continue */
   tl_assert(ie->vts->id == ii);
   ie->rc++;
}

/* Indirect callback from lib_zsm. */
static void VtsID__rcdec ( VtsID ii )
{
   VtsTE* ie;
   /* VG_(indexXA) does a range check for us */
   ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts);   /* else it's not in use */
   tl_assert(ie->rc > 0); /* else RC snafu */
   tl_assert(ie->vts->id == ii);
   ie->rc--;
}


/* Look up 'cand' in our collection of VTSs.  If present, deallocate
   it and return the VtsID for the pre-existing version.  If not
   present, add it to both vts_tab and vts_set, allocate a fresh VtsID
   for it, and return that. */
static VtsID vts_tab__find_and_dealloc__or_add ( VTS* cand )
{
   VTS* auld;
   tl_assert(cand->id == VtsID_INVALID);
   auld = vts_set__find_and_dealloc__or_add(cand);
   if (auld != cand) {
      /* We already have an Aulde one.  Use that. */
      VtsTE* ie;
      tl_assert(auld->id != VtsID_INVALID);
      ie = VG_(indexXA)( vts_tab, auld->id );
      tl_assert(ie->vts == auld);
      return auld->id;
   } else {
      VtsID  ii = get_new_VtsID();
      VtsTE* ie = VG_(indexXA)( vts_tab, ii );
      ie->vts = cand;
      ie->rc = 0;
      ie->freelink = VtsID_INVALID;
      cand->id = ii;
      return ii;
   }
}


static void show_vts_stats ( HChar* caller )
{
   UWord nSet, nTab, nLive;
   ULong totrc;
   UWord n, i;
   nSet = VG_(sizeFM)( vts_set );
   nTab = VG_(sizeXA)( vts_tab );
   totrc = 0;
   nLive = 0;
   n = VG_(sizeXA)( vts_tab );
   for (i = 0; i < n; i++) {
      VtsTE* ie = VG_(indexXA)( vts_tab, i );
      if (ie->vts) {
         nLive++;
         totrc += (ULong)ie->rc;
      } else {
         tl_assert(ie->rc == 0);
      }
   }
   VG_(printf)("  show_vts_stats %s\n", caller);
   VG_(printf)("    vts_tab size %4lu\n", nTab);
   VG_(printf)("    vts_tab live %4lu\n", nLive);
   VG_(printf)("    vts_set size %4lu\n", nSet);
   VG_(printf)("        total rc %4llu\n", totrc);
}

/* NOT TO BE CALLED FROM WITHIN libzsm. */
__attribute__((noinline))
static void vts_tab__do_GC ( Bool show_stats )
{
   UWord i, nTab, nLive, nFreed;

   /* check this is actually necessary. */
   tl_assert(vts_tab_freelist == VtsID_INVALID);

   /* empty the caches for partial order checks and binary joins.  We
      could do better and prune out the entries to be deleted, but it
      ain't worth the hassle. */
   VtsID__invalidate_caches();

   /* First, make the reference counts up to date. */
   zsm_flush_cache();

   nTab = VG_(sizeXA)( vts_tab );

   if (show_stats) {
      VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
      show_vts_stats("before GC");
   }

   /* Now we can inspect the entire vts_tab.  Any entries with zero
      .rc fields are now no longer in use and can be put back on the
      free list, removed from vts_set, and deleted. */
   nFreed = 0;
   for (i = 0; i < nTab; i++) {
      Bool present;
      UWord oldK = 0, oldV = 0;
      VtsTE* te = VG_(indexXA)( vts_tab, i );
      if (te->vts == NULL) {
         tl_assert(te->rc == 0);
         continue; /* already on the free list (presumably) */
      }
      if (te->rc > 0)
         continue; /* in use */
      /* Ok, we got one we can free. */
      tl_assert(te->vts->id == i);
      /* first, remove it from vts_set. */
      present = VG_(delFromFM)( vts_set,
                                &oldK, &oldV, (UWord)te->vts );
      tl_assert(present); /* else it isn't in vts_set ?! */
      tl_assert(oldV == 0); /* no info stored in vts_set val fields */
      tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
      /* now free the VTS itself */
      VTS__delete(te->vts);
      te->vts = NULL;
      /* and finally put this entry on the free list */
      tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
      add_to_free_list( i );
      nFreed++;
   }

   /* Now figure out when the next GC should be.  We'll allow the
      number of VTSs to double before GCing again.  Except of course
      that since we can't (or, at least, don't) shrink vts_tab, we
      can't set the threshold value smaller than it. */
   tl_assert(nFreed <= nTab);
   nLive = nTab - nFreed;
   tl_assert(nLive >= 0 && nLive <= nTab);
   vts_next_GC_at = 2 * nLive;
   if (vts_next_GC_at < nTab)
      vts_next_GC_at = nTab;

   if (show_stats) {
      show_vts_stats("after GC");
      VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
   }

   if (VG_(clo_verbosity) > 1) {
      static UInt ctr = 0;
      tl_assert(nTab > 0);
      VG_(message)(Vg_DebugMsg,
                   "libhb: VTS GC: #%u  old size %lu  live %lu  (%2llu%%)\n",
                   ctr++, nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
   }
}
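
/* Arithmetic example: if a GC leaves nLive == 300 entries alive in a
   table of nTab == 1000, then vts_next_GC_at becomes
   max(2 * 300, 1000) == 1000; the next GC happens once the freelist
   is empty again and the table is at least that big. */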


/////////////////////////////////////////////////////////
//                                                     //
// Vts IDs                                             //
//                                                     //
/////////////////////////////////////////////////////////

//////////////////////////
static ULong stats__cmpLEQ_queries = 0;
static ULong stats__cmpLEQ_misses  = 0;
static ULong stats__join2_queries  = 0;
static ULong stats__join2_misses   = 0;

static inline UInt ROL32 ( UInt w, Int n ) {
   w = (w << n) | (w >> (32-n));
   return w;
}
static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
   UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
   return hash % nTab;
}

#define N_CMPLEQ_CACHE 1023
static
   struct { VtsID vi1; VtsID vi2; Bool leq; }
   cmpLEQ_cache[N_CMPLEQ_CACHE];

#define N_JOIN2_CACHE 1023
static
   struct { VtsID vi1; VtsID vi2; VtsID res; }
   join2_cache[N_JOIN2_CACHE];

static void VtsID__invalidate_caches ( void ) {
   Int i;
   for (i = 0; i < N_CMPLEQ_CACHE; i++) {
      cmpLEQ_cache[i].vi1 = VtsID_INVALID;
      cmpLEQ_cache[i].vi2 = VtsID_INVALID;
      cmpLEQ_cache[i].leq = False;
   }
   for (i = 0; i < N_JOIN2_CACHE; i++) {
      join2_cache[i].vi1 = VtsID_INVALID;
      join2_cache[i].vi2 = VtsID_INVALID;
      join2_cache[i].res = VtsID_INVALID;
   }
}
//////////////////////////

//static Bool VtsID__is_valid ( VtsID vi ) {
//   VtsTE* ve;
//   if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
//      return False;
//   ve = VG_(indexXA)( vts_tab, vi );
//   if (!ve->vts)
//      return False;
//   tl_assert(ve->vts->id == vi);
//   return True;
//}

static VTS* VtsID__to_VTS ( VtsID vi ) {
   VtsTE* te = VG_(indexXA)( vts_tab, vi );
   tl_assert(te->vts);
   return te->vts;
}

static void VtsID__pp ( VtsID vi ) {
   HChar buf[100];
   VTS* vts = VtsID__to_VTS(vi);
   VTS__show( buf, sizeof(buf)-1, vts );
   buf[sizeof(buf)-1] = 0;
   VG_(printf)("%s", buf);
}

/* compute partial ordering relation of vi1 and vi2. */
__attribute__((noinline))
static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
   UInt hash;
   Bool leq;
   VTS  *v1, *v2;
   //if (vi1 == vi2) return True;
   tl_assert(vi1 != vi2);
   ////++
   stats__cmpLEQ_queries++;
   hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
   if (cmpLEQ_cache[hash].vi1 == vi1
       && cmpLEQ_cache[hash].vi2 == vi2)
      return cmpLEQ_cache[hash].leq;
   stats__cmpLEQ_misses++;
   ////--
   v1 = VtsID__to_VTS(vi1);
   v2 = VtsID__to_VTS(vi2);
   leq = VTS__cmpLEQ( v1, v2 ) == NULL;
   ////++
   cmpLEQ_cache[hash].vi1 = vi1;
   cmpLEQ_cache[hash].vi2 = vi2;
   cmpLEQ_cache[hash].leq = leq;
   ////--
   return leq;
}
static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
   return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2);
}

/* compute binary join */
__attribute__((noinline))
static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
   UInt  hash;
   VtsID res;
   VTS   *vts1, *vts2, *nyu;
   //if (vi1 == vi2) return vi1;
   tl_assert(vi1 != vi2);
   ////++
   stats__join2_queries++;
   hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
   if (join2_cache[hash].vi1 == vi1
       && join2_cache[hash].vi2 == vi2)
      return join2_cache[hash].res;
   stats__join2_misses++;
   ////--
   vts1 = VtsID__to_VTS(vi1);
   vts2 = VtsID__to_VTS(vi2);
   nyu  = VTS__join(vts1,vts2);
   res  = vts_tab__find_and_dealloc__or_add(nyu);
   ////++
   join2_cache[hash].vi1 = vi1;
   join2_cache[hash].vi2 = vi2;
   join2_cache[hash].res = res;
   ////--
   return res;
}
static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
   return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2);
}

/* create a singleton VTS, namely [thr:1] */
static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
   VTS* nyu = VTS__singleton(thr,tym);
   return vts_tab__find_and_dealloc__or_add(nyu);
}

/* tick operation, creates value 1 if specified index is absent */
static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
   VTS* vts = VtsID__to_VTS(vi);
   VTS* nyu = VTS__tick(idx,vts);
   return vts_tab__find_and_dealloc__or_add(nyu);
}

/* index into a VTS (only for assertions) */
static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
   VTS* vts = VtsID__to_VTS(vi);
   return VTS__indexAt_SLOW( vts, idx );
}

/* Assuming that !cmpLEQ(vi1, vi2), find the first (or any, really)
   element in vi1 which is pointwise greater-than the corresponding
   element in vi2.  Given that assumption, such an element must exist,
   and this function asserts that it does.  This needs to be fairly
   quick since it is called every time a race is detected. */
static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
{
   VTS  *vts1, *vts2;
   Thr* diffthr;
   tl_assert(vi1 != vi2);
   vts1 = VtsID__to_VTS(vi1);
   vts2 = VtsID__to_VTS(vi2);
   tl_assert(vts1 != vts2);
   diffthr = VTS__cmpLEQ(vts1, vts2);
   tl_assert(diffthr); /* else they are LEQ ! */
   return diffthr;
}


/////////////////////////////////////////////////////////
//                                                     //
// Filters                                             //
//                                                     //
/////////////////////////////////////////////////////////

// baseline: 5, 9
#define FI_LINE_SZB_LOG2  5
#define FI_NUM_LINES_LOG2 10

#define FI_LINE_SZB       (1 << FI_LINE_SZB_LOG2)
#define FI_NUM_LINES      (1 << FI_NUM_LINES_LOG2)

#define FI_TAG_MASK        (~(Addr)(FI_LINE_SZB - 1))
#define FI_GET_TAG(_a)     ((_a) & FI_TAG_MASK)

#define FI_GET_LINENO(_a)  ( ((_a) >> FI_LINE_SZB_LOG2) \
                             & (Addr)(FI_NUM_LINES-1) )


/* In the lines, each 8 bytes are treated individually, and are mapped
   to a UShort.  Regardless of endianness of the underlying machine,
   bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
   the highest address.

   Of each bit pair, the higher numbered bit is set if a R has been
   seen, so the actual layout is:

   15 14             ...  01 00

   R  W  for addr+7  ...  R  W  for addr+0

   So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
*/
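
/* Worked example: for the byte at addr+3 the bit pair sits at bits
   7:6, so its R-bit mask is 0x2 << (2*3) == 0x0080 and its combined
   R+W mask is 0x3 << (2*3) == 0x00C0 -- exactly the masks computed
   by Filter__ok_to_skip_crd08 and Filter__ok_to_skip_cwr08 below. */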

/* tags are separated from lines.  tags are Addrs and are
   the base address of the line. */
typedef
   struct {
      UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
   }
   FiLine;

typedef
   struct {
      Addr   tags[FI_NUM_LINES];
      FiLine lines[FI_NUM_LINES];
   }
   Filter;

/* Forget everything we know -- clear the filter and let everything
   through.  This needs to be as fast as possible, since it is called
   every time the running thread changes, and every time a thread's
   vector clocks change, which can be quite frequent.  The obvious
   fast way to do this is simply to stuff in tags which we know are
   not going to match anything, since they're not aligned to the start
   of a line. */
static void Filter__clear ( Filter* fi, HChar* who )
{
   UWord i;
   if (0) VG_(printf)("  Filter__clear(%p, %s)\n", fi, who);
   for (i = 0; i < FI_NUM_LINES; i += 8) {
      fi->tags[i+0] = 1; /* impossible value -- cannot match */
      fi->tags[i+1] = 1;
      fi->tags[i+2] = 1;
      fi->tags[i+3] = 1;
      fi->tags[i+4] = 1;
      fi->tags[i+5] = 1;
      fi->tags[i+6] = 1;
      fi->tags[i+7] = 1;
   }
   tl_assert(i == FI_NUM_LINES);
}

/* Clearing an arbitrary range in the filter.  Unfortunately
   we have to do this due to core-supplied new/die-mem events. */

static void Filter__clear_1byte ( Filter* fi, Addr a )
{
   Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   FiLine* line   = &fi->lines[lineno];
   UWord   loff   = (a - atag) / 8;
   UShort  mask   = 0x3 << (2 * (a & 7));
   /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
   if (LIKELY( fi->tags[lineno] == atag )) {
      /* hit.  clear the bits. */
      UShort u16 = line->u16s[loff];
      line->u16s[loff] = u16 & ~mask; /* clear them */
   } else {
      /* miss.  The filter doesn't hold this address, so ignore. */
   }
}

static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
{
   Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   FiLine* line   = &fi->lines[lineno];
   UWord   loff   = (a - atag) / 8;
   if (LIKELY( fi->tags[lineno] == atag )) {
      line->u16s[loff] = 0;
   } else {
      /* miss.  The filter doesn't hold this address, so ignore. */
   }
}

static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
{
   //VG_(printf)("%lu ", len);
   /* slowly do part preceding 8-alignment */
   while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
      Filter__clear_1byte( fi, a );
      a++;
      len--;
   }
   /* vector loop */
   while (len >= 8) {
      Filter__clear_8bytes_aligned( fi, a );
      a += 8;
      len -= 8;
   }
   /* slowly do tail */
   while (UNLIKELY(len > 0)) {
      Filter__clear_1byte( fi, a );
      a++;
      len--;
   }
}
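
/* For instance, Filter__clear_range(fi, 0x1003, 21) clears bytes
   0x1003..0x1007 individually (5 head bytes), then 0x1008..0x1017 as
   two aligned 8-byte chunks, and has no tail left over. */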


/* ------ Read handlers for the filter. ------ */

static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xAAAA;
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* all R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 4 x R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xA << (2 * (a & 6));
      /* mask is A000, 0A00, 00A0 or 000A */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 2 x R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
{
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0x2 << (2 * (a & 7));
      /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 1 x R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}


/* ------ Write handlers for the filter. ------ */

static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xFFFF;
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* all R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 4 x R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xF << (2 * (a & 6));
      /* mask is F000, 0F00, 00F0 or 000F */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 2 x R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
{
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0x3 << (2 * (a & 7));
      /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 1 x R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}


/////////////////////////////////////////////////////////
//                                                     //
// Threads                                             //
//                                                     //
/////////////////////////////////////////////////////////

// QQQ move this somewhere else
typedef struct { ULong ull; ExeContext* ec; } ULong_n_EC;

struct _Thr {
   /* Current VTSs for this thread.  They change as we go along.  viR
      is the VTS to be used for reads, viW for writes.  Usually they
      are the same, but can differ when we deal with reader-writer
      locks.  It is always the case that
         VtsID__cmpLEQ(viW,viR) == True
      that is, viW must be the same, or lagging behind, viR. */
   VtsID viR;
   VtsID viW;

   /* Is initially True, and is set to False after the thread really
      has done a low-level exit. */
   Bool still_alive;

   /* A filter that removes references for which we believe that
      msmcread/msmcwrite will not change the state, nor report a
      race. */
   Filter* filter;

   /* opaque (to us) data we hold on behalf of the library's user. */
   void* opaque;

   /* The ULongs (scalar Krs) in this accumulate in strictly
      increasing order, without duplicates.  This is important because
      we need to be able to find a given scalar Kr in this array
      later, by binary search. */
   XArray* /* ULong_n_EC */ local_Krs_n_stacks;
};

static Thr* Thr__new ( void ) {
   Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
   thr->viR = VtsID_INVALID;
   thr->viW = VtsID_INVALID;
   thr->still_alive = True;
   thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
   thr->local_Krs_n_stacks
      = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.3 (local_Krs_and_stacks)",
                    HG_(free), sizeof(ULong_n_EC) );
   return thr;
}

static void note_local_Kr_n_stack_for ( Thr* thr )
{
   Word       nPresent;
   ULong_n_EC pair;
   tl_assert(thr);

   // We only collect this info at history level 1 (approx)
   if (HG_(clo_history_level) != 1)
      return;

   /* This is the scalar Kr for thr. */
   pair.ull = VtsID__indexAt( thr->viR, thr );
   pair.ec  = main_get_EC( thr );
   tl_assert(pair.ec);
   tl_assert(thr->local_Krs_n_stacks);

   /* check that we're not adding duplicates */
   nPresent = VG_(sizeXA)( thr->local_Krs_n_stacks );

   /* Throw away old stacks, if necessary.  We can't accumulate stuff
      indefinitely. */
   if (nPresent > 10000) {
      VG_(dropHeadXA)( thr->local_Krs_n_stacks, nPresent / 2 );
      nPresent = VG_(sizeXA)( thr->local_Krs_n_stacks );
      if (1)
         VG_(printf)("LOCAL Kr: thr %p,  Kr %llu,  ec %p (!!! gc !!!)\n",
                     thr, pair.ull, pair.ec );
   }

   if (nPresent > 0) {
      ULong_n_EC* prevPair
         = (ULong_n_EC*)VG_(indexXA)( thr->local_Krs_n_stacks, nPresent-1 );
      tl_assert( prevPair->ull < pair.ull );
   }

   if (nPresent == 0)
      pair.ec = NULL;

   VG_(addToXA)( thr->local_Krs_n_stacks, &pair );

   if (0)
      VG_(printf)("LOCAL Kr: thr %p,  Kr %llu,  ec %p\n",
                  thr, pair.ull, pair.ec );
   if (0)
      VG_(pp_ExeContext)(pair.ec);
}

static Int cmp__ULong_n_EC__by_ULong ( ULong_n_EC* pair1, ULong_n_EC* pair2 )
{
   if (pair1->ull < pair2->ull) return -1;
   if (pair1->ull > pair2->ull) return 1;
   return 0;
}


/////////////////////////////////////////////////////////
//                                                     //
// Shadow Values                                       //
//                                                     //
/////////////////////////////////////////////////////////

// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
// hb_zsm.h.  We have to do everything else here.

/* SVal is 64 bit unsigned int.

      <---------30--------->    <---------30--------->
   00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X   C(Rmin,Wmin)
   10 X--------------------X XX X--------------------X   A: SVal_NOACCESS
   11 0--------------------0 00 0--------------------0   A: SVal_INVALID

*/
#define SVAL_TAGMASK (3ULL << 62)

static inline Bool SVal__isC ( SVal s ) {
   return (0ULL << 62) == (s & SVAL_TAGMASK);
}
static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
   //tl_assert(VtsID__is_valid(rmini));
   //tl_assert(VtsID__is_valid(wmini));
   return (((ULong)rmini) << 32) | ((ULong)wmini);
}
static inline VtsID SVal__unC_Rmin ( SVal s ) {
   tl_assert(SVal__isC(s));
   return (VtsID)(s >> 32);
}
static inline VtsID SVal__unC_Wmin ( SVal s ) {
   tl_assert(SVal__isC(s));
   return (VtsID)(s & 0xFFFFFFFFULL);
}
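
/* Packing example: SVal__mkC(0x1234, 0x5678) == 0x0000123400005678ULL.
   The top two bits are zero, so SVal__isC holds, and
   unC_Rmin/unC_Wmin recover 0x1234 and 0x5678.  This relies on VtsIDs
   staying within 30 bits, per the diagram above. */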

static inline Bool SVal__isA ( SVal s ) {
   return (2ULL << 62) == (s & SVAL_TAGMASK);
}
static inline SVal SVal__mkA ( void ) {
   return 2ULL << 62;
}

/* Direct callback from lib_zsm. */
static void SVal__rcinc ( SVal s ) {
   if (SVal__isC(s)) {
      VtsID__rcinc( SVal__unC_Rmin(s) );
      VtsID__rcinc( SVal__unC_Wmin(s) );
   }
}

/* Direct callback from lib_zsm. */
static void SVal__rcdec ( SVal s ) {
   if (SVal__isC(s)) {
      VtsID__rcdec( SVal__unC_Rmin(s) );
      VtsID__rcdec( SVal__unC_Wmin(s) );
   }
}


/////////////////////////////////////////////////////////
//                                                     //
// A simple group (memory) allocator                   //
//                                                     //
/////////////////////////////////////////////////////////

//////////////// BEGIN general group allocator
typedef
   struct {
      UWord   elemSzB;        /* element size */
      UWord   nPerGroup;      /* # elems per group */
      void*   (*alloc)(HChar*, SizeT); /* group allocator */
      HChar*  cc; /* group allocator's cc */
      void    (*free)(void*); /* group allocator's free-er (unused) */
      /* XArray of void* (pointers to groups).  The groups themselves.
         Each element is a pointer to a block of size (elemSzB *
         nPerGroup) bytes. */
      XArray* groups;
      /* next free element.  Is a pointer to an element in one of the
         groups pointed to by .groups. */
      void* nextFree;
   }
   GroupAlloc;

static void init_GroupAlloc ( /*MOD*/GroupAlloc* ga,
                              UWord  elemSzB,
                              UWord  nPerGroup,
                              void*  (*alloc)(HChar*, SizeT),
                              HChar* cc,
                              void   (*free)(void*) )
{
   tl_assert(0 == (elemSzB % sizeof(UWord)));
   tl_assert(elemSzB >= sizeof(UWord));
   tl_assert(nPerGroup >= 100); /* let's say */
   tl_assert(alloc);
   tl_assert(cc);
   tl_assert(free);
   tl_assert(ga);
   VG_(memset)(ga, 0, sizeof(*ga));
   ga->elemSzB   = elemSzB;
   ga->nPerGroup = nPerGroup;
   ga->groups    = NULL;
   ga->alloc     = alloc;
   ga->cc        = cc;
   ga->free      = free;
   ga->groups    = VG_(newXA)( alloc, cc, free, sizeof(void*) );
   ga->nextFree  = NULL;
   tl_assert(ga->groups);
}

/* The freelist is empty.  Allocate a new group and put all the new
   elements in it onto the freelist. */
__attribute__((noinline))
static void gal_add_new_group ( GroupAlloc* ga )
{
   Word   i;
   UWord* group;
   tl_assert(ga);
   tl_assert(ga->nextFree == NULL);
   group = ga->alloc( ga->cc, ga->elemSzB * ga->nPerGroup );
   tl_assert(group);
   /* extend the freelist through the new group.  Place the freelist
      pointer in the first word of each element.  That's why the
      element size must be at least one word. */
   for (i = ga->nPerGroup-1; i >= 0; i--) {
      UChar* elemC = ((UChar*)group) + i * ga->elemSzB;
      UWord* elem  = (UWord*)elemC;
      tl_assert(0 == (((UWord)elem) % sizeof(UWord)));
      *elem = (UWord)ga->nextFree;
      ga->nextFree = elem;
   }
   /* and add to our collection of groups */
   VG_(addToXA)( ga->groups, &group );
}

inline static void* gal_Alloc ( GroupAlloc* ga )
{
   UWord* elem;
   if (UNLIKELY(ga->nextFree == NULL)) {
      gal_add_new_group(ga);
   }
   elem = ga->nextFree;
   ga->nextFree = (void*)*elem;
   *elem = 0; /* unnecessary, but just to be on the safe side */
   return elem;
}

inline static void* gal_Alloc_w_size_check ( GroupAlloc* ga, SizeT n )
{
   tl_assert(n == ga->elemSzB);
   return gal_Alloc( ga );
}

inline static void gal_Free ( GroupAlloc* ga, void* p )
{
   UWord* elem = (UWord*)p;
   *elem = (UWord)ga->nextFree;
   ga->nextFree = elem;
}
//////////////// END general group allocator
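
/* Usage sketch (the type name here is illustrative only; the real
   instantiations are the RCEC and OldRef allocators further below):

      static GroupAlloc ga;
      init_GroupAlloc( &ga, sizeof(SomeFixedSizeType), 1000,
                       HG_(zalloc), "some.cc.string", HG_(free) );
      SomeFixedSizeType* p = gal_Alloc( &ga );
      ...
      gal_Free( &ga, p );

   Elements are recycled via the internal freelist; the groups
   themselves are never handed back to the free-er. */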


/////////////////////////////////////////////////////////
//                                                     //
// Change-event map2                                   //
//                                                     //
/////////////////////////////////////////////////////////

#define EVENT_MAP_GC_DISCARD_FRACTION  0.5

/* This is in two parts:

   1. A hash table of RCECs.  This is a set of reference-counted stack
      traces.  When the reference count of a stack trace becomes zero,
      it is removed from the set and freed up.  The intent is to have
      a set of stack traces which can be referred to from (2), but to
      only represent each one once.  The set is indexed/searched by
      ordering on the stack trace vectors.

   2. A SparseWA of OldRefs.  These store information about each old
      ref that we need to record.  It is indexed by address of the
      location for which the information is recorded.  For LRU
      purposes, each OldRef also contains a generation number,
      indicating when it was most recently accessed.

      The important part of an OldRef is, however, its accs[] array.
      This is an array of N_OLDREF_ACCS entries which binds (thread,
      R/W, size) triples to RCECs.  This allows us to collect the last
      access-traceback by up to N_OLDREF_ACCS different triples for
      this location.  The accs[] array is a MTF-array.  If a binding
      falls off the end, that's too bad -- we will lose info about
      that triple's access to this location.

      When the SparseWA becomes too big, we can throw away the OldRefs
      whose generation numbers are below some threshold; hence doing
      approximate LRU discarding.  For each discarded OldRef we must
      of course decrement the reference count on all the RCECs it
      refers to, in order that entries from (1) eventually get
      discarded too.

   A major improvement in reliability of this mechanism would be to
   have a dynamically sized OldRef.accs[] array, so no entries ever
   fall off the end.  In investigations (Dec 08) it appears that a
   major cause for the non-availability of conflicting-access traces
   in race reports is caused by the fixed size of this array.  I
   suspect for most OldRefs, only a few entries are used, but for a
   minority of cases there is an overflow, leading to info lossage.
   Investigations also suggest this is very workload and scheduling
   sensitive.  Therefore a dynamic sizing would be better.

   However, dynamic sizing would defeat the use of a GroupAllocator
   for OldRef structures.  And that's important for performance.  So
   it's not straightforward to do.
*/


static UWord stats__ctxt_rcdec1        = 0;
static UWord stats__ctxt_rcdec2        = 0;
static UWord stats__ctxt_rcdec3        = 0;
static UWord stats__ctxt_rcdec_calls   = 0;
static UWord stats__ctxt_rcdec_discards = 0;
static UWord stats__ctxt_rcdec1_eq     = 0;

static UWord stats__ctxt_tab_curr = 0;
static UWord stats__ctxt_tab_max  = 0;

static UWord stats__ctxt_tab_qs   = 0;
static UWord stats__ctxt_tab_cmps = 0;


///////////////////////////////////////////////////////
//// Part (1): An OSet of RCECs
///

#define N_FRAMES 8

// (UInt) `echo "Reference Counted Execution Context" | md5sum`
#define RCEC_MAGIC 0xab88abb2UL

//#define N_RCEC_TAB 98317 /* prime */
#define N_RCEC_TAB 196613 /* prime */

typedef
   struct _RCEC {
      UWord magic;  /* sanity check only */
      struct _RCEC* next;
      UWord rc;
      UWord rcX; /* used for crosschecking */
      UWord frames_hash; /* hash of all the frames */
      UWord frames[N_FRAMES];
   }
   RCEC;

static RCEC** contextTab = NULL; /* hash table of RCEC*s */


/* Gives an arbitrary total order on RCEC .frames fields */
static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
   Word i;
   tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
   tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
   if (ec1->frames_hash < ec2->frames_hash) return -1;
   if (ec1->frames_hash > ec2->frames_hash) return  1;
   for (i = 0; i < N_FRAMES; i++) {
      if (ec1->frames[i] < ec2->frames[i]) return -1;
      if (ec1->frames[i] > ec2->frames[i]) return  1;
   }
   return 0;
}


/* Dec the ref of this RCEC. */
static void ctxt__rcdec ( RCEC* ec )
{
   stats__ctxt_rcdec_calls++;
   tl_assert(ec && ec->magic == RCEC_MAGIC);
   tl_assert(ec->rc > 0);
   ec->rc--;
}

static void ctxt__rcinc ( RCEC* ec )
{
   tl_assert(ec && ec->magic == RCEC_MAGIC);
   ec->rc++;
}


//////////// BEGIN RCEC group allocator
static GroupAlloc rcec_group_allocator;

static RCEC* alloc_RCEC ( void ) {
   return gal_Alloc ( &rcec_group_allocator );
}

static void free_RCEC ( RCEC* rcec ) {
   tl_assert(rcec->magic == RCEC_MAGIC);
   gal_Free( &rcec_group_allocator, rcec );
}
//////////// END RCEC group allocator


/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
   move it one step closer to the front of the list, so as to make
   subsequent searches for it cheaper. */
static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
{
   RCEC *ec0, *ec1, *ec2;
   if (ec == *headp)
      tl_assert(0); /* already at head of list */
   tl_assert(ec != NULL);
   ec0 = *headp;
   ec1 = NULL;
   ec2 = NULL;
   while (True) {
      if (ec0 == NULL || ec0 == ec) break;
      ec2 = ec1;
      ec1 = ec0;
      ec0 = ec0->next;
   }
   tl_assert(ec0 == ec);
   if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
      RCEC* tmp;
      /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
         predecessor.  Swap ec0 and ec1, that is, move ec0 one step
         closer to the start of the list. */
      tl_assert(ec2->next == ec1);
      tl_assert(ec1->next == ec0);
      tmp = ec0->next;
      ec2->next = ec0;
      ec0->next = ec1;
      ec1->next = tmp;
   }
   else
   if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
      /* it's second in the list. */
      tl_assert(*headp == ec1);
      tl_assert(ec1->next == ec0);
      ec1->next = ec0->next;
      ec0->next = ec1;
      *headp = ec0;
   }
}
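
/* Behaviour sketch: given the chain  A -> B -> C -> D  with
   *headp == A, promoting C yields  A -> C -> B -> D  (the general
   swap case), while promoting B, the second element, yields
   B -> A -> C -> D  (the special head-adjacent case). */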
3280
3281
3282/* Find the given RCEC in the tree, and return a pointer to it. Or,
3283 if not present, add the given one to the tree (by making a copy of
3284 it, so the caller can immediately deallocate the original) and
3285 return a pointer to the copy. The caller can safely have 'example'
3286 on its stack, since we will always return a pointer to a copy of
3287 it, not to the original. Note that the inserted node will have .rc
3288 of zero and so the caller must immediatly increment it. */
3289__attribute__((noinline))
3290static RCEC* ctxt__find_or_add ( RCEC* example )
3291{
3292 UWord hent;
3293 RCEC* copy;
3294 tl_assert(example && example->magic == RCEC_MAGIC);
3295 tl_assert(example->rc == 0);
3296
3297 /* Search the hash table to see if we already have it. */
3298 stats__ctxt_tab_qs++;
njn6c83d5e2009-05-05 23:46:24 +00003299 hent = example->frames_hash % N_RCEC_TAB;
sewardjf98e1c02008-10-25 16:22:41 +00003300 copy = contextTab[hent];
3301 while (1) {
3302 if (!copy) break;
3303 tl_assert(copy->magic == RCEC_MAGIC);
3304 stats__ctxt_tab_cmps++;
3305 if (0 == RCEC__cmp_by_frames(copy, example)) break;
3306 copy = copy->next;
3307 }
3308
3309 if (copy) {
3310 tl_assert(copy != example);
3311 /* optimisation: if it's not at the head of its list, move 1
3312 step fwds, to make future searches cheaper */
3313 if (copy != contextTab[hent]) {
3314 move_RCEC_one_step_forward( &contextTab[hent], copy );
3315 }
3316 } else {
sewardjd86e3a22008-12-03 11:39:37 +00003317 copy = alloc_RCEC();
sewardjf98e1c02008-10-25 16:22:41 +00003318 tl_assert(copy != example);
3319 *copy = *example;
3320 copy->next = contextTab[hent];
3321 contextTab[hent] = copy;
3322 stats__ctxt_tab_curr++;
3323 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
3324 stats__ctxt_tab_max = stats__ctxt_tab_curr;
3325 }
3326 return copy;
3327}
3328
3329static inline UWord ROLW ( UWord w, Int n )
3330{
3331 Int bpw = 8 * sizeof(UWord);
3332 w = (w << n) | (w >> (bpw-n));
3333 return w;
3334}
3335
3336__attribute__((noinline))
3337static RCEC* get_RCEC ( Thr* thr )
3338{
3339 UWord hash, i;
3340 RCEC example;
3341 example.magic = RCEC_MAGIC;
3342 example.rc = 0;
3343 example.rcX = 0;
njn6c83d5e2009-05-05 23:46:24 +00003344 main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
sewardjf98e1c02008-10-25 16:22:41 +00003345 hash = 0;
njn6c83d5e2009-05-05 23:46:24 +00003346 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00003347 hash ^= example.frames[i];
3348 hash = ROLW(hash, 19);
3349 }
njn6c83d5e2009-05-05 23:46:24 +00003350 example.frames_hash = hash;
sewardjf98e1c02008-10-25 16:22:41 +00003351 return ctxt__find_or_add( &example );
3352}
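/* Illustrative sketch only, not used by the library: the same
   fold-and-rotate hash that get_RCEC just above computes over its
   N_FRAMES stack entries, written out for a caller-supplied
   array.  The function name and the 'nFrames' parameter are
   hypothetical. */
#if 0
static UWord example_hash_frames ( const UWord* frames, UWord nFrames )
{
   UWord i, hash = 0;
   for (i = 0; i < nFrames; i++) {
      hash ^= frames[i];     /* fold in the next frame address */
      hash = ROLW(hash, 19); /* rotate, so that frame order matters */
   }
   return hash;
}
#endif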
3353
3354///////////////////////////////////////////////////////
sewardjbc307e52008-12-06 22:10:54 +00003355//// Part (2):
3356/// A SparseWA guest-addr -> OldRef, that refers to (1)
sewardjf98e1c02008-10-25 16:22:41 +00003357///
3358
3359// (UInt) `echo "Old Reference Information" | md5sum`
3360#define OldRef_MAGIC 0x30b1f075UL
3361
sewardjc5ea9962008-12-07 01:41:46 +00003362/* Records an access: a thread and a context. The size
3363 (1,2,4,8) and read-or-writeness are also encoded as
3364 follows: bottom bit of .thr is 1 if write, 0 if read;
3365 bottom 2 bits of .rcec encode the size:
3366 00 = 1, 01 = 2, 10 = 4, 11 = 8
3367*/
sewardjf98e1c02008-10-25 16:22:41 +00003368typedef struct { Thr* thr; RCEC* rcec; } Thr_n_RCEC;
3369
sewardj849b0ed2008-12-21 10:43:10 +00003370#define N_OLDREF_ACCS 5
sewardjf98e1c02008-10-25 16:22:41 +00003371
3372typedef
3373 struct {
sewardjd86e3a22008-12-03 11:39:37 +00003374 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00003375 UWord gen; /* when most recently accessed */
sewardjd86e3a22008-12-03 11:39:37 +00003376 /* or free list when not in use */
sewardjf98e1c02008-10-25 16:22:41 +00003377 /* unused slots in this array have .thr == NULL */
3378 Thr_n_RCEC accs[N_OLDREF_ACCS];
3379 }
3380 OldRef;
3381
sewardjd86e3a22008-12-03 11:39:37 +00003382
3383//////////// BEGIN OldRef group allocator
3384static GroupAlloc oldref_group_allocator;
3385
3386static OldRef* alloc_OldRef ( void ) {
3387 return gal_Alloc ( &oldref_group_allocator );
3388}
3389
3390static void free_OldRef ( OldRef* r ) {
3391 tl_assert(r->magic == OldRef_MAGIC);
3392 gal_Free( &oldref_group_allocator, r );
3393}
3394//////////// END OldRef group allocator
3395
sewardjd86e3a22008-12-03 11:39:37 +00003396
sewardjbc307e52008-12-06 22:10:54 +00003397static SparseWA* oldrefTree = NULL; /* SparseWA* OldRef* */
3398static UWord oldrefGen = 0; /* current LRU generation # */
3399static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
3400static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */
sewardjf98e1c02008-10-25 16:22:41 +00003401
sewardjc5ea9962008-12-07 01:41:46 +00003402inline static void* ptr_or_UWord ( void* p, UWord w ) {
3403 return (void*)( ((UWord)p) | ((UWord)w) );
3404}
3405inline static void* ptr_and_UWord ( void* p, UWord w ) {
3406 return (void*)( ((UWord)p) & ((UWord)w) );
3407}
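/* Illustrative sketch only, not called anywhere: how the two
   helpers above round-trip a 2-bit size tag through the low bits
   of an RCEC pointer.  It relies on RCECs being at least 4-byte
   aligned -- the same assumption the code below makes whenever it
   masks with ~3. */
#if 0
static void example_tag_roundtrip ( RCEC* rcec )
{
   RCEC* tagged   = ptr_or_UWord(rcec, 2);           /* encode szB == 4 */
   UWord szBits   = (UWord)ptr_and_UWord(tagged, 3); /* recover the tag */
   RCEC* untagged = ptr_and_UWord(tagged, ~3);       /* recover the ptr */
   tl_assert(szBits == 2 && untagged == rcec);
}
#endif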
3408
sewardj1669cc72008-12-13 01:20:21 +00003409inline static UInt min_UInt ( UInt a, UInt b ) {
3410 return a < b ? a : b;
3411}
3412
sewardja781be62008-12-08 00:12:28 +00003413/* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
3414 first interval is lower, 1 if the first interval is higher, and 0
3415 if there is any overlap. The redundant casts to UWord are
3416 paranoia, added after what looked distinctly like a bug in
3417 gcc-4.1.2, in which some of the comparisons were done signedly
3418 instead of unsignedly. */
3419/* Copied from exp-ptrcheck/sg_main.c */
3420static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
3421 Addr a2, SizeT n2 ) {
3422 UWord a1w = (UWord)a1;
3423 UWord n1w = (UWord)n1;
3424 UWord a2w = (UWord)a2;
3425 UWord n2w = (UWord)n2;
3426 tl_assert(n1w > 0 && n2w > 0);
3427 if (a1w + n1w <= a2w) return -1L;
3428 if (a2w + n2w <= a1w) return 1L;
3429 return 0;
3430}
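/* Illustrative examples: cmp_nonempty_intervals(0x1000,4, 0x1004,4)
   returns -1, since [0x1000,0x1004) lies entirely below
   [0x1004,0x1008); swapping the argument pairs returns 1; and
   cmp_nonempty_intervals(0x1000,8, 0x1004,4) returns 0, since
   [0x1000,0x1008) and [0x1004,0x1008) overlap. */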
3431
sewardjc5ea9962008-12-07 01:41:46 +00003432static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
sewardjf98e1c02008-10-25 16:22:41 +00003433{
sewardjd86e3a22008-12-03 11:39:37 +00003434 OldRef* ref;
sewardjc5ea9962008-12-07 01:41:46 +00003435 RCEC* rcec;
sewardjd86e3a22008-12-03 11:39:37 +00003436 Word i, j;
3437 UWord keyW, valW;
3438 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00003439
sewardjc5ea9962008-12-07 01:41:46 +00003440 rcec = get_RCEC( thr );
3441 ctxt__rcinc(rcec);
3442
3443 /* encode the writeness of the transaction in the bottom bit of
3444 thr, and its size in the bottom two bits of rcec. */
3445 thr = ptr_or_UWord(thr, isW ? 1 : 0);
3446 switch (szB) {
3447 /* This doesn't look particularly branch-predictor friendly. */
3448 case 1: rcec = ptr_or_UWord(rcec, 0); break;
3449 case 2: rcec = ptr_or_UWord(rcec, 1); break;
3450 case 4: rcec = ptr_or_UWord(rcec, 2); break;
3451 case 8: rcec = ptr_or_UWord(rcec, 3); break;
3452 default: tl_assert(0);
3453 }
3454
3455 /* Look in the map to see if we already have this. */
sewardjbc307e52008-12-06 22:10:54 +00003456 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, a );
sewardjf98e1c02008-10-25 16:22:41 +00003457
sewardjd86e3a22008-12-03 11:39:37 +00003458 if (b) {
sewardjf98e1c02008-10-25 16:22:41 +00003459
3460 /* We already have a record for this address. We now need to
sewardj849b0ed2008-12-21 10:43:10 +00003461 see if we have a stack trace pertaining to this (thread, R/W,
3462 size) triple. */
sewardjd86e3a22008-12-03 11:39:37 +00003463 tl_assert(keyW == a);
3464 ref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00003465 tl_assert(ref->magic == OldRef_MAGIC);
3466
3467 tl_assert(thr);
3468 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardj849b0ed2008-12-21 10:43:10 +00003469 if (ref->accs[i].thr != thr)
3470 continue;
3471 /* since .thr encodes both the accessing thread and the
3472 read/writeness, we know now that at least those features
3473 of the access match this entry. So we just need to check
3474 the size indication. Do this by inspecting the lowest 2 bits of
3475 .rcec, which contain the encoded size info. */
3476 if (ptr_and_UWord(ref->accs[i].rcec,3) != ptr_and_UWord(rcec,3))
3477 continue;
3478 /* else we have a match, so stop looking. */
3479 break;
sewardjf98e1c02008-10-25 16:22:41 +00003480 }
3481
3482 if (i < N_OLDREF_ACCS) {
3483 /* thread 'thr' has an entry at index 'i'. Update it. */
3484 if (i > 0) {
3485 Thr_n_RCEC tmp = ref->accs[i-1];
3486 ref->accs[i-1] = ref->accs[i];
3487 ref->accs[i] = tmp;
3488 i--;
3489 }
sewardjc5ea9962008-12-07 01:41:46 +00003490 if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
sewardjf98e1c02008-10-25 16:22:41 +00003491 stats__ctxt_rcdec1++;
sewardjc5ea9962008-12-07 01:41:46 +00003492 ctxt__rcdec( ptr_and_UWord(ref->accs[i].rcec, ~3) );
3493 ref->accs[i].rcec = rcec;
sewardjf98e1c02008-10-25 16:22:41 +00003494 tl_assert(ref->accs[i].thr == thr);
3495 } else {
sewardj849b0ed2008-12-21 10:43:10 +00003496 /* No entry for this (thread, R/W, size) triple. Shuffle all
3497 of them down one slot, and put the new entry at the start
3498 of the array. */
sewardjf98e1c02008-10-25 16:22:41 +00003499 if (ref->accs[N_OLDREF_ACCS-1].thr) {
3500 /* the last slot is in use. We must dec the rc on the
3501 associated rcec. */
3502 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
3503 stats__ctxt_rcdec2++;
sewardj849b0ed2008-12-21 10:43:10 +00003504 if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
3505 VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
sewardjc5ea9962008-12-07 01:41:46 +00003506 ctxt__rcdec( ptr_and_UWord(ref->accs[N_OLDREF_ACCS-1].rcec, ~3) );
sewardjf98e1c02008-10-25 16:22:41 +00003507 } else {
3508 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
3509 }
3510 for (j = N_OLDREF_ACCS-1; j >= 1; j--)
3511 ref->accs[j] = ref->accs[j-1];
3512 ref->accs[0].thr = thr;
sewardjc5ea9962008-12-07 01:41:46 +00003513 ref->accs[0].rcec = rcec;
3514 /* thr==NULL is used to signify an empty slot, so we can't
3515 add a NULL thr. */
3516 tl_assert(ptr_and_UWord(thr, ~3) != 0);
sewardjf98e1c02008-10-25 16:22:41 +00003517 }
3518
3519 ref->gen = oldrefGen;
sewardjf98e1c02008-10-25 16:22:41 +00003520
3521 } else {
3522
3523 /* We don't have a record for this address. Create a new one. */
3524 if (oldrefTreeN >= oldrefGenIncAt) {
3525 oldrefGen++;
3526 oldrefGenIncAt = oldrefTreeN + 50000;
3527 if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
3528 oldrefGen, oldrefTreeN );
3529 }
sewardjd86e3a22008-12-03 11:39:37 +00003530
3531 ref = alloc_OldRef();
sewardjf98e1c02008-10-25 16:22:41 +00003532 ref->magic = OldRef_MAGIC;
3533 ref->gen = oldrefGen;
sewardjc5ea9962008-12-07 01:41:46 +00003534 ref->accs[0].rcec = rcec;
sewardjf98e1c02008-10-25 16:22:41 +00003535 ref->accs[0].thr = thr;
sewardj849b0ed2008-12-21 10:43:10 +00003536 /* thr==NULL is used to signify an empty slot, so we can't add a
3537 NULL thr. */
3538 tl_assert(ptr_and_UWord(thr, ~3) != 0);
sewardjf98e1c02008-10-25 16:22:41 +00003539 for (j = 1; j < N_OLDREF_ACCS; j++) {
3540 ref->accs[j].thr = NULL;
3541 ref->accs[j].rcec = NULL;
3542 }
sewardjbc307e52008-12-06 22:10:54 +00003543 VG_(addToSWA)( oldrefTree, a, (UWord)ref );
sewardjf98e1c02008-10-25 16:22:41 +00003544 oldrefTreeN++;
3545
3546 }
3547}
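/* Worked example of the accs[] policy above (illustrative, with
   N_OLDREF_ACCS == 5; entries written as thread/R-or-W/size):
   starting from [T1w4, T2r4, -, -, -], a first 8-byte write by T2
   matches no slot (the R/W bit differs from T2r4), so the array
   is shuffled down one place and the new entry lands at the
   front: [T2w8, T1w4, T2r4, -, -].  A repeat of T1's 4-byte write
   then matches slot 1 and is swapped one place forward, giving
   [T1w4, T2w8, T2r4, -, -]. */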
3548
3549
sewardjc5ea9962008-12-07 01:41:46 +00003550Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
3551 /*OUT*/Thr** resThr,
3552 /*OUT*/SizeT* resSzB,
3553 /*OUT*/Bool* resIsW,
3554 Thr* thr, Addr a, SizeT szB, Bool isW )
sewardjf98e1c02008-10-25 16:22:41 +00003555{
sewardja781be62008-12-08 00:12:28 +00003556 Word i, j;
sewardjd86e3a22008-12-03 11:39:37 +00003557 OldRef* ref;
3558 UWord keyW, valW;
3559 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00003560
sewardjc5ea9962008-12-07 01:41:46 +00003561 Thr* cand_thr;
3562 RCEC* cand_rcec;
3563 Bool cand_isW;
3564 SizeT cand_szB;
sewardja781be62008-12-08 00:12:28 +00003565 Addr cand_a;
3566
3567 Addr toCheck[15];
3568 Int nToCheck = 0;
sewardjc5ea9962008-12-07 01:41:46 +00003569
3570 tl_assert(thr);
3571 tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
sewardjf98e1c02008-10-25 16:22:41 +00003572
sewardja781be62008-12-08 00:12:28 +00003573 toCheck[nToCheck++] = a;
3574 for (i = -7; i < (Word)szB; i++) {
3575 if (i != 0)
3576 toCheck[nToCheck++] = a + i;
3577 }
3578 tl_assert(nToCheck <= 15);
3579
3580 /* Now see if we can find a suitable matching event for
3581 any of the addresses in toCheck[0 .. nToCheck-1]. */
3582 for (j = 0; j < nToCheck; j++) {
3583
3584 cand_a = toCheck[j];
3585 // VG_(printf)("test %ld %p\n", j, cand_a);
3586
3587 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, cand_a );
3588 if (!b)
3589 continue;
3590
sewardjd86e3a22008-12-03 11:39:37 +00003591 ref = (OldRef*)valW;
sewardja781be62008-12-08 00:12:28 +00003592 tl_assert(keyW == cand_a);
sewardjf98e1c02008-10-25 16:22:41 +00003593 tl_assert(ref->magic == OldRef_MAGIC);
3594 tl_assert(ref->accs[0].thr); /* first slot must always be used */
3595
sewardjc5ea9962008-12-07 01:41:46 +00003596 cand_thr = NULL;
3597 cand_rcec = NULL;
3598 cand_isW = False;
3599 cand_szB = 0;
sewardjf98e1c02008-10-25 16:22:41 +00003600
sewardjc5ea9962008-12-07 01:41:46 +00003601 for (i = 0; i < N_OLDREF_ACCS; i++) {
3602 Thr_n_RCEC* cand = &ref->accs[i];
3603 cand_thr = ptr_and_UWord(cand->thr, ~3);
3604 cand_rcec = ptr_and_UWord(cand->rcec, ~3);
3605 /* Decode the writeness from the bottom bit of .thr. */
3606 cand_isW = 1 == (UWord)ptr_and_UWord(cand->thr, 1);
3607 /* Decode the size from the bottom two bits of .rcec. */
3608 switch ((UWord)ptr_and_UWord(cand->rcec, 3)) {
3609 case 0: cand_szB = 1; break;
3610 case 1: cand_szB = 2; break;
3611 case 2: cand_szB = 4; break;
3612 case 3: cand_szB = 8; break;
3613 default: tl_assert(0);
3614 }
3615
3616 if (cand_thr == NULL)
3617 /* This slot isn't in use. Ignore it. */
3618 continue;
3619
3620 if (cand_thr == thr)
3621 /* This is an access by the same thread, but we're only
3622 interested in accesses from other threads. Ignore. */
3623 continue;
3624
3625 if ((!cand_isW) && (!isW))
3626 /* We don't want to report a read racing against another
3627 read; that's stupid. So in this case move on. */
3628 continue;
3629
sewardja781be62008-12-08 00:12:28 +00003630 if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
3631 /* No overlap with the access we're asking about. Ignore. */
3632 continue;
3633
sewardjc5ea9962008-12-07 01:41:46 +00003634 /* We have a match. Stop searching. */
3635 break;
3636 }
3637
3638 tl_assert(i >= 0 && i <= N_OLDREF_ACCS);
3639
sewardja781be62008-12-08 00:12:28 +00003640 if (i < N_OLDREF_ACCS) {
njn3a4b58f2009-05-07 23:08:10 +00003641 Int n, maxNFrames;
sewardja781be62008-12-08 00:12:28 +00003642 /* return with success */
3643 tl_assert(cand_thr);
3644 tl_assert(cand_rcec);
3645 tl_assert(cand_rcec->magic == RCEC_MAGIC);
3646 tl_assert(cand_szB >= 1);
njn3a4b58f2009-05-07 23:08:10 +00003647 /* Count how many non-zero frames we have. */
3648 maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
3649 for (n = 0; n < maxNFrames; n++) {
3650 if (0 == cand_rcec->frames[n]) break;
3651 }
3652 *resEC = VG_(make_ExeContext_from_StackTrace)(cand_rcec->frames, n);
sewardja781be62008-12-08 00:12:28 +00003653 *resThr = cand_thr;
3654 *resSzB = cand_szB;
3655 *resIsW = cand_isW;
3656 return True;
3657 }
sewardjc5ea9962008-12-07 01:41:46 +00003658
sewardja781be62008-12-08 00:12:28 +00003659 /* consider next address in toCheck[] */
3660 } /* for (j = 0; j < nToCheck; j++) */
sewardjf98e1c02008-10-25 16:22:41 +00003661
sewardja781be62008-12-08 00:12:28 +00003662 /* really didn't find anything. */
3663 return False;
sewardjf98e1c02008-10-25 16:22:41 +00003664}
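/* Illustrative note on toCheck[] above: for a 4-byte access at
   address 'a', the loop builds { a, a-7 .. a-1, a+1 .. a+3 },
   i.e. every start address from which a recorded access of at
   most 8 bytes could overlap [a, a+4).  For an 8-byte access all
   15 slots are used. */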
3665
3666static void event_map_init ( void )
3667{
3668 Word i;
sewardjd86e3a22008-12-03 11:39:37 +00003669
3670 /* Context (RCEC) group allocator */
3671 init_GroupAlloc ( &rcec_group_allocator,
3672 sizeof(RCEC),
3673 1000 /* RCECs per group */,
3674 HG_(zalloc),
3675 "libhb.event_map_init.1 (RCEC groups)",
3676 HG_(free) );
3677
3678 /* Context table */
sewardjf98e1c02008-10-25 16:22:41 +00003679 tl_assert(!contextTab);
sewardjd86e3a22008-12-03 11:39:37 +00003680 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
sewardjf98e1c02008-10-25 16:22:41 +00003681 N_RCEC_TAB * sizeof(RCEC*) );
3682 tl_assert(contextTab);
3683 for (i = 0; i < N_RCEC_TAB; i++)
3684 contextTab[i] = NULL;
3685
sewardjd86e3a22008-12-03 11:39:37 +00003686 /* Oldref group allocator */
3687 init_GroupAlloc ( &oldref_group_allocator,
3688 sizeof(OldRef),
3689 1000 /* OldRefs per group */,
3690 HG_(zalloc),
3691 "libhb.event_map_init.3 (OldRef groups)",
3692 HG_(free) );
3693
sewardjd86e3a22008-12-03 11:39:37 +00003694 /* Oldref tree */
sewardjf98e1c02008-10-25 16:22:41 +00003695 tl_assert(!oldrefTree);
sewardjbc307e52008-12-06 22:10:54 +00003696 oldrefTree = VG_(newSWA)(
3697 HG_(zalloc),
sewardjd86e3a22008-12-03 11:39:37 +00003698 "libhb.event_map_init.4 (oldref tree)",
sewardjbc307e52008-12-06 22:10:54 +00003699 HG_(free)
sewardjf98e1c02008-10-25 16:22:41 +00003700 );
3701 tl_assert(oldrefTree);
3702
3703 oldrefGen = 0;
3704 oldrefGenIncAt = 0;
3705 oldrefTreeN = 0;
3706}
3707
3708static void event_map__check_reference_counts ( Bool before )
3709{
3710 RCEC* rcec;
3711 OldRef* oldref;
3712 Word i;
3713 UWord nEnts = 0;
sewardjd86e3a22008-12-03 11:39:37 +00003714 UWord keyW, valW;
sewardjf98e1c02008-10-25 16:22:41 +00003715
3716 /* Set the 'check' reference counts to zero. Also, optionally
3717 check that the real reference counts are non-zero. We allow
3718 these to fall to zero before a GC, but the GC must get rid of
3719 all those that are zero, hence none should be zero after a
3720 GC. */
3721 for (i = 0; i < N_RCEC_TAB; i++) {
3722 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
3723 nEnts++;
3724 tl_assert(rcec);
3725 tl_assert(rcec->magic == RCEC_MAGIC);
3726 if (!before)
3727 tl_assert(rcec->rc > 0);
3728 rcec->rcX = 0;
3729 }
3730 }
3731
3732 /* check that the stats are sane */
3733 tl_assert(nEnts == stats__ctxt_tab_curr);
3734 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
3735
3736 /* visit all the referencing points, inc check ref counts */
sewardjbc307e52008-12-06 22:10:54 +00003737 VG_(initIterSWA)( oldrefTree );
3738 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00003739 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00003740 tl_assert(oldref->magic == OldRef_MAGIC);
3741 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardjc5ea9962008-12-07 01:41:46 +00003742 Thr* aThr = ptr_and_UWord(oldref->accs[i].thr, ~3);
3743 RCEC* aRef = ptr_and_UWord(oldref->accs[i].rcec, ~3);
3744 if (aThr) {
3745 tl_assert(aRef);
3746 tl_assert(aRef->magic == RCEC_MAGIC);
3747 aRef->rcX++;
sewardjf98e1c02008-10-25 16:22:41 +00003748 } else {
sewardjc5ea9962008-12-07 01:41:46 +00003749 tl_assert(!aRef);
sewardjf98e1c02008-10-25 16:22:41 +00003750 }
3751 }
3752 }
3753
3754 /* compare check ref counts with actual */
3755 for (i = 0; i < N_RCEC_TAB; i++) {
3756 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
3757 tl_assert(rcec->rc == rcec->rcX);
3758 }
3759 }
3760}
3761
sewardj8fd92d32008-11-20 23:17:01 +00003762__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00003763static void event_map_maybe_GC ( void )
3764{
3765 OldRef* oldref;
3766 UWord keyW, valW, retained, maxGen;
sewardjf98e1c02008-10-25 16:22:41 +00003767 XArray* refs2del;
3768 Word i, j, n2del;
3769
sewardj8fd92d32008-11-20 23:17:01 +00003770 UWord* genMap = NULL;
3771 UWord genMap_min = 0;
3772 UWord genMap_size = 0;
3773
sewardj849b0ed2008-12-21 10:43:10 +00003774 if (LIKELY(oldrefTreeN < HG_(clo_conflict_cache_size)))
sewardjf98e1c02008-10-25 16:22:41 +00003775 return;
3776
3777 if (0)
3778 VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
3779
sewardj849b0ed2008-12-21 10:43:10 +00003780 /* Check for sane command line params. Limit values must match
3781 those in hg_process_cmd_line_option. */
3782 tl_assert( HG_(clo_conflict_cache_size) >= 10*1000 );
3783 tl_assert( HG_(clo_conflict_cache_size) <= 10*1000*1000 );
3784
sewardj8f5374e2008-12-07 11:40:17 +00003785 /* Check our counting is sane (expensive) */
3786 if (CHECK_CEM)
3787 tl_assert(oldrefTreeN == VG_(sizeSWA)( oldrefTree ));
sewardjf98e1c02008-10-25 16:22:41 +00003788
sewardj8f5374e2008-12-07 11:40:17 +00003789 /* Check the reference counts (expensive) */
3790 if (CHECK_CEM)
3791 event_map__check_reference_counts( True/*before*/ );
sewardjf98e1c02008-10-25 16:22:41 +00003792
sewardj8fd92d32008-11-20 23:17:01 +00003793 /* Compute the distribution of generation values in the ref tree.
3794 There are likely only to be a few different generation numbers
3795 in the whole tree, but we don't know what they are. Hence use a
3796 dynamically resized array of counters. The array is genMap[0
3797 .. genMap_size-1], where genMap[0] is the count for the
3798 generation number genMap_min, genMap[1] is the count for
3799 genMap_min+1, etc. If a new number is seen outside the range
3800 [genMap_min .. genMap_min + genMap_size - 1] then the array is
3801 copied into a larger array, and genMap_min and genMap_size are
3802 adjusted accordingly. */
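/* Illustrative example: if the generations seen so far are 7, 8
   and 9 (genMap_min == 7, genMap_size == 3) and generation 5 then
   turns up, 'case 2' below allocates a 5-entry array, copies the
   old counts into its top three slots, and sets genMap_min to 5;
   generation 5 is then counted at index 0. */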
3803
sewardjf98e1c02008-10-25 16:22:41 +00003804 /* genMap :: generation-number -> count-of-nodes-with-that-number */
sewardjf98e1c02008-10-25 16:22:41 +00003805
sewardjbc307e52008-12-06 22:10:54 +00003806 VG_(initIterSWA)( oldrefTree );
3807 while ( VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardj8fd92d32008-11-20 23:17:01 +00003808
sewardjd86e3a22008-12-03 11:39:37 +00003809 UWord ea, key;
3810 oldref = (OldRef*)valW;
3811 key = oldref->gen;
sewardj8fd92d32008-11-20 23:17:01 +00003812
3813 /* BEGIN find 'ea', which is the index in genMap holding the
3814 count for generation number 'key'. */
3815 if (UNLIKELY(genMap == NULL)) {
3816 /* deal with the first key to be seen, so that the following
3817 cases don't need to handle the complexity of a NULL count
3818 array. */
3819 genMap_min = key;
3820 genMap_size = 1;
3821 genMap = HG_(zalloc)( "libhb.emmG.1a",
3822 genMap_size * sizeof(UWord) );
3823 ea = 0;
3824 if (0) VG_(printf)("(%lu) case 1 [%lu .. %lu]\n",
3825 key, genMap_min, genMap_min + genMap_size - 1 );
sewardjf98e1c02008-10-25 16:22:41 +00003826 }
sewardj8fd92d32008-11-20 23:17:01 +00003827 else
3828 if (LIKELY(key >= genMap_min && key < genMap_min + genMap_size)) {
3829 /* this is the expected (almost-always-happens) case: 'key'
3830 is already mapped in the array. */
3831 ea = key - genMap_min;
3832 }
3833 else
3834 if (key < genMap_min) {
3835 /* 'key' appears before the start of the current array.
3836 Extend the current array by allocating a larger one and
3837 copying the current one to the upper end of it. */
3838 Word more;
3839 UWord* map2;
3840 more = genMap_min - key;
3841 tl_assert(more > 0);
3842 map2 = HG_(zalloc)( "libhb.emmG.1b",
3843 (genMap_size + more) * sizeof(UWord) );
3844 VG_(memcpy)( &map2[more], genMap, genMap_size * sizeof(UWord) );
3845 HG_(free)( genMap );
3846 genMap = map2;
3847 genMap_size += more;
3848 genMap_min -= more;
3849 ea = 0;
3850 tl_assert(genMap_min == key);
3851 if (0) VG_(printf)("(%lu) case 2 [%lu .. %lu]\n",
3852 key, genMap_min, genMap_min + genMap_size - 1 );
3853 }
3854 else {
3855 /* 'key' appears after the end of the current array. Extend
3856 the current array by allocating a larger one and copying
3857 the current one to the lower end of it. */
3858 Word more;
3859 UWord* map2;
3860 tl_assert(key >= genMap_min + genMap_size);
3861 more = key - (genMap_min + genMap_size) + 1;
3862 tl_assert(more > 0);
3863 map2 = HG_(zalloc)( "libhb.emmG.1c",
3864 (genMap_size + more) * sizeof(UWord) );
3865 VG_(memcpy)( &map2[0], genMap, genMap_size * sizeof(UWord) );
3866 HG_(free)( genMap );
3867 genMap = map2;
3868 genMap_size += more;
3869 ea = genMap_size - 1;
3870 tl_assert(genMap_min + genMap_size - 1 == key);
3871 if (0) VG_(printf)("(%lu) case 3 [%lu .. %lu]\n",
3872 key, genMap_min, genMap_min + genMap_size - 1 );
3873 }
3874 /* END find 'ea' from 'key' */
3875
3876 tl_assert(ea >= 0 && ea < genMap_size);
sewardjd86e3a22008-12-03 11:39:37 +00003877 /* and the whole point of this elaborate computation of 'ea' is .. */
sewardj8fd92d32008-11-20 23:17:01 +00003878 genMap[ea]++;
sewardjf98e1c02008-10-25 16:22:41 +00003879 }
3880
sewardj8fd92d32008-11-20 23:17:01 +00003881 tl_assert(genMap);
3882 tl_assert(genMap_size > 0);
sewardjf98e1c02008-10-25 16:22:41 +00003883
sewardj8fd92d32008-11-20 23:17:01 +00003884 /* Sanity check what we just computed */
3885 { UWord sum = 0;
3886 for (i = 0; i < genMap_size; i++) {
3887 if (0) VG_(printf)(" xxx: gen %ld has %lu\n",
3888 i + genMap_min, genMap[i] );
3889 sum += genMap[i];
3890 }
3891 tl_assert(sum == oldrefTreeN);
3892 }
3893
3894 /* Figure out how many generations to throw away */
sewardjf98e1c02008-10-25 16:22:41 +00003895 retained = oldrefTreeN;
3896 maxGen = 0;
sewardj8fd92d32008-11-20 23:17:01 +00003897
3898 for (i = 0; i < genMap_size; i++) {
3899 keyW = i + genMap_min;
3900 valW = genMap[i];
sewardjf98e1c02008-10-25 16:22:41 +00003901 tl_assert(keyW > 0); /* can't allow a generation # 0 */
3902 if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW );
3903 tl_assert(keyW >= maxGen);
3904 tl_assert(retained >= valW);
3905 if (retained - valW
sewardj849b0ed2008-12-21 10:43:10 +00003906 > (UWord)(HG_(clo_conflict_cache_size)
3907 * EVENT_MAP_GC_DISCARD_FRACTION)) {
sewardjf98e1c02008-10-25 16:22:41 +00003908 retained -= valW;
3909 maxGen = keyW;
3910 } else {
3911 break;
3912 }
3913 }
sewardjf98e1c02008-10-25 16:22:41 +00003914
sewardj8fd92d32008-11-20 23:17:01 +00003915 HG_(free)(genMap);
sewardjf98e1c02008-10-25 16:22:41 +00003916
sewardj9b1f0fd2008-11-18 23:40:00 +00003917 tl_assert(retained >= 0 && retained <= oldrefTreeN);
sewardjf98e1c02008-10-25 16:22:41 +00003918
3919 /* Now make up a big list of the oldrefTree entries we want to
3920 delete. We can't simultaneously traverse the tree and delete
3921 stuff from it, so first we need to copy them off somewhere
3922 else. (sigh) */
sewardj8fd92d32008-11-20 23:17:01 +00003923 refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2",
sewardjd86e3a22008-12-03 11:39:37 +00003924 HG_(free), sizeof(Addr) );
sewardjf98e1c02008-10-25 16:22:41 +00003925
sewardj9b1f0fd2008-11-18 23:40:00 +00003926 if (retained < oldrefTreeN) {
3927
3928 /* This is the normal (expected) case. We discard any ref whose
3929 generation number <= maxGen. */
sewardjbc307e52008-12-06 22:10:54 +00003930 VG_(initIterSWA)( oldrefTree );
3931 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00003932 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00003933 tl_assert(oldref->magic == OldRef_MAGIC);
3934 if (oldref->gen <= maxGen) {
sewardjd86e3a22008-12-03 11:39:37 +00003935 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00003936 }
sewardjf98e1c02008-10-25 16:22:41 +00003937 }
sewardj9b1f0fd2008-11-18 23:40:00 +00003938 if (VG_(clo_verbosity) > 1) {
3939 VG_(message)(Vg_DebugMsg,
3940 "libhb: EvM GC: delete generations %lu and below, "
sewardj24118492009-07-15 14:50:02 +00003941 "retaining %lu entries\n",
sewardj9b1f0fd2008-11-18 23:40:00 +00003942 maxGen, retained );
3943 }
3944
3945 } else {
3946
3947 static UInt rand_seed = 0; /* leave as static */
3948
3949 /* Degenerate case: there's only one generation in the entire
3950 tree, so we need to have some other way of deciding which
3951 refs to throw away. Just throw out half of them randomly. */
3952 tl_assert(retained == oldrefTreeN);
sewardjbc307e52008-12-06 22:10:54 +00003953 VG_(initIterSWA)( oldrefTree );
3954 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardj9b1f0fd2008-11-18 23:40:00 +00003955 UInt n;
sewardjd86e3a22008-12-03 11:39:37 +00003956 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00003957 tl_assert(oldref->magic == OldRef_MAGIC);
3958 n = VG_(random)( &rand_seed );
3959 if ((n & 0xFFF) < 0x800) {
sewardjd86e3a22008-12-03 11:39:37 +00003960 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00003961 retained--;
3962 }
3963 }
3964 if (VG_(clo_verbosity) > 1) {
3965 VG_(message)(Vg_DebugMsg,
3966 "libhb: EvM GC: randomly delete half the entries, "
sewardj24118492009-07-15 14:50:02 +00003967 "retaining %lu entries\n",
sewardj9b1f0fd2008-11-18 23:40:00 +00003968 retained );
3969 }
3970
sewardjf98e1c02008-10-25 16:22:41 +00003971 }
3972
3973 n2del = VG_(sizeXA)( refs2del );
3974 tl_assert(n2del == (Word)(oldrefTreeN - retained));
3975
3976 if (0) VG_(printf)("%s","deleting entries\n");
3977 for (i = 0; i < n2del; i++) {
sewardjd86e3a22008-12-03 11:39:37 +00003978 Bool b;
3979 Addr ga2del = *(Addr*)VG_(indexXA)( refs2del, i );
sewardjbc307e52008-12-06 22:10:54 +00003980 b = VG_(delFromSWA)( oldrefTree, &keyW, &valW, ga2del );
sewardjd86e3a22008-12-03 11:39:37 +00003981 tl_assert(b);
3982 tl_assert(keyW == ga2del);
3983 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00003984 for (j = 0; j < N_OLDREF_ACCS; j++) {
sewardjc5ea9962008-12-07 01:41:46 +00003985 Thr* aThr = ptr_and_UWord(oldref->accs[j].thr, ~3);
3986 RCEC* aRef = ptr_and_UWord(oldref->accs[j].rcec, ~3);
3987 if (aRef) {
3988 tl_assert(aThr);
sewardjf98e1c02008-10-25 16:22:41 +00003989 stats__ctxt_rcdec3++;
sewardjc5ea9962008-12-07 01:41:46 +00003990 ctxt__rcdec( aRef );
sewardjf98e1c02008-10-25 16:22:41 +00003991 } else {
sewardjc5ea9962008-12-07 01:41:46 +00003992 tl_assert(!aThr);
sewardjf98e1c02008-10-25 16:22:41 +00003993 }
3994 }
sewardjd86e3a22008-12-03 11:39:37 +00003995
3996 free_OldRef( oldref );
sewardjf98e1c02008-10-25 16:22:41 +00003997 }
3998
3999 VG_(deleteXA)( refs2del );
4000
sewardjc5ea9962008-12-07 01:41:46 +00004001 tl_assert( VG_(sizeSWA)( oldrefTree ) == retained );
sewardjf98e1c02008-10-25 16:22:41 +00004002
4003 oldrefTreeN = retained;
4004 oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
4005
4006 /* Throw away all RCECs with zero reference counts */
4007 for (i = 0; i < N_RCEC_TAB; i++) {
4008 RCEC** pp = &contextTab[i];
4009 RCEC* p = *pp;
4010 while (p) {
4011 if (p->rc == 0) {
4012 *pp = p->next;
sewardjd86e3a22008-12-03 11:39:37 +00004013 free_RCEC(p);
sewardjf98e1c02008-10-25 16:22:41 +00004014 p = *pp;
4015 tl_assert(stats__ctxt_tab_curr > 0);
4016 stats__ctxt_tab_curr--;
4017 } else {
4018 pp = &p->next;
4019 p = p->next;
4020 }
4021 }
4022 }
4023
sewardj8f5374e2008-12-07 11:40:17 +00004024 /* Check the reference counts (expensive) */
4025 if (CHECK_CEM)
4026 event_map__check_reference_counts( False/*after*/ );
sewardjf98e1c02008-10-25 16:22:41 +00004027
4028 //if (0)
4029 //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
4030 // VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
4031
4032}
4033
4034
4035/////////////////////////////////////////////////////////
4036// //
4037// Core MSM //
4038// //
4039/////////////////////////////////////////////////////////
4040
sewardj23f12002009-07-24 08:45:08 +00004041/* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
4042 Nov 08, and again after [...],
4043 June 09. */
sewardjb0e009d2008-11-19 16:35:15 +00004044
sewardj23f12002009-07-24 08:45:08 +00004045static ULong stats__msmcread = 0;
4046static ULong stats__msmcread_change = 0;
4047static ULong stats__msmcwrite = 0;
4048static ULong stats__msmcwrite_change = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004049
4050__attribute__((noinline))
4051static void record_race_info ( Thr* acc_thr,
sewardj23f12002009-07-24 08:45:08 +00004052 Addr acc_addr, SizeT szB, Bool isWrite,
4053 VtsID vtsConstraint, VtsID vtsKlock )
sewardjf98e1c02008-10-25 16:22:41 +00004054{
sewardjc5ea9962008-12-07 01:41:46 +00004055 /* Call here to report a race. We just hand it onwards to
4056 HG_(record_error_Race). If that in turn discovers that the
sewardj23f12002009-07-24 08:45:08 +00004057 error is going to be collected, then, at history_level 2, that
4058 queries the conflicting-event map. The alternative would be to
4059 query it right here. But that causes a lot of pointless queries
4060 for errors which will shortly be discarded as duplicates, and
4061 can become a performance overhead; so we defer the query until
4062 we know the error is not a duplicate. */
4063
4064 /* Stacks for the bounds of the (or one of the) conflicting
4065 segment(s). These are only set at history_level 1. */
4066 ExeContext* hist1_seg_start = NULL;
4067 ExeContext* hist1_seg_end = NULL;
4068 Thread* hist1_conf_thr = NULL;
4069
4070 tl_assert(acc_thr);
sewardjc5ea9962008-12-07 01:41:46 +00004071 tl_assert(acc_thr->opaque);
sewardj23f12002009-07-24 08:45:08 +00004072 tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);
4073
4074 if (HG_(clo_history_level) == 1) {
4075 Bool found;
4076 Word firstIx, lastIx;
4077 ULong_n_EC key;
4078
4079 /* At history_level 1, we must round up the relevant stack-pair
4080 for the conflicting segment right now. This is because
4081 deferring it is complex; we can't (easily) put vtsKlock and
4082 vtsConstraint into the XError and wait for later without
4083 getting tied up in difficulties with VtsID reference
4084 counting. So just do it now. */
4085 Thr* confThr;
4086 ULong confTym = 0;
4087 /* Which thread are we in conflict with? There may be more than
4088 one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
4089 (in fact it's the one with the lowest Thr* value). */
4090 confThr = VtsID__findFirst_notLEQ( vtsConstraint, vtsKlock );
4091 /* This must exist! since if it was NULL then there's no
4092 conflict (semantics of return value of VtsID__findFirst_notLEQ) */
4093 tl_assert(confThr);
4094
4095 /* Get the scalar clock value that the conflicting thread
4096 introduced into the constraint. A careful examination of the
4097 base machine rules shows that this must be the same as the
4098 conflicting thread's scalar clock when it created this
4099 constraint. Hence we know the scalar clock of the
4100 conflicting thread when the conflicting access was made. */
4101 confTym = VtsID__indexAt( vtsConstraint, confThr );
4102
4103 /* Using this scalar clock, index into the conflicting thread's
4104 collection of stack traces made each time its vector clock
4105 (hence its scalar clock) changed. This gives the stack
4106 traces at the start and end of the conflicting segment (well,
4107 as per comment just above, of one of the conflicting
4108 segments, if there are more than one). */
4109 key.ull = confTym;
4110 key.ec = NULL;
4111 /* tl_assert(confThr); -- asserted just above */
4112 tl_assert(confThr->local_Krs_n_stacks);
4113 firstIx = lastIx = 0;
4114 found = VG_(lookupXA_UNSAFE)(
4115 confThr->local_Krs_n_stacks,
4116 &key, &firstIx, &lastIx,
4117 (Int(*)(void*,void*))cmp__ULong_n_EC__by_ULong
4118 );
4119 if (0) VG_(printf)("record_race_info %u %u confThr %p "
4120 "confTym %llu found %d (%lu,%lu)\n",
4121 vtsConstraint, vtsKlock,
4122 confThr, confTym, found, firstIx, lastIx);
4123 /* We can't indefinitely collect stack traces at VTS
4124 transitions, since we'd eventually run out of memory. Hence
4125 note_local_Kr_n_stack_for will eventually throw away old
4126 ones, which in turn means we might fail to find index value
4127 confTym in the array. */
4128 if (found) {
4129 ULong_n_EC *pair_start, *pair_end;
4130 pair_start
4131 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Krs_n_stacks, lastIx );
4132 hist1_seg_start = pair_start->ec;
4133 if (lastIx+1 < VG_(sizeXA)( confThr->local_Krs_n_stacks )) {
4134 pair_end
4135 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Krs_n_stacks,
4136 lastIx+1 );
4137 /* from properties of VG_(lookupXA) and the comparison fn used: */
4138 tl_assert(pair_start->ull < pair_end->ull);
4139 hist1_seg_end = pair_end->ec;
4140 } else {
4141 if (confThr->still_alive)
4142 hist1_seg_end = main_get_EC( confThr );
4143 }
4144 // seg_start could be NULL iff this is the first stack in the thread
4145 //if (seg_start) VG_(pp_ExeContext)(seg_start);
4146 //if (seg_end) VG_(pp_ExeContext)(seg_end);
4147 hist1_conf_thr = confThr->opaque;
4148 }
4149 }
4150
sewardjc5ea9962008-12-07 01:41:46 +00004151 HG_(record_error_Race)( acc_thr->opaque, acc_addr,
sewardj23f12002009-07-24 08:45:08 +00004152 szB, isWrite,
4153 hist1_conf_thr, hist1_seg_start, hist1_seg_end );
sewardjf98e1c02008-10-25 16:22:41 +00004154}
4155
4156static Bool is_sane_SVal_C ( SVal sv ) {
sewardj23f12002009-07-24 08:45:08 +00004157 Bool leq;
sewardjf98e1c02008-10-25 16:22:41 +00004158 if (!SVal__isC(sv)) return True;
sewardj23f12002009-07-24 08:45:08 +00004159 leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
4160 return leq;
sewardjf98e1c02008-10-25 16:22:41 +00004161}
4162
4163
4164/* Compute new state following a read */
sewardj23f12002009-07-24 08:45:08 +00004165static inline SVal msmcread ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00004166 /* The following are only needed for
4167 creating error reports. */
4168 Thr* acc_thr,
4169 Addr acc_addr, SizeT szB )
4170{
4171 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00004172 stats__msmcread++;
sewardjf98e1c02008-10-25 16:22:41 +00004173
4174 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00004175 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004176 tl_assert(is_sane_SVal_C(svOld));
4177 }
4178
sewardj1c0ce7a2009-07-01 08:10:49 +00004179 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00004180 VtsID tviR = acc_thr->viR;
4181 VtsID tviW = acc_thr->viW;
4182 VtsID rmini = SVal__unC_Rmin(svOld);
4183 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00004184 Bool leq = VtsID__cmpLEQ(rmini,tviR);
4185 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00004186 /* no race */
4187 /* Note: RWLOCK subtlety: use tviW, not tviR */
4188 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
4189 goto out;
4190 } else {
sewardjb0e009d2008-11-19 16:35:15 +00004191 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00004192 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
4193 tl_assert(leqxx);
4194 // same as in non-race case
4195 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
4196 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
4197 rmini, tviR );
sewardjf98e1c02008-10-25 16:22:41 +00004198 goto out;
4199 }
4200 }
4201 if (SVal__isA(svOld)) {
4202 /* reading no-access memory (sigh); leave unchanged */
4203 /* check for no pollution */
4204 tl_assert(svOld == SVal_NOACCESS);
4205 svNew = SVal_NOACCESS;
4206 goto out;
4207 }
sewardj23f12002009-07-24 08:45:08 +00004208 if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00004209 tl_assert(0);
4210
4211 out:
sewardj8f5374e2008-12-07 11:40:17 +00004212 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004213 tl_assert(is_sane_SVal_C(svNew));
4214 }
sewardj1c0ce7a2009-07-01 08:10:49 +00004215 if (UNLIKELY(svNew != svOld)) {
4216 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00004217 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00004218 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00004219 event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00004220 stats__msmcread_change++;
sewardjf98e1c02008-10-25 16:22:41 +00004221 }
4222 }
4223 return svNew;
4224}
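/* Worked example of the read rule (illustrative; two-thread
   clocks written <T1,T2>): take a constraint with
   Rmin = Wmin = <1,0>, read by T2 whose viR = <1,4> and
   viW = <1,3>.  Rmin <= viR holds, so there is no race, and the
   new state is C(Rmin, join(Wmin,viW)) = C(<1,0>, <1,3>).  Note
   that it is the thread's write clock viW, not viR, that gets
   joined in -- the RWLOCK subtlety noted above. */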
4225
4226
4227/* Compute new state following a write */
sewardj23f12002009-07-24 08:45:08 +00004228static inline SVal msmcwrite ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00004229 /* The following are only needed for
4230 creating error reports. */
4231 Thr* acc_thr,
4232 Addr acc_addr, SizeT szB )
4233{
4234 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00004235 stats__msmcwrite++;
sewardjf98e1c02008-10-25 16:22:41 +00004236
4237 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00004238 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004239 tl_assert(is_sane_SVal_C(svOld));
4240 }
4241
sewardj1c0ce7a2009-07-01 08:10:49 +00004242 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00004243 VtsID tviW = acc_thr->viW;
4244 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00004245 Bool leq = VtsID__cmpLEQ(wmini,tviW);
4246 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00004247 /* no race */
4248 svNew = SVal__mkC( tviW, tviW );
4249 goto out;
4250 } else {
4251 VtsID rmini = SVal__unC_Rmin(svOld);
sewardjb0e009d2008-11-19 16:35:15 +00004252 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00004253 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
4254 tl_assert(leqxx);
4255 // same as in non-race case
4256 // proof: in the non-race case, we have
4257 // rmini <= wmini (invar on constraints)
4258 // tviW <= tviR (invar on thread clocks)
4259 // wmini <= tviW (from run-time check)
4260 // hence from transitivity of <= we have
4261 // rmini <= wmini <= tviW
4262 // and so join(rmini,tviW) == tviW
4263 // and join(wmini,tviW) == tviW
4264 // qed.
4265 svNew = SVal__mkC( VtsID__join2(rmini, tviW),
4266 VtsID__join2(wmini, tviW) );
4267 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
4268 wmini, acc_thr->viW );
sewardjf98e1c02008-10-25 16:22:41 +00004269 goto out;
4270 }
4271 }
4272 if (SVal__isA(svOld)) {
4273 /* writing no-access memory (sigh); leave unchanged */
4274 /* check for no pollution */
4275 tl_assert(svOld == SVal_NOACCESS);
4276 svNew = SVal_NOACCESS;
4277 goto out;
4278 }
sewardj23f12002009-07-24 08:45:08 +00004279 if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00004280 tl_assert(0);
4281
4282 out:
sewardj8f5374e2008-12-07 11:40:17 +00004283 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004284 tl_assert(is_sane_SVal_C(svNew));
4285 }
sewardj1c0ce7a2009-07-01 08:10:49 +00004286 if (UNLIKELY(svNew != svOld)) {
4287 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00004288 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00004289 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00004290 event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00004291 stats__msmcwrite_change++;
sewardjf98e1c02008-10-25 16:22:41 +00004292 }
4293 }
4294 return svNew;
4295}
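/* Worked example of the write rule (illustrative; two-thread
   clocks written <T1,T2>): take a constraint with
   Rmin = Wmin = <1,0>, written by T2 whose viW = <0,5>.  The
   check Wmin <= viW fails (1 > 0 in T1's slot), so a race is
   reported and the constraint becomes C(join(Rmin,viW),
   join(Wmin,viW)) = C(<1,5>, <1,5>).  Had T2 first synchronised
   with T1, so that viW = <1,5>, the check would pass and the new
   state would be C(viW, viW) = C(<1,5>, <1,5>). */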
4296
4297
4298/////////////////////////////////////////////////////////
4299// //
4300// Apply core MSM to specific memory locations //
4301// //
4302/////////////////////////////////////////////////////////
4303
sewardj23f12002009-07-24 08:45:08 +00004304/*------------- ZSM accesses: 8 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00004305
sewardj23f12002009-07-24 08:45:08 +00004306static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004307 CacheLine* cl;
4308 UWord cloff, tno, toff;
4309 SVal svOld, svNew;
4310 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004311 stats__cline_cread08s++;
sewardjf98e1c02008-10-25 16:22:41 +00004312 cl = get_cacheline(a);
4313 cloff = get_cacheline_offset(a);
4314 tno = get_treeno(a);
4315 toff = get_tree_offset(a); /* == 0 .. 7 */
4316 descr = cl->descrs[tno];
4317 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
4318 SVal* tree = &cl->svals[tno << 3];
4319 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00004320 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004321 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4322 }
4323 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004324 svNew = msmcread( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004325 if (CHECK_ZSM)
4326 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004327 cl->svals[cloff] = svNew;
4328}
4329
sewardj23f12002009-07-24 08:45:08 +00004330static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004331 CacheLine* cl;
4332 UWord cloff, tno, toff;
4333 SVal svOld, svNew;
4334 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004335 stats__cline_cwrite08s++;
sewardjf98e1c02008-10-25 16:22:41 +00004336 cl = get_cacheline(a);
4337 cloff = get_cacheline_offset(a);
4338 tno = get_treeno(a);
4339 toff = get_tree_offset(a); /* == 0 .. 7 */
4340 descr = cl->descrs[tno];
4341 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
4342 SVal* tree = &cl->svals[tno << 3];
4343 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00004344 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004345 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4346 }
4347 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004348 svNew = msmcwrite( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004349 if (CHECK_ZSM)
4350 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004351 cl->svals[cloff] = svNew;
4352}
4353
sewardj23f12002009-07-24 08:45:08 +00004354/*------------- ZSM accesses: 16 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00004355
sewardj23f12002009-07-24 08:45:08 +00004356static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004357 CacheLine* cl;
4358 UWord cloff, tno, toff;
4359 SVal svOld, svNew;
4360 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004361 stats__cline_cread16s++;
sewardjf98e1c02008-10-25 16:22:41 +00004362 if (UNLIKELY(!aligned16(a))) goto slowcase;
4363 cl = get_cacheline(a);
4364 cloff = get_cacheline_offset(a);
4365 tno = get_treeno(a);
4366 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
4367 descr = cl->descrs[tno];
4368 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
4369 if (valid_value_is_below_me_16(descr, toff)) {
4370 goto slowcase;
4371 } else {
4372 SVal* tree = &cl->svals[tno << 3];
4373 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
4374 }
sewardj8f5374e2008-12-07 11:40:17 +00004375 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004376 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4377 }
4378 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004379 svNew = msmcread( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004380 if (CHECK_ZSM)
4381 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004382 cl->svals[cloff] = svNew;
4383 return;
4384 slowcase: /* misaligned, or must go further down the tree */
4385 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00004386 zsm_sapply08__msmcread( thr, a + 0 );
4387 zsm_sapply08__msmcread( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00004388}
4389
sewardj23f12002009-07-24 08:45:08 +00004390static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004391 CacheLine* cl;
4392 UWord cloff, tno, toff;
4393 SVal svOld, svNew;
4394 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004395 stats__cline_cwrite16s++;
sewardjf98e1c02008-10-25 16:22:41 +00004396 if (UNLIKELY(!aligned16(a))) goto slowcase;
4397 cl = get_cacheline(a);
4398 cloff = get_cacheline_offset(a);
4399 tno = get_treeno(a);
4400 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
4401 descr = cl->descrs[tno];
4402 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
4403 if (valid_value_is_below_me_16(descr, toff)) {
4404 goto slowcase;
4405 } else {
4406 SVal* tree = &cl->svals[tno << 3];
4407 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
4408 }
sewardj8f5374e2008-12-07 11:40:17 +00004409 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004410 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4411 }
4412 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004413 svNew = msmcwrite( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004414 if (CHECK_ZSM)
4415 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004416 cl->svals[cloff] = svNew;
4417 return;
4418 slowcase: /* misaligned, or must go further down the tree */
4419 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00004420 zsm_sapply08__msmcwrite( thr, a + 0 );
4421 zsm_sapply08__msmcwrite( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00004422}
4423
sewardj23f12002009-07-24 08:45:08 +00004424/*------------- ZSM accesses: 32 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00004425
sewardj23f12002009-07-24 08:45:08 +00004426static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004427 CacheLine* cl;
4428 UWord cloff, tno, toff;
4429 SVal svOld, svNew;
4430 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004431 stats__cline_cread32s++;
sewardjf98e1c02008-10-25 16:22:41 +00004432 if (UNLIKELY(!aligned32(a))) goto slowcase;
4433 cl = get_cacheline(a);
4434 cloff = get_cacheline_offset(a);
4435 tno = get_treeno(a);
4436 toff = get_tree_offset(a); /* == 0 or 4 */
4437 descr = cl->descrs[tno];
4438 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
4439 if (valid_value_is_above_me_32(descr, toff)) {
4440 SVal* tree = &cl->svals[tno << 3];
4441 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
4442 } else {
4443 goto slowcase;
4444 }
sewardj8f5374e2008-12-07 11:40:17 +00004445 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004446 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4447 }
4448 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004449 svNew = msmcread( svOld, thr,a,4 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004450 if (CHECK_ZSM)
4451 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004452 cl->svals[cloff] = svNew;
4453 return;
4454 slowcase: /* misaligned, or must go further down the tree */
4455 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00004456 zsm_sapply16__msmcread( thr, a + 0 );
4457 zsm_sapply16__msmcread( thr, a + 2 );
sewardjf98e1c02008-10-25 16:22:41 +00004458}
4459
sewardj23f12002009-07-24 08:45:08 +00004460static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004461 CacheLine* cl;
4462 UWord cloff, tno, toff;
4463 SVal svOld, svNew;
4464 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004465 stats__cline_cwrite32s++;
sewardjf98e1c02008-10-25 16:22:41 +00004466 if (UNLIKELY(!aligned32(a))) goto slowcase;
4467 cl = get_cacheline(a);
4468 cloff = get_cacheline_offset(a);
4469 tno = get_treeno(a);
4470 toff = get_tree_offset(a); /* == 0 or 4 */
4471 descr = cl->descrs[tno];
4472 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
4473 if (valid_value_is_above_me_32(descr, toff)) {
4474 SVal* tree = &cl->svals[tno << 3];
4475 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
4476 } else {
4477 goto slowcase;
4478 }
sewardj8f5374e2008-12-07 11:40:17 +00004479 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004480 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4481 }
4482 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004483 svNew = msmcwrite( svOld, thr,a,4 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004484 if (CHECK_ZSM)
4485 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004486 cl->svals[cloff] = svNew;
4487 return;
4488 slowcase: /* misaligned, or must go further down the tree */
4489 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00004490 zsm_sapply16__msmcwrite( thr, a + 0 );
4491 zsm_sapply16__msmcwrite( thr, a + 2 );
sewardjf98e1c02008-10-25 16:22:41 +00004492}
4493
sewardj23f12002009-07-24 08:45:08 +00004494/*------------- ZSM accesses: 64 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00004495
sewardj23f12002009-07-24 08:45:08 +00004496static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004497 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00004498 UWord cloff, tno;
4499 //UWord toff;
sewardjf98e1c02008-10-25 16:22:41 +00004500 SVal svOld, svNew;
4501 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004502 stats__cline_cread64s++;
sewardjf98e1c02008-10-25 16:22:41 +00004503 if (UNLIKELY(!aligned64(a))) goto slowcase;
4504 cl = get_cacheline(a);
4505 cloff = get_cacheline_offset(a);
4506 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00004507 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00004508 descr = cl->descrs[tno];
4509 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
4510 goto slowcase;
4511 }
4512 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004513 svNew = msmcread( svOld, thr,a,8 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004514 if (CHECK_ZSM)
4515 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004516 cl->svals[cloff] = svNew;
4517 return;
4518 slowcase: /* misaligned, or must go further down the tree */
4519 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00004520 zsm_sapply32__msmcread( thr, a + 0 );
4521 zsm_sapply32__msmcread( thr, a + 4 );
sewardjf98e1c02008-10-25 16:22:41 +00004522}
4523
sewardj23f12002009-07-24 08:45:08 +00004524static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004525 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00004526 UWord cloff, tno;
4527 //UWord toff;
sewardjf98e1c02008-10-25 16:22:41 +00004528 SVal svOld, svNew;
4529 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004530 stats__cline_cwrite64s++;
sewardjf98e1c02008-10-25 16:22:41 +00004531 if (UNLIKELY(!aligned64(a))) goto slowcase;
4532 cl = get_cacheline(a);
4533 cloff = get_cacheline_offset(a);
4534 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00004535 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00004536 descr = cl->descrs[tno];
4537 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
4538 goto slowcase;
4539 }
4540 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004541 svNew = msmcwrite( svOld, thr,a,8 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004542 if (CHECK_ZSM)
4543 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004544 cl->svals[cloff] = svNew;
4545 return;
4546 slowcase: /* misaligned, or must go further down the tree */
4547 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00004548 zsm_sapply32__msmcwrite( thr, a + 0 );
4549 zsm_sapply32__msmcwrite( thr, a + 4 );
sewardjf98e1c02008-10-25 16:22:41 +00004550}
4551
sewardj23f12002009-07-24 08:45:08 +00004552/*--------------- ZSM accesses: 8 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00004553
4554static
sewardj23f12002009-07-24 08:45:08 +00004555void zsm_swrite08 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00004556 CacheLine* cl;
4557 UWord cloff, tno, toff;
4558 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004559 stats__cline_swrite08s++;
sewardjf98e1c02008-10-25 16:22:41 +00004560 cl = get_cacheline(a);
4561 cloff = get_cacheline_offset(a);
4562 tno = get_treeno(a);
4563 toff = get_tree_offset(a); /* == 0 .. 7 */
4564 descr = cl->descrs[tno];
4565 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
4566 SVal* tree = &cl->svals[tno << 3];
4567 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00004568 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004569 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4570 }
4571 tl_assert(svNew != SVal_INVALID);
4572 cl->svals[cloff] = svNew;
4573}
4574
sewardj23f12002009-07-24 08:45:08 +00004575/*--------------- ZSM accesses: 16 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00004576
4577static
sewardj23f12002009-07-24 08:45:08 +00004578void zsm_swrite16 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00004579 CacheLine* cl;
4580 UWord cloff, tno, toff;
4581 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004582 stats__cline_swrite16s++;
sewardjf98e1c02008-10-25 16:22:41 +00004583 if (UNLIKELY(!aligned16(a))) goto slowcase;
4584 cl = get_cacheline(a);
4585 cloff = get_cacheline_offset(a);
4586 tno = get_treeno(a);
4587 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
4588 descr = cl->descrs[tno];
4589 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
4590 if (valid_value_is_below_me_16(descr, toff)) {
4591 /* Writing at this level. Need to fix up 'descr'. */
4592 cl->descrs[tno] = pullup_descr_to_16(descr, toff);
4593 /* At this point, the tree does not match cl->descr[tno] any
4594 more. The assignments below will fix it up. */
4595 } else {
4596 /* We can't indiscriminately write on the w16 node as in the
4597 w64 case, as that might make the node inconsistent with
4598 its parent. So first, pull down to this level. */
4599 SVal* tree = &cl->svals[tno << 3];
4600 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00004601 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004602 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4603 }
4604 }
4605 tl_assert(svNew != SVal_INVALID);
4606 cl->svals[cloff + 0] = svNew;
4607 cl->svals[cloff + 1] = SVal_INVALID;
4608 return;
4609 slowcase: /* misaligned */
4610 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00004611 zsm_swrite08( a + 0, svNew );
4612 zsm_swrite08( a + 1, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00004613}
4614
sewardj23f12002009-07-24 08:45:08 +00004615/*--------------- ZSM accesses: 32 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00004616
4617static
sewardj23f12002009-07-24 08:45:08 +00004618void zsm_swrite32 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00004619 CacheLine* cl;
4620 UWord cloff, tno, toff;
4621 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004622 stats__cline_swrite32s++;
sewardjf98e1c02008-10-25 16:22:41 +00004623 if (UNLIKELY(!aligned32(a))) goto slowcase;
4624 cl = get_cacheline(a);
4625 cloff = get_cacheline_offset(a);
4626 tno = get_treeno(a);
4627 toff = get_tree_offset(a); /* == 0 or 4 */
4628 descr = cl->descrs[tno];
4629 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
4630 if (valid_value_is_above_me_32(descr, toff)) {
4631 /* We can't indiscriminately write on the w32 node as in the
4632 w64 case, as that might make the node inconsistent with
4633 its parent. So first, pull down to this level. */
4634 SVal* tree = &cl->svals[tno << 3];
4635 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00004636 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004637 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4638 } else {
4639 /* Writing at this level. Need to fix up 'descr'. */
4640 cl->descrs[tno] = pullup_descr_to_32(descr, toff);
4641 /* At this point, the tree does not match cl->descr[tno] any
4642 more. The assignments below will fix it up. */
4643 }
4644 }
4645 tl_assert(svNew != SVal_INVALID);
4646 cl->svals[cloff + 0] = svNew;
4647 cl->svals[cloff + 1] = SVal_INVALID;
4648 cl->svals[cloff + 2] = SVal_INVALID;
4649 cl->svals[cloff + 3] = SVal_INVALID;
4650 return;
4651 slowcase: /* misaligned */
4652 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00004653 zsm_swrite16( a + 0, svNew );
4654 zsm_swrite16( a + 2, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00004655}
4656
sewardj23f12002009-07-24 08:45:08 +00004657/*--------------- ZSM accesses: 64 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00004658
4659static
sewardj23f12002009-07-24 08:45:08 +00004660void zsm_swrite64 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00004661 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00004662 UWord cloff, tno;
4663 //UWord toff;
sewardj23f12002009-07-24 08:45:08 +00004664 stats__cline_swrite64s++;
sewardjf98e1c02008-10-25 16:22:41 +00004665 if (UNLIKELY(!aligned64(a))) goto slowcase;
4666 cl = get_cacheline(a);
4667 cloff = get_cacheline_offset(a);
4668 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00004669 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00004670 cl->descrs[tno] = TREE_DESCR_64;
4671 tl_assert(svNew != SVal_INVALID);
4672 cl->svals[cloff + 0] = svNew;
4673 cl->svals[cloff + 1] = SVal_INVALID;
4674 cl->svals[cloff + 2] = SVal_INVALID;
4675 cl->svals[cloff + 3] = SVal_INVALID;
4676 cl->svals[cloff + 4] = SVal_INVALID;
4677 cl->svals[cloff + 5] = SVal_INVALID;
4678 cl->svals[cloff + 6] = SVal_INVALID;
4679 cl->svals[cloff + 7] = SVal_INVALID;
4680 return;
4681 slowcase: /* misaligned */
4682 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00004683 zsm_swrite32( a + 0, svNew );
4684 zsm_swrite32( a + 4, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00004685}
4686
sewardj23f12002009-07-24 08:45:08 +00004687/*------------- ZSM accesses: 8 bit sread/scopy ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00004688
4689static
sewardj23f12002009-07-24 08:45:08 +00004690SVal zsm_sread08 ( Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004691 CacheLine* cl;
4692 UWord cloff, tno, toff;
4693 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004694 stats__cline_sread08s++;
sewardjf98e1c02008-10-25 16:22:41 +00004695 cl = get_cacheline(a);
4696 cloff = get_cacheline_offset(a);
4697 tno = get_treeno(a);
4698 toff = get_tree_offset(a); /* == 0 .. 7 */
4699 descr = cl->descrs[tno];
4700 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
4701 SVal* tree = &cl->svals[tno << 3];
4702 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
4703 }
4704 return cl->svals[cloff];
4705}
4706
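/* Copy the shadow value of a single byte from src to dst.  The
   normalise hint passed down from zsm_scopy_range is accepted but
   currently unused, as the 'uu_' prefix on the parameter indicates. */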
sewardj23f12002009-07-24 08:45:08 +00004707static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
sewardjf98e1c02008-10-25 16:22:41 +00004708 SVal sv;
sewardj23f12002009-07-24 08:45:08 +00004709 stats__cline_scopy08s++;
4710 sv = zsm_sread08( src );
4711 zsm_swrite08( dst, sv );
sewardjf98e1c02008-10-25 16:22:41 +00004712}
4713
4714
sewardj23f12002009-07-24 08:45:08 +00004715/* Block-copy states (needed for implementing realloc()). Note this
4716 doesn't change the filtering arrangements. The caller of
4717 zsm_scopy_range needs to attend to that. */
sewardjf98e1c02008-10-25 16:22:41 +00004718
sewardj23f12002009-07-24 08:45:08 +00004719static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
sewardjf98e1c02008-10-25 16:22:41 +00004720{
4721 SizeT i;
4722 if (len == 0)
4723 return;
4724
4725 /* assert for non-overlappingness */
4726 tl_assert(src+len <= dst || dst+len <= src);
4727
4728 /* To be simple, just copy byte by byte. But so as not to wreck
4729 performance for later accesses to dst[0 .. len-1], normalise
4730 destination lines as we finish with them, and also normalise the
4731 line containing the first and last address. */
4732 for (i = 0; i < len; i++) {
4733 Bool normalise
4734 = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
4735 || i == 0 /* first in range */
4736 || i == len-1; /* last in range */
sewardj23f12002009-07-24 08:45:08 +00004737 zsm_scopy08( src+i, dst+i, normalise );
sewardjf98e1c02008-10-25 16:22:41 +00004738 }
4739}
4740
4741
4742/* For setting address ranges to a given value. Has considerable
4743 sophistication so as to avoid generating large numbers of pointless
4744 cache loads/writebacks for large ranges. */
4745
4746/* Do small ranges in-cache, in the obvious way. */
4747static
sewardj23f12002009-07-24 08:45:08 +00004748void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
sewardjf98e1c02008-10-25 16:22:41 +00004749{
4750 /* fast track a couple of common cases */
4751 if (len == 4 && aligned32(a)) {
sewardj23f12002009-07-24 08:45:08 +00004752 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00004753 return;
4754 }
4755 if (len == 8 && aligned64(a)) {
sewardj23f12002009-07-24 08:45:08 +00004756 zsm_swrite64( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00004757 return;
4758 }
4759
4760 /* be completely general (but as efficient as possible) */
4761 if (len == 0) return;
4762
4763 if (!aligned16(a) && len >= 1) {
sewardj23f12002009-07-24 08:45:08 +00004764 zsm_swrite08( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00004765 a += 1;
4766 len -= 1;
4767 tl_assert(aligned16(a));
4768 }
4769 if (len == 0) return;
4770
4771 if (!aligned32(a) && len >= 2) {
sewardj23f12002009-07-24 08:45:08 +00004772 zsm_swrite16( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00004773 a += 2;
4774 len -= 2;
4775 tl_assert(aligned32(a));
4776 }
4777 if (len == 0) return;
4778
4779 if (!aligned64(a) && len >= 4) {
sewardj23f12002009-07-24 08:45:08 +00004780 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00004781 a += 4;
4782 len -= 4;
4783 tl_assert(aligned64(a));
4784 }
4785 if (len == 0) return;
4786
4787 if (len >= 8) {
4788 tl_assert(aligned64(a));
4789 while (len >= 8) {
sewardj23f12002009-07-24 08:45:08 +00004790 zsm_swrite64( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00004791 a += 8;
4792 len -= 8;
4793 }
4794 tl_assert(aligned64(a));
4795 }
4796 if (len == 0) return;
4797
4798 if (len >= 4)
4799 tl_assert(aligned32(a));
4800 if (len >= 4) {
sewardj23f12002009-07-24 08:45:08 +00004801 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00004802 a += 4;
4803 len -= 4;
4804 }
4805 if (len == 0) return;
4806
4807 if (len >= 2)
4808 tl_assert(aligned16(a));
4809 if (len >= 2) {
sewardj23f12002009-07-24 08:45:08 +00004810 zsm_swrite16( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00004811 a += 2;
4812 len -= 2;
4813 }
4814 if (len == 0) return;
4815
4816 if (len >= 1) {
sewardj23f12002009-07-24 08:45:08 +00004817 zsm_swrite08( a, svNew );
njn4c245e52009-03-15 23:25:38 +00004818 //a += 1;
sewardjf98e1c02008-10-25 16:22:41 +00004819 len -= 1;
4820 }
4821 tl_assert(len == 0);
4822}
4823
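/* Worked example of the peeling order above: for a == 0x1003 and
   len == 13, the calls issued are zsm_swrite08(0x1003),
   zsm_swrite32(0x1004) and zsm_swrite64(0x1008) -- 1 + 4 + 8 == 13
   bytes, each write maximally aligned for its size. */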
4824
sewardj23f12002009-07-24 08:45:08 +00004825/* If we're doing a small range, hand off to zsm_sset_range_SMALL. But
sewardjf98e1c02008-10-25 16:22:41 +00004826 for larger ranges, try to operate directly on the out-of-cache
4827 representation, rather than dragging lines into the cache,
4828 overwriting them, and forcing them out. This turns out to be an
sewardj23f12002009-07-24 08:45:08 +00004829 important performance optimisation.
sewardjf98e1c02008-10-25 16:22:41 +00004830
sewardj23f12002009-07-24 08:45:08 +00004831 Note that this doesn't change the filtering arrangements. The
4832 caller of zsm_sset_range needs to attend to that. */
4833
4834static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
sewardjf98e1c02008-10-25 16:22:41 +00004835{
4836 tl_assert(svNew != SVal_INVALID);
4837 stats__cache_make_New_arange += (ULong)len;
4838
4839 if (0 && len > 500)
4840 VG_(printf)("make New ( %#lx, %ld )\n", a, len );
4841
4842 if (0) {
4843 static UWord n_New_in_cache = 0;
4844 static UWord n_New_not_in_cache = 0;
4845 /* tag is 'a' with the in-line offset masked out,
4846 eg a[31]..a[4] 0000 */
4847 Addr tag = a & ~(N_LINE_ARANGE - 1);
4848 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
4849 if (LIKELY(tag == cache_shmem.tags0[wix])) {
4850 n_New_in_cache++;
4851 } else {
4852 n_New_not_in_cache++;
4853 }
4854 if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
4855 VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
4856 n_New_in_cache, n_New_not_in_cache );
4857 }
4858
4859 if (LIKELY(len < 2 * N_LINE_ARANGE)) {
sewardj23f12002009-07-24 08:45:08 +00004860 zsm_sset_range_SMALL( a, len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00004861 } else {
4862 Addr before_start = a;
4863 Addr aligned_start = cacheline_ROUNDUP(a);
4864 Addr after_start = cacheline_ROUNDDN(a + len);
4865 UWord before_len = aligned_start - before_start;
4866 UWord aligned_len = after_start - aligned_start;
4867 UWord after_len = a + len - after_start;
4868 tl_assert(before_start <= aligned_start);
4869 tl_assert(aligned_start <= after_start);
4870 tl_assert(before_len < N_LINE_ARANGE);
4871 tl_assert(after_len < N_LINE_ARANGE);
4872 tl_assert(get_cacheline_offset(aligned_start) == 0);
4873 if (get_cacheline_offset(a) == 0) {
4874 tl_assert(before_len == 0);
4875 tl_assert(a == aligned_start);
4876 }
4877 if (get_cacheline_offset(a+len) == 0) {
4878 tl_assert(after_len == 0);
4879 tl_assert(after_start == a+len);
4880 }
4881 if (before_len > 0) {
sewardj23f12002009-07-24 08:45:08 +00004882 zsm_sset_range_SMALL( before_start, before_len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00004883 }
4884 if (after_len > 0) {
sewardj23f12002009-07-24 08:45:08 +00004885 zsm_sset_range_SMALL( after_start, after_len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00004886 }
4887 stats__cache_make_New_inZrep += (ULong)aligned_len;
4888
4889 while (1) {
4890 Addr tag;
4891 UWord wix;
4892 if (aligned_start >= after_start)
4893 break;
4894 tl_assert(get_cacheline_offset(aligned_start) == 0);
4895 tag = aligned_start & ~(N_LINE_ARANGE - 1);
4896 wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
4897 if (tag == cache_shmem.tags0[wix]) {
4898 UWord i;
4899 for (i = 0; i < N_LINE_ARANGE / 8; i++)
sewardj23f12002009-07-24 08:45:08 +00004900 zsm_swrite64( aligned_start + i * 8, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00004901 } else {
4902 UWord i;
4903 Word zix;
4904 SecMap* sm;
4905 LineZ* lineZ;
4906 /* This line is not in the cache. Do not force it in; instead
4907 modify it in-place. */
4908 /* find the Z line to write in and rcdec it or the
4909 associated F line. */
4910 find_Z_for_writing( &sm, &zix, tag );
4911 tl_assert(sm);
4912 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
4913 lineZ = &sm->linesZ[zix];
4914 lineZ->dict[0] = svNew;
4915 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
4916 for (i = 0; i < N_LINE_ARANGE/4; i++)
4917 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
4918 rcinc_LineZ(lineZ);
4919 }
4920 aligned_start += N_LINE_ARANGE;
4921 aligned_len -= N_LINE_ARANGE;
4922 }
4923 tl_assert(aligned_start == after_start);
4924 tl_assert(aligned_len == 0);
4925 }
4926}
4927
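/* Worked example of the large-range split above, assuming
   N_LINE_ARANGE == 64: for a == 0x1005, len == 0x100 we get
      before  = [0x1005, 0x1040), len 0x3b -> zsm_sset_range_SMALL
      aligned = [0x1040, 0x1100), len 0xc0 -> three whole lines,
                written via zsm_swrite64 if cached, else directly
                into the LineZ representation
      after   = [0x1100, 0x1105), len 0x05 -> zsm_sset_range_SMALL */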
4928
4929/////////////////////////////////////////////////////////
4930// //
sewardj23f12002009-07-24 08:45:08 +00004931// Front-filtering accesses //
4932// //
4933/////////////////////////////////////////////////////////
4934
4935static UWord stats__f_ac = 0;
4936static UWord stats__f_sk = 0;
4937
4938#if 0
4939# define STATS__F_SHOW \
4940 do { \
4941 if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
4942 VG_(printf)("filters: ac %lu sk %lu\n", \
4943 stats__f_ac, stats__f_sk); \
4944 } while (0)
4945#else
4946# define STATS__F_SHOW /* */
4947#endif
4948
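/* The wrappers below give every access a fast path: consult the
   per-thread Filter first and fall through to the full
   zsm_sapply*__msmc{read,write} machinery only when the filter
   cannot vouch for the access.  The real Filter type and its
   Filter__ok_to_skip_* operations live elsewhere; the fragment below
   is only a sketch of the general shape such a filter can take -- a
   small direct-mapped table of recently-seen line tags, with all
   names and sizes hypothetical. */
#if 0
#define TOY_FI_NENT 1024
typedef struct { Addr tags[TOY_FI_NENT]; } ToyFilter;
static inline Bool toy_filter_ok_to_skip ( ToyFilter* fi, Addr a ) {
   Addr  tag = a & ~(Addr)63;                 /* 64-byte line tag    */
   UWord ix  = (a >> 6) & (TOY_FI_NENT - 1);  /* direct-mapped index */
   if (fi->tags[ix] == tag)
      return True;      /* line seen recently: safe to skip the apply */
   fi->tags[ix] = tag;  /* remember it so the next access can skip    */
   return False;
}
#endif
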
4949void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
4950 stats__f_ac++;
4951 STATS__F_SHOW;
4952 if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
4953 stats__f_sk++;
4954 return;
4955 }
4956 zsm_sapply08__msmcwrite(thr, a);
4957}
4958
4959void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
4960 stats__f_ac++;
4961 STATS__F_SHOW;
4962 if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
4963 stats__f_sk++;
4964 return;
4965 }
4966 zsm_sapply16__msmcwrite(thr, a);
4967}
4968
4969void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
4970 stats__f_ac++;
4971 STATS__F_SHOW;
4972 if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
4973 stats__f_sk++;
4974 return;
4975 }
4976 zsm_sapply32__msmcwrite(thr, a);
4977}
4978
4979void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
4980 stats__f_ac++;
4981 STATS__F_SHOW;
4982 if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
4983 stats__f_sk++;
4984 return;
4985 }
4986 zsm_sapply64__msmcwrite(thr, a);
4987}
4988
4989void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
4990{
4991 /* fast track a couple of common cases */
4992 if (len == 4 && aligned32(a)) {
4993 zsm_sapply32_f__msmcwrite( thr, a );
4994 return;
4995 }
4996 if (len == 8 && aligned64(a)) {
4997 zsm_sapply64_f__msmcwrite( thr, a );
4998 return;
4999 }
5000
5001 /* be completely general (but as efficient as possible) */
5002 if (len == 0) return;
5003
5004 if (!aligned16(a) && len >= 1) {
5005 zsm_sapply08_f__msmcwrite( thr, a );
5006 a += 1;
5007 len -= 1;
5008 tl_assert(aligned16(a));
5009 }
5010 if (len == 0) return;
5011
5012 if (!aligned32(a) && len >= 2) {
5013 zsm_sapply16_f__msmcwrite( thr, a );
5014 a += 2;
5015 len -= 2;
5016 tl_assert(aligned32(a));
5017 }
5018 if (len == 0) return;
5019
5020 if (!aligned64(a) && len >= 4) {
5021 zsm_sapply32_f__msmcwrite( thr, a );
5022 a += 4;
5023 len -= 4;
5024 tl_assert(aligned64(a));
5025 }
5026 if (len == 0) return;
5027
5028 if (len >= 8) {
5029 tl_assert(aligned64(a));
5030 while (len >= 8) {
5031 zsm_sapply64_f__msmcwrite( thr, a );
5032 a += 8;
5033 len -= 8;
5034 }
5035 tl_assert(aligned64(a));
5036 }
5037 if (len == 0) return;
5038
5039 if (len >= 4)
5040 tl_assert(aligned32(a));
5041 if (len >= 4) {
5042 zsm_sapply32_f__msmcwrite( thr, a );
5043 a += 4;
5044 len -= 4;
5045 }
5046 if (len == 0) return;
5047
5048 if (len >= 2)
5049 tl_assert(aligned16(a));
5050 if (len >= 2) {
5051 zsm_sapply16_f__msmcwrite( thr, a );
5052 a += 2;
5053 len -= 2;
5054 }
5055 if (len == 0) return;
5056
5057 if (len >= 1) {
5058 zsm_sapply08_f__msmcwrite( thr, a );
5059 //a += 1;
5060 len -= 1;
5061 }
5062 tl_assert(len == 0);
5063}
5064
5065void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
5066 stats__f_ac++;
5067 STATS__F_SHOW;
5068 if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
5069 stats__f_sk++;
5070 return;
5071 }
5072 zsm_sapply08__msmcread(thr, a);
5073}
5074
5075void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
5076 stats__f_ac++;
5077 STATS__F_SHOW;
5078 if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
5079 stats__f_sk++;
5080 return;
5081 }
5082 zsm_sapply16__msmcread(thr, a);
5083}
5084
5085void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
5086 stats__f_ac++;
5087 STATS__F_SHOW;
5088 if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
5089 stats__f_sk++;
5090 return;
5091 }
5092 zsm_sapply32__msmcread(thr, a);
5093}
5094
5095void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
5096 stats__f_ac++;
5097 STATS__F_SHOW;
5098 if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
5099 stats__f_sk++;
5100 return;
5101 }
5102 zsm_sapply64__msmcread(thr, a);
5103}
5104
5105void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
5106{
5107 /* fast track a couple of common cases */
5108 if (len == 4 && aligned32(a)) {
5109 zsm_sapply32_f__msmcread( thr, a );
5110 return;
5111 }
5112 if (len == 8 && aligned64(a)) {
5113 zsm_sapply64_f__msmcread( thr, a );
5114 return;
5115 }
5116
5117 /* be completely general (but as efficient as possible) */
5118 if (len == 0) return;
5119
5120 if (!aligned16(a) && len >= 1) {
5121 zsm_sapply08_f__msmcread( thr, a );
5122 a += 1;
5123 len -= 1;
5124 tl_assert(aligned16(a));
5125 }
5126 if (len == 0) return;
5127
5128 if (!aligned32(a) && len >= 2) {
5129 zsm_sapply16_f__msmcread( thr, a );
5130 a += 2;
5131 len -= 2;
5132 tl_assert(aligned32(a));
5133 }
5134 if (len == 0) return;
5135
5136 if (!aligned64(a) && len >= 4) {
5137 zsm_sapply32_f__msmcread( thr, a );
5138 a += 4;
5139 len -= 4;
5140 tl_assert(aligned64(a));
5141 }
5142 if (len == 0) return;
5143
5144 if (len >= 8) {
5145 tl_assert(aligned64(a));
5146 while (len >= 8) {
5147 zsm_sapply64_f__msmcread( thr, a );
5148 a += 8;
5149 len -= 8;
5150 }
5151 tl_assert(aligned64(a));
5152 }
5153 if (len == 0) return;
5154
5155 if (len >= 4)
5156 tl_assert(aligned32(a));
5157 if (len >= 4) {
5158 zsm_sapply32_f__msmcread( thr, a );
5159 a += 4;
5160 len -= 4;
5161 }
5162 if (len == 0) return;
5163
5164 if (len >= 2)
5165 tl_assert(aligned16(a));
5166 if (len >= 2) {
5167 zsm_sapply16_f__msmcread( thr, a );
5168 a += 2;
5169 len -= 2;
5170 }
5171 if (len == 0) return;
5172
5173 if (len >= 1) {
5174 zsm_sapply08_f__msmcread( thr, a );
5175 //a += 1;
5176 len -= 1;
5177 }
5178 tl_assert(len == 0);
5179}
5180
5181void libhb_Thr_resumes ( Thr* thr )
5182{
5183 if (0) VG_(printf)("resume %p\n", thr);
5184 Filter__clear(thr->filter, "libhb_Thr_resumes");
5185 /* A kludge, but ... if this thread doesn't have any marker stacks
5186 at all, get one right now. This is easier than figuring out
5187 exactly when at thread startup we can and can't take a stack
5188 snapshot. */
5189 tl_assert(thr->local_Krs_n_stacks);
5190 if (VG_(sizeXA)( thr->local_Krs_n_stacks ) == 0)
5191 note_local_Kr_n_stack_for(thr);
5192}
5193
5194
5195/////////////////////////////////////////////////////////
5196// //
sewardjf98e1c02008-10-25 16:22:41 +00005197// Synchronisation objects //
5198// //
5199/////////////////////////////////////////////////////////
5200
5201// (UInt) `echo "Synchronisation object" | md5sum`
5202#define SO_MAGIC 0x56b3c5b0U
5203
5204struct _SO {
5205 VtsID viR; /* r-clock of sender */
5206 VtsID viW; /* w-clock of sender */
5207 UInt magic;
5208};
5209
5210static SO* SO__Alloc ( void ) {
5211 SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
5212 so->viR = VtsID_INVALID;
5213 so->viW = VtsID_INVALID;
5214 so->magic = SO_MAGIC;
5215 return so;
5216}
5217static void SO__Dealloc ( SO* so ) {
5218 tl_assert(so);
5219 tl_assert(so->magic == SO_MAGIC);
5220 if (so->viR == VtsID_INVALID) {
5221 tl_assert(so->viW == VtsID_INVALID);
5222 } else {
5223 tl_assert(so->viW != VtsID_INVALID);
5224 VtsID__rcdec(so->viR);
5225 VtsID__rcdec(so->viW);
5226 }
5227 so->magic = 0;
5228 HG_(free)( so );
5229}
5230
5231
5232/////////////////////////////////////////////////////////
5233// //
5234// Top Level API //
5235// //
5236/////////////////////////////////////////////////////////
5237
5238static void show_thread_state ( HChar* str, Thr* t )
5239{
5240 if (1) return;
5241 if (t->viR == t->viW) {
5242 VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
5243 VtsID__pp( t->viR );
5244 VG_(printf)("%s","\n");
5245 } else {
5246 VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
5247 VtsID__pp( t->viR );
5248 VG_(printf)(" viW %u==", t->viW);
5249 VtsID__pp( t->viW );
5250 VG_(printf)("%s","\n");
5251 }
5252}
5253
5254
5255Thr* libhb_init (
5256 void (*get_stacktrace)( Thr*, Addr*, UWord ),
sewardjd52392d2008-11-08 20:36:26 +00005257 ExeContext* (*get_EC)( Thr* )
sewardjf98e1c02008-10-25 16:22:41 +00005258 )
5259{
5260 Thr* thr;
5261 VtsID vi;
5262 tl_assert(get_stacktrace);
sewardjf98e1c02008-10-25 16:22:41 +00005263 tl_assert(get_EC);
5264 main_get_stacktrace = get_stacktrace;
sewardjf98e1c02008-10-25 16:22:41 +00005265 main_get_EC = get_EC;
5266
5267 // No need to initialise hg_wordfm.
5268 // No need to initialise hg_wordset.
5269
5270 vts_set_init();
5271 vts_tab_init();
5272 event_map_init();
5273 VtsID__invalidate_caches();
5274
5275 // initialise shadow memory
5276 zsm_init( SVal__rcinc, SVal__rcdec );
5277
5278 thr = Thr__new();
5279 vi = VtsID__mk_Singleton( thr, 1 );
5280 thr->viR = vi;
5281 thr->viW = vi;
5282 VtsID__rcinc(thr->viR);
5283 VtsID__rcinc(thr->viW);
5284
5285 show_thread_state(" root", thr);
5286 return thr;
5287}
5288
sewardj23f12002009-07-24 08:45:08 +00005289
sewardjf98e1c02008-10-25 16:22:41 +00005290Thr* libhb_create ( Thr* parent )
5291{
5292 /* The child's VTSs are copies of the parent's VTSs, but ticked at
5293 the child's index. Since the child's index is guaranteed
5294 unique, it has never been seen before, so the implicit value
5295 before the tick is zero and after that is one. */
5296 Thr* child = Thr__new();
5297
5298 child->viR = VtsID__tick( parent->viR, child );
5299 child->viW = VtsID__tick( parent->viW, child );
sewardj23f12002009-07-24 08:45:08 +00005300 Filter__clear(child->filter, "libhb_create(child)");
sewardjf98e1c02008-10-25 16:22:41 +00005301 VtsID__rcinc(child->viR);
5302 VtsID__rcinc(child->viW);
sewardj23f12002009-07-24 08:45:08 +00005303 /* We need to do note_local_Kr_n_stack_for( child ), but it's too
5304 early for that - it may not have a valid TId yet. So, let
5305 libhb_Thr_resumes pick it up the first time the thread runs. */
sewardjf98e1c02008-10-25 16:22:41 +00005306
5307 tl_assert(VtsID__indexAt( child->viR, child ) == 1);
5308 tl_assert(VtsID__indexAt( child->viW, child ) == 1);
5309
5310 /* and the parent has to move along too */
5311 VtsID__rcdec(parent->viR);
5312 VtsID__rcdec(parent->viW);
5313 parent->viR = VtsID__tick( parent->viR, parent );
5314 parent->viW = VtsID__tick( parent->viW, parent );
sewardj23f12002009-07-24 08:45:08 +00005315 Filter__clear(parent->filter, "libhb_create(parent)");
sewardjf98e1c02008-10-25 16:22:41 +00005316 VtsID__rcinc(parent->viR);
5317 VtsID__rcinc(parent->viW);
sewardj23f12002009-07-24 08:45:08 +00005318 note_local_Kr_n_stack_for( parent );
sewardjf98e1c02008-10-25 16:22:41 +00005319
5320 show_thread_state(" child", child);
5321 show_thread_state("parent", parent);
5322
5323 return child;
5324}
5325
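/* A minimal sketch of the vector-clock algebra used above and below
   (tick, join2, cmpLEQ), shown on a toy fixed-width clock rather
   than the real VTS/VtsID machinery -- all names here are
   hypothetical. */
#if 0
#define TOY_NTHR 4
typedef struct { UWord ts[TOY_NTHR]; } ToyVC;
static void toy_tick ( ToyVC* vc, UWord me ) {
   vc->ts[me]++;                        /* advance own component only */
}
static void toy_join2 ( ToyVC* dst, ToyVC* a, ToyVC* b ) {
   UWord i;                             /* pointwise maximum */
   for (i = 0; i < TOY_NTHR; i++)
      dst->ts[i] = a->ts[i] > b->ts[i] ? a->ts[i] : b->ts[i];
}
static Bool toy_cmpLEQ ( ToyVC* a, ToyVC* b ) {
   UWord i;   /* True iff a <= b pointwise, i.e. everything a has
                 witnessed, b has witnessed too */
   for (i = 0; i < TOY_NTHR; i++)
      if (a->ts[i] > b->ts[i]) return False;
   return True;
}
#endif
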
5326/* Shut down the library, and print stats (in fact that's _all_
5327 this is for). */
5328void libhb_shutdown ( Bool show_stats )
5329{
5330 if (show_stats) {
5331 VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
5332 VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
5333 stats__secmaps_allocd,
5334 stats__secmap_ga_space_covered);
5335 VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
5336 stats__secmap_linesZ_allocd,
5337 stats__secmap_linesZ_bytes);
5338 VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
5339 stats__secmap_linesF_allocd,
5340 stats__secmap_linesF_bytes);
5341 VG_(printf)(" secmaps: %'10lu iterator steppings\n",
5342 stats__secmap_iterator_steppings);
5343 VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
5344 stats__secmaps_search, stats__secmaps_search_slow);
5345
5346 VG_(printf)("%s","\n");
5347 VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
5348 stats__cache_totrefs, stats__cache_totmisses );
5349 VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
5350 stats__cache_Z_fetches, stats__cache_F_fetches );
5351 VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
5352 stats__cache_Z_wbacks, stats__cache_F_wbacks );
5353 VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n",
5354 stats__cache_invals, stats__cache_flushes );
5355 VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
5356 stats__cache_make_New_arange,
5357 stats__cache_make_New_inZrep);
5358
5359 VG_(printf)("%s","\n");
5360 VG_(printf)(" cline: %'10lu normalises\n",
5361 stats__cline_normalises );
sewardj23f12002009-07-24 08:45:08 +00005362 VG_(printf)(" cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
5363 stats__cline_cread64s,
5364 stats__cline_cread32s,
5365 stats__cline_cread16s,
5366 stats__cline_cread08s );
5367 VG_(printf)(" cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
5368 stats__cline_cwrite64s,
5369 stats__cline_cwrite32s,
5370 stats__cline_cwrite16s,
5371 stats__cline_cwrite08s );
5372 VG_(printf)(" cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
5373 stats__cline_swrite64s,
5374 stats__cline_swrite32s,
5375 stats__cline_swrite16s,
5376 stats__cline_swrite08s );
5377 VG_(printf)(" cline: s rd1s %'lu, s copy1s %'lu\n",
5378 stats__cline_sread08s, stats__cline_scopy08s );
sewardjf98e1c02008-10-25 16:22:41 +00005379 VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
5380 stats__cline_64to32splits,
5381 stats__cline_32to16splits,
5382 stats__cline_16to8splits );
5383 VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
5384 stats__cline_64to32pulldown,
5385 stats__cline_32to16pulldown,
5386 stats__cline_16to8pulldown );
5387 if (0)
5388 VG_(printf)(" cline: sizeof(CacheLineZ) %ld, covers %ld bytes of arange\n",
5389 (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);
5390
5391 VG_(printf)("%s","\n");
5392
sewardj23f12002009-07-24 08:45:08 +00005393 VG_(printf)(" libhb: %'13llu msmcread (%'llu changed)\n",
5394 stats__msmcread, stats__msmcread_change);
5395 VG_(printf)(" libhb: %'13llu msmcwrite (%'llu changed)\n",
5396 stats__msmcwrite, stats__msmcwrite_change);
5397 VG_(printf)(" libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
5398 stats__cmpLEQ_queries, stats__cmpLEQ_misses);
sewardjf98e1c02008-10-25 16:22:41 +00005399 VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
5400 stats__join2_queries, stats__join2_misses);
5401
5402 VG_(printf)("%s","\n");
5403 VG_(printf)(
5404 " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
5405 VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
5406 );
5407 VG_(printf)( " libhb: %lu entries in vts_set\n",
5408 VG_(sizeFM)( vts_set ) );
5409
5410 VG_(printf)("%s","\n");
5411 VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
5412 stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
5413 stats__ctxt_rcdec2,
5414 stats__ctxt_rcdec3 );
5415 VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
5416 stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
5417 VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n",
5418 (UWord)N_RCEC_TAB,
5419 stats__ctxt_tab_curr );
5420 VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
5421 stats__ctxt_tab_qs,
5422 stats__ctxt_tab_cmps );
5423#if 0
5424 VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
5425 VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
5426 VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
5427 VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
5428 VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
5429 VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
5430 VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
5431 VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
5432 VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
5433 VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
5434 VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
5435 VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
5436 VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
5437 VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
5438
5439 VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
5440 VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
5441 VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
5442 VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
5443#endif
5444
5445 VG_(printf)("%s","<<< END libhb stats >>>\n");
5446 VG_(printf)("%s","\n");
5447
5448 }
5449}
5450
5451void libhb_async_exit ( Thr* thr )
5452{
sewardj23f12002009-07-24 08:45:08 +00005453 tl_assert(thr);
5454 thr->still_alive = False;
5455 /* XXX free up Filter and local_Krs_n_stacks */
sewardjf98e1c02008-10-25 16:22:41 +00005456}
5457
5458/* Both Segs and SOs point to VTSs. However, there is no sharing, so
5459 a Seg that points at a VTS is its one-and-only owner, and ditto for
5460 a SO that points at a VTS. */
5461
5462SO* libhb_so_alloc ( void )
5463{
5464 return SO__Alloc();
5465}
5466
5467void libhb_so_dealloc ( SO* so )
5468{
5469 tl_assert(so);
5470 tl_assert(so->magic == SO_MAGIC);
5471 SO__Dealloc(so);
5472}
5473
5474/* See comments in libhb.h for details on the meaning of
5475 strong vs weak sends and strong vs weak receives. */
5476void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
5477{
5478 /* Copy the VTSs from 'thr' into the sync object, and then move
5479 the thread along one step. */
5480
5481 tl_assert(so);
5482 tl_assert(so->magic == SO_MAGIC);
5483
5484 /* stay sane ... a thread's read-clock must always lead or be the
5485 same as its write-clock */
sewardj23f12002009-07-24 08:45:08 +00005486 { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
5487 tl_assert(leq);
sewardjf98e1c02008-10-25 16:22:41 +00005488 }
5489
5490 /* since we're overwriting the VtsIDs in the SO, we need to drop
5491 any references made by the previous contents thereof */
5492 if (so->viR == VtsID_INVALID) {
5493 tl_assert(so->viW == VtsID_INVALID);
5494 so->viR = thr->viR;
5495 so->viW = thr->viW;
5496 VtsID__rcinc(so->viR);
5497 VtsID__rcinc(so->viW);
5498 } else {
5499 /* In a strong send, we dump any previous VC in the SO and
5500 install the sending thread's VC instead. For a weak send we
5501 must join2 with what's already there. */
5502 tl_assert(so->viW != VtsID_INVALID);
5503 VtsID__rcdec(so->viR);
5504 VtsID__rcdec(so->viW);
5505 so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
5506 so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
5507 VtsID__rcinc(so->viR);
5508 VtsID__rcinc(so->viW);
5509 }
5510
5511 /* move both parent clocks along */
5512 VtsID__rcdec(thr->viR);
5513 VtsID__rcdec(thr->viW);
5514 thr->viR = VtsID__tick( thr->viR, thr );
5515 thr->viW = VtsID__tick( thr->viW, thr );
sewardj23f12002009-07-24 08:45:08 +00005516 Filter__clear(thr->filter, "libhb_so_send");
5517 if (thr->still_alive)
5518 note_local_Kr_n_stack_for(thr);
sewardjf98e1c02008-10-25 16:22:41 +00005519 VtsID__rcinc(thr->viR);
5520 VtsID__rcinc(thr->viW);
sewardj23f12002009-07-24 08:45:08 +00005521
sewardjf98e1c02008-10-25 16:22:41 +00005522 if (strong_send)
5523 show_thread_state("s-send", thr);
5524 else
5525 show_thread_state("w-send", thr);
5526}
5527
5528void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
5529{
5530 tl_assert(so);
5531 tl_assert(so->magic == SO_MAGIC);
5532
5533 if (so->viR != VtsID_INVALID) {
5534 tl_assert(so->viW != VtsID_INVALID);
5535
5536 /* Weak receive (basically, an R-acquisition of a R-W lock).
5537 This advances the read-clock of the receiver, but not the
5538 write-clock. */
5539 VtsID__rcdec(thr->viR);
5540 thr->viR = VtsID__join2( thr->viR, so->viR );
5541 VtsID__rcinc(thr->viR);
5542
sewardj23f12002009-07-24 08:45:08 +00005543   /* QQQ: also tick the receiver's own read-clock component here */
5544   VtsID__rcdec(thr->viR);
5545   thr->viR = VtsID__tick( thr->viR, thr );
5546   VtsID__rcinc(thr->viR);
5547
sewardjf98e1c02008-10-25 16:22:41 +00005548 /* For a strong receive, we also advance the receiver's write
5549 clock, which means the receive as a whole is essentially
5550 equivalent to a W-acquisition of a R-W lock. */
5551 if (strong_recv) {
5552 VtsID__rcdec(thr->viW);
5553 thr->viW = VtsID__join2( thr->viW, so->viW );
5554 VtsID__rcinc(thr->viW);
sewardj23f12002009-07-24 08:45:08 +00005555
5556
5557      /* QQQ: likewise also tick the receiver's write-clock here */
5558      VtsID__rcdec(thr->viW);
5559      thr->viW = VtsID__tick( thr->viW, thr );
5560      VtsID__rcinc(thr->viW);
5561
5562
sewardjf98e1c02008-10-25 16:22:41 +00005563 }
5564
sewardj23f12002009-07-24 08:45:08 +00005565 Filter__clear(thr->filter, "libhb_so_recv");
5566 note_local_Kr_n_stack_for(thr);
5567
sewardjf98e1c02008-10-25 16:22:41 +00005568 if (strong_recv)
5569 show_thread_state("s-recv", thr);
5570 else
5571 show_thread_state("w-recv", thr);
5572
5573 } else {
5574 tl_assert(so->viW == VtsID_INVALID);
5575 /* Deal with degenerate case: 'so' has no vts, so there has been
5576 no message posted to it. Just ignore this case. */
5577 show_thread_state("d-recv", thr);
5578 }
5579}
5580
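/* A hedged sketch of how a client typically drives the two calls
   above; thrA, thrB and so are hypothetical, the real caller being
   the Helgrind core.  A strong send at a release point paired with a
   recv at the matching acquire point makes everything thrA did
   before the send happen-before everything thrB does after the
   recv. */
#if 0
/* thread A, at a release point (e.g. an unlock): */
libhb_so_send( thrA, so, True/*strong_send*/ );
/* thread B, at the matching acquire point (e.g. the lock): */
libhb_so_recv( thrB, so, True/*strong_recv*/ );
#endif
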
5581Bool libhb_so_everSent ( SO* so )
5582{
5583 if (so->viR == VtsID_INVALID) {
5584 tl_assert(so->viW == VtsID_INVALID);
5585 return False;
5586 } else {
5587 tl_assert(so->viW != VtsID_INVALID);
5588 return True;
5589 }
5590}
5591
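/* Ad-hoc debugging aids: TRACEME(a,szB) holds when either of the
   (normally zero) addresses XXX1/XXX2 lies within [a, a+szB]. */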
5592#define XXX1 0 // 0x67a106c
5593#define XXX2 0
5594
sewardj23f12002009-07-24 08:45:08 +00005595static inline Bool TRACEME(Addr a, SizeT szB) {
sewardjf98e1c02008-10-25 16:22:41 +00005596 if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
5597 if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
5598 return False;
5599}
5600static void trace ( Thr* thr, Addr a, SizeT szB, HChar* s ) {
sewardj23f12002009-07-24 08:45:08 +00005601 SVal sv = zsm_sread08(a);
sewardjf98e1c02008-10-25 16:22:41 +00005602 VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
5603 show_thread_state("", thr);
5604 VG_(printf)("%s","\n");
5605}
5606
sewardj23f12002009-07-24 08:45:08 +00005607void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
sewardjf98e1c02008-10-25 16:22:41 +00005608{
5609 SVal sv = SVal__mkC(thr->viW, thr->viW);
5610 tl_assert(is_sane_SVal_C(sv));
sewardj23f12002009-07-24 08:45:08 +00005611 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
5612 zsm_sset_range( a, szB, sv );
5613 Filter__clear_range( thr->filter, a, szB );
5614 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
sewardjf98e1c02008-10-25 16:22:41 +00005615}
5616
sewardj23f12002009-07-24 08:45:08 +00005617void libhb_srange_noaccess ( Thr* thr, Addr a, SizeT szB )
sewardjf98e1c02008-10-25 16:22:41 +00005618{
sewardj23f12002009-07-24 08:45:08 +00005619 /* do nothing */
sewardjf98e1c02008-10-25 16:22:41 +00005620}
5621
5622void* libhb_get_Thr_opaque ( Thr* thr ) {
5623 tl_assert(thr);
5624 return thr->opaque;
5625}
5626
5627void libhb_set_Thr_opaque ( Thr* thr, void* v ) {
5628 tl_assert(thr);
5629 thr->opaque = v;
5630}
5631
sewardj23f12002009-07-24 08:45:08 +00005632void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
sewardjf98e1c02008-10-25 16:22:41 +00005633{
sewardj23f12002009-07-24 08:45:08 +00005634 zsm_scopy_range(src, dst, len);
5635 Filter__clear_range( thr->filter, dst, len );
sewardjf98e1c02008-10-25 16:22:41 +00005636}
5637
5638void libhb_maybe_GC ( void )
5639{
5640 event_map_maybe_GC();
5641 /* If there are still freelist entries available, no need for a
5642 GC. */
5643 if (vts_tab_freelist != VtsID_INVALID)
5644 return;
5645 /* So all the table entries are full, and we're having to expand
5646 the table. But did we hit the threshold point yet? */
5647 if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
5648 return;
5649 vts_tab__do_GC( False/*don't show stats*/ );
5650}
5651
5652
5653/////////////////////////////////////////////////////////////////
5654/////////////////////////////////////////////////////////////////
5655// //
5656// SECTION END main library //
5657// //
5658/////////////////////////////////////////////////////////////////
5659/////////////////////////////////////////////////////////////////
5660
5661/*--------------------------------------------------------------------*/
5662/*--- end libhb_main.c ---*/
5663/*--------------------------------------------------------------------*/